#! /usr/bin/env perl
my
%opts
;
$opts
{
'keys'
} = 1;
my
$whandler
=
$SIG
{__WARN__};
$SIG
{__WARN__} =
sub
{
print
STDERR
"$0: @_"
;};
unless
(GetOptions (\
%opts
,
'cleanse'
,
'columns|c=s'
,
'file|f=s'
,
'format=s'
,
'headline|h'
,
'insert-only|i'
,
'rows=s'
,
'keys|k=s'
,
'map|m=s'
,
'map-filter=s'
,
'match-sql=s'
,
'routine|r=s'
,
'skipbadlines'
,
'table|t=s'
,
'update-only|o'
)) {
exit
1;
}
$SIG
{__WARN__} =
$whandler
;
my
$format
=
'TAB'
;
my
%inforef
= ();
my
%funcref
= (
CSV
=> {
get_columns
=> \
&get_columns_csv
},
TAB
=> {
get_columns
=> \
&get_columns_tab
},
XLS
=> {
get_columns
=> \
&get_columns_xls
});
my
%mfref
= (
lc
=>
sub
{
lc
(
shift
)});
my
$sep_char
=
','
;
my
$mfsub
;
if
(
$opts
{
'cleanse'
} ||
$opts
{
'headline'
}) {
unless
(
$opts
{
'table'
}) {
die
(
"$0: missing table name\n"
);
}
}
if
(
$opts
{
'map-filter'
}) {
unless
(
exists
(
$mfref
{
$opts
{
'map-filter'
}})) {
die
qq{$0: unknown column name filter "$opts{'map-filter'}
"},
"\n"
;
}
$mfsub
=
$mfref
{
$opts
{
'map-filter'
}};
}
if
(
$opts
{
'format'
}) {
$format
=
uc
(
$opts
{
'format'
});
if
(
$format
=~ /^CSV/) {
$format
=
'CSV'
;
if
($') {
$sep_char
= $';
$sep_char
=~ s/^\s+//;
$sep_char
=~ s/\s+$//;
}
eval
{
};
if
($@) {
die
"$0: couldn't load module Text::CSV_XS\n"
;
}
$inforef
{object} = new Text::CSV_XS ({
'binary'
=> 1,
'sep_char'
=>
$sep_char
});
}
elsif
(
$format
eq
'XLS'
) {
eval
{
};
if
($@) {
die
"$0: couldn't load module Spreadsheet::ParseExcel\n"
;
}
$inforef
{object} = new Spreadsheet::ParseExcel;
}
else
{
die
(
"$0: unknown format \""
.
$opts
{
'format'
} .
"\"\n"
);
}
}
my
%fieldmap
;
my
$fd_input
;
my
(
$sth
,
$keyfield
,
$update
,
$msg
);
my
(
$table
,
$fieldnames
,
@values
,
$headline
);
my
(
@columns
,
$routine
,
%colmap
);
my
$linebuf
=
''
;
my
$colflag
= 1;
my
%usecol
;
my
$rowflag
;
my
%userow
;
my
$currow
= 0;
my
$startcol
;
my
(
%matchmap
,
$matchcol
);
if
(
$opts
{
'columns'
}) {
$colflag
= ! (
$opts
{
'columns'
} =~ s/\s*[\!^]//);
for
(
@columns
=
split
(/\s*,\s*/,
$opts
{
'columns'
})) {
$usecol
{
$_
} =
$colflag
;
}
}
if
(
$opts
{
'rows'
}) {
my
@rows
;
$rowflag
= ! (
$opts
{
'rows'
} =~ s/\s*[^\!]//);
for
(
@rows
=
split
(/\s*,\s*/,
$opts
{
'rows'
})) {
unless
(/^\d+$/) {
die
"$0: row number \"$_\" is not numeric\n"
;
}
$userow
{
$_
} =
$rowflag
;
}
}
if
(
$opts
{
'file'
}) {
$fd_input
= new IO::File;
$fd_input
->
open
(
$opts
{
'file'
})
||
die
"$0: couldn't open $opts{'file'}: $!\n"
;
}
else
{
$fd_input
= new IO::Handle;
$fd_input
->fdopen(
fileno
(STDIN),
'r'
);
}
if
(
$opts
{
'map'
}) {
my
(
$head
,
$name
);
foreach
(
split
(/;/,
$opts
{
'map'
})) {
(
$head
,
$name
) =
split
/=/;
$colmap
{
$head
} =
$name
;
}
}
my
$csv
;
if
(
$opts
{
'headline'
}) {
my
%hcolmap
;
my
@columns
;
if
(
$funcref
{
$format
}->{get_columns}(\
%inforef
,
$fd_input
,\
@columns
) <= 0) {
die
"$0: couldn't find headline\n"
;
}
if
(
$opts
{
'map-filter'
}) {
@columns
=
map
{
$mfsub
->(
$_
)}
@columns
;
}
map
{s/^\s+//; s/\s+$//;
$hcolmap
{
$_
} = 1;}
@columns
;
if
(
$opts
{
'map'
}) {
my
@newcolumns
;
foreach
(
@columns
) {
if
(
exists
$colmap
{
$_
}) {
push
(
@newcolumns
,
$colmap
{
$_
});
$hcolmap
{
$colmap
{
$_
}} = 1;
}
else
{
push
(
@newcolumns
,
$_
);
}
}
@columns
=
@newcolumns
;
}
for
(
sort
(
keys
%usecol
)) {
next
if
$hcolmap
{
$_
};
next
unless
exists
$usecol
{
$_
};
next
unless
$usecol
{
$_
};
push
(
@columns
,
$_
);
}
$table
=
$opts
{
'table'
};
$fieldmap
{
$table
} = \
@columns
;
}
if
(
$opts
{
'routine'
}) {
$routine
=
eval
$opts
{
'routine'
};
if
($@) {
die
"$0: invalid filter routine: $@: \n"
;
}
if
(
ref
(
$routine
) ne
'CODE'
) {
die
"$0: invalid filter routine\n"
;
}
}
if
(
$opts
{
'table'
}) {
$table
=
$opts
{
'table'
};
if
(!
$opts
{
'headline'
} &&
$opts
{
'columns'
}) {
$fieldmap
{
$table
} = \
@columns
;
}
}
my
$dbif
;
my
$pwdused
= 0;
my
(
$driver
,
$database
,
$user
) =
@ARGV
;
$dbif
= new DBIx::Easy (
$driver
,
$database
,
$user
);
$dbif
-> install_handler (\
&fatal
);
$dbif
->
connect
;
my
(
@keys
,
@cleansekeys
,
%cleansemap
,
$numkeysleft
,
%recmap
,
@names
);
if
(
$opts
{
'cleanse'
}) {
@names
=
&column_names
(
$dbif
,
$table
);
$fieldnames
= \
@names
;
%cleansemap
=
&key_names
(
$dbif
,
$table
,
$opts
{
'keys'
}, 1);
@cleansekeys
=
sort
(
keys
%cleansemap
);
my
(
$row
,
$href
,
$i
);
$sth
=
$dbif
-> process (
'SELECT '
.
join
(
', '
,
@cleansekeys
)
.
" FROM $table"
);
while
(
$row
=
$sth
-> fetch()) {
$href
= \
%recmap
;
for
(
$i
= 0;
$i
<
$#cleansekeys
;
$i
++) {
unless
(
exists
$href
->{
$$row
[
$i
]}) {
$href
->{
$$row
[
$i
]} = {};
}
$href
=
$href
->{
$$row
[
$i
]};
}
if
(
exists
$href
->{
$$row
[
$i
]}) {
die
"$0: duplicate key: "
,
join
(
","
,
@$row
),
"\n"
;
}
if
(
defined
$$row
[
$i
]) {
$href
->{
$$row
[
$i
]} = 1;
}
else
{
$href
->{
$$row
[
$i
]} =
''
;
}
}
}
if
(
$opts
{
'match-sql'
}) {
unless
(
$opts
{
'match-sql'
} =~ /^(.*?):\{(.*?)\}$/) {
die
"$0: invalid format for option --match-sql: $opts{'match-sql'}\n"
;
}
$matchcol
= $1;
$sth
=
$dbif
-> process ($2);
my
$row
;
while
(
$row
=
$sth
->fetch()) {
$matchmap
{
$$row
[0]} = 1;
}
}
my
$gcsub
=
$funcref
{
$format
}->{get_columns};
MAIN:
while
(
$gcsub
->(\
%inforef
,
$fd_input
, \
@columns
)) {
my
(
@data
);
if
(
$opts
{
'headline'
} ||
$opts
{
'table'
}) {
@values
=
@columns
;
}
else
{
if
(
$format
eq
'TAB'
) {
(
$table
,
@values
) =
split
/\t/;
}
elsif
(
$format
eq
'CSV'
) {
next
unless
csv_parseline (
$csv
, \
$linebuf
,
$_
, [
$table
,
@values
]);
}
if
(
$table
=~ /(.+?)\.(.+)/) {
$table
= $1;
$startcol
= $2;
unless
(
$startcol
=~ /^\d+$/) {
$msg
=
"$0: $.: start column not a number: \""
.
$startcol
.
"\"\n"
;
if
(
$opts
{
'skipbadlines'
}) {
warn
(
$msg
);
next
;
}
else
{
die
(
$msg
);
}
}
}
if
(
$table
=~ /\s/) {
warn
(
"$0: $.: skipping record (\"$table\" not accepted as table name)\n"
);
next
;
}
}
$currow
++;
if
(
defined
$rowflag
) {
if
(
$rowflag
&& !
exists
$userow
{
$currow
}) {
next
;
}
if
(!
$rowflag
&&
exists
$userow
{
$currow
}) {
next
;
}
}
@names
=
&column_names
(
$dbif
,
$table
,
$startcol
);
$fieldnames
= \
@names
;
if
(
$opts
{
'routine'
}) {
next
unless
filter_input (
$routine
,
$table
,
$fieldnames
, \
@values
);
}
MATCHSQL: {
if
(
$opts
{
'match-sql'
}) {
for
(
my
$i
= 0;
$i
<
@$fieldnames
;
$i
++) {
if
(
$$fieldnames
[
$i
] eq
$matchcol
) {
last
MATCHSQL
if
$matchmap
{
$values
[
$i
]};
print
"Not accepted record @values\n"
;
next
MAIN;
}
}
}
}
my
$typeref
=
$dbif
-> typemap (
$table
);
my
$sizeref
=
$dbif
-> sizemap (
$table
);
for
(
my
$i
= 0;
$i
<=
$#$fieldnames
;
$i
++) {
if
(
keys
%usecol
) {
if
(
$colflag
&& !
exists
$usecol
{
$$fieldnames
[
$i
]}) {
next
;
}
if
(!
$colflag
&&
exists
$usecol
{
$$fieldnames
[
$i
]}) {
next
;
}
}
if
(
defined
$values
[
$i
]) {
$values
[
$i
] =~ s/\\n/\n/g;
$values
[
$i
] =~ s/\\t/\t/g;
}
unless
(
exists
$$typeref
{
$$fieldnames
[
$i
]}) {
warn
(
"$0: No type information for column $$fieldnames[$i] found\n"
);
next
;
}
unless
(
exists
$$sizeref
{
$$fieldnames
[
$i
]}) {
warn
(
"$0: No size information for column $$fieldnames[$i] found\n"
);
next
;
}
if
(
$$typeref
{
$$fieldnames
[
$i
]} == DBI::SQL_CHAR) {
if
(
defined
$values
[
$i
]) {
if
(
length
(
$values
[
$i
]) >
$$sizeref
{
$$fieldnames
[
$i
]}) {
warn
(prefix() .
"Data for field $$fieldnames[$i] truncated: $values[$i]\n"
);
$values
[
$i
] =
substr
(
$values
[
$i
], 0,
$$sizeref
{
$$fieldnames
[
$i
]});
}
}
else
{
$values
[
$i
] =
''
;
}
}
elsif
(
$$typeref
{
$$fieldnames
[
$i
]} == DBI::SQL_VARCHAR) {
if
(
defined
$values
[
$i
]) {
if
(
length
(
$values
[
$i
]) >
$$sizeref
{
$$fieldnames
[
$i
]}) {
warn
(prefix() .
"Data for field $$fieldnames[$i] truncated: $values[$i]\n"
);
$values
[
$i
] =
substr
(
$values
[
$i
], 0,
$$sizeref
{
$$fieldnames
[
$i
]});
}
}
else
{
$values
[
$i
] =
''
;
}
}
}
my
%keymap
=
&key_names
(
$dbif
,
$table
,
$opts
{
'keys'
}, 1);
@keys
= (
keys
(
%keymap
));
my
@terms
=
map
{
$_
.
' = '
.
$dbif
->quote(
$values
[
$keymap
{
$_
}])}
(
@keys
);
$sth
=
$dbif
-> process (
'SELECT '
.
join
(
', '
,
@keys
)
.
" FROM $table WHERE "
.
join
(
' AND '
,
@terms
));
while
(
$sth
-> fetch) {}
if
(
$sth
-> rows () > 1) {
$" =
', '
;
die
(
"$0: duplicate key(s) @keys in table $table\n"
);
}
$update
=
$sth
-> rows ();
$sth
-> finish ();
for
(
my
$i
= 0;
$i
<=
$#$fieldnames
;
$i
++) {
if
(
keys
%usecol
) {
if
(
$colflag
&& !
exists
$usecol
{
$$fieldnames
[
$i
]}) {
next
;
}
if
(!
$colflag
&&
exists
$usecol
{
$$fieldnames
[
$i
]}) {
next
;
}
}
if
(
defined
$values
[
$i
]) {
$values
[
$i
] =~ s/\\n/\n/g;
}
push
(
@data
,
$$fieldnames
[
$i
],
$values
[
$i
]);
}
if
(
$update
) {
if
(
$opts
{
'insert-only'
}) {
next
;
}
$dbif
-> update (
$table
,
join
(
' AND '
,
@terms
),
@data
);
}
else
{
if
(
$opts
{
'update-only'
}) {
next
;
}
$dbif
-> insert (
$table
,
@data
);
}
if
(
$opts
{
'cleanse'
} &&
$update
) {
my
(
$href
,
$i
);
$href
= \
%recmap
;
if
(
$dbif
->{DRIVER} eq
'mysql'
) {
for
(
$i
= 0;
$i
<
@cleansekeys
;
$i
++) {
if
(
$$typeref
{
$cleansekeys
[
$i
]}
== DBI::SQL_VARCHAR) {
$values
[
$cleansemap
{
$cleansekeys
[
$i
]}] =~ s/\s+$//;
}
}
}
for
(
$i
= 0;
$i
<
@cleansekeys
;
$i
++) {
if
(
$$typeref
{
$cleansekeys
[
$i
]} == DBI::SQL_CHAR) {
$values
[
$cleansemap
{
$cleansekeys
[
$i
]}]
=
substr
(
$values
[
$cleansemap
{
$cleansekeys
[
$i
]}],
0,
$$sizeref
{
$cleansekeys
[
$i
]});
}
}
for
(
$i
= 0;
$i
<
$#cleansekeys
;
$i
++) {
unless
(
exists
$href
->{
$values
[
$cleansemap
{
$cleansekeys
[
$i
]}]}) {
die
(
"$0: internal error: key $cleansekeys[$i] not found: "
,
join
(
","
,
@values
),
"\n"
);
}
$href
=
$href
->{
$values
[
$cleansemap
{
$cleansekeys
[
$i
]}]};
}
unless
(
exists
$href
->{
$values
[
$cleansemap
{
$cleansekeys
[
$i
]}]}) {
die
(
"$0: internal error: key $cleansekeys[$i] not found: "
,
join
(
","
,
@values
),
"\n"
);
}
if
(
$href
->{
$values
[
$cleansemap
{
$cleansekeys
[
$i
]}]} == 0) {
my
$j
= 0;
warn
(prefix () .
"duplicate key(s) in input: "
,
join
(
", "
,
map
{
"$_ = \""
.
$values
[
$cleansemap
{
$cleansekeys
[
$j
++]}] .
"\""
}
@cleansekeys
) .
"\n"
);
}
$href
->{
$values
[
$cleansemap
{
$cleansekeys
[
$i
]}]} = 0;
}
}
if
(
$opts
{
'cleanse'
} && !
$opts
{
'insert-only'
}) {
my
$href
;
$href
= \
%recmap
;
my
@keylist
=
keys
%recmap
;
my
(
@tmpkeys
,
@reckeys
,
$thiskey
,
$keyval
,
@conds
);
for
(
keys
%recmap
) {
push
(
@reckeys
, [
$recmap
{
$_
},
$_
]);
}
for
(
my
$i
= 1;
$i
<
@cleansekeys
;
$i
++) {
@tmpkeys
=
@reckeys
;
undef
@reckeys
;
for
$thiskey
(
@tmpkeys
) {
$href
=
shift
@$thiskey
;
for
(
keys
%$href
) {
push
(
@reckeys
, [
$href
->{
$_
},
@$thiskey
,
$_
]);
}
}
}
for
(
@reckeys
) {
undef
@conds
;
next
unless
shift
(
@$_
);
for
(
my
$i
= 0;
$i
<
@cleansekeys
;
$i
++) {
push
(
@conds
,
$cleansekeys
[
$i
] .
' = '
.
$dbif
->quote (
$_
->[
$i
]));
}
$dbif
-> process (
"DELETE FROM $table WHERE "
.
join
(
' AND '
,
@conds
));
}
}
if
(
length
$linebuf
) {
if
(
$opts
{
'skipbadlines'
}) {
warn
(
"$0: unexpected EOF"
);
}
else
{
die
(
"$0: unexpected EOF"
);
}
}
undef
$dbif
;
if
(
$opts
{
'file'
}) {
$fd_input
->
close
;
}
sub
get_columns_tab {
my
(
$iref
,
$fd
,
$colref
) =
@_
;
my
$line
;
while
(
defined
(
$line
= <
$fd
>)) {
next
if
$line
=~ /^\
chomp
(
$line
);
$line
=~ s/\r$//;
@$colref
=
split
(/\t/,
$line
);
return
@$colref
;
}
}
sub
get_columns_csv {
my
(
$iref
,
$fd
,
$colref
) =
@_
;
my
(
$line
,
$buffer
);
unless
(
$iref
->{parser}) {
$iref
->{parser} = Text::CSV_XS->new ({
'binary'
=> 1,
'sep_char'
=>
','
});
}
while
(
defined
(
$line
= <
$fd
>)) {
if
(
$iref
->{parser}->parse(
$line
)) {
@$colref
=
$iref
->{parser}->fields();
$buffer
=
''
;
return
@$colref
;
}
if
((
$line
=~
tr
/
"/"
/) % 2) {
$buffer
=
$line
;
}
else
{
$msg
=
"$0: $.: line not in CSV format: "
.
$iref
->{parser}->error_input() .
"\n"
;
die
(
$msg
);
}
}
}
sub
get_columns_xls {
my
(
$iref
,
$fd
,
$colref
) =
@_
;
unless
(
$iref
->{workbook}) {
$iref
->{workbook} =
$iref
->{object}->Parse(
$fd
);
unless
(
$iref
->{workbook}) {
die
"$0: couldn't parse spreadsheet\n"
;
}
$iref
->{worksheet} =
$iref
->{workbook}->{Worksheet}[0];
$iref
->{row} = 0;
}
if
(
$iref
->{row} <=
$iref
->{worksheet}->{MaxRow}) {
@$colref
=
map
{
defined
$_
?
$_
->{Val} :
undef
}
@{
$iref
->{worksheet}->{Cells}[
$iref
->{row}++]};
return
@$colref
;
}
}
sub
column_names ($$) {
my
(
$dbif
,
$table
,
$start
) =
@_
;
my
(
$names
,
$sth
);
$start
= 0
unless
$start
;
if
(
exists
$fieldmap
{
$table
}) {
$names
=
$fieldmap
{
$table
};
}
else
{
$sth
=
$dbif
-> process (
"SELECT * FROM $table WHERE 0 = 1"
);
$names
=
$fieldmap
{
$table
} =
$sth
-> {NAME};
$sth
-> finish ();
}
@$names
[
$start
..
$#$names
];
}
sub
key_names () {
my
(
$dbif
,
$table
,
$keyspec
,
$hash
) =
@_
;
my
(
$numkeysleft
,
$i
);
my
@columns
= column_names (
$dbif
,
$table
);
my
(
@keys
,
%kmap
);
$keyspec
=~ s/^\s+//;
$keyspec
=~ s/\s+$//;
if
(
$keyspec
=~ /^\d+$/) {
$numkeysleft
=
$keyspec
;
for
(
$i
= 0;
$i
<
$numkeysleft
&&
$i
<
@columns
;
$i
++) {
if
(
keys
%usecol
) {
if
(
$colflag
&& !
exists
$usecol
{
$columns
[
$i
]}) {
$numkeysleft
++;
next
;
}
if
(!
$colflag
&&
exists
$usecol
{
$columns
[
$i
]}) {
$numkeysleft
++;
next
;
}
}
if
(
$hash
) {
$kmap
{
$columns
[
$i
]} =
$i
;
}
else
{
push
(
@keys
,
$columns
[
$i
]);
}
}
}
else
{
my
%colmap
;
for
(
$i
= 0;
$i
<
@columns
;
$i
++) {
$colmap
{
$columns
[
$i
]} =
$i
;
}
for
(
split
(/\s*,\s*/,
$keyspec
)) {
unless
(
exists
$colmap
{
$_
}) {
die
"$0: key \"$_\" appears not in column list\n"
;
}
if
(
$hash
) {
$kmap
{
$_
} =
$colmap
{
$_
};
}
else
{
push
(
@keys
,
$_
);
}
}
}
return
$hash
?
%kmap
:
@keys
;
}
sub
filter_input {
my
(
$routine
,
$table
,
$fieldnames
,
$valref
) =
@_
;
my
(
%colmap
,
$ret
);
for
(
my
$i
= 0;
$i
<=
$#$fieldnames
;
$i
++) {
$colmap
{
$$fieldnames
[
$i
]} =
$$valref
[
$i
];
}
$ret
=
&$routine
(
$table
, \
%colmap
);
for
(
my
$i
= 0;
$i
<=
$#$fieldnames
;
$i
++) {
$$valref
[
$i
] =
$colmap
{
$$fieldnames
[
$i
]};
}
$ret
;
}
sub
prefix {
my
@frags
= ($0);
if
($.) {
if
(
$opts
{
'file'
}) {
push
(
@frags
,
$opts
{
'file'
});
}
push
(
@frags
, $.);
}
join
(
': '
,
@frags
,
''
);
}
sub
fatal {
my
(
$statement
,
$err
,
$msg
) =
@_
;
my
$pwd
;
my
$prefix
= prefix ();
if
(
$dbif
->is_auth_error (
$err
)) {
unless
(
$pwdused
) {
print
"We need a password.\n"
;
$pwd
= querypwd();
$pwdused
= 1;
if
(
length
(
$pwd
)) {
$dbif
= new DBIx::Easy (
$driver
,
$database
,
$user
,
$pwd
);
$dbif
-> install_handler (\
&fatal
);
$dbif
->
connect
();
return
;
}
else
{
die
(
"$prefix$statement: $msg\n"
);
}
}
}
die
(
"$prefix$statement: $msg\n"
);
}
sub
querypwd () {
my
$pwd
;
print
"Password: "
;
ReadMode (
'noecho'
);
$pwd
= ReadLine (0);
ReadMode (
'restore'
);
print
"\n"
;
chomp
(
$pwd
);
$pwd
;
}