# Version of the CSV::Processor distribution, set via the fully qualified
# package variable.
$CSV::Processor::VERSION = '1.01';
# Constructor.
#
# Named parameters read here:
#   file / in_file   - input CSV path; at least one must be true. in_file is
#                      used when defined, otherwise it is copied from file.
#   out_file         - output path; when false, the result of
#                      make_prefix( in_file, prefix ) is used instead.
#   prefix           - second argument for make_prefix(), default 'p_'.
#   encoding         - input encoding handed to Text::AutoCSV, default 'UTF-8'.
#   verbose          - handed to Text::AutoCSV, default 0.
#   human_numbering  - stored on the object, default 0.
#
# Dies with "No input file defined" when neither file nor in_file is given.
#
# Returns a hash-based object blessed into $class. It holds every supplied
# parameter plus the Text::AutoCSV instance under the auto_csv key. The output
# encoding passed to Text::AutoCSV is always 'UTF-8'.
sub new {
    my ( $class, %param ) = @_;

    my $prefix = $param{prefix} || 'p_';

    die "No input file defined" unless ( $param{file} || $param{in_file} );
    $param{in_file} = $param{file} unless defined $param{in_file};

    my $csv = Text::AutoCSV->new(
        in_file      => $param{in_file},
        encoding     => $param{encoding} || 'UTF-8',
        out_file     => $param{out_file} || make_prefix( $param{in_file}, $prefix ),
        out_encoding => 'UTF-8',
        verbose      => $param{verbose} || 0
    );
    $param{auto_csv} = $csv;

    # Default the flag only when the caller did not pass one. The check used
    # to look at an unrelated hash (%params instead of %param), so it was
    # always true and silently reset a caller-supplied value to 0.
    $param{human_numbering} = 0 if !defined $param{human_numbering};

    return bless {%param}, $class;
}
# Accessor: returns whatever is stored under the object's auto_csv key.
sub auto_csv {
    my ($self) = @_;
    return $self->{auto_csv};
}
# Applies $callback to one column of every row and writes the result through
# the wrapped auto_csv object.
#
# Arguments:
#   $in_field   - column to read: an integer index or a field name.
#   $out_field  - column to write: an integer index or a field name.
#   $callback   - code ref called with the input cell value; its return value
#                 becomes the output cell.
#   %params     - verbose: enables diagnostics on STDOUT; when not defined it
#                          falls back to the auto_csv object's verbose entry,
#                          then to the object's own.
#                 human_numbering: column indexes are 1-based (also enabled by
#                          the object's human_numbering entry).
#
# When BOTH fields are whole integers, rows are handled as array refs through
# set_walker_ar(). Otherwise both are treated as field names and the output
# column is registered with field_add_computed(). In either case the chain
# ends with write(), and the value of that call is returned.
sub rw_wrapper {
    my ( $self, $in_field, $out_field, $callback, %params ) = @_;

    $params{verbose} = $self->auto_csv->{verbose} || $self->{verbose}
      if !defined $params{verbose};

    # Anchored on purpose: the bare $RE{num}{int} pattern matches any string
    # that merely CONTAINS digits, which sent field names such as "URL2" down
    # the column-index branch.
    my $whole_int = qr/\A$RE{num}{int}\z/;

    if ( $in_field =~ $whole_int && $out_field =~ $whole_int ) {
        say "Assuming that you specified column numbers at in and out parameters"
          if $params{verbose};

        if ( $self->{human_numbering} || $params{human_numbering} ) {
            say "Human numbering in use, first column index is 1 not 0"
              if $params{verbose};

            # Convert the 1-based numbers to the 0-based indexes used on the
            # row array below (the previous code incremented them, moving
            # further away from the requested column).
            $in_field--;
            $out_field--;
        }

        my $row_number = 0;
        return $self->auto_csv->set_walker_ar(
            sub {
                my $row_arrayed = $_[0];

                # Part of the verbose "Row / In / Out" line, so it is gated by
                # the same flag as the rest of that line.
                print "Row $row_number\t" if $params{verbose};

                # The defined() guard only avoids an "uninitialized" warning;
                # undef already took the else branch.
                if ( defined $row_arrayed->[$in_field]
                    && $row_arrayed->[$in_field] ne '' )
                {
                    print 'In: ' . $row_arrayed->[$in_field] . "\t"
                      if $params{verbose};
                    my $res = $callback->( $row_arrayed->[$in_field], );
                    print 'Out : ' . $res . "\n" if $params{verbose};

                    # NOTE(review): insert_after_index() is defined outside
                    # this block; presumably it places $res right after the
                    # given index, i.e. at position $out_field - confirm.
                    insert_after_index( $out_field - 1, $res, $row_arrayed );
                }
                else {
                    print "In: undef\tOut: undef\n" if $params{verbose};
                }
                $row_number++;
                return $row_arrayed;
            }
        )->write();
    }
    else {
        my @fields = $self->auto_csv->get_fields_names();
        say "Assuming that you specified column names at in and out parameters"
          if $params{verbose};
        say "Auto detected field names : " . join( ',', @fields )
          if $params{verbose};

        return $self->auto_csv->field_add_computed(
            $out_field,
            sub {
                # The row hash ref is taken from the second callback argument.
                my $hr = $_[1];
                print 'In: ' . $hr->{$in_field} . "\t" if $params{verbose};
                $hr->{$out_field} = $callback->( $hr->{$in_field} );
                print 'Out : ' . $hr->{$out_field} . "\n" if $params{verbose};
                return $hr->{$out_field};
            }
        )->write();
    }
}
# Runs rw_wrapper() with a callback that looks up e-mail addresses for the
# value found in $in_field and stores them, comma-joined, in $out_field.
#
# Arguments:
#   $in_field   - input column, 'URL' when not defined.
#   $out_field  - output column, 'EMAIL' when not defined.
#   %params     - attempts: second argument for search_until_attempts(),
#                           5 when not defined.
#                 verbose:  passed to the Email::Extractor constructor.
#
# A new Email::Extractor object is built on every callback invocation.
# Returns whatever rw_wrapper() returns.
sub add_email {
    my ( $self, $in_field, $out_field, %params ) = @_;

    $in_field         //= 'URL';
    $out_field        //= 'EMAIL';
    $params{attempts} //= 5;

    my $collect_emails = sub {
        my ($url) = @_;
        my $found = Email::Extractor->new( verbose => $params{verbose} )
          ->search_until_attempts( $url, $params{attempts} );
        return join ',', @{$found};
    };

    return $self->rw_wrapper( $in_field, $out_field, $collect_emails );
}
# Runs rw_wrapper() with a callback that ignores the input cell and always
# returns the same constant.
#
# Arguments:
#   $in_field   - input column, passed through to rw_wrapper().
#   $out_field  - output column, passed through to rw_wrapper().
#   %params     - value: the constant to write (required; must be defined,
#                        0 and '' are accepted).
#
# Dies when value is not defined. Returns whatever rw_wrapper() returns.
sub add_same {
    my ( $self, $in_field, $out_field, %params ) = @_;

    # The guard is on the "value" parameter, so the message names it (it used
    # to blame the output field, which is not what is being checked).
    die "Value for output field is not specified" unless defined $params{value};

    return $self->rw_wrapper( $in_field, $out_field, sub { return $params{value} } );
}
# A file loaded with require/use must finish by evaluating to a true value.
1;