#!/usr/bin/perl
my
(
$outfilename
,
$logfile
,
$inputlistfile
,
$verbose
,
$configfile
);
my
(
$quiet
,
$help
,
$man
,
$select
,
$xmlmode
,
$loosemode
,
$pretty
);
my
(
$unapimode
,
$defaultconfig
,
$pxml
,
$nullmode
,
$countmode
,
$statmode
);
my
(
$limit
,
$offset
);
my
%fieldstat_a
;
my
%fieldstat_e
;
my
%fieldstat_r
;
GetOptions(
'auto'
=> \
$defaultconfig
,
'config:s'
=> \
$configfile
,
"output:s"
=> \
$outfilename
,
"log:s"
=> \
$logfile
,
"files:s"
=> \
$inputlistfile
,
"quiet"
=> \
$quiet
,
"help|?"
=> \
$help
,
"man"
=> \
$man
,
"select=s"
=> \
$select
,
"count"
=> \
$countmode
,
"stats=s"
=> \
$statmode
,
"limit=i"
=> \
$limit
,
"unapi"
=> \
$unapimode
,
"verbose"
=> \
$verbose
,
"null"
=> \
$nullmode
,
"pxml:s"
=> \
$pxml
,
"pretty:s"
=> \
$pretty
,
"xml:s"
=> \
$xmlmode
) or pod2usage(2);
pod2usage(1)
if
$help
;
pod2usage(
-verbose
=> 2)
if
$man
;
$logfile
=
"-"
if
defined
$logfile
and
$logfile
eq
""
;
if
(
defined
$logfile
and
$logfile
ne
"-"
) {
open
LOG,
">$logfile"
or
die
(
"Error opening $logfile\n"
);
}
elsif
( not
$quiet
and (
$logfile
eq
"-"
or
$verbose
) ) {
*LOG
=
*STDOUT
;
}
else
{
open
LOG,
'>/dev/null'
;
}
$configfile
=
'-'
if
$defaultconfig
;
my
$source
= PICA::Source->new(
config
=> (
$configfile
eq
'-'
?
undef
:
$configfile
) )
if
$configfile
;
if
(
defined
$pretty
) {
$outfilename
=
$pretty
unless
defined
$outfilename
||
$pretty
==
""
;
$pretty
= 1;
}
if
(
defined
$pxml
and
$pxml
ne
""
) {
$xmlmode
=
$pxml
;
$pretty
= 1;
}
if
(
defined
$xmlmode
and
$xmlmode
ne
""
) {
$outfilename
=
$xmlmode
if
not
defined
$outfilename
;
}
if
(
defined
$statmode
or
defined
$countmode
) {
$nullmode
= 1
if
"$outfilename"
eq
""
;
}
$outfilename
=
"/dev/null"
if
$nullmode
;
$outfilename
=
"-"
unless
defined
$outfilename
;
print
LOG
"Output to $outfilename\n"
if
$outfilename
ne
"-"
;
my
@p
= (
$outfilename
ne
"-"
?
$outfilename
: \
*STDOUT
);
push
@p
, (
'format'
=>
'XML'
)
if
defined
$xmlmode
;
my
$writer
= PICA::Writer->new(
@p
,
pretty
=>
$pretty
,
stats
=>
$statmode
);
if
(
$inputlistfile
) {
if
(
$inputlistfile
eq
"-"
) {
*INFILES
=
*STDIN
;
}
else
{
print
LOG
"Reading input files from $inputlistfile\n"
;
open
INFILES,
$inputlistfile
or
die
(
"Error opening $inputlistfile"
);
}
}
my
$_field_handler
= \
&field_handler
;
my
$_record_handler
= \
&record_handler
;
my
$field_regex
;
my
$subfield_select
=
""
;
if
(
$select
) {
my
(
$tag
,
$subfield
) = (
""
,
""
);
if
(
$select
=~ /^...+[\
$_
]/ ) {
(
$tag
,
$subfield
) =
split
(/[\
$_
]/,
$select
);
}
else
{
$tag
=
$select
;
}
$field_regex
=
qr/^$tag$/
;
$subfield_select
=
$subfield
if
$subfield
ne
""
;
$_field_handler
= \
&select_field_handler
;
undef
$_record_handler
;
if
(
$subfield_select
ne
""
) {
print
LOG
"Selecting subfield: $select\n"
;
}
else
{
print
LOG
"Selecting field: $select\n"
;
}
}
my
$remote_counter
= 0;
my
%options
;
$limit
= 10
if
!
$limit
or
$limit
<= 0;
$options
{Limit} =
$limit
;
$options
{Proceed} = 1;
my
$parser
= PICA::Parser->new(
Field
=>
$_field_handler
,
Record
=>
$_record_handler
,
%options
);
my
$filename
;
if
(
@ARGV
> 0) {
if
(
$inputlistfile
) {
print
STDERR
"You can only specify either an input file or a file list!\n"
;
exit
0;
}
if
(
$source
and
$source
->baseURL ) {
unshift
@ARGV
,
$source
->baseURL
unless
$ARGV
[0] =~ /^http:\/\// or
$ARGV
[0] =~ /^[^\\:]+:\d+/;
}
while
((
$filename
=
shift
@ARGV
)) {
my
$remote_parser
;
if
(
$filename
=~ /^http:\/\//) {
my
$baseurl
=
$filename
;
my
$query
=
shift
@ARGV
||
print
STDERR
"query missing!\n"
;
if
(
$query
=~ /=/) {
print
LOG
"SRU query '$query' to $baseurl\n"
;
my
$server
= PICA::Source->new(
SRU
=>
$baseurl
,
Limit
=>
$limit
);
$remote_parser
=
$server
->cqlQuery(
$query
,
Field
=>
$_field_handler
,
Record
=>
$_record_handler
,
Limit
=>
$limit
,
);
}
else
{
my
$prefix
=
$unapimode
?
"gvk"
:
""
;
if
(
$unapimode
) {
print
LOG
"unAPI query '$query' from $baseurl\n"
;
$source
= PICA::Source->new(
unAPI
=>
$baseurl
);
}
else
{
print
LOG
"PSI get PPN '$query' from $baseurl\n"
;
$source
= PICA::Source->new(
PSI
=>
$baseurl
);
}
my
$r
=
$source
->getPPN(
$query
,
$prefix
);
$parser
->parsedata(
$r
)
if
$r
;
}
}
elsif
(
$filename
=~ /^[^\\:]+:\d+/) {
my
$z3950host
=
$filename
;
my
$query
=
shift
@ARGV
||
print
STDERR
"query missing!\n"
;
print
LOG
"Z3950 query '$query' to $z3950host\n"
;
my
$server
= PICA::Source->new(
Z3950
=>
$z3950host
);
$remote_parser
=
$server
->z3950Query(
$query
,
Field
=>
$_field_handler
,
Record
=>
$_record_handler
);
}
else
{
print
LOG
"Reading $filename\n"
;
$parser
->parsefile(
$filename
);
}
$remote_counter
+=
$remote_parser
->counter()
if
defined
$remote_parser
;
}
}
elsif
(
$inputlistfile
) {
while
(<INFILES>) {
chomp
;
next
if
$_
eq
""
;
$filename
=
$_
;
print
LOG
"Reading $filename\n"
;
my
(
$record
) = PICA::Parser->parsefile(
$filename
,
Limit
=> 1)->records;
}
}
else
{
print
LOG
"Reading standard input\n"
;
$parser
->parsefile( \
*STDIN
);
}
$writer
->end();
print
LOG
"Input records:\t"
. (
$parser
->counter() +
$remote_counter
) .
"\nOutput records:\t"
.
$writer
->counter() .
"\nOutput fields:\t"
.
$writer
->fields() .
"\n"
;
if
(
$countmode
) {
foreach
my
$tag
(
sort
keys
%fieldstat_a
) {
print
"$tag\t"
.
$fieldstat_a
{
$tag
} .
"\t"
;
print
$fieldstat_r
{
$tag
};
print
"\n"
;
}
}
if
(
$statmode
) {
print
join
(
"\n"
,
$writer
->statlines).
"\n"
;
}
sub
field_handler {
my
$field
=
shift
;
if
(
$countmode
) {
my
$tag
=
$field
->tag;
if
(
defined
$fieldstat_a
{
$tag
}) {
$fieldstat_a
{
$tag
}++;
}
else
{
$fieldstat_a
{
$tag
} = 1;
}
$fieldstat_e
{
$tag
} = 1;
}
return
$field
;
}
sub
select_field_handler {
my
$field
=
shift
;
return
unless
$field
->tag() =~
$field_regex
;
if
(
$subfield_select
ne
""
) {
my
@sf
=
$field
->subfield(
$subfield_select
);
print
{
$writer
->{io} }
join
(
"\n"
,
@sf
) .
"\n"
if
@sf
;
}
else
{
$writer
->
write
(
$field
);
}
return
undef
;
}
sub
record_handler {
my
$record
=
shift
;
$writer
->
write
(
$record
);
if
(
$countmode
) {
foreach
my
$tag
(
keys
%fieldstat_e
) {
if
(
defined
$fieldstat_r
{
$tag
}) {
$fieldstat_r
{
$tag
}++;
}
else
{
$fieldstat_r
{
$tag
} = 1;
}
}
%fieldstat_e
= ();
}
if
(
$verbose
) {
print
LOG
$parser
->counter() .
"\n"
unless
(
$parser
->counter() % 100);
}
}