use
MARC::File::XML (
BinaryEncoding
=>
'UTF-8'
,
DefaultEncoding
=>
'UTF-8'
,
RecordFormat
=>
'MARC21'
);
# Name of the input format. generator() accepts 'USMARC', 'MicroLIF', 'XML'
# and 'ALEPHSEQ'; read-only, defaults to 'USMARC'.
has type => (
    is      => 'ro',
    default => sub { return 'USMARC' },
);

# Field specification decode_marc() uses to look up the record identifier
# (stored under `_id`); read-only, defaults to '001'.
has id => (
    is      => 'ro',
    default => sub { return '001' },
);

# Optional array reference of already-parsed records. When it holds a true
# value, generator() iterates over it instead of reading from a file handle.
has records => (is => 'rw');
# Build an iterator over the Aleph sequential input read from $self->fh.
#
# Each input line describes one field and is unpacked positionally:
# 9 characters system id, 1 separator, 3 characters tag, 1 character for each
# of the two indicators, 3 further single-character columns that are ignored,
# and the remainder of the line as field data. Lines shorter than 18
# characters are skipped.
#
# Returns a code reference. Every call returns the next record as
#   { _id => $system_id, record => [ [ $tag, $ind1, $ind2, @parts ], ... ] }
# or nothing once the input is exhausted. A record is closed when a line with
# tag 'FMT' is read while fields have already been collected, or at end of
# input.
sub aleph_generator {
    my $self = shift;

    # Iterator state; every returned closure gets its own copy.
    my $fh;
    my $prev_id;
    my $record = [];

    return sub {
        # The handle is fetched on first use, not when the iterator is built.
        $fh //= $self->fh;

        # A lexical line variable is used so the caller's $_ is never touched.
        while (defined(my $line = <$fh>)) {
            # chomp, not chop: a final line without a newline must keep its
            # last data character.
            chomp $line;
            next unless length($line) >= 18;

            my ($sysid, undef, $tag, $ind1, $ind2, undef, undef, undef, $data)
                = unpack("A9A1A3A1A1A1A1A1U0A*", $line);

            unless ($tag =~ m{^[0-9A-Z]+}) {
                warn "skipping $sysid $tag unknown tag";
                next;
            }

            # Anything that is not alphanumeric counts as a blank indicator.
            $ind1 = " " unless $ind1 =~ m{[A-Za-z0-9]};
            $ind2 = " " unless $ind2 =~ m{[A-Za-z0-9]};

            unless (utf8::decode($data)) {
                warn "skipping $sysid $tag unknown data";
                next;
            }

            # In the leader, '^' is replaced by a space.
            $data =~ s/\^/ /g if $tag eq 'LDR';

            # Splitting on '$$<code>' yields (text-before, code, value, ...).
            # Prefixing '_' pairs the leading text with the pseudo code '_',
            # which is kept for FMT, LDR and 00x fields only.
            my @parts = ('_', split(/\$\$(.)/, $data));
            splice(@parts, 0, 2) unless $tag =~ /FMT|LDR|00./;

            # Keep the list an even-length sequence of code/value pairs.
            push @parts, '' if @parts % 2;

            my $row = [$tag, $ind1, $ind2, @parts];

            if (@$record > 0 && $tag eq 'FMT') {
                # An FMT line opens the next record: hand out the finished
                # one and start collecting the new one with this line.
                my $result = { _id => $prev_id, record => $record };
                $record  = [$row];
                $prev_id = $sysid;
                return $result;
            }

            push @$record, $row;
            $prev_id = $sysid;
        }

        # End of input: flush whatever is pending, then signal exhaustion.
        return unless @$record;

        my $result = { _id => $prev_id, record => $record };
        $record = [];
        return $result;
    };
}
# Build an iterator over a MARC file read from $self->fh.
#
# The parser class is chosen from $self->type ('USMARC', 'MicroLIF' or 'XML');
# any other type dies with "unknown". Returns a code reference that, on each
# call, pulls the next item from the parser and passes it through
# $self->decode_marc.
sub marc_generator {
    my $self = shift;

    my %parser_for = (
        USMARC   => 'MARC::File::USMARC',
        MicroLIF => 'MARC::File::MicroLIF',
        XML      => 'MARC::File::XML',
    );

    my $parser = $parser_for{ $self->type }
        or die "unknown";

    my $file = $parser->in($self->fh);

    return sub {
        return $self->decode_marc($file->next);
    };
}
# Build an iterator over the records held in $self->records.
#
# The list is copied when the iterator is created, so the array behind the
# attribute is not modified. Returns a code reference; each call removes the
# next entry from the private copy and passes it to $self->decode_marc (which
# receives undef once the copy is empty).
sub record_generator {
    my $self = shift;

    my @pending = @{ $self->records };

    return sub {
        return $self->decode_marc(shift @pending);
    };
}
# Pick the iterator that matches the importer's configuration.
#
# Precedence:
#   1. $self->records holds a true value   -> record_generator
#   2. type is USMARC, MicroLIF or XML     -> marc_generator
#   3. type is ALEPHSEQ                    -> aleph_generator
# Anything else dies with a message listing the accepted inputs.
sub generator {
    my ($self) = @_;

    return $self->record_generator if $self->records;

    my $type = $self->type // '';

    # The alternation is grouped and anchored at both ends; without the group
    # the anchors bind to the first and last alternative only, so names such
    # as 'MARCXML' or 'USMARC2' were wrongly accepted here.
    return $self->marc_generator  if $type =~ /\A(?:USMARC|MicroLIF|XML)\z/;
    return $self->aleph_generator if $type eq 'ALEPHSEQ';

    die "need USMARC, MicroLIF, XML, ALEPHSEQ or MARC::Record";
}
# Convert a MARC::Record object into the importer's plain data structure.
#
# Parameters:
#   $self   - the importer; its `id` attribute selects where the record
#             identifier is read from.
#   $record - the object to convert.
#
# Returns nothing unless $record is a MARC::Record. Otherwise returns
#   { _id => $identifier, record => [ [ $tag, $ind1, $ind2, @subfields ], ... ] }
# where the first row is always [ 'LDR', undef, undef, '_', $leader ], control
# fields carry their data under the pseudo subfield code '_', and data fields
# carry a flat list of subfield code/value pairs.
#
# The identifier is resolved from `id` as follows (first rule that applies):
#   * id starts with '00' and that field exists  -> the field's data
#   * id is three digits plus one code character -> that subfield of the field
#   * otherwise, if a field with that tag exists -> its subfield 'a'
# `_id` is undef when nothing matches or when `id` is undefined.
sub decode_marc {
    my ($self, $record) = @_;

    # eval also covers undef and unblessed values, on which ->isa would die.
    return unless eval { $record->isa('MARC::Record') };

    my @result = (['LDR', undef, undef, '_', $record->leader]);

    for my $field ($record->fields) {
        my $tag  = $field->tag;
        my $ind1 = $field->indicator(1);
        my $ind2 = $field->indicator(2);

        my @sf;
        if ($field->is_control_field) {
            @sf = ('_', $field->data);
        }
        else {
            # Subfields are requested for data fields only.
            # NOTE(review): MARC::Field appears to record a warning when
            # subfields() is called on a control field - confirm against the
            # installed version.
            @sf = map { @$_ } $field->subfields;
        }

        push @result, [$tag, $ind1, $ind2, @sf];
    }

    my $sysid;
    my $id = $self->id;

    # `defined` is checked before any pattern match so that an undefined id
    # yields an undefined _id without "uninitialized value" warnings.
    if (!defined $id) {
        # No identifier field configured.
    }
    elsif ($id =~ /^00/ and my $control = $record->field($id)) {
        $sysid = $control->data;
    }
    elsif (my ($field_tag, $code) = $id =~ /^(\d{3})([\da-zA-Z])$/) {
        my $field = $record->field($field_tag);
        $sysid = $field->subfield($code) if $field;
    }
    elsif (my $field = $record->field($id)) {
        $sysid = $field->subfield("a");
    }

    return { _id => $sysid, record => \@result };
}
1;