From Code to Community: Sponsoring The Perl and Raku Conference 2025 Learn more

package PICA::Field;
{
$PICA::Field::VERSION = '0.585';
}
#ABSTRACT: Perl extension for handling PICA+ fields
use strict;
use base qw(Exporter);
use Carp qw(croak);
our @EXPORT = qw(parse_pp_tag);
our $SUBFIELD_INDICATOR = "\x1F"; # 31
our $START_OF_FIELD = "\x1E"; # 30
our $END_OF_FIELD = "\x0A"; # 10
our $FIELD_TAG_REGEXP = qr/[012][0-9][0-9][A-Z@]$/;
our $FIELD_OCCURRENCE_REGEXP = qr/[0-9][0-9]$/;
our $SUBFIELD_CODE_REGEXP = qr/^[0-9a-zA-Z]$/;
'bool' => sub { ! $_[0]->empty; },
'""' => sub { $_[0]->string; };
use sort 'stable';
sub new($) {
my $class = shift;
$class = ref($class) || $class;
my $tag = shift;
$tag or croak( "No tag provided." );
if (not @_) { # empty field
return PICA::Field->parse($tag);
}
my ($occurrence, $tagno) = parse_pp_tag($tag);
defined $tagno or croak( "\"$tag\" is not a valid tag." );
my $self = bless {
_tag => $tagno,
_occurrence => $occurrence,
_subfields => [],
}, $class;
$self->add(@_);
return $self;
}
sub copy {
my $self = shift;
my $tagno = $self->{_tag};
my $occurrence = $self->{_occurrence};
my $copy = bless {
_tag => $tagno,
_occurrence => $occurrence,
}, ref($self);
$copy->add( @{$self->{_subfields}} );
return $copy;
}
sub parse {
my $class = shift;
$class = ref($class) || $class;
my $data = shift;
my $tag_filter_func = shift;
# TODO: better manage different parsing modes (normalized, plain, WinIBW...)
my $END_OF_FIELD = qr/[\x0A\x0D]+/; # local
$data =~ s/^$START_OF_FIELD//;
$data =~ s/$END_OF_FIELD$//;
my $self = bless {}, $class;
my ($tagno, $subfields) = ($data =~ /([^\$\x1F\x83\s]+)\s?(.*)/);
return if $tag_filter_func and !$tag_filter_func->($tagno);
# TODO: better manage different parsing modes (normalized, plain, WinIBW...)
my $sfreg;
my $sf = defined $subfields ? substr($subfields, 0, 1) : '';
if ($sf eq "\x1F") { $sfreg = '\x1F'; }
elsif ( $sf eq '$' ) { $sfreg = '\$'; }
elsif( $sf eq "\x83" ) { $sfreg = '\x83'; }
elsif( $sf eq "\x9f" ) { $sfreg = '\x9f'; }
elsif( $sf eq '') {
return $self->new($tagno,'');
} else {
croak("not allowed subfield indicator (ord: " . ord($sf) . ") specified");
}
$sfreg = '('.$sfreg.'[0-9a-zA-Z])';
my @sfields = split($sfreg, $subfields);
shift @sfields;
my @subfields = ();
my ($value, $code);
while (@sfields) {
$code = shift @sfields;
$code = substr($code, 1);
$value = shift @sfields;
next unless defined $value;
$value =~ s/\$\$/\$/g if $sf eq '$';
$value =~ s/\s+/ /gm;
push(@subfields, ($code, $value));
}
return $self->new($tagno, @subfields);
}
sub tag {
my $self = shift;
my $tag = shift;
if (defined $tag) {
my ($occurrence, $tagno) = parse_pp_tag($tag);
defined $tagno or croak( "\"$tag\" is not a valid tag." );
$self->{_tag} = $tagno;
$self->{_occurrence} = $occurrence;
}
return $self->{_tag} . ($self->{_occurrence} ? ("/" . $self->{_occurrence}) : "");
}
sub occurrence {
my $self = shift;
my $occurrence = shift;
if (defined $occurrence) {
croak unless $occurrence >= 0 and $occurrence <= 99;
$self->{_occurrence} = sprintf("%02d", $occurrence);
}
return $self->{_occurrence};
}
# Shortcut
*occ = \&occurrence;
sub level {
my $self = shift;
return substr($self->{_tag},0,1);
}
sub subfield {
my $self = shift;
my $codes = $_[0];
if (ref($codes) ne 'Regexp') {
$codes = join('',@_);
if ($codes eq '') {
$codes = qr/./;
} else {
$codes = qr/[$codes]/;
}
}
my @list;
my @data = @{$self->{_subfields}};
for ( my $i=0; $i < @data; $i+=2 ) {
next unless $data[$i] =~ $codes;
my $value = $data[$i+1];
$value =~ s/\s+/ /gm;
if ( wantarray ) {
push( @list, $value );
} else {
return $value;
}
}
return $list[0] unless wantarray;
return @list;
}
# Shortcut
*sf = \&subfield;
sub content {
my $self = shift;
my $codes = join('',@_);
$codes = $codes eq '' ? '.' : "[$codes]";
$codes = qr/$codes/;
my @list;
my @data = @{$self->{_subfields}};
for ( my $i=0; $i < @data; $i+=2 ) {
next unless $data[$i] =~ $codes;
push( @list, [ $data[$i], $data[$i+1] ] );
}
return @list;
}
sub add {
my $self = shift;
my $nfields = @_ / 2;
($nfields >= 1) or return 0;
for my $i ( 1..$nfields ) {
my $offset = ($i-1)*2;
my $code = $_[$offset];
my $value = $_[$offset+1];
$value = defined $value ? "$value" : "";
$value =~ s/\s+/ /gm;
croak( "Subfield code \"$code\" is not a valid subfield code" )
if !($code =~ $SUBFIELD_CODE_REGEXP);
push( @{$self->{_subfields}}, $code, $value );
}
return $nfields;
}
sub update {
my $self = shift;
my %values;
my @order;
# collect values into a hash of array references
while( @_ ) {
my $c = shift;
croak( "Subfield code \"$c\" is not a valid subfield code" )
unless $c =~ $SUBFIELD_CODE_REGEXP;
my $v = shift;
if ( exists $values{$c} ) {
push @{$values{$c}}, (UNIVERSAL::isa($v,'ARRAY') ? @{$v} : $v);
} else {
push @order, $c;
$values{$c} = UNIVERSAL::isa($v,'ARRAY') ? $v : [ $v ];
}
}
my @data;
my $changes = 0;
while ( @{$self->{_subfields}} ) {
my $code = shift @{$self->{_subfields}};
my $value = shift @{$self->{_subfields}};
if ( exists $values{$code} ) {
if ( defined $values{$code} ) {
my @vals = grep { defined $_ } @{$values{$code}};
push @data, map { $code => "$_" } @vals;
$changes += scalar @vals;
$values{$code} = undef;
}
# TODO: better count
} else {
# keep subfield unchanged
push @data, $code => $value;
}
}
## append new subfields in their order
foreach my $code ( @order ) {
next unless defined $values{$code};
my @vals = grep { defined $_ } @{$values{$code}};
$changes += scalar @vals;
push @data, map { $code => "$_" } @vals;
}
## synchronize our subfields
$self->{_subfields} = \@data;
return $changes;
}
sub replace {
my $self = shift;
my $new;
if (@_ and UNIVERSAL::isa($self,'PICA::Field')) {
$new = shift;
} else {
$new = PICA::Field->new(@_);
}
%$self = %$new;
}
sub empty_subfields {
my $self = shift;
my @list;
my @data = @{$self->{_subfields}};
while ( defined( my $code = shift @data ) ) {
push (@list, $code) if shift @data eq "";
}
return @list;
}
sub empty {
my $self = shift;
return 1 unless @{$self->{_subfields}};
my @data = @{$self->{_subfields}};
while ( defined( my $code = shift @data ) ) {
return 0 if shift @data ne "";
}
return 1;
}
sub purged {
my $self = shift;
my @subfields;
my $code;
foreach (@{$self->{_subfields}}) {
if (defined $code) {
push @subfields, ($code, $_) if defined $_ and $_ ne "";
undef $code;
} else {
$code = $_;
}
}
return unless @subfields;
my $copy = bless {
_tag => $self->{_tag},
_occurrence => $self->{_occurrence},
_subfields => \@subfields
}, ref($self);
return $copy;
}
sub normalized {
my $self = shift;
my $subfields = shift;
return $self->string(
subfields => $subfields,
startfield => $START_OF_FIELD,
endfield => $END_OF_FIELD,
startsubfield => $SUBFIELD_INDICATOR
);
}
sub sort {
my ($self, $order) = @_;
return unless @{$self->{_subfields}};
$order = "" unless defined $order;
my (%pos,$i);
for (split('',$order.'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')) {
$pos{$_} = $i++ unless defined $pos{$_};
}
my @sf = @{$self->{_subfields}};
my $n = @sf / 2 - 1;
my @sorted = ();
@sorted = sort {
$pos{$sf[2*$a]} <=> $pos{$sf[2*$b]}
} (0..$n);
$self->{_subfields} = [ map { $sf[2*$_] => $sf[2*$_+1] } @sorted ];
}
sub size {
my $self = shift;
return @{$self->{_subfields}} / 2;
}
sub string {
my $self = shift;
my (%args) = @_ ? @_ : ();
my $subfields = defined($args{subfields}) ? $args{subfields} : '';
my $startfield = defined($args{startfield}) ? $args{startfield} : '';
my $endfield = defined($args{endfield}) ? $args{endfield} : "\n";
my $startsubfield = defined($args{startsubfield}) ? $args{startsubfield} : '$';
my @subs;
my $subs = $self->{_subfields};
my $nfields = @$subs / 2;
for my $i ( 1..$nfields ) {
my $offset = ($i-1)*2;
my $code = $subs->[$offset];
my $value = $subs->[$offset+1];
if (!$subfields || $code =~ /^[$subfields]$/) {
$value =~ s/\$/\$\$/g if $startsubfield eq '$';
push( @subs, $code.$value )
}
} # for
return "" unless @subs; # no subfields => no field
my $occ = '';
$occ = "/" . $self->{_occurrence} if defined $self->{_occurrence};
return $startfield .
$self->{_tag} . $occ . ' ' .
$startsubfield . join( $startsubfield, @subs ) .
$endfield;
}
# Write the field to a L<XML::Writer> object
my $write_xml = sub {
my ($self, $writer) = @_;
my ($datafield, $subfield);
if (UNIVERSAL::isa( $writer, 'XML::Writer::Namespaces' )) {
$datafield = [$PICA::Record::XMLNAMESPACE, 'datafield'];
$subfield = [$PICA::Record::XMLNAMESPACE, 'subfield'];
} else {
$datafield = 'datafield';
$subfield = 'subfield';
}
my %attr = ('tag' => $self->{_tag});
$attr{occurrence} = $self->{_occurrence} if defined $self->{_occurrence};
$writer->startTag( $datafield, %attr );
my $subs = $self->{_subfields};
my $nfields = @$subs / 2;
if ($nfields) {
for my $i ( 1..$nfields ) {
my $offset = ($i-1)*2;
$writer->startTag( $subfield, code => $subs->[$offset] );
$writer->characters( $subs->[$offset+1] );
$writer->endTag(); # subfield
}
}
$writer->endTag(); # datafield
$writer;
};
sub xml {
my $self = shift;
my %param;
if ( UNIVERSAL::isa( $_[0], 'XML::Writer' ) ) {
(%param) = ( writer => @_ );
} elsif ( ref($_[0]) ) {
(%param) = ( OUTPUT => @_ );
} else {
(%param) = @_;
}
if ( defined $param{writer} ) {
$write_xml->( $self, $param{writer} );
return $param{writer};
} else {
my ($string, $sref);
if (not defined $param{OUTPUT}) {
$sref = \$string;
$param{OUTPUT} = $sref;
}
my $writer = PICA::Writer::xmlwriter( %param );
$write_xml->( $self, $writer );
return defined $sref ? "$string" : $writer;
}
}
sub html {
my $self = shift;
my %options = @_;
# CSS classes (TODO: customize)
my $field = 'field';
my $tag = 'tag';
my $tagcode = 'tagcode';
my $occurrence = 'occurrence';
my $sfcode = 'sfcode';
my $sfindicator = 'sfindicator';
my $html = "<div class='$field'><span class='$tag'>"
. "<span class='$tagcode'>" . $self->{_tag} . "</span>";
if (defined $self->{_occurrence}) {
$html .= "/<span class='$occurrence'>"
. $self->{_occurrence} . "</span>";
} else {
# TODO: in monospaced mode only
# $html .= "&#xA0;&#xA0;&#xA0;";
}
$html .= "</span> "; # tag
my $subs = $self->{_subfields};
my $nfields = @$subs / 2;
if ($nfields) {
for my $i ( 1..$nfields ) {
my $offset = ($i-1)*2;
my $code = $subs->[$offset];
my $text = $subs->[$offset+1];
$html .= "<span class='$sfindicator'>\$</span>"
. "<span class='$sfcode'>$code</span>";
$text =~ s/&/&amp;/g;
$text =~ s/</&lt;/g;
$html .= $text; # TODO: character encoding (?)
}
}
return $html . "</div>\n";
}
sub parse_pp_tag {
my $tag = shift;
my ($tagno, $occurrence) = split ('/', $tag);
undef $tagno unless defined $tagno and $tagno =~ $FIELD_TAG_REGEXP;
undef $occurrence unless defined $occurrence and $occurrence =~ $FIELD_OCCURRENCE_REGEXP;
return ($occurrence, $tagno);
}
1;
__END__
=pod
=encoding utf-8
=head1 NAME
PICA::Field - Perl extension for handling PICA+ fields
=head1 VERSION
version 0.585
=head1 SYNOPSIS
use PICA::Field;
my $field = PICA::Field->new( '028A',
'9' => '117060275',
'8' => 'Martin Schrettinger'
);
$field->add( 'd' => 'Martin', 'a' => 'Schrettinger' );
$field->update( "8", "Schrettinger, Martin" );
print $field->normalized;
print $field->xml;
=head1 DESCRIPTION
Defines PICA+ fields for use in the PICA::Record module.
=head1 EXPORT
The method C<parse_pp_tag> is exported.
=head1 METHODS
=head2 new ( [...] )
The constructor, which will return a C<PICA::Field> object or croak on error.
You can call the constructor with a tag and a list of subfields:
PICA::Field->new( '028A',
'9' => '117060275',
'8' => 'Martin Schrettinger'
);
With a string of normalized PICA+ data of one field:
PICA::Field->new("\x1E028A \x1F9117060275\x1F8Martin Schrettinger\x0A');
With a string of readable PICA+ data:
PICA::Field->new('028A $9117060275$8Martin Schrettinger');
=head2 copy ( $field )
Creates and returns a copy of this object.
=head2 parse ( $string, [, \&tag_filter_func ] )
The constructur will return a PICA::Field object based on data that is
parsed if null if the filter dropped the field. Dropped fields will not
be parsed so they are also not validated.
The C<$tag_filter_func> is an optional reference to a user-supplied
function that determines on a tag-by-tag basis if you want the tag to
be parsed or dropped. The function is passed the tag number (including
occurrence), and must return a boolean.
For example, if you only want to 021A fields, try this:
The filter function can be used to select only required fields
sub filter {
my $tagno = shift;
return $tagno eq "021A";
}
my $field = PICA::Field->parse( $string, \&filter );
=head2 tag ( [ $tag ] )
Returns the PICA+ tag and occurrence of the field. Optionally sets tag (and occurrence) to a new value.
=head2 occurrence ( [ $occurrence ] ) or occ ( ... )
Returns the ocurrence or undef. Optionally sets the ocurrence to a new value.
=head2 level ( )
Returns the level (0: main, 1: local, 2: copy) of this field.
=head2 subfield ( [ $code(s) ] ) or sf ( ... )
Return selected or all subfield values. If you specify
one ore more subfield codes, only matching subfields are
returned. When called in a scalar context returns only the
first (matching) subfield. You may specify multiple subfield codes:
my $subfield = $field->subfield( 'a' ); # first $a
my $subfield = $field->subfield( 'acr' ); # first of $a, $c, $r
my $subfield = $field->subfield( 'a', 'c', 'r' ); # the same
my @subfields = $field->subfield( '0-9' ); # $0 ... $9
my @subfields = $field->subfield( qr/[0-9]/ ); # $0 ... $9
my @subfields = $field->subfield( 'a' );
my @all_subfields = $field->subfield();
If no matching subfields are found, C<undef> is returned in a scalar
context or an empty list in a list context.
Remember that there can be more than one subfield of a given code!
=head2 content ( [ $code(s) ] )
Return selected or all subfields as an array of arrays. If you specify
one ore more subfield codes, only matching subfields are returned. See
the C<subfield> method for more examples.
This shows the subfields from a 021A field:
[
[ 'a', '@Traité de documentation' ],
[ 'd', 'Le livre sur le livre ; Théorie et pratique' ],
[ 'h', 'Paul Otlet' ]
]
=head2 add ( $code, $value [, $code, $value ...] )
Adds subfields to the end of the subfield list.
Whitespace in subfield values is normalized.
$field->add( 'c' => '1985' );
Returns the number of subfields added.
=head2 update ( $sf => $value [ $sf => $value ...] )
Allows you to change the values of the field for one or more given subfields:
$field->update( a => 'Little Science, Big Science' );
If you attempt to update a subfield which does not currently exist in the field,
then a new subfield will be appended. If you don't like this auto-vivification
you must check for the existence of the subfield prior to update.
if ( defined $field->subfield( 'a' ) ) {
$field->update( 'a' => 'Cryptonomicon' );
}
Instead of a single value you can also pass an array reference. The following
statements should have the same result:
$field->update( 'x', 'foo', 'x', 'bar' );
$field->update( 'x' => ['foo', 'bar'] );
To remove a subfield, update it to undef or an empty array reference:
$field->update( 'a' => undef );
$field->update( 'a' => [] );
=head2 replace ( $field | ... )
Allows you to replace an existing field with a new one. You may pass a
C<PICA::Field> object or parameters for a new field to replace the
existing field with. Replace does not return a meaningful or reliable value.
=head2 empty_subfields ( )
Returns a list of all codes of empty subfields.
=head2 empty ( )
Test whether there are no subfields or all subfields are empty. This method
is automatically called by overloading whenever a PICA::Field is converted
to a boolean value.
=head2 purged ( )
Remove a copy of this field with empty subfields
removed or undef if the whole field is empty.
=head2 normalized ( [$subfields] )
Returns the field as a string. The tag number, occurrence and
subfield indicators are included.
If C<$subfields> is specified, then only those subfields will be included.
=head2 sort ( [ $order ] )
Sort subfields by subfield indicators. You can optionally specify an order as string of subfield codes.
=head2 size
Returns the number of subfields (no matter if empty or not).
=head2 string ( [ %params ] )
Returns a pretty string for printing.
Returns the field as a string. The tag number, occurrence and
subfield indicators are included.
If C<subfields> is specified, then only those subfields will be included.
Fields without subfields return an empty string.
=head2 xml ( [ [ writer => ] $writer | [ OUTPUT ] => \$sref | %param ] )
Return the field in PICA-XML format or write it to an L<XML::Writer>
and return the writer. If you provide parameters, they will be passed
to a newly created XML::Writer that is used to write to a string.
By default the PICA-XML namespaces with namespace prefix 'pica' is
included. In addition to XML::Writer this methods knows the 'header'
parameter that first adds the XML declaration.
=head2 html ( [ %options ] )
Returns a HTML representation of the field for browser display. See also
the C<pica2html.xsl> script to generate a more elaborated HTML view from
PICA-XML.
=head1 STATIC METHODS
=head2 parse_pp_tag tag ( $tag )
Tests whether a string can be used as a tag/occurrence specifier. A tag
indicator consists of a 'type' (00-99) and an 'indicator' (A-Z and @),
both conflated as the 'tag', and an optional occurrence (00-99). This
method returns a list of two values: occurrence and tag (this order!).
It can be used to parse and test tag specifiers this ways:
($occurrence, $tag) = parse_pp_tag( $t );
parse_pp_tag( $t ) or print STDERR "Not a valid tag: $t\n";
=head1 SEE ALSO
This module was inspired by L<MARC::Field> by Andy Lester.
=head1 AUTHOR
Jakob Voß <voss@gbv.de>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2014 by Verbundzentrale Goettingen (VZG) and Jakob Voss.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut