NAME
PICA::Path - PICA path expression to match field and subfield values
SYNOPSIS
use PICA::Path;
use PICA::Parser::Plain;
# extract URLs from PICA records, read from STDIN
my $urlpath = PICA::Path->new('009P$a');
my $parser = PICA::Parser::Plain->new(\*STDIN);
while ( my $record = $parser->next ) {
print "$_\n" for $urlpath->record_subfields($record);
}
DESCRIPTION
PICA path expressions can be used to match fields and subfields of PICA::Data records or equivalent record structures. An instance of PICA::Path is a blessed array reference, consisting of the following fields:
regular expression to match field tags against
regular expression to match occurrences against, or range of occurrences values given as array reference (from-to), or undefined
regular expression to match subfields against
substring start position
substring end position
Matching rules
Example record:
...
use PICA::Path;
# PICA::Data record
my $record = {
record => [
[ '005A', '', '0', '1234-5678' ],
[ '005A', '', '0', '1011-1213' ],
[ '009Q', '', 'u', 'http://example.org/', 'x', 'A', 'z', 'B', 'z', 'C' ],
[ '021A', '', 'a', 'Title', 'd', 'Supplement' ],
[ '031N', '', 'j', '1600', 'k', '1700', 'j', '1800', 'k', '1900', 'j', '2000' ],
[ '045F', '01', 'a', '001' ],
[ '045F', '02', 'a', '002' ],
[ '045U', '', 'e', '003', 'e', '004' ],
[ '045U', '', 'e', '005' ]
],
_id => 1234
};
# create path
my $path = PICA::Path->new('021A$ad');
# match record
my $match = $path->match_record($record);
# $match = 'TitleSupplement'
Match single field with no subfield repetition
Field 021A
has only unique subfield codes.
# get all subfields
$path = PICA::Path->new('021A');
$match = $path->match($record);
# $match = 'TitleSupplement'
# get single subfield by code
$path = PICA::Path->new('021A$a');
$match = $path->match($record);
# $match = 'Title'
# get two subfields by code
$path = PICA::Path->new('021A$ad');
$match = $path->match($record);
# $match = 'TitleSupplement'
$path = PICA::Path->new('021A$da');
$match = $path->match($record);
# $match = 'TitleSupplement'
# get two subfields by code in specific order
$path = PICA::Path->new('021A$da');
$match = $path->match($record, pluck => 1);
# $match = 'SupplementTitle'
# join subfields
$path = PICA::Path->new('021A$da');
$match = $path->match($record, pluck => 1, join => ' ');
# $match = 'Supplement Title'
Option split
creates a list out of subfields:
# split subfields to list
$path = PICA::Path->new('021A$da');
$match = $path->match($record, split => 1);
# $match = ['Title', 'Supplement']
Option nested_arrays
creates a list for every field found:
# split fields to lists
$path = PICA::Path->new('021A$da');
$match = $path->match($record, split => 1, nested_arrays => 1);
# $match = [['Title', 'Supplement']]
Match single field with subfield repetition
Field 009Q
has repeated subfields.
# get all subfields
$path = PICA::Path->new('009Q');
$match = $path->match($record);
# $match = 'http://example.org/ABC'
# get repeated subfields
$path = PICA::Path->new('009Q$z');
$match = $path->match($record);
# $match = 'BC'
Option split
creates a list out of subfields:
# split subfields to list
$path = PICA::Path->new('009Q');
$match = $path->match($record, split => 1);
# $match = ['http://example.org/', 'A', 'B', 'C']
# split subfields to list
$path = PICA::Path->new('009Q$z');
$match = $path->match($record, split => 1);
# $match = ['B', 'C']
Option nested_arrays
creates a list for every field found:
# split fields to lists
$path = PICA::Path->new('009Q$z');
$match = $path->match($record, split => 1, nested_arrays => 1);
# $match = [['B', 'C']]
Match repeated field with no subfield repetition
Field 005A
is repeated.
# get all subfields
$path = PICA::Path->new('005A');
$match = $path->match($record);
# $match = '1234-56781011-1213'
# get subfields by code
$path = PICA::Path->new('005A$0');
$match = $path->match($record);
# $match = '1234-56781011-1213'
Option split
creates a list out of subfields:
# split subfields to list
$path = PICA::Path->new('005A$0');
$match = $path->match($record, split => 1);
# $match = ['1234-5678', '1011-1213']
Option nested_arrays
creates a list for every field found:
# split fields to lists
$path = PICA::Path->new('005A$0');
$match = $path->match($record, split => 1, nested_arrays => 1);
# $match = [['1234-5678'], ['1011-1213']]
Match repeated field with subfield repetition
Field 045U
is repeated and has repeated subfields.
# get all subfields
$path = PICA::Path->new('045U');
$match = $path->match($record);
# $match = '003004005'
# get subfields by code
$path = PICA::Path->new('045U$e');
$match = $path->match($record);
# $match = '003004005'
Option split
creates a list out of subfields:
# split subfields to list
$path = PICA::Path->new('045U$e');
$match = $path->match($record, split => 1);
# $match = ['003', '004', '005']
Option nested_arrays
creates a list for every field found:
# split fields to lists
$path = PICA::Path->new('045U$e');
$match = $path->match($record, split => 1, nested_arrays => 1);
# $match = [['003', '004'], ['005']]
Match repeated field with occurrence
Field 045F
is repeated and has occurrences.
# get subfield from field with specific occurrence
$path = PICA::Path->new('045F[01]$a');
$match = $path->match($record);
# $match = '001'
# get subfield from field with wildcard for occurrence
$path = PICA::Path->new('045F[0.]$a');
$match = $path->match($record);
# $match = '001002'
Option split
creates a list out of subfields:
# split subfields to list
$path = PICA::Path->new('045F[0.]$a');
$match = $path->match($record, split => 1);
# $match = ['001', '002']
Match the whole record with wildcards
The dot (.) is a wildcard for field tags, occurrences and subfield codes.
The path ....$. means take any subfield from any field.
# get all subfields from all fields
$path = PICA::Path->new('....$.');
$match = $path->match($record);
# $match = '1234-56781011-1213http://example.org/ABCTitleSupplement16001700180019002000001002003004005'
# get specific subfield from all fields
$path = PICA::Path->new('....$a');
$match = $path->match($record);
# $match = 'Title001002'
Option split
creates a list out of subfields:
# split subfields to list
$path = PICA::Path->new('....$a');
$match = $path->match($record, split => 1);
# $match = ['Title', '001', '002']
Option nested_arrays
creates a list for every field found:
# split fields to lists
$path = PICA::Path->new('....');
$match = $path->match($record, split => 1, nested_arrays => 1);
# $match = [['1234-5678'], ['1011-1213'], [ 'http://example.org/', 'A', 'B', 'C', ], [ 'Title', 'Supplement' ], [ 1600, 1700, 1800, 1900, 2000, ], ['001'], ['002'], [ '003', '004' ], ['005']]
METHODS
new( $expression [, position_as_occurrence => 1 ] )
Create a PICA path by parsing the path expression. The expression consists of
- A tag, consisting of three digits, the first 0 to 2, followed by an uppercase letter or @ (e.g. 021A or 003@). The character . can be used as wildcard.
- An optional occurrence, given by two or three digits (or . as wildcard) in brackets, e.g. [12], [0.] or [102].
- An optional list of subfields. Allowed subfield codes include _A-Za-z0-9.
- An optional position, preceded by /. Both single characters (e.g. /0 for the first), and character ranges (such as 2-4, -3, 2- ...) are supported.
If option position_as_occurrence is set, positions will be read as occurrences, e.g. /2-4 is read as [2-4].
match_record( $record, %options )
Returns matched fields as string or array reference.
Optional parameter:
- join STRING
-
By default, all matched values are joined into a string without a separator. Use the join option to set the separator. Default: '' (empty string).
my $record = { _id => '123X', record => [[ '021A', '', 'a', 'Title', 'd', 'Supplement' ]] };
my $path = PICA::Path->new( '021A' );
my $match = $path->match_record( $record, join => ' - ' );
# $match = 'Title - Supplement'
- pluck 0|1
-
By default, all subfields are added to the mapping in the order they are found in the record. Using the pluck option, one can select the required order of subfields to map; the order is taken from the subfield codes given in the path. Default: 0.
my $record = { _id => '123X', record => [[ '021A', '', 'a', 'Title', 'd', 'Supplement' ]] };
my $path = PICA::Path->new( '021A$da' );
my $match = $path->match_record( $record, pluck => 1 );
# $match = 'SupplementTitle'
- split 0|1
-
When split is set to 1, all mapped values are returned as an array instead of being joined into a string. Default: 0.
my $record = { _id => '123X', record => [[ '021A', '', 'a', 'Title', 'd', 'Supplement' ]] };
my $path = PICA::Path->new( '021A' );
my $match = $path->match_record( $record, split => 1 );
# $match = [ 'Title', 'Supplement' ]
- nested_arrays 0|1
-
When the split option is specified, the output of the mapping is a flat array of strings (one string for each subfield found). With the nested_arrays option, the output is an array of arrays of strings (one inner array for each matched field, containing one string for each matched subfield). Default: 0.
my $record = { _id => '123X', record => [[ '045U', '', 'e', '003', 'e', '004' ], [ '045U', '', 'e', '005' ]] };
my $path = PICA::Path->new( '045U' );
my $match = $path->match_record( $record, nested_arrays => 1 );
# $match = [[ '003', '004' ], [ '005' ]]
- force_array 0|1
-
Force array as return value. Default: 0.
my $record = { _id => '123X', record => [[ '021A', '', 'a', 'Title', 'd', 'Supplement' ]] };
my $path = PICA::Path->new( '021A' );
my $match = $path->match_record( $record, force_array => 1 );
# $match = [ 'TitleSupplement' ]
match_field( $field )
Check whether a given PICA field matches the field and occurrence of this path. Returns the $field on success.
filter_record_fields( $record )
Returns an array reference with the fields of a PICA::Data record that match the path. Subfield codes are ignored.
match_subfields( $field )
Returns a list of matching subfields (optionally trimmed by from and length) without inspecting the field and occurrence values.
stringify( [ $short ] )
Stringifies the PICA path to normalized form. Subfields are separated with $, unless called as stringify(1) or the first subfield is $.
fields
Return the stringified field expression or undefined.
subfields
Return the stringified subfields expression or undefined.
occurrences
Return the stringified occurrences expression or undefined.
positions
Return the stringified position or undefined.