NAME
Parse::Stallion::CSV - Comma Separated Values
SYNOPSIS
This is primarily for demonstrating Parse::Stallion.
use Parse::Stallion::CSV;

my $csv_stallion = Parse::Stallion::CSV->new;

# First line is the header, each following line is one record.
my $input_string = 'header1,header2,header3'."\n";
$input_string .= 'field_1_1,field_1_2,field_1_3'."\n";
# A field containing double quotes must itself be quoted, with every
# embedded double quote doubled (see the 'escaped' and 'DDQUOTE' rules).
$input_string .=
    '"field_2_1 3 words",field_2_2 3 words,"""field3_2 x"""'."\n";

# parse_and_evaluate is wrapped in eval so a failure can be inspected via $@.
my $result = eval {
    $csv_stallion->parse_and_evaluate({parse_this=>$input_string})
};
if ($@) {
    if ($csv_stallion->parse_failed) {
        # parse failed
    }
}
# $result should contain a reference to a hash equal to:
{'header' => [ 'header1', 'header2', 'header3' ],
'records' => [
[ 'field_1_1', 'field_1_2', 'field_1_3' ],
[ 'field_2_1 3 words', 'field_2_2 3 words', '"field3_2 x"' ]
]
};
DESCRIPTION
Reads a comma separated value string, returning a reference to a hash containing the headers and the data.
The grammar taken from the RFC and its implementation are both shown below to demonstrate how one can use Parse::Stallion.
GRAMMAR SOURCE
The grammar used here is based on RFC 4180, see for example http://tools.ietf.org/html/rfc4180. The RFC expresses the grammar in ABNF:
file = [header CRLF] record *(CRLF record) [CRLF]
header = name *(COMMA name)
record = field *(COMMA field)
name = field
field = (escaped / non-escaped)
escaped = DQUOTE *(TEXTDATA / COMMA / CR / LF / 2DQUOTE) DQUOTE
non-escaped = *TEXTDATA
COMMA = %x2C
CR = %x0D
DQUOTE = %x22
LF = %x0A
CRLF = CR LF
TEXTDATA = %x20-21 / %x23-2B / %x2D-7E
GRAMMAR IMPLEMENTATION
The following is the code used for handling the grammar:
# Grammar rules for a CSV document whose first line is a header, modelled on
# the RFC 4180 ABNF.  Each key is a rule name.  Each value describes how the
# rule is composed from other rules ('and', 'or', 'multiple', 'optional') or
# matched directly ('leaf' regex), plus an optional 'evaluation' sub that is
# handed a hash ref keyed by sub-rule name and returns the rule's value.
my %with_header_csv_rules = (
    # file = header CRLF record *(CRLF record) [CRLF]
    # Unlike the RFC, the header is not optional here.
    file => {
        and => [
            'header',
            'CRLF',
            'record',
            {multiple => {and => ['CRLF', 'record']}},
            {optional => 'CRLF'},
        ],
        # Result of the whole parse: the header value plus whatever was
        # collected under 'record' (presumably one entry per record line).
        evaluation => sub {
            return {header => $_[0]->{header}, records => $_[0]->{record}};
        },
    },

    # header = name *(COMMA name)
    header => {
        and        => ['name', {multiple => {and => ['COMMA', 'name']}}],
        evaluation => sub {return $_[0]->{name}},
    },

    # record = field *(COMMA field)
    record => {
        and        => ['field', {multiple => {and => ['COMMA', 'field']}}],
        evaluation => sub {return $_[0]->{field}},
    },

    # name = field
    name => {and => ['field']},

    # field = (escaped / non-escaped)
    field => {or => ['escaped', 'non_escaped']},

    # escaped = DQUOTE *(TEXTDATA / COMMA / CR / LF / 2DQUOTE) DQUOTE
    # The surrounding quotes are dropped; only the inner text is returned.
    escaped => {
        and        => ['DQUOTE', 'inner_escaped', 'DQUOTE'],
        evaluation => sub {return $_[0]->{inner_escaped}},
    },

    # Body of a quoted field.  The repeated alternatives are given the rule
    # name 'ie_choices' so the evaluation can concatenate them in order.
    inner_escaped => {
        multiple => {
            or        => ['TEXTDATA', 'COMMA', 'CR', 'LF', 'DDQUOTE'],
            rule_name => 'ie_choices',
        },
        evaluation => sub {
            my $param = shift;
            return join('', @{$param->{'ie_choices'}});
        },
    },

    # Two consecutive double quotes inside a quoted field stand for one.
    DDQUOTE => {
        and        => ['DQUOTE', 'DQUOTE'],
        evaluation => sub {return '"'},
    },

    # NOTE(review): the RFC allows an empty non-escaped field (*TEXTDATA),
    # but TEXTDATA below requires at least one character, so an empty
    # unquoted field does not appear to match -- confirm this is intended.
    non_escaped => {and => ['TEXTDATA']},

    COMMA  => {leaf => qr/\x2C/},
    CR     => {leaf => qr/\x0D/},
    DQUOTE => {leaf => qr/\x22/},
    LF     => {leaf => qr/\x0A/},

    # Matches a bare newline rather than the RFC's CR LF pair.
    CRLF => {leaf => qr/\n/},

    # One or more printable ASCII characters other than '"' and ','.
    TEXTDATA => {leaf => qr/[\x20-\x21\x23-\x2B\x2D-\x7E]+/},
);
# Constructor.  Builds and returns a Parse::Stallion parser configured with
# the %with_header_csv_rules grammar, starting at the 'file' rule.
#
# Any arguments (the invocant and an optional parameters value) are accepted
# but not used.  The value returned is whatever Parse::Stallion->new returns;
# it is not re-blessed into this package.
sub new {
    # Arrow syntax instead of "new Parse::Stallion(...)": indirect object
    # syntax is resolved by parser heuristics and is ambiguous inside a sub
    # that is itself called "new".
    return Parse::Stallion->new({
        rules_to_set_up_hash => \%with_header_csv_rules,
        start_rule           => 'file',
    });
}