# Filename: Parse.pm
# Parse the Open Financial Exchange format
#
# Created January 30, 2008 Brandon Fosdick <bfoz@bfoz.net>
#
# Copyright 2008 Brandon Fosdick <bfoz@bfoz.net> (BSD License)
#
# $Id: Parse.pm,v 1.2 2008/03/04 04:22:27 bfoz Exp $
use strict;
use vars qw($VERSION);
# Derive the module version from the revision keyword embedded in the quoted
# string below: the major and minor numbers are captured by the regex and
# formatted as "major.minor" with the minor part zero-padded to three digits
# (revision 1.2 becomes "1.002").
$VERSION = sprintf("%d.%03d", q$Revision: 1.2 $ =~ /(\d+)\.(\d+)/);
# Report whether every node in a list of parse-tree nodes has a distinct name.
#
# Argument: a reference to an array of hash references, each read through its
#           'name' key.
# Returns:  1 when no name occurs more than once (an empty array counts as
#           unique), 0 as soon as a repeated name is found, and undef when the
#           argument is not an array reference.
sub is_unique
{
    my $nodes = shift;
    return undef unless ref($nodes) eq 'ARRAY';
    my %seen;
    for my $node ( @{$nodes} )
    {
        # Post-increment yields the count *before* this node, so the test is
        # false the first time a name is met and true on every repeat.
        return 0 if $seen{$node->{name}}++;
    }
    return 1;
}
# Turn a parse tree (array of { name => ..., content => ... } nodes) into a
# hash keyed by node name.
#
# Anything that is not an array reference is treated as a leaf and returned
# unchanged. For an array, each node's content is collapsed first (the node is
# updated in place), then the nodes are folded into a hash:
#   - a name that occurs once maps straight to that node's content;
#   - a name that occurs several times maps to an array reference holding the
#     contents in their original order.
# Returns a reference to the new hash.
sub collapse
{
    my $nodes = shift;
    return $nodes if ref($nodes) ne 'ARRAY';

    # Work bottom-up so that every content value is already in its final form
    # by the time it is copied into the result.
    for my $node ( @{$nodes} )
    {
        $node->{content} = collapse($node->{content});
    }

    my %collapsed;
    if( is_unique($nodes) )
    {
        # No name clashes: a plain name => content mapping is enough.
        for my $node ( @{$nodes} )
        {
            $collapsed{$node->{name}} = $node->{content};
        }
    }
    else
    {
        # Count how often each name occurs so repeated names can be gathered
        # into arrays while one-off names stay scalar.
        my %occurrences;
        $occurrences{$_->{name}}++ for @{$nodes};

        for my $node ( @{$nodes} )
        {
            my $name = $node->{name};
            if( $occurrences{$name} > 1 )
            {
                push @{$collapsed{$name}}, $node->{content};
            }
            else
            {
                $collapsed{$name} = $node->{content};
            }
        }
    }
    return \%collapsed;
}
# Convert the OFX date strings in a collapsed parse tree to the value returned
# by str2time(), modifying the tree in place.
#
# Argument: an array reference, a hash reference, or a leaf value. Arrays and
#           hashes are walked recursively; any other value is ignored.
# In a hash, a key beginning with "dt" whose value is not a reference is
# treated as a date: the string is reformatted and replaced by the result of
# str2time(). Every other value is descended into.
# NOTE(review): str2time() is presumably Date::Parse's; it is not imported in
# the visible part of this file - confirm.
sub parse_dates
{
    my $tree = shift;
    my $type = ref($tree);
    if( $type eq 'ARRAY' )
    {
        parse_dates($_) for @{$tree};
    }
    elsif( $type eq 'HASH' )
    {
        for my $key ( keys %{$tree} )
        {
            my $value = $tree->{$key};

            # A reference is never a date string, even under a "dt" key (for
            # example the array collapse() builds for repeated names). Walk
            # it instead of feeding it to the conversion, which would
            # overwrite the nested structure with the conversion result.
            if( ref($value) or ($key !~ /^dt/) )
            {
                parse_dates($value);
                next;
            }

            # Add seconds spacer
            $value =~ s/^([0-9]{12})([0-9]{2})/$1:$2/;
            # Add minutes spacer
            $value =~ s/^([0-9]{10})([0-9]{2})/$1:$2/;
            # Add date spacers
            $value =~ s/^([0-9]{4})([0-9]{2})([0-9]{2})/$1-$2\-$3 /;
            # Turn a one-digit "[-8:PST]" style offset into " -0800". The
            # zeros are written as \x30 so that "$1" followed by "0" is not
            # read as capture group 10.
            $value =~ s/\[([-+]?)([0-9]):[A-Z]{3}\]/ $1\x30$2\x30\x30/;
            # Handle timezone offsets that were already 2 digits; one-digit
            # offsets no longer match because the brackets are gone.
            $value =~ s/\[([-+]?)([0-9]{1,2}):[A-Z]{3}\]/ $1$2\x30\x30/;
            # Do the conversion
            $tree->{$key} = str2time($value);
        }
    }
}
# Parse OFX content held in a string.
#
# Argument: the complete OFX text - a block of "KEY:value" header lines, a
#           blank line, then the body handed to Finance::OFX::Tree::parse().
# Returns:  a hash reference holding the collapsed body plus the header
#           fields under the 'header' key, or undef when the input is
#           undefined, has no blank line separating header from body, does
#           not carry OFXHEADER 100 / DATA OFXSGML, or the body does not
#           yield a tree whose first node is named 'ofx'.
# Failures return undef explicitly (not an empty list) so that list-context
# callers keep seeing a single undef value.
sub parse
{
    my $source = shift;
    return undef unless defined $source;

    # Leading blank lines or spaces must not be mistaken for the header/body
    # separator or end up glued to the first header key.
    $source =~ s/^\s+//;

    # The header ends at the first blank line. Accept CRLF as well as LF line
    # endings here: with "\r\n\r\n" there is no literal "\n\n" to split on.
    my ($header, $body) = split /\x0D?\n\x0D?\n/, $source, 2;
    return undef unless defined($header) and defined($body);

    # Parse the OFX header block one line at a time. Splitting each line on
    # its first colon only keeps values that themselves contain ':' intact,
    # and lines without a colon cannot shift keys and values out of step.
    $header =~ s/\x0D//g;		# Un-networkify newlines
    my %header;
    for my $line ( split /\n/, $header )
    {
        my ($key, $value) = split /:/, $line, 2;
        next unless defined($key) and defined($value);
        $header{$key} = $value;
    }

    return undef unless defined($header{OFXHEADER}) and ($header{OFXHEADER} == 100);
    return undef unless defined($header{DATA}) and ($header{DATA} eq 'OFXSGML');

    # The body is passed on untouched (carriage returns included).
    my $tree = Finance::OFX::Tree::parse($body);
    return undef unless $tree and ($tree->[0]{name} eq 'ofx');

    $tree = collapse($tree);	# Collapse the parse tree into a hash
    parse_dates($tree);		# Convert date elements via str2time()

    # Merge the header hash into the parse tree
    $tree->{header} = \%header;
    return $tree;
}
# Parse the OFX content stored in a file.
#
# Argument: the name of the file to read.
# Returns:  whatever parse() returns for the file's content, or undef when
#           the file name or the content read from the file is false (empty,
#           undefined or "0").
# NOTE(review): read_file() is presumably File::Slurp's; it is not imported in
# the visible part of this file - confirm.
sub parse_file
{
    my ($path) = @_;
    if( $path )
    {
        # Scalar assignment: the whole file is wanted as a single string.
        my $content = read_file($path);
        return parse($content) if $content;
    }
    return undef;
}
1;
__END__
=head1 NAME
Finance::OFX::Parse - Parse the Open Financial Exchange protocol
=head1 SYNOPSIS
use Finance::OFX::Parse;
my $tree = Finance::OFX::Parse::parse($ofxContent);
=head1 DESCRIPTION
C<Finance::OFX::Parse> provides two functions, C<parse()> and C<parse_file()>, that
accept an OFX "file" and return a reference to a hash tree representing the
contents of the file. C<parse()> expects the OFX content as a scalar argument
while C<parse_file()> expects a filename.
Parsing well-formed OFX content returns a hash with two keys: 'ofx' and
'header'. The 'ofx' key is a reference to a hash tree representing the <OFX> block
and the 'header' key is a reference to a hash of header attributes. All date
values are automatically converted to UNIX time.
=head2 EXAMPLE
If C<$ofxContent> in the above code is...
OFXHEADER:100
DATA:OFXSGML
VERSION:102
SECURITY:NONE
ENCODING:USASCII
CHARSET:1252
COMPRESSION:NONE
OLDFILEUID:NONE
NEWFILEUID:NONE
<OFX>
<SIGNONMSGSRSV1>
<SONRS>
<STATUS>
<CODE>0
<SEVERITY>INFO
<MESSAGE>SonRq is successful
</STATUS>
<DTSERVER>20080220142819.321[-8:PST]
<LANGUAGE>ENG
<FI>
<ORG>DI
<FID>074014187
</FI>
</SONRS>
</SIGNONMSGSRSV1>
</OFX>
...the resulting HoH will be...
$VAR1 = {
'ofx' => {
'signonmsgsrsv1' => {
'sonrs' => {
'fi' => {
'org' => 'DI',
'fid' => '074014187'
},
'language' => 'ENG',
'status' => {
'severity' => 'INFO',
'message' => 'SonRq is successful',
'code' => '0'
},
'dtserver' => '1203546499.321'
}
}
},
'header' => {
'CHARSET' => '1252',
'OFXHEADER' => 100,
'OLDFILEUID' => 'NONE',
'COMPRESSION' => 'NONE',
'SECURITY' => 'NONE',
'ENCODING' => 'USASCII',
'NEWFILEUID' => 'NONE',
'DATA' => 'OFXSGML',
'VERSION' => '102'
}
};
=head1 FUNCTIONS
=over
=item $tree = parse($ofx)
C<parse()> accepts a single scalar argument containing the OFX data to be
parsed and returns a reference to a hash tree.
=item $tree = parse_file($file_name)
C<parse_file()> accepts a single scalar argument containing the path to a file
containing the OFX data to be parsed and returns a reference to a hash tree.
=back
=head1 SEE ALSO
L<HTML::Parser>
=head1 WARNING
From C<Finance::Bank::LloydsTSB>:
This is code for B<online banking>, and that means B<your money>, and
that means B<BE CAREFUL>. You are encouraged, nay, expected, to audit
the source of this module yourself to reassure yourself that I am not
doing anything untoward with your banking data. This software is useful
to me, but is provided under B<NO GUARANTEE>, explicit or implied.
=head1 AUTHOR
Brandon Fosdick, E<lt>bfoz@bfoz.netE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright 2008 Brandon Fosdick <bfoz@bfoz.net>
This software is provided under the terms of the BSD License.
=cut