——package
MIME::Parser::Reader;
=head1 NAME
MIME::Parser::Reader - a line-oriented reader for a MIME::Parser
=head1 SYNOPSIS
This module is used internally by MIME::Parser; you probably
don't need to be looking at it at all. But just in case...
### Create a top-level reader, where chunks end at EOF:
$rdr = MIME::Parser::Reader->new();
### Spawn a child reader, where chunks also end at a boundary:
$subrdr = $rdr->spawn->add_boundary($bound);
### Spawn a child reader, where chunks also end at a given string:
$subrdr = $rdr->spawn->add_terminator($string);
### Read until boundary or terminator:
$subrdr->read_chunk($in, $out);
=head1 DESCRIPTION
A line-oriented reader which can deal with virtual end-of-stream
defined by a collection of boundaries.
B<Warning:> this is a private class solely for use by MIME::Parser.
This class has no official public interface
=cut
use
strict;
### All possible end-of-line sequences.
### Note that "" is included because last line of stream may have no newline!
my
@EOLs
= (
""
,
"\r"
,
"\n"
,
"\r\n"
,
"\n\r"
);
### Long line:
my
$LONGLINE
=
' '
x 1000;
#------------------------------
#
# new
#
# I<Class method.>
# Construct an empty (top-level) reader.
#
sub
new {
my
(
$class
) =
@_
;
my
$eos
;
return
bless
{
Bounds
=> [],
BH
=> {},
TH
=> {},
EOS
=> \
$eos
,
},
$class
;
}
#------------------------------
#
# spawn
#
# I<Instance method.>
# Return a reader which is mostly a duplicate, except that the EOS
# accumulator is shared.
#
sub
spawn {
my
$self
=
shift
;
my
$dup
=
bless
{},
ref
(
$self
);
$dup
->{Bounds} = [ @{
$self
->{Bounds}} ];
### deep copy
$dup
->{BH} = { %{
$self
->{BH}} };
### deep copy
$dup
->{TH} = { %{
$self
->{TH}} };
### deep copy
$dup
->{EOS} =
$self
->{EOS};
### shallow copy; same ref!
$dup
;
}
#------------------------------
#
# add_boundary BOUND
#
# I<Instance method.>
# Let BOUND be the new innermost boundary. Returns self.
#
sub
add_boundary {
my
(
$self
,
$bound
) =
@_
;
unshift
@{
$self
->{Bounds}},
$bound
;
### now at index 0
$self
->{BH}{
"--$bound"
} =
"DELIM $bound"
;
$self
->{BH}{
"--$bound--"
} =
"CLOSE $bound"
;
$self
;
}
#------------------------------
#
# add_terminator LINE
#
# I<Instance method.>
# Let LINE be another terminator. Returns self.
#
sub
add_terminator {
my
(
$self
,
$line
) =
@_
;
foreach
(
@EOLs
) {
$self
->{TH}{
"$line$_"
} =
"DONE $line"
;
}
$self
;
}
#------------------------------
#
# has_bounds
#
# I<Instance method.>
# Are there boundaries to contend with?
#
sub
has_bounds {
scalar
(@{
shift
->{Bounds}});
}
#------------------------------
#
# depth
#
# I<Instance method.>
# How many levels are there?
#
sub
depth {
scalar
(@{
shift
->{Bounds}});
}
#------------------------------
#
# eos [EOS]
#
# I<Instance method.>
# Return the last end-of-stream token seen.
# See read_chunk() for what these might be.
#
sub
eos {
my
$self
=
shift
;
${
$self
->{EOS}} =
$_
[0]
if
@_
;
${
$self
->{EOS}};
}
#------------------------------
#
# eos_type [EOSTOKEN]
#
# I<Instance method.>
# Return the high-level type of the given token (defaults to our token).
#
# DELIM saw an innermost boundary like --xyz
# CLOSE saw an innermost boundary like --xyz--
# DONE callback returned false
# EOF end of file
# EXT saw boundary of some higher-level
#
sub
eos_type {
my
(
$self
,
$eos
) =
@_
;
$eos
=
$self
->eos
if
(
@_
== 1);
if
(
$eos
=~ /^(DONE|EOF)/) {
return
$1;
}
elsif
(
$eos
=~ /^(DELIM|CLOSE) (.*)$/) {
return
(($2 eq
$self
->{Bounds}[0]) ? $1 :
'EXT'
);
}
else
{
die
(
"internal error: unable to classify boundary token ($eos)"
);
}
}
#------------------------------
#
# native_handle HANDLE
#
# I<Function.>
# Can we do native i/o on HANDLE? If true, returns the handle
# that will respond to native I/O calls; else, returns undef.
#
sub
native_handle {
my
$fh
=
shift
;
return
$fh
if
(
$fh
->isa(
'IO::File'
) ||
$fh
->isa(
'IO::Handle'
));
return
$fh
if
(
ref
$fh
eq
'GLOB'
);
undef
;
}
#------------------------------
#
# read_chunk INHANDLE, OUTHANDLE
#
# I<Instance method.>
# Get lines until end-of-stream.
# Returns the terminating-condition token:
#
# DELIM xyz saw boundary line "--xyz"
# CLOSE xyz saw boundary line "--xyz--"
# DONE xyz saw terminator line "xyz"
# EOF end of file
# Parse up to (and including) the boundary, and dump output.
# Follows the RFC 2046 specification, that the CRLF immediately preceding
# the boundary is part of the boundary, NOT part of the input!
#
# NOTE: while parsing bodies, we take care to remember the EXACT end-of-line
# sequence. This is because we *may* be handling 'binary' encoded data, and
# in that case we can't just massage \r\n into \n! Don't worry... if the
# data is styled as '7bit' or '8bit', the "decoder" will massage the CRLF
# for us. For now, we're just trying to chop up the data stream.
# NBK - Oct 12, 1999
# The CRLF at the end of the current line is considered part
# of the boundary. I buffer the current line and output the
# last. I strip the last CRLF when I hit the boundary.
sub
read_chunk {
my
(
$self
,
$in
,
$out
,
$keep_newline
,
$normalize_newlines
) =
@_
;
# If we're parsing a preamble or epilogue, we need to keep the blank line
# that precedes the boundary line.
$keep_newline
||= 0;
$normalize_newlines
||= 0;
### Init:
my
%bh
= %{
$self
->{BH}};
my
%th
= %{
$self
->{TH}};
my
$thx
=
keys
%th
;
local
$_
=
$LONGLINE
;
my
$maybe
;
my
$last
=
''
;
my
$eos
=
''
;
### Determine types:
my
$n_in
= native_handle(
$in
);
my
$n_out
= native_handle(
$out
);
### Handle efficiently by type:
if
(
$n_in
) {
if
(
$n_out
) {
### native input, native output [fastest]
while
(<
$n_in
>) {
# Normalize line ending
$_
=~ s/(?:\n\r|\r\n|\r)$/\n/
if
$normalize_newlines
;
if
(
substr
(
$_
, 0, 2) eq
'--'
) {
(
$maybe
=
$_
) =~ s/[ \t\r\n]+\Z//;
$bh
{
$maybe
} and
do
{
$eos
=
$bh
{
$maybe
};
last
};
}
$thx
and
$th
{
$_
} and
do
{
$eos
=
$th
{
$_
};
last
};
$n_out
$last
;
$last
=
$_
;
}
}
else
{
### native input, OO output [slower]
while
(<
$n_in
>) {
# Normalize line ending
$_
=~ s/(?:\n\r|\r\n|\r)$/\n/
if
$normalize_newlines
;
if
(
substr
(
$_
, 0, 2) eq
'--'
) {
(
$maybe
=
$_
) =~ s/[ \t\r\n]+\Z//;
$bh
{
$maybe
} and
do
{
$eos
=
$bh
{
$maybe
};
last
};
}
$thx
and
$th
{
$_
} and
do
{
$eos
=
$th
{
$_
};
last
};
$out
->
(
$last
);
$last
=
$_
;
}
}
}
else
{
if
(
$n_out
) {
### OO input, native output [even slower]
while
(
defined
(
$_
=
$in
->getline)) {
# Normalize line ending
$_
=~ s/(?:\n\r|\r\n|\r)$/\n/
if
$normalize_newlines
;
if
(
substr
(
$_
, 0, 2) eq
'--'
) {
(
$maybe
=
$_
) =~ s/[ \t\r\n]+\Z//;
$bh
{
$maybe
} and
do
{
$eos
=
$bh
{
$maybe
};
last
};
}
$thx
and
$th
{
$_
} and
do
{
$eos
=
$th
{
$_
};
last
};
$n_out
$last
;
$last
=
$_
;
}
}
else
{
### OO input, OO output [slowest]
while
(
defined
(
$_
=
$in
->getline)) {
# Normalize line ending
$_
=~ s/(?:\n\r|\r\n|\r)$/\n/
if
$normalize_newlines
;
if
(
substr
(
$_
, 0, 2) eq
'--'
) {
(
$maybe
=
$_
) =~ s/[ \t\r\n]+\Z//;
$bh
{
$maybe
} and
do
{
$eos
=
$bh
{
$maybe
};
last
};
}
$thx
and
$th
{
$_
} and
do
{
$eos
=
$th
{
$_
};
last
};
$out
->
(
$last
);
$last
=
$_
;
}
}
}
# Write out last held line, removing terminating CRLF if ended on bound,
# unless the line consists only of CRLF and we're wanting to keep the
# preceding blank line (as when parsing a preamble)
$last
=~ s/[\r\n]+\Z//
if
(
$eos
=~ /^(DELIM|CLOSE)/ && !(
$keep_newline
&&
$last
=~ m/^[\r\n]\z/));
$out
->
(
$last
);
### Save and return what we finished on:
${
$self
->{EOS}} = (
$eos
||
'EOF'
);
1;
}
#------------------------------
#
# read_lines INHANDLE, \@OUTLINES
#
# I<Instance method.>
# Read lines into the given array.
#
sub
read_lines {
my
(
$self
,
$in
,
$outlines
) =
@_
;
my
$data
=
''
;
open
(
my
$fh
,
'>'
, \
$data
) or
die
$!;
$self
->read_chunk(
$in
,
$fh
);
@$outlines
=
split
(/^/,
$data
);
close
$fh
;
1;
}
1;
__END__
=head1 SEE ALSO
L<MIME::Tools>, L<MIME::Parser>