sub
from_file
{
my
$self
=
shift
;
my
(
$file
) =
@_
;
if
(
$file
=~ m/\.gz$/ ) {
return
$self
->SUPER::from_file(
$file
,
binmode
=>
":gzip"
);
}
else
{
return
$self
->SUPER::from_file(
$file
);
}
}
sub
parse
{
my
$self
=
shift
;
$self
->_change_para_options(
mode
=>
"P"
,
filling
=> 1,
indent
=> 0,
);
$self
->{para_flushed} = 0;
$self
->sequence_of( \
&parse_line
);
}
sub
token_nl
{
my
$self
=
shift
;
$self
->expect(
"\n"
);
}
sub
token_chunk
{
my
$self
=
shift
;
$self
->skip_ws;
return
$self
->any_of(
sub
{
$self
->committed_scope_of(
'"'
, "_escaped
", qr/"
|$/m ) },
sub
{
$self
->_escaped( 1 ) },
);
}
sub
_escaped
{
my
$self
=
shift
;
my
(
$break_on_space
) =
@_
;
my
$ret
=
""
;
my
$consumed
= 0;
$consumed
++
while
$self
->any_of(
sub
{
my
$esc
= (
$self
->expect(
qr/\\(?|\((..)|(.))/
) )[1];
$self
->commit;
$ret
.=
$self
->parse_escape(
$esc
);
1;
},
sub
{
length
(
my
$more
=
$self
->substring_before(
$break_on_space
?
qr/[\\\n\s]/
:
qr/[\\\n]/
) ) or
return
0;
$ret
.=
$more
;
1 },
sub
{ 0 },
);
$consumed
or
$self
->fail(
"Expected a chunk"
);
return
$ret
;
}
sub
parse_chunks
{
my
$self
=
shift
;
@{
$self
->sequence_of( \
&token_chunk
) };
}
sub
parse_chunks_flat
{
my
$self
=
shift
;
return
join
" "
,
$self
->parse_chunks;
}
sub
parse_line
{
my
$self
=
shift
;
$self
->commit;
$self
->any_of(
sub
{
my
$directive
= (
$self
->expect(
qr/\.([A-Z]+)/
i ) )[1];
$self
->commit;
$self
->${\
"parse_directive_$directive"
}(
$self
);
$self
->token_nl;
},
sub
{
$self
->expect(
qr/\.\\".*?\n/
);
},
sub
{
$self
->commit;
$self
->scope_of(
undef
,
sub
{
$self
->{fonts} = [
"R"
];
$self
->parse_plaintext;
},
"\n"
);
},
);
}
sub
parse_directive_B
{
my
$self
=
shift
;
$self
->_flush_para;
$self
->chunk(
$self
->parse_chunks_flat,
font
=>
"B"
,
size
=> 0 );
}
sub
parse_directive_I
{
my
$self
=
shift
;
$self
->_flush_para;
$self
->chunk(
$self
->parse_chunks_flat,
font
=>
"I"
,
size
=> 0 );
}
sub
parse_directive_R
{
my
$self
=
shift
;
$self
->_flush_para;
$self
->chunk(
$self
->parse_chunks_flat,
font
=>
"R"
,
size
=> 0 );
}
sub
parse_directive_SM
{
my
$self
=
shift
;
$self
->_flush_para;
my
@chunks
=
$self
->parse_chunks;
$self
->chunk(
shift
@chunks
,
font
=>
"R"
,
size
=> 0 )
if
@chunks
> 2;
$self
->chunk(
shift
@chunks
,
font
=>
"R"
,
size
=> -1 );
$self
->chunk(
shift
@chunks
,
font
=>
"R"
,
size
=> 0 )
if
@chunks
;
}
sub
_parse_directive_alternate
{
my
$self
=
shift
;
my
(
$first
,
$second
) =
@_
;
$self
->_flush_para;
my
$i
= 0;
map
{
$self
->chunk(
$_
,
font
=> ( ++
$i
% 2 ?
$first
:
$second
),
size
=> 0 ) }
$self
->parse_chunks;
}
sub
parse_directive_BI
{
my
$self
=
shift
;
$self
->_parse_directive_alternate(
"B"
,
"I"
);
}
sub
parse_directive_IB
{
my
$self
=
shift
;
$self
->_parse_directive_alternate(
"I"
,
"B"
);
}
sub
parse_directive_RB
{
my
$self
=
shift
;
$self
->_parse_directive_alternate(
"R"
,
"B"
);
}
sub
parse_directive_BR
{
my
$self
=
shift
;
$self
->_parse_directive_alternate(
"B"
,
"R"
);
}
sub
parse_directive_RI
{
my
$self
=
shift
;
$self
->_parse_directive_alternate(
"R"
,
"I"
);
}
sub
parse_directive_IR
{
my
$self
=
shift
;
$self
->_parse_directive_alternate(
"I"
,
"R"
);
}
sub
parse_directive_TH
{
my
$self
=
shift
;
$self
->_change_para(
"P"
),
$self
->para_TH(
$self
->parse_chunks );
}
sub
parse_directive_SH
{
my
$self
=
shift
;
$self
->_change_para(
"P"
),
$self
->para_SH(
$self
->parse_chunks_flat );
}
sub
parse_directive_SS
{
my
$self
=
shift
;
$self
->_change_para(
"P"
),
$self
->para_SS(
$self
->parse_chunks_flat );
}
sub
parse_directive_TP
{
my
$self
=
shift
;
$self
->_change_para(
"TP"
);
}
sub
parse_directive_IP
{
my
$self
=
shift
;
$self
->_change_para(
"IP"
);
if
(
defined
(
my
$marker
=
$self
->maybe(
"token_chunk"
) ) ) {
$self
->_change_para_options(
marker
=>
$marker
);
}
if
(
defined
(
my
$indent
=
$self
->maybe(
"token_chunk"
) ) ) {
$self
->_change_para_options(
indent
=>
$indent
);
}
}
sub
parse_directive_P
{
my
$self
=
shift
;
$self
->_change_para(
"P"
);
}
{
no
warnings
'once'
;
*parse_directive_PP
=
*parse_directive_LP
= \
&parse_directive_P
;
}
sub
parse_directive_EX
{
my
$self
=
shift
;
$self
->_push_para(
"EX"
);
}
sub
parse_directive_EE
{
my
$self
=
shift
;
$self
->_pop_para(
"EX"
);
}
sub
parse_directive_RS
{
my
$self
=
shift
;
if
(
defined
(
my
$indent
=
$self
->maybe(
"token_chunk"
) ) ) {
$self
->_change_para_options(
indent
=>
$indent
);
}
else
{
$self
->_change_para_options(
indent
=>
"4n"
);
}
}
sub
parse_directive_RE
{
my
$self
=
shift
;
$self
->_change_para_options(
indent
=>
"0"
);
}
sub
parse_directive_br
{
my
$self
=
shift
;
$self
->entity_br;
}
sub
parse_directive_fi
{
my
$self
=
shift
;
$self
->_change_para_options(
filling
=> 1 );
}
sub
parse_directive_in
{
my
$self
=
shift
;
my
@ret
;
my
$indent
= 0;
$self
->maybe(
sub
{
$indent
=
$self
->expect(
qr/[+-]?\d+[n]?/
);
} );
$self
->_change_para_options(
indent
=>
$indent
);
}
sub
parse_directive_nf
{
my
$self
=
shift
;
$self
->_change_para_options(
filling
=> 0 );
}
sub
parse_directive_sp
{
my
$self
=
shift
;
$self
->entity_sp;
}
sub
parse_plaintext
{
my
$self
=
shift
;
$self
->_flush_para;
$self
->sequence_of(
sub
{
$self
->any_of(
sub
{
my
$esc
= (
$self
->expect(
qr/\\(?|\((..)|(.))/
) )[1];
$self
->commit;
my
@chunks
=
$self
->parse_escape(
$esc
);
$self
->chunk(
$_
,
font
=>
$self
->{fonts}[-1],
size
=> 0 )
for
@chunks
;
},
sub
{
$self
->chunk(
$self
->substring_before(
qr/[\\\n]/
),
font
=>
$self
->{fonts}[-1],
size
=> 0 ) },
); }
);
}
sub
parse_escape
{
my
$self
=
shift
;
my
(
$esc
) =
@_
;
my
$meth
=
"parse_escape_$esc"
;
$meth
=
sprintf
"parse_escape_x%v02X"
,
$esc
if
length
(
$esc
) == 1 and
$esc
=~ m/[^A-Za-z0-9]/;
$meth
=
"parse_escape_char"
if
length
(
$esc
) > 1;
$meth
=
$self
->can(
$meth
) or
$self
->fail(
"Unrecognised escape sequence \\$esc"
);
return
$self
->
$meth
(
$esc
);
}
sub
parse_escape_x2D
{
my
$self
=
shift
;
return
"-"
;
}
*parse_escape_x2C
=
*parse_escape_x2F
=
sub
{
return
};
sub
parse_escape_x26
{
my
$self
=
shift
;
return
""
;
}
*parse_escape_e
=
*parse_escape_E
=
*parse_escape_x5C
=
sub
{
my
$self
=
shift
;
return
"\\"
;
};
sub
parse_escape_f
{
my
$self
=
shift
;
$self
->any_of(
sub
{
$self
->expect(
qr/P/
);
$self
->commit;
@{
$self
->{fonts} } > 1 or
$self
->fail(
"Cannot \\fP without a \\f font defined"
);
pop
@{
$self
->{fonts} }; },
sub
{
push
@{
$self
->{fonts} }, (
$self
->expect(
qr/([A-Z])/
) )[1]; },
sub
{
push
@{
$self
->{fonts} }, (
$self
->expect(
qr/\((..)/
) )[1]; },
);
return
;
}
my
%chars
= (
'aq'
=>
q(')
,
'bu'
=>
'•'
,
'co'
=>
'©'
,
);
sub
parse_escape_char
{
my
$self
=
shift
;
my
(
$name
) =
@_
;
my
$char
=
$chars
{
$name
} //
"<char $name>"
;
return
$char
;
}
sub
_change_para
{
my
$self
=
shift
;
my
(
$mode
) =
@_
;
$self
->_change_para_options(
mode
=>
$mode
);
$self
->{para_flushed} = 0;
}
sub
_change_para_options
{
my
$self
=
shift
;
my
%opts
=
@_
;
if
(
grep
{ (
$self
->{para_options}{
$_
}//
""
) ne
$opts
{
$_
} }
keys
%opts
) {
$self
->{para_flushed} = 0;
}
$self
->{para_options}{
$_
} =
$opts
{
$_
}
for
keys
%opts
;
}
sub
_flush_para
{
my
$self
=
shift
;
if
( !
$self
->{para_flushed} ) {
my
$mode
=
$self
->{para_options}{mode};
$self
->${\
"para_$mode"
}(
$self
->{para_options} );
$self
->{para_flushed}++;
}
else
{
$self
->join_para;
}
}
sub
_push_para
{
my
$self
=
shift
;
my
(
$new_mode
) =
@_
;
push
@{
$self
->{para_stack} }, { %{
$self
->{para_options} } };
$self
->_change_para(
$new_mode
);
}
sub
_pop_para
{
my
$self
=
shift
;
my
(
$expect_mode
) =
@_
;
$self
->{para_options}{mode} eq
$expect_mode
or
$self
->fail(
"Expected current paragraph mode of $expect_mode"
);
$self
->{para_options} =
pop
@{
$self
->{para_stack} };
$self
->_flush_para;
$self
->{para_flushed} = 0;
}
0x55AA;