use
warnings
qw(FATAL all NONFATAL misc)
;
our
$VERSION
=
"0.03"
;
use
constant
TRACE
=>
$ENV
{TRACE_XHF_PARSER};
use
fields
qw(cf_FH cf_filename cf_string cf_tokens
fh_configured
cf_allow_empty_name
cf_encoding cf_crlf
cf_nocr cf_subst
cf_first_lineno
_depth
cf_skip_comment cf_bytes)
;
our
@EXPORT
=
qw(read_file_xhf)
;
our
@EXPORT_OK
= (
@EXPORT
,
qw(parse_xhf $cc_name)
);
our
$cc_name
=
qr{[0-9A-Za-z_\.\-/~!]}
;
our
$re_suffix
=
qr{\[$cc_name*\]}
;
our
$cc_sigil
=
qr{[:\#,\-=\[\]\{\}
]};
our
$cc_tabsp
=
qr{[\ \t]}
;
our
%OPN
= (
'['
=> \
&organize_array
,
'{'
=> \
&organize_hash
,
'='
=> \
&organize_expr
);
our
%CLO
= (
']'
=>
'['
,
'}'
=>
'{'
);
our
%NAME_LESS
= (
%CLO
,
'-'
=> 1);
our
%ALLOW_EMPTY_NAME
= (
':'
=> 1);
sub
after_new {
(
my
MY
$self
) =
@_
;
$self
->{cf_skip_comment} //= 1;
}
sub
read_file_xhf {
my
(
$pack
,
$fn
,
%rest
) =
@_
;
my
$method
=
do
{
my
$single
=
delete
$rest
{single};
my
$all
=
delete
$rest
{all} // 1;
if
(
$single
or not
$all
) {
'read'
;
}
else
{
'read_all'
;
}
};
MY->new(
filename
=>
$fn
,
encoding
=>
'utf8'
,
%rest
)->
$method
;
}
sub
parse_xhf {
MY->new(
string
=>
@_
)->
read
;
}
*configure_file
= \
&configure_filename
;
*configure_file
= \
&configure_filename
;
sub
configure_filename {
(
my
MY
$self
,
my
(
$fn
)) =
@_
;
open
$self
->{cf_FH},
'<'
,
$fn
or croak
"Can't open file '$fn': $!"
;
$self
->{fh_configured} = 0;
$self
->{cf_filename} =
$fn
;
$self
;
}
sub
configure_filename_for_error {
(
my
MY
$self
,
my
(
$fn
)) =
@_
;
$self
->{cf_filename} =
$fn
;
}
sub
configure_encoding {
(
my
MY
$self
,
my
$value
) =
@_
;
$self
->{fh_configured} = 0;
$self
->{cf_encoding} =
$value
;
}
sub
configure_binary {
(
my
MY
$self
,
my
$value
) =
@_
;
warnings::warnif(
deprecated
=>
"XHF option 'binary' is deprecated, use 'bytes' instead"
);
$self
->{cf_bytes} =
$value
;
}
sub
configure_string {
my
MY
$self
=
shift
;
(
$self
->{cf_string}) =
@_
;
open
$self
->{cf_FH},
'<'
, \
$self
->{cf_string}
or croak
"Can't create string stream: $!"
;
$self
;
}
sub
trace {
(
my
MY
$reader
,
my
(
$msg
,
@desc
)) =
@_
;
print
STDERR
" "
x
$reader
->{_depth},
$msg
, terse_dump(
@desc
),
"\n"
;
}
sub
read_all {
(
my
MY
$self
) =
@_
;
my
@res
;
while
(
my
@block
=
$self
->
read
) {
push
@res
,
@block
;
}
wantarray
?
@res
:
do
{
my
%dict
=
@res
;
\
%dict
;
};
}
sub
read
{
my
MY
$self
=
shift
;
$self
->cf_let(\
@_
,
sub
{
if
(
my
@tokens
=
$self
->tokenize) {
$self
->organize(
@tokens
);
}
else
{
return
;
}
});
}
sub
tokenize {
(
my
MY
$self
) =
@_
;
local
$/ =
""
;
my
$fh
=
$$self
{cf_FH};
unless
(
$self
->{fh_configured}++) {
if
(not
$self
->{cf_bytes} and not
$self
->{cf_string}
and
$self
->{cf_encoding}) {
binmode
$fh
,
":encoding($self->{cf_encoding})"
;
}
if
(
$self
->{cf_crlf}) {
binmode
$fh
,
":crlf"
;
}
}
my
@tokens
;
LOOP: {
do
{
defined
(
my
$para
= <
$fh
>) or
last
LOOP;
$para
= untaint_unless_tainted
(
$self
->{cf_filename} //
$self
->{cf_string}
,
$para
);
@tokens
=
$self
->tokenize_1(
$para
);
}
until
(not
$self
->{cf_skip_comment} or
@tokens
);
}
@tokens
;
}
sub
tokenize_1 {
my
MY
$reader
=
shift
;
$_
[0] =~ s{\n+$}{\n}s;
$_
[0] =~ s{\r+}{}g
if
$reader
->{cf_nocr};
if
(
my
$sub
=
$reader
->{cf_subst}) {
local
$_
;
*_
= \
$_
[0];
$sub
->(
$_
);
}
my
$lineno
=
$reader
->{cf_first_lineno} // 1;
my
(
$pos
,
$ncomments
,
@tokens
,
@result
);
foreach
my
$token
(
@tokens
=
split
/(?<=\n)(?=[^\ \t])/,
$_
[0]) {
$pos
++;
if
(
$token
=~ s{^(?:\
$ncomments
++;
next
if
$token
eq
''
;
}
unless
(
$token
=~ s{^(
$cc_name
*$re_suffix
*) (
$cc_sigil
) (?:(
$cc_tabsp
)|(\n|$))}{}x) {
croak
"Invalid XHF token '$token' "
.
$reader
->fileinfo_lineno(
$lineno
).
"\n"
;
}
my
(
$name
,
$sigil
,
$tabsp
,
$eol
) = ($1, $2, $3, $4);
if
(
$name
eq
''
) {
croak
"Invalid XHF token(name is empty for '$token') "
.
$reader
->fileinfo_lineno(
$lineno
).
"\n"
if
$sigil
eq
':'
and not
$reader
->{cf_allow_empty_name};
}
elsif
(
$NAME_LESS
{
$sigil
}) {
croak
"Invalid XHF token('$sigil' should not be prefixed by name '$name') "
.
$reader
->fileinfo_lineno(
$lineno
).
"\n"
;
}
$ncomments
++,
next
if
$sigil
eq
"#"
;
if
(
$CLO
{
$sigil
}) {
undef
$name
;
}
$token
=~ s/\n[\ \t]/\n/g;
unless
(
defined
$eol
) {
$token
=~ s/^\s+|\s+$//gs;
}
else
{
croak
"Invalid XHF token(container with value) "
.
join
(
""
,
grep
{
defined
$_
}
$name
,
$sigil
,
$tabsp
,
$token
)
.
$reader
->fileinfo_lineno(
$lineno
).
"\n"
if
$sigil
eq
'{'
and
$token
ne
""
;
$token
=~ s/^[\ \t]//;
}
push
@result
, [
$name
,
$sigil
,
$token
,
$lineno
];
}
continue
{
$lineno
++;
}
return
if
$ncomments
&& !
@result
;
wantarray
?
@result
: \
@result
;
}
sub
fileinfo {
(
my
MY
$reader
,
my
$desc
) =
@_
;
$reader
->fileinfo_lineno(
$desc
->[_LINENO]);
}
sub
fileinfo_lineno {
(
my
MY
$reader
,
my
$lineno
) =
@_
;
sprintf
(
"at %s line %d"
,
$reader
->{cf_filename} //
"(unknown)"
,
$lineno
);
}
sub
organize {
my
MY
$reader
=
shift
;
local
$reader
->{_depth} = -1;
my
$pos
= 0;
my
@result
;
while
(
$pos
<
@_
) {
my
$desc
=
$_
[
$pos
++];
unless
(
defined
$desc
->[_NAME]) {
croak
"Invalid XHF: Field close '$desc->[_SIGIL]'"
.
" (line $desc->[_LINENO]) without open! "
.
$reader
->fileinfo(
$desc
).
"\n"
;
}
push
@result
,
$desc
->[_NAME]
if
$desc
->[_NAME] ne
''
or
$ALLOW_EMPTY_NAME
{
$desc
->[_SIGIL]};
if
(
my
$sub
=
$OPN
{
$desc
->[_SIGIL]}) {
push
@result
,
$sub
->(
$reader
, \
$pos
, \
@_
,
$desc
);
}
else
{
push
@result
,
$desc
->[_VALUE];
}
}
if
(
wantarray
) {
@result
}
else
{
my
%hash
=
@result
;
\
%hash
;
}
}
sub
organize_array {
(
my
MY
$reader
,
my
(
$posref
,
$tokens
,
$first
)) =
@_
;
local
$reader
->{_depth} =
$reader
->{_depth} + 1;
$reader
->trace(
"> "
,
$first
)
if
TRACE;
my
@result
;
push
@result
,
$first
->[_VALUE]
if
defined
$first
and
$first
->[_VALUE] ne
''
;
while
(
$$posref
<
@$tokens
) {
my
$desc
=
$tokens
->[
$$posref
++];
unless
(
defined
$desc
->[_NAME]) {
if
(
$desc
->[_SIGIL] ne
']'
) {
croak
"Invalid XHF: paren mismatch. '['"
.
" (line $first->[_LINENO]) is closed by '$desc->[_SIGIL]' "
.
$reader
->fileinfo(
$desc
).
"\n"
;
}
$reader
->trace(
"< "
,
$first
,
$desc
)
if
TRACE;
return
\
@result
;
}
elsif
(
$desc
->[_NAME] ne
''
) {
$reader
->trace(
"| "
,
$desc
)
if
TRACE;
push
@result
,
$desc
->[_NAME];
}
if
(
my
$sub
=
$OPN
{
$desc
->[_SIGIL]}) {
push
@result
,
$sub
->(
$reader
,
$posref
,
$tokens
,
$desc
);
}
else
{
$reader
->trace(
"| "
,
$desc
)
if
TRACE;
push
@result
,
$desc
->[_VALUE];
}
}
croak
"Invalid XHF: Missing close ']' for '[' "
.
$reader
->fileinfo(
$first
).
"\n"
;
}
sub
organize_hash {
(
my
MY
$reader
,
my
(
$posref
,
$tokens
,
$first
)) =
@_
;
croak
"Invalid XHF hash block beginning! "
.
join
(
""
,
@$first
).
$reader
->fileinfo(
$first
).
"\n"
if
defined
$first
and
$first
->[_VALUE] ne
''
;
local
$reader
->{_depth} =
$reader
->{_depth} + 1;
$reader
->trace(
"> "
,
$first
)
if
TRACE;
my
%result
;
while
(
$$posref
<
@$tokens
) {
my
$desc
=
$tokens
->[
$$posref
++];
unless
(
defined
$desc
->[_NAME]) {
if
(
$desc
->[_SIGIL] ne
'}'
) {
croak
"Invalid XHF: paren mismatch. '{'"
.
" (line $first->[_LINENO]) is closed by '$desc->[_SIGIL]' "
.
$reader
->fileinfo(
$desc
).
"\n"
;
}
$reader
->trace(
"< "
,
$first
,
$desc
)
if
TRACE;
return
\
%result
;
}
elsif
(
$desc
->[_SIGIL] eq
'-'
) {
unless
(
$$posref
<
@$tokens
) {
croak
"Invalid XHF hash:"
.
" key '- $desc->[_VALUE]' doesn't have value! "
.
$reader
->fileinfo(
$desc
).
"\n"
;
}
my
$valdesc
=
$tokens
->[
$$posref
++];
my
$value
=
do
{
if
(
my
$sub
=
$OPN
{
$valdesc
->[_SIGIL]}) {
$sub
->(
$reader
,
$posref
,
$tokens
,
$valdesc
);
}
elsif
(
$valdesc
->[_SIGIL] eq
'-'
) {
$valdesc
->[_VALUE];
}
else
{
croak
"Invalid XHF hash value:"
.
" key '$desc->[_VALUE]' has invalid sigil '$valdesc->[_SIGIL]' "
.
$reader
->fileinfo(
$valdesc
).
"\n"
}
};
$reader
->add_value(
$result
{
$desc
->[_VALUE]},
$value
);
}
else
{
$reader
->trace(
"| "
,
$desc
)
if
TRACE;
if
(
my
$sub
=
$OPN
{
$desc
->[_SIGIL]}) {
$desc
->[_VALUE] =
$sub
->(
$reader
,
$posref
,
$tokens
,
$desc
);
}
$reader
->add_value(
$result
{
$desc
->[_NAME]},
$desc
->[_VALUE]);
}
}
croak
"Invalid XHF: Missing close '}' for '{' "
.
$reader
->fileinfo(
$first
).
"\n"
;
}
sub
_undef {
undef
}
our
%EXPR
= (
null
=> \
&_undef
,
'undef'
=> \
&_undef
);
sub
organize_expr {
(
my
MY
$reader
,
my
(
$posref
,
$tokens
,
$first
)) =
@_
;
if
((
my
$val
=
$first
->[_VALUE]) =~ s/^\
my
$sub
=
$EXPR
{$1}
or croak
"Invalid XHF keyword: '= #$1'"
;
$sub
->(
$reader
,
$val
,
$tokens
);
}
else
{
croak
"Not yet implemented XHF token: '@$first'"
;
}
}
sub
add_value {
my
MY
$reader
=
shift
;
unless
(
defined
$_
[0]) {
$_
[0] =
$_
[1];
}
elsif
(
ref
$_
[0] ne
'ARRAY'
) {
$_
[0] = [
$_
[0],
$_
[1]];
}
else
{
push
@{
$_
[0]},
$_
[1];
}
}
YATT::Lite::Breakpoint::break_load_xhf();
1;