#!/usr/bin/perl -w
main();
sub
main {
my
$file
;
my
$usage
=
<<EOM;
usage: perlxmltok filename >outfile
EOM
getopts(
'h'
) or
die
"$usage"
;
if
(
$opt_h
) {
die
$usage
}
if
(
@ARGV
== 1 ) {
$file
=
$ARGV
[0];
}
else
{
die
$usage
}
my
$source
;
my
$fh
;
if
(
$file
) {
$fh
= IO::File->new(
$file
,
'r'
);
unless
(
$fh
) {
die
"cannot open '$file': $!\n"
}
$source
=
$fh
;
}
else
{
$source
=
'-'
;
}
my
$formatter
= Perl::Tidy::XmlWriter->new(
$file
);
my
$dest
;
my
$err
= perltidy(
'formatter'
=>
$formatter
,
'source'
=>
$source
,
'destination'
=> \
$dest
,
'argv'
=>
"-npro -se"
,
);
if
(
$err
) {
die
"Error calling perltidy\n"
;
}
$fh
->
close
()
if
$fh
;
return
;
}
%token_short_names
%short_to_long_names
$rOpts
$missing_html_entities
}
;
{
eval
"use HTML::Entities"
;
$missing_html_entities
= $@; }
sub
new {
my
(
$class
,
$input_file
) =
@_
;
my
$self
=
bless
{ },
$class
;
$self
->
print
(
<<"HEADER");
<?xml version = "1.0"?>
HEADER
unless
( !
$input_file
||
$input_file
eq
'-'
||
ref
(
$input_file
) ) {
$self
->
print
(
<<"COMMENT");
<!-- created by perltidy from file: $input_file -->
COMMENT
}
$self
->
print
(
"<file>\n"
);
return
$self
;
}
sub
print
{
my
(
$self
,
$line
) =
@_
;
print
$line
;
}
sub
write_line {
my
$self
=
shift
;
my
(
$line_of_tokens
) =
@_
;
my
$line_type
=
$line_of_tokens
->{_line_type};
my
$input_line
=
$line_of_tokens
->{_line_text};
my
$line_number
=
$line_of_tokens
->{_line_number};
chomp
$input_line
;
$self
->
print
(
" <line type='$line_type'>\n"
);
$self
->
print
(
" <text>\n"
);
$input_line
= my_encode_entities(
$input_line
);
$self
->
print
(
"$input_line\n"
);
$self
->
print
(
" </text>\n"
);
if
(
$line_type
eq
'CODE'
) {
my
$xml_line
;
my
$rtoken_type
=
$line_of_tokens
->{_rtoken_type};
my
$rtokens
=
$line_of_tokens
->{_rtokens};
if
(
$input_line
=~ /(^\s*)/ ) {
$xml_line
= $1;
}
else
{
$xml_line
=
""
;
}
my
$rmarked_tokens
=
$self
->markup_tokens(
$rtokens
,
$rtoken_type
);
$xml_line
.=
join
''
,
@$rmarked_tokens
;
$self
->
print
(
" <tokens>\n"
);
$self
->
print
(
"$xml_line\n"
);
$self
->
print
(
" </tokens>\n"
);
}
$self
->
print
(
" </line>\n"
);
}
BEGIN {
%short_to_long_names
= (
'n'
=>
'numeric'
,
'p'
=>
'paren'
,
'q'
=>
'quote'
,
's'
=>
'structure'
,
'c'
=>
'comment'
,
'b'
=>
'blank'
,
'v'
=>
'v-string'
,
'cm'
=>
'comma'
,
'w'
=>
'bareword'
,
'co'
=>
'colon'
,
'pu'
=>
'punctuation'
,
'i'
=>
'identifier'
,
'j'
=>
'label'
,
'h'
=>
'here-doc-target'
,
'hh'
=>
'here-doc-text'
,
'k'
=>
'keyword'
,
'sc'
=>
'semicolon'
,
'm'
=>
'subroutine'
,
'pd'
=>
'pod-text'
,
);
%token_short_names
= (
'#'
=>
'c'
,
'n'
=>
'n'
,
'v'
=>
'v'
,
'b'
=>
'b'
,
'k'
=>
'k'
,
'F'
=>
'k'
,
'Q'
=>
'q'
,
'q'
=>
'q'
,
'J'
=>
'j'
,
'j'
=>
'j'
,
'h'
=>
'h'
,
'H'
=>
'hh'
,
'w'
=>
'w'
,
','
=>
'cm'
,
'=>'
=>
'cm'
,
';'
=>
'sc'
,
':'
=>
'co'
,
'f'
=>
'sc'
,
'('
=>
'p'
,
')'
=>
'p'
,
'M'
=>
'm'
,
'P'
=>
'pd'
,
);
my
@identifier
=
qw" i t U C Y Z G :: "
;
@token_short_names
{
@identifier
} = (
'i'
) x
scalar
(
@identifier
);
my
@structure
=
qw" { } "
;
@token_short_names
{
@structure
} = (
's'
) x
scalar
(
@structure
);
}
sub
markup_tokens {
my
$self
=
shift
;
my
(
$rtokens
,
$rtoken_type
) =
@_
;
my
(
@marked_tokens
,
$j
,
$type
,
$token
);
for
(
$j
= 0 ;
$j
<
@$rtoken_type
;
$j
++ ) {
$type
=
$$rtoken_type
[
$j
];
$token
=
$$rtokens
[
$j
];
if
(
$type
eq
'i'
&&
$token
=~ /^(
sub
\s+)(\w.*)$/ ) {
$token
=
$self
->markup_xml_element( $1,
'k'
);
push
@marked_tokens
,
$token
;
$token
= $2;
$type
=
'M'
;
}
if
(
$type
eq
'i'
&&
$token
=~ /^(
package
\s+)(\w.*)$/ ) {
$token
=
$self
->markup_xml_element( $1,
'k'
);
push
@marked_tokens
,
$token
;
$token
= $2;
$type
=
'i'
;
}
$token
=
$self
->markup_xml_element(
$token
,
$type
);
push
@marked_tokens
,
$token
;
}
return
\
@marked_tokens
;
}
sub
my_encode_entities {
my
(
$token
) =
@_
;
if
(
$missing_html_entities
) {
$token
=~ s/\&/
&
;/g;
$token
=~ s/\</
<
;/g;
$token
=~ s/\>/
>
;/g;
$token
=~ s/\"/
"
;/g;
}
else
{
HTML::Entities::encode_entities(
$token
);
}
return
$token
;
}
sub
markup_xml_element {
my
$self
=
shift
;
my
(
$token
,
$type
) =
@_
;
if
(
$token
) {
$token
= my_encode_entities(
$token
) }
my
$short_name
=
$token_short_names
{
$type
};
if
( !
defined
(
$short_name
) ) {
$short_name
=
"pu"
;
}
$token
=
qq(<$short_name>)
.
$token
.
qq(</$short_name>)
;
return
$token
;
}
sub
finish_formatting {
my
$self
=
shift
;
$self
->
print
(
"</file>\n"
);
return
;
}