#!/usr/bin/perl
my
@FORM_TAGS
=
qw(form input textarea button select option)
;
my
$p
= HTML::PullParser->new(
file
=>
shift
||
"xxx.html"
,
start
=>
'tag, attr'
,
end
=>
'tag'
,
text
=>
'@{text}'
,
report_tags
=> \
@FORM_TAGS
,
) ||
die
"$!"
;
sub
get_text {
my
(
$p
,
$stop
) =
@_
;
my
$text
;
while
(
defined
(
my
$t
=
$p
->get_token)) {
if
(
ref
$t
) {
$p
->unget_token(
$t
)
unless
$t
->[0] eq
$stop
;
last
;
}
else
{
$text
.=
$t
;
}
}
return
$text
;
}
my
@forms
;
while
(
defined
(
my
$t
=
$p
->get_token)) {
next
unless
ref
$t
;
if
(
$t
->[0] eq
"form"
) {
shift
@$t
;
push
(
@forms
,
$t
);
while
(
defined
(
my
$t
=
$p
->get_token)) {
next
unless
ref
$t
;
last
if
$t
->[0] eq
"/form"
;
if
(
$t
->[0] eq
"select"
) {
my
$sel
=
$t
;
push
(@{
$forms
[-1]},
$t
);
while
(
defined
(
my
$t
=
$p
->get_token)) {
next
unless
ref
$t
;
last
if
$t
->[0] eq
"/select"
;
if
(
$t
->[0] eq
"option"
) {
my
$value
=
$t
->[1]->{value};
my
$text
= get_text(
$p
,
"/option"
);
unless
(
defined
$value
) {
$value
= decode_entities(
$text
);
}
push
(
@$sel
,
$value
);
}
else
{
warn
"$t->[0] inside select"
;
}
}
}
elsif
(
$t
->[0] =~ /^\/?option$/) {
warn
"option tag outside select"
;
}
elsif
(
$t
->[0] eq
"textarea"
) {
push
(@{
$forms
[-1]},
$t
);
$t
->[1]{value} = get_text(
$p
,
"/textarea"
);
}
elsif
(
$t
->[0] =~ m,^/,) {
warn
"stray $t->[0] tag"
;
}
else
{
push
(@{
$forms
[-1]},
$t
);
}
}
}
else
{
warn
"form tag $t->[0] outside form"
;
}
}
print
Dumper(\
@forms
),
"\n"
;