use
5.008001;
no
warnings;
our
$VERSION
=
'0.992'
;
# Compile-time probe for the optional XML::LibXML::Devel::SetLineNumber
# module. HAS_XLXDSLN becomes a constant sub (1 when the module loaded, 0
# otherwise) that the rest of the file tests before calling
# XML::LibXML::Devel::SetLineNumber::set_line_number.
BEGIN {
    # The block-form eval keeps a missing module from being fatal; the
    # trailing "1" makes a successful require yield a true value.
    if (eval { require XML::LibXML::Devel::SetLineNumber; 1 }) {
        *HAS_XLXDSLN = sub () { 1 };
    }
    else {
        *HAS_XLXDSLN = sub () { 0 };
    }
}
# Installs appendTextFromUnicode into XML::LibXML::Element.
#
# Arguments (positional):
#   $element - invocant; the element that receives the text.
#   $parser  - optional; consumed only when the next argument is a reference.
#              Its _data method is used to record source positions.
#   $text    - the string to append; it is passed through utf8::encode
#              before being handed to the DOM.
#   $token   - optional; its {line} and {column} entries, when defined, are
#              recorded against a newly created text node.
#
# Returns nothing when the text was merged into an existing trailing text
# node; otherwise returns whatever appendChild returns for the new node.
*XML::LibXML::Element::appendTextFromUnicode = sub {
    my $element = shift;

    # "my $x = shift if COND" leaves $x in an unspecified (possibly stale)
    # state when COND is false, so select the value with a ternary instead.
    my $parser = ref $_[0] ? shift : undef;

    my $text = shift;
    utf8::encode($text);
    my $token = shift;

    # Extend the trailing text node rather than creating an adjacent one.
    if (defined $element->lastChild
        and $element->lastChild->nodeType == XML_TEXT_NODE)
    {
        $element->appendText($text);
        return;
    }

    my $textnode = XML::LibXML::Text->new($text);

    if ($token) {
        $parser->_data($textnode, manakai_source_line => $token->{line})
            if $parser and defined $token->{line};
        $parser->_data($textnode, manakai_source_column => $token->{column})
            if $parser and defined $token->{column};

        # Only a non-zero value that round-trips through int() unchanged is
        # forwarded to set_line_number.
        if (    HAS_XLXDSLN
            and exists $token->{line}
            and int($token->{line})
            and int($token->{line}) eq $token->{line})
        {
            $textnode->XML::LibXML::Devel::SetLineNumber::set_line_number($token->{line});
        }
    }

    return $element->appendChild($textnode);
};
# Package-level annotation store; _data falls back to it whenever it is
# called without an object invocant.
our $DATA = {};

# Function-style entry point: forwards its arguments to _data with an
# undefined invocant, so the package-level store is used.
sub DATA {
    return _data(undef, @_);
}
# Reads or writes annotations attached to an XML::LibXML node.
#
#   _data($self, $node)            - returns the node's annotation hash
#   _data($self, $node, $k)        - returns the value stored under $k
#   _data($self, $node, $k, $v)    - stores $v under $k, returns the hash
#
# When $self is a reference the annotations live in $self->{_debug_cache};
# otherwise the package-level $DATA store is used. If $node is not a blessed
# XML::LibXML::Node, the one-argument form returns a fresh empty hash and the
# other forms return nothing.
sub _data {
    my $self = shift;
    my ($node, $key, $value) = @_;
    my $nargs = scalar @_;

    unless (blessed($node) && $node->isa('XML::LibXML::Node')) {
        return unless $nargs == 1;
        return +{};
    }

    # Annotation hashes are keyed by what node_from_perl returns for the node.
    my $addr = XML::LibXML::Devel::node_from_perl($node);
    my $data = ref $self
        ? ($self->{_debug_cache}{$addr} ||= {})
        : ($DATA->{$addr} ||= {});

    # Mirror a usable source line onto element nodes when the optional
    # line-number module is available.
    if (   HAS_XLXDSLN
        && defined $key
        && $key eq 'manakai_source_line'
        && defined $value
        && int($value)
        && int($value) eq $value
        && $node->nodeType == XML_ELEMENT_NODE)
    {
        $node->XML::LibXML::Devel::SetLineNumber::set_line_number($value);
    }

    if ($nargs == 3) {
        $data->{$key} = $value;
    }
    if ($nargs == 2) {
        return $data->{$key};
    }
    return $data;
}
# ---- Element category bit flags -------------------------------------------
# Each flag below occupies a single bit; the composite categories further
# down OR flags together with a small low-bit discriminator.
sub BUTTON_SCOPING_EL ()       { 0b1_000000000000000000 }
sub SPECIAL_EL ()              { 0b1_00000000000000000 }
sub SCOPING_EL ()              { 0b1_0000000000000000 }
sub FORMATTING_EL ()           { 0b1_000000000000000 }
sub PHRASING_EL ()             { 0b1_00000000000000 }
sub SVG_EL ()                  { 0b1_0000000000000 }
sub MML_EL ()                  { 0b1_000000000000 }
sub FOREIGN_FLOW_CONTENT_EL () { 0b1_0000000000 }
sub TABLE_SCOPING_EL ()        { 0b1_000000000 }
sub TABLE_ROWS_SCOPING_EL ()   { 0b1_00000000 }
sub TABLE_ROW_SCOPING_EL ()    { 0b1_0000000 }
sub TABLE_ROWS_EL ()           { 0b1_000000 }
sub ADDRESS_DIV_P_EL ()        { 0b1_00000 }
sub ALL_END_TAG_OPTIONAL_EL () { 0b1_0000 }
sub END_TAG_OPTIONAL_EL ()     { 0b1_000 }

# ---- Categories built on SPECIAL_EL ----------------------------------------
sub MISC_SPECIAL_EL () { SPECIAL_EL | 0b000 }
sub FORM_EL ()         { SPECIAL_EL | 0b001 }
sub FRAMESET_EL ()     { SPECIAL_EL | 0b010 }
sub HEADING_EL ()      { SPECIAL_EL | 0b011 }
sub SELECT_EL ()       { SPECIAL_EL | 0b100 }
sub SCRIPT_EL ()       { SPECIAL_EL | 0b101 }
sub BUTTON_EL ()       { SPECIAL_EL | BUTTON_SCOPING_EL | 0b110 }
sub ADDRESS_DIV_EL ()  { SPECIAL_EL | ADDRESS_DIV_P_EL | 0b001 }
sub BODY_EL ()         { SPECIAL_EL | ALL_END_TAG_OPTIONAL_EL | 0b001 }

sub DTDD_EL () {
    SPECIAL_EL | END_TAG_OPTIONAL_EL | ALL_END_TAG_OPTIONAL_EL | 0b010
}

sub LI_EL () {
    SPECIAL_EL | END_TAG_OPTIONAL_EL | ALL_END_TAG_OPTIONAL_EL | 0b100
}

sub P_EL () {
    SPECIAL_EL | ADDRESS_DIV_P_EL | END_TAG_OPTIONAL_EL
        | ALL_END_TAG_OPTIONAL_EL | 0b001
}

sub TABLE_ROW_EL () {
    SPECIAL_EL | TABLE_ROWS_EL | TABLE_ROW_SCOPING_EL
        | ALL_END_TAG_OPTIONAL_EL | 0b001
}

sub TABLE_ROW_GROUP_EL () {
    SPECIAL_EL | TABLE_ROWS_EL | TABLE_ROWS_SCOPING_EL
        | ALL_END_TAG_OPTIONAL_EL | 0b001
}

# ---- Categories built on SCOPING_EL ----------------------------------------
sub MISC_SCOPING_EL () { SCOPING_EL | BUTTON_SCOPING_EL | 0b000 }
sub CAPTION_EL ()      { SCOPING_EL | BUTTON_SCOPING_EL | 0b010 }

sub HTML_EL () {
    SCOPING_EL | BUTTON_SCOPING_EL | TABLE_SCOPING_EL | TABLE_ROWS_SCOPING_EL
        | TABLE_ROW_SCOPING_EL | ALL_END_TAG_OPTIONAL_EL | 0b001
}

sub TABLE_EL () {
    SCOPING_EL | BUTTON_SCOPING_EL | TABLE_ROWS_EL | TABLE_SCOPING_EL | 0b001
}

sub TABLE_CELL_EL () {
    SCOPING_EL | BUTTON_SCOPING_EL | ALL_END_TAG_OPTIONAL_EL | 0b001
}

# ---- Formatting and phrasing categories ------------------------------------
sub MISC_FORMATTING_EL () { FORMATTING_EL | 0b000 }
sub A_EL ()               { FORMATTING_EL | 0b001 }
sub NOBR_EL ()            { FORMATTING_EL | 0b010 }

sub RUBY_EL ()           { PHRASING_EL | 0b001 }
sub OPTGROUP_EL ()       { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b001 }
sub OPTION_EL ()         { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b010 }
sub RUBY_COMPONENT_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b100 }

# ---- Foreign (MathML / SVG) categories -------------------------------------
# FOREIGN_EL is not defined in this part of the file; it is expected to be
# supplied from elsewhere.
sub MML_TEXT_INTEGRATION_EL () {
    MML_EL | SCOPING_EL | BUTTON_SCOPING_EL | FOREIGN_EL
        | FOREIGN_FLOW_CONTENT_EL
}

sub MML_AXML_EL () {
    MML_EL | SCOPING_EL | BUTTON_SCOPING_EL | FOREIGN_EL | 0b001
}

sub SVG_INTEGRATION_EL () {
    SVG_EL | SCOPING_EL | BUTTON_SCOPING_EL | FOREIGN_EL
        | FOREIGN_FLOW_CONTENT_EL
}

sub SVG_SCRIPT_EL () {
    SVG_EL | FOREIGN_EL | 0b101
}
# Lookup table from an HTML element's local name to its category constant.
# Entries are grouped by category; names sharing a category are generated
# from a word list.
my $el_category = {
    a        => A_EL,
    body     => BODY_EL,
    button   => BUTTON_EL,
    caption  => CAPTION_EL,
    form     => FORM_EL,
    frameset => FRAMESET_EL,
    html     => HTML_EL,
    li       => LI_EL,
    nobr     => NOBR_EL,
    optgroup => OPTGROUP_EL,
    option   => OPTION_EL,
    p        => P_EL,
    ruby     => RUBY_EL,
    select   => SELECT_EL,
    table    => TABLE_EL,
    tr       => TABLE_ROW_EL,

    (map { ($_ => ADDRESS_DIV_EL) }     qw(address div)),
    (map { ($_ => DTDD_EL) }            qw(dd dt)),
    (map { ($_ => HEADING_EL) }         qw(h1 h2 h3 h4 h5 h6)),
    (map { ($_ => MISC_SCOPING_EL) }    qw(applet marquee object)),
    (map { ($_ => RUBY_COMPONENT_EL) }  qw(rp rt)),
    (map { ($_ => TABLE_CELL_EL) }      qw(td th)),
    (map { ($_ => TABLE_ROW_GROUP_EL) } qw(tbody tfoot thead)),

    (map { ($_ => FORMATTING_EL) } qw(
        b big code em font i s small strike strong tt u
    )),

    (map { ($_ => MISC_SPECIAL_EL) } qw(
        area article aside base basefont bgsound blockquote br center col
        colgroup command details dir dl embed fieldset figure figcaption
        footer frame head header hgroup hr iframe img input isindex link
        listing menu meta nav noembed noframes noscript ol param plaintext
        pre script section style summary textarea title ul wbr xmp
    )),
};
# Category lookup for foreign elements: first level is the namespace
# (MathML or SVG), second level the element's local name.
my $el_category_f = {
    (MML_NS) => {
        'annotation-xml' => MML_AXML_EL,
        (map { ($_ => MML_TEXT_INTEGRATION_EL) } qw(mi mo mn ms mtext)),
    },
    (SVG_NS) => {
        (map { ($_ => SVG_INTEGRATION_EL) } qw(foreignObject desc title)),
        script => SVG_SCRIPT_EL,
    },
};
# Maps the all-lowercase spelling of an SVG attribute name to its
# mixed-case spelling. Every key is simply the lowercased value, so the
# table is generated from the list of mixed-case names.
my $svg_attr_name = {
    map { (lc($_) => $_) } qw(
        attributeName attributeType baseFrequency baseProfile calcMode
        clipPathUnits contentScriptType contentStyleType diffuseConstant
        edgeMode externalResourcesRequired filterRes filterUnits glyphRef
        gradientTransform gradientUnits kernelMatrix kernelUnitLength
        keyPoints keySplines keyTimes lengthAdjust limitingConeAngle
        markerHeight markerUnits markerWidth maskContentUnits maskUnits
        numOctaves pathLength patternContentUnits patternTransform
        patternUnits pointsAtX pointsAtY pointsAtZ preserveAlpha
        preserveAspectRatio primitiveUnits refX refY repeatCount repeatDur
        requiredExtensions requiredFeatures specularConstant
        specularExponent spreadMethod startOffset stdDeviation stitchTiles
        surfaceScale systemLanguage tableValues targetX targetY textLength
        viewBox viewTarget xChannelSelector yChannelSelector zoomAndPan
    )
};
# Maps a qualified attribute name as written in markup to
# [namespace, [prefix, local name]] for attributes on foreign elements.
my $foreign_attr_xname = {
    # xlink:* attributes
    (map { ("xlink:$_" => [(XLINK_NS), ['xlink', $_]]) }
        qw(actuate arcrole href role show title type)),

    # xml:* attributes
    (map { ("xml:$_" => [(XML_NS), ['xml', $_]]) } qw(base lang space)),

    # Namespace declarations
    'xmlns'       => [(XMLNS_NS), [undef,   'xmlns']],
    'xmlns:xlink' => [(XMLNS_NS), ['xmlns', 'xlink']],
};
# Parses HTML held in an octet string.
#
# Arguments after the invocant: the charset name (may be undef), then the
# string (or a reference to it), then whatever further arguments
# parse_byte_stream accepts after its byte stream. The string is exposed as
# an in-memory read handle and everything is delegated to
# parse_byte_stream, whose result is returned.
#
# Dies if the in-memory handle cannot be opened; previously the failure was
# ignored and an unopened handle was passed on.
sub parse_byte_string ($$$$;$) {
    my $self         = shift;
    my $charset_name = shift;

    # Keep working through the @_ alias so the caller's scalar is referenced
    # rather than copied.
    my $source = ref $_[0] ? $_[0] : \($_[0]);

    open my $input, '<', $source
        or die "parse_byte_string: cannot open in-memory input handle: $!\n";

    return $self->parse_byte_stream($charset_name, $input, @_[1 .. $#_]);
}
# Parses HTML read from a byte stream, choosing a character encoding first.
#
# Arguments after the invocant (an instance, or a class on which ->new is
# called): charset name (may be undef), byte stream, then the two arguments
# forwarded to parse_char_stream (the second doubles as the error callback),
# and an optional wrapper factory applied to the decoded character stream.
#
# Encoding selection order: the supplied charset name, a byte-order mark in
# the first bytes read (up to 1024), the UniversalCharDet guess, and finally
# windows-1252. If the tokenizer later asks for a different encoding through
# the change_encoding callback, a RestartParser exception is thrown and the
# document is parsed once more with the new decoder.
#
# Returns what parse_char_stream returns, after recording the charset name
# on it through _data.
sub parse_byte_stream ($$$$;$$) {
    my $self         = ref $_[0] ? shift : shift->new;
    my $charset_name = shift;
    my $byte_stream  = $_[0];

    my $onerror = $_[2] || sub {
        my (%opt) = @_;
        warn "Parse error ($opt{type})\n";
    };
    $self->{parse_error} = $onerror;

    my $get_wrapper = $_[3] || sub ($) {
        return $_[0];
    };

    my $charset;
    my $buffer;
    my ($char_stream, $e_status);

    # Records the encoding in use and reports how trustworthy the decoder
    # is. Reads $charset and $e_status at call time, so it can be used both
    # before the first parse and after a restart.
    my $note_decoder_status = sub {
        $self->{input_encoding} = $charset->get_iana_name;
        if ($e_status & HTML::HTML5::Parser::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
            $self->{parse_error}->(
                level  => $self->{level}->{must},
                type   => 'chardecode:fallback',
                level  => $self->{level}->{uncertain},
                line   => 1,
                column => 1,
                layer  => 'encode',
            );
        }
        elsif (not ($e_status &
                    HTML::HTML5::Parser::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
            $self->{parse_error}->(
                level  => $self->{level}->{must},
                type   => 'chardecode:no error',
                text   => $self->{input_encoding},
                level  => $self->{level}->{uncertain},
                line   => 1,
                column => 1,
                layer  => 'encode',
            );
        }
    };

  SNIFFING: {
        # 1. Encoding named by the caller.
        if (defined $charset_name) {
            $charset = HTML::HTML5::Parser::Charset::Info->get_by_html_name ($charset_name);
            ($char_stream, $e_status) = $charset->get_decode_handle
                ($byte_stream, allow_error_reporting => 1, allow_fallback => 1);
            if ($char_stream) {
                $self->{confident} = 1;
                last SNIFFING;
            }
            $self->{parse_error}->(
                level  => $self->{level}->{must},
                type   => 'charset:not supported',
                layer  => 'encode',
                line   => 1,
                column => 1,
                value  => $charset_name,
                level  => $self->{level}->{uncertain},
            );
        }

        # 2. Pull in up to 1024 bytes to sniff.
        my $sniffed = '';
        for (1 .. 1024) {
            my $octet = $byte_stream->getc;
            last unless defined $octet;
            $sniffed .= $octet;
        }

        # 3. Byte-order marks, tested in this order.
        for my $bom ([qr/^\xFE\xFF/,     'utf-16be'],
                     [qr/^\xFF\xFE/,     'utf-16le'],
                     [qr/^\xEF\xBB\xBF/, 'utf-8'])
        {
            my ($signature, $bom_charset) = @$bom;
            next unless $sniffed =~ $signature;
            $charset = HTML::HTML5::Parser::Charset::Info->get_by_html_name ($bom_charset);
            ($char_stream, $e_status) = $charset->get_decode_handle
                ($byte_stream, allow_error_reporting => 1,
                 allow_fallback => 1, byte_buffer => \$sniffed);
            $self->{confident} = 1;
            last SNIFFING;
        }

        # 4. Statistical detection on the sniffed bytes.
        $charset_name = HTML::HTML5::Parser::Charset::UniversalCharDet->detect_byte_string($sniffed)
            if $sniffed;
        if (defined $charset_name) {
            $charset = HTML::HTML5::Parser::Charset::Info->get_by_html_name ($charset_name);
            $buffer = HTML::HTML5::Parser::Charset::DecodeHandle::ByteBuffer->new
                ($byte_stream);
            ($char_stream, $e_status) = $charset->get_decode_handle
                ($buffer, allow_error_reporting => 1,
                 allow_fallback => 1, byte_buffer => \$sniffed);
            if ($char_stream) {
                $buffer->{buffer} = $sniffed;
                $self->{parse_error}->(
                    level  => $self->{level}->{must},
                    type   => 'sniffing:chardet',
                    text   => $charset_name,
                    level  => $self->{level}->{info},
                    layer  => 'encode',
                    line   => 1,
                    column => 1,
                );
                $self->{confident} = 0;
                last SNIFFING;
            }
        }

        # 5. Last resort.
        $charset = HTML::HTML5::Parser::Charset::Info->get_by_html_name ('windows-1252');
        $buffer = HTML::HTML5::Parser::Charset::DecodeHandle::ByteBuffer->new
            ($byte_stream);
        ($char_stream, $e_status)
            = $charset->get_decode_handle ($buffer,
                                           allow_error_reporting => 1,
                                           allow_fallback => 1,
                                           byte_buffer => \$sniffed);
        $buffer->{buffer} = $sniffed;
        $self->{parse_error}->(
            level  => $self->{level}->{must},
            type   => 'sniffing:default',
            text   => 'windows-1252',
            level  => $self->{level}->{info},
            line   => 1,
            column => 1,
            layer  => 'encode',
        );
        $self->{confident} = 0;
    }

    $note_decoder_status->();

    # Callback through which the parser requests a switch of encoding. On an
    # accepted switch it throws RestartParser; it returns quietly when the
    # request is ignored or changes nothing.
    $self->{change_encoding} = sub {
        my $self = shift;
        $charset_name = shift;
        my $token = shift;

        my $previous_stream = $char_stream;
        $charset = HTML::HTML5::Parser::Charset::Info->get_by_html_name ($charset_name);
        ($char_stream, $e_status) = $charset->get_decode_handle
            ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
             byte_buffer => \ $buffer->{buffer});

        if ($char_stream) {
            # Only ASCII-compatible or UTF-16 charsets are acted upon.
            return
                unless ($charset->{category} & HTML::HTML5::Parser::Charset::Info::CHARSET_CATEGORY_ASCII_COMPAT () or
                        $charset->{category} & HTML::HTML5::Parser::Charset::Info::CHARSET_CATEGORY_UTF16 ());

            # Requested encoding is the one already in use.
            if (defined $self->{input_encoding} and
                $self->{input_encoding} eq $charset_name) {
                $self->{parse_error}->(
                    level => $self->{level}->{must},
                    type  => 'charset label:matching',
                    text  => $charset_name,
                    level => $self->{level}->{info},
                );
                $self->{confident} = 1;
                return;
            }

            # Current encoding is UTF-16: keep it.
            if (defined $self->{input_encoding} and
                HTML::HTML5::Parser::Charset::Info->get_by_html_name ($self->{input_encoding})
                    ->{category} & HTML::HTML5::Parser::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
                $self->{confident} = 1;
                return;
            }

            # A UTF-16 request is replaced by UTF-8.
            if ($charset->{category} &
                HTML::HTML5::Parser::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
                $charset = HTML::HTML5::Parser::Charset::Info->get_by_html_name ('utf-8');
                ($char_stream, $e_status) = $charset->get_decode_handle
                    ($byte_stream, allow_error_reporting => 1,
                     byte_buffer => \ $buffer->{buffer});
            }
            $charset_name = $charset->get_iana_name;

            $self->{parse_error}->(
                level => $self->{level}->{must},
                type  => 'charset label detected',
                text  => $self->{input_encoding},
                value => $charset_name,
                level => $self->{level}->{warn},
                token => $token,
            );

            HTML::HTML5::Parser::TagSoupParser::RestartParser->throw;
        }
        else {
            # Unsupported charset: continue with the stream we had.
            $char_stream = $previous_stream;
        }
    };

    # Values for decoder-level error reports, captured once at this point.
    my %x = (
        level  => $self->{level}{must},
        layer  => 'encode',
        line   => $self->{line},
        column => $self->{column} + 1,
        error  => $self->{parse_error},
    );

    my $char_onerror = sub {
        my (undef, $type, %opt) = @_;
        $x{error}->(
            level  => $x{level},
            layer  => $x{layer},
            line   => $x{line},
            column => $x{column},
            %opt,
            type   => $type,
        );
        if ($opt{octets}) {
            ${$opt{octets}} = "\x{FFFD}";
        }
    };

    my $wrapped_char_stream = $get_wrapper->($char_stream);
    $wrapped_char_stream->onerror ($char_onerror);

    my @args = ($_[1], $_[2]);
    my $document;
    try {
        $document = $self->parse_char_stream ($wrapped_char_stream, @args);
    }
    catch {
        # Anything other than a restart request is re-thrown untouched.
        die $_
            unless blessed($_)
                and $_->isa('HTML::HTML5::Parser::TagSoupParser::RestartParser');

        $note_decoder_status->();
        $self->{confident} = 1;

        $wrapped_char_stream = $get_wrapper->($char_stream);
        $wrapped_char_stream->onerror ($char_onerror);

        $document = $self->parse_char_stream ($wrapped_char_stream, @args);
    };

    $self->_data($document, charset => $charset_name);

    return $document;
}
# Parses HTML held in a character string. The first argument after the
# invocant is the string or a reference to it; the remaining arguments are
# handed on to parse_char_stream unchanged, and its result is returned.
sub parse_char_string ($$$;$$) {
    my $self = shift;

    # Reference the caller's scalar through the @_ alias instead of copying.
    my $string_ref = ref $_[0] ? $_[0] : \($_[0]);
    my $char_handle
        = HTML::HTML5::Parser::Charset::DecodeHandle::CharString->new ($string_ref);

    return $self->parse_char_stream ($char_handle, @_[1 .. $#_]);
}

# parse_string is an alias for parse_char_string.
*parse_string = \&parse_char_string;
# Parses HTML from a character stream into the supplied document.
#
# Arguments after the invocant (an instance, or a class on which ->new is
# called):
#   $_[0] - character input handle (must offer read, manakai_read_until
#           and onerror)
#   $_[1] - document to fill; its existing children are removed first
#   $_[2] - optional error callback; the default warns with type and position
#   $_[3] - optional wrapper factory applied to the input handle
#
# Installs the set_nc, read_until and parse_error callbacks on the parser,
# runs the tokenizer and tree constructor, removes the callbacks again and
# returns the document.
sub parse_char_stream ($$$;$$) {
    my $self  = ref $_[0] ? shift : shift->new;
    my $input = $_[0];
    my $doc   = $self->{document} = $_[1];

    $self->{document}->removeChildNodes;

    $self->{confident} = 1 unless exists $self->{confident};
    $self->{document}->setEncoding($self->{input_encoding})
        if defined $self->{input_encoding};

    $self->{line_prev}   = $self->{line} = 1;
    $self->{column_prev} = -1;
    $self->{column}      = 0;

    # Advances to the next input character: stores its code point in {nc}
    # (-1 at end of input), folds CR and CR LF into a single LF, and keeps
    # the line/column counters up to date.
    $self->{set_nc} = sub {
        my $self = shift;

        my $char = '';
        if (defined $self->{next_nc}) {
            # A character was read ahead while handling a lone CR.
            $char = $self->{next_nc};
            delete $self->{next_nc};
            $self->{nc} = ord $char;
        }
        else {
            $self->{char_buffer}     = '';
            $self->{char_buffer_pos} = 0;

            # Bulk-read a run of characters that are neither LF nor CR.
            my $count = $input->manakai_read_until
                ($self->{char_buffer}, qr/[^\x0A\x0D]/, $self->{char_buffer_pos});
            if ($count) {
                $self->{line_prev}   = $self->{line};
                $self->{column_prev} = $self->{column};
                $self->{column}++;
                $self->{nc}
                    = ord substr ($self->{char_buffer},
                                  $self->{char_buffer_pos}++, 1);
                return;
            }

            if ($input->read ($char, 1)) {
                $self->{nc} = ord $char;
            }
            else {
                $self->{nc} = -1;
                return;
            }
        }

        ($self->{line_prev}, $self->{column_prev})
            = ($self->{line}, $self->{column});
        $self->{column}++;

        if ($self->{nc} == 0x000A) {
            $self->{line}++;
            $self->{column} = 0;
        }
        elsif ($self->{nc} == 0x000D) {
            # Swallow an LF directly after the CR; any other character is
            # kept for the next call.
            my $next = '';
            if ($input->read ($next, 1) and $next ne "\x0A") {
                $self->{next_nc} = $next;
            }
            $self->{nc} = 0x000A;
            $self->{line}++;
            $self->{column} = 0;
        }
    };

    # Appends to $_[0] (at offset $_[2]) a run of characters that are not in
    # the character-class body $_[1] and are neither LF nor CR. Returns the
    # number of characters consumed.
    $self->{read_until} = sub {
        return 0 if defined $self->{next_nc};

        my $pattern = qr/[^$_[1]\x0A\x0D]/;
        my $offset  = $_[2] || 0;

        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            # Serve the request from what set_nc already buffered.
            pos ($self->{char_buffer}) = $self->{char_buffer_pos};

            # The pattern must be interpolated without surrounding
            # whitespace: there is no /x here, so a line break inside the
            # regex would be matched literally and the run never found.
            if ($self->{char_buffer} =~ /\G(?>$pattern)+/) {
                substr ($_[0], $offset)
                    = substr ($self->{char_buffer}, $-[0], $+[0] - $-[0]);
                my $count = $+[0] - $-[0];
                if ($count) {
                    $self->{column}          += $count;
                    $self->{char_buffer_pos} += $count;
                    $self->{line_prev}   = $self->{line};
                    $self->{column_prev} = $self->{column} - 1;
                    $self->{nc}          = -1;
                }
                return $count;
            }
            else {
                return 0;
            }
        }
        else {
            my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
            if ($count) {
                $self->{column}     += $count;
                $self->{line_prev}   = $self->{line};
                $self->{column_prev} = $self->{column} - 1;
                $self->{nc}          = -1;
            }
            return $count;
        }
    };

    my $onerror = $_[2] || sub {
        my (%opt) = @_;
        my $line   = $opt{token} ? $opt{token}->{line}   : $opt{line};
        my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
        warn "Parse error ($opt{type}) at line $line column $column\n";
    };

    # Every report carries the current position first; caller-supplied
    # pairs follow in the argument list.
    $self->{parse_error} = sub {
        $onerror->(line => $self->{line}, column => $self->{column}, @_);
    };

    my $char_onerror = sub {
        my (undef, $type, %opt) = @_;
        $self->{parse_error}->(
            level  => $self->{level}->{must},
            layer  => 'encode',
            line   => $self->{line},
            column => $self->{column} + 1,
            %opt,
            type   => $type,
        );
    };

    if ($_[3]) {
        $input = $_[3]->($input);
        $input->onerror ($char_onerror);
    }
    else {
        # Leave an error handler that is already installed in place.
        $input->onerror ($char_onerror) unless defined $input->onerror;
    }

    $self->_initialize_tokenizer;
    $self->_initialize_tree_constructor;
    $self->_construct_tree;
    $self->_terminate_tree_constructor;

    # These closures capture $self; drop them once parsing is done.
    delete $self->{set_nc};
    delete $self->{read_until};
    delete $self->{parse_error};
    delete $self->{document};

    return $doc;
}
# Constructor. Accepts key/value options; the only one read here is
# no_cache, which gives the instance a private annotation cache instead of
# the shared $DATA store. The new object starts out with placeholder
# callbacks: set_nc sets {nc} to -1, the others do nothing.
sub new ($;@) {
    my ($class, %p) = @_;

    my %level = (
        must          => 'm',
        should        => 's',
        obsconforming => 's',
        warn          => 'w',
        info          => 'i',
        uncertain     => 'u',
    );

    my $self = bless {
        level        => \%level,
        _debug_cache => $p{no_cache} ? {} : $DATA,
    }, $class;

    $self->{set_nc} = sub {
        $self->{nc} = -1;
    };

    for my $hook (qw(parse_error change_encoding application_cache_selection)) {
        $self->{$hook} = sub {
        };
    }

    return $self;
}
# ---- Insertion mode families (one bit each) ---------------------------------
sub AFTER_HTML_IMS ()     { 0b100 }
sub HEAD_IMS ()           { 0b1000 }
sub BODY_IMS ()           { 0b10000 }
sub BODY_TABLE_IMS ()     { 0b100000 }
sub TABLE_IMS ()          { 0b1000000 }
sub ROW_IMS ()            { 0b10000000 }
sub BODY_AFTER_IMS ()     { 0b100000000 }
sub FRAME_IMS ()          { 0b1000000000 }
sub SELECT_IMS ()         { 0b10000000000 }
sub IN_CDATA_RCDATA_IM () { 0b1000000000000 }

# Bit mask used together with the insertion-mode values.
sub IM_MASK () { 0b11111111111 }

# ---- Concrete insertion modes -----------------------------------------------
# Each is a combination of family bits, plus low bits that tell apart the
# modes sharing a family.
sub AFTER_HTML_BODY_IM ()     { AFTER_HTML_IMS | BODY_AFTER_IMS }
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }

sub IN_HEAD_IM ()          { HEAD_IMS | 0b00 }
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 }
sub AFTER_HEAD_IM ()       { HEAD_IMS | 0b10 }
sub BEFORE_HEAD_IM ()      { HEAD_IMS | 0b11 }

sub IN_BODY_IM ()    { BODY_IMS }
sub IN_CELL_IM ()    { BODY_IMS | BODY_TABLE_IMS | 0b01 }
sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 0b10 }

sub IN_ROW_IM ()        { TABLE_IMS | ROW_IMS | 0b01 }
sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 0b10 }
sub IN_TABLE_IM ()      { TABLE_IMS }

sub AFTER_BODY_IM () { BODY_AFTER_IMS }

sub IN_FRAMESET_IM ()    { FRAME_IMS | 0b01 }
sub AFTER_FRAMESET_IM () { FRAME_IMS | 0b10 }

sub IN_SELECT_IM ()          { SELECT_IMS | 0b01 }
sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 0b10 }

sub IN_COLUMN_GROUP_IM () { 0b10 }
# Prepares tree construction: seeds the document's annotation hash with its
# starting values and marks framesets as still permitted.
sub _initialize_tree_constructor ($) {
    my $self = shift;

    my @initial = (
        strict_error_checking => 0,
        manakai_is_html       => 1,
        manakai_source_line   => 1,
        manakai_source_column => 1,
    );
    while (my ($key, $value) = splice @initial, 0, 2) {
        $self->_data($self->{document})->{$key} = $value;
    }

    $self->{frameset_ok} = 1;
}
# Finishes tree construction by switching the document's
# strict_error_checking annotation back on.
sub _terminate_tree_constructor ($) {
    my ($self) = @_;
    return $self->_data($self->{document}, strict_error_checking => 1);
}
{
my
$token
;
# Drives tree construction: resets the per-document parser state, fetches
# the first token into the shared $token, then runs the initial phase, the
# root-element phase and the main phase (entered in "before head" mode).
sub _construct_tree ($) {
    my $self = shift;

    $self->{insertion_mode} = 0;

    $token = $self->_get_next_token;

    # Per-document state starts out empty.
    $self->{open_elements} = [];
    undef $self->{$_}
        for qw(form_element head_element inner_html_node ignore_newline);

    $self->_tree_construction_initial;
    $self->_tree_construction_root_element;

    $self->{insertion_mode} = BEFORE_HEAD_IM;
    return $self->_tree_construction_main;
}
# Tree construction, initial phase: consumes leading whitespace, comments
# and the DOCTYPE, reports DOCTYPE conformance problems and decides the
# document's compatibility mode.
#
# Works on the shared $token. Returns once a DOCTYPE has been processed
# (with $token advanced past it) or once a token that ends this phase is
# seen (with $token left in place for the next phase). Dies on a token type
# it does not know.
#
# Corrections relative to the previous text of this sub:
#   * the quirks-prefix test compared against the prefix list reference
#     instead of the public identifier, so it could never match;
#   * the HTML 4.01 Frameset/Transitional rule was inverted (a missing
#     system identifier means quirks, a present one limited quirks);
#   * the system-identifier rule had lost its condition, forcing quirks mode
#     for every DOCTYPE with a system identifier and unbalancing the braces;
#   * the table of obsolete permitted DOCTYPEs was empty.
sub _tree_construction_initial ($) {
    my $self = shift;

    # All compatibility-mode decisions go through the document annotations.
    my $set_compat_mode = sub {
        $self->_data($self->{document})->{'manakai_compat_mode'} = $_[0];
    };

  INITIAL: {
        if ($token->{type} == DOCTYPE_TOKEN) {
            my $doctype_name = $token->{name};
            $doctype_name = '' unless defined $doctype_name;

            ## ---- Conformance reporting ----
            if ($doctype_name ne 'html') {
                $self->{parse_error}->(level => $self->{level}->{must},
                                       type => 'not HTML5', token => $token);
            }
            elsif (defined $token->{pubid}) {
                # Obsolete permitted DOCTYPEs: public identifier => the
                # system identifier that may accompany it.
                my $xsysid = {
                    "-//W3C//DTD HTML 4.0//EN"
                        => "http://www.w3.org/TR/REC-html40/strict.dtd",
                    "-//W3C//DTD HTML 4.01//EN"
                        => "http://www.w3.org/TR/html4/strict.dtd",
                    "-//W3C//DTD XHTML 1.0 Strict//EN"
                        => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd",
                    "-//W3C//DTD XHTML 1.1//EN"
                        => "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd",
                }->{$token->{pubid}};
                if (defined $xsysid and
                    (not defined $token->{sysid} or $token->{sysid} eq $xsysid)) {
                    $self->{parse_error}->(level => $self->{level}->{must},
                                           type => 'obs DOCTYPE', token => $token,
                                           level => $self->{level}->{obsconforming});
                }
                else {
                    $self->{parse_error}->(level => $self->{level}->{must},
                                           type => 'not HTML5', token => $token);
                }
            }
            elsif (defined $token->{sysid}) {
                if ($token->{sysid} eq 'about:legacy-compat') {
                    $self->{parse_error}->(level => $self->{level}->{must},
                                           type => 'XSLT-compat', token => $token,
                                           level => $self->{level}->{should});
                }
                else {
                    $self->{parse_error}->(level => $self->{level}->{must},
                                           type => 'not HTML5', token => $token);
                }
            }

            ## ---- Record the DOCTYPE on the document ----
            $self->_data($self->{'document'}, 'DTD_PUBLIC_ID', $token->{pubid});
            $self->_data($self->{'document'}, 'DTD_SYSTEM_ID', $token->{sysid});
            $self->_data($self->{'document'}, 'DTD_ELEMENT',
                         (defined $token->{name} ? $token->{name} : ''));
            $self->_data($self->{'document'}, 'DTD_COLUMN', $token->{column});
            $self->_data($self->{'document'}, 'DTD_LINE', $token->{line});

            $self->_data($self->{'document'}, isHTML4 => 1)
                if (($token->{pubid} || '') =~ /html 4/i
                    or ($token->{sysid} || '') =~ /html4/i);

            ## ---- Compatibility mode ----
            if ($token->{quirks} or $doctype_name ne 'html') {
                $set_compat_mode->('quirks');
            }
            elsif (defined $token->{pubid}) {
                # Public identifiers are compared case-insensitively, so
                # work on an upper-cased copy.
                my $pubid = $token->{pubid};
                $pubid =~ tr/a-z/A-Z/;

                # Public identifier prefixes that select quirks mode.
                # NOTE(review): "EXPERIMETNAL" is kept exactly as found;
                # confirm the intended spelling against the specification.
                my $prefix = [
                    "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
                    "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
                    "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
                    "-//IETF//DTD HTML 2.0 LEVEL 1//",
                    "-//IETF//DTD HTML 2.0 LEVEL 2//",
                    "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
                    "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
                    "-//IETF//DTD HTML 2.0 STRICT//",
                    "-//IETF//DTD HTML 2.0//",
                    "-//IETF//DTD HTML 2.1E//",
                    "-//IETF//DTD HTML 3.0//",
                    "-//IETF//DTD HTML 3.2 FINAL//",
                    "-//IETF//DTD HTML 3.2//",
                    "-//IETF//DTD HTML 3//",
                    "-//IETF//DTD HTML LEVEL 0//",
                    "-//IETF//DTD HTML LEVEL 1//",
                    "-//IETF//DTD HTML LEVEL 2//",
                    "-//IETF//DTD HTML LEVEL 3//",
                    "-//IETF//DTD HTML STRICT LEVEL 0//",
                    "-//IETF//DTD HTML STRICT LEVEL 1//",
                    "-//IETF//DTD HTML STRICT LEVEL 2//",
                    "-//IETF//DTD HTML STRICT LEVEL 3//",
                    "-//IETF//DTD HTML STRICT//",
                    "-//IETF//DTD HTML//",
                    "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
                    "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
                    "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
                    "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
                    "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
                    "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
                    "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
                    "-//NETSCAPE COMM. CORP.//DTD HTML//",
                    "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
                    "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
                    "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
                    "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
                    "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
                    "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
                    "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
                    "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
                    "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
                    "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
                    "-//W3C//DTD HTML 3 1995-03-24//",
                    "-//W3C//DTD HTML 3.2 DRAFT//",
                    "-//W3C//DTD HTML 3.2 FINAL//",
                    "-//W3C//DTD HTML 3.2//",
                    "-//W3C//DTD HTML 3.2S DRAFT//",
                    "-//W3C//DTD HTML 4.0 FRAMESET//",
                    "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
                    "-//W3C//DTD HTML EXPERIMETNAL 19960712//",
                    "-//W3C//DTD HTML EXPERIMENTAL 970421//",
                    "-//W3C//DTD W3 HTML//",
                    "-//W3O//DTD W3 HTML 3.0//",
                    "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
                    "-//WEBTECHS//DTD MOZILLA HTML//",
                ];

                # Does the public identifier start with a listed prefix?
                my $match;
                for (@$prefix) {
                    if (substr($pubid, 0, length $_) eq $_) {
                        $match = 1;
                        last;
                    }
                }

                if ($match or
                    $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
                    $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
                    $pubid eq "HTML") {
                    $set_compat_mode->('quirks');
                }
                elsif ($pubid =~ m[^-//W3C//DTD HTML 4.01 FRAMESET//] or
                       $pubid =~ m[^-//W3C//DTD HTML 4.01 TRANSITIONAL//]) {
                    # With a system identifier these are limited quirks;
                    # without one they are full quirks.
                    if (defined $token->{sysid}) {
                        $set_compat_mode->('limited quirks');
                    }
                    else {
                        $set_compat_mode->('quirks');
                    }
                }
                elsif ($pubid =~ m[^-//W3C//DTD XHTML 1.0 FRAMESET//] or
                       $pubid =~ m[^-//W3C//DTD XHTML 1.0 TRANSITIONAL//]) {
                    $set_compat_mode->('limited quirks');
                }
            }

            # One system identifier forces quirks mode whatever the public
            # identifier said (compared case-insensitively).
            if (defined $token->{sysid}) {
                my $sysid = $token->{sysid};
                $sysid =~ tr/A-Z/a-z/;
                if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
                    $set_compat_mode->('quirks');
                }
            }

            $token = $self->_get_next_token;
            return;
        }
        elsif ({
                START_TAG_TOKEN, 1,
                END_TAG_TOKEN, 1,
                END_OF_FILE_TOKEN, 1,
               }->{$token->{type}}) {
            # No DOCTYPE. Unless the document is annotated as srcdoc this is
            # an error and selects quirks mode. The token is not consumed.
            unless ($self->_data($self->{'document'}, 'manakai_is_srcdoc')) {
                $self->{parse_error}->(level => $self->{level}->{must},
                                       type => 'no DOCTYPE', token => $token);
                $set_compat_mode->('quirks');
            }
            return;
        }
        elsif ($token->{type} == CHARACTER_TOKEN) {
            # Leading whitespace is dropped; a token made only of
            # whitespace is skipped altogether.
            if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
                unless (length $token->{data}) {
                    $token = $self->_get_next_token;
                    redo INITIAL;
                }
            }

            $self->{parse_error}->(level => $self->{level}->{must},
                                   type => 'no DOCTYPE', token => $token);
            $set_compat_mode->('quirks');
            return;
        }
        elsif ($token->{type} == COMMENT_TOKEN) {
            # Comments before the DOCTYPE become children of the document.
            my $comment = $self->{document}->createComment($token->{data});
            $self->_data($comment, manakai_source_line => $token->{line})
                if defined $token->{line};
            $self->_data($comment, manakai_source_column => $token->{column})
                if defined $token->{column};
            $self->{document}->appendChild($comment);

            $token = $self->_get_next_token;
            redo INITIAL;
        }
        else {
            die "$0: $token->{type}: Unknown token type";
        }
    }

    die "$0: _tree_construction_initial: This should be never reached";
}
# Tree construction, root-element phase: skips stray DOCTYPEs, comments,
# whitespace and unexpected end tags, then creates the html root element,
# either from an explicit <html> start tag (token consumed) or as an implied
# element (token left for the next phase). The root is pushed onto the stack
# of open elements and the application cache callback is invoked.
#
# Works on the shared $token. Dies on a token type it does not know.
sub _tree_construction_root_element ($) {
    my $self = shift;

  B: {
        if ($token->{type} == DOCTYPE_TOKEN) {
            # A DOCTYPE is an error at this point and is ignored.
            $self->{parse_error}->(level => $self->{level}->{must},
                                   type => 'in html:#DOCTYPE', token => $token);
            $token = $self->_get_next_token;
            redo B;
        }
        elsif ($token->{type} == COMMENT_TOKEN) {
            # Comments before the root element belong to the document.
            my $comment = $self->{document}->createComment($token->{data});
            $self->_data($comment, manakai_source_line => $token->{line})
                if defined $token->{line};
            $self->_data($comment, manakai_source_column => $token->{column})
                if defined $token->{column};
            $self->{document}->appendChild($comment);
            $token = $self->_get_next_token;
            redo B;
        }
        elsif ($token->{type} == CHARACTER_TOKEN) {
            # Strip leading whitespace; skip the token when nothing is left.
            if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//
                && !length $token->{data}) {
                $token = $self->_get_next_token;
                redo B;
            }

            # NOTE(review): this path reaches the implied-root code below,
            # which invokes the same callback a second time.
            $self->{application_cache_selection}->(undef);
        }
        elsif ($token->{type} == START_TAG_TOKEN) {
            if ($token->{tag_name} eq 'html') {
                my $root_element
                    = $self->{document}->createElementNS((HTML_NS), $token->{tag_name});

                # Copy the tag's attributes; names for which no attribute
                # node could be created are skipped.
                for my $attr_name (keys %{$token->{attributes}}) {
                    my $attr_t = $token->{attributes}->{$attr_name};
                    my $attr = $self->{document}->createAttributeNS(undef, $attr_name);
                    next unless $attr;
                    $attr->setValue ($attr_t->{value});
                    $self->_data($attr, manakai_source_line => $attr_t->{line});
                    $self->_data($attr, manakai_source_column => $attr_t->{column});
                    $root_element->setAttributeNodeNS($attr);
                }

                $self->_data($root_element, manakai_source_line => $token->{line})
                    if defined $token->{line};
                $self->_data($root_element, manakai_source_column => $token->{column})
                    if defined $token->{column};

                $self->{document}->setDocumentElement($root_element);
                push @{$self->{open_elements}},
                    [$root_element, $el_category->{html}];

                my $manifest = $token->{attributes}->{manifest};
                if ($manifest) {
                    $self->{application_cache_selection}
                        ->($manifest->{value});
                }
                else {
                    $self->{application_cache_selection}->(undef);
                }

                $token = $self->_get_next_token;
                return;
            }
            # Any other start tag implies the root element (below).
        }
        elsif ($token->{type} == END_TAG_TOKEN) {
            # Only these end tags imply the root element; every other end
            # tag is an error and is dropped.
            unless ({
                     head => 1, body => 1, html => 1, br => 1,
                    }->{$token->{tag_name}}) {
                $self->{parse_error}->(level => $self->{level}->{must},
                                       type => 'unmatched end tag',
                                       text => $token->{tag_name},
                                       token => $token);
                $token = $self->_get_next_token;
                redo B;
            }
        }
        elsif ($token->{type} == END_OF_FILE_TOKEN) {
            # End of input also implies the root element.
        }
        else {
            die "$0: $token->{type}: Unknown token type";
        }

        # Implied root: create <html> without consuming the current token.
        my $implied_root = $self->{document}->createElementNS((HTML_NS), 'html');
        $self->_data($implied_root, manakai_source_line => $token->{line})
            if defined $token->{line};
        $self->_data($implied_root, manakai_source_column => $token->{column})
            if defined $token->{column};
        $self->_data($implied_root, implied => __LINE__);

        $self->{document}->setDocumentElement($implied_root);
        push @{$self->{open_elements}}, [$implied_root, $el_category->{html}];

        $self->{application_cache_selection}->(undef);

        return;
    }

    die "$0: _tree_construction_root_element: This should never be reached";
}
# Recomputes $self->{insertion_mode} from the stack of open elements.
#
# Walks $self->{open_elements} from the top (index -1) towards the bottom.
# Each entry is [ node, category ].  The first entry that determines a mode
# wins:
#   * a table cell (unless it is the bottom entry)  -> IN_CELL_IM
#   * a non-foreign element whose tagName is in the table below
#   * an element whose category equals HTML_EL       -> BEFORE_HEAD_IM
#   * otherwise, once the bottom entry is reached    -> IN_BODY_IM
#
# When the bottom entry is reached, $self->{inner_html_node} is examined in
# its place; the sub dies if that is not defined.
sub _reset_insertion_mode ($) {
    my $self = shift;

    # Built once; the lookup only happens for non-cell, non-foreign nodes.
    my %mode_for_tag = (
        select   => IN_SELECT_IM,
        tr       => IN_ROW_IM,
        tbody    => IN_TABLE_BODY_IM,
        thead    => IN_TABLE_BODY_IM,
        tfoot    => IN_TABLE_BODY_IM,
        caption  => IN_CAPTION_IM,
        colgroup => IN_COLUMN_GROUP_IM,
        table    => IN_TABLE_IM,
        head     => IN_BODY_IM,
        body     => IN_BODY_IM,
        frameset => IN_FRAMESET_IM,
    );

    my $reached_bottom;
    my $depth = -1;
    my $node  = $self->{open_elements}->[$depth];

    while (1) {
        if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
            $reached_bottom = 1;
            die "_reset_insertion_mode: t27"
                unless defined $self->{inner_html_node};
            $node = $self->{inner_html_node};
        }

        my $candidate;
        if ($node->[1] == TABLE_CELL_EL) {
            $candidate = IN_CELL_IM unless $reached_bottom;
        }
        elsif ($node->[1] & FOREIGN_EL) {
            # Foreign elements never select a mode by themselves.
        }
        else {
            $candidate = $mode_for_tag{ $node->[0]->tagName };
        }

        if (defined $candidate) {
            $self->{insertion_mode} = $candidate;
            # The walk only stops here when the stored mode is a true value.
            last if $candidate;
        }

        if ($node->[1] == HTML_EL) {
            $self->{insertion_mode} = BEFORE_HEAD_IM;
            last;
        }

        if ($reached_bottom) {
            $self->{insertion_mode} = IN_BODY_IM;
            last;
        }

        $depth--;
        $node = $self->{open_elements}->[$depth];
    }
}
# Handles a start tag whose content the tokenizer must read as text.
#
# Arguments:
#   $self        - the parser object
#   $insert      - code ref called as $insert->($self, $element, $open_tables)
#                  to attach the new element to the tree
#   $open_tables - passed through to $insert unchanged
#   $parse_refs  - true selects RCDATA_STATE, false selects RAWTEXT_STATE
#
# Builds an HTML-namespace element from the parser-wide $token (declared
# outside this sub), copies its attributes and source positions, inserts it,
# pushes it on the stack of open elements, switches the tokenizer state, sets
# the IN_CDATA_RCDATA_IM bit of the insertion mode and advances $token.
my $parse_rcdata = sub ($$$$) {
    my ($self, $insert, $open_tables, $parse_refs) = @_;

    my $el = $self->{document}
        ->createElementNS((HTML_NS), $token->{tag_name});

    for my $attr_name (keys %{ $token->{attributes} }) {
        my $attr_t = $token->{attributes}->{$attr_name};
        my $attr   = $self->{document}->createAttributeNS(undef, $attr_name);
        # Same guard as the root-element code: skip an attribute for which
        # no node came back instead of calling a method on undef.
        next unless $attr;
        $attr->setValue($attr_t->{value});
        $self->_data($attr, manakai_source_line   => $attr_t->{line});
        $self->_data($attr, manakai_source_column => $attr_t->{column});
        $el->setAttributeNodeNS($attr);
    }

    $self->_data($el, manakai_source_line => $token->{line})
        if defined $token->{line};
    $self->_data($el, manakai_source_column => $token->{column})
        if defined $token->{column};

    $insert->($self, $el, $open_tables);
    push @{ $self->{open_elements} },
        [ $el, $el_category->{ $token->{tag_name} } || 0 ];

    if ($parse_refs) {
        $self->{state} = RCDATA_STATE;
    }
    else {
        $self->{state} = RAWTEXT_STATE;
    }
    delete $self->{escape};

    # The bit is OR-ed in, so the previous insertion mode stays recoverable.
    $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;

    $token = $self->_get_next_token;
};
# Handles a <script> start tag.
#
# Arguments:
#   $self        - the parser object
#   $insert      - code ref called as $insert->($self, $element, $open_tables)
#   $open_tables - passed through to $insert unchanged
#
# Creates an HTML-namespace 'script' element carrying the attributes and
# source positions of the parser-wide $token (declared outside this sub),
# inserts it, pushes it on the stack of open elements, puts the tokenizer in
# SCRIPT_DATA_STATE, sets the IN_CDATA_RCDATA_IM bit of the insertion mode
# and advances $token.
my $script_start_tag = sub ($$$) {
    my ($self, $insert, $open_tables) = @_;

    my $script_el = $self->{document}->createElementNS((HTML_NS), 'script');

    for my $attr_name (keys %{ $token->{attributes} }) {
        my $attr_t = $token->{attributes}->{$attr_name};
        my $attr   = $self->{document}->createAttributeNS(undef, $attr_name);
        # Same guard as the root-element code: skip an attribute for which
        # no node came back instead of calling a method on undef.
        next unless $attr;
        $attr->setValue($attr_t->{value});
        $self->_data($attr, manakai_source_line   => $attr_t->{line});
        $self->_data($attr, manakai_source_column => $attr_t->{column});
        $script_el->setAttributeNodeNS($attr);
    }

    $self->_data($script_el, manakai_source_line => $token->{line})
        if defined $token->{line};
    $self->_data($script_el, manakai_source_column => $token->{column})
        if defined $token->{column};

    $insert->($self, $script_el, $open_tables);
    push @{ $self->{open_elements} }, [ $script_el, $el_category->{script} ];

    $self->{state} = SCRIPT_DATA_STATE;
    delete $self->{escape};

    # The bit is OR-ed in, so the previous insertion mode stays recoverable.
    $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;

    $token = $self->_get_next_token;
};
# Appends an entry to the list of active formatting elements, keeping at most
# three equivalent entries after the last marker.
#
# Arguments:
#   $item - entry to add; its slot 2 is the start-tag token hash
#           ({ tag_name => ..., attributes => { name => { value => ... } } })
#   $afes - array ref holding the list; modified in place
#
# Scanning backwards from the end and stopping at the first '#marker' entry,
# entries count as equivalent when their token has the same tag name, the
# same number of attributes and an equal value for every attribute of $item.
# When a third equivalent entry is met (the earliest of the three) it is
# removed before $item is pushed.
sub push_afe ($$)
{
    my ($new_entry, $list) = @_;
    my $new_token = $new_entry->[2];
    my $seen      = 0;

    ENTRY:
    for (my $pos = $#$list; $pos >= 0; $pos--)
    {
        my $candidate = $list->[$pos];
        last ENTRY if $candidate->[0] eq '#marker';

        my $cand_token = $candidate->[2];
        next ENTRY
            unless $cand_token->{tag_name} eq $new_token->{tag_name};

        # Different attribute counts can never be equivalent.
        next ENTRY
            if (keys %{ $cand_token->{attributes} })
            != (keys %{ $new_token->{attributes} });

        for my $name (keys %{ $new_token->{attributes} })
        {
            my $theirs = $cand_token->{attributes}->{$name};
            next ENTRY unless $theirs;
            next ENTRY
                unless $theirs->{value} eq
                $new_token->{attributes}->{$name}->{value};
        }

        $seen++;
        next ENTRY unless $seen == 3;

        # Third match found walking backwards, i.e. the oldest one: drop it.
        splice @$list, $pos, 1;
        last ENTRY;
    }

    push @$list, $new_entry;
}
# Handles an end tag for a formatting element (the "adoption agency"
# procedure of the HTML parsing algorithm).
#
# Arguments:
#   $self                       - the parser object
#   $active_formatting_elements - array ref, list of active formatting
#                                 elements; entries are
#                                 [ node, category, start-tag token ] or have
#                                 '#marker' in slot 0
#   $open_tables                - array ref; slot 1 of its last entry is set
#                                 to 1 when a node is foster-parented
#   $end_tag_token              - the end tag token being processed
#
# Entries of $self->{open_elements} are [ node, category ].  Every exit path
# advances the parser-wide $token (declared outside this sub).
#
# Changes relative to the previous revision:
#   * the loops over the stack of open elements iterate over
#     0 .. $#{ $self->{open_elements} } (the loop headers were truncated);
#   * when the formatting element is not on the stack of open elements, that
#     very entry is removed from the active list (it used to pop the last
#     entry, which is a different one whenever other formatting elements were
#     opened after it);
#   * attributes for which no attribute node could be created are skipped, as
#     in the root-element code.
my $formatting_end_tag = sub {
    my ($self, $active_formatting_elements, $open_tables, $end_tag_token)
        = @_;
    my $tag_name = $end_tag_token->{tag_name};

    my $outer_loop_counter = 0;
    OUTER: {
        # Hard cap on the number of passes.
        if ($outer_loop_counter >= 8) {
            $token = $self->_get_next_token;
            last OUTER;
        }
        $outer_loop_counter++;

        # Locate the formatting element: the last entry after the last
        # marker whose tagName equals the end tag's name.
        my $formatting_element;
        my $formatting_element_i_in_active;
        AFE:
        for (reverse 0 .. $#$active_formatting_elements) {
            if ($active_formatting_elements->[$_]->[0] eq '#marker') {
                last AFE;
            }
            elsif ($active_formatting_elements->[$_]->[0]->tagName
                eq $tag_name)
            {
                $formatting_element = $active_formatting_elements->[$_];
                $formatting_element_i_in_active = $_;
                last AFE;
            }
        }
        unless (defined $formatting_element) {
            $self->{parse_error}->(
                level => $self->{level}->{must},
                type  => 'unmatched end tag',
                text  => $tag_name,
                token => $end_tag_token
            );
            $token = $self->_get_next_token;
            return;
        }

        # It must also be on the stack of open elements, and in scope.
        my $in_scope = 1;
        my $formatting_element_i_in_open;
        INSCOPE:
        for (reverse 0 .. $#{ $self->{open_elements} }) {
            my $node = $self->{open_elements}->[$_];
            if ($node->[0] eq $formatting_element->[0]) {
                if ($in_scope) {
                    $formatting_element_i_in_open = $_;
                    last INSCOPE;
                }
                else {
                    $self->{parse_error}->(
                        level => $self->{level}->{must},
                        type  => 'unmatched end tag',
                        text  => $token->{tag_name},
                        token => $end_tag_token
                    );
                    $token = $self->_get_next_token;
                    return;
                }
            }
            elsif ($node->[1] & SCOPING_EL) {
                $in_scope = 0;
            }
        }
        unless (defined $formatting_element_i_in_open) {
            $self->{parse_error}->(
                level => $self->{level}->{must},
                type  => 'unmatched end tag',
                text  => $token->{tag_name},
                token => $end_tag_token
            );
            # Remove the formatting element itself, wherever it sits in the
            # list; it is not necessarily the last entry.
            splice @$active_formatting_elements,
                $formatting_element_i_in_active, 1;
            $token = $self->_get_next_token;
            return;
        }
        if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0])
        {
            $self->{parse_error}->(
                level => $self->{level}->{must},
                type  => 'not closed',
                text  => $self->{open_elements}->[-1]->[0]->tagName,
                token => $end_tag_token
            );
        }

        # Furthest block: scanning from the top of the stack down to the
        # formatting element, the last special/scoping entry seen, i.e. the
        # one closest to the formatting element.
        my $furthest_block;
        my $furthest_block_i_in_open;
        OE:
        for (reverse 0 .. $#{ $self->{open_elements} }) {
            my $node = $self->{open_elements}->[$_];
            if ($node->[1] & SPECIAL_EL or $node->[1] & SCOPING_EL) {
                $furthest_block           = $node;
                $furthest_block_i_in_open = $_;
            }
            elsif ($node->[0] eq $formatting_element->[0]) {
                last OE;
            }
        }
        unless (defined $furthest_block) {
            # Simple case: pop the stack down to and including the
            # formatting element and drop it from the active list.
            splice @{ $self->{open_elements} }, $formatting_element_i_in_open;
            splice @$active_formatting_elements,
                $formatting_element_i_in_active, 1;
            $token = $self->_get_next_token;
            return;
        }

        my $common_ancestor_node
            = $self->{open_elements}->[ $formatting_element_i_in_open - 1 ];
        # Bookmark: node of the active-list entry just before the
        # formatting element.
        my $bookmark_prev_el
            = $active_formatting_elements
            ->[ $formatting_element_i_in_active - 1 ]->[0];

        my $node           = $furthest_block;
        my $node_i_in_open = $furthest_block_i_in_open;
        my $last_node      = $furthest_block;

        my $inner_loop_counter = 0;
        INNER: {
            # Hard cap on the number of inner passes.
            if ($inner_loop_counter >= 3) {
                $token = $self->_get_next_token;
                last OUTER;
            }
            $inner_loop_counter++;

            $node_i_in_open--;
            $node = $self->{open_elements}->[$node_i_in_open];

            my $node_i_in_active;
            my $node_token;
            S7S2: {
                for (reverse 0 .. $#$active_formatting_elements) {
                    if ($active_formatting_elements->[$_]->[0] eq $node->[0])
                    {
                        $node_i_in_active = $_;
                        $node_token = $active_formatting_elements->[$_]->[2];
                        last S7S2;
                    }
                }
                # Not an active formatting element: take it off the stack
                # and look at the next one.
                splice @{ $self->{open_elements} }, $node_i_in_open, 1;
                redo INNER;
            }

            last INNER if $node->[0] eq $formatting_element->[0];

            if ($node->[0]->hasChildNodes()) {
                # Replace the node by a fresh element built from its
                # start-tag token, in both the active list and the stack.
                my $new_element = [];
                $new_element->[0] = $self->{document}
                    ->createElementNS((HTML_NS), $node_token->{tag_name});
                for my $attr_name (keys %{ $node_token->{attributes} }) {
                    my $attr_t = $node_token->{attributes}->{$attr_name};
                    my $attr   = $self->{document}
                        ->createAttributeNS(undef, $attr_name);
                    next unless $attr;
                    $attr->setValue($attr_t->{value});
                    $self->_data($attr,
                        manakai_source_line => $attr_t->{line});
                    $self->_data($attr,
                        manakai_source_column => $attr_t->{column});
                    $new_element->[0]->setAttributeNodeNS($attr);
                }
                $self->_data($new_element->[0],
                    manakai_source_line => $node_token->{line})
                    if defined $node_token->{line};
                $self->_data($new_element->[0],
                    manakai_source_column => $node_token->{column})
                    if defined $node_token->{column};
                $new_element->[1] = $node->[1];
                $new_element->[2] = $node_token;
                $active_formatting_elements->[$node_i_in_active]
                    = $new_element;
                $self->{open_elements}->[$node_i_in_open] = $new_element;
                $node = $new_element;
            }

            if ($last_node->[0] eq $furthest_block->[0]) {
                $bookmark_prev_el = $node->[0];
            }

            $node->[0]->appendChild($last_node->[0]);
            $last_node = $node;
            redo INNER;
        }

        # Attach $last_node below the common ancestor, foster-parenting it
        # when that ancestor is a table-structure element.
        if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
            my $foster_parent_element;
            my $next_sibling;
            OE:
            for (reverse 0 .. $#{ $self->{open_elements} }) {
                if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
                    $foster_parent_element
                        = $self->{open_elements}->[ $_ - 1 ]->[0];
                    $next_sibling = $self->{open_elements}->[$_]->[0];
                    undef $next_sibling
                        unless $next_sibling->parentNode
                        eq $foster_parent_element;
                    last OE;
                }
            }
            $foster_parent_element ||= $self->{open_elements}->[0]->[0];
            $foster_parent_element->insertBefore($last_node->[0],
                $next_sibling);
            $open_tables->[-1]->[1] = 1;
        }
        else {
            $common_ancestor_node->[0]->appendChild($last_node->[0]);
        }

        # New element built from the formatting element's token; it takes
        # over the furthest block's children and becomes its only child.
        my $new_element = [];
        $new_element->[0] = $self->{document}->createElementNS((HTML_NS),
            $formatting_element->[2]->{tag_name});
        for my $attr_name (keys %{ $formatting_element->[2]->{attributes} }) {
            my $attr_t = $formatting_element->[2]->{attributes}->{$attr_name};
            my $attr = $self->{document}->createAttributeNS(undef, $attr_name);
            next unless $attr;
            $attr->setValue($attr_t->{value});
            $self->_data($attr, manakai_source_line   => $attr_t->{line});
            $self->_data($attr, manakai_source_column => $attr_t->{column});
            $new_element->[0]->setAttributeNodeNS($attr);
        }
        $self->_data($new_element->[0],
            manakai_source_line => $formatting_element->[2]->{line})
            if defined $formatting_element->[2]->{line};
        $self->_data($new_element->[0],
            manakai_source_column => $formatting_element->[2]->{column})
            if defined $formatting_element->[2]->{column};
        $new_element->[1] = $formatting_element->[1];
        $new_element->[2] = $formatting_element->[2];

        my @cn = $furthest_block->[0]->childNodes;
        $new_element->[0]->appendChild($_) for @cn;
        $furthest_block->[0]->appendChild($new_element->[0]);

        # Active list: remove the formatting element and insert the new
        # element after the bookmark.  $i is the bookmark index, decremented
        # when the removal happens below it.
        my $i;
        AFE:
        for (reverse 0 .. $#$active_formatting_elements) {
            if ($active_formatting_elements->[$_]->[0]
                eq $formatting_element->[0])
            {
                splice @$active_formatting_elements, $_, 1;
                $i-- and last AFE if defined $i;
            }
            elsif ($active_formatting_elements->[$_]->[0]
                eq $bookmark_prev_el)
            {
                $i = $_;
            }
        }
        splice @$active_formatting_elements, (defined $i ? $i : 0) + 1, 0,
            $new_element;

        # Stack of open elements: remove the formatting element and insert
        # the new element right after the furthest block.
        undef $i;
        OE:
        for (reverse 0 .. $#{ $self->{open_elements} }) {
            if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0])
            {
                splice @{ $self->{open_elements} }, $_, 1;
                $i-- and last OE if defined $i;
            }
            elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0])
            {
                $i = $_;
            }
        }
        splice @{ $self->{open_elements} }, $i + 1, 0, $new_element;

        redo OUTER;
    }
};
# Re-opens formatting elements that are in the list of active formatting
# elements but no longer on the stack of open elements.
#
# Arguments:
#   $self                       - the parser object
#   $insert                     - code ref called as
#                                 $insert->($self, $node, $open_tables)
#   $active_formatting_elements - array ref of [ node, category, token ]
#                                 entries ('#marker' in slot 0 for markers)
#   $open_tables                - passed through to $insert unchanged
#
# Nothing happens when the list is empty, or when its last entry is a marker
# or is still open.  Otherwise the sub rewinds to the entry that follows the
# nearest marker / still-open entry (or to the first entry), then, moving
# forward to the end of the list, shallow-clones each entry's node, inserts
# the clone, pushes it on the stack of open elements and replaces the list
# entry by the new stack entry.
my $reconstruct_active_formatting_elements = sub ($$$$) {
    my ($self, $insert, $active_formatting_elements, $open_tables) = @_;

    return unless @$active_formatting_elements;

    # Negative index, counted from the end of the list.
    my $pos  = -1;
    my $item = $active_formatting_elements->[$pos];

    return if $item->[0] eq '#marker';
    for my $open (@{ $self->{open_elements} }) {
        return if $item->[0] eq $open->[0];
    }

    REWIND:
    while (1) {
        # Already at the first entry: start creating from here.
        last REWIND if $active_formatting_elements->[0]->[0] eq $item->[0];

        $pos--;
        $item = $active_formatting_elements->[$pos];

        unless ($item->[0] eq '#marker') {
            my $still_open;
            for my $open (@{ $self->{open_elements} }) {
                next unless $item->[0] eq $open->[0];
                $still_open = 1;
                last;
            }
            # Neither a marker nor open: keep rewinding.
            next REWIND unless $still_open;
        }

        # Step forward again to the first entry that needs re-opening.
        $pos++;
        $item = $active_formatting_elements->[$pos];
        last REWIND;
    }

    CREATE:
    while (1) {
        my $reopened = [ $item->[0]->cloneNode(0), $item->[1], $item->[2] ];
        $insert->($self, $reopened->[0], $open_tables);
        push @{ $self->{open_elements} }, $reopened;
        $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

        # Done once the entry just replaced is the last one in the list.
        last CREATE
            if $reopened->[0] eq $active_formatting_elements->[-1]->[0];

        $pos++;
        $item = $active_formatting_elements->[$pos];
    }
};
# Truncates the list of active formatting elements at its last marker.
#
# Takes the list (array ref) as its only argument and modifies it in place:
# the last entry whose slot 0 is '#marker' is removed together with every
# entry after it.  A list without a marker is left untouched.
my $clear_up_to_marker = sub ($) {
    my ($list) = @_;

    my $pos = $#$list;
    while ($pos >= 0) {
        if ($list->[$pos]->[0] eq '#marker') {
            # Two-argument splice: drops everything from $pos to the end.
            splice @$list, $pos;
            return;
        }
        $pos--;
    }
};
# Default insertion routine: appends a node to the current node.
#
# Called as $insert_to_current->($self, $child, ...).  The current node is
# the node (slot 0) of the last entry of $self->{open_elements}.  Returns
# whatever appendChild returns; arguments after $child are ignored.
my $insert_to_current = sub {
    my ($self, $child) = @_;
    my $current_node = $self->{open_elements}->[-1]->[0];
    return $current_node->appendChild($child);
};
# Insertion routine used while foster parenting is in effect.
#
# Called as $insert_to_foster->($self, $child, $open_tables).
#
# When the current node (last entry of $self->{open_elements}) has a
# TABLE_ROWS_EL category bit, $child is not appended to it.  Instead the
# stack is searched from the top for the nearest entry whose category equals
# TABLE_EL:
#   * the foster parent is the node of the stack entry just below that
#     table, or the bottom stack entry's node when no table entry is found;
#   * when the table node's parentNode is the foster parent, $child is
#     inserted immediately before the table; otherwise it is appended to the
#     foster parent.
# Slot 1 of the last $open_tables entry is then set to 1 (presumably a
# "content was foster-parented" flag -- confirm against its readers).
#
# In every other case $child is simply appended to the current node.
#
# The table search iterates over 0 .. $#{ $self->{open_elements} }; the loop
# header had been truncated.
my $insert_to_foster = sub {
    my ($self, $child, $open_tables) = @_;

    if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
        my $foster_parent_element;
        my $next_sibling;
        OE:
        for (reverse 0 .. $#{ $self->{open_elements} }) {
            if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
                $foster_parent_element
                    = $self->{open_elements}->[ $_ - 1 ]->[0];
                $next_sibling = $self->{open_elements}->[$_]->[0];
                # Only usable as a reference node when the table really is a
                # child of the foster parent.
                undef $next_sibling
                    unless $next_sibling->parentNode
                    eq $foster_parent_element;
                last OE;
            }
        }
        $foster_parent_element ||= $self->{open_elements}->[0]->[0];

        if ($next_sibling) {
            $foster_parent_element->insertBefore($child, $next_sibling);
        }
        else {
            $foster_parent_element->appendChild($child);
        }
        $open_tables->[-1]->[1] = 1;
    }
    else {
        $self->{open_elements}->[-1]->[0]->appendChild($child);
    }
};
sub
_tree_construction_main ($) {
my
$self
=
shift
;
my
$active_formatting_elements
= [];
my
$insert
;
my
$open_tables
= [[
$self
->{open_elements}->[0]->[0]]];
$insert
=
$insert_to_current
;
B:
while
(1) {
if
(
$token
->{n}++ == 100) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'parser impl error'
,
token
=>
$token
);
warn
"====== HTML Parser Error ======\n"
;
warn
join
(
' '
,
map
{
$_
->[0]->tagName } @{
$self
->{open_elements}}) .
' #'
.
$self
->{insertion_mode} .
"\n"
;
warn
Data::Dumper::Dumper (
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
if
(
(not @{
$self
->{open_elements}}) or
(not
$self
->{open_elements}->[-1]->[1] & FOREIGN_EL) or
(
$self
->{open_elements}->[-1]->[1] == MML_TEXT_INTEGRATION_EL and
((
$token
->{type} == START_TAG_TOKEN and
$token
->{tag_name} ne
'mglyph'
and
$token
->{tag_name} ne
'malignmark'
) or
$token
->{type} == CHARACTER_TOKEN)) or
(
$self
->{open_elements}->[-1]->[1] & MML_AXML_EL and
$token
->{type} == START_TAG_TOKEN and
$token
->{tag_name} eq
'svg'
) or
(
$self
->{open_elements}->[-1]->[1] == SVG_INTEGRATION_EL and
(
$token
->{type} == START_TAG_TOKEN or
$token
->{type} == CHARACTER_TOKEN)) or
(
$self
->{open_elements}->[-1]->[1] == MML_AXML_EL and
(
$token
->{type} == START_TAG_TOKEN or
$token
->{type} == CHARACTER_TOKEN) and
do
{
my
$encoding
=
$self
->{open_elements}->[-1]->[0]->getAttributeNS(
undef
,
'encoding'
) ||
''
;
$encoding
=~
tr
/A-Z/a-z/;
if
(
$encoding
eq
'text/html'
or
$encoding
eq
'application/xhtml+xml'
) {
1;
}
else
{
0;
}
}) or
(
$token
->{type} == END_OF_FILE_TOKEN)) {
}
else
{
if
(
$token
->{type} == CHARACTER_TOKEN) {
my
$data
=
$token
->{data};
while
(
$data
=~ s/\x00/\x{FFFD}/) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
,
token
=>
$token
);
}
$self
->{open_elements}->[-1]->[0]->appendTextFromUnicode(
$self
,
$data
,
$token
);
if
(
$data
=~ /[^\x09\x0A\x0C\x0D\x20]/) {
delete
$self
->{frameset_ok};
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{type} == START_TAG_TOKEN) {
if
(
{
b
=> 1,
big
=> 1,
blockquote
=> 1,
body
=> 1,
br
=> 1,
center
=> 1,
code
=> 1,
dd
=> 1,
div
=> 1,
dl
=> 1,
dt
=> 1,
em
=> 1,
embed
=> 1,
h1
=> 1,
h2
=> 1,
h3
=> 1,
h4
=> 1,
h5
=> 1,
h6
=> 1,
head
=> 1,
hr
=> 1,
i
=> 1,
img
=> 1,
li
=> 1,
listing
=> 1,
menu
=> 1,
meta
=> 1,
nobr
=> 1,
ol
=> 1,
p
=> 1,
pre
=> 1,
ruby
=> 1,
s
=> 1,
small
=> 1,
span
=> 1,
strong
=> 1,
strike
=> 1,
sub
=> 1,
sup
=> 1,
table
=> 1,
tt
=> 1,
u
=> 1,
ul
=> 1,
var
=> 1,
}->{
$token
->{tag_name}} or
(
$token
->{tag_name} eq
'font'
and
(
$token
->{attributes}->{color} or
$token
->{attributes}->{face} or
$token
->{attributes}->{size}))
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$self
->{open_elements}->[-1]->[0]
->localname,
token
=>
$token
);
pop
@{
$self
->{open_elements}};
V: {
my
$current_node
=
$self
->{open_elements}->[-1];
if
(
not
$current_node
->[1] & FOREIGN_EL or
$current_node
->[1] == MML_TEXT_INTEGRATION_EL or
$current_node
->[1] == SVG_INTEGRATION_EL or
(
$current_node
->[1] == MML_AXML_EL and
do
{
my
$encoding
=
$current_node
->[0]->getAttributeNS(
undef
,
'encoding'
) ||
''
;
$encoding
=~
tr
/A-Z/a-z/;
(
$encoding
eq
'text/html'
or
$encoding
eq
'application/xhtml+xml'
);
})
) {
last
V;
}
pop
@{
$self
->{open_elements}};
redo
V;
}
next
B;
}
else
{
my
$nsuri
=
$self
->{open_elements}->[-1]->[0]->namespaceURI;
my
$tag_name
=
$token
->{tag_name};
if
(
$nsuri
eq (SVG_NS)) {
$tag_name
= {
altglyph
=>
'altGlyph'
,
altglyphdef
=>
'altGlyphDef'
,
altglyphitem
=>
'altGlyphItem'
,
animatecolor
=>
'animateColor'
,
animatemotion
=>
'animateMotion'
,
animatetransform
=>
'animateTransform'
,
clippath
=>
'clipPath'
,
feblend
=>
'feBlend'
,
fecolormatrix
=>
'feColorMatrix'
,
fecomponenttransfer
=>
'feComponentTransfer'
,
fecomposite
=>
'feComposite'
,
feconvolvematrix
=>
'feConvolveMatrix'
,
fediffuselighting
=>
'feDiffuseLighting'
,
fedisplacementmap
=>
'feDisplacementMap'
,
fedistantlight
=>
'feDistantLight'
,
feflood
=>
'feFlood'
,
fefunca
=>
'feFuncA'
,
fefuncb
=>
'feFuncB'
,
fefuncg
=>
'feFuncG'
,
fefuncr
=>
'feFuncR'
,
fegaussianblur
=>
'feGaussianBlur'
,
feimage
=>
'feImage'
,
femerge
=>
'feMerge'
,
femergenode
=>
'feMergeNode'
,
femorphology
=>
'feMorphology'
,
feoffset
=>
'feOffset'
,
fepointlight
=>
'fePointLight'
,
fespecularlighting
=>
'feSpecularLighting'
,
fespotlight
=>
'feSpotLight'
,
fetile
=>
'feTile'
,
feturbulence
=>
'feTurbulence'
,
foreignobject
=>
'foreignObject'
,
glyphref
=>
'glyphRef'
,
lineargradient
=>
'linearGradient'
,
radialgradient
=>
'radialGradient'
,
textpath
=>
'textPath'
,
}->{
$tag_name
} ||
$tag_name
;
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS(
$nsuri
,
$tag_name
);
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
;
if
(
defined
$foreign_attr_xname
->{
$attr_name
})
{
my
$xmlnsuri
=
$foreign_attr_xname
->{
$attr_name
}->[0];
my
$qname
=
join
':'
, @{
$foreign_attr_xname
->{
$attr_name
}->[1]};
$qname
=~ s/(^:)|(:$)//;
$attr
=
$self
->{document}->createAttributeNS(
$xmlnsuri
,
$qname
);
}
elsif
(
$nsuri
eq (MML_NS) &&
$attr_name
eq
'definitionurl'
)
{
$attr
=
$self
->{document}->createAttributeNS((MML_NS),
'math:definitionURL'
);
}
elsif
(
$nsuri
eq (MML_NS) )
{
$attr
=
$self
->{document}->createAttributeNS((MML_NS),
"math:$attr_name"
);
}
elsif
(
$nsuri
eq (SVG_NS) )
{
$attr
=
$self
->{document}->createAttributeNS(
(SVG_NS),
"svg:"
.(
$svg_attr_name
->{
$attr_name
} ||
$attr_name
));
}
unless
(
defined
$attr
)
{
$attr
=
$self
->{document}->createAttributeNS(
$nsuri
,
$attr_name
);
}
unless
(
defined
$attr
)
{
$attr
=
$self
->{document}->createAttribute(
$attr_name
);
}
if
(
$attr
)
{
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS(
$attr
);
}
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
, (
$el_category_f
->{
$nsuri
}->{
$tag_name
} || 0) | FOREIGN_EL | ((
$nsuri
) eq SVG_NS ? SVG_EL : (
$nsuri
) eq MML_NS ? MML_EL : 0)];
if
(
$token
->{attributes}->{xmlns} and
$token
->{attributes}->{xmlns}->{value} ne (
$nsuri
)) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'bad namespace'
,
token
=>
$token
);
}
if
(
$token
->{attributes}->{
'xmlns:xlink'
} and
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'bad namespace'
,
token
=>
$token
);
}
}
if
(
$self
->{self_closing}) {
pop
@{
$self
->{open_elements}};
delete
$self
->{self_closing};
}
else
{
}
$token
=
$self
->_get_next_token;
next
B;
}
}
elsif
(
$token
->{type} == END_TAG_TOKEN) {
if
(
$token
->{tag_name} eq
'script'
and
$self
->{open_elements}->[-1]->[1] == SVG_SCRIPT_EL) {
pop
@{
$self
->{open_elements}};
$token
=
$self
->_get_next_token;
next
B;
}
else
{
my
$i
= -1;
my
$node
=
$self
->{open_elements}->[
$i
];
my
$tag_name
=
$node
->[0]->localname;
$tag_name
=~
tr
/A-Z/a-z/;
if
(
$tag_name
ne
$token
->{tag_name}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
level
=>
$self
->{level}->{must});
}
LOOP: {
my
$tag_name
=
$node
->[0]->localname;
$tag_name
=~
tr
/A-Z/a-z/;
if
(
$tag_name
eq
$token
->{tag_name}) {
splice
@{
$self
->{open_elements}},
$i
, -
$i
, ();
$token
=
$self
->_get_next_token;
next
B;
}
$i
--;
$node
=
$self
->{open_elements}->[
$i
];
if
(
$node
->[1] & FOREIGN_EL) {
redo
LOOP;
}
}
}
}
elsif
(
$token
->{type} == COMMENT_TOKEN) {
my
$comment
=
$self
->{document}->createComment(
$token
->{data});
$self
->_data(
$comment
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$comment
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$comment
);
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{type} == DOCTYPE_TOKEN) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in html:#DOCTYPE'
,
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
die
"$0: $token->{type}: Unknown token type"
;
}
}
if
(
$self
->{insertion_mode} & TABLE_IMS and
not
$self
->{insertion_mode} & IN_CDATA_RCDATA_IM) {
C: {
my
$s
;
if
(
$token
->{type} == CHARACTER_TOKEN) {
$self
->{pending_chars} ||= [];
push
@{
$self
->{pending_chars}},
$token
;
$token
=
$self
->_get_next_token;
next
B;
}
else
{
if
(
$self
->{pending_chars}) {
$s
=
join
''
,
map
{
$_
->{data} } @{
$self
->{pending_chars}};
delete
$self
->{pending_chars};
while
(
$s
=~ s/\x00//) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
,
token
=>
$token
);
}
if
(
$s
eq
''
) {
last
C;
}
elsif
(
$s
=~ /[^\x09\x0A\x0C\x0D\x20]/) {
}
else
{
$self
->{open_elements}->[-1]->[0]->appendTextFromUnicode(
$self
,
$s
,
$token
);
last
C;
}
}
else
{
last
C;
}
}
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in table:#text'
,
token
=>
$token
);
$reconstruct_active_formatting_elements
->(
$self
,
$insert_to_foster
,
$active_formatting_elements
,
$open_tables
);
if
(
$self
->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
my
$foster_parent_element
;
my
$next_sibling
;
OE:
for
(
reverse
0..$
if
(
$self
->{open_elements}->[
$_
]->[1] == TABLE_EL) {
$foster_parent_element
=
$self
->{open_elements}->[
$_
- 1]->[0];
$next_sibling
=
$self
->{open_elements}->[
$_
]->[0];
undef
$next_sibling
unless
$next_sibling
->parentNode eq
$foster_parent_element
;
last
OE;
}
}
$foster_parent_element
||=
$self
->{open_elements}->[0]->[0];
$foster_parent_element
->insertBefore
(
$self
->{document}->createTextNode(
$s
),
$next_sibling
);
$open_tables
->[-1]->[1] = 1;
$open_tables
->[-1]->[2] = 1;
}
else
{
$self
->{open_elements}->[-1]->[0]->appendTextFromUnicode(
$self
,
$s
);
}
}
}
if
(
$token
->{type} == DOCTYPE_TOKEN) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in html:#DOCTYPE'
,
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{type} == START_TAG_TOKEN and
$token
->{tag_name} eq
'html'
) {
if
(
$self
->{insertion_mode} == AFTER_HTML_BODY_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after html'
,
text
=>
'html'
,
token
=>
$token
);
$self
->{insertion_mode} = AFTER_BODY_IM;
}
elsif
(
$self
->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after html'
,
text
=>
'html'
,
token
=>
$token
);
$self
->{insertion_mode} = AFTER_FRAMESET_IM;
}
else
{
}
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not first start tag'
,
token
=>
$token
);
my
$top_el
=
$self
->{open_elements}->[0]->[0];
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
unless
(
$top_el
->hasAttribute(
$attr_name
)) {
$top_el
->setAttribute
(
$attr_name
,
$token
->{attributes}->{
$attr_name
}->{value});
}
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{type} == COMMENT_TOKEN) {
my
$comment
=
$self
->{document}->createComment (
$token
->{data});
$self
->_data(
$comment
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$comment
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
if
(
$self
->{insertion_mode} & AFTER_HTML_IMS) {
$self
->{document}->appendChild (
$comment
);
}
elsif
(
$self
->{insertion_mode} == AFTER_BODY_IM) {
$self
->{open_elements}->[0]->[0]->appendChild(
$comment
);
}
else
{
$self
->{open_elements}->[-1]->[0]->appendChild(
$comment
);
$open_tables
->[-1]->[2] = 0
if
@$open_tables
;
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$self
->{insertion_mode} & IN_CDATA_RCDATA_IM) {
if
(
$token
->{type} == CHARACTER_TOKEN) {
$token
->{data} =~ s/^\x0A//
if
$self
->{ignore_newline};
delete
$self
->{ignore_newline};
if
(
length
$token
->{data}) {
$self
->{open_elements}->[-1]->[0]->appendTextFromUnicode
(
$self
,
$token
->{data},
$token
);
}
else
{
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{type} == END_TAG_TOKEN) {
delete
$self
->{ignore_newline};
if
(
$token
->{tag_name} eq
'script'
) {
my
$script
=
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
$token
=
$self
->_get_next_token;
next
B;
}
else
{
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
$token
=
$self
->_get_next_token;
next
B;
}
}
elsif
(
$token
->{type} == END_OF_FILE_TOKEN) {
delete
$self
->{ignore_newline};
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$self
->{open_elements}->[-1]->[0]
->localname,
token
=>
$token
);
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
next
B;
}
else
{
die
"$0: $token->{type}: In CDATA/RCDATA: Unknown token type"
;
}
}
if
(
$self
->{insertion_mode} == IN_HEAD_IM and
(
$token
->{tag_name}||
''
) eq
'object'
and
$token
->{type} == END_TAG_TOKEN and
$self
->_data(
$self
->{
'document'
},
'isHTML4'
)) {
pop
@{
$self
->{open_elements}}
if
$self
->{open_elements}->[-1]->[0]->localname eq
'object'
;
}
if
(
$self
->{insertion_mode} & HEAD_IMS) {
if
(
$token
->{type} == CHARACTER_TOKEN) {
if
(
$token
->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
unless
(
$self
->{insertion_mode} == BEFORE_HEAD_IM) {
$self
->{open_elements}->[-1]->[0]->appendTextFromUnicode(
$self
, $1,
$token
);
}
else
{
}
unless
(
length
$token
->{data}) {
$token
=
$self
->_get_next_token;
next
B;
}
}
if
(
$self
->{insertion_mode} == BEFORE_HEAD_IM) {
$self
->{head_element} =
$self
->{document}->createElementNS((HTML_NS),
'head'
);
$self
->_data(
$self
->{head_element},
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$self
->{head_element},
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->_data(
$self
->{head_element},
implied
=> __LINE__);
$self
->{open_elements}->[-1]->[0]->appendChild (
$self
->{head_element});
push
@{
$self
->{open_elements}},
[
$self
->{head_element},
$el_category
->{head}];
pop
@{
$self
->{open_elements}};
}
elsif
(
$self
->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
pop
@{
$self
->{open_elements}};
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in noscript:#text'
,
token
=>
$token
);
pop
@{
$self
->{open_elements}};
}
elsif
(
$self
->{insertion_mode} == IN_HEAD_IM) {
pop
@{
$self
->{open_elements}};
}
else
{
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
'body'
);
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->_data(
$el
,
implied
=> __LINE__);
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
'body'
} || 0];
}
$self
->{insertion_mode} = IN_BODY_IM;
next
B;
}
elsif
(
$token
->{type} == START_TAG_TOKEN) {
if
(
$token
->{tag_name} eq
'head'
) {
if
(
$self
->{insertion_mode} == BEFORE_HEAD_IM) {
$self
->{head_element} =
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS (
undef
,
$attr_name
);
$attr
->setValue (
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$self
->{head_element}->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$self
->{head_element},
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$self
->{head_element},
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild(
$self
->{head_element});
push
@{
$self
->{open_elements}},
[
$self
->{head_element},
$el_category
->{head}];
$self
->{insertion_mode} = IN_HEAD_IM;
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$self
->{insertion_mode} == AFTER_HEAD_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after head'
,
text
=>
'head'
,
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in head:head'
,
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
}
elsif
(
$self
->{insertion_mode} == BEFORE_HEAD_IM) {
$self
->{head_element} =
$self
->{document}->createElementNS((HTML_NS),
'head'
);
$self
->_data(
$self
->{head_element},
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$self
->{head_element},
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->_data(
$self
->{head_element},
implied
=> __LINE__);
$self
->{open_elements}->[-1]->[0]->appendChild (
$self
->{head_element});
push
@{
$self
->{open_elements}},
[
$self
->{head_element},
$el_category
->{head}];
$self
->{insertion_mode} = IN_HEAD_IM;
}
else
{
}
if
(
$token
->{tag_name} eq
'base'
) {
if
(
$self
->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
pop
@{
$self
->{open_elements}};
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in noscript'
,
text
=>
'base'
,
token
=>
$token
);
$self
->{insertion_mode} = IN_HEAD_IM;
}
else
{
}
if
(
$self
->{insertion_mode} == AFTER_HEAD_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after head'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
push
@{
$self
->{open_elements}},
[
$self
->{head_element},
$el_category
->{head}];
}
else
{
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS (
undef
,
$attr_name
);
$attr
->setValue (
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS(
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
pop
@{
$self
->{open_elements}};
pop
@{
$self
->{open_elements}}
if
$self
->{insertion_mode} == AFTER_HEAD_IM;
$token
=
$self
->_get_next_token;
next
B;
}
elsif
({
link
=> 1,
basefont
=> 1,
bgsound
=> 1,
}->{
$token
->{tag_name}}) {
if
(
$self
->{insertion_mode} == AFTER_HEAD_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after head'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
push
@{
$self
->{open_elements}},
[
$self
->{head_element},
$el_category
->{head}];
}
else
{
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS (
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS(
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
pop
@{
$self
->{open_elements}};
pop
@{
$self
->{open_elements}}
if
$self
->{insertion_mode} == AFTER_HEAD_IM;
delete
$self
->{self_closing};
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'command'
) {
if
(
$self
->{insertion_mode} == IN_HEAD_IM) {
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS (
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS(
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
pop
@{
$self
->{open_elements}};
pop
@{
$self
->{open_elements}}
if
$self
->{insertion_mode} == AFTER_HEAD_IM;
delete
$self
->{self_closing};
$token
=
$self
->_get_next_token;
next
B;
}
else
{
}
}
elsif
(
$token
->{tag_name} eq
'meta'
) {
if
(
$self
->{insertion_mode} == AFTER_HEAD_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after head'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
push
@{
$self
->{open_elements}},
[
$self
->{head_element},
$el_category
->{head}];
}
else
{
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue (
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
my
$meta_el
=
pop
@{
$self
->{open_elements}};
unless
(
$self
->{confident}) {
if
(
$token
->{attributes}->{charset}) {
$self
->{change_encoding}
->(
$self
,
$token
->{attributes}->{charset}->{value},
$token
);
$self
->_data(
$meta_el
->[0]->getAttributeNodeNS (
undef
,
'charset'
),
manakai_has_reference
=>
$token
->{attributes}->{charset}->{has_reference});
}
elsif
(
$token
->{attributes}->{content} and
$token
->{attributes}->{
'http-equiv'
}) {
if
(
$token
->{attributes}->{
'http-equiv'
}->{value}
=~ /\A[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]\z/ and
$token
->{attributes}->{content}->{value}
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
[\x09\x0A\x0C\x0D\x20]*=
[\x09\x0A\x0C\x0D\x20]*(?>
"([^"
]*)"|
'([^'
]*)'|
([^"'\x09\x0A\x0C\x0D\x20]
[^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
$self
->{change_encoding}
->(
$self
,
defined
$1 ? $1 :
defined
$2 ? $2 : $3,
$token
);
$self
->_data(
$meta_el
->[0]->getAttributeNodeNS (
undef
,
'content'
),
manakai_has_reference
=>
$token
->{attributes}->{content}->{has_reference});
}
else
{
}
}
}
else
{
if
(
$token
->{attributes}->{charset}) {
$self
->_data(
$meta_el
->[0]->getAttributeNodeNS(
undef
,
'charset'
),
manakai_has_reference
=>
$token
->{attributes}->{charset}->{has_reference});
}
if
(
$token
->{attributes}->{content}) {
$self
->_data(
$meta_el
->[0]->getAttributeNodeNS(
undef
,
'content'
),
manakai_has_reference
=>
$token
->{attributes}->{content}->{has_reference});
}
}
pop
@{
$self
->{open_elements}}
if
$self
->{insertion_mode} == AFTER_HEAD_IM;
delete
$self
->{self_closing};
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'title'
) {
if
(
$self
->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
pop
@{
$self
->{open_elements}};
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in noscript'
,
text
=>
'title'
,
token
=>
$token
);
$self
->{insertion_mode} = IN_HEAD_IM;
}
elsif
(
$self
->{insertion_mode} == AFTER_HEAD_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after head'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
push
@{
$self
->{open_elements}},
[
$self
->{head_element},
$el_category
->{head}];
}
else
{
}
$parse_rcdata
->(
$self
,
$insert
,
$open_tables
, 1);
splice
@{
$self
->{open_elements}}, -2, 1, ()
if
(
$self
->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
next
B;
}
elsif
(
$token
->{tag_name} eq
'style'
or
$token
->{tag_name} eq
'noframes'
) {
if
(
$self
->{insertion_mode} == AFTER_HEAD_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after head'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
push
@{
$self
->{open_elements}},
[
$self
->{head_element},
$el_category
->{head}];
}
else
{
}
$parse_rcdata
->(
$self
,
$insert
,
$open_tables
, 0);
splice
@{
$self
->{open_elements}}, -2, 1, ()
if
(
$self
->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
next
B;
}
elsif
(
$token
->{tag_name} eq
'noscript'
) {
if
(
$self
->{insertion_mode} == IN_HEAD_IM) {
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue (
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$self
->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$self
->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in noscript'
,
text
=>
'noscript'
,
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
}
}
elsif
(
$token
->{tag_name} eq
'script'
) {
if
(
$self
->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
pop
@{
$self
->{open_elements}};
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in noscript'
,
text
=>
'script'
,
token
=>
$token
);
$self
->{insertion_mode} = IN_HEAD_IM;
}
elsif
(
$self
->{insertion_mode} == AFTER_HEAD_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after head'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
push
@{
$self
->{open_elements}},
[
$self
->{head_element},
$el_category
->{head}];
}
else
{
}
$script_start_tag
->(
$self
,
$insert
,
$open_tables
);
splice
@{
$self
->{open_elements}}, -2, 1
if
(
$self
->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
next
B;
}
elsif
(
$token
->{tag_name} eq
'body'
or
$token
->{tag_name} eq
'frameset'
) {
if
(
$self
->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
pop
@{
$self
->{open_elements}};
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in noscript'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
pop
@{
$self
->{open_elements}};
}
elsif
(
$self
->{insertion_mode} == IN_HEAD_IM) {
pop
@{
$self
->{open_elements}};
}
else
{
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS (
undef
,
$attr_name
);
$attr
->setValue (
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
# Select the insertion mode that follows the element just inserted for
# this start tag.
if ($token->{tag_name} eq 'body') {
  # An explicit <body> start tag clears the frameset_ok flag before
  # switching to the in-body mode.
  delete $self->{frameset_ok};
  $self->{insertion_mode} = IN_BODY_IM;
}
elsif ($token->{tag_name} eq 'frameset') {
  $self->{insertion_mode} = IN_FRAMESET_IM;
}
else {
  # Internal consistency check: the enclosing branch only admits 'body'
  # and 'frameset'.  The diagnostic reports the tag name carried by the
  # token, which is the value every test above dispatches on.
  die "$0: tag name: $token->{tag_name}";
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$self
->{insertion_mode} == IN_HEAD_IM and
$token
->{tag_name} =~ m
'^(object|param)$'
and
$self
->_data(
$self
->{
'document'
},
'isHTML4'
)) {
{
my
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS (
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS(
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
if
(
$token
->{tag_name} eq
'param'
)
{
pop
@{
$self
->{open_elements}};
delete
$self
->{self_closing};
}
$token
=
$self
->_get_next_token;
next
B;
}
else
{
}
if
(
$self
->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
pop
@{
$self
->{open_elements}};
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in noscript:/'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
pop
@{
$self
->{open_elements}};
}
elsif
(
$self
->{insertion_mode} == IN_HEAD_IM) {
pop
@{
$self
->{open_elements}};
}
else
{
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
'body'
);
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->_data(
$el
,
implied
=> __LINE__);
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
'body'
} || 0];
}
$self
->{insertion_mode} = IN_BODY_IM;
next
B;
}
elsif
(
$token
->{type} == END_TAG_TOKEN) {
if
(
$token
->{tag_name} eq
'head'
) {
if
(
$self
->{insertion_mode} == BEFORE_HEAD_IM) {
$self
->{head_element} =
$self
->{document}->createElementNS((HTML_NS),
'head'
);
$self
->_data(
$self
->{head_element},
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$self
->{head_element},
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild(
$self
->{head_element});
push
@{
$self
->{open_elements}},
[
$self
->{head_element},
$el_category
->{head}];
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = AFTER_HEAD_IM;
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$self
->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
}
elsif
(
$self
->{insertion_mode} == IN_HEAD_IM) {
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = AFTER_HEAD_IM;
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$self
->{insertion_mode} == AFTER_HEAD_IM) {
}
else
{
die
"$0: $self->{insertion_mode}: Unknown insertion mode"
;
}
}
elsif
(
$token
->{tag_name} eq
'noscript'
) {
if
(
$self
->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = IN_HEAD_IM;
$token
=
$self
->_get_next_token;
next
B;
}
else
{
}
}
elsif
({
body
=> (
$self
->{insertion_mode} != IN_HEAD_NOSCRIPT_IM),
html
=> (
$self
->{insertion_mode} != IN_HEAD_NOSCRIPT_IM),
br
=> 1,
}->{
$token
->{tag_name}}) {
if
(
$self
->{insertion_mode} == BEFORE_HEAD_IM) {
$self
->{head_element} =
$self
->{document}->createElementNS((HTML_NS),
'head'
);
$self
->_data(
$self
->{head_element},
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$self
->{head_element},
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->_data(
$self
->{head_element},
implied
=> __LINE__);
$self
->{open_elements}->[-1]->[0]->appendChild (
$self
->{head_element});
$self
->{insertion_mode} = AFTER_HEAD_IM;
}
elsif
(
$self
->{insertion_mode} == IN_HEAD_IM) {
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = AFTER_HEAD_IM;
}
elsif
(
$self
->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = IN_HEAD_IM;
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = AFTER_HEAD_IM;
}
elsif
(
$self
->{insertion_mode} == AFTER_HEAD_IM) {
}
else
{
die
"$0: $self->{insertion_mode}: Unknown insertion mode"
;
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
'body'
);
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->_data(
$el
,
implied
=> __LINE__);
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
'body'
} || 0];
}
$self
->{insertion_mode} = IN_BODY_IM;
next
B;
}
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{type} == END_OF_FILE_TOKEN) {
if
(
$self
->{insertion_mode} == BEFORE_HEAD_IM) {
$self
->{head_element} =
$self
->{document}->createElementNS((HTML_NS),
'head'
);
$self
->_data(
$self
->{head_element},
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$self
->{head_element},
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->_data(
$self
->{head_element},
implied
=> __LINE__);
$self
->{open_elements}->[-1]->[0]->appendChild(
$self
->{head_element});
}
elsif
(
$self
->{insertion_mode} == IN_HEAD_IM) {
pop
@{
$self
->{open_elements}};
}
elsif
(
$self
->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in noscript:#eof'
,
token
=>
$token
);
pop
@{
$self
->{open_elements}};
pop
@{
$self
->{open_elements}};
}
else
{
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
'body'
);
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->_data(
$el
,
implied
=> __LINE__);
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
'body'
} || 0];
}
$self
->{insertion_mode} = IN_BODY_IM;
next
B;
}
else
{
die
"$0: $token->{type}: Unknown token type"
;
}
}
elsif
(
$self
->{insertion_mode} & BODY_IMS) {
if
(
$token
->{type} == CHARACTER_TOKEN) {
while
(
$token
->{data} =~ s/\x00//g) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
,
token
=>
$token
);
}
if
(
$token
->{data} eq
''
) {
$token
=
$self
->_get_next_token;
next
B;
}
$reconstruct_active_formatting_elements
->(
$self
,
$insert_to_current
,
$active_formatting_elements
,
$open_tables
);
$self
->{open_elements}->[-1]->[0]->appendTextFromUnicode(
$self
,
$token
->{data},
$token
);
if
(
$self
->{frameset_ok} and
$token
->{data} =~ /[^\x09\x0A\x0C\x0D\x20]/) {
delete
$self
->{frameset_ok};
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{type} == START_TAG_TOKEN) {
if
({
caption
=> 1,
col
=> 1,
colgroup
=> 1,
tbody
=> 1,
td
=> 1,
tfoot
=> 1,
th
=> 1,
thead
=> 1,
tr
=> 1,
}->{
$token
->{tag_name}}) {
if
((
$self
->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
for
(
reverse
0..$
my
$node
=
$self
->{open_elements}->[
$_
];
if
(
$node
->[1] == TABLE_CELL_EL) {
$token
->{self_closing} =
$self
->{self_closing};
unshift
@{
$self
->{token}},
$token
;
delete
$self
->{self_closing};
$token
= {
type
=> END_TAG_TOKEN,
tag_name
=>
$node
->[0]->tagName,
line
=>
$token
->{line},
column
=>
$token
->{column}};
next
B;
}
elsif
(
$node
->[1] & TABLE_SCOPING_EL) {
last
;
}
}
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'start tag not allowed'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
elsif
((
$self
->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
'caption'
,
token
=>
$token
);
my
$i
;
INSCOPE: {
for
(
reverse
0..$
my
$node
=
$self
->{open_elements}->[
$_
];
if
(
$node
->[1] == CAPTION_EL) {
$i
=
$_
;
last
INSCOPE;
}
elsif
(
$node
->[1] & TABLE_SCOPING_EL) {
last
;
}
}
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'start tag not allowed'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
while
(
$self
->{open_elements}->[-1]->[1]
& END_TAG_OPTIONAL_EL) {
pop
@{
$self
->{open_elements}};
}
unless
(
$self
->{open_elements}->[-1]->[1] == CAPTION_EL) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$self
->{open_elements}->[-1]->[0]->tagName,
token
=>
$token
);
}
else
{
}
splice
@{
$self
->{open_elements}},
$i
;
$clear_up_to_marker
->(
$active_formatting_elements
);
$self
->{insertion_mode} = IN_TABLE_IM;
next
B;
}
else
{
}
}
else
{
}
}
elsif
(
$token
->{type} == END_TAG_TOKEN) {
if
(
$token
->{tag_name} eq
'td'
or
$token
->{tag_name} eq
'th'
) {
if
((
$self
->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
my
$i
;
INSCOPE:
for
(
reverse
0..$
my
$node
=
$self
->{open_elements}->[
$_
];
if
(
$node
->[0]->tagName eq
$token
->{tag_name}) {
$i
=
$_
;
last
INSCOPE;
}
elsif
(
$node
->[1] & TABLE_SCOPING_EL) {
last
INSCOPE;
}
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
while
(
$self
->{open_elements}->[-1]->[1]
& END_TAG_OPTIONAL_EL) {
pop
@{
$self
->{open_elements}};
}
if
(
$self
->{open_elements}->[-1]->[0]->tagName
ne
$token
->{tag_name}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$self
->{open_elements}->[-1]->[0]->tagName,
token
=>
$token
);
}
else
{
}
splice
@{
$self
->{open_elements}},
$i
;
$clear_up_to_marker
->(
$active_formatting_elements
);
$self
->{insertion_mode} = IN_ROW_IM;
$token
=
$self
->_get_next_token;
next
B;
}
elsif
((
$self
->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
}
}
elsif
(
$token
->{tag_name} eq
'caption'
) {
if
((
$self
->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
my
$i
;
INSCOPE: {
for
(
reverse
0..$
my
$node
=
$self
->{open_elements}->[
$_
];
if
(
$node
->[1] == CAPTION_EL) {
$i
=
$_
;
last
INSCOPE;
}
elsif
(
$node
->[1] & TABLE_SCOPING_EL) {
last
;
}
}
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
while
(
$self
->{open_elements}->[-1]->[1]
& END_TAG_OPTIONAL_EL) {
pop
@{
$self
->{open_elements}};
}
unless
(
$self
->{open_elements}->[-1]->[1] == CAPTION_EL) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$self
->{open_elements}->[-1]->[0]->tagName,
token
=>
$token
);
}
else
{
}
splice
@{
$self
->{open_elements}},
$i
;
$clear_up_to_marker
->(
$active_formatting_elements
);
$self
->{insertion_mode} = IN_TABLE_IM;
$token
=
$self
->_get_next_token;
next
B;
}
elsif
((
$self
->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
}
}
elsif
({
table
=> 1,
tbody
=> 1,
tfoot
=> 1,
thead
=> 1,
tr
=> 1,
}->{
$token
->{tag_name}} and
(
$self
->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
my
$i
;
my
$tn
;
INSCOPE: {
for
(
reverse
0..$
my
$node
=
$self
->{open_elements}->[
$_
];
if
(
$node
->[0]->localname eq
$token
->{tag_name}) {
$i
=
$_
;
$token
->{self_closing} =
$self
->{self_closing};
unshift
@{
$self
->{token}},
$token
;
delete
$self
->{self_closing};
$token
= {
type
=> END_TAG_TOKEN,
tag_name
=>
$tn
,
line
=>
$token
->{line},
column
=>
$token
->{column}};
next
B;
}
elsif
(
$node
->[1] == TABLE_CELL_EL) {
$tn
=
$node
->[0]->tagName;
}
elsif
(
$node
->[1] & TABLE_SCOPING_EL) {
last
;
}
}
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
}
elsif
(
$token
->{tag_name} eq
'table'
and
(
$self
->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
'caption'
,
token
=>
$token
);
my
$i
;
INSCOPE:
for
(
reverse
0..$
my
$node
=
$self
->{open_elements}->[
$_
];
if
(
$node
->[1] == CAPTION_EL) {
$i
=
$_
;
last
INSCOPE;
}
elsif
(
$node
->[1] & TABLE_SCOPING_EL) {
last
INSCOPE;
}
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
'caption'
,
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
while
(
$self
->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
pop
@{
$self
->{open_elements}};
}
unless
(
$self
->{open_elements}->[-1]->[1] == CAPTION_EL) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$self
->{open_elements}->[-1]->[0]
->tagName,
token
=>
$token
);
}
else
{
}
splice
@{
$self
->{open_elements}},
$i
;
$clear_up_to_marker
->(
$active_formatting_elements
);
$self
->{insertion_mode} = IN_TABLE_IM;
next
B;
}
elsif
({
body
=> 1,
col
=> 1,
colgroup
=> 1,
html
=> 1,
}->{
$token
->{tag_name}}) {
if
(
$self
->{insertion_mode} & BODY_TABLE_IMS) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
}
}
elsif
({
tbody
=> 1,
tfoot
=> 1,
thead
=> 1,
tr
=> 1,
}->{
$token
->{tag_name}} and
(
$self
->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
}
}
elsif
(
$token
->{type} == END_OF_FILE_TOKEN) {
for
my
$entry
(@{
$self
->{open_elements}}) {
unless
(
$entry
->[1] & ALL_END_TAG_OPTIONAL_EL) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in body:#eof'
,
token
=>
$token
);
last
;
}
}
last
B;
}
else
{
die
"$0: $token->{type}: Unknown token type"
;
}
$insert
=
$insert_to_current
;
}
elsif
(
$self
->{insertion_mode} & TABLE_IMS) {
if
(
$token
->{type} == START_TAG_TOKEN) {
if
({
tr
=> ((
$self
->{insertion_mode} & IM_MASK) != IN_ROW_IM),
th
=> 1,
td
=> 1,
}->{
$token
->{tag_name}}) {
if
((
$self
->{insertion_mode} & IM_MASK) == IN_TABLE_IM) {
while
(not (
$self
->{open_elements}->[-1]->[1]
& TABLE_SCOPING_EL)) {
pop
@{
$self
->{open_elements}};
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
'tbody'
);
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->_data(
$el
,
implied
=> __LINE__);
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
'tbody'
} || 0];
}
$self
->{insertion_mode} = IN_TABLE_BODY_IM;
}
if
((
$self
->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
unless
(
$token
->{tag_name} eq
'tr'
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'missing start tag:tr'
,
token
=>
$token
);
}
while
(not (
$self
->{open_elements}->[-1]->[1]
& TABLE_ROWS_SCOPING_EL)) {
pop
@{
$self
->{open_elements}};
}
$self
->{insertion_mode} = IN_ROW_IM;
if
(
$token
->{tag_name} eq
'tr'
) {
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS(
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$open_tables
->[-1]->[2] = 0
if
@$open_tables
;
$token
=
$self
->_get_next_token;
next
B;
}
else
{
{
my
$el
;
$el
=
$self
->{document}->createElementNS
((HTML_NS),
'tr'
);
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->_data(
$el
,
implied
=> __LINE__);
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
'tr'
} || 0];
}
}
}
else
{
}
while
(not (
$self
->{open_elements}->[-1]->[1]
& TABLE_ROW_SCOPING_EL)) {
pop
@{
$self
->{open_elements}};
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$open_tables
->[-1]->[2] = 0
if
@$open_tables
;
$self
->{insertion_mode} = IN_CELL_IM;
push
@$active_formatting_elements
, [
'#marker'
,
''
,
undef
];
$token
=
$self
->_get_next_token;
next
B;
}
elsif
({
caption
=> 1,
col
=> 1,
colgroup
=> 1,
tbody
=> 1,
tfoot
=> 1,
thead
=> 1,
tr
=> 1,
}->{
$token
->{tag_name}}) {
if
((
$self
->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
my
$i
;
INSCOPE:
for
(
reverse
0..$
my
$node
=
$self
->{open_elements}->[
$_
];
if
(
$node
->[1] == TABLE_ROW_EL) {
$i
=
$_
;
last
INSCOPE;
}
elsif
(
$node
->[1] & TABLE_SCOPING_EL) {
last
INSCOPE;
}
}
# $i is only set by the scope scan just above when a table-row entry was
# found on the stack of open elements.  Without one there is no row to
# close, so report the error, discard this token and move on to the next.
#
# The error type is spelled 'unmatched end tag' so that it agrees with
# the parallel row-group check that follows and with every other
# not-in-scope report in this loop.
# NOTE(review): the token handled here is a start tag, so the "end tag"
# wording of the type looks questionable -- confirm against the error
# catalogue before renaming it further.
unless (defined $i) {
  $self->{parse_error}->(
    level => $self->{level}->{must},
    type  => 'unmatched end tag',
    text  => $token->{tag_name},
    token => $token
  );
  $token = $self->_get_next_token;
  next B;
}
while
(not (
$self
->{open_elements}->[-1]->[1]
& TABLE_ROW_SCOPING_EL)) {
pop
@{
$self
->{open_elements}};
}
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = IN_TABLE_BODY_IM;
if
(
$token
->{tag_name} eq
'tr'
) {
next
B;
}
else
{
}
}
if
((
$self
->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
my
$i
;
INSCOPE:
for
(
reverse
0..$
my
$node
=
$self
->{open_elements}->[
$_
];
if
(
$node
->[1] == TABLE_ROW_GROUP_EL) {
$i
=
$_
;
last
INSCOPE;
}
elsif
(
$node
->[1] & TABLE_SCOPING_EL) {
last
INSCOPE;
}
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
while
(not (
$self
->{open_elements}->[-1]->[1]
& TABLE_ROWS_SCOPING_EL)) {
pop
@{
$self
->{open_elements}};
}
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = IN_TABLE_IM;
}
else
{
}
if
(
$token
->{tag_name} eq
'col'
) {
while
(not (
$self
->{open_elements}->[-1]->[1]
& TABLE_SCOPING_EL)) {
pop
@{
$self
->{open_elements}};
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
'colgroup'
);
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->_data(
$el
,
implied
=> __LINE__);
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
'colgroup'
} || 0];
}
$self
->{insertion_mode} = IN_COLUMN_GROUP_IM;
$open_tables
->[-1]->[2] = 0
if
@$open_tables
;
next
B;
}
elsif
({
caption
=> 1,
colgroup
=> 1,
tbody
=> 1,
tfoot
=> 1,
thead
=> 1,
}->{
$token
->{tag_name}}) {
while
(not (
$self
->{open_elements}->[-1]->[1]
& TABLE_SCOPING_EL)) {
pop
@{
$self
->{open_elements}};
}
push
@$active_formatting_elements
, [
'#marker'
,
''
,
undef
]
if
$token
->{tag_name} eq
'caption'
;
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$open_tables
->[-1]->[2] = 0
if
@$open_tables
;
$self
->{insertion_mode} = {
caption
=> IN_CAPTION_IM,
colgroup
=> IN_COLUMN_GROUP_IM,
tbody
=> IN_TABLE_BODY_IM,
tfoot
=> IN_TABLE_BODY_IM,
thead
=> IN_TABLE_BODY_IM,
}->{
$token
->{tag_name}};
$token
=
$self
->_get_next_token;
next
B;
}
else
{
die
"$0: in table: <>: $token->{tag_name}"
;
}
}
elsif
(
$token
->{tag_name} eq
'table'
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$self
->{open_elements}->[-1]->[0]
->tagName,
token
=>
$token
);
my
$i
;
# Scan the stack of open elements from the innermost entry outwards.
# $i receives the index of the first entry whose category equals
# TABLE_EL; the scan stops early, leaving $i undefined, as soon as an
# entry flagged TABLE_SCOPING_EL is met.
INSCOPE: for (reverse 0 .. $#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == TABLE_EL) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] & TABLE_SCOPING_EL) {
    last INSCOPE;
  }
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
'table'
,
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
while
(
$self
->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
pop
@{
$self
->{open_elements}};
}
unless
(
$self
->{open_elements}->[-1]->[1] == TABLE_EL) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$self
->{open_elements}->[-1]->[0]
->tagName,
token
=>
$token
);
}
else
{
}
splice
@{
$self
->{open_elements}},
$i
;
pop
@{
$open_tables
};
$self
->_reset_insertion_mode;
next
B;
}
elsif
(
$token
->{tag_name} eq
'style'
) {
$parse_rcdata
->(
$self
,
$insert
,
$open_tables
, 0);
$open_tables
->[-1]->[2] = 0
if
@$open_tables
;
next
B;
}
elsif
(
$token
->{tag_name} eq
'script'
) {
$script_start_tag
->(
$self
,
$insert
,
$open_tables
);
$open_tables
->[-1]->[2] = 0
if
@$open_tables
;
next
B;
}
elsif
(
$token
->{tag_name} eq
'input'
) {
if
(
$token
->{attributes}->{type}) {
my
$type
=
$token
->{attributes}->{type}->{value};
$type
=~
tr
/A-Z/a-z/;
if
(
$type
eq
'hidden'
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in table'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$open_tables
->[-1]->[2] = 0
if
@$open_tables
;
pop
@{
$self
->{open_elements}};
$token
=
$self
->_get_next_token;
delete
$self
->{self_closing};
next
B;
}
else
{
}
}
else
{
}
}
elsif
(
$token
->{tag_name} eq
'form'
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'form in table'
,
token
=>
$token
);
if
(
$self
->{form_element}) {
$token
=
$self
->_get_next_token;
next
B;
}
else
{
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS(
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$self
->{form_element} =
$self
->{open_elements}->[-1]->[0];
pop
@{
$self
->{open_elements}};
$token
=
$self
->_get_next_token;
next
B;
}
}
else
{
}
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in table'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$insert
=
$insert_to_foster
;
}
elsif
(
$token
->{type} == END_TAG_TOKEN) {
if
(
$token
->{tag_name} eq
'tr'
and
(
$self
->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
my
$i
;
# Scan the stack of open elements from the innermost entry outwards.
# $i receives the index of the first entry whose category equals
# TABLE_ROW_EL; the scan stops early, leaving $i undefined, as soon as
# an entry flagged TABLE_SCOPING_EL is met.
INSCOPE: for (reverse 0 .. $#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == TABLE_ROW_EL) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] & TABLE_SCOPING_EL) {
    last INSCOPE;
  }
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
}
while
(not (
$self
->{open_elements}->[-1]->[1]
& TABLE_ROW_SCOPING_EL)) {
pop
@{
$self
->{open_elements}};
}
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = IN_TABLE_BODY_IM;
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'table'
) {
if
((
$self
->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
my
$i
;
# Scan the stack of open elements from the innermost entry outwards.
# $i receives the index of the first entry whose category equals
# TABLE_ROW_EL; the scan stops early, leaving $i undefined, as soon as
# an entry flagged TABLE_SCOPING_EL is met.
INSCOPE: for (reverse 0 .. $#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == TABLE_ROW_EL) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] & TABLE_SCOPING_EL) {
    last INSCOPE;
  }
}
# The preceding scope search found nothing: report the parse error,
# drop the current token and continue the main loop with the next one.
# The 'text' field carries the tag name, as in the other 'unmatched end
# tag' reports of this branch; $token->{type} is the numeric token-type
# code (it is compared against END_TAG_TOKEN with ==), not a name.
unless (defined $i) {
  $self->{parse_error}->(
    level => $self->{level}->{must},
    type  => 'unmatched end tag',
    text  => $token->{tag_name},
    token => $token,
  );
  $token = $self->_get_next_token;
  next B;
}
while
(not (
$self
->{open_elements}->[-1]->[1]
& TABLE_ROW_SCOPING_EL)) {
pop
@{
$self
->{open_elements}};
}
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = IN_TABLE_BODY_IM;
}
if
((
$self
->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
my
$i
;
# Scan the stack of open elements from the innermost entry outwards.
# $i receives the index of the first entry whose category equals
# TABLE_ROW_GROUP_EL; the scan stops early, leaving $i undefined, as
# soon as an entry flagged TABLE_SCOPING_EL is met.
INSCOPE: for (reverse 0 .. $#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == TABLE_ROW_GROUP_EL) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] & TABLE_SCOPING_EL) {
    last INSCOPE;
  }
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
while
(not (
$self
->{open_elements}->[-1]->[1]
& TABLE_ROWS_SCOPING_EL)) {
pop
@{
$self
->{open_elements}};
}
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = IN_TABLE_IM;
}
my
$i
;
# Scan the stack of open elements from the innermost entry outwards.
# $i receives the index of the first entry whose category equals
# TABLE_EL; the scan stops early, leaving $i undefined, as soon as an
# entry flagged TABLE_SCOPING_EL is met.
INSCOPE: for (reverse 0 .. $#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == TABLE_EL) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] & TABLE_SCOPING_EL) {
    last INSCOPE;
  }
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
splice
@{
$self
->{open_elements}},
$i
;
pop
@{
$open_tables
};
$self
->_reset_insertion_mode;
$token
=
$self
->_get_next_token;
next
B;
}
elsif
({
tbody
=> 1,
tfoot
=> 1,
thead
=> 1,
}->{
$token
->{tag_name}} and
$self
->{insertion_mode} & ROW_IMS) {
if
((
$self
->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
my
$i
;
# Scan the stack of open elements from the innermost entry outwards.
# $i receives the index of the first entry whose element's tagName
# equals the current token's tag name; the scan stops early, leaving
# $i undefined, as soon as an entry flagged TABLE_SCOPING_EL is met.
INSCOPE: for (reverse 0 .. $#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[0]->tagName eq $token->{tag_name}) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] & TABLE_SCOPING_EL) {
    last INSCOPE;
  }
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
# Second scope search in this branch, this time for an entry whose
# category equals TABLE_ROW_EL.  $i still holds the index produced by
# the preceding search, so it is cleared first; without the reset the
# "unless (defined $i)" check that follows this loop could never fire.
undef $i;
INSCOPE: for (reverse 0 .. $#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == TABLE_ROW_EL) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] & TABLE_SCOPING_EL) {
    # A table-scoping entry ends the search with $i left undefined.
    last INSCOPE;
  }
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
'tr'
,
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
while
(not (
$self
->{open_elements}->[-1]->[1]
& TABLE_ROW_SCOPING_EL)) {
pop
@{
$self
->{open_elements}};
}
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = IN_TABLE_BODY_IM;
}
my
$i
;
# Scan the stack of open elements from the innermost entry outwards.
# $i receives the index of the first entry whose element's tagName
# equals the current token's tag name; the scan stops early, leaving
# $i undefined, as soon as an entry flagged TABLE_SCOPING_EL is met.
INSCOPE: for (reverse 0 .. $#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[0]->tagName eq $token->{tag_name}) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] & TABLE_SCOPING_EL) {
    last INSCOPE;
  }
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
while
(not (
$self
->{open_elements}->[-1]->[1]
& TABLE_ROWS_SCOPING_EL)) {
pop
@{
$self
->{open_elements}};
}
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = IN_TABLE_IM;
$token
=
$self
->_get_next_token;
next
B;
}
elsif
({
body
=> 1,
caption
=> 1,
col
=> 1,
colgroup
=> 1,
html
=> 1,
td
=> 1,
th
=> 1,
tr
=> 1,
tbody
=> 1,
tfoot
=> 1,
thead
=> 1,
}->{
$token
->{tag_name}}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in table:/'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$insert
=
$insert_to_foster
;
}
}
elsif
(
$token
->{type} == END_OF_FILE_TOKEN) {
unless
(
$self
->{open_elements}->[-1]->[1] == HTML_EL and
@{
$self
->{open_elements}} == 1) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in body:#eof'
,
token
=>
$token
);
}
else
{
}
last
B;
}
else
{
die
"$0: $token->{type}: Unknown token type"
;
}
}
elsif
((
$self
->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
if
(
$token
->{type} == CHARACTER_TOKEN) {
if
(
$token
->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
$self
->{open_elements}->[-1]->[0]->appendTextFromUnicode(
$self
, $1,
$token
);
unless
(
length
$token
->{data}) {
$token
=
$self
->_get_next_token;
next
B;
}
}
}
elsif
(
$token
->{type} == START_TAG_TOKEN) {
if
(
$token
->{tag_name} eq
'col'
) {
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
pop
@{
$self
->{open_elements}};
delete
$self
->{self_closing};
$token
=
$self
->_get_next_token;
next
B;
}
else
{
}
}
elsif
(
$token
->{type} == END_TAG_TOKEN) {
if
(
$token
->{tag_name} eq
'colgroup'
) {
if
(
$self
->{open_elements}->[-1]->[1] == HTML_EL) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
'colgroup'
,
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = IN_TABLE_IM;
$token
=
$self
->_get_next_token;
next
B;
}
}
elsif
(
$token
->{tag_name} eq
'col'
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
'col'
,
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
}
}
elsif
(
$token
->{type} == END_OF_FILE_TOKEN) {
if
(
$self
->{open_elements}->[-1]->[1] == HTML_EL and
@{
$self
->{open_elements}} == 1) {
last
B;
}
else
{
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = IN_TABLE_IM;
next
B;
}
}
else
{
die
"$0: $token->{type}: Unknown token type"
;
}
if
(
$self
->{open_elements}->[-1]->[1] == HTML_EL) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
'colgroup'
,
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
pop
@{
$self
->{open_elements}};
$self
->{insertion_mode} = IN_TABLE_IM;
next
B;
}
}
elsif
(
$self
->{insertion_mode} & SELECT_IMS) {
if
(
$token
->{type} == CHARACTER_TOKEN) {
my
$data
=
$token
->{data};
while
(
$data
=~ s/\x00//) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
,
token
=>
$token
);
}
$self
->{open_elements}->[-1]->[0]->appendTextFromUnicode(
$self
,
$data
,
$token
)
if
$data
ne
''
;
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{type} == START_TAG_TOKEN) {
if
(
$token
->{tag_name} eq
'option'
) {
if
(
$self
->{open_elements}->[-1]->[1] == OPTION_EL) {
pop
@{
$self
->{open_elements}};
}
else
{
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'optgroup'
) {
if
(
$self
->{open_elements}->[-1]->[1] == OPTION_EL) {
pop
@{
$self
->{open_elements}};
}
else
{
}
if
(
$self
->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
pop
@{
$self
->{open_elements}};
}
else
{
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'select'
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'select in select'
,
token
=>
$token
);
$token
= {
type
=> END_TAG_TOKEN,
tag_name
=>
'select'
,
line
=>
$token
->{line},
column
=>
$token
->{column}};
next
B;
}
elsif
({
input
=> 1,
textarea
=> 1,
keygen
=> 1,
}->{
$token
->{tag_name}}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
'select'
,
token
=>
$token
);
my
$i
;
# Scan the stack of open elements from the innermost entry outwards.
# $i receives the index of the first entry whose category equals
# SELECT_EL.  Entries of category OPTGROUP_EL or OPTION_EL are stepped
# over; any other entry ends the scan with $i left undefined.
INSCOPE: for (reverse 0 .. $#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == SELECT_EL) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] == OPTGROUP_EL or $node->[1] == OPTION_EL) {
    # Keep scanning outwards.
  }
  else {
    last INSCOPE;
  }
}
unless
(
defined
$i
) {
$token
=
$self
->_get_next_token;
next
B;
}
$token
->{self_closing} =
$self
->{self_closing};
unshift
@{
$self
->{token}},
$token
;
delete
$self
->{self_closing};
$token
= {
type
=> END_TAG_TOKEN,
tag_name
=>
'select'
,
line
=>
$token
->{line},
column
=>
$token
->{column}};
next
B;
}
elsif
(
(
$self
->{insertion_mode} & IM_MASK) == IN_SELECT_IN_TABLE_IM and
{
caption
=> 1,
table
=> 1,
tbody
=> 1,
tfoot
=> 1,
thead
=> 1,
tr
=> 1,
td
=> 1,
th
=> 1,
}->{
$token
->{tag_name}}
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
'select'
,
token
=>
$token
);
$token
->{self_closing} =
$self
->{self_closing};
unshift
@{
$self
->{token}},
$token
;
delete
$self
->{self_closing};
$token
= {
type
=> END_TAG_TOKEN,
tag_name
=>
'select'
,
line
=>
$token
->{line},
column
=>
$token
->{column}};
next
B;
}
elsif
(
$token
->{tag_name} eq
'script'
) {
$script_start_tag
->(
$self
,
$insert
,
$open_tables
);
next
B;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in select'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
}
elsif
(
$token
->{type} == END_TAG_TOKEN) {
if
(
$token
->{tag_name} eq
'optgroup'
) {
if
(
$self
->{open_elements}->[-1]->[1] == OPTION_EL and
$self
->{open_elements}->[-2]->[1] == OPTGROUP_EL) {
splice
@{
$self
->{open_elements}}, -2;
}
elsif
(
$self
->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
pop
@{
$self
->{open_elements}};
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'option'
) {
if
(
$self
->{open_elements}->[-1]->[1] == OPTION_EL) {
pop
@{
$self
->{open_elements}};
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'select'
) {
my
$i
;
# Scan the stack of open elements from the innermost entry outwards.
# $i receives the index of the first entry whose category equals
# SELECT_EL.  Entries of category OPTION_EL or OPTGROUP_EL are stepped
# over; any other entry ends the scan with $i left undefined.
INSCOPE: for (reverse 0 .. $#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == SELECT_EL) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] == OPTION_EL or $node->[1] == OPTGROUP_EL) {
    # Keep scanning outwards.
  }
  else {
    last INSCOPE;
  }
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
splice
@{
$self
->{open_elements}},
$i
;
$self
->_reset_insertion_mode;
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
(
$self
->{insertion_mode} & IM_MASK) == IN_SELECT_IN_TABLE_IM and
{
caption
=> 1,
table
=> 1,
tbody
=> 1,
tfoot
=> 1,
thead
=> 1,
tr
=> 1,
td
=> 1,
th
=> 1,
}->{
$token
->{tag_name}}
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
my
$i
;
# Scan the stack of open elements from the innermost entry outwards.
# $i receives the index of the first entry whose element's tagName
# equals the current token's tag name; the scan stops early, leaving
# $i undefined, as soon as an entry flagged TABLE_SCOPING_EL is met.
INSCOPE: for (reverse 0 .. $#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[0]->tagName eq $token->{tag_name}) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] & TABLE_SCOPING_EL) {
    last INSCOPE;
  }
}
unless
(
defined
$i
) {
$token
=
$self
->_get_next_token;
next
B;
}
$token
->{self_closing} =
$self
->{self_closing};
unshift
@{
$self
->{token}},
$token
;
delete
$self
->{self_closing};
$token
= {
type
=> END_TAG_TOKEN,
tag_name
=>
'select'
,
line
=>
$token
->{line},
column
=>
$token
->{column}};
next
B;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in select:/'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
}
elsif
(
$token
->{type} == END_OF_FILE_TOKEN) {
unless
(
$self
->{open_elements}->[-1]->[1] == HTML_EL and
@{
$self
->{open_elements}} == 1) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in body:#eof'
,
token
=>
$token
);
}
else
{
}
last
B;
}
else
{
die
"$0: $token->{type}: Unknown token type"
;
}
}
elsif
(
$self
->{insertion_mode} & BODY_AFTER_IMS) {
if
(
$token
->{type} == CHARACTER_TOKEN) {
# Strip a leading run of TAB / LF / FF / SPACE from the character
# token and append it to the current node as text.
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
  # Save the capture immediately: another sub is called before the
  # text is appended, so the saved copy is used instead of re-reading
  # $1 afterwards.
  my $data = $1;
  $reconstruct_active_formatting_elements->(
    $self, $insert_to_current, $active_formatting_elements, $open_tables,
  );
  $self->{open_elements}->[-1]->[0]->appendTextFromUnicode(
    $self, $data, $token,
  );
  # Nothing but whitespace in this token: fetch the next token and
  # restart the main loop.
  unless (length $token->{data}) {
    $token = $self->_get_next_token;
    next B;
  }
}
if
(
$self
->{insertion_mode} == AFTER_HTML_BODY_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after html:#text'
,
token
=>
$token
);
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after body:#text'
,
token
=>
$token
);
}
$self
->{insertion_mode} = IN_BODY_IM;
next
B;
}
elsif
(
$token
->{type} == START_TAG_TOKEN) {
if
(
$self
->{insertion_mode} == AFTER_HTML_BODY_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after html'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after body'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
$self
->{insertion_mode} = IN_BODY_IM;
next
B;
}
elsif
(
$token
->{type} == END_TAG_TOKEN) {
if
(
$self
->{insertion_mode} == AFTER_HTML_BODY_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after html:/'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$self
->{insertion_mode} = IN_BODY_IM;
next
B;
}
else
{
}
if
(
$token
->{tag_name} eq
'html'
) {
if
(
defined
$self
->{inner_html_node}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
'html'
,
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
$self
->{insertion_mode} = AFTER_HTML_BODY_IM;
$token
=
$self
->_get_next_token;
next
B;
}
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after body:/'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$self
->{insertion_mode} = IN_BODY_IM;
next
B;
}
}
elsif
(
$token
->{type} == END_OF_FILE_TOKEN) {
last
B;
}
else
{
die
"$0: $token->{type}: Unknown token type"
;
}
}
elsif
(
$self
->{insertion_mode} & FRAME_IMS) {
if
(
$token
->{type} == CHARACTER_TOKEN) {
if
(
$token
->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
$self
->{open_elements}->[-1]->[0]->appendTextFromUnicode(
$self
, $1,
$token
);
unless
(
length
$token
->{data}) {
$token
=
$self
->_get_next_token;
next
B;
}
}
if
(
$token
->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
if
(
$self
->{insertion_mode} == IN_FRAMESET_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in frameset:#text'
,
token
=>
$token
);
}
elsif
(
$self
->{insertion_mode} == AFTER_FRAMESET_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after frameset:#text'
,
token
=>
$token
);
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after html:#text'
,
token
=>
$token
);
}
if
(
length
$token
->{data}) {
}
else
{
$token
=
$self
->_get_next_token;
}
next
B;
}
die
qq[$0: Character "$token->{data}"]
;
}
elsif
(
$token
->{type} == START_TAG_TOKEN) {
if
(
$token
->{tag_name} eq
'frameset'
and
$self
->{insertion_mode} == IN_FRAMESET_IM) {
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'frame'
and
$self
->{insertion_mode} == IN_FRAMESET_IM) {
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->{open_elements}->[-1]->[0]->appendChild (
$el
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
pop
@{
$self
->{open_elements}};
delete
$self
->{self_closing};
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'noframes'
) {
$parse_rcdata
->(
$self
,
$insert
,
$open_tables
, 0);
next
B;
}
else
{
if
(
$self
->{insertion_mode} == IN_FRAMESET_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in frameset'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
elsif
(
$self
->{insertion_mode} == AFTER_FRAMESET_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after frameset'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after after frameset'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
$token
=
$self
->_get_next_token;
next
B;
}
}
elsif
(
$token
->{type} == END_TAG_TOKEN) {
if
(
$token
->{tag_name} eq
'frameset'
and
$self
->{insertion_mode} == IN_FRAMESET_IM) {
if
(
$self
->{open_elements}->[-1]->[1] == HTML_EL and
@{
$self
->{open_elements}} == 1) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
}
else
{
pop
@{
$self
->{open_elements}};
$token
=
$self
->_get_next_token;
}
if
(not
defined
$self
->{inner_html_node} and
not (
$self
->{open_elements}->[-1]->[1] == FRAMESET_EL)) {
$self
->{insertion_mode} = AFTER_FRAMESET_IM;
}
else
{
}
next
B;
}
elsif
(
$token
->{tag_name} eq
'html'
and
$self
->{insertion_mode} == AFTER_FRAMESET_IM) {
$self
->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
$token
=
$self
->_get_next_token;
next
B;
}
else
{
if
(
$self
->{insertion_mode} == IN_FRAMESET_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in frameset:/'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
elsif
(
$self
->{insertion_mode} == AFTER_FRAMESET_IM) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after frameset:/'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after after frameset:/'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
$token
=
$self
->_get_next_token;
next
B;
}
}
elsif
(
$token
->{type} == END_OF_FILE_TOKEN) {
unless
(
$self
->{open_elements}->[-1]->[1] == HTML_EL and
@{
$self
->{open_elements}} == 1) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in body:#eof'
,
token
=>
$token
);
}
else
{
}
last
B;
}
else
{
die
"$0: $token->{type}: Unknown token type"
;
}
}
else
{
die
"$0: $self->{insertion_mode}: Unknown insertion mode"
;
}
if
(
$token
->{type} == START_TAG_TOKEN) {
if
(
$token
->{tag_name} eq
'script'
) {
$script_start_tag
->(
$self
,
$insert
,
$open_tables
);
next
B;
}
elsif
(
$token
->{tag_name} eq
'style'
) {
$parse_rcdata
->(
$self
,
$insert
,
$open_tables
, 0);
next
B;
}
elsif
({
base
=> 1,
command
=> 1,
link
=> 1,
basefont
=> 1,
bgsound
=> 1,
}->{
$token
->{tag_name}}) {
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS (
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
pop
@{
$self
->{open_elements}};
delete
$self
->{self_closing};
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'meta'
) {
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS (
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
my
$meta_el
=
pop
@{
$self
->{open_elements}};
unless
(
$self
->{confident}) {
if
(
$token
->{attributes}->{charset}) {
$self
->{change_encoding}
->(
$self
,
$token
->{attributes}->{charset}->{value},
$token
);
$self
->_data(
$meta_el
->[0]->getAttributeNodeNS(
undef
,
'charset'
),
manakai_has_reference
=>
$token
->{attributes}->{charset}->{has_reference});
}
elsif
(
$token
->{attributes}->{content} and
$token
->{attributes}->{
'http-equiv'
}) {
if
(
$token
->{attributes}->{
'http-equiv'
}->{value}
=~ /\A[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]\z/ and
$token
->{attributes}->{content}->{value}
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
[\x09\x0A\x0C\x0D\x20]*=
[\x09\x0A\x0C\x0D\x20]*(?>
"([^"
]*)"|
'([^'
]*)'|
([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
/x) {
$self
->{change_encoding}
->(
$self
,
defined
$1 ? $1 :
defined
$2 ? $2 : $3,
$token
);
$self
->_data(
$meta_el
->[0]->getAttributeNodeNS(
undef
,
'content'
),
manakai_has_reference
=>
$token
->{attributes}->{content}->{has_reference});
}
}
}
else
{
if
(
$token
->{attributes}->{charset}) {
$self
->_data(
$meta_el
->[0]->getAttributeNodeNS(
undef
,
'charset'
),
manakai_has_reference
=>
$token
->{attributes}->{charset}->{has_reference});
}
if
(
$token
->{attributes}->{content}) {
$self
->_data(
$meta_el
->[0]->getAttributeNodeNS (
undef
,
'content'
),
manakai_has_reference
=>
$token
->{attributes}->{content}->{has_reference});
}
}
delete
$self
->{self_closing};
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'title'
) {
$parse_rcdata
->(
$self
,
$insert
,
$open_tables
, 1);
next
B;
}
elsif
(
$token
->{tag_name} eq
'body'
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in body'
,
text
=>
'body'
,
token
=>
$token
);
if
(@{
$self
->{open_elements}} == 1 or
not (
$self
->{open_elements}->[1]->[1] == BODY_EL)) {
}
else
{
delete
$self
->{frameset_ok};
my
$body_el
=
$self
->{open_elements}->[1]->[0];
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
unless
(
$body_el
->hasAttribute(
$attr_name
)) {
$body_el
->setAttribute(
$attr_name
,
$token
->{attributes}->{
$attr_name
}->{value});
}
}
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'frameset'
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in body'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
if
(@{
$self
->{open_elements}} == 1 or
not (
$self
->{open_elements}->[1]->[1] == BODY_EL)) {
}
elsif
(not
$self
->{frameset_ok}) {
}
else
{
my
$body
=
$self
->{open_elements}->[1]->[0];
my
$body_parent
=
$body
->parentNode;
$body_parent
->removeChild (
$body
)
if
$body_parent
;
splice
@{
$self
->{open_elements}}, 1;
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$self
->{insertion_mode} = IN_FRAMESET_IM;
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
({
address
=> 1,
article
=> 1,
aside
=> 1,
blockquote
=> 1,
center
=> 1,
details
=> 1,
dir
=> 1,
div
=> 1,
dl
=> 1,
fieldset
=> 1,
figcaption
=> 1,
figure
=> 1,
footer
=> 1,
header
=> 1,
hgroup
=> 1,
menu
=> 1,
nav
=> 1,
ol
=> 1,
p
=> 1,
section
=> 1,
ul
=> 1,
summary
=> 1,
h1
=> 1,
h2
=> 1,
h3
=> 1,
h4
=> 1,
h5
=> 1,
h6
=> 1,
pre
=> 1,
listing
=> 1,
form
=> 1,
table
=> 1,
hr
=> 1,
}->{
$token
->{tag_name}}) {
if
(
$token
->{tag_name} eq
'form'
and
defined
$self
->{form_element}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in form:form'
,
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
if
(
$token
->{tag_name} ne
'table'
or
(
$self
->_data(
$self
->{document})->{
'manakai_compat_mode'
}||
''
) ne
'quirks'
) {
INSCOPE:
for
(
reverse
@{
$self
->{open_elements}}) {
if
(
$_
->[1] == P_EL) {
$token
->{self_closing} =
$self
->{self_closing};
unshift
@{
$self
->{token}},
$token
;
delete
$self
->{self_closing};
$token
= {
type
=> END_TAG_TOKEN,
tag_name
=>
'p'
,
line
=>
$token
->{line},
column
=>
$token
->{column}};
next
B;
}
elsif
(
$_
->[1] & BUTTON_SCOPING_EL) {
last
INSCOPE;
}
}
}
if
({
h1
=> 1,
h2
=> 1,
h3
=> 1,
h4
=> 1,
h5
=> 1,
h6
=> 1}->{
$token
->{tag_name}}) {
if
(
$self
->{open_elements}->[-1]->[1] == HEADING_EL) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$self
->{open_elements}->[-1]->[0]->tagName,
token
=>
$token
);
pop
@{
$self
->{open_elements}};
}
}
{
  # Create the HTML-namespace element for the current start tag, copy the
  # token's attributes onto it, record source positions via _data, insert it
  # with $insert and push it onto the stack of open elements.
  my $el = $self->{document}->createElementNS((HTML_NS), $token->{tag_name});
  for my $token_attr_name (keys %{$token->{attributes}}) {
    # Look the attribute record up under the name the token stores it by.
    # The sanitised name below can differ from that key, and using it for the
    # lookup would yield undef and silently drop the attribute's value.
    my $attr_t = $token->{attributes}->{$token_attr_name};

    # Strip every character outside [A-Za-z0-9:_-] before handing the name
    # to the DOM.
    (my $attr_name = $token_attr_name) =~ s/[^A-Za-z0-9:_-]//g;
    next unless length $attr_name;

    my $attr = $self->{document}->createAttributeNS(undef, $attr_name);
    # Skip names for which no attribute node was produced, matching the
    # guards used by the 'a' and generic start-tag branches of this file.
    next unless ref $attr;

    $attr->setValue($attr_t->{value});
    $self->_data($attr, manakai_source_line => $attr_t->{line});
    $self->_data($attr, manakai_source_column => $attr_t->{column});
    $el->setAttributeNodeNS($attr);
  }
  $self->_data($el, manakai_source_line => $token->{line})
    if defined $token->{line};
  $self->_data($el, manakai_source_column => $token->{column})
    if defined $token->{column};
  $insert->($self, $el, $open_tables);
  push @{$self->{open_elements}},
    [$el, $el_category->{$token->{tag_name}} || 0];
}
if
(
$token
->{tag_name} eq
'pre'
or
$token
->{tag_name} eq
'listing'
) {
$token
=
$self
->_get_next_token;
if
(
$token
->{type} == CHARACTER_TOKEN) {
$token
->{data} =~ s/^\x0A//;
unless
(
length
$token
->{data}) {
$token
=
$self
->_get_next_token;
}
else
{
}
}
else
{
}
delete
$self
->{frameset_ok};
}
elsif
(
$token
->{tag_name} eq
'form'
) {
$self
->{form_element} =
$self
->{open_elements}->[-1]->[0];
$token
=
$self
->_get_next_token;
}
elsif
(
$token
->{tag_name} eq
'table'
) {
push
@{
$open_tables
}, [
$self
->{open_elements}->[-1]->[0]];
delete
$self
->{frameset_ok};
$self
->{insertion_mode} = IN_TABLE_IM;
$token
=
$self
->_get_next_token;
}
elsif
(
$token
->{tag_name} eq
'hr'
) {
pop
@{
$self
->{open_elements}};
delete
$self
->{self_closing};
delete
$self
->{frameset_ok};
$token
=
$self
->_get_next_token;
}
else
{
$token
=
$self
->_get_next_token;
}
next
B;
}
elsif
(
$token
->{tag_name} eq
'li'
) {
delete
$self
->{frameset_ok};
my
$non_optional
;
my
$i
= -1;
for
my
$node
(
reverse
@{
$self
->{open_elements}}) {
if
(
$node
->[1] == LI_EL) {
{
if
(
$non_optional
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$non_optional
->[0]->tagName,
token
=>
$token
);
}
else
{
}
splice
@{
$self
->{open_elements}},
$i
;
}
last
;
}
elsif
(
(
$node
->[1] & SPECIAL_EL or
$node
->[1] & SCOPING_EL) and
(not
$node
->[1] & ADDRESS_DIV_P_EL)
) {
last
;
}
elsif
(
$node
->[1] & END_TAG_OPTIONAL_EL) {
}
else
{
$non_optional
||=
$node
;
}
$i
--;
}
INSCOPE:
for
(
reverse
@{
$self
->{open_elements}}) {
if
(
$_
->[1] == P_EL) {
$token
->{self_closing} =
$self
->{self_closing};
unshift
@{
$self
->{token}},
$token
;
delete
$self
->{self_closing};
$token
= {
type
=> END_TAG_TOKEN,
tag_name
=>
'p'
,
line
=>
$token
->{line},
column
=>
$token
->{column}};
next
B;
}
elsif
(
$_
->[1] & BUTTON_SCOPING_EL) {
last
INSCOPE;
}
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'dt'
or
$token
->{tag_name} eq
'dd'
) {
delete
$self
->{frameset_ok};
my
$non_optional
;
my
$i
= -1;
for
my
$node
(
reverse
@{
$self
->{open_elements}}) {
if
(
$node
->[1] == DTDD_EL) {
{
if
(
$non_optional
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$non_optional
->[0]->tagName,
token
=>
$token
);
}
else
{
}
splice
@{
$self
->{open_elements}},
$i
;
}
last
;
}
elsif
(
(
$node
->[1] & SPECIAL_EL or
$node
->[1] & SCOPING_EL) and
(not
$node
->[1] & ADDRESS_DIV_P_EL)
) {
last
;
}
elsif
(
$node
->[1] & END_TAG_OPTIONAL_EL) {
}
else
{
$non_optional
||=
$node
;
}
$i
--;
}
INSCOPE:
for
(
reverse
@{
$self
->{open_elements}}) {
if
(
$_
->[1] == P_EL) {
$token
->{self_closing} =
$self
->{self_closing};
unshift
@{
$self
->{token}},
$token
;
delete
$self
->{self_closing};
$token
= {
type
=> END_TAG_TOKEN,
tag_name
=>
'p'
,
line
=>
$token
->{line},
column
=>
$token
->{column}};
next
B;
}
elsif
(
$_
->[1] & BUTTON_SCOPING_EL) {
last
INSCOPE;
}
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'plaintext'
) {
INSCOPE:
for
(
reverse
@{
$self
->{open_elements}}) {
if
(
$_
->[1] == P_EL) {
$token
->{self_closing} =
$self
->{self_closing};
unshift
@{
$self
->{token}},
$token
;
delete
$self
->{self_closing};
$token
= {
type
=> END_TAG_TOKEN,
tag_name
=>
'p'
,
line
=>
$token
->{line},
column
=>
$token
->{column}};
next
B;
}
elsif
(
$_
->[1] & BUTTON_SCOPING_EL) {
last
INSCOPE;
}
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$self
->{state} = PLAINTEXT_STATE;
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'a'
) {
AFE:
for
my
$i
(
reverse
0..
$#$active_formatting_elements
) {
my
$node
=
$active_formatting_elements
->[
$i
];
no
warnings;
if
(
$node
->[1] == A_EL) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in a:a'
,
token
=>
$token
);
$token
->{self_closing} =
$self
->{self_closing};
unshift
@{
$self
->{token}},
$token
;
delete
$self
->{self_closing};
$token
= {
type
=> END_TAG_TOKEN,
tag_name
=>
'a'
,
line
=>
$token
->{line},
column
=>
$token
->{column}};
$formatting_end_tag
->(
$self
,
$active_formatting_elements
,
$open_tables
,
$token
);
AFE2:
for
(
reverse
0..
$#$active_formatting_elements
) {
if
(
$active_formatting_elements
->[
$_
]->[0] eq
$node
->[0]) {
splice
@$active_formatting_elements
,
$_
, 1;
last
AFE2;
}
}
# Remove the entry holding the same node as $node from the stack of open
# elements, scanning from the most recently pushed entry downwards.
# The loop bound is the last index of @{$self->{open_elements}}, the array
# the body indexes with $_.
OE:
for (reverse 0..$#{$self->{open_elements}}) {
  if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
    splice @{$self->{open_elements}}, $_, 1;
    last OE;
  }
}
last
AFE;
}
elsif
(
$node
->[0] eq
'#marker'
) {
last
AFE;
}
}
my
$insert
=
$self
->{insertion_mode} & TABLE_IMS
?
$insert_to_foster
:
$insert_to_current
;
$reconstruct_active_formatting_elements
->(
$self
,
$insert
,
$active_formatting_elements
,
$open_tables
);
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS (
undef
,
$attr_name
);
if
(
$attr
)
{
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS(
$attr
);
}
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
push
@$active_formatting_elements
,
[
$self
->{open_elements}->[-1]->[0],
$self
->{open_elements}->[-1]->[1],
$token
];
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'nobr'
) {
my
$insert
=
$self
->{insertion_mode} & TABLE_IMS
?
$insert_to_foster
:
$insert_to_current
;
$reconstruct_active_formatting_elements
->(
$self
,
$insert
,
$active_formatting_elements
,
$open_tables
);
# Walk the stack of open elements from the top. If a NOBR_EL entry is found
# before any SCOPING_EL entry, report 'in nobr:nobr', push the current token
# back onto the token queue and process a synthetic </nobr> end tag first.
# The loop bound is the last index of @{$self->{open_elements}}.
INSCOPE:
for (reverse 0..$#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == NOBR_EL) {
    $self->{parse_error}->(level => $self->{level}->{must},
                           type => 'in nobr:nobr',
                           token => $token);
    # Preserve the self-closing flag on the token being re-queued.
    $token->{self_closing} = $self->{self_closing};
    unshift @{$self->{token}}, $token;
    delete $self->{self_closing};
    $token = {type => END_TAG_TOKEN,
              tag_name => 'nobr',
              line => $token->{line},
              column => $token->{column}};
    next B;
  }
  elsif ($node->[1] & SCOPING_EL) {
    last INSCOPE;
  }
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
push
@$active_formatting_elements
,
[
$self
->{open_elements}->[-1]->[0],
$self
->{open_elements}->[-1]->[1],
$token
];
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'button'
) {
# Walk the stack of open elements from the top. If a BUTTON_EL entry is found
# before any SCOPING_EL entry, report 'in button:button', push the current
# token back onto the token queue and process a synthetic </button> first.
# The loop bound is the last index of @{$self->{open_elements}}.
INSCOPE:
for (reverse 0..$#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == BUTTON_EL) {
    $self->{parse_error}->(level => $self->{level}->{must},
                           type => 'in button:button',
                           token => $token);
    # Preserve the self-closing flag on the token being re-queued.
    $token->{self_closing} = $self->{self_closing};
    unshift @{$self->{token}}, $token;
    delete $self->{self_closing};
    $token = {type => END_TAG_TOKEN,
              tag_name => 'button',
              line => $token->{line},
              column => $token->{column}};
    next B;
  }
  elsif ($node->[1] & SCOPING_EL) {
    last INSCOPE;
  }
}
my
$insert
=
$self
->{insertion_mode} & TABLE_IMS
?
$insert_to_foster
:
$insert_to_current
;
$reconstruct_active_formatting_elements
->(
$self
,
$insert
,
$active_formatting_elements
,
$open_tables
);
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS (
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
delete
$self
->{frameset_ok};
$token
=
$self
->_get_next_token;
next
B;
}
elsif
({
xmp
=> 1,
iframe
=> 1,
noembed
=> 1,
noframes
=> 1,
noscript
=> 0,
}->{
$token
->{tag_name}}) {
if
(
$token
->{tag_name} eq
'xmp'
) {
INSCOPE:
for
(
reverse
@{
$self
->{open_elements}}) {
if
(
$_
->[1] == P_EL) {
$token
->{self_closing} =
$self
->{self_closing};
unshift
@{
$self
->{token}},
$token
;
delete
$self
->{self_closing};
$token
= {
type
=> END_TAG_TOKEN,
tag_name
=>
'p'
,
line
=>
$token
->{line},
column
=>
$token
->{column}};
next
B;
}
elsif
(
$_
->[1] & BUTTON_SCOPING_EL) {
last
INSCOPE;
}
}
my
$insert
=
$self
->{insertion_mode} & TABLE_IMS
?
$insert_to_foster
:
$insert_to_current
;
$reconstruct_active_formatting_elements
->(
$self
,
$insert
,
$active_formatting_elements
,
$open_tables
);
delete
$self
->{frameset_ok};
}
elsif
(
$token
->{tag_name} eq
'iframe'
) {
delete
$self
->{frameset_ok};
}
else
{
}
$parse_rcdata
->(
$self
,
$insert
,
$open_tables
, 0);
next
B;
}
elsif
(
$token
->{tag_name} eq
'isindex'
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'isindex'
,
token
=>
$token
);
if
(
defined
$self
->{form_element}) {
$token
=
$self
->_get_next_token;
next
B;
}
else
{
delete
$self
->{self_closing};
my
$at
=
$token
->{attributes};
my
$form_attrs
;
$form_attrs
->{action} =
$at
->{action}
if
$at
->{action};
my
$prompt_attr
=
$at
->{prompt};
$at
->{name} = {
name
=>
'name'
,
value
=>
'isindex'
};
delete
$at
->{action};
delete
$at
->{prompt};
my
@tokens
= (
{
type
=> START_TAG_TOKEN,
tag_name
=>
'form'
,
attributes
=>
$form_attrs
,
line
=>
$token
->{line},
column
=>
$token
->{column}},
{
type
=> START_TAG_TOKEN,
tag_name
=>
'hr'
,
line
=>
$token
->{line},
column
=>
$token
->{column}},
{
type
=> START_TAG_TOKEN,
tag_name
=>
'label'
,
line
=>
$token
->{line},
column
=>
$token
->{column}},
);
if
(
$prompt_attr
) {
push
@tokens
, {
type
=> CHARACTER_TOKEN,
data
=>
$prompt_attr
->{value},
};
}
else
{
push
@tokens
, {
type
=> CHARACTER_TOKEN,
data
=>
'This is a searchable index. Enter search keywords: '
,
};
}
push
@tokens
,
{
type
=> START_TAG_TOKEN,
tag_name
=>
'input'
,
attributes
=>
$at
,
line
=>
$token
->{line},
column
=>
$token
->{column}},
{
type
=> END_TAG_TOKEN,
tag_name
=>
'label'
,
line
=>
$token
->{line},
column
=>
$token
->{column}},
{
type
=> START_TAG_TOKEN,
tag_name
=>
'hr'
,
line
=>
$token
->{line},
column
=>
$token
->{column}},
{
type
=> END_TAG_TOKEN,
tag_name
=>
'form'
,
line
=>
$token
->{line},
column
=>
$token
->{column}};
unshift
@{
$self
->{token}}, (
@tokens
);
$token
=
$self
->_get_next_token;
next
B;
}
}
elsif
(
$token
->{tag_name} eq
'textarea'
) {
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS (
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$self
->{ignore_newline} = 1;
$self
->{state} = RCDATA_STATE;
delete
$self
->{escape};
$self
->{insertion_mode} |= IN_CDATA_RCDATA_IM;
delete
$self
->{frameset_ok};
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'optgroup'
or
$token
->{tag_name} eq
'option'
) {
if
(
$self
->{open_elements}->[-1]->[1] == OPTION_EL) {
$token
->{self_closing} =
$self
->{self_closing};
unshift
@{
$self
->{token}},
$token
;
delete
$self
->{self_closing};
$token
= {
type
=> END_TAG_TOKEN,
tag_name
=>
'option'
,
line
=>
$token
->{line},
column
=>
$token
->{column}};
next
B;
}
my
$insert
=
$self
->{insertion_mode} & TABLE_IMS
?
$insert_to_foster
:
$insert_to_current
;
$reconstruct_active_formatting_elements
->(
$self
,
$insert
,
$active_formatting_elements
,
$open_tables
);
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS (
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$token
=
$self
->_get_next_token;
redo
B;
}
elsif
(
$token
->{tag_name} eq
'rt'
or
$token
->{tag_name} eq
'rp'
) {
# Walk the stack of open elements from the top looking for a RUBY_EL entry
# that is not hidden behind a SCOPING_EL entry. When one is found, pop every
# END_TAG_OPTIONAL_EL entry off the top of the stack and report 'not closed'
# if the entry then on top is not RUBY_EL.
# The loop bound is the last index of @{$self->{open_elements}}.
INSCOPE:
for (reverse 0..$#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == RUBY_EL) {
    while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
      pop @{$self->{open_elements}};
    }
    unless ($self->{open_elements}->[-1]->[1] == RUBY_EL) {
      $self->{parse_error}->(level => $self->{level}->{must},
                             type => 'not closed',
                             text => $self->{open_elements}->[-1]->[0]
                                         ->tagName,
                             token => $token);
    }
    last INSCOPE;
  }
  elsif ($node->[1] & SCOPING_EL) {
    last INSCOPE;
  }
}
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
$token
=
$self
->_get_next_token;
redo
B;
}
elsif
(
$token
->{tag_name} eq
'math'
or
$token
->{tag_name} eq
'svg'
) {
my
$insert
=
$self
->{insertion_mode} & TABLE_IMS
?
$insert_to_foster
:
$insert_to_current
;
$reconstruct_active_formatting_elements
->(
$self
,
$insert
,
$active_formatting_elements
,
$open_tables
);
{
my
$el
;
$el
=
$self
->{document}->createElementNS
(
$token
->{tag_name} eq
'math'
? (MML_NS) : (SVG_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
;
if
(
defined
$foreign_attr_xname
->{
$attr_name
})
{
my
$xmlnsuri
=
$foreign_attr_xname
->{
$attr_name
}->[0];
my
$qname
=
join
':'
, @{
$foreign_attr_xname
->{
$attr_name
}->[1]};
$qname
=~ s/(^:)|(:$)//;
$attr
=
$self
->{document}->createAttributeNS(
$xmlnsuri
,
$qname
);
}
elsif
(
$token
->{tag_name} eq
'math'
&&
$attr_name
eq
'definitionurl'
)
{
$attr
=
$self
->{document}->createAttributeNS((MML_NS),
'definitionURL'
);
}
elsif
(
$token
->{tag_name} eq
'math'
)
{
$attr
=
$self
->{document}->createAttributeNS((MML_NS),
$attr_name
);
}
elsif
(
$token
->{tag_name} eq
'svg'
)
{
$attr
=
$self
->{document}->createAttributeNS(
(SVG_NS), (
$svg_attr_name
->{
$attr_name
} ||
$attr_name
));
}
unless
(
$attr
)
{
$attr
=
$self
->{document}->createAttribute(
$attr_name
);
}
if
(
$attr
)
{
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
, (
$el_category_f
->{
$token
->{tag_name} eq
'math'
? MML_NS : SVG_NS}->{
$token
->{tag_name}} || 0) | FOREIGN_EL | ((
$token
->{tag_name} eq
'math'
? MML_NS : SVG_NS) eq SVG_NS ? SVG_EL : (
$token
->{tag_name} eq
'math'
? MML_NS : SVG_NS) eq MML_NS ? MML_EL : 0)];
if
(
$token
->{attributes}->{xmlns} and
$token
->{attributes}->{xmlns}->{value} ne (
$token
->{tag_name} eq
'math'
? (MML_NS) : (SVG_NS))) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'bad namespace'
,
token
=>
$token
);
}
# Report 'bad namespace' when the start tag carries an xmlns:xlink attribute
# whose value is not the XLink namespace. This mirrors the xmlns check that
# precedes it.
# NOTE(review): the comparison operand is reconstructed from the parallel
# xmlns check; confirm the literal against the file's namespace constants.
if ($token->{attributes}->{'xmlns:xlink'} and
    $token->{attributes}->{'xmlns:xlink'}->{value}
      ne 'http://www.w3.org/1999/xlink') {
  $self->{parse_error}->(level => $self->{level}->{must},
                         type => 'bad namespace',
                         token => $token);
}
}
if
(
$self
->{self_closing}) {
pop
@{
$self
->{open_elements}};
delete
$self
->{self_closing};
}
else
{
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
({
caption
=> 1,
col
=> 1,
colgroup
=> 1,
frame
=> 1,
head
=> 1,
tbody
=> 1,
td
=> 1,
tfoot
=> 1,
th
=> 1,
thead
=> 1,
tr
=> 1,
}->{
$token
->{tag_name}}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'in body'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'param'
or
$token
->{tag_name} eq
'source'
or
$token
->{tag_name} eq
'track'
) {
{
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS(
undef
,
$attr_name
);
$attr
->setValue(
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
pop
@{
$self
->{open_elements}};
delete
$self
->{self_closing};
$token
=
$self
->_get_next_token;
redo
B;
}
else
{
if
(
$token
->{tag_name} eq
'image'
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'image'
,
token
=>
$token
);
$token
->{tag_name} =
'img'
;
}
else
{
}
my
$insert
=
$self
->{insertion_mode} & TABLE_IMS
?
$insert_to_foster
:
$insert_to_current
;
$reconstruct_active_formatting_elements
->(
$self
,
$insert
,
$active_formatting_elements
,
$open_tables
);
{
my
$el
;
$token
->{tag_name} =~ s/[^A-Za-z0-9:_-]//g;
$el
=
$self
->{document}->createElementNS((HTML_NS),
$token
->{tag_name});
ATR:
for
my
$attr_name
(
keys
%{
$token
->{attributes}}) {
my
$attr_t
=
$token
->{attributes}->{
$attr_name
};
my
$attr
=
$self
->{document}->createAttributeNS (
undef
,
$attr_name
);
next
ATR
unless
ref
(
$attr
);
$attr
->setValue (
$attr_t
->{value});
$self
->_data(
$attr
,
manakai_source_line
=>
$attr_t
->{line});
$self
->_data(
$attr
,
manakai_source_column
=>
$attr_t
->{column});
$el
->setAttributeNodeNS (
$attr
);
}
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
push
@{
$self
->{open_elements}}, [
$el
,
$el_category
->{
$token
->{tag_name}} || 0];
}
if
({
applet
=> 1,
marquee
=> 1,
object
=> 1,
}->{
$token
->{tag_name}}) {
push
@$active_formatting_elements
, [
'#marker'
,
''
,
undef
];
delete
$self
->{frameset_ok};
}
elsif
({
b
=> 1,
big
=> 1,
code
=>1,
em
=> 1,
font
=> 1,
i
=> 1,
s
=> 1,
small
=> 1,
strike
=> 1,
strong
=> 1,
tt
=> 1,
u
=> 1,
}->{
$token
->{tag_name}}) {
push
@$active_formatting_elements
,
[
$self
->{open_elements}->[-1]->[0],
$self
->{open_elements}->[-1]->[1],
$token
];
}
elsif
(
$token
->{tag_name} eq
'input'
) {
pop
@{
$self
->{open_elements}};
if
(
$token
->{attributes}->{type}) {
my
$type
=
$token
->{attributes}->{type}->{value};
$type
=~
tr
/A-Z/a-z/;
if
(
$type
eq
'hidden'
) {
}
else
{
delete
$self
->{frameset_ok};
}
}
else
{
delete
$self
->{frameset_ok};
}
delete
$self
->{self_closing};
}
elsif
({
area
=> 1,
br
=> 1,
embed
=> 1,
img
=> 1,
wbr
=> 1,
keygen
=> 1,
}->{
$token
->{tag_name}}) {
pop
@{
$self
->{open_elements}};
delete
$self
->{frameset_ok};
delete
$self
->{self_closing};
}
elsif
(
$token
->{tag_name} eq
'select'
) {
delete
$self
->{frameset_ok};
if
(
$self
->{insertion_mode} & TABLE_IMS or
$self
->{insertion_mode} & BODY_TABLE_IMS) {
$self
->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
}
else
{
$self
->{insertion_mode} = IN_SELECT_IM;
}
}
else
{
}
$token
=
$self
->_get_next_token;
next
B;
}
}
elsif
(
$token
->{type} == END_TAG_TOKEN) {
if
(
$token
->{tag_name} eq
'body'
or
$token
->{tag_name} eq
'html'
) {
my
$i
;
INSCOPE: {
for
(
reverse
@{
$self
->{open_elements}}) {
if
(
$_
->[1] == BODY_EL) {
$i
=
$_
;
last
INSCOPE;
}
elsif
(
$_
->[1] & SCOPING_EL) {
last
;
}
}
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
next
B;
}
for
(@{
$self
->{open_elements}}) {
unless
(
$_
->[1] & ALL_END_TAG_OPTIONAL_EL ||
$_
->[1] == OPTGROUP_EL ||
$_
->[1] == OPTION_EL ||
$_
->[1] == RUBY_COMPONENT_EL) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$_
->[0]->tagName,
token
=>
$token
);
last
;
}
else
{
}
}
$self
->{insertion_mode} = AFTER_BODY_IM;
if
(
$token
->{tag_name} eq
'body'
) {
$token
=
$self
->_get_next_token;
}
else
{
}
next
B;
}
elsif
({
address
=> 1,
article
=> 1,
aside
=> 1,
blockquote
=> 1,
center
=> 1,
details
=> 1,
dir
=> 1,
div
=> 1,
dl
=> 1,
fieldset
=> 1,
figure
=> 1,
footer
=> 1,
header
=> 1,
hgroup
=> 1,
listing
=> 1,
menu
=> 1,
nav
=> 1,
ol
=> 1,
pre
=> 1,
section
=> 1,
ul
=> 1,
figcaption
=> 1,
summary
=> 1,
dd
=> 1,
dt
=> 1,
li
=> 1,
applet
=> 1,
button
=> 1,
marquee
=> 1,
object
=> 1,
}->{
$token
->{tag_name}}) {
my
$i
;
# Find the index of the topmost open element whose tagName equals the end
# tag's name and store it in $i. The search stops, leaving $i undefined, at
# the first SCOPING_EL entry, or for </li> at the first ul/ol element.
# The loop bound is the last index of @{$self->{open_elements}}.
INSCOPE:
for (reverse 0..$#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[0]->tagName eq $token->{tag_name}) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] & SCOPING_EL) {
    last INSCOPE;
  }
  elsif ($token->{tag_name} eq 'li' and
         {ul => 1, ol => 1}->{$node->[0]->localname}) {
    last INSCOPE;
  }
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
else
{
while
({
dd
=> (
$token
->{tag_name} ne
'dd'
),
dt
=> (
$token
->{tag_name} ne
'dt'
),
li
=> (
$token
->{tag_name} ne
'li'
),
option
=> 1,
optgroup
=> 1,
p
=> 1,
rt
=> 1,
rp
=> 1,
}->{
$self
->{open_elements}->[-1]->[0]->tagName}) {
pop
@{
$self
->{open_elements}};
}
if
(
$self
->{open_elements}->[-1]->[0]->tagName
ne
$token
->{tag_name}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$self
->{open_elements}->[-1]->[0]
->tagName,
token
=>
$token
);
}
else
{
}
splice
@{
$self
->{open_elements}},
$i
;
$clear_up_to_marker
->(
$active_formatting_elements
)
if
{
applet
=> 1,
marquee
=> 1,
object
=> 1,
}->{
$token
->{tag_name}};
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'form'
) {
undef
$self
->{form_element};
my
$i
;
# Find the index of the topmost FORM_EL entry on the stack of open elements
# and store it in $i. The search stops, leaving $i undefined, at the first
# SCOPING_EL entry.
# The loop bound is the last index of @{$self->{open_elements}}.
INSCOPE:
for (reverse 0..$#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == FORM_EL) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] & SCOPING_EL) {
    last INSCOPE;
  }
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
else
{
while
(
$self
->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
pop
@{
$self
->{open_elements}};
}
if
(
$self
->{open_elements}->[-1]->[0]->tagName
ne
$token
->{tag_name}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$self
->{open_elements}->[-1]->[0]
->tagName,
token
=>
$token
);
}
else
{
}
splice
@{
$self
->{open_elements}},
$i
;
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
({
h1
=> 1,
h2
=> 1,
h3
=> 1,
h4
=> 1,
h5
=> 1,
h6
=> 1,
}->{
$token
->{tag_name}}) {
my
$i
;
# Find the index of the topmost HEADING_EL entry on the stack of open
# elements and store it in $i. The search stops, leaving $i undefined, at
# the first SCOPING_EL entry.
# The loop bound is the last index of @{$self->{open_elements}}.
INSCOPE:
for (reverse 0..$#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == HEADING_EL) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] & SCOPING_EL) {
    last INSCOPE;
  }
}
unless
(
defined
$i
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
else
{
while
(
$self
->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
pop
@{
$self
->{open_elements}};
}
if
(
$self
->{open_elements}->[-1]->[0]->tagName
ne
$token
->{tag_name}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
}
else
{
}
splice
@{
$self
->{open_elements}},
$i
;
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
(
$token
->{tag_name} eq
'p'
) {
my
$non_optional
;
my
$i
;
# Find the index of the topmost P_EL entry on the stack of open elements and
# store it in $i. The search stops, leaving $i undefined, at the first
# BUTTON_SCOPING_EL entry. While scanning, remember in $non_optional the
# first entry passed over that is not END_TAG_OPTIONAL_EL, so the code that
# follows can report it as 'not closed'.
# The loop bound is the last index of @{$self->{open_elements}}.
INSCOPE:
for (reverse 0..$#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] == P_EL) {
    $i = $_;
    last INSCOPE;
  }
  elsif ($node->[1] & BUTTON_SCOPING_EL) {
    last INSCOPE;
  }
  elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
    # An element with an optional end tag is implicitly closed; nothing to
    # record.
  }
  else {
    $non_optional ||= $node;
  }
}
if
(
defined
$i
) {
if
(
$non_optional
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$non_optional
->[0]->tagName,
token
=>
$token
);
}
else
{
}
splice
@{
$self
->{open_elements}},
$i
;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
'p'
);
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$self
->_data(
$el
,
implied
=> __LINE__);
$insert
->(
$self
,
$el
,
$open_tables
);
}
$token
=
$self
->_get_next_token;
next
B;
}
elsif
({
a
=> 1,
b
=> 1,
big
=> 1,
code
=>1,
em
=> 1,
font
=> 1,
i
=> 1,
nobr
=> 1,
s
=> 1,
small
=> 1,
strike
=> 1,
strong
=> 1,
tt
=> 1,
u
=> 1,
}->{
$token
->{tag_name}}) {
$formatting_end_tag
->(
$self
,
$active_formatting_elements
,
$open_tables
,
$token
);
next
B;
}
elsif
(
$token
->{tag_name} eq
'br'
) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
'br'
,
token
=>
$token
);
my
$insert
=
$self
->{insertion_mode} & TABLE_IMS
?
$insert_to_foster
:
$insert_to_current
;
$reconstruct_active_formatting_elements
->(
$self
,
$insert
,
$active_formatting_elements
,
$open_tables
);
my
$el
;
$el
=
$self
->{document}->createElementNS((HTML_NS),
'br'
);
$self
->_data(
$el
,
manakai_source_line
=>
$token
->{line})
if
defined
$token
->{line};
$self
->_data(
$el
,
manakai_source_column
=>
$token
->{column})
if
defined
$token
->{column};
$insert
->(
$self
,
$el
,
$open_tables
);
$token
=
$self
->_get_next_token;
next
B;
}
else
{
if
(
$token
->{tag_name} eq
'sarcasm'
) {
sleep
0.001;
}
my
$node_i
= -1;
my
$node
=
$self
->{open_elements}->[
$node_i
];
LOOP: {
my
$node_tag_name
=
$node
->[0]->tagName;
$node_tag_name
=~
tr
/A-Z/a-z/;
if
(
$node_tag_name
eq
$token
->{tag_name}) {
while
(
$self
->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL and
$self
->{open_elements}->[-1]->[0]->localname
ne
$token
->{tag_name}) {
pop
@{
$self
->{open_elements}};
$node_i
++;
}
my
$current_tag_name
=
$self
->{open_elements}->[-1]->[0]->tagName;
$current_tag_name
=~
tr
/A-Z/a-z/;
if
(
$current_tag_name
ne
$token
->{tag_name}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'not closed'
,
text
=>
$self
->{open_elements}->[-1]->[0]
->tagName,
token
=>
$token
);
}
else
{
}
splice
@{
$self
->{open_elements}},
$node_i
if
$node_i
< 0;
$token
=
$self
->_get_next_token;
last
LOOP;
}
else
{
if
(
$node
->[1] & SPECIAL_EL or
$node
->[1] & SCOPING_EL) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unmatched end tag'
,
text
=>
$token
->{tag_name},
token
=>
$token
);
$token
=
$self
->_get_next_token;
last
LOOP;
}
}
$node_i
--;
$node
=
$self
->{open_elements}->[
$node_i
];
redo
LOOP;
}
next
B;
}
}
next
B;
}
}
# set_inner_html -- parse a string as HTML and replace the children of a
# DOM node with the result (the "innerHTML setter" / fragment case).
#
# Usage (class or instance method):
#   $parser_or_class->set_inner_html($node, $string, $onerror, $get_wrapper);
#
#   $node        Context node.  A document node (type 9) has its children
#                removed and is then handed to parse_char_string.  An
#                element node (type 1) is used as the fragment context.
#   $string      Character string to parse.  It is read through $_[0], so
#                the caller's scalar is referenced rather than copied.
#   $onerror     Optional code ref receiving parse errors as key/value
#                pairs (type, level, line, column, token, ...).  When
#                omitted, errors are reported with warn().
#   $get_wrapper Optional code ref that receives the input handle and
#                returns the handle actually read from.  Defaults to the
#                identity function.
#
# Dies if $node is neither a document nor an element.  The return value
# is whatever the last evaluated statement yields and is not meant to be
# used by callers.
#
# The prototype is kept for interface compatibility; Perl ignores
# prototypes on method calls.
sub set_inner_html ($$$$;$) {
    # Accept both Class->set_inner_html(...) and $parser->set_inner_html(...).
    my $self;
    if (ref $_[0]) {
        $self = shift;
    }
    else {
        my $class = shift;
        $self = $class->new;
    }

    my $node        = shift;
    my $onerror     = $_[1];
    my $get_wrapper = $_[2] || sub ($) { return $_[0] };

    # The rest of this file calls nodeType on XML::LibXML nodes; the
    # node_type spelling is still honoured for objects that provide it, so
    # callers that worked before behave identically.
    # NOTE(review): XML::LibXML presumably has no node_type method -- confirm.
    my $type_of = sub {
        my $n = shift;
        return $n->can('node_type') ? $n->node_type : $n->nodeType;
    };

    my $nt = $type_of->($node);

    if ($nt == 9) {
        # Document node: empty it and run an ordinary full-document parse.
        my @old_children = $node->childNodes;
        for my $child (@old_children) {
            $node->removeChild($child);
        }
        $self->parse_char_string($_[0] => $node, $onerror, $get_wrapper);
    }
    elsif ($nt == 1) {
        # Element node: parse into a scratch document, then move the
        # resulting nodes under $node.
        my $this_doc       = $node->ownerDocument;
        my $implementation = ref $this_doc;
        my $doc            = $implementation->createDocument;
        $self->_data($doc, manakai_is_html => 1);
        $self->_data($doc)->{'manakai_compat_mode'}
            = $self->_data($this_doc, 'manakai_compat_mode');

        my $p = $self;
        $p->{document} = $doc;

        # Position tracking starts at line 1, column 0.
        $p->{line_prev}   = $p->{line}   = 1;
        $p->{column_prev} = $p->{column} = 0;

        my $input = HTML::HTML5::Parser::Charset::DecodeHandle::CharString
            ->new(\($_[0]));
        $input = $get_wrapper->($input);

        # NOTE(review): the closures below capture $p and are stored in $p,
        # forming a reference cycle that is only broken by the deletes at
        # the end of this branch; an exception thrown during parsing skips
        # that cleanup.

        # set_nc: load the next input character code into {nc}.
        # {nc} becomes -1 at end of input; CR and CR LF are delivered as a
        # single LF; surrogate code points are reported and replaced by
        # U+FFFD.  Line/column counters are updated along the way.
        $p->{set_nc} = sub {
            my $self = shift;

            my $char = '';
            if (defined $self->{next_nc}) {
                # A character was read ahead while handling a CR.
                $char = $self->{next_nc};
                delete $self->{next_nc};
                $self->{nc} = ord $char;
            }
            else {
                # Refill the buffer with a run of "ordinary" characters
                # (anything except NUL, LF, CR and surrogates).
                $self->{char_buffer}     = '';
                $self->{char_buffer_pos} = 0;

                my $count = $input->manakai_read_until(
                    $self->{char_buffer},
                    qr/[^\x00\x0A\x0D\x{D800}-\x{DFFF}]/,
                    $self->{char_buffer_pos},
                );
                if ($count) {
                    $self->{line_prev}   = $self->{line};
                    $self->{column_prev} = $self->{column};
                    $self->{column}++;
                    $self->{nc} = ord substr(
                        $self->{char_buffer},
                        $self->{char_buffer_pos}++, 1,
                    );
                    return;
                }

                # No ordinary run available: read one character that
                # needs special treatment, or hit end of input.
                if ($input->read($char, 1)) {
                    $self->{nc} = ord $char;
                }
                else {
                    $self->{nc} = -1;
                    return;
                }
            }

            ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
            $p->{column}++;

            if ($self->{nc} == 0x000A) {    # LF
                $p->{line}++;
                $p->{column} = 0;
            }
            elsif ($self->{nc} == 0x000D) {    # CR
                # Swallow a following LF; keep any other character for
                # the next call.
                my $next = '';
                if ($input->read($next, 1) and $next ne "\x0A") {
                    $self->{next_nc} = $next;
                }
                $self->{nc} = 0x000A;    # CR and CR LF are both reported as LF
                $p->{line}++;
                $p->{column} = 0;
            }
            elsif (0xD800 <= $self->{nc} and $self->{nc} <= 0xDFFF) {
                $self->{parse_error}->(
                    level => $self->{level}->{must},
                    type  => 'surrogate',
                );
                $self->{nc} = 0xFFFD;    # REPLACEMENT CHARACTER
            }
        };

        # read_until: bulk-read a run of characters that are neither in
        # the class given as $_[1] nor NUL/LF/CR/surrogates.  The run is
        # stored into $_[0] at offset $_[2]; the number of characters read
        # is returned (0 when nothing could be read).
        $p->{read_until} = sub {
            return 0 if defined $p->{next_nc};

            my $pattern = qr/[^$_[1]\x00\x0A\x0D\x{D800}-\x{DFFF}]/;
            my $offset  = $_[2] || 0;

            if ($p->{char_buffer_pos} < length $p->{char_buffer}) {
                # Serve the request from what is already buffered.
                pos($p->{char_buffer}) = $p->{char_buffer_pos};
                if ($p->{char_buffer} =~ /\G(?>$pattern)+/) {
                    substr($_[0], $offset)
                        = substr($p->{char_buffer}, $-[0], $+[0] - $-[0]);
                    my $count = $+[0] - $-[0];
                    if ($count) {
                        $p->{column}          += $count;
                        $p->{char_buffer_pos} += $count;
                        $p->{line_prev}   = $p->{line};
                        $p->{column_prev} = $p->{column} - 1;
                        $p->{nc}          = -1;
                    }
                    return $count;
                }
                else {
                    return 0;
                }
            }
            else {
                # Buffer exhausted: read straight from the input handle.
                my $count = $input->manakai_read_until($_[0], $pattern, $_[2]);
                if ($count) {
                    $p->{column}      += $count;
                    $p->{column_prev} += $count;
                    $p->{nc} = -1;
                }
                return $count;
            }
        };

        # Error sink: the caller's handler, or a default that warns with
        # the best available position (the token's, when it carries one).
        my $ponerror = $onerror || sub {
            my (%opt) = @_;
            my $line   = $opt{line};
            my $column = $opt{column};
            if (defined $opt{token} and defined $opt{token}->{line}) {
                $line   = $opt{token}->{line};
                $column = $opt{token}->{column};
            }
            warn "Parse error ($opt{type}) at line $line column $column\n";
        };

        # Parser-level errors carry the current position by default;
        # explicit arguments in @_ come later and therefore win.
        $p->{parse_error} = sub {
            $ponerror->(line => $p->{line}, column => $p->{column}, @_);
        };

        # Errors raised by the character input layer.
        my $char_onerror = sub {
            my (undef, $type, %opt) = @_;
            $ponerror->(
                layer  => 'encode',
                line   => $p->{line},
                column => $p->{column} + 1,
                %opt,
                type   => $type,
            );
        };
        $input->onerror($char_onerror);

        $p->_initialize_tokenizer;
        $p->_initialize_tree_constructor;

        # Choose the initial tokenizer state and the context-node category
        # from the context element's namespace and local name.
        my $node_ns = $node->namespaceURI || '';
        my $node_ln = $node->localname;
        if ($node_ns eq HTML_NS) {
            if ($node_ln eq 'title' or $node_ln eq 'textarea') {
                $p->{state} = RCDATA_STATE;
            }
            elsif ($node_ln eq 'script') {
                $p->{state} = SCRIPT_DATA_STATE;
            }
            elsif ({
                style    => 1,
                xmp      => 1,
                iframe   => 1,
                noembed  => 1,
                noframes => 1,
                noscript => 1,
            }->{$node_ln}) {
                $p->{state} = RAWTEXT_STATE;
            }
            elsif ($node_ln eq 'plaintext') {
                $p->{state} = PLAINTEXT_STATE;
            }
            $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
        }
        elsif ($node_ns eq SVG_NS) {
            $p->{inner_html_node} = [
                $node,
                $el_category_f->{$node_ns}->{$node_ln}
                    || (FOREIGN_EL | SVG_EL),
            ];
        }
        elsif ($node_ns eq MML_NS) {
            $p->{inner_html_node} = [
                $node,
                $el_category_f->{$node_ns}->{$node_ln}
                    || (FOREIGN_EL | MML_EL),
            ];
        }
        else {
            $p->{inner_html_node} = [$node, FOREIGN_EL];
        }

        # Root |html| element of the scratch document.  It must exist
        # before it can be appended and pushed onto the stack of open
        # elements.
        my $root = $doc->createElementNS((HTML_NS), 'html');
        $doc->appendChild($root);
        push @{$p->{open_elements}}, [$root, $el_category->{html}];

        undef $p->{head_element};

        $p->_reset_insertion_mode;

        # Walk from the context node up through its ancestors; the nearest
        # HTML-namespace |form| element becomes the form element pointer.
        my $anode = $node;
        AN: while (defined $anode) {
            if ($type_of->($anode) == 1) {
                my $nsuri = $anode->namespaceURI;
                if (defined $nsuri and $nsuri eq HTML_NS) {
                    if ($anode->tagName eq 'form') {
                        $p->{form_element} = $anode;
                        last AN;
                    }
                }
            }
            $anode = $anode->parentNode;
        }

        # The input is already a character string, so the encoding is
        # treated as certain.
        $p->{confident} = 1;

        # Prime the tree constructor with the first token.  $token is not
        # declared here on purpose: it is presumably the variable shared
        # with the tree-construction code elsewhere in this file -- verify.
        $token = $p->_get_next_token;
        $p->_tree_construction_main;

        # Replace the context node's children with the parsed nodes.
        my @old_children = $node->childNodes;
        for my $child (@old_children) {
            $node->removeChild($child);
        }

        my @new_children = $root->childNodes;
        for my $child (@new_children) {
            $this_doc->adoptNode($child);
            $node->appendChild($child);
        }

        $p->_terminate_tree_constructor;

        # Drop the closures installed above (they reference $p).
        delete $p->{set_nc};
        delete $p->{read_until};
        delete $p->{parse_error};
    }
    else {
        die "$0: |set_inner_html| is not defined for node of type $nt";
    }
}
}
# Constructor: stores the given key/value pairs in a fresh hash and
# blesses it into $class.
#
#   my $obj = $class->new(key => $value, ...);
sub new
{
    my $class = shift;
    my %opts  = @_;

    return bless(\%opts, $class);
}
# Builds an instance of $class from the given key/value pairs (via
# $class->new) and raises it as an exception with die.
#
#   $class->throw(key => $value, ...);    # never returns
sub throw
{
    my $class = shift;
    my %opts  = @_;

    my $exception = $class->new(%opts);
    die $exception;
}
1;