our
$VERSION
=
'0.992'
;
no
warnings
'utf8'
;
# Exporter wiring, performed at compile time: the package inherits from
# Exporter and offers every token-type constant both individually
# (@EXPORT_OK) and collectively under the ':token' tag.
BEGIN {
    my @token_constant_names = qw(
        DOCTYPE_TOKEN
        COMMENT_TOKEN
        START_TAG_TOKEN
        END_TAG_TOKEN
        END_OF_FILE_TOKEN
        CHARACTER_TOKEN
        PI_TOKEN
        ABORT_TOKEN
        END_OF_DOCTYPE_TOKEN
        ATTLIST_TOKEN
        ELEMENT_TOKEN
        GENERAL_ENTITY_TOKEN
        PARAMETER_ENTITY_TOKEN
        NOTATION_TOKEN
    );

    push our @ISA, 'Exporter';

    # Two independent copies of the same list, as before.
    our @EXPORT_OK   = @token_constant_names;
    our %EXPORT_TAGS = (token => [@token_constant_names]);
}
# Token type codes. The action tables below store these in a token's
# `type` field and in the `emit` field of an action.
sub DOCTYPE_TOKEN          () { 1 }
sub COMMENT_TOKEN          () { 2 }
sub START_TAG_TOKEN        () { 3 }
sub END_TAG_TOKEN          () { 4 }
sub END_OF_FILE_TOKEN      () { 5 }
sub CHARACTER_TOKEN        () { 6 }
sub PI_TOKEN               () { 7 }
sub ABORT_TOKEN            () { 8 }
sub END_OF_DOCTYPE_TOKEN   () { 9 }
sub ATTLIST_TOKEN          () { 10 }
sub ELEMENT_TOKEN          () { 11 }
sub GENERAL_ENTITY_TOKEN   () { 12 }
sub PARAMETER_ENTITY_TOKEN () { 13 }
sub NOTATION_TOKEN         () { 14 }

# Compile-time import of the ':token' tag from
# HTML::HTML5::Parser::Tokenizer, after the constants above exist.
BEGIN {
    HTML::HTML5::Parser::Tokenizer->import(':token');
}
# Tokenizer state numbers. Each value is used as the first index into the
# $Action / $XMLAction dispatch tables, so every state needs a distinct
# number. The numbering is not contiguous.

# -- Character data states --
sub DATA_STATE () { 0 }
sub RCDATA_STATE () { 107 }
sub RAWTEXT_STATE () { 108 }
sub SCRIPT_DATA_STATE () { 109 }
sub PLAINTEXT_STATE () { 110 }

# -- Tag open / end tag open states --
sub TAG_OPEN_STATE () { 2 }
sub RCDATA_LT_STATE () { 111 }
sub RAWTEXT_LT_STATE () { 112 }
sub SCRIPT_DATA_LT_STATE () { 113 }
sub CLOSE_TAG_OPEN_STATE () { 3 }
sub RCDATA_END_TAG_OPEN_STATE () { 114 }
sub RAWTEXT_END_TAG_OPEN_STATE () { 115 }
sub SCRIPT_DATA_END_TAG_OPEN_STATE () { 116 }

# -- Script data escape states --
sub SCRIPT_DATA_ESCAPE_START_STATE () { 1 }
sub SCRIPT_DATA_ESCAPE_START_DASH_STATE () { 12 }
sub SCRIPT_DATA_ESCAPED_STATE () { 117 }
sub SCRIPT_DATA_ESCAPED_DASH_STATE () { 118 }
sub SCRIPT_DATA_ESCAPED_DASH_DASH_STATE () { 119 }
sub SCRIPT_DATA_ESCAPED_LT_STATE () { 120 }
sub SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE () { 121 }
sub SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE () { 122 }
sub SCRIPT_DATA_DOUBLE_ESCAPED_STATE () { 123 }
sub SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE () { 124 }
sub SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE () { 125 }
sub SCRIPT_DATA_DOUBLE_ESCAPED_LT_STATE () { 126 }
sub SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE () { 127 }

# -- Tag name and attribute states --
sub TAG_NAME_STATE () { 4 }
sub BEFORE_ATTRIBUTE_NAME_STATE () { 5 }
sub ATTRIBUTE_NAME_STATE () { 6 }
sub AFTER_ATTRIBUTE_NAME_STATE () { 7 }
sub BEFORE_ATTRIBUTE_VALUE_STATE () { 8 }
sub ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE () { 9 }
sub ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE () { 10 }
sub ATTRIBUTE_VALUE_UNQUOTED_STATE () { 11 }

# -- Markup declaration and comment states --
sub MARKUP_DECLARATION_OPEN_STATE () { 13 }
sub COMMENT_START_STATE () { 14 }
sub COMMENT_START_DASH_STATE () { 15 }
sub COMMENT_STATE () { 16 }
sub COMMENT_END_STATE () { 17 }
sub COMMENT_END_BANG_STATE () { 102 }
sub COMMENT_END_DASH_STATE () { 18 }
sub BOGUS_COMMENT_STATE () { 19 }

# -- DOCTYPE states --
sub DOCTYPE_STATE () { 20 }
sub BEFORE_DOCTYPE_NAME_STATE () { 21 }
sub DOCTYPE_NAME_STATE () { 22 }
sub AFTER_DOCTYPE_NAME_STATE () { 23 }
sub AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE () { 104 }
sub BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 24 }
sub DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE () { 25 }
sub DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE () { 26 }
sub AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 27 }
sub BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 28 }
sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE () { 29 }
sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 }
sub BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDS_STATE () { 105 }
sub AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE () { 106 }
sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 31 }
sub BOGUS_DOCTYPE_STATE () { 32 }

# -- After-attribute, self-closing, CDATA and "<!" keyword states --
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }
sub SELF_CLOSING_START_TAG_STATE () { 34 }
sub CDATA_SECTION_STATE () { 35 }
sub MD_HYPHEN_STATE () { 36 }
sub MD_DOCTYPE_STATE () { 37 }
sub MD_CDATA_STATE () { 38 }
sub CDATA_SECTION_MSE1_STATE () { 40 }
sub CDATA_SECTION_MSE2_STATE () { 41 }
sub PUBLIC_STATE () { 42 }
sub SYSTEM_STATE () { 43 }

# -- Character reference states --
sub ENTITY_STATE () { 44 }
sub ENTITY_HASH_STATE () { 45 }
sub NCR_NUM_STATE () { 46 }
sub HEXREF_X_STATE () { 47 }
sub HEXREF_HEX_STATE () { 48 }
sub ENTITY_NAME_STATE () { 49 }

# -- "]]>" detection in data (entered via the XML action table) --
sub DATA_MSE1_STATE () { 50 }
sub DATA_MSE2_STATE () { 128 }

# -- Processing instruction states --
sub PI_STATE () { 51 }
sub PI_TARGET_STATE () { 52 }
sub PI_TARGET_AFTER_STATE () { 53 }
sub PI_DATA_STATE () { 54 }
sub PI_AFTER_STATE () { 55 }
sub PI_DATA_AFTER_STATE () { 56 }

# -- DOCTYPE internal subset and markup declaration states --
sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
sub DOCTYPE_TAG_STATE () { 60 }
sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
sub MD_ATTLIST_STATE () { 62 }
sub MD_E_STATE () { 63 }
sub MD_ELEMENT_STATE () { 64 }
sub MD_ENTITY_STATE () { 65 }
sub MD_NOTATION_STATE () { 66 }
sub DOCTYPE_MD_STATE () { 67 }
sub BEFORE_MD_NAME_STATE () { 68 }
sub MD_NAME_STATE () { 69 }
sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
sub ALLOWED_TOKEN_STATE () { 77 }
sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
sub BEFORE_NDATA_STATE () { 85 }
sub NDATA_STATE () { 86 }
sub AFTER_NDATA_STATE () { 87 }
sub BEFORE_NOTATION_NAME_STATE () { 88 }
sub NOTATION_NAME_STATE () { 89 }
sub DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE () { 90 }
sub DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE () { 91 }
sub ENTITY_VALUE_ENTITY_STATE () { 92 }
sub AFTER_ELEMENT_NAME_STATE () { 93 }
sub BEFORE_ELEMENT_CONTENT_STATE () { 94 }
sub CONTENT_KEYWORD_STATE () { 95 }
sub AFTER_CM_GROUP_OPEN_STATE () { 96 }
sub CM_ELEMENT_NAME_STATE () { 97 }
sub AFTER_CM_ELEMENT_NAME_STATE () { 98 }
sub AFTER_CM_GROUP_CLOSE_STATE () { 99 }
sub AFTER_MD_DEF_STATE () { 100 }
sub BOGUS_MD_STATE () { 101 }

# Single-bit flag, not a tokenizer state.
# NOTE(review): its use is not visible in this part of the file.
sub FOREIGN_EL () { 0b1_00000000000 }
# Code point remapping table: key is a code point, value is the code point
# to use in its place.
# NOTE(review): presumably consulted when resolving numeric character
# references; the consumer is not visible in this part of the file.
my $charref_map = {
    0x00 => 0xFFFD,
    0x0D => 0x000D,

    # 0x80..0x9F
    0x80 => 0x20AC, 0x81 => 0x0081, 0x82 => 0x201A, 0x83 => 0x0192,
    0x84 => 0x201E, 0x85 => 0x2026, 0x86 => 0x2020, 0x87 => 0x2021,
    0x88 => 0x02C6, 0x89 => 0x2030, 0x8A => 0x0160, 0x8B => 0x2039,
    0x8C => 0x0152, 0x8D => 0x008D, 0x8E => 0x017D, 0x8F => 0x008F,
    0x90 => 0x0090, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201C,
    0x94 => 0x201D, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014,
    0x98 => 0x02DC, 0x99 => 0x2122, 0x9A => 0x0161, 0x9B => 0x203A,
    0x9C => 0x0153, 0x9D => 0x009D, 0x9E => 0x017E, 0x9F => 0x0178,
};

# Every surrogate code point maps to U+FFFD.
for my $surrogate (0xD800 .. 0xDFFF) {
    $charref_map->{$surrogate} = 0xFFFD;
}

# These code points are present in the table but map to themselves:
# selected C0 controls, DEL, U+FDD0..U+FDEF, and the last two code points
# (xFFFE / xFFFF) of each of the 17 planes.
my @self_mapped = (
    0x0001 .. 0x0008, 0x000B, 0x000E .. 0x001F, 0x007F,
    0xFDD0 .. 0xFDEF,
    (map { (($_ << 16) | 0xFFFE, ($_ << 16) | 0xFFFF) } 0x00 .. 0x10),
);
@{$charref_map}{@self_mapped} = @self_mapped;
# Negative sentinels, distinct from any real (non-negative) code point.
# The tokenizer's next-character slot ($self->{nc}) is initialised to -1.
sub EOF_CHAR   () { -1 }
sub NEVER_CHAR () { -2 }
# Reset the per-document tokenizer state stored in $self.
#
# Puts the state machine back into DATA_STATE, forgets the current token
# (ct), the current attribute (ca), the last start tag name and any pending
# self-closing flag, empties the character buffer, fetches the first input
# character through the caller-supplied $self->{set_nc} callback, and
# finally clears the queue of pending tokens.
#
# Returns the new (empty) token queue array reference, as the value of the
# last statement.
sub _initialize_tokenizer ($) {
    my $self = shift;

    $self->{state} = DATA_STATE;
    undef $self->{ct};
    undef $self->{ca};
    undef $self->{last_stag_name};
    delete $self->{self_closing};

    $self->{char_buffer}     = '';
    $self->{char_buffer_pos} = 0;
    $self->{nc}              = -1;

    # The buffer was emptied just above, so there is never a buffered
    # character to take here; the first character always comes from the
    # set_nc callback.
    $self->{set_nc}->($self);

    $self->{token} = [];
}
# Set (code point => 1) of the characters treated as white space:
# TAB, LF, FF, CR and SPACE. U+000B is deliberately absent.
my $is_space = {
    map { ($_ => 1) } 0x0009, 0x000A, 0x000C, 0x000D, 0x0020
};
# Column keys of the action tables for character classes. They sit above
# 0x7F so they cannot collide with the ASCII code points that are used
# directly as column indices.
sub KEY_ELSE_CHAR   () { 255 }    # fallback column
sub KEY_ULATIN_CHAR () { 254 }    # A-Z
sub KEY_LLATIN_CHAR () { 253 }    # a-z
sub KEY_EOF_CHAR    () { 252 }    # end of input
sub KEY_SPACE_CHAR  () { 251 }    # white space
# Dispatch tables of the tokenizer. $Action->[STATE][KEY] is a hash that
# describes what to do when the character class KEY is seen in STATE;
# $XMLAction holds the rows that take precedence when parsing XML.
my $Action;
my $XMLAction;

# ---- Data state ----
$Action->[DATA_STATE][0x0026] = {    # "&"
    name      => 'data &',
    state     => ENTITY_STATE,
    state_set => { entity_add => -1, prev_state => DATA_STATE },
};
$Action->[DATA_STATE][0x003C] = {    # "<"
    name  => 'data <',
    state => TAG_OPEN_STATE,
};
$Action->[DATA_STATE][KEY_EOF_CHAR] = {
    name      => 'data eof',
    emit      => END_OF_FILE_TOKEN,
    reconsume => 1,
};
$Action->[DATA_STATE][0x0000] = {    # NUL
    name  => 'data null',
    emit  => CHARACTER_TOKEN,
    error => 'NULL',
};
$Action->[DATA_STATE][KEY_ELSE_CHAR] = {
    name                 => 'data else',
    emit                 => CHARACTER_TOKEN,
    emit_data_read_until => qq{\x00<&},
};

# XML only: "]" is tracked so that a stray "]]>" can be detected.
$XMLAction->[DATA_STATE][0x005D] = {    # "]"
    name  => 'data ]',
    state => DATA_MSE1_STATE,
    emit  => CHARACTER_TOKEN,
};
$XMLAction->[DATA_STATE][KEY_ELSE_CHAR] = {
    name                 => 'data else xml',
    emit                 => CHARACTER_TOKEN,
    emit_data_read_until => qq{\x00<&\]},
};
# ---- RCDATA, RAWTEXT, script data and PLAINTEXT states ----

# RCDATA
$Action->[RCDATA_STATE][0x0026] = {    # "&"
    name      => 'rcdata &',
    state     => ENTITY_STATE,
    state_set => { entity_add => -1, prev_state => RCDATA_STATE },
};
$Action->[RCDATA_STATE][0x003C] = {    # "<"
    name  => 'rcdata <',
    state => RCDATA_LT_STATE,
};
$Action->[RCDATA_STATE][0x0000] = {    # NUL is replaced by U+FFFD
    name      => 'rcdata null',
    emit      => CHARACTER_TOKEN,
    emit_data => "\x{FFFD}",
    error     => 'NULL',
};
$Action->[RCDATA_STATE][KEY_ELSE_CHAR] = {
    name                 => 'rcdata else',
    emit                 => CHARACTER_TOKEN,
    emit_data_read_until => qq{\x00<&},
};

# RAWTEXT
$Action->[RAWTEXT_STATE][0x003C] = {    # "<"
    name  => 'rawtext <',
    state => RAWTEXT_LT_STATE,
};
$Action->[RAWTEXT_STATE][KEY_ELSE_CHAR] = {
    name                 => 'rawtext else',
    emit                 => CHARACTER_TOKEN,
    emit_data_read_until => qq{\x00<},
};

# Script data ("else" is the very same action hash as RAWTEXT's)
$Action->[SCRIPT_DATA_STATE][0x003C] = {    # "<"
    name  => 'script data <',
    state => SCRIPT_DATA_LT_STATE,
};
$Action->[SCRIPT_DATA_STATE][KEY_ELSE_CHAR] =
    $Action->[RAWTEXT_STATE][KEY_ELSE_CHAR];

# PLAINTEXT
$Action->[PLAINTEXT_STATE][KEY_ELSE_CHAR] = {
    name                 => 'plaintext else',
    emit                 => CHARACTER_TOKEN,
    emit_data_read_until => qq{\x00},
};

# All four states reuse the data state's EOF action hash, and the last
# three reuse RCDATA's NUL action hash (shared references, not copies).
$Action->[$_][KEY_EOF_CHAR] = $Action->[DATA_STATE][KEY_EOF_CHAR]
    for (RCDATA_STATE, RAWTEXT_STATE, SCRIPT_DATA_STATE, PLAINTEXT_STATE);
$Action->[$_][0x0000] = $Action->[RCDATA_STATE][0x0000]
    for (RAWTEXT_STATE, SCRIPT_DATA_STATE, PLAINTEXT_STATE);
# ---- Tag open state (a "<" has been consumed) ----
# In `ct`, `append_tag_name => N` appends chr(current char + N) to the tag
# name, so 0x0020 folds an ASCII upper-case letter to lower case and
# 0x0000 appends the character unchanged.

$Action->[TAG_OPEN_STATE][0x0021] = {    # "!"
    name  => 'tag open !',
    state => MARKUP_DECLARATION_OPEN_STATE,
};
$Action->[TAG_OPEN_STATE][0x002F] = {    # "/"
    name  => 'tag open /',
    state => CLOSE_TAG_OPEN_STATE,
};
$Action->[TAG_OPEN_STATE][KEY_ULATIN_CHAR] = {
    name  => 'tag open uc',
    ct    => { type => START_TAG_TOKEN, delta => 1, append_tag_name => 0x0020 },
    state => TAG_NAME_STATE,
};
$Action->[TAG_OPEN_STATE][KEY_LLATIN_CHAR] = {
    name  => 'tag open lc',
    ct    => { type => START_TAG_TOKEN, delta => 1, append_tag_name => 0x0000 },
    state => TAG_NAME_STATE,
};
$Action->[TAG_OPEN_STATE][0x003F] = {    # "?": bogus comment in HTML
    name        => 'tag open ?',
    state       => BOGUS_COMMENT_STATE,
    error       => 'pio',
    error_delta => 1,
    ct          => { type => COMMENT_TOKEN },
    reconsume   => 1,
};

# White space, ">" and anything else share ONE action hash: the "<" was
# not a tag after all and is emitted as text.
{
    my $bare_stago = {
        name        => 'tag open else',
        error       => 'bare stago',
        error_delta => 1,
        state       => DATA_STATE,
        reconsume   => 1,
        emit        => CHARACTER_TOKEN,
        emit_data   => '<',
        emit_delta  => 1,
    };
    $Action->[TAG_OPEN_STATE][$_] = $bare_stago
        for (0x003E, KEY_SPACE_CHAR, KEY_ELSE_CHAR);
}

# XML rows: case is preserved, "<?" starts a processing instruction, and
# any other character begins a start tag name.
$XMLAction->[TAG_OPEN_STATE][KEY_ULATIN_CHAR] = {
    name  => 'tag open uc xml',
    ct    => { type => START_TAG_TOKEN, delta => 1, append_tag_name => 0x0000 },
    state => TAG_NAME_STATE,
};
$XMLAction->[TAG_OPEN_STATE][0x003F] = {    # "?"
    name  => 'tag open ? xml',
    state => PI_STATE,
};
$XMLAction->[TAG_OPEN_STATE][0x0000] = {    # NUL
    name  => 'tag open null xml',
    ct    => { type => START_TAG_TOKEN, delta => 1, append_tag_name => 0xFFFD },
    error => 'NULL',
    state => TAG_NAME_STATE,
};
$XMLAction->[TAG_OPEN_STATE][KEY_ELSE_CHAR] = {
    name  => 'tag open else xml',
    ct    => { type => START_TAG_TOKEN, delta => 1, append_tag_name => 0x0000 },
    state => TAG_NAME_STATE,
};
# ---- "<" seen inside RCDATA / RAWTEXT / script data / escaped script data ----
# Each row of this list: [ less-than state, label used in the action
# names, state entered on "/", state returned to on any other character ].
for my $row (
    [ RCDATA_LT_STATE,      'rcdata',      RCDATA_END_TAG_OPEN_STATE,      RCDATA_STATE ],
    [ RAWTEXT_LT_STATE,     'rawtext',     RAWTEXT_END_TAG_OPEN_STATE,     RAWTEXT_STATE ],
    [ SCRIPT_DATA_LT_STATE, 'script data', SCRIPT_DATA_END_TAG_OPEN_STATE, SCRIPT_DATA_STATE ],
    [ SCRIPT_DATA_ESCAPED_LT_STATE, 'script data escaped',
      SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE, SCRIPT_DATA_ESCAPED_STATE ],
) {
    my ($lt_state, $label, $end_tag_open_state, $text_state) = @$row;

    # "</": possible end tag; the buffer is cleared.
    $Action->[$lt_state][0x002F] = {
        name   => "$label lt /",
        state  => $end_tag_open_state,
        buffer => { clear => 1 },
    };

    # Anything else: the "<" was plain text; reprocess the character in
    # the text state.
    $Action->[$lt_state][KEY_ELSE_CHAR] = {
        name      => "$label lt else",
        state     => $text_state,
        reconsume => 1,
        emit      => CHARACTER_TOKEN,
        emit_data => '<',
    };
}

# "<!" in script data: may be the start of an escaped ("<!--") section.
$Action->[SCRIPT_DATA_LT_STATE][0x0021] = {
    name      => 'script data lt !',
    state     => SCRIPT_DATA_ESCAPE_START_STATE,
    emit      => CHARACTER_TOKEN,
    emit_data => '<!',
};

# "<" followed by a letter in escaped script data: possible nested
# "<script", checked in the double escape start state.
$Action->[SCRIPT_DATA_ESCAPED_LT_STATE][KEY_ULATIN_CHAR] = {
    name             => 'script data escaped lt uc',
    emit             => CHARACTER_TOKEN,
    emit_data        => '<',
    emit_data_append => 1,
    buffer           => { clear => 1, append => 0x0020 },
    state            => SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE,
};
$Action->[SCRIPT_DATA_ESCAPED_LT_STATE][KEY_LLATIN_CHAR] = {
    name             => 'script data escaped lt lc',
    emit             => CHARACTER_TOKEN,
    emit_data        => '<',
    emit_data_append => 1,
    buffer           => { clear => 1, append => 0x0000 },
    state            => SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE,
};
# ---- Close tag open state ("</" has been consumed) ----
# `delta => 2` / `error_delta => 2` shift the reported column back by the
# two characters of "</".

$Action->[CLOSE_TAG_OPEN_STATE][KEY_ULATIN_CHAR] = {
    name  => 'end tag open uc',
    ct    => { type => END_TAG_TOKEN, delta => 2, append_tag_name => 0x0020 },
    state => TAG_NAME_STATE,
};
$Action->[CLOSE_TAG_OPEN_STATE][KEY_LLATIN_CHAR] = {
    name  => 'end tag open lc',
    ct    => { type => END_TAG_TOKEN, delta => 2, append_tag_name => 0x0000 },
    state => TAG_NAME_STATE,
};
$Action->[CLOSE_TAG_OPEN_STATE][0x003E] = {    # "</>"
    name        => 'end tag open >',
    error       => 'empty end tag',
    error_delta => 2,
    state       => DATA_STATE,
};
$Action->[CLOSE_TAG_OPEN_STATE][KEY_EOF_CHAR] = {
    name       => 'end tag open eof',
    error      => 'bare etago',
    state      => DATA_STATE,
    reconsume  => 1,
    emit       => CHARACTER_TOKEN,
    emit_data  => '</',
    emit_delta => 2,
};

# White space and every other character share ONE action hash: the
# construct becomes a bogus comment.
{
    my $bogus_end_tag = {
        name        => 'end tag open else',
        error       => 'bogus end tag',
        error_delta => 2,
        state       => BOGUS_COMMENT_STATE,
        ct          => { type => COMMENT_TOKEN, delta => 2 },
        reconsume   => 1,
    };
    $Action->[CLOSE_TAG_OPEN_STATE][$_] = $bogus_end_tag
        for (KEY_ELSE_CHAR, KEY_SPACE_CHAR);
}

# XML rows.
$XMLAction->[CLOSE_TAG_OPEN_STATE][KEY_ULATIN_CHAR] = {
    name  => 'end tag open uc xml',
    ct    => { type => END_TAG_TOKEN, delta => 2, append_tag_name => 0x0000 },
    state => TAG_NAME_STATE,
};
$XMLAction->[CLOSE_TAG_OPEN_STATE][0x003E] = {    # "</>" still yields an end tag token
    name        => 'end tag open > xml',
    error       => 'empty end tag',
    error_delta => 2,
    state       => DATA_STATE,
    ct          => { type => END_TAG_TOKEN, delta => 2 },
    emit        => '',
};
$XMLAction->[CLOSE_TAG_OPEN_STATE][0x0000] = {    # NUL
    name  => 'end tag open null xml',
    ct    => { type => END_TAG_TOKEN, delta => 2, append_tag_name => 0xFFFD },
    error => 'NULL',
    state => TAG_NAME_STATE,
};
$XMLAction->[CLOSE_TAG_OPEN_STATE][KEY_ELSE_CHAR] = {
    name  => 'end tag open else xml',
    ct    => { type => END_TAG_TOKEN, delta => 2, append_tag_name => 0x0000 },
    state => TAG_NAME_STATE,
};
# ---- Tag name state ----
# Rows without a `state` key leave the tokenizer in the tag name state.

$Action->[TAG_NAME_STATE][KEY_SPACE_CHAR] = {
    name  => 'tag name sp',
    state => BEFORE_ATTRIBUTE_NAME_STATE,
};
$Action->[TAG_NAME_STATE][0x002F] = {    # "/"
    name  => 'tag name /',
    state => SELF_CLOSING_START_TAG_STATE,
};
$Action->[TAG_NAME_STATE][0x003E] = {    # ">"
    name  => 'tag name >',
    state => DATA_STATE,
    emit  => '',
};
$Action->[TAG_NAME_STATE][KEY_EOF_CHAR] = {
    name      => 'tag name eof',
    error     => 'unclosed tag',
    state     => DATA_STATE,
    reconsume => 1,
};

# Characters that extend the tag name.
$Action->[TAG_NAME_STATE][KEY_ULATIN_CHAR] = {    # folded to lower case
    name => 'tag name uc',
    ct   => { append_tag_name => 0x0020 },
};
$Action->[TAG_NAME_STATE][0x0000] = {    # NUL (the offset is added to code point 0)
    name  => 'tag name null',
    ct    => { append_tag_name => 0xFFFD },
    error => 'NULL',
};
$Action->[TAG_NAME_STATE][KEY_ELSE_CHAR] = {
    name => 'tag name else',
    ct   => { append_tag_name => 0x0000 },
};

# XML keeps the case of upper-case letters.
$XMLAction->[TAG_NAME_STATE][KEY_ULATIN_CHAR] = {
    name => 'tag name uc xml',
    ct   => { append_tag_name => 0x0000 },
};
# ---- Script data escape start states ("<!" seen inside script data) ----
# The two "-" of a "<!--" opener are matched here; each one is emitted as
# a character token.

# First "-" after "<!".
$Action->[SCRIPT_DATA_ESCAPE_START_STATE]->[0x002D] = {
  name => 'script data escape start -',
  state => SCRIPT_DATA_ESCAPE_START_DASH_STATE,
  emit => CHARACTER_TOKEN,
  emit_data => '-',
};

# Second "-": "<!--" is complete. The HTML tokenizer enters the escaped
# DASH DASH state here (not the plain escaped state), because the two
# dashes just consumed also count as the "--" of a closing "-->". That way
# an immediately following ">" ("<!-->") is handled by the dash-dash ">"
# row and returns to script data.
$Action->[SCRIPT_DATA_ESCAPE_START_DASH_STATE]->[0x002D] = {
  name => 'script data escape start dash -',
  state => SCRIPT_DATA_ESCAPED_DASH_DASH_STATE,
  emit => CHARACTER_TOKEN,
  emit_data => '-',
};

# Anything else: not an escape opener; reprocess the character as
# ordinary script data. Both states share ONE action hash.
$Action->[SCRIPT_DATA_ESCAPE_START_STATE]->[KEY_ELSE_CHAR] = {
  name => 'script data escape start else',
  state => SCRIPT_DATA_STATE,
  reconsume => 1,
};
$Action->[SCRIPT_DATA_ESCAPE_START_DASH_STATE]->[KEY_ELSE_CHAR] =
    $Action->[SCRIPT_DATA_ESCAPE_START_STATE]->[KEY_ELSE_CHAR];
# ---- Escaped / double escaped script data: "-", "<", ">" and EOF ----

# "-": each state advances one step along  plain -> dash -> dash dash;
# the dash-dash rows have no `state` key and so stay where they are.
$Action->[SCRIPT_DATA_ESCAPED_STATE][0x002D] = {
    name      => 'script data escaped -',
    state     => SCRIPT_DATA_ESCAPED_DASH_STATE,
    emit      => CHARACTER_TOKEN,
    emit_data => '-',
};
$Action->[SCRIPT_DATA_ESCAPED_DASH_STATE][0x002D] = {
    name      => 'script data escaped dash -',
    state     => SCRIPT_DATA_ESCAPED_DASH_DASH_STATE,
    emit      => CHARACTER_TOKEN,
    emit_data => '-',
};
$Action->[SCRIPT_DATA_ESCAPED_DASH_DASH_STATE][0x002D] = {
    name      => 'script data escaped dash dash -',
    emit      => CHARACTER_TOKEN,
    emit_data => '-',
};
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_STATE][0x002D] = {
    name      => 'script data double escaped -',
    state     => SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE,
    emit      => CHARACTER_TOKEN,
    emit_data => '-',
};
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE][0x002D] = {
    name      => 'script data double escaped -',
    state     => SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE,
    emit      => CHARACTER_TOKEN,
    emit_data => '-',
};
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE][0x002D] = {
    name      => 'script data double escaped dash dash -',
    emit      => CHARACTER_TOKEN,
    emit_data => '-',
};

# "<" in the escaped states: nothing is emitted by these rows.
$Action->[SCRIPT_DATA_ESCAPED_STATE][0x003C] = {
    name  => 'script data escaped <',
    state => SCRIPT_DATA_ESCAPED_LT_STATE,
};
$Action->[SCRIPT_DATA_ESCAPED_DASH_STATE][0x003C] = {
    name  => 'script data escaped dash <',
    state => SCRIPT_DATA_ESCAPED_LT_STATE,
};
$Action->[SCRIPT_DATA_ESCAPED_DASH_DASH_STATE][0x003C] = {
    name  => 'script data escaped dash dash <',
    state => SCRIPT_DATA_ESCAPED_LT_STATE,
};

# "<" in the double escaped states: the "<" is emitted right away.
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_STATE][0x003C] = {
    name      => 'script data double escaped <',
    state     => SCRIPT_DATA_DOUBLE_ESCAPED_LT_STATE,
    emit      => CHARACTER_TOKEN,
    emit_data => '<',
};
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE][0x003C] = {
    name      => 'script data double escaped dash <',
    state     => SCRIPT_DATA_DOUBLE_ESCAPED_LT_STATE,
    emit      => CHARACTER_TOKEN,
    emit_data => '<',
};
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE][0x003C] = {
    name      => 'script data double escaped dash dash <',
    state     => SCRIPT_DATA_DOUBLE_ESCAPED_LT_STATE,
    emit      => CHARACTER_TOKEN,
    emit_data => '<',
};

# "-->" ends the escaped section; both dash-dash states share ONE hash.
$Action->[SCRIPT_DATA_ESCAPED_DASH_DASH_STATE][0x003E] = {
    name      => 'script data escaped dash dash >',
    state     => SCRIPT_DATA_STATE,
    emit      => CHARACTER_TOKEN,
    emit_data => '>',
};
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE][0x003E] =
    $Action->[SCRIPT_DATA_ESCAPED_DASH_DASH_STATE][0x003E];

# End of input: all six states share ONE action hash.
{
    my $eof_in_escaped = {
        name      => 'script data escaped eof',
        error     => 'eof in escaped script data',
        state     => DATA_STATE,
        reconsume => 1,
    };
    $Action->[$_][KEY_EOF_CHAR] = $eof_in_escaped for (
        SCRIPT_DATA_ESCAPED_STATE,
        SCRIPT_DATA_ESCAPED_DASH_STATE,
        SCRIPT_DATA_ESCAPED_DASH_DASH_STATE,
        SCRIPT_DATA_DOUBLE_ESCAPED_STATE,
        SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE,
        SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE,
    );
}
# ---- NUL inside escaped / double escaped script data ----
# A NUL is reported as a 'NULL' parse error and emitted as U+FFFD. It must
# not change the escaping level: the three escaped states fall back to the
# plain escaped state, and the three DOUBLE escaped states fall back to
# the plain double escaped state (previously they were wrongly sent to the
# single-escaped state as well).

# Escaped states share one action hash.
$Action->[SCRIPT_DATA_ESCAPED_STATE]->[0x0000] =
$Action->[SCRIPT_DATA_ESCAPED_DASH_STATE]->[0x0000] =
$Action->[SCRIPT_DATA_ESCAPED_DASH_DASH_STATE]->[0x0000] = {
  name => 'script data escaped null',
  emit => CHARACTER_TOKEN,
  emit_data => "\x{FFFD}",
  error => 'NULL',
  state => SCRIPT_DATA_ESCAPED_STATE,
};

# Double escaped states share another.
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_STATE]->[0x0000] =
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE]->[0x0000] =
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE]->[0x0000] = {
  name => 'script data double escaped null',
  emit => CHARACTER_TOKEN,
  emit_data => "\x{FFFD}",
  error => 'NULL',
  state => SCRIPT_DATA_DOUBLE_ESCAPED_STATE,
};
# ---- Escaped / double escaped script data: ordinary characters ----
# Any other character is emitted and resets the dash tracking by returning
# to the plain escaped (or plain double escaped) state.
$Action->[SCRIPT_DATA_ESCAPED_STATE][KEY_ELSE_CHAR] = {
    name  => 'script data escaped else',
    emit  => CHARACTER_TOKEN,
    state => SCRIPT_DATA_ESCAPED_STATE,
};
$Action->[SCRIPT_DATA_ESCAPED_DASH_STATE][KEY_ELSE_CHAR] = {
    name  => 'script data escaped dash else',
    emit  => CHARACTER_TOKEN,
    state => SCRIPT_DATA_ESCAPED_STATE,
};
$Action->[SCRIPT_DATA_ESCAPED_DASH_DASH_STATE][KEY_ELSE_CHAR] = {
    name  => 'script data escaped dash dash else',
    emit  => CHARACTER_TOKEN,
    state => SCRIPT_DATA_ESCAPED_STATE,
};
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_STATE][KEY_ELSE_CHAR] = {
    name  => 'script data double escaped else',
    emit  => CHARACTER_TOKEN,
    state => SCRIPT_DATA_DOUBLE_ESCAPED_STATE,
};
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE][KEY_ELSE_CHAR] = {
    name  => 'script data double escaped dash else',
    emit  => CHARACTER_TOKEN,
    state => SCRIPT_DATA_DOUBLE_ESCAPED_STATE,
};
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE][KEY_ELSE_CHAR] = {
    name  => 'script data double escaped dash dash else',
    emit  => CHARACTER_TOKEN,
    state => SCRIPT_DATA_DOUBLE_ESCAPED_STATE,
};

# ---- Double escape start / end states ----
# The two states share their rows for name terminators and letters.
{
    my @both_states = (
        SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE,
        SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE,
    );

    # White space, ">" and "/": `skip` makes the generic action handler
    # ignore this row.
    # NOTE(review): presumably dedicated code elsewhere in the tokenizer
    # handles these characters; it is not visible in this part of the file.
    my $name_terminator = {
        name => 'script data double escape start sp>/',
        skip => 1,
    };
    for my $key (KEY_SPACE_CHAR, 0x003E, 0x002F) {
        $Action->[$_][$key] = $name_terminator for @both_states;
    }

    # Letters are emitted and recorded in the buffer.
    my $upper_letter = {
        name   => 'script data double escape start uc',
        emit   => CHARACTER_TOKEN,
        buffer => { append => 0x0020 },
    };
    my $lower_letter = {
        name   => 'script data double escape start lc',
        emit   => CHARACTER_TOKEN,
        buffer => { append => 0x0000 },
    };
    for my $state (@both_states) {
        $Action->[$state][KEY_ULATIN_CHAR] = $upper_letter;
        $Action->[$state][KEY_LLATIN_CHAR] = $lower_letter;
    }
}

# Anything else gives up and reprocesses the character in the state the
# attempt started from.
$Action->[SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE][KEY_ELSE_CHAR] = {
    name      => 'script data double escape start else',
    state     => SCRIPT_DATA_ESCAPED_STATE,
    reconsume => 1,
};
$Action->[SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE][KEY_ELSE_CHAR] = {
    name      => 'script data double escape end else',
    state     => SCRIPT_DATA_DOUBLE_ESCAPED_STATE,
    reconsume => 1,
};

# ---- "<" seen in double escaped script data ----
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_LT_STATE][0x002F] = {    # "</"
    name      => 'script data double escaped lt /',
    buffer    => { clear => 1 },
    state     => SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE,
    emit      => CHARACTER_TOKEN,
    emit_data => '/',
};
$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_LT_STATE][KEY_ELSE_CHAR] = {
    name      => 'script data double escaped lt else',
    state     => SCRIPT_DATA_DOUBLE_ESCAPED_STATE,
    reconsume => 1,
};
# ---- "]]>" detection in data ----
# DATA_MSE1: one "]" seen; DATA_MSE2: at least two "]" seen. Every "]" is
# emitted as ordinary text; "]]>" additionally reports 'unmatched mse'.

$Action->[DATA_MSE1_STATE][0x005D] = {    # second "]"
    name      => 'data mse1 ]',
    state     => DATA_MSE2_STATE,
    emit      => CHARACTER_TOKEN,
    emit_data => ']',
};
$Action->[DATA_MSE1_STATE][KEY_ELSE_CHAR] = {
    name      => 'data mse1 else',
    state     => DATA_STATE,
    reconsume => 1,
};

$Action->[DATA_MSE2_STATE][0x003E] = {    # "]]>"
    name        => 'data mse2 >',
    error       => 'unmatched mse',
    error_delta => 2,
    state       => DATA_STATE,
    emit        => CHARACTER_TOKEN,
    emit_data   => '>',
};
$Action->[DATA_MSE2_STATE][0x005D] = {    # further "]": no state change
    name      => 'data mse2 ]',
    emit      => CHARACTER_TOKEN,
    emit_data => ']',
};
$Action->[DATA_MSE2_STATE][KEY_ELSE_CHAR] = {
    name      => 'data mse2 else',
    state     => DATA_STATE,
    reconsume => 1,
};
# ---- Before attribute name state ----
# In `ca`, `set_name => N` starts a new attribute whose name is
# chr(current char + N), so 0x0020 lower-cases an ASCII upper-case letter.

$Action->[BEFORE_ATTRIBUTE_NAME_STATE][KEY_SPACE_CHAR] = {    # white space: nothing to do
    name => 'before attr name sp',
};
$Action->[BEFORE_ATTRIBUTE_NAME_STATE][0x003E] = {    # ">"
    name  => 'before attr name >',
    emit  => '',
    state => DATA_STATE,
};
$Action->[BEFORE_ATTRIBUTE_NAME_STATE][0x002F] = {    # "/"
    name  => 'before attr name /',
    state => SELF_CLOSING_START_TAG_STATE,
};
$Action->[BEFORE_ATTRIBUTE_NAME_STATE][KEY_EOF_CHAR] = {
    name  => 'before attr name eof',
    error => 'unclosed tag',
    state => DATA_STATE,
};
$Action->[BEFORE_ATTRIBUTE_NAME_STATE][KEY_ULATIN_CHAR] = {
    name  => 'before attr name uc',
    ca    => { set_name => 0x0020 },
    state => ATTRIBUTE_NAME_STATE,
};
$XMLAction->[BEFORE_ATTRIBUTE_NAME_STATE][KEY_ULATIN_CHAR] = {    # XML keeps case
    name  => 'before attr name uc xml',
    ca    => { set_name => 0x0000 },
    state => ATTRIBUTE_NAME_STATE,
};

# '"', "'", "<" and "=" share ONE action hash: they start an attribute
# name anyway, with a parse error.
{
    my $bad_name_start = {
        name  => q[before attr name "'<=],
        error => 'bad attribute name',
        ca    => { set_name => 0x0000 },
        state => ATTRIBUTE_NAME_STATE,
    };
    $Action->[BEFORE_ATTRIBUTE_NAME_STATE][$_] = $bad_name_start
        for (0x003D, 0x003C, 0x0027, 0x0022);
}

$Action->[BEFORE_ATTRIBUTE_NAME_STATE][0x0000] = {    # NUL
    name  => 'before attr name null',
    ca    => { set_name => 0xFFFD },
    error => 'NULL',
    state => ATTRIBUTE_NAME_STATE,
};
$Action->[BEFORE_ATTRIBUTE_NAME_STATE][KEY_ELSE_CHAR] = {
    name  => 'before attr name else',
    ca    => { set_name => 0x0000 },
    state => ATTRIBUTE_NAME_STATE,
};
# ---- Attribute name state ----
# In `ca`: `leave => 1` marks the end of the attribute name;
# `name => N` appends chr(current char + N) to the name.

# Characters that end the name.
$Action->[ATTRIBUTE_NAME_STATE][KEY_SPACE_CHAR] = {
    name  => 'attr name sp',
    ca    => { leave => 1 },
    state => AFTER_ATTRIBUTE_NAME_STATE,
};
$Action->[ATTRIBUTE_NAME_STATE][0x003D] = {    # "="
    name  => 'attr name =',
    ca    => { leave => 1 },
    state => BEFORE_ATTRIBUTE_VALUE_STATE,
};
$Action->[ATTRIBUTE_NAME_STATE][0x003E] = {    # ">"
    name  => 'attr name >',
    ca    => { leave => 1 },
    emit  => '',
    state => DATA_STATE,
};
$Action->[ATTRIBUTE_NAME_STATE][0x002F] = {    # "/"
    name  => 'attr name /',
    ca    => { leave => 1 },
    state => SELF_CLOSING_START_TAG_STATE,
};
$Action->[ATTRIBUTE_NAME_STATE][KEY_EOF_CHAR] = {
    name      => 'attr name eof',
    error     => 'unclosed tag',
    ca        => { leave => 1 },
    state     => DATA_STATE,
    reconsume => 1,
};

# XML: an attribute without a value is an error.
$XMLAction->[ATTRIBUTE_NAME_STATE][0x003E] = {    # ">"
    name  => 'attr name > xml',
    error => 'no attr value',
    ca    => { leave => 1 },
    emit  => '',
    state => DATA_STATE,
};
$XMLAction->[ATTRIBUTE_NAME_STATE][0x002F] = {    # "/"
    name  => 'attr name / xml',
    error => 'no attr value',
    ca    => { leave => 1 },
    state => SELF_CLOSING_START_TAG_STATE,
};

# Characters that extend the name (no state change).
$Action->[ATTRIBUTE_NAME_STATE][KEY_ULATIN_CHAR] = {    # folded to lower case
    name => 'attr name uc',
    ca   => { name => 0x0020 },
};
$XMLAction->[ATTRIBUTE_NAME_STATE][KEY_ULATIN_CHAR] = {    # XML keeps case
    name => 'attr name uc',
    ca   => { name => 0x0000 },
};
{
    # '"', "'" and "<" share ONE action hash: appended with a parse error.
    my $bad_name_char = {
        name  => q[attr name "'<],
        error => 'bad attribute name',
        ca    => { name => 0x0000 },
    };
    $Action->[ATTRIBUTE_NAME_STATE][$_] = $bad_name_char
        for (0x003C, 0x0027, 0x0022);
}
$Action->[ATTRIBUTE_NAME_STATE][0x0000] = {    # NUL
    name  => 'attr name null',
    ca    => { name => 0xFFFD },
    error => 'NULL',
};
$Action->[ATTRIBUTE_NAME_STATE][KEY_ELSE_CHAR] = {
    name => 'attr name else',
    ca   => { name => 0x0000 },
};
# ---- After attribute name state ----

$Action->[AFTER_ATTRIBUTE_NAME_STATE][KEY_SPACE_CHAR] = {    # white space: nothing to do
    name => 'after attr name sp',
};
$Action->[AFTER_ATTRIBUTE_NAME_STATE][0x003D] = {    # "="
    name  => 'after attr name =',
    state => BEFORE_ATTRIBUTE_VALUE_STATE,
};
$Action->[AFTER_ATTRIBUTE_NAME_STATE][0x003E] = {    # ">"
    name  => 'after attr name >',
    emit  => '',
    state => DATA_STATE,
};
$Action->[AFTER_ATTRIBUTE_NAME_STATE][0x002F] = {    # "/"
    name  => 'after attr name /',
    state => SELF_CLOSING_START_TAG_STATE,
};
$Action->[AFTER_ATTRIBUTE_NAME_STATE][KEY_EOF_CHAR] = {
    name      => 'after attr name eof',
    error     => 'unclosed tag',
    state     => DATA_STATE,
    reconsume => 1,
};

# Every remaining character starts the next attribute.
$Action->[AFTER_ATTRIBUTE_NAME_STATE][KEY_ULATIN_CHAR] = {    # folded to lower case
    name  => 'after attr name uc',
    ca    => { set_name => 0x0020 },
    state => ATTRIBUTE_NAME_STATE,
};
{
    # '"', "'" and "<" share ONE action hash.
    my $bad_name_start = {
        name  => q[after attr name "'<],
        error => 'bad attribute name',
        ca    => { set_name => 0x0000 },
        state => ATTRIBUTE_NAME_STATE,
    };
    $Action->[AFTER_ATTRIBUTE_NAME_STATE][$_] = $bad_name_start
        for (0x003C, 0x0027, 0x0022);
}
$Action->[AFTER_ATTRIBUTE_NAME_STATE][0x0000] = {    # NUL
    name  => 'after attr name else',
    ca    => { set_name => 0xFFFD },
    error => 'NULL',
    state => ATTRIBUTE_NAME_STATE,
};
$Action->[AFTER_ATTRIBUTE_NAME_STATE][KEY_ELSE_CHAR] = {
    name  => 'after attr name else',
    ca    => { set_name => 0x0000 },
    state => ATTRIBUTE_NAME_STATE,
};

# XML rows: a name with no value is reported as 'no attr value'.
$XMLAction->[AFTER_ATTRIBUTE_NAME_STATE][0x003E] = {    # ">"
    name  => 'after attr name > xml',
    error => 'no attr value',
    emit  => '',
    state => DATA_STATE,
};
$XMLAction->[AFTER_ATTRIBUTE_NAME_STATE][KEY_ULATIN_CHAR] = {    # keeps case, no error
    name  => 'after attr name uc xml',
    ca    => { set_name => 0x0000 },
    state => ATTRIBUTE_NAME_STATE,
};
$XMLAction->[AFTER_ATTRIBUTE_NAME_STATE][0x002F] = {    # "/"
    name  => 'after attr name / xml',
    error => 'no attr value',
    state => SELF_CLOSING_START_TAG_STATE,
};
$XMLAction->[AFTER_ATTRIBUTE_NAME_STATE][KEY_ELSE_CHAR] = {
    name  => 'after attr name else',
    error => 'no attr value',
    ca    => { set_name => 0x0000 },
    state => ATTRIBUTE_NAME_STATE,
};
# ---- Before attribute value state (after "=") ----
# In `ca`, `value => 1` appends the current character to the attribute
# value; any other `value` is appended literally.

$Action->[BEFORE_ATTRIBUTE_VALUE_STATE][KEY_SPACE_CHAR] = {    # white space: nothing to do
    name => 'before attr value sp',
};
$Action->[BEFORE_ATTRIBUTE_VALUE_STATE][0x0022] = {    # opening '"'
    name  => 'before attr value "',
    state => ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE,
};
$Action->[BEFORE_ATTRIBUTE_VALUE_STATE][0x0027] = {    # opening "'"
    name  => "before attr value '",
    state => ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE,
};
$Action->[BEFORE_ATTRIBUTE_VALUE_STATE][0x0026] = {    # "&": reprocessed as unquoted value
    name      => 'before attr value &',
    state     => ATTRIBUTE_VALUE_UNQUOTED_STATE,
    reconsume => 1,
};
$Action->[BEFORE_ATTRIBUTE_VALUE_STATE][0x003E] = {    # ">": value missing
    name  => 'before attr value >',
    error => 'empty unquoted attribute value',
    emit  => '',
    state => DATA_STATE,
};
$Action->[BEFORE_ATTRIBUTE_VALUE_STATE][KEY_EOF_CHAR] = {
    name  => 'before attr value eof',
    error => 'unclosed tag',
    state => DATA_STATE,
};
{
    # "<", "=" and "`" share ONE action hash: taken as the first character
    # of an unquoted value, with a parse error.
    my $bad_value_start = {
        name  => 'before attr value <=`',
        error => 'bad attribute value',
        ca    => { value => 1 },
        state => ATTRIBUTE_VALUE_UNQUOTED_STATE,
    };
    $Action->[BEFORE_ATTRIBUTE_VALUE_STATE][$_] = $bad_value_start
        for (0x0060, 0x003D, 0x003C);
}
$Action->[BEFORE_ATTRIBUTE_VALUE_STATE][0x0000] = {    # NUL becomes U+FFFD
    name  => 'before attr value null',
    ca    => { value => "\x{FFFD}" },
    error => 'NULL',
    state => ATTRIBUTE_VALUE_UNQUOTED_STATE,
};
$Action->[BEFORE_ATTRIBUTE_VALUE_STATE][KEY_ELSE_CHAR] = {
    name  => 'before attr value else',
    ca    => { value => 1 },
    state => ATTRIBUTE_VALUE_UNQUOTED_STATE,
};

# XML rows: unquoted values are reported as 'unquoted attr value'.
$XMLAction->[BEFORE_ATTRIBUTE_VALUE_STATE][0x0026] = {    # "&"
    name      => 'before attr value &',
    error     => 'unquoted attr value',
    state     => ATTRIBUTE_VALUE_UNQUOTED_STATE,
    reconsume => 1,
};
$XMLAction->[BEFORE_ATTRIBUTE_VALUE_STATE][KEY_ELSE_CHAR] = {
    name  => 'before attr value else xml',
    error => 'unquoted attr value',
    ca    => { value => 1 },
    state => ATTRIBUTE_VALUE_UNQUOTED_STATE,
};
# ---- After attribute value (quoted) state ----
$Action->[AFTER_ATTRIBUTE_VALUE_QUOTED_STATE][KEY_SPACE_CHAR] = {
    name  => 'after attr value quoted sp',
    state => BEFORE_ATTRIBUTE_NAME_STATE,
};
$Action->[AFTER_ATTRIBUTE_VALUE_QUOTED_STATE][0x003E] = {    # ">"
    name  => 'after attr value quoted >',
    emit  => '',
    state => DATA_STATE,
};
$Action->[AFTER_ATTRIBUTE_VALUE_QUOTED_STATE][0x002F] = {    # "/"
    name  => 'after attr value quoted /',
    state => SELF_CLOSING_START_TAG_STATE,
};
$Action->[AFTER_ATTRIBUTE_VALUE_QUOTED_STATE][KEY_EOF_CHAR] = {
    name      => 'after attr value quoted eof',
    error     => 'unclosed tag',
    state     => DATA_STATE,
    reconsume => 1,
};
$Action->[AFTER_ATTRIBUTE_VALUE_QUOTED_STATE][KEY_ELSE_CHAR] = {
    # Next attribute follows the closing quote with no space between them.
    name      => 'after attr value quoted else',
    error     => 'no space between attributes',
    state     => BEFORE_ATTRIBUTE_NAME_STATE,
    reconsume => 1,
};

# ---- Self-closing start tag state (a "/" seen inside a tag) ----
$Action->[SELF_CLOSING_START_TAG_STATE][0x003E] = {
    # "/>": `skip` makes the generic action handler ignore this row.
    # NOTE(review): presumably handled by dedicated code elsewhere in the
    # tokenizer; that code is not visible in this part of the file.
    name => 'self closing start tag >',
    skip => 1,
};
$Action->[SELF_CLOSING_START_TAG_STATE][KEY_EOF_CHAR] = {
    name      => 'self closing start tag eof',
    error     => 'unclosed tag',
    state     => DATA_STATE,
    reconsume => 1,
};
$Action->[SELF_CLOSING_START_TAG_STATE][KEY_ELSE_CHAR] = {
    # The "/" was not followed by ">".
    name      => 'self closing start tag else',
    error     => 'nestc',
    state     => BEFORE_ATTRIBUTE_NAME_STATE,
    reconsume => 1,
};

# ---- Markup declaration hyphen state ("<!-" has been consumed) ----
# `delta => 3` / `error_delta => 3` shift the reported column back by the
# three characters of "<!-".
$Action->[MD_HYPHEN_STATE][0x002D] = {    # "<!--": a real comment begins
    name  => 'md hyphen -',
    ct    => { type => COMMENT_TOKEN, data => '', delta => 3 },
    state => COMMENT_START_STATE,
};
$Action->[MD_HYPHEN_STATE][KEY_ELSE_CHAR] = {
    # Bogus comment whose data already holds the single "-".
    name        => 'md hyphen else',
    error       => 'bogus comment',
    error_delta => 3,
    state       => BOGUS_COMMENT_STATE,
    reconsume   => 1,
    ct          => { type => COMMENT_TOKEN, data => '-', delta => 3 },
};
# Lookup table from an ASCII code point to its action-table column key.
# Later assignments override earlier ones, so the order below matters.
my $c_to_key = [];

# Slot 255 is the last element of the array, so index -1 (the value used
# for "no more input") also lands here and yields the EOF key.
$c_to_key->[255] = KEY_EOF_CHAR;

# Default: every ASCII character is its own key.
for my $code_point (0x0000 .. 0x007F) {
    $c_to_key->[$code_point] = $code_point;
}

# Character classes that share a single column.
for my $space (keys %$is_space) {
    $c_to_key->[$space] = KEY_SPACE_CHAR;
}
for my $upper (0x0041 .. 0x005A) {    # A-Z
    $c_to_key->[$upper] = KEY_ULATIN_CHAR;
}
for my $lower (0x0061 .. 0x007A) {    # a-z
    $c_to_key->[$lower] = KEY_LLATIN_CHAR;
}
sub
_get_next_token ($) {
my
$self
=
shift
;
if
(
$self
->{self_closing}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'nestc'
,
token
=>
$self
->{ct});
delete
$self
->{self_closing};
}
if
(@{
$self
->{token}}) {
$self
->{self_closing} =
$self
->{token}->[0]->{self_closing};
return
shift
@{
$self
->{token}};
}
A: {
my
$nc
=
$self
->{nc};
my
$state
=
$self
->{state};
my
$c
=
$nc
> 0x007F ? KEY_ELSE_CHAR :
$c_to_key
->[
$nc
];
my
$action
=
$Action
->[
$state
]->[
$c
] ||
$Action
->[
$state
]->[KEY_ELSE_CHAR];
if
(
$self
->{is_xml}) {
$action
=
$XMLAction
->[
$state
]->[
$c
]
||
$Action
->[
$state
]->[
$c
]
||
$XMLAction
->[
$state
]->[KEY_ELSE_CHAR]
||
$Action
->[
$state
]->[KEY_ELSE_CHAR];
}
if
(
$action
and not
$action
->{skip}) {
if
(
defined
$action
->{error}) {
if
(
$action
->{error_delta}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
$action
->{error},
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} -
$action
->{error_delta} + 1);
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
$action
->{error});
}
}
if
(
defined
$action
->{state}) {
$self
->{state} =
$action
->{state};
if
(
$action
->{state_set}) {
for
(
keys
%{
$action
->{state_set}}) {
$self
->{
$_
} =
$action
->{state_set}->{
$_
};
}
}
}
if
(
my
$act
=
$action
->{ct}) {
if
(
defined
$act
->{type}) {
$self
->{ct} = {
type
=>
$act
->{type},
tag_name
=>
''
,
data
=>
$act
->{data}};
if
(
$act
->{delta}) {
$self
->{ct}->{line} =
$self
->{line_prev};
$self
->{ct}->{column} =
$self
->{column_prev} -
$act
->{delta} + 1;
}
else
{
$self
->{ct}->{line} =
$self
->{line};
$self
->{ct}->{column} =
$self
->{column};
}
}
if
(
defined
$act
->{append_tag_name}) {
$self
->{ct}->{tag_name} .=
chr
(
$nc
+
$act
->{append_tag_name});
}
}
if
(
my
$aca
=
$action
->{ca}) {
if
(
$aca
->{value}) {
$self
->{ca}->{value} .=
$aca
->{value} ne
'1'
?
$aca
->{value} :
chr
$nc
;
}
elsif
(
defined
$aca
->{name}) {
$self
->{ca}->{name} .=
chr
(
$nc
+
$aca
->{name});
}
elsif
(
defined
$aca
->{set_name}) {
$self
->{ca} = {
name
=>
chr
(
$nc
+
$aca
->{set_name}),
value
=>
''
,
line
=>
$self
->{line},
column
=>
$self
->{column},
};
}
elsif
(
$aca
->{leave}) {
if
(
exists
$self
->{ct}->{attributes}->{
$self
->{ca}->{name}}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'duplicate attribute'
,
text
=>
$self
->{ca}->{name},
line
=>
$self
->{ca}->{line},
column
=>
$self
->{ca}->{column});
}
else
{
$self
->{ct}->{attributes}->{
$self
->{ca}->{name}} =
$self
->{ca};
$self
->{ca}->{
index
} = ++
$self
->{ct}->{last_index};
}
}
}
if
(
defined
$action
->{buffer}) {
$self
->{kwd} =
''
if
$action
->{buffer}->{clear};
$self
->{kwd} .=
chr
(
$nc
+
$action
->{buffer}->{append})
if
defined
$action
->{buffer}->{append};
}
if
(
defined
$action
->{emit}) {
if
(
$action
->{emit} eq
''
) {
if
(
$self
->{ct}->{type} == START_TAG_TOKEN) {
$self
->{last_stag_name} =
$self
->{ct}->{tag_name};
}
elsif
(
$self
->{ct}->{type} == END_TAG_TOKEN) {
if
(
$self
->{ct}->{attributes}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'end tag attribute'
);
}
else
{
}
}
else
{
die
"$0: $self->{ct}->{type}: Unknown token type"
;
}
if
(
$action
->{reconsume}) {
}
else
{
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
}
return
(
$self
->{ct});
}
else
{
my
$token
= {
type
=>
$action
->{emit}};
if
(
defined
$action
->{emit_data}) {
$token
->{data} =
$action
->{emit_data};
if
(
$action
->{emit_data_append}) {
$token
->{data} .=
chr
$nc
;
}
}
elsif
(
$action
->{emit} == CHARACTER_TOKEN) {
$token
->{data} .=
chr
$nc
;
}
if
(
$action
->{emit_delta}) {
$token
->{line} =
$self
->{line_prev};
$token
->{column} =
$self
->{column_prev} -
$action
->{emit_delta} + 1;
}
else
{
$token
->{line} =
$self
->{line};
$token
->{column} =
$self
->{column};
}
if
(
defined
$action
->{emit_data_read_until}) {
$self
->{read_until}->(
$token
->{data},
$action
->{emit_data_read_until},
length
$token
->{data});
}
if
(
$action
->{reconsume}) {
}
else
{
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
}
return
(
$token
);
}
}
else
{
if
(
$action
->{reconsume}) {
}
else
{
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
}
}
redo
A;
}
if
({
(RCDATA_END_TAG_OPEN_STATE) => 1,
(RAWTEXT_END_TAG_OPEN_STATE) => 1,
(SCRIPT_DATA_END_TAG_OPEN_STATE) => 1,
(SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE) => 1,
}->{
$state
}) {
my
(
$l
,
$c
) = (
$self
->{line_prev},
$self
->{column_prev} - 1);
if
(
defined
$self
->{last_stag_name}) {
}
else
{
$self
->{state} = {
(RCDATA_END_TAG_OPEN_STATE) => RCDATA_STATE,
(RAWTEXT_END_TAG_OPEN_STATE) => RAWTEXT_STATE,
(SCRIPT_DATA_END_TAG_OPEN_STATE) => SCRIPT_DATA_STATE,
(SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE)
=> SCRIPT_DATA_ESCAPED_STATE,
}->{
$state
} or
die
"${state}'s next state not found"
;
return
({
type
=> CHARACTER_TOKEN,
data
=>
'</'
,
line
=>
$l
,
column
=>
$c
});
redo
A;
}
my
$ch
=
substr
$self
->{last_stag_name},
length
$self
->{kwd}, 1;
if
(
length
$ch
) {
my
$CH
=
$ch
;
$ch
=~
tr
/a-z/A-Z/;
my
$nch
=
chr
$nc
;
if
(
$nch
eq
$ch
or
$nch
eq
$CH
) {
$self
->{kwd} .=
$nch
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
$self
->{state} = {
(RCDATA_END_TAG_OPEN_STATE) => RCDATA_STATE,
(RAWTEXT_END_TAG_OPEN_STATE) => RAWTEXT_STATE,
(SCRIPT_DATA_END_TAG_OPEN_STATE) => SCRIPT_DATA_STATE,
(SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE)
=> SCRIPT_DATA_ESCAPED_STATE,
}->{
$state
} or
die
"${state}'s next state not found"
;
return
({
type
=> CHARACTER_TOKEN,
data
=>
'</'
.
$self
->{kwd},
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 1 -
length
$self
->{kwd},
});
redo
A;
}
}
else
{
unless
(
$is_space
->{
$nc
} or
{
0x003E
=> 1,
0x002F
=> 1,
}->{
$nc
}) {
$self
->{state} = {
(RCDATA_END_TAG_OPEN_STATE) => RCDATA_STATE,
(RAWTEXT_END_TAG_OPEN_STATE) => RAWTEXT_STATE,
(SCRIPT_DATA_END_TAG_OPEN_STATE) => SCRIPT_DATA_STATE,
(SCRIPT_DATA_ESCAPED_END_TAG_OPEN_STATE)
=> SCRIPT_DATA_ESCAPED_STATE,
}->{
$self
->{state}} or
die
"${state}'s next state not found"
;
return
({
type
=> CHARACTER_TOKEN,
data
=>
'</'
.
$self
->{kwd},
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 1 -
length
$self
->{kwd},
});
redo
A;
}
else
{
$self
->{ct}
= {
type
=> END_TAG_TOKEN,
tag_name
=>
$self
->{last_stag_name},
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 1 -
length
$self
->{kwd}};
$self
->{state} = TAG_NAME_STATE;
redo
A;
}
}
}
elsif
(
$state
== SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE or
$state
== SCRIPT_DATA_DOUBLE_ESCAPE_END_STATE) {
if
(
$is_space
->{
$nc
} or
$nc
== 0x002F or
$nc
== 0x003E) {
my
$token
= {
type
=> CHARACTER_TOKEN,
data
=>
chr
$nc
,
line
=>
$self
->{line},
column
=>
$self
->{column}};
if
(
$state
== SCRIPT_DATA_DOUBLE_ESCAPE_START_STATE) {
$self
->{state} =
$self
->{kwd} eq
'script'
? SCRIPT_DATA_DOUBLE_ESCAPED_STATE
: SCRIPT_DATA_ESCAPED_STATE;
}
else
{
$self
->{state} =
$self
->{kwd} eq
'script'
? SCRIPT_DATA_ESCAPED_STATE
: SCRIPT_DATA_DOUBLE_ESCAPED_STATE;
}
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$token
);
redo
A;
}
else
{
die
"$state/$nc is implemented"
;
}
}
elsif
(
$state
== ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
if
(
$nc
== 0x0022) {
if
(
$self
->{ct}->{type} == ATTLIST_TOKEN) {
push
@{
$self
->{ct}->{attrdefs}},
$self
->{ca};
$self
->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
}
else
{
$self
->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
}
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x0026) {
$self
->{prev_state} =
$state
;
$self
->{entity_add} = 0x0022;
$self
->{state} = ENTITY_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$self
->{is_xml} and
$is_space
->{
$nc
}) {
$self
->{ca}->{value} .=
' '
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== -1) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed attribute value'
);
if
(
$self
->{ct}->{type} == START_TAG_TOKEN) {
$self
->{last_stag_name} =
$self
->{ct}->{tag_name};
$self
->{state} = DATA_STATE;
return
(
$self
->{ct});
redo
A;
}
elsif
(
$self
->{ct}->{type} == END_TAG_TOKEN) {
if
(
$self
->{ct}->{attributes}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'end tag attribute'
);
}
else
{
}
$self
->{state} = DATA_STATE;
redo
A;
}
elsif
(
$self
->{ct}->{type} == ATTLIST_TOKEN) {
push
@{
$self
->{ct}->{attrdefs}},
$self
->{ca};
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
redo
A;
}
else
{
die
"$0: $self->{ct}->{type}: Unknown token type"
;
}
}
elsif
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
$self
->{ca}->{value} .=
"\x{FFFD}"
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
if
(
$self
->{is_xml} and
$nc
== 0x003C) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'lt in attr value'
);
}
else
{
}
$self
->{ca}->{value} .=
chr
(
$nc
);
$self
->{read_until}->(
$self
->{ca}->{value},
qq[\x00"&<\x09\x0C\x20]
,
length
$self
->{ca}->{value});
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
if
(
$nc
== 0x0027) {
if
(
$self
->{ct}->{type} == ATTLIST_TOKEN) {
push
@{
$self
->{ct}->{attrdefs}},
$self
->{ca};
$self
->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
}
else
{
$self
->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
}
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x0026) {
$self
->{entity_add} = 0x0027;
$self
->{prev_state} =
$state
;
$self
->{state} = ENTITY_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$self
->{is_xml} and
$is_space
->{
$nc
}) {
$self
->{ca}->{value} .=
' '
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== -1) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed attribute value'
);
if
(
$self
->{ct}->{type} == START_TAG_TOKEN) {
$self
->{last_stag_name} =
$self
->{ct}->{tag_name};
$self
->{state} = DATA_STATE;
redo
A;
}
elsif
(
$self
->{ct}->{type} == END_TAG_TOKEN) {
if
(
$self
->{ct}->{attributes}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'end tag attribute'
);
}
else
{
}
$self
->{state} = DATA_STATE;
redo
A;
}
elsif
(
$self
->{ct}->{type} == ATTLIST_TOKEN) {
push
@{
$self
->{ct}->{attrdefs}},
$self
->{ca};
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
redo
A;
}
else
{
die
"$0: $self->{ct}->{type}: Unknown token type"
;
}
}
elsif
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
$self
->{ca}->{value} .=
"\x{FFFD}"
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
if
(
$self
->{is_xml} and
$nc
== 0x003C) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'lt in attr value'
);
}
else
{
}
$self
->{ca}->{value} .=
chr
(
$nc
);
$self
->{read_until}->(
$self
->{ca}->{value},
qq[\x00'&<\x09\x0C\x20]
,
length
$self
->{ca}->{value});
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== ATTRIBUTE_VALUE_UNQUOTED_STATE) {
if
(
$is_space
->{
$nc
}) {
if
(
$self
->{ct}->{type} == ATTLIST_TOKEN) {
push
@{
$self
->{ct}->{attrdefs}},
$self
->{ca};
$self
->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
}
else
{
$self
->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
}
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x0026) {
$self
->{entity_add} = 0x003E;
$self
->{prev_state} =
$state
;
$self
->{state} = ENTITY_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x003E) {
if
(
$self
->{ct}->{type} == START_TAG_TOKEN) {
$self
->{last_stag_name} =
$self
->{ct}->{tag_name};
$self
->{state} = DATA_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$self
->{ct}->{type} == END_TAG_TOKEN) {
if
(
$self
->{ct}->{attributes}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'end tag attribute'
);
}
else
{
}
$self
->{state} = DATA_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$self
->{ct}->{type} == ATTLIST_TOKEN) {
push
@{
$self
->{ct}->{attrdefs}},
$self
->{ca};
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
else
{
die
"$0: $self->{ct}->{type}: Unknown token type"
;
}
}
elsif
(
$nc
== -1) {
if
(
$self
->{ct}->{type} == START_TAG_TOKEN) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed tag'
);
$self
->{last_stag_name} =
$self
->{ct}->{tag_name};
$self
->{state} = DATA_STATE;
redo
A;
}
elsif
(
$self
->{ct}->{type} == END_TAG_TOKEN) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed tag'
);
if
(
$self
->{ct}->{attributes}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'end tag attribute'
);
}
else
{
}
$self
->{state} = DATA_STATE;
redo
A;
}
elsif
(
$self
->{ct}->{type} == ATTLIST_TOKEN) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed md'
);
push
@{
$self
->{ct}->{attrdefs}},
$self
->{ca};
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
redo
A;
}
else
{
die
"$0: $self->{ct}->{type}: Unknown token type"
;
}
}
elsif
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
$self
->{ca}->{value} .=
"\x{FFFD}"
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
if
({
0x0022
=> 1,
0x0027
=> 1,
0x003D
=> 1,
0x003C
=> 1,
0x0060
=> 1,
}->{
$nc
}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'bad attribute value'
);
}
else
{
}
$self
->{ca}->{value} .=
chr
(
$nc
);
$self
->{read_until}->(
$self
->{ca}->{value},
qq[\x00"'=&` \x09\x0C<>]
,
length
$self
->{ca}->{value});
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== SELF_CLOSING_START_TAG_STATE) {
if
(
$nc
== 0x003E) {
if
(
$self
->{ct}->{type} == END_TAG_TOKEN) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'nestc'
,
token
=>
$self
->{ct});
if
(
$self
->{ct}->{attributes}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'end tag attribute'
);
}
else
{
}
}
else
{
$self
->{self_closing} = 1;
}
$self
->{state} = DATA_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
else
{
die
"$state/$nc is implemented"
;
}
}
elsif
(
$state
== BOGUS_COMMENT_STATE) {
if
(
$nc
== 0x003E) {
if
(
$self
->{in_subset}) {
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
}
else
{
$self
->{state} = DATA_STATE;
}
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== -1) {
if
(
$self
->{in_subset}) {
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
}
else
{
$self
->{state} = DATA_STATE;
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== 0x0000) {
$self
->{ct}->{data} .=
"\x{FFFD}"
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
$self
->{ct}->{data} .=
chr
(
$nc
);
$self
->{read_until}->(
$self
->{ct}->{data},
qq[\x00>]
,
length
$self
->{ct}->{data});
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== MARKUP_DECLARATION_OPEN_STATE) {
if
(
$nc
== 0x002D) {
$self
->{state} = MD_HYPHEN_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x0044 or
$nc
== 0x0064) {
$self
->{state} = MD_DOCTYPE_STATE;
$self
->{kwd} =
chr
$nc
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
$self
->{state} = MD_CDATA_STATE;
$self
->{kwd} =
'['
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
}
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'bogus comment'
,
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 1);
$self
->{state} = BOGUS_COMMENT_STATE;
$self
->{ct} = {
type
=> COMMENT_TOKEN,
data
=>
''
,
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 1,
};
redo
A;
}
elsif
(
$state
== MD_DOCTYPE_STATE) {
if
(
$nc
== [
undef
,
0x004F,
0x0043,
0x0054,
0x0059,
0x0050,
NEVER_CHAR,
]->[
length
$self
->{kwd}] or
$nc
== [
undef
,
0x006F,
0x0063,
0x0074,
0x0079,
0x0070,
NEVER_CHAR,
]->[
length
$self
->{kwd}]) {
$self
->{kwd} .=
chr
$nc
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
((
length
$self
->{kwd}) == 6 and
(
$nc
== 0x0045 or
$nc
== 0x0065)) {
if
(
$self
->{is_xml} and
(
$self
->{kwd} ne
'DOCTYP'
or
$nc
== 0x0065)) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'lowercase keyword'
,
text
=>
'DOCTYPE'
,
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 5);
}
else
{
}
$self
->{state} = DOCTYPE_STATE;
$self
->{ct} = {
type
=> DOCTYPE_TOKEN,
quirks
=> 1,
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 7,
};
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'bogus comment'
,
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 1 -
length
$self
->{kwd});
$self
->{state} = BOGUS_COMMENT_STATE;
$self
->{ct} = {
type
=> COMMENT_TOKEN,
data
=>
$self
->{kwd},
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 1 -
length
$self
->{kwd},
};
redo
A;
}
}
elsif
(
$state
== MD_CDATA_STATE) {
if
(
$nc
== {
'['
=> 0x0043,
'[C'
=> 0x0044,
'[CD'
=> 0x0041,
'[CDA'
=> 0x0054,
'[CDAT'
=> 0x0041,
'[CDATA'
=> NEVER_CHAR,
}->{
$self
->{kwd}}) {
$self
->{kwd} .=
chr
$nc
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$self
->{kwd} eq
'[CDATA'
and
$nc
== 0x005B) {
if
(
$self
->{is_xml} and
not
$self
->{tainted} and
@{
$self
->{open_elements} or []} == 0) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'cdata outside of root element'
,
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 7);
$self
->{tainted} = 1;
}
else
{
}
$self
->{ct} = {
type
=> CHARACTER_TOKEN,
data
=>
''
,
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 7};
$self
->{state} = CDATA_SECTION_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'bogus comment'
,
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 1 -
length
$self
->{kwd});
$self
->{state} = BOGUS_COMMENT_STATE;
$self
->{ct} = {
type
=> COMMENT_TOKEN,
data
=>
$self
->{kwd},
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 1 -
length
$self
->{kwd},
};
redo
A;
}
}
elsif
(
$state
== COMMENT_START_STATE) {
if
(
$nc
== 0x002D) {
$self
->{state} = COMMENT_START_DASH_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x003E) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'bogus comment'
);
if
(
$self
->{in_subset}) {
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
}
else
{
$self
->{state} = DATA_STATE;
}
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== -1) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed comment'
);
if
(
$self
->{in_subset}) {
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
}
else
{
$self
->{state} = DATA_STATE;
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
$self
->{ct}->{data} .=
"\x{FFFD}"
;
$self
->{state} = COMMENT_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
$self
->{ct}->{data}
.=
chr
(
$nc
);
$self
->{state} = COMMENT_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== COMMENT_START_DASH_STATE) {
if
(
$nc
== 0x002D) {
$self
->{state} = COMMENT_END_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x003E) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'bogus comment'
);
if
(
$self
->{in_subset}) {
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
}
else
{
$self
->{state} = DATA_STATE;
}
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== -1) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed comment'
);
if
(
$self
->{in_subset}) {
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
}
else
{
$self
->{state} = DATA_STATE;
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
$self
->{ct}->{data} .=
"-\x{FFFD}"
;
$self
->{state} = COMMENT_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
$self
->{ct}->{data}
.=
'-'
.
chr
(
$nc
);
$self
->{state} = COMMENT_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== COMMENT_STATE) {
if
(
$nc
== 0x002D) {
$self
->{state} = COMMENT_END_DASH_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== -1) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed comment'
);
if
(
$self
->{in_subset}) {
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
}
else
{
$self
->{state} = DATA_STATE;
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
$self
->{ct}->{data} .=
"\x{FFFD}"
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
$self
->{ct}->{data} .=
chr
(
$nc
);
$self
->{read_until}->(
$self
->{ct}->{data},
qq[-\x00]
,
length
$self
->{ct}->{data});
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif ($state == COMMENT_END_DASH_STATE) {
  # One '-' has been seen inside a comment and is being withheld from
  # $self->{ct}->{data} until we know whether it starts the terminator.
  if ($nc == 0x002D) {    # second '-'
    $self->{state} = COMMENT_END_STATE;
  }
  elsif ($nc == -1) {
    # -1 is treated as "no more input"; the token is returned as it stands.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'unclosed comment',
    );
    if ($self->{in_subset}) {
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
      $self->{state} = DATA_STATE;
    }
    return ($self->{ct});
  }
  elsif ($nc == 0x0000) {
    # NUL: report it, then store the withheld '-' followed by U+FFFD.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'NULL',
    );
    $self->{ct}->{data} .= "-\x{FFFD}";
    $self->{state} = COMMENT_STATE;
  }
  else {
    # The '-' was ordinary comment text after all.
    $self->{ct}->{data} .= '-' . chr($nc);
    $self->{state} = COMMENT_STATE;
  }

  # Consume the current character: read from char_buffer while it has
  # data, otherwise defer to the set_nc callback.
  if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
    $self->{column}++;
    $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
  }
  else {
    $self->{set_nc}->($self);
  }
  redo A;
}
elsif ($state == COMMENT_END_STATE or $state == COMMENT_END_BANG_STATE) {
  # Shared handler for two states: "--" has been seen (COMMENT_END_STATE)
  # or "--!" has been seen (COMMENT_END_BANG_STATE).  Those characters are
  # withheld from $self->{ct}->{data} and re-added below if the comment
  # turns out not to end here.
  if ($nc == 0x003E) {    # '>': the comment is complete
    if ($self->{in_subset}) {
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
      $self->{state} = DATA_STATE;
    }
    # Consume the '>' before handing the token back.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  elsif ($nc == 0x002D) {    # '-'
    if ($state == COMMENT_END_BANG_STATE) {
      # "--!-": the "--!" becomes data and the new '-' is withheld.
      $self->{ct}->{data} .= '--!';
      $self->{state} = COMMENT_END_DASH_STATE;
    }
    else {
      # "---": one dash becomes data; stay in this state.  The error is
      # reported at the previous character's position.
      $self->{parse_error}->(
        level  => $self->{level}->{must},
        type   => 'dash in comment',
        line   => $self->{line_prev},
        column => $self->{column_prev},
      );
      $self->{ct}->{data} .= '-';
    }
  }
  elsif ($state != COMMENT_END_BANG_STATE and $nc == 0x0021) {    # "--!"
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'comment end bang',
    );
    $self->{state} = COMMENT_END_BANG_STATE;
  }
  elsif ($nc == -1) {
    # -1 is treated as "no more input"; nothing is consumed.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'unclosed comment',
    );
    if ($self->{in_subset}) {
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
      $self->{state} = DATA_STATE;
    }
    return ($self->{ct});
  }
  elsif ($nc == 0x0000) {
    # NUL: report it, restore the withheld prefix, store U+FFFD.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'NULL',
    );
    if ($state == COMMENT_END_BANG_STATE) {
      $self->{ct}->{data} .= "--!\x{FFFD}";
    }
    else {
      $self->{ct}->{data} .= "--\x{FFFD}";
    }
    $self->{state} = COMMENT_STATE;
  }
  else {
    # Any other character: restore the withheld prefix plus the character
    # and go back to plain comment text.
    if ($state == COMMENT_END_BANG_STATE) {
      $self->{ct}->{data} .= '--!' . chr($nc);
    }
    else {
      $self->{ct}->{data} .= '--' . chr($nc);
    }
    $self->{state} = COMMENT_STATE;
  }

  # Shared tail: consume the current character and loop.
  if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
    $self->{column}++;
    $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
  }
  else {
    $self->{set_nc}->($self);
  }
  redo A;
}
elsif ($state == DOCTYPE_STATE) {
  # First character after the DOCTYPE keyword.
  unless ($is_space->{$nc}) {
    if ($nc == -1) {
      # -1 is treated as "no more input": flag the token as quirks and
      # return it without consuming anything.
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'unclosed DOCTYPE',
      );
      $self->{ct}->{quirks} = 1;
      $self->{state} = DATA_STATE;
      return ($self->{ct});
    }

    # Anything else: whitespace was expected.  Report it and let the next
    # state look at this same character (it is not consumed here).
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'no space before DOCTYPE name',
    );
    $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
    redo A;
  }

  # Whitespace: move on and consume it.
  $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
  if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
    $self->{column}++;
    $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
  }
  else {
    $self->{set_nc}->($self);
  }
  redo A;
}
elsif ($state == BEFORE_DOCTYPE_NAME_STATE) {
  # Waiting for the first character of the DOCTYPE name.  The name is
  # stored in $self->{ct}->{name}; starting a name clears the token's
  # quirks flag.
  if ($is_space->{$nc}) {
    # Skip whitespace; the state does not change.
  }
  elsif ($nc == 0x003E) {    # '>' with no name at all
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'no DOCTYPE name',
    );
    $self->{state} = DATA_STATE;
    # Consume the '>' before handing the token back.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  elsif (0x0041 <= $nc and $nc <= 0x005A) {    # 'A'..'Z'
    # Lower-cased (by adding 0x20) unless tokenizing XML.
    $self->{ct}->{name} = chr($nc + ($self->{is_xml} ? 0 : 0x0020));
    delete $self->{ct}->{quirks};
    $self->{state} = DOCTYPE_NAME_STATE;
  }
  elsif ($nc == -1) {
    # -1 is treated as "no more input"; nothing is consumed.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'no DOCTYPE name',
    );
    $self->{state} = DATA_STATE;
    return ($self->{ct});
  }
  elsif ($self->{is_xml} and $nc == 0x005B) {    # '[' (XML only)
    # An internal subset starts although no name was given.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'no DOCTYPE name',
    );
    $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    $self->{ct}->{has_internal_subset} = 1;
    $self->{in_subset} = 1;
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  elsif ($nc == 0x0000) {
    # NUL starts the name as U+FFFD.  It is also a parse error; report it
    # with the same 'NULL' error type the comment states use for NUL.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'NULL',
    );
    $self->{ct}->{name} = "\x{FFFD}";
    delete $self->{ct}->{quirks};
    $self->{state} = DOCTYPE_NAME_STATE;
  }
  else {
    # Any other character starts the name as-is.
    $self->{ct}->{name} = chr $nc;
    delete $self->{ct}->{quirks};
    $self->{state} = DOCTYPE_NAME_STATE;
  }

  # Shared tail for the arms that did not return: consume the current
  # character (from char_buffer when possible, else via set_nc) and loop.
  if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
    $self->{column}++;
    $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
  }
  else {
    $self->{set_nc}->($self);
  }
  redo A;
}
elsif ($state == DOCTYPE_NAME_STATE) {
  # Accumulating the DOCTYPE name in $self->{ct}->{name}.
  if ($is_space->{$nc}) {    # whitespace ends the name
    $self->{state} = AFTER_DOCTYPE_NAME_STATE;
  }
  elsif ($nc == 0x003E) {    # '>': the declaration is complete
    $self->{state} = DATA_STATE;
    # Consume the '>' before handing the token back.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  elsif (0x0041 <= $nc and $nc <= 0x005A) {    # 'A'..'Z'
    # Lower-cased (by adding 0x20) unless tokenizing XML.
    $self->{ct}->{name} .= chr($nc + ($self->{is_xml} ? 0 : 0x0020));
    delete $self->{ct}->{quirks};
  }
  elsif ($nc == -1) {
    # -1 is treated as "no more input": flag quirks and return the token
    # without consuming anything.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'unclosed DOCTYPE',
    );
    $self->{state} = DATA_STATE;
    $self->{ct}->{quirks} = 1;
    return ($self->{ct});
  }
  elsif ($self->{is_xml} and $nc == 0x005B) {    # '[' (XML only)
    # Start of the internal subset.
    $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    $self->{ct}->{has_internal_subset} = 1;
    $self->{in_subset} = 1;
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  elsif ($nc == 0x0000) {
    # NUL is stored as U+FFFD.  It is also a parse error; report it with
    # the same 'NULL' error type the comment states use for NUL.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'NULL',
    );
    $self->{ct}->{name} .= "\x{FFFD}";
  }
  else {
    $self->{ct}->{name} .= chr($nc);
  }

  # Shared tail for the arms that did not return: consume the current
  # character (from char_buffer when possible, else via set_nc) and loop.
  if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
    $self->{column}++;
    $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
  }
  else {
    $self->{set_nc}->($self);
  }
  redo A;
}
elsif ($state == AFTER_DOCTYPE_NAME_STATE) {
  # The name is complete.  This state serves DOCTYPE tokens as well as the
  # other token types built here (the type is read from
  # $self->{ct}->{type}); several arms differ by token type.
  if ($is_space->{$nc}) {
    # Skip whitespace; the state does not change.
  }
  elsif ($nc == 0x003E) {    # '>'
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{state} = DATA_STATE;
    }
    else {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'no md def',
      );
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    # Consume the '>' before handing the token back.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  elsif ($nc == -1) {
    # -1 is treated as "no more input"; nothing is consumed.
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'unclosed DOCTYPE',
      );
      $self->{state} = DATA_STATE;
      $self->{ct}->{quirks} = 1;
    }
    else {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'unclosed md',
      );
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    return ($self->{ct});
  }
  elsif ($nc == 0x0050 or $nc == 0x0070) {    # 'P' / 'p'
    # Start matching a keyword; kwd records the characters seen so far.
    $self->{state} = PUBLIC_STATE;
    $self->{kwd} = chr $nc;
  }
  elsif ($nc == 0x0053 or $nc == 0x0073) {    # 'S' / 's'
    $self->{state} = SYSTEM_STATE;
    $self->{kwd} = chr $nc;
  }
  elsif ($nc == 0x0022
         and ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN
              or $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
    # '"' on an entity token opens a double-quoted entity value.
    $self->{state} = DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE;
    $self->{ct}->{value} = '';
  }
  elsif ($nc == 0x0027
         and ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN
              or $self->{ct}->{type} == PARAMETER_ENTITY_TOKEN)) {
    # "'" on an entity token opens a single-quoted entity value.
    $self->{state} = DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE;
    $self->{ct}->{value} = '';
  }
  elsif ($self->{is_xml}
         and $self->{ct}->{type} == DOCTYPE_TOKEN
         and $nc == 0x005B) {
    # '[' on an XML DOCTYPE: the internal subset begins; the token is
    # returned after the '[' is consumed.
    $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    $self->{ct}->{has_internal_subset} = 1;
    $self->{in_subset} = 1;
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  else {
    # Unexpected text after the name: switch to the matching bogus state.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'string after DOCTYPE name',
    );
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{ct}->{quirks} = 1;
      $self->{state} = BOGUS_DOCTYPE_STATE;
    }
    else {
      $self->{state} = BOGUS_MD_STATE;
    }
  }

  # Shared tail for the arms that did not return: consume the current
  # character (from char_buffer when possible, else via set_nc) and loop.
  if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
    $self->{column}++;
    $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
  }
  else {
    $self->{set_nc}->($self);
  }
  redo A;
}
elsif ($state == PUBLIC_STATE) {
  # Case-insensitive match of the keyword "PUBLIC".  $self->{kwd} holds
  # the characters matched so far, so its length selects the next expected
  # letter from the upper- and lower-case tables below.  Slot 5 holds
  # NEVER_CHAR (presumably a value no input character equals -- confirm at
  # its definition), so the final 'C'/'c' is handled by its own arm.
  my $matched = length $self->{kwd};

  if (   $nc == [undef, 0x0055, 0x0042, 0x004C, 0x0049, NEVER_CHAR]->[$matched]
      or $nc == [undef, 0x0075, 0x0062, 0x006C, 0x0069, NEVER_CHAR]->[$matched]) {
    # Next letter of the keyword, in either case: remember it.
    $self->{kwd} .= chr $nc;
  }
  elsif ($matched == 5 and ($nc == 0x0043 or $nc == 0x0063)) {    # 'C' / 'c'
    # Keyword complete.  In XML anything other than exact upper-case
    # "PUBLIC" is reported.
    if ($self->{is_xml} and ($self->{kwd} ne 'PUBLI' or $nc == 0x0063)) {
      $self->{parse_error}->(
        level  => $self->{level}->{must},
        type   => 'lowercase keyword',
        text   => 'PUBLIC',
        line   => $self->{line_prev},
        column => $self->{column_prev} - 4,
      );
    }
    $self->{state} = AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE;
  }
  else {
    # Not the keyword.  Report it at a column computed from the number of
    # characters already matched, switch to the matching bogus state and
    # let that state look at this same character (it is not consumed).
    $self->{parse_error}->(
      level  => $self->{level}->{must},
      type   => 'string after DOCTYPE name',
      line   => $self->{line_prev},
      column => $self->{column_prev} + 1 - $matched,
    );
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{ct}->{quirks} = 1;
      $self->{state} = BOGUS_DOCTYPE_STATE;
    }
    else {
      $self->{state} = BOGUS_MD_STATE;
    }
    redo A;
  }

  # Shared tail for the two matching arms: consume the character and loop.
  if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
    $self->{column}++;
    $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
  }
  else {
    $self->{set_nc}->($self);
  }
  redo A;
}
elsif ($state == SYSTEM_STATE) {
  # Case-insensitive match of the keyword "SYSTEM".  $self->{kwd} holds
  # the characters matched so far, so its length selects the next expected
  # letter from the upper- and lower-case tables below.  Slot 5 holds
  # NEVER_CHAR (presumably a value no input character equals -- confirm at
  # its definition), so the final 'M'/'m' is handled by its own arm.
  my $matched = length $self->{kwd};

  if (   $nc == [undef, 0x0059, 0x0053, 0x0054, 0x0045, NEVER_CHAR]->[$matched]
      or $nc == [undef, 0x0079, 0x0073, 0x0074, 0x0065, NEVER_CHAR]->[$matched]) {
    # Next letter of the keyword, in either case: remember it.
    $self->{kwd} .= chr $nc;
  }
  elsif ($matched == 5 and ($nc == 0x004D or $nc == 0x006D)) {    # 'M' / 'm'
    # Keyword complete.  In XML anything other than exact upper-case
    # "SYSTEM" is reported.
    if ($self->{is_xml} and ($self->{kwd} ne 'SYSTE' or $nc == 0x006D)) {
      $self->{parse_error}->(
        level  => $self->{level}->{must},
        type   => 'lowercase keyword',
        text   => 'SYSTEM',
        line   => $self->{line_prev},
        column => $self->{column_prev} - 4,
      );
    }
    $self->{state} = AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE;
  }
  else {
    # Not the keyword.  Report it at a column computed from the number of
    # characters already matched, switch to the matching bogus state and
    # let that state look at this same character (it is not consumed).
    $self->{parse_error}->(
      level  => $self->{level}->{must},
      type   => 'string after DOCTYPE name',
      line   => $self->{line_prev},
      column => $self->{column_prev} + 1 - $matched,
    );
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{ct}->{quirks} = 1;
      $self->{state} = BOGUS_DOCTYPE_STATE;
    }
    else {
      $self->{state} = BOGUS_MD_STATE;
    }
    redo A;
  }

  # Shared tail for the two matching arms: consume the character and loop.
  if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
    $self->{column}++;
    $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
  }
  else {
    $self->{set_nc}->($self);
  }
  redo A;
}
elsif ($state == AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE
       or $state == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
  # Shared handler: directly after the PUBLIC keyword, or after at least
  # one whitespace character following it.  The two differ only in whether
  # a quote arriving now is reported as missing its leading space.
  if ($is_space->{$nc}) {
    $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
  }
  elsif ($nc == 0x0022) {    # '"' opens the public identifier
    if ($state == AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE) {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'no space before pubid literal',
      );
    }
    $self->{ct}->{pubid} = '';
    $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
  }
  elsif ($nc == 0x0027) {    # "'" opens the public identifier
    if ($state == AFTER_DOCTYPE_PUBLIC_KEYWORD_STATE) {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'no space before pubid literal',
      );
    }
    $self->{ct}->{pubid} = '';
    $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
  }
  elsif ($nc == 0x003E) {    # '>' before any identifier
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'no PUBLIC literal',
    );
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{state} = DATA_STATE;
      $self->{ct}->{quirks} = 1;
    }
    else {
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    # Consume the '>' before handing the token back.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  elsif ($nc == EOF_CHAR) {
    # End of input: return the token without consuming anything.
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'unclosed DOCTYPE',
      );
      $self->{state} = DATA_STATE;
      $self->{ct}->{quirks} = 1;
    }
    else {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'unclosed md',
      );
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    return ($self->{ct});
  }
  elsif ($self->{is_xml}
         and $self->{ct}->{type} == DOCTYPE_TOKEN
         and $nc == 0x005B) {
    # '[' on an XML DOCTYPE: the internal subset starts although the
    # public identifier is missing.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'no PUBLIC literal',
    );
    $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    $self->{ct}->{has_internal_subset} = 1;
    $self->{in_subset} = 1;
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  else {
    # Anything else is junk after the keyword.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'string after PUBLIC',
    );
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{ct}->{quirks} = 1;
      $self->{state} = BOGUS_DOCTYPE_STATE;
    }
    else {
      $self->{state} = BOGUS_MD_STATE;
    }
  }

  # Shared tail for the arms that did not return: consume the current
  # character (from char_buffer when possible, else via set_nc) and loop.
  if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
    $self->{column}++;
    $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
  }
  else {
    $self->{set_nc}->($self);
  }
  redo A;
}
elsif ($state == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
  # Inside a double-quoted public identifier, accumulated in
  # $self->{ct}->{pubid}.
  if ($nc == 0x0022) {    # closing '"'
    $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
  }
  elsif ($nc == 0x003E) {    # '>' before the closing quote
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'unclosed PUBLIC literal',
    );
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{state} = DATA_STATE;
      $self->{ct}->{quirks} = 1;
    }
    else {
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    # Consume the '>' before handing the token back.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  elsif ($nc == -1) {
    # -1 is treated as "no more input"; nothing is consumed.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'unclosed PUBLIC literal',
    );
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{state} = DATA_STATE;
      $self->{ct}->{quirks} = 1;
    }
    else {
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    return ($self->{ct});
  }
  elsif ($nc == 0x0000) {
    # NUL is stored as U+FFFD.  It is also a parse error; report it with
    # the same 'NULL' error type the comment states use for NUL.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'NULL',
    );
    $self->{ct}->{pubid} .= "\x{FFFD}";
  }
  else {
    # Ordinary character: append it, then let the read_until callback work
    # on the identifier with the stop set NUL / '"' / '>' and the current
    # length.
    $self->{ct}->{pubid} .= chr $nc;
    $self->{read_until}->(
      $self->{ct}->{pubid},
      qq[\x00">],
      length $self->{ct}->{pubid},
    );
  }

  # Shared tail for the arms that did not return: consume the current
  # character (from char_buffer when possible, else via set_nc) and loop.
  if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
    $self->{column}++;
    $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
  }
  else {
    $self->{set_nc}->($self);
  }
  redo A;
}
elsif ($state == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
  # Inside a single-quoted public identifier, accumulated in
  # $self->{ct}->{pubid}.
  if ($nc == 0x0027) {    # closing "'"
    $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
  }
  elsif ($nc == 0x003E) {    # '>' before the closing quote
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'unclosed PUBLIC literal',
    );
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{state} = DATA_STATE;
      $self->{ct}->{quirks} = 1;
    }
    else {
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    # Consume the '>' before handing the token back.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  elsif ($nc == -1) {
    # -1 is treated as "no more input"; nothing is consumed.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'unclosed PUBLIC literal',
    );
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{state} = DATA_STATE;
      $self->{ct}->{quirks} = 1;
    }
    else {
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    return ($self->{ct});
  }
  elsif ($nc == 0x0000) {
    # NUL is stored as U+FFFD.  It is also a parse error; report it with
    # the same 'NULL' error type the comment states use for NUL.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'NULL',
    );
    $self->{ct}->{pubid} .= "\x{FFFD}";
  }
  else {
    # Ordinary character: append it, then let the read_until callback work
    # on the identifier with the stop set NUL / "'" / '>' and the current
    # length.
    $self->{ct}->{pubid} .= chr $nc;
    $self->{read_until}->(
      $self->{ct}->{pubid},
      qq[\x00'>],
      length $self->{ct}->{pubid},
    );
  }

  # Shared tail for the arms that did not return: consume the current
  # character (from char_buffer when possible, else via set_nc) and loop.
  if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
    $self->{column}++;
    $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
  }
  else {
    $self->{set_nc}->($self);
  }
  redo A;
}
elsif ($state == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE
       or $state == BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDS_STATE) {
  # Shared handler: directly after the closing quote of the public
  # identifier, or after at least one whitespace character following it.
  # The two differ only in whether a quote arriving now is reported as
  # missing its leading space.
  if ($is_space->{$nc}) {
    $self->{state} = BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDS_STATE;
  }
  elsif ($nc == 0x0022) {    # '"' opens the system identifier
    if ($state == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'no space before system literal',
      );
    }
    $self->{ct}->{sysid} = '';
    $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
  }
  elsif ($nc == 0x0027) {    # "'" opens the system identifier
    if ($state == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'no space before system literal',
      );
    }
    $self->{ct}->{sysid} = '';
    $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
  }
  elsif ($nc == 0x003E) {    # '>' with no system identifier
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      # For a DOCTYPE the missing system identifier is reported only
      # when tokenizing XML.
      if ($self->{is_xml}) {
        $self->{parse_error}->(
          level => $self->{level}->{must},
          type  => 'no SYSTEM literal',
        );
      }
      $self->{state} = DATA_STATE;
    }
    else {
      # For other token types it is reported unless the token is a
      # NOTATION token.
      unless ($self->{ct}->{type} == NOTATION_TOKEN) {
        $self->{parse_error}->(
          level => $self->{level}->{must},
          type  => 'no SYSTEM literal',
        );
      }
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    # Consume the '>' before handing the token back.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  elsif ($nc == EOF_CHAR) {
    # End of input: return the token without consuming anything.
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'unclosed DOCTYPE',
      );
      $self->{state} = DATA_STATE;
      $self->{ct}->{quirks} = 1;
    }
    else {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'unclosed md',
      );
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    return ($self->{ct});
  }
  elsif ($self->{is_xml}
         and $self->{ct}->{type} == DOCTYPE_TOKEN
         and $nc == 0x005B) {
    # '[' on an XML DOCTYPE: the internal subset starts although the
    # system identifier is missing.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'no SYSTEM literal',
    );
    $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    $self->{ct}->{has_internal_subset} = 1;
    $self->{in_subset} = 1;
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  else {
    # Anything else is junk after the public identifier.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'string after PUBLIC literal',
    );
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{ct}->{quirks} = 1;
      $self->{state} = BOGUS_DOCTYPE_STATE;
    }
    else {
      $self->{state} = BOGUS_MD_STATE;
    }
  }

  # Shared tail for the arms that did not return: consume the current
  # character (from char_buffer when possible, else via set_nc) and loop.
  if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
    $self->{column}++;
    $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
  }
  else {
    $self->{set_nc}->($self);
  }
  redo A;
}
elsif ($state == AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE
       or $state == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
  # Shared handler: directly after the SYSTEM keyword, or after at least
  # one whitespace character following it.  The two differ only in whether
  # a quote arriving now is reported as missing its leading space.
  if ($is_space->{$nc}) {
    $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
  }
  elsif ($nc == 0x0022) {    # '"' opens the system identifier
    if ($state == AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE) {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'no space before system literal',
      );
    }
    $self->{ct}->{sysid} = '';
    $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
  }
  elsif ($nc == 0x0027) {    # "'" opens the system identifier
    if ($state == AFTER_DOCTYPE_SYSTEM_KEYWORD_STATE) {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'no space before system literal',
      );
    }
    $self->{ct}->{sysid} = '';
    $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
  }
  elsif ($nc == 0x003E) {    # '>' before any identifier
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'no SYSTEM literal',
    );
    # In this arm the '>' is consumed first and the state is chosen
    # afterwards; that order is kept as found.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{state} = DATA_STATE;
      $self->{ct}->{quirks} = 1;
    }
    else {
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    return ($self->{ct});
  }
  elsif ($nc == EOF_CHAR) {
    # End of input: return the token without consuming anything.
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'unclosed DOCTYPE',
      );
      $self->{state} = DATA_STATE;
      $self->{ct}->{quirks} = 1;
    }
    else {
      $self->{parse_error}->(
        level => $self->{level}->{must},
        type  => 'unclosed md',
      );
      $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    return ($self->{ct});
  }
  elsif ($self->{is_xml}
         and $self->{ct}->{type} == DOCTYPE_TOKEN
         and $nc == 0x005B) {
    # '[' on an XML DOCTYPE: the internal subset starts although the
    # system identifier is missing.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'no SYSTEM literal',
    );
    $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    $self->{ct}->{has_internal_subset} = 1;
    $self->{in_subset} = 1;
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
      @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
      $self->{column}++;
      $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
      $self->{set_nc}->($self);
    }
    return ($self->{ct});
  }
  else {
    # Anything else is junk after the keyword.
    $self->{parse_error}->(
      level => $self->{level}->{must},
      type  => 'string after SYSTEM',
    );
    if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
      $self->{ct}->{quirks} = 1;
      $self->{state} = BOGUS_DOCTYPE_STATE;
    }
    else {
      $self->{state} = BOGUS_MD_STATE;
    }
  }

  # Shared tail for the arms that did not return: consume the current
  # character (from char_buffer when possible, else via set_nc) and loop.
  if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    @{$self}{qw(line_prev column_prev)} = @{$self}{qw(line column)};
    $self->{column}++;
    $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
  }
  else {
    $self->{set_nc}->($self);
  }
  redo A;
}
elsif ($state == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
    # Inside a double-quoted system literal: accumulate characters into
    # the current token's sysid until the closing quote.
    if ($nc == 0x0022) {    # '"' closes the literal
        $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif (not $self->{is_xml} and $nc == 0x003E) {    # '>'
        # Outside XML mode a '>' ends the declaration even though the
        # literal was never closed.
        $self->{parse_error}->(level => $self->{level}->{must},
                               type  => 'unclosed SYSTEM literal');
        if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
            $self->{state} = DATA_STATE;
            $self->{ct}->{quirks} = 1;
        }
        else {
            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        }

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        return ($self->{ct});
    }
    elsif ($nc == -1) {
        # Input ended inside the literal; the character is not consumed.
        $self->{parse_error}->(level => $self->{level}->{must},
                               type  => 'unclosed SYSTEM literal');
        if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
            $self->{state} = DATA_STATE;
            $self->{ct}->{quirks} = 1;
        }
        else {
            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        }
        return ($self->{ct});
    }
    elsif ($nc == 0x0000) {
        # U+0000 is a parse error and is replaced by U+FFFD.  The error is
        # reported with the same 'NULL' type the processing-instruction
        # states of this tokenizer use for the same situation.
        $self->{parse_error}->(level => $self->{level}->{must},
                               type  => 'NULL');
        $self->{ct}->{sysid} .= "\x{FFFD}";

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    else {
        # Ordinary character: append it, then hand the buffer to the
        # read_until callback together with the characters that need
        # individual handling (NUL, '"', '>') and the current length.
        $self->{ct}->{sysid} .= chr $nc;
        $self->{read_until}->($self->{ct}->{sysid},
                              qq[\x00">],
                              length $self->{ct}->{sysid});

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
}
elsif ($state == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
    # Inside a single-quoted system literal: accumulate characters into
    # the current token's sysid until the closing quote.
    if ($nc == 0x0027) {    # "'" closes the literal
        $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif (not $self->{is_xml} and $nc == 0x003E) {    # '>'
        # Outside XML mode a '>' ends the declaration even though the
        # literal was never closed.  The token-type check mirrors the
        # double-quoted handler so both literal forms resume in the same
        # state for the same kind of token.
        $self->{parse_error}->(level => $self->{level}->{must},
                               type  => 'unclosed SYSTEM literal');
        if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
            $self->{state} = DATA_STATE;
            $self->{ct}->{quirks} = 1;
        }
        else {
            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        }

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        return ($self->{ct});
    }
    elsif ($nc == -1) {
        # Input ended inside the literal; the character is not consumed.
        $self->{parse_error}->(level => $self->{level}->{must},
                               type  => 'unclosed SYSTEM literal');
        if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
            $self->{state} = DATA_STATE;
            $self->{ct}->{quirks} = 1;
        }
        else {
            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        }
        return ($self->{ct});
    }
    elsif ($nc == 0x0000) {
        # U+0000 is a parse error and is replaced by U+FFFD.  The error is
        # reported with the same 'NULL' type the processing-instruction
        # states of this tokenizer use for the same situation.
        $self->{parse_error}->(level => $self->{level}->{must},
                               type  => 'NULL');
        $self->{ct}->{sysid} .= "\x{FFFD}";

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    else {
        # Ordinary character: append it, then hand the buffer to the
        # read_until callback together with the characters that need
        # individual handling (NUL, "'", '>') and the current length.
        $self->{ct}->{sysid} .= chr $nc;
        $self->{read_until}->($self->{ct}->{sysid},
                              qq[\x00'>],
                              length $self->{ct}->{sysid});

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
}
elsif ($state == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
    # The system literal has been closed; decide what follows it.
    if ($is_space->{$nc}) {
        # Only general entity declarations move on to look for NDATA;
        # every other token type stays in this state.
        if ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN) {
            $self->{state} = BEFORE_NDATA_STATE;
        }

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif ($nc == 0x003E) {    # '>'
        # Declaration complete: pick the follow-up state and return the
        # current token.
        if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
            $self->{state} = DATA_STATE;
        }
        else {
            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        }

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        return ($self->{ct});
    }
    elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
           ($nc == 0x004E or $nc == 0x006E)) {    # 'N' or 'n'
        # NDATA keyword begins directly after the literal, with no space.
        $self->{parse_error}->(level => $self->{level}->{must},
                               type  => 'no space before NDATA');
        $self->{state} = NDATA_STATE;
        $self->{kwd}   = chr $nc;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif ($nc == -1) {
        # Input ended inside the declaration; the character is not consumed.
        if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
            $self->{parse_error}->(level => $self->{level}->{must},
                                   type  => 'unclosed DOCTYPE');
            $self->{state} = DATA_STATE;
            $self->{ct}->{quirks} = 1;
        }
        else {
            $self->{parse_error}->(level => $self->{level}->{must},
                                   type  => 'unclosed md');
            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        }
        return ($self->{ct});
    }
    elsif ($self->{is_xml} and
           $self->{ct}->{type} == DOCTYPE_TOKEN and
           $nc == 0x005B) {    # '['
        # XML only: the DOCTYPE continues with an internal subset.
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $self->{ct}->{has_internal_subset} = 1;
        $self->{in_subset} = 1;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        return ($self->{ct});
    }
    else {
        # Unexpected content after the literal; switch to a bogus state.
        # Note that quirks is not set in this branch.
        $self->{parse_error}->(level => $self->{level}->{must},
                               type  => 'string after SYSTEM literal');
        if ($self->{ct}->{type} == DOCTYPE_TOKEN) {
            $self->{state} = BOGUS_DOCTYPE_STATE;
        }
        else {
            $self->{state} = BOGUS_MD_STATE;
        }

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
}
elsif ($state == BEFORE_NDATA_STATE) {
    # Whitespace has followed the system literal; an NDATA keyword or the
    # end of the declaration may come next.
    if ($is_space->{$nc}) {
        # Skip further whitespace.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif ($nc == 0x003E) {    # '>'
        # Declaration complete; return the current token.
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        return ($self->{ct});
    }
    elsif ($nc == 0x004E or $nc == 0x006E) {    # 'N' or 'n'
        # First letter of a keyword; remember it and keep matching in
        # NDATA_STATE.
        $self->{state} = NDATA_STATE;
        $self->{kwd}   = chr $nc;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif ($nc == -1) {
        # Input ended inside the declaration; the character is not consumed.
        $self->{parse_error}->(level => $self->{level}->{must},
                               type  => 'unclosed md');
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        return ($self->{ct});
    }
    else {
        # Unexpected content; treat the rest as a bogus declaration.
        $self->{parse_error}->(level => $self->{level}->{must},
                               type  => 'string after SYSTEM literal');
        $self->{state} = BOGUS_MD_STATE;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
}
elsif ($state == BOGUS_DOCTYPE_STATE) {
    # Skip over a malformed DOCTYPE until something ends it.
    if ($nc == 0x003E) {    # '>'
        $self->{state} = DATA_STATE;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        return ($self->{ct});
    }
    elsif ($self->{is_xml} and $nc == 0x005B) {    # '['
        # XML only: an internal subset follows the bogus part.
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $self->{ct}->{has_internal_subset} = 1;
        $self->{in_subset} = 1;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        return ($self->{ct});
    }
    elsif ($nc == -1) {
        # Input ended; return the token without consuming anything.
        $self->{state} = DATA_STATE;
        return ($self->{ct});
    }
    else {
        # Pass a throw-away buffer to read_until with '>' and '[' as the
        # characters of interest; the collected text is not used.
        my $discarded = '';
        $self->{read_until}->($discarded, q{>[}, 0);

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
}
elsif ($state == CDATA_SECTION_STATE) {
    # Inside a CDATA section: collect text into the current token's data
    # until a ']' suggests the section may be ending.
    if ($nc == 0x005D) {    # ']'
        $self->{state} = CDATA_SECTION_MSE1_STATE;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif ($nc == -1) {
        # Input ended before the section was closed.  This is reported
        # only in XML mode.
        if ($self->{is_xml}) {
            $self->{parse_error}->(level => $self->{level}->{must},
                                   type  => 'no mse');
        }
        $self->{state} = DATA_STATE;

        # Return the token only when it carries some data; otherwise
        # continue in the data state.
        if (length $self->{ct}->{data}) {
            return ($self->{ct});
        }
        redo A;
    }
    else {
        # Ordinary character: append it, then hand the buffer to the
        # read_until callback together with the characters that need
        # individual handling (NUL, ']') and the current length.
        $self->{ct}->{data} .= chr $nc;
        $self->{read_until}->($self->{ct}->{data},
                              qq<\x00]>,
                              length $self->{ct}->{data});

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
}
elsif ($state == CDATA_SECTION_MSE1_STATE) {
    # One ']' has been seen inside a CDATA section.
    if ($nc == 0x005D) {    # second ']'
        $self->{state} = CDATA_SECTION_MSE2_STATE;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    else {
        # The ']' was plain data after all.  Put it back into the token
        # and reprocess the current character as CDATA content.
        $self->{ct}->{data} .= ']';
        $self->{state} = CDATA_SECTION_STATE;
        redo A;
    }
}
elsif ($state == CDATA_SECTION_MSE2_STATE) {
    # Two consecutive ']' have been seen inside a CDATA section.
    if ($nc == 0x003E) {    # '>' completes the "]]>" terminator
        $self->{state} = DATA_STATE;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }

        # Return the token only when it carries some data; otherwise
        # continue in the data state.
        if (length $self->{ct}->{data}) {
            return ($self->{ct});
        }
        redo A;
    }
    elsif ($nc == 0x005D) {    # a third ']'
        # The oldest ']' becomes data; the last two remain pending.
        $self->{ct}->{data} .= ']';

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    else {
        # Not a terminator: both pending ']' become data and the current
        # character is reprocessed as CDATA content.
        $self->{ct}->{data} .= ']]';
        $self->{state} = CDATA_SECTION_STATE;
        redo A;
    }
}
elsif ($state == ENTITY_STATE) {
    # An '&' has been consumed; decide whether a reference follows.
    if ($is_space->{$nc} or
        {
            0x003C => 1,    # '<'
            0x0026 => 1,    # '&'
            -1     => 1,
            0x0022 => 1,    # '"'
            0x0027 => 1,    # "'"
            0x0060 => 1,    # '`'
            0x003D => 1,    # '='
            $self->{entity_add} => 1,
        }->{$nc}) {
        # Not a reference.  Only XML mode reports the bare '&'.  Control
        # falls through to the code below that outputs a literal '&'.
        if ($self->{is_xml}) {
            $self->{parse_error}->(level  => $self->{level}->{must},
                                   type   => 'bare ero',
                                   line   => $self->{line_prev},
                                   column => $self->{column_prev}
                                       + ($nc == -1 ? 1 : 0));
        }
    }
    elsif ($nc == 0x0023) {    # '#'
        # Start of a numeric character reference.
        $self->{state} = ENTITY_HASH_STATE;
        $self->{kwd}   = '#';

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif ($self->{is_xml} or
           (0x0041 <= $nc and $nc <= 0x005A) or      # A-Z
           (0x0061 <= $nc and $nc <= 0x007A)) {      # a-z
        # Start of a named reference; begin collecting the name.
        $self->{state} = ENTITY_NAME_STATE;
        $self->{kwd}   = chr $nc;
        $self->{entity__value} = $self->{kwd};
        $self->{entity__match} = 0;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }

    # No reference was recognised: output a literal '&' and go back to
    # the state that was active before the '&'.
    if ($self->{prev_state} == DATA_STATE or
        $self->{prev_state} == RCDATA_STATE) {
        $self->{state} = $self->{prev_state};
        return ({
            type   => CHARACTER_TOKEN,
            data   => '&',
            line   => $self->{line_prev},
            column => $self->{column_prev},
        });
    }
    else {
        # Any other previous state: append to $self->{ca}->{value}.
        $self->{ca}->{value} .= '&';
        $self->{state} = $self->{prev_state};
        redo A;
    }
}
elsif ($state == ENTITY_HASH_STATE) {
    # "&#" has been consumed; look for a hexadecimal marker or a digit.
    if ($nc == 0x0078 or $nc == 0x0058) {    # 'x' or 'X'
        # The uppercase marker is reported in XML mode only.
        if ($nc == 0x0058 and $self->{is_xml}) {
            $self->{parse_error}->(level => $self->{level}->{must},
                                   type  => 'uppercase hcro');
        }
        $self->{state} = HEXREF_X_STATE;
        $self->{kwd} .= chr $nc;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif (0x0030 <= $nc and $nc <= 0x0039) {    # 0-9
        # First decimal digit: from here on kwd holds the numeric value.
        $self->{state} = NCR_NUM_STATE;
        $self->{kwd}   = $nc - 0x0030;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    else {
        # "&#" with nothing usable after it: output it literally.
        $self->{parse_error}->(level  => $self->{level}->{must},
                               type   => 'bare nero',
                               line   => $self->{line_prev},
                               column => $self->{column_prev} - 1);
        if ($self->{prev_state} == DATA_STATE or
            $self->{prev_state} == RCDATA_STATE) {
            $self->{state} = $self->{prev_state};
            return ({
                type   => CHARACTER_TOKEN,
                data   => '&#',
                line   => $self->{line_prev},
                column => $self->{column_prev} - 1,
            });
        }
        else {
            # Any other previous state: append to $self->{ca}->{value}.
            $self->{ca}->{value} .= '&#';
            $self->{state} = $self->{prev_state};
            redo A;
        }
    }
}
elsif ($state == NCR_NUM_STATE) {
    # Decimal character reference: kwd accumulates the numeric value.
    if (0x0030 <= $nc and $nc <= 0x0039) {    # 0-9
        $self->{kwd} *= 10;
        $self->{kwd} += $nc - 0x0030;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif ($nc == 0x003B) {    # ';'
        # Consume the terminator, then fall through to finish the
        # reference below.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
    }
    else {
        # Missing ';': report it, leave the character unconsumed and fall
        # through to finish the reference below.
        $self->{parse_error}->(level => $self->{level}->{must},
                               type  => 'no refc');
    }

    # Finish the reference.  The position is captured after the optional
    # consumption of ';' above.
    my $code       = $self->{kwd};
    my $ref_line   = $self->{line_prev};
    my $ref_column = $self->{column_prev};

    if ((not $self->{is_xml} and $charref_map->{$code}) or
        ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
        ($self->{is_xml} and $code == 0x0000)) {
        # Disallowed code point: report it and substitute whatever
        # $charref_map holds for it.
        $self->{parse_error}->(level  => $self->{level}->{must},
                               type   => 'invalid character reference',
                               text   => (sprintf 'U+%04X', $code),
                               line   => $ref_line,
                               column => $ref_column);
        $code = $charref_map->{$code};
    }
    elsif ($code > 0x10FFFF) {
        # Above the Unicode range: report it and substitute U+FFFD.
        $self->{parse_error}->(level  => $self->{level}->{must},
                               type   => 'invalid character reference',
                               text   => (sprintf 'U-%08X', $code),
                               line   => $ref_line,
                               column => $ref_column);
        $code = 0xFFFD;
    }

    if ($self->{prev_state} == DATA_STATE or
        $self->{prev_state} == RCDATA_STATE) {
        $self->{state} = $self->{prev_state};
        return ({
            type          => CHARACTER_TOKEN,
            data          => chr $code,
            has_reference => 1,
            line          => $ref_line,
            column        => $ref_column,
        });
    }
    else {
        # Any other previous state: append to $self->{ca}->{value}.
        $self->{ca}->{value} .= chr $code;
        $self->{ca}->{has_reference} = 1;
        $self->{state} = $self->{prev_state};
        redo A;
    }
}
elsif ($state == HEXREF_X_STATE) {
    # "&#x" or "&#X" has been consumed; a hexadecimal digit must follow.
    if ((0x0030 <= $nc and $nc <= 0x0039) or      # 0-9
        (0x0041 <= $nc and $nc <= 0x0046) or      # A-F
        (0x0061 <= $nc and $nc <= 0x0066)) {      # a-f
        # Reset kwd to a numeric accumulator and reprocess this digit in
        # HEXREF_HEX_STATE; nothing is consumed here.
        $self->{state} = HEXREF_HEX_STATE;
        $self->{kwd}   = 0;
        redo A;
    }
    else {
        # No digit: output "&" followed by the collected kwd literally.
        $self->{parse_error}->(level  => $self->{level}->{must},
                               type   => 'bare hcro',
                               line   => $self->{line_prev},
                               column => $self->{column_prev} - 2);
        if ($self->{prev_state} == DATA_STATE or
            $self->{prev_state} == RCDATA_STATE) {
            $self->{state} = $self->{prev_state};
            return ({
                type   => CHARACTER_TOKEN,
                data   => '&' . $self->{kwd},
                line   => $self->{line_prev},
                column => $self->{column_prev} - length $self->{kwd},
            });
        }
        else {
            # Any other previous state: append to $self->{ca}->{value}.
            $self->{ca}->{value} .= '&' . $self->{kwd};
            $self->{state} = $self->{prev_state};
            redo A;
        }
    }
}
elsif ($state == HEXREF_HEX_STATE) {
    # Hexadecimal character reference: kwd accumulates the numeric value.
    if (0x0030 <= $nc and $nc <= 0x0039) {    # 0-9
        $self->{kwd} *= 0x10;
        $self->{kwd} += $nc - 0x0030;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif (0x0061 <= $nc and $nc <= 0x0066) {    # a-f
        $self->{kwd} *= 0x10;
        $self->{kwd} += $nc - 0x0060 + 9;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif (0x0041 <= $nc and $nc <= 0x0046) {    # A-F
        $self->{kwd} *= 0x10;
        $self->{kwd} += $nc - 0x0040 + 9;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif ($nc == 0x003B) {    # ';'
        # Consume the terminator, then fall through to finish the
        # reference below.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
    }
    else {
        # Missing ';': report it at the current position, leave the
        # character unconsumed and fall through.
        $self->{parse_error}->(level  => $self->{level}->{must},
                               type   => 'no refc',
                               line   => $self->{line},
                               column => $self->{column});
    }

    # Finish the reference.  The position is captured after the optional
    # consumption of ';' above.
    my $code       = $self->{kwd};
    my $ref_line   = $self->{line_prev};
    my $ref_column = $self->{column_prev};

    if ((not $self->{is_xml} and $charref_map->{$code}) or
        ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
        ($self->{is_xml} and $code == 0x0000)) {
        # Disallowed code point: report it and substitute whatever
        # $charref_map holds for it.
        $self->{parse_error}->(level  => $self->{level}->{must},
                               type   => 'invalid character reference',
                               text   => (sprintf 'U+%04X', $code),
                               line   => $ref_line,
                               column => $ref_column);
        $code = $charref_map->{$code};
    }
    elsif ($code > 0x10FFFF) {
        # Above the Unicode range: report it and substitute U+FFFD.
        $self->{parse_error}->(level  => $self->{level}->{must},
                               type   => 'invalid character reference',
                               text   => (sprintf 'U-%08X', $code),
                               line   => $ref_line,
                               column => $ref_column);
        $code = 0xFFFD;
    }

    if ($self->{prev_state} == DATA_STATE or
        $self->{prev_state} == RCDATA_STATE) {
        $self->{state} = $self->{prev_state};
        return ({
            type          => CHARACTER_TOKEN,
            data          => chr $code,
            has_reference => 1,
            line          => $ref_line,
            column        => $ref_column,
        });
    }
    else {
        # Any other previous state: append to $self->{ca}->{value}.
        $self->{ca}->{value} .= chr $code;
        $self->{ca}->{has_reference} = 1;
        $self->{state} = $self->{prev_state};
        redo A;
    }
}
elsif ($state == ENTITY_NAME_STATE) {
    # Collecting the name of a named reference in kwd.  entity__match
    # records the progress: it is set to 1 when a known name ends with
    # ';', to -1 when a known name has been seen without ';', and is
    # doubled whenever another name character fails to match.
    if ((0x0041 <= $nc and $nc <= 0x005A) or      # A-Z
        (0x0061 <= $nc and $nc <= 0x007A) or      # a-z
        (0x0030 <= $nc and $nc <= 0x0039) or      # 0-9
        $nc == 0x003B or                          # ';'
        ($self->{is_xml} and
         not ($is_space->{$nc} or
              {
                  0x003C => 1,    # '<'
                  0x0026 => 1,    # '&'
                  -1     => 1,
                  0x0022 => 1,    # '"'
                  0x0027 => 1,    # "'"
                  0x0060 => 1,    # '`'
                  0x003D => 1,    # '='
                  $self->{entity_add} => 1,
              }->{$nc}))) {
        $self->{kwd} .= chr $nc;

        if (defined $entity2char{$self->{kwd}} or
            $self->{ge}->{$self->{kwd}}) {
            # The name collected so far is known.
            if ($nc == 0x003B) {    # ';'
                if (defined $self->{ge}->{$self->{kwd}}) {
                    # An entry in $self->{ge} takes precedence.
                    if ($self->{ge}->{$self->{kwd}}->{only_text}) {
                        $self->{entity__value}
                            = $self->{ge}->{$self->{kwd}}->{value};
                    }
                    else {
                        if (defined $self->{ge}->{$self->{kwd}}->{notation}) {
                            $self->{parse_error}->(
                                level => $self->{level}->{must},
                                type  => 'unparsed entity',
                                value => $self->{kwd});
                        }
                        # Not expanded: keep the reference text itself.
                        $self->{entity__value} = '&' . $self->{kwd};
                    }
                }
                else {
                    # Known only through %entity2char.  In XML mode this
                    # is reported as undeclared; the level argument is
                    # passed twice, exactly as in the original call.
                    if ($self->{is_xml}) {
                        $self->{parse_error}->(
                            level => $self->{level}->{must},
                            type  => 'entity not declared',
                            value => $self->{kwd},
                            level => {
                                'amp;'  => $self->{level}->{warn},
                                'quot;' => $self->{level}->{warn},
                                'lt;'   => $self->{level}->{warn},
                                'gt;'   => $self->{level}->{warn},
                                'apos;' => $self->{level}->{warn},
                            }->{$self->{kwd}} || $self->{level}->{must},
                            line   => $self->{line_prev},
                            column => $self->{column} - length $self->{kwd});
                    }
                    $self->{entity__value} = $entity2char{$self->{kwd}};
                }
                $self->{entity__match} = 1;

                # Consume the ';' and fall through to the output code
                # at the end of this handler.
                if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
                    $self->{line_prev}   = $self->{line};
                    $self->{column_prev} = $self->{column};
                    $self->{column}++;
                    $self->{nc} = ord substr($self->{char_buffer},
                                             $self->{char_buffer_pos}++, 1);
                }
                else {
                    $self->{set_nc}->($self);
                }
            }
            else {
                # Known name without ';' so far: remember the value and
                # keep reading.
                $self->{entity__value} = $entity2char{$self->{kwd}};
                $self->{entity__match} = -1;

                # Advance to the next input character.
                if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
                    $self->{line_prev}   = $self->{line};
                    $self->{column_prev} = $self->{column};
                    $self->{column}++;
                    $self->{nc} = ord substr($self->{char_buffer},
                                             $self->{char_buffer_pos}++, 1);
                }
                else {
                    $self->{set_nc}->($self);
                }
                redo A;
            }
        }
        else {
            # The name collected so far is not known.
            if ($nc == 0x003B) {    # ';'
                # The level argument is passed twice, exactly as in the
                # original call.
                $self->{parse_error}->(
                    level  => $self->{level}->{must},
                    type   => 'entity not declared',
                    value  => $self->{kwd},
                    level  => $self->{level}->{must},
                    line   => $self->{line_prev},
                    column => $self->{column} - length $self->{kwd});
                $self->{entity__value} .= chr $nc;
                $self->{entity__match} *= 2;

                # Consume the ';' and fall through to the output code
                # at the end of this handler.
                if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
                    $self->{line_prev}   = $self->{line};
                    $self->{column_prev} = $self->{column};
                    $self->{column}++;
                    $self->{nc} = ord substr($self->{char_buffer},
                                             $self->{char_buffer_pos}++, 1);
                }
                else {
                    $self->{set_nc}->($self);
                }
            }
            else {
                $self->{entity__value} .= chr $nc;
                $self->{entity__match} *= 2;

                # Advance to the next input character.
                if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
                    $self->{line_prev}   = $self->{line};
                    $self->{column_prev} = $self->{column};
                    $self->{column}++;
                    $self->{nc} = ord substr($self->{char_buffer},
                                             $self->{char_buffer_pos}++, 1);
                }
                else {
                    $self->{set_nc}->($self);
                }
                redo A;
            }
        }
    }
    elsif ($nc == 0x003D) {    # '='
        # When the previous state is neither DATA_STATE nor RCDATA_STATE,
        # a match without ';' that is followed by '=' is discarded.
        if ($self->{entity__match} < 0 and
            $self->{prev_state} != DATA_STATE and
            $self->{prev_state} != RCDATA_STATE) {
            $self->{entity__match} = 0;
        }
    }

    # Decide what text the reference produces.
    my $data;
    my $has_ref;
    if ($self->{entity__match} > 0) {
        # A complete reference terminated by ';'.
        $data    = $self->{entity__value};
        $has_ref = 1;
    }
    elsif ($self->{entity__match} < 0) {
        if ($self->{prev_state} != DATA_STATE and
            $self->{prev_state} != RCDATA_STATE and
            $self->{entity__match} < -1) {
            # Previous state is neither DATA_STATE nor RCDATA_STATE and
            # further characters followed the matched name: keep the
            # text literally.
            $data = '&' . $self->{kwd};
        }
        else {
            # Accept the match although the ';' is missing.
            $self->{parse_error}->(level => $self->{level}->{must},
                                   type  => 'no refc');
            $data    = $self->{entity__value};
            $has_ref = 1;
        }
    }
    else {
        # No match at all: output the text literally.
        if ($self->{is_xml} and not $self->{kwd} =~ /;$/) {
            $self->{parse_error}->(
                level  => $self->{level}->{must},
                type   => 'bare ero',
                line   => $self->{line_prev},
                column => $self->{column_prev} - length $self->{kwd});
        }
        $data = '&' . $self->{kwd};
    }

    if ($self->{prev_state} == DATA_STATE or
        $self->{prev_state} == RCDATA_STATE) {
        $self->{state} = $self->{prev_state};
        return ({
            type          => CHARACTER_TOKEN,
            data          => $data,
            has_reference => $has_ref,
            line          => $self->{line_prev},
            column        => $self->{column_prev} + 1 - length $self->{kwd},
        });
    }
    else {
        # Any other previous state: append to $self->{ca}->{value}.
        $self->{ca}->{value} .= $data;
        $self->{ca}->{has_reference} = 1 if $has_ref;
        $self->{state} = $self->{prev_state};
        redo A;
    }
}
elsif ($state == PI_STATE) {
    # Start of a processing instruction: the first character decides
    # whether a target name begins here.
    if ($is_space->{$nc} or $nc == 0x003F or $nc == -1) {    # 0x3F is '?'
        # No target: report it and continue as a bogus comment whose
        # data starts with '?'.  The current character is not consumed.
        $self->{parse_error}->(level  => $self->{level}->{must},
                               type   => 'bare pio',
                               line   => $self->{line_prev},
                               column => $self->{column_prev}
                                   - 1 * ($nc != -1));
        $self->{state} = BOGUS_COMMENT_STATE;
        $self->{ct} = {
            type   => COMMENT_TOKEN,
            data   => '?',
            line   => $self->{line_prev},
            column => $self->{column_prev}
                - 1 * ($nc != -1),
        };
        redo A;
    }
    else {
        # First character of the target; U+0000 is reported and stored
        # as U+FFFD.
        if ($nc == 0x0000) {
            $self->{parse_error}->(level => $self->{level}->{must},
                                   type  => 'NULL');
        }
        $self->{ct} = {
            type   => PI_TOKEN,
            target => $nc == 0x0000 ? "\x{FFFD}" : chr $nc,
            data   => '',
            line   => $self->{line_prev},
            column => $self->{column_prev} - 1,
        };
        $self->{state} = PI_TARGET_STATE;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
}
elsif ($state == PI_TARGET_STATE) {
    # Reading the target name of a processing instruction.
    if ($is_space->{$nc}) {
        # The target ends; kwd starts collecting the whitespace that
        # separates it from the data.
        $self->{state} = PI_TARGET_AFTER_STATE;
        $self->{kwd}   = chr $nc;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif ($nc == EOF_CHAR) {
        # Input ended inside the target: return what was read as a
        # comment token instead of a processing instruction.
        $self->{parse_error}->(level => $self->{level}->{must},
                               type  => 'no pic');
        if ($self->{in_subset}) {
            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        }
        else {
            $self->{state} = DATA_STATE;
        }
        return ({
            type   => COMMENT_TOKEN,
            data   => '?' . $self->{ct}->{target},
            line   => $self->{ct}->{line},
            column => $self->{ct}->{column},
        });
    }
    elsif ($nc == 0x003F) {    # '?'
        # Possibly the start of the closing "?>" right after the target.
        $self->{state} = PI_AFTER_STATE;
        $self->{kwd}   = '';

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    else {
        # Another target character; U+0000 is reported and stored as
        # U+FFFD.
        if ($nc == 0x0000) {
            $self->{parse_error}->(level => $self->{level}->{must},
                                   type  => 'NULL');
        }
        $self->{ct}->{target} .= $nc == 0x0000 ? "\x{FFFD}" : chr $nc;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
}
elsif ($state == PI_TARGET_AFTER_STATE) {
    # Whitespace between the target and the data of a processing
    # instruction.
    if ($is_space->{$nc}) {
        # Keep collecting the separator whitespace in kwd.
        $self->{kwd} .= chr $nc;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    else {
        # First non-space character: reprocess it as data.
        $self->{state} = PI_DATA_STATE;
        redo A;
    }
}
elsif ($state == PI_DATA_STATE) {
    # Reading the data part of a processing instruction.
    if ($nc == 0x003F) {    # '?'
        # Possibly the start of the closing "?>".
        $self->{state} = PI_DATA_AFTER_STATE;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
    elsif ($nc == EOF_CHAR) {
        # Input ended inside the data: return everything read so far
        # (target, separator whitespace and data) as a comment token.
        $self->{parse_error}->(level => $self->{level}->{must},
                               type  => 'no pic');
        if ($self->{in_subset}) {
            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        }
        else {
            $self->{state} = DATA_STATE;
        }
        return ({
            type   => COMMENT_TOKEN,
            data   => '?' . $self->{ct}->{target} .
                      $self->{kwd} .
                      $self->{ct}->{data},
            line   => $self->{ct}->{line},
            column => $self->{ct}->{column},
        });
    }
    else {
        # Data character; U+0000 is reported and stored as U+FFFD.
        if ($nc == 0x0000) {
            $self->{parse_error}->(level => $self->{level}->{must},
                                   type  => 'NULL');
        }
        $self->{ct}->{data} .= $nc == 0x0000 ? "\x{FFFD}" : chr $nc;

        # Hand the buffer to the read_until callback together with the
        # characters that need individual handling (NUL, '?') and the
        # current length.
        $self->{read_until}->($self->{ct}->{data},
                              qq[\x00?],
                              length $self->{ct}->{data});

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer},
                                     $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }
}
elsif ($state == PI_AFTER_STATE) {
    # Decides what to do with the character seen in PI_AFTER_STATE:
    # '>' emits the current token, anything else reports
    # 'no s after target' and continues as data.
    if ($nc == 0x003E) {    # '>'
        $self->{state}
            = $self->{in_subset} ? DOCTYPE_INTERNAL_SUBSET_STATE : DATA_STATE;

        # Advance to the next input character before emitting the token.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        return ($self->{ct});
    }

    if ($nc == 0x003F) {    # '?'
        $self->{parse_error}->(
            level  => $self->{level}{must},
            type   => 'no s after target',
            line   => $self->{line_prev},
            column => $self->{column_prev},
        );
        $self->{ct}->{data} .= '?';
        $self->{state} = PI_DATA_AFTER_STATE;

        # Advance to the next input character.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }

    # Any other character: a '?' is appended to the data and the character
    # itself is left unconsumed for PI_DATA_STATE.
    # NOTE(review): the column is bumped by one when $nc is the literal -1;
    # other states spell this test as EOF_CHAR -- confirm they are the same
    # value before unifying.
    $self->{parse_error}->(
        level  => $self->{level}{must},
        type   => 'no s after target',
        line   => $self->{line_prev},
        column => $self->{column_prev} + ($nc == -1 ? 1 : 0),
    );
    $self->{ct}->{data} .= '?';
    $self->{state} = PI_DATA_STATE;
    redo A;
}
elsif ($state == PI_DATA_AFTER_STATE) {
    # Handles the character seen in PI_DATA_AFTER_STATE.
    if ($nc == 0x003E) {    # '>': emit the current token
        $self->{state}
            = $self->{in_subset} ? DOCTYPE_INTERNAL_SUBSET_STATE : DATA_STATE;

        # Advance to the next input character before emitting the token.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        return ($self->{ct});
    }

    # In both remaining cases a literal '?' is appended to the data.
    $self->{ct}->{data} .= '?';

    if ($nc == 0x003F) {    # another '?': consume it and stay in this state
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
    }
    else {
        # Any other character is left unconsumed for PI_DATA_STATE.
        $self->{state} = PI_DATA_STATE;
    }
    redo A;
}
elsif ($state == DOCTYPE_INTERNAL_SUBSET_STATE) {
    # Dispatches on the current character while $state is
    # DOCTYPE_INTERNAL_SUBSET_STATE.  Every branch except end-of-input
    # consumes the character, so the advance step is shared at the bottom.
    if ($nc == 0x003C) {    # '<'
        $self->{state} = DOCTYPE_TAG_STATE;
    }
    elsif ($nc == 0x0025) {    # '%'
        if (    not $self->{stop_processing}
            and not $self->{document}->xml_standalone) {
            # The original call passed `level` twice (must, then info).  A
            # handler that loads its arguments into a hash only ever sees
            # the later value, so pass that value once and unambiguously.
            $self->{parse_error}->(
                level => $self->{level}->{info},
                type  => 'stop processing',
            );
            $self->{stop_processing} = 1;
        }
    }
    elsif ($nc == 0x005D) {    # ']'
        delete $self->{in_subset};
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
    }
    elsif ($is_space->{$nc}) {
        # Space characters are skipped.
    }
    elsif ($nc == EOF_CHAR) {
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed internal subset',
        );
        delete $self->{in_subset};
        $self->{state} = DATA_STATE;

        # The EOF character is not consumed.
        return ({ type => END_OF_DOCTYPE_TOKEN });
    }
    else {
        # Stray text: reported once, then remembered via
        # internal_subset_tainted so later characters stay silent.
        unless ($self->{internal_subset_tainted}) {
            $self->{parse_error}->(
                level => $self->{level}->{must},
                type  => 'string in internal subset',
            );
            $self->{internal_subset_tainted} = 1;
        }
    }

    # Advance to the next input character: read it from char_buffer when
    # one is buffered, otherwise delegate to the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        $self->{line_prev}   = $self->{line};
        $self->{column_prev} = $self->{column};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }
    redo A;
}
elsif ($state == DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
    # Handles the character seen in DOCTYPE_INTERNAL_SUBSET_AFTER_STATE.
    if ($nc == EOF_CHAR and $nc != 0x003E) {
        # End of input: report it and emit the end-of-doctype token without
        # consuming anything.
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'unclosed DOCTYPE',
        );
        $self->{state} = DATA_STATE;
        return ({ type => END_OF_DOCTYPE_TOKEN });
    }

    my $closes_doctype = ($nc == 0x003E);    # '>'
    if ($closes_doctype) {
        $self->{state} = DATA_STATE;
    }
    else {
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'string after internal subset',
        );
        $self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
    }

    # Both remaining cases consume the current character.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }

    return ({ type => END_OF_DOCTYPE_TOKEN }) if $closes_doctype;
    redo A;
}
elsif ($state == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
    # Discards characters until '>' or end of input, then emits the
    # end-of-doctype token.  No parse errors are reported in this state.
    my $saw_gt = ($nc == 0x003E);    # '>'

    if (not $saw_gt and $nc == EOF_CHAR) {
        # End of input is not consumed.
        $self->{state} = DATA_STATE;
        return ({ type => END_OF_DOCTYPE_TOKEN });
    }

    $self->{state} = DATA_STATE if $saw_gt;

    # Advance to the next input character: read it from char_buffer when
    # one is buffered, otherwise delegate to the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }

    return ({ type => END_OF_DOCTYPE_TOKEN }) if $saw_gt;
    redo A;
}
elsif ($state == DOCTYPE_TAG_STATE) {
    # Chooses the follow-up state from the character seen in
    # DOCTYPE_TAG_STATE: '!' and '?' select a dedicated state, anything
    # else is reported as 'bare stago'.
    my $is_bang = ($nc == 0x0021);    # '!'

    if (not $is_bang and $nc != 0x003F and $nc == EOF_CHAR) {
        # End of input: report and fall back to DATA_STATE without
        # consuming anything.
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'bare stago',
        );
        $self->{state} = DATA_STATE;
        redo A;
    }

    if ($is_bang) {
        $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
    }
    elsif ($nc == 0x003F) {    # '?'
        $self->{state} = PI_STATE;
    }
    else {
        # The error position is that of the previous character.
        $self->{parse_error}->(
            level  => $self->{level}{must},
            type   => 'bare stago',
            line   => $self->{line_prev},
            column => $self->{column_prev},
        );
        $self->{state} = BOGUS_COMMENT_STATE;
        $self->{ct}    = {
            type => COMMENT_TOKEN,
            data => '',
        };
    }

    # All remaining cases consume the current character.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }
    redo A;
}
elsif ($state == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
    # Looks at the first character seen in this state: '-' or the first
    # letter (either case) of a keyword selects the matching MD_* state,
    # anything else starts a bogus comment.
    my $recognized = 1;
    if ($nc == 0x002D) {    # '-'
        $self->{state} = MD_HYPHEN_STATE;
    }
    elsif ($nc == 0x0045 or $nc == 0x0065) {    # 'E' / 'e'
        $self->{state} = MD_E_STATE;
        $self->{kwd}   = chr $nc;
    }
    elsif ($nc == 0x0041 or $nc == 0x0061) {    # 'A' / 'a'
        $self->{state} = MD_ATTLIST_STATE;
        $self->{kwd}   = chr $nc;
    }
    elsif ($nc == 0x004E or $nc == 0x006E) {    # 'N' / 'n'
        $self->{state} = MD_NOTATION_STATE;
        $self->{kwd}   = chr $nc;
    }
    else {
        $recognized = 0;
    }

    if ($recognized) {
        # Advance to the next input character: read it from char_buffer
        # when one is buffered, otherwise delegate to the set_nc callback.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }

    # Unrecognized start: report one column before the previous character
    # and switch to a bogus comment without consuming the current character.
    $self->{parse_error}->(
        level  => $self->{level}{must},
        type   => 'bogus comment',
        line   => $self->{line_prev},
        column => $self->{column_prev} - 1,
    );
    $self->{state} = BOGUS_COMMENT_STATE;
    $self->{ct}    = { type => COMMENT_TOKEN, data => '' };
    redo A;
}
elsif ($state == MD_E_STATE) {
    # Second keyword letter after an initial 'E'/'e': 'N'/'n' continues in
    # MD_ENTITY_STATE, 'L'/'l' in MD_ELEMENT_STATE; anything else starts a
    # bogus comment.
    my $is_n = ($nc == 0x004E or $nc == 0x006E);
    my $is_l = (not $is_n and ($nc == 0x004C or $nc == 0x006C));

    if ($is_n or $is_l) {
        $self->{state} = $is_n ? MD_ENTITY_STATE : MD_ELEMENT_STATE;
        $self->{kwd} .= chr $nc;

        # Advance to the next input character: read it from char_buffer
        # when one is buffered, otherwise delegate to the set_nc callback.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
        redo A;
    }

    # The reported column is shifted by one when the offending "character"
    # is end of input.  The current character is not consumed.
    $self->{parse_error}->(
        level  => $self->{level}{must},
        type   => 'bogus comment',
        line   => $self->{line_prev},
        column => $self->{column_prev} - 2 + ($nc == EOF_CHAR ? 1 : 0),
    );
    $self->{state} = BOGUS_COMMENT_STATE;
    $self->{ct}    = { type => COMMENT_TOKEN, data => '' };
    redo A;
}
elsif ($state == MD_ENTITY_STATE) {
    # Matches the keyword ENTITY one character at a time, in either case.
    # $self->{kwd} holds the letters matched so far; its length selects the
    # next expected code point from the upper- and lower-case tables.
    my $kwd_len = length $self->{kwd};

    if (   $nc == [undef, undef, 0x0054, 0x0049, 0x0054, NEVER_CHAR]->[$kwd_len]
        or $nc == [undef, undef, 0x0074, 0x0069, 0x0074, NEVER_CHAR]->[$kwd_len]) {
        # Next letter of the keyword ('T', 'I', 'T').
        $self->{kwd} .= chr $nc;
    }
    elsif ($kwd_len == 5 and ($nc == 0x0059 or $nc == 0x0079)) {
        # Final 'Y'/'y'.  Anything but the exact upper-case spelling is
        # reported as a lowercase keyword.
        if ($self->{kwd} ne 'ENTIT' or $nc == 0x0079) {
            $self->{parse_error}->(
                level  => $self->{level}{must},
                type   => 'lowercase keyword',
                text   => 'ENTITY',
                line   => $self->{line_prev},
                column => $self->{column_prev} - 4,
            );
        }
        $self->{ct} = {
            type   => GENERAL_ENTITY_TOKEN,
            name   => '',
            line   => $self->{line_prev},
            column => $self->{column_prev} - 6,
        };
        $self->{state} = DOCTYPE_MD_STATE;
    }
    else {
        # Not the keyword: start a bogus comment without consuming the
        # current character.  The column is shifted by one at end of input.
        $self->{parse_error}->(
            level  => $self->{level}{must},
            type   => 'bogus comment',
            line   => $self->{line_prev},
            column => $self->{column_prev} - 1 - $kwd_len
                + ($nc == EOF_CHAR ? 1 : 0),
        );
        $self->{state} = BOGUS_COMMENT_STATE;
        $self->{ct}    = { type => COMMENT_TOKEN, data => '' };
        redo A;
    }

    # Advance to the next input character: read it from char_buffer when
    # one is buffered, otherwise delegate to the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }
    redo A;
}
elsif ($state == MD_ELEMENT_STATE) {
    # Matches the keyword ELEMENT one character at a time, in either case.
    # $self->{kwd} holds the letters matched so far; its length selects the
    # next expected code point from the upper- and lower-case tables.
    my $kwd_len = length $self->{kwd};

    if (   $nc == [undef, undef, 0x0045, 0x004D, 0x0045, 0x004E, NEVER_CHAR]->[$kwd_len]
        or $nc == [undef, undef, 0x0065, 0x006D, 0x0065, 0x006E, NEVER_CHAR]->[$kwd_len]) {
        # Next letter of the keyword ('E', 'M', 'E', 'N').
        $self->{kwd} .= chr $nc;
    }
    elsif ($kwd_len == 6 and ($nc == 0x0054 or $nc == 0x0074)) {
        # Final 'T'/'t'.  Anything but the exact upper-case spelling is
        # reported as a lowercase keyword.
        if ($self->{kwd} ne 'ELEMEN' or $nc == 0x0074) {
            $self->{parse_error}->(
                level  => $self->{level}{must},
                type   => 'lowercase keyword',
                text   => 'ELEMENT',
                line   => $self->{line_prev},
                column => $self->{column_prev} - 5,
            );
        }
        $self->{ct} = {
            type   => ELEMENT_TOKEN,
            name   => '',
            line   => $self->{line_prev},
            column => $self->{column_prev} - 7,
        };
        $self->{state} = DOCTYPE_MD_STATE;
    }
    else {
        # Not the keyword: start a bogus comment without consuming the
        # current character.  The column is shifted by one at end of input.
        $self->{parse_error}->(
            level  => $self->{level}{must},
            type   => 'bogus comment',
            line   => $self->{line_prev},
            column => $self->{column_prev} - 1 - $kwd_len
                + ($nc == EOF_CHAR ? 1 : 0),
        );
        $self->{state} = BOGUS_COMMENT_STATE;
        $self->{ct}    = { type => COMMENT_TOKEN, data => '' };
        redo A;
    }

    # Advance to the next input character: read it from char_buffer when
    # one is buffered, otherwise delegate to the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }
    redo A;
}
elsif ($state == MD_ATTLIST_STATE) {
    # Matches the keyword ATTLIST one character at a time, in either case.
    # $self->{kwd} holds the letters matched so far; its length selects the
    # next expected code point from the upper- and lower-case tables.
    my $kwd_len = length $self->{kwd};

    if (   $nc == [undef, 0x0054, 0x0054, 0x004C, 0x0049, 0x0053, NEVER_CHAR]->[$kwd_len]
        or $nc == [undef, 0x0074, 0x0074, 0x006C, 0x0069, 0x0073, NEVER_CHAR]->[$kwd_len]) {
        # Next letter of the keyword ('T', 'T', 'L', 'I', 'S').
        $self->{kwd} .= chr $nc;
    }
    elsif ($kwd_len == 6 and ($nc == 0x0054 or $nc == 0x0074)) {
        # Final 'T'/'t'.  Anything but the exact upper-case spelling is
        # reported as a lowercase keyword.
        if ($self->{kwd} ne 'ATTLIS' or $nc == 0x0074) {
            $self->{parse_error}->(
                level  => $self->{level}{must},
                type   => 'lowercase keyword',
                text   => 'ATTLIST',
                line   => $self->{line_prev},
                column => $self->{column_prev} - 5,
            );
        }
        $self->{ct} = {
            type     => ATTLIST_TOKEN,
            name     => '',
            attrdefs => [],
            line     => $self->{line_prev},
            column   => $self->{column_prev} - 7,
        };
        $self->{state} = DOCTYPE_MD_STATE;
    }
    else {
        # Not the keyword: start a bogus comment without consuming the
        # current character.  The column is shifted by one at end of input.
        $self->{parse_error}->(
            level  => $self->{level}{must},
            type   => 'bogus comment',
            line   => $self->{line_prev},
            column => $self->{column_prev} - 1 - $kwd_len
                + ($nc == EOF_CHAR ? 1 : 0),
        );
        $self->{state} = BOGUS_COMMENT_STATE;
        $self->{ct}    = { type => COMMENT_TOKEN, data => '' };
        redo A;
    }

    # Advance to the next input character: read it from char_buffer when
    # one is buffered, otherwise delegate to the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }
    redo A;
}
elsif ($state == MD_NOTATION_STATE) {
    # Matches the keyword NOTATION one character at a time, in either case.
    # $self->{kwd} holds the letters matched so far; its length selects the
    # next expected code point from the upper- and lower-case tables.
    my $kwd_len = length $self->{kwd};

    if (   $nc == [undef, 0x004F, 0x0054, 0x0041, 0x0054, 0x0049, 0x004F, NEVER_CHAR]->[$kwd_len]
        or $nc == [undef, 0x006F, 0x0074, 0x0061, 0x0074, 0x0069, 0x006F, NEVER_CHAR]->[$kwd_len]) {
        # Next letter of the keyword ('O', 'T', 'A', 'T', 'I', 'O').
        $self->{kwd} .= chr $nc;
    }
    elsif ($kwd_len == 7 and ($nc == 0x004E or $nc == 0x006E)) {
        # Final 'N'/'n'.  Anything but the exact upper-case spelling is
        # reported as a lowercase keyword.
        if ($self->{kwd} ne 'NOTATIO' or $nc == 0x006E) {
            $self->{parse_error}->(
                level  => $self->{level}{must},
                type   => 'lowercase keyword',
                text   => 'NOTATION',
                line   => $self->{line_prev},
                column => $self->{column_prev} - 6,
            );
        }
        $self->{ct} = {
            type   => NOTATION_TOKEN,
            name   => '',
            line   => $self->{line_prev},
            column => $self->{column_prev} - 8,
        };
        $self->{state} = DOCTYPE_MD_STATE;
    }
    else {
        # Not the keyword: start a bogus comment without consuming the
        # current character.  The column is shifted by one at end of input.
        $self->{parse_error}->(
            level  => $self->{level}{must},
            type   => 'bogus comment',
            line   => $self->{line_prev},
            column => $self->{column_prev} - 1 - $kwd_len
                + ($nc == EOF_CHAR ? 1 : 0),
        );
        $self->{state} = BOGUS_COMMENT_STATE;
        $self->{ct}    = { type => COMMENT_TOKEN, data => '' };
        redo A;
    }

    # Advance to the next input character: read it from char_buffer when
    # one is buffered, otherwise delegate to the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }
    redo A;
}
elsif ($state == DOCTYPE_MD_STATE) {
    # First character seen in DOCTYPE_MD_STATE (the state the MD_* keyword
    # matchers switch to).  A space is the only input that raises no
    # parse error here.
    if ($is_space->{$nc}) {
        $self->{state} = BEFORE_MD_NAME_STATE;
    }
    elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and $nc == 0x0025) {
        # '%' directly after the keyword of a general-entity token.
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'no space before md name',
        );
        $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
    }
    elsif ($nc == EOF_CHAR) {
        # End of input is not consumed.
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'unclosed md',
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        redo A;
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'no md name',
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        # Any other character is left unconsumed for BEFORE_MD_NAME_STATE.
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'no space before md name',
        );
        $self->{state} = BEFORE_MD_NAME_STATE;
        redo A;
    }

    # Advance to the next input character: read it from char_buffer when
    # one is buffered, otherwise delegate to the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }
    redo A;
}
elsif ($state == BEFORE_MD_NAME_STATE) {
    # Skips spaces, then starts collecting a name into
    # $self->{ct}->{name} at the first other character.
    if ($is_space->{$nc}) {
        # Spaces are skipped.
    }
    elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and $nc == 0x0025) {
        # '%' while the current token is a general-entity token.
        $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'no md name',
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    elsif ($nc == EOF_CHAR) {
        # End of input is not consumed.
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'unclosed md',
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        redo A;
    }
    else {
        # First name character; U+0000 is reported and stored as U+FFFD.
        my $is_null = ($nc == 0x0000);
        if ($is_null) {
            $self->{parse_error}->(
                level => $self->{level}{must},
                type  => 'NULL',
            );
        }
        $self->{ct}->{name} .= $is_null ? "\x{FFFD}" : chr $nc;
        $self->{state} = MD_NAME_STATE;
    }

    # Advance to the next input character: read it from char_buffer when
    # one is buffered, otherwise delegate to the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }
    redo A;
}
elsif ($state == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
    # Character seen in DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE (the state
    # the '%' branches of the DOCTYPE_MD / BEFORE_MD_NAME states switch to).
    # Only a following space retypes the token as a parameter entity.
    if ($is_space->{$nc}) {
        $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
        $self->{state} = BEFORE_MD_NAME_STATE;
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'no md name',
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        # Neither remaining case consumes the current character.
        if ($nc == EOF_CHAR) {
            $self->{parse_error}->(
                level => $self->{level}{must},
                type  => 'unclosed md',
            );
            $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        }
        else {
            $self->{parse_error}->(
                level => $self->{level}{must},
                type  => 'no space after ENTITY percent',
            );
            $self->{state} = BOGUS_COMMENT_STATE;
            $self->{ct}    = { type => COMMENT_TOKEN, data => '' };
        }
        redo A;
    }

    # Advance to the next input character: read it from char_buffer when
    # one is buffered, otherwise delegate to the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }
    redo A;
}
elsif ($state == MD_NAME_STATE) {
    # Collects name characters into $self->{ct}->{name} until a space, '>'
    # or end of input.
    my $emit_token = 0;

    if ($is_space->{$nc}) {
        # The follow-up state depends on the kind of token being built.
        my $token_type = $self->{ct}->{type};
        $self->{state}
            = ($token_type == ATTLIST_TOKEN) ? DOCTYPE_ATTLIST_NAME_AFTER_STATE
            : ($token_type == ELEMENT_TOKEN) ? AFTER_ELEMENT_NAME_STATE
            :                                  AFTER_DOCTYPE_NAME_STATE;
    }
    elsif ($nc == 0x003E) {    # '>'
        # Only an ATTLIST token may end right after its name without error.
        unless ($self->{ct}->{type} == ATTLIST_TOKEN) {
            $self->{parse_error}->(
                level => $self->{level}{must},
                type  => 'no md def',
            );
        }
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_token = 1;
    }
    elsif ($nc == EOF_CHAR) {
        # End of input is not consumed and no token is returned here.
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'unclosed md',
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        redo A;
    }
    else {
        # Name character; U+0000 is reported and stored as U+FFFD.
        my $is_null = ($nc == 0x0000);
        if ($is_null) {
            $self->{parse_error}->(
                level => $self->{level}{must},
                type  => 'NULL',
            );
        }
        $self->{ct}->{name} .= $is_null ? "\x{FFFD}" : chr $nc;
    }

    # Advance to the next input character: read it from char_buffer when
    # one is buffered, otherwise delegate to the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }

    return ($self->{ct}) if $emit_token;
    redo A;
}
elsif ($state == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
    # Skips spaces, emits the current token on '>', and otherwise starts a
    # new attribute definition in $self->{ca}.
    my $emit_token = 0;

    if ($is_space->{$nc}) {
        # Spaces are skipped.
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_token = 1;
    }
    elsif ($nc == EOF_CHAR) {
        # End of input is not consumed and no token is returned here.
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'unclosed md',
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        redo A;
    }
    else {
        # First character of an attribute name; U+0000 is reported and
        # stored as U+FFFD.  The definition records the current position.
        my $is_null = ($nc == 0x0000);
        if ($is_null) {
            $self->{parse_error}->(
                level => $self->{level}{must},
                type  => 'NULL',
            );
        }
        $self->{ca} = {
            name   => ($is_null ? "\x{FFFD}" : chr $nc),
            tokens => [],
            line   => $self->{line},
            column => $self->{column},
        };
        $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
    }

    # Advance to the next input character: read it from char_buffer when
    # one is buffered, otherwise delegate to the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }

    return ($self->{ct}) if $emit_token;
    redo A;
}
elsif ($state == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
    # Collects attribute-name characters into $self->{ca}->{name}.  Every
    # branch of this state consumes the current character, so the advance
    # step is shared at the bottom.
    my $emit_token = 0;

    if ($is_space->{$nc}) {
        $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'no attr type',
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_token = 1;
    }
    elsif ($nc == 0x0028) {    # '('
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'no space before paren',
        );
        $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
    }
    elsif ($nc == EOF_CHAR) {
        # NOTE(review): unlike the EOF branches of MD_NAME_STATE and
        # DOCTYPE_ATTLIST_NAME_AFTER_STATE, this one still advances the
        # input after end of input -- confirm that is intended.
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'unclosed md',
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        # Name character; U+0000 is reported and stored as U+FFFD.
        my $is_null = ($nc == 0x0000);
        if ($is_null) {
            $self->{parse_error}->(
                level => $self->{level}{must},
                type  => 'NULL',
            );
        }
        $self->{ca}->{name} .= $is_null ? "\x{FFFD}" : chr $nc;
    }

    # Advance to the next input character: read it from char_buffer when
    # one is buffered, otherwise delegate to the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }

    return ($self->{ct}) if $emit_token;
    redo A;
}
elsif ($state == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
    # Skips spaces; the first other character either ends the declaration,
    # opens a '(' group, or becomes the first character of
    # $self->{ca}->{type}.  Every branch consumes the current character,
    # so the advance step is shared at the bottom.
    my $emit_token = 0;

    if ($is_space->{$nc}) {
        # Spaces are skipped.
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'no attr type',
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_token = 1;
    }
    elsif ($nc == 0x0028) {    # '('
        $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
    }
    elsif ($nc == EOF_CHAR) {
        # NOTE(review): unlike the EOF branches of MD_NAME_STATE and
        # DOCTYPE_ATTLIST_NAME_AFTER_STATE, this one still advances the
        # input after end of input -- confirm that is intended.
        $self->{parse_error}->(
            level => $self->{level}{must},
            type  => 'unclosed md',
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        # First character of the attribute type (stored without any
        # U+0000 substitution).
        $self->{ca}->{type} = chr $nc;
        $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
    }

    # Advance to the next input character: read it from char_buffer when
    # one is buffered, otherwise delegate to the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        @$self{qw(line_prev column_prev)} = @$self{qw(line column)};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }

    return ($self->{ct}) if $emit_token;
    redo A;
}
elsif ($state == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
    # Handler for DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE: the characters of an
    # attribute type are being appended to $self->{ca}->{type}.
    # Every branch consumes the current character; only U+003E (>) hands
    # $self->{ct} back to the caller.
    my $emit_ct = 0;

    if ($is_space->{$nc}) {
        $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
    }
    elsif ($nc == 0x0023) {    # '#'
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'no space before default value'
        );
        $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
    }
    elsif ($nc == 0x0022 or $nc == 0x0027) {    # '"' or "'"
        # Pick the target state before reporting, so the choice depends only
        # on the character that selected this branch.
        my $value_state = $nc == 0x0022
            ? ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE
            : ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'no space before default value'
        );
        $self->{ca}->{value} = '';
        $self->{state} = $value_state;
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'no attr default'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_ct = 1;
    }
    elsif ($nc == 0x0028) {    # '('
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'no space before paren'
        );
        $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
    }
    elsif ($nc == EOF_CHAR) {
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed md'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        # Any other character is part of the type keyword.
        $self->{ca}->{type} .= chr $nc;
    }

    # Advance to the next input character: from the buffer when it still has
    # data, otherwise through the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        $self->{line_prev}   = $self->{line};
        $self->{column_prev} = $self->{column};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }

    return ($self->{ct}) if $emit_ct;
    redo A;
}
elsif ($state == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
    # Handler for DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE.
    # $advance: consume the current character before looping again
    #           (cleared only by the fallback branch, which leaves the
    #           character in place for the next pass of loop A).
    # $emit_ct: return $self->{ct} to the caller after advancing.
    my $advance = 1;
    my $emit_ct = 0;

    if ($is_space->{$nc}) {
        # Skip the space character; the state is unchanged.
    }
    elsif ($nc == 0x0028) {    # '('
        $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
    }
    elsif ($nc == 0x0023) {    # '#'
        $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
    }
    elsif ($nc == 0x0022 or $nc == 0x0027) {    # '"' or "'"
        $self->{ca}->{value} = '';
        $self->{state} = $nc == 0x0022
            ? ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE
            : ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'no attr default'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_ct = 1;
    }
    elsif ($nc == EOF_CHAR) {
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed md'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unquoted attr value'
        );
        $self->{ca}->{value} = '';
        $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
        $advance = 0;
    }

    if ($advance) {
        # Next input character: from the buffer when it still has data,
        # otherwise through the set_nc callback.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
    }

    return ($self->{ct}) if $emit_ct;
    redo A;
}
elsif ($state == BEFORE_ALLOWED_TOKEN_STATE) {
    # Handler for BEFORE_ALLOWED_TOKEN_STATE: positioned before an entry of
    # the list collected in $self->{ca}->{tokens}.
    # Every branch consumes the current character; only U+003E (>) hands
    # $self->{ct} back to the caller.
    my $emit_ct = 0;

    if ($is_space->{$nc}) {
        # Skip the space character; the state is unchanged.
    }
    elsif ($nc == 0x007C) {    # '|'
        # A separator where a token was expected; report and stay here.
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'empty allowed token'
        );
    }
    elsif ($nc == 0x0029) {    # ')'
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'empty allowed token'
        );
        $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed allowed tokens'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_ct = 1;
    }
    elsif ($nc == EOF_CHAR) {
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed md'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        # First character of a new token; U+0000 is reported and stored as
        # U+FFFD instead.
        if ($nc == 0x0000) {
            $self->{parse_error}->(
                level => $self->{level}->{must},
                type  => 'NULL'
            );
        }
        push @{ $self->{ca}->{tokens} },
            $nc == 0x0000 ? "\x{FFFD}" : chr $nc;
        $self->{state} = ALLOWED_TOKEN_STATE;
    }

    # Advance to the next input character: from the buffer when it still has
    # data, otherwise through the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        $self->{line_prev}   = $self->{line};
        $self->{column_prev} = $self->{column};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }

    return ($self->{ct}) if $emit_ct;
    redo A;
}
elsif ($state == ALLOWED_TOKEN_STATE) {
    # Handler for ALLOWED_TOKEN_STATE: characters are appended to the last
    # entry of $self->{ca}->{tokens}.
    # Every branch consumes the current character; only U+003E (>) hands
    # $self->{ct} back to the caller.
    my $emit_ct = 0;

    if ($is_space->{$nc}) {
        $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
    }
    elsif ($nc == 0x007C) {    # '|'
        $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
    }
    elsif ($nc == 0x0029) {    # ')'
        $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed allowed tokens'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_ct = 1;
    }
    elsif ($nc == EOF_CHAR) {
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed md'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        # Extend the current token; U+0000 is reported and stored as U+FFFD.
        if ($nc == 0x0000) {
            $self->{parse_error}->(
                level => $self->{level}->{must},
                type  => 'NULL'
            );
        }
        $self->{ca}->{tokens}->[-1] .= $nc == 0x0000 ? "\x{FFFD}" : chr $nc;
    }

    # Advance to the next input character: from the buffer when it still has
    # data, otherwise through the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        $self->{line_prev}   = $self->{line};
        $self->{column_prev} = $self->{column};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }

    return ($self->{ct}) if $emit_ct;
    redo A;
}
elsif ($state == AFTER_ALLOWED_TOKEN_STATE) {
    # Handler for AFTER_ALLOWED_TOKEN_STATE (entered from ALLOWED_TOKEN_STATE
    # on a space character).
    # Every branch consumes the current character; only U+003E (>) hands
    # $self->{ct} back to the caller.
    my $emit_ct = 0;

    if ($is_space->{$nc}) {
        # Skip the space character; the state is unchanged.
    }
    elsif ($nc == 0x007C) {    # '|'
        $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
    }
    elsif ($nc == 0x0029) {    # ')'
        $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed allowed tokens'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_ct = 1;
    }
    elsif ($nc == EOF_CHAR) {
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed md'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        # The token continues after a space: report it at the previous
        # position, then append a single space plus this character to the
        # last token (U+0000 is reported and stored as U+FFFD).
        $self->{parse_error}->(
            level  => $self->{level}->{must},
            type   => 'space in allowed token',
            line   => $self->{line_prev},
            column => $self->{column_prev}
        );
        if ($nc == 0x0000) {
            $self->{parse_error}->(
                level => $self->{level}->{must},
                type  => 'NULL'
            );
        }
        $self->{ca}->{tokens}->[-1] .= ' ' . ($nc == 0x0000 ? "\x{FFFD}" : chr $nc);
        $self->{state} = ALLOWED_TOKEN_STATE;
    }

    # Advance to the next input character: from the buffer when it still has
    # data, otherwise through the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        $self->{line_prev}   = $self->{line};
        $self->{column_prev} = $self->{column};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }

    return ($self->{ct}) if $emit_ct;
    redo A;
}
elsif ($state == AFTER_ALLOWED_TOKENS_STATE) {
    # Handler for AFTER_ALLOWED_TOKENS_STATE (entered after U+0029 closes the
    # allowed-token list).
    # $advance: consume the current character before looping again
    #           (cleared only by the fallback branch, which leaves the
    #           character in place for the next pass of loop A).
    # $emit_ct: return $self->{ct} to the caller after advancing.
    my $advance = 1;
    my $emit_ct = 0;

    if ($is_space->{$nc}) {
        $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
    }
    elsif ($nc == 0x0023) {    # '#'
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'no space before default value'
        );
        $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
    }
    elsif ($nc == 0x0022 or $nc == 0x0027) {    # '"' or "'"
        # Pick the target state before reporting, so the choice depends only
        # on the character that selected this branch.
        my $value_state = $nc == 0x0022
            ? ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE
            : ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'no space before default value'
        );
        $self->{ca}->{value} = '';
        $self->{state} = $value_state;
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'no attr default'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_ct = 1;
    }
    elsif ($nc == EOF_CHAR) {
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed md'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        # NOTE(review): unlike DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE,
        # this path does not reset $self->{ca}->{value} before switching to
        # the unquoted-value state - kept as is.
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unquoted attr value'
        );
        $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
        $advance = 0;
    }

    if ($advance) {
        # Next input character: from the buffer when it still has data,
        # otherwise through the set_nc callback.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
    }

    return ($self->{ct}) if $emit_ct;
    redo A;
}
elsif ($state == BEFORE_ATTR_DEFAULT_STATE) {
    # Handler for BEFORE_ATTR_DEFAULT_STATE (entered from
    # AFTER_ALLOWED_TOKENS_STATE on a space character).
    # $advance: consume the current character before looping again
    #           (cleared only by the fallback branch, which leaves the
    #           character in place for the next pass of loop A).
    # $emit_ct: return $self->{ct} to the caller after advancing.
    my $advance = 1;
    my $emit_ct = 0;

    if ($is_space->{$nc}) {
        # Skip the space character; the state is unchanged.
    }
    elsif ($nc == 0x0023) {    # '#'
        $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
    }
    elsif ($nc == 0x0022 or $nc == 0x0027) {    # '"' or "'"
        $self->{ca}->{value} = '';
        $self->{state} = $nc == 0x0022
            ? ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE
            : ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'no attr default'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_ct = 1;
    }
    elsif ($nc == EOF_CHAR) {
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed md'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        # NOTE(review): $self->{ca}->{value} is not reset on this path
        # before switching to the unquoted-value state - kept as is.
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unquoted attr value'
        );
        $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
        $advance = 0;
    }

    if ($advance) {
        # Next input character: from the buffer when it still has data,
        # otherwise through the set_nc callback.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
    }

    return ($self->{ct}) if $emit_ct;
    redo A;
}
elsif ($state == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
    # Handler for DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE (entered
    # after U+0023 '#'); the first following character starts
    # $self->{ca}->{default}.
    # $advance: consume the current character before looping again
    #           (cleared only by the space branch, which leaves the character
    #           in place for the next pass of loop A).
    # $emit_ct: return $self->{ct} to the caller after advancing.
    my $advance = 1;
    my $emit_ct = 0;

    if ($is_space->{$nc}) {
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'no default type'
        );
        $self->{state} = BOGUS_MD_STATE;
        $advance = 0;
    }
    elsif ($nc == 0x0022 or $nc == 0x0027) {    # '"' or "'"
        $self->{ca}->{value} = '';
        $self->{state} = $nc == 0x0022
            ? ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE
            : ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
    }
    elsif ($nc == 0x003E) {    # '>'
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'no attr default'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_ct = 1;
    }
    elsif ($nc == EOF_CHAR) {
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed md'
        );
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        # First character of the default keyword.
        $self->{ca}->{default} = chr $nc;
        $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
    }

    if ($advance) {
        # Next input character: from the buffer when it still has data,
        # otherwise through the set_nc callback.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
    }

    return ($self->{ct}) if $emit_ct;
    redo A;
}
elsif ($state == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
    # Handler for DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE: characters are
    # appended to $self->{ca}->{default}.
    # Every branch consumes the current character; only U+003E (>) hands
    # $self->{ct} back to the caller.
    my $emit_ct = 0;

    if ($is_space->{$nc}) {
        $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
    }
    elsif ($nc == 0x0022 or $nc == 0x0027) {    # '"' or "'"
        # Pick the target state before reporting, so the choice depends only
        # on the character that selected this branch.
        my $value_state = $nc == 0x0022
            ? ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE
            : ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'no space before default value'
        );
        $self->{ca}->{value} = '';
        $self->{state} = $value_state;
    }
    elsif ($nc == 0x003E) {    # '>'
        # Record the attribute definition on the current token, then emit.
        push @{ $self->{ct}->{attrdefs} }, $self->{ca};
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_ct = 1;
    }
    elsif ($nc == EOF_CHAR) {
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed md'
        );
        push @{ $self->{ct}->{attrdefs} }, $self->{ca};
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        $self->{ca}->{default} .= chr $nc;
    }

    # Advance to the next input character: from the buffer when it still has
    # data, otherwise through the set_nc callback.
    if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
        $self->{line_prev}   = $self->{line};
        $self->{column_prev} = $self->{column};
        $self->{column}++;
        $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
    }
    else {
        $self->{set_nc}->($self);
    }

    return ($self->{ct}) if $emit_ct;
    redo A;
}
elsif ($state == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
    # Handler for DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE (entered
    # when a space character ends the default keyword).
    # $advance: consume the current character before looping again
    #           (cleared only by the fallback branch, which leaves the
    #           character in place for the next pass of loop A).
    # $emit_ct: return $self->{ct} to the caller after advancing.
    my $advance = 1;
    my $emit_ct = 0;

    if ($is_space->{$nc}) {
        # Skip the space character; the state is unchanged.
    }
    elsif ($nc == 0x0022 or $nc == 0x0027) {    # '"' or "'"
        $self->{ca}->{value} = '';
        $self->{state} = $nc == 0x0022
            ? ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE
            : ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
    }
    elsif ($nc == 0x003E) {    # '>'
        # Record the attribute definition on the current token, then emit.
        push @{ $self->{ct}->{attrdefs} }, $self->{ca};
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
        $emit_ct = 1;
    }
    elsif ($nc == EOF_CHAR) {
        $self->{parse_error}->(
            level => $self->{level}->{must},
            type  => 'unclosed md'
        );
        push @{ $self->{ct}->{attrdefs} }, $self->{ca};
        $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
    }
    else {
        # After the keyword FIXED the character is handed to the
        # unquoted-value state; after any other keyword the definition is
        # recorded and the character is handed to the name-after state.
        if ($self->{ca}->{default} eq 'FIXED') {
            $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
        }
        else {
            push @{ $self->{ct}->{attrdefs} }, $self->{ca};
            $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
        }
        $advance = 0;
    }

    if ($advance) {
        # Next input character: from the buffer when it still has data,
        # otherwise through the set_nc callback.
        if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
            $self->{line_prev}   = $self->{line};
            $self->{column_prev} = $self->{column};
            $self->{column}++;
            $self->{nc} = ord substr($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
        }
        else {
            $self->{set_nc}->($self);
        }
    }

    return ($self->{ct}) if $emit_ct;
    redo A;
}
elsif
(
$state
== AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
if
(
$is_space
->{
$nc
} or
$nc
== EOF_CHAR or
$nc
== 0x003E) {
$self
->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
redo
A;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'no space before attr name'
);
$self
->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
redo
A;
}
}
elsif
(
$state
== NDATA_STATE) {
if
(
$nc
== [
undef
,
0x0044,
0x0041,
0x0054,
NEVER_CHAR,
]->[
length
$self
->{kwd}] or
$nc
== [
undef
,
0x0064,
0x0061,
0x0074,
NEVER_CHAR,
]->[
length
$self
->{kwd}]) {
$self
->{kwd} .=
chr
$nc
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
((
length
$self
->{kwd}) == 4 and
(
$nc
== 0x0041 or
$nc
== 0x0061)) {
if
(
$self
->{kwd} ne
'NDAT'
or
$nc
== 0x0061) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'lowercase keyword'
,
text
=>
'NDATA'
,
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} - 4);
}
else
{
}
$self
->{state} = AFTER_NDATA_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'string after literal'
,
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} + 1
-
length
$self
->{kwd});
$self
->{state} = BOGUS_MD_STATE;
redo
A;
}
}
elsif
(
$state
== AFTER_NDATA_STATE) {
if
(
$is_space
->{
$nc
}) {
$self
->{state} = BEFORE_NOTATION_NAME_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x003E) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'no notation name'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== EOF_CHAR) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed md'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'string after literal'
,
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev} + 1
-
length
$self
->{kwd});
$self
->{state} = BOGUS_MD_STATE;
redo
A;
}
}
elsif
(
$state
== BEFORE_NOTATION_NAME_STATE) {
if
(
$is_space
->{
$nc
}) {
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x003E) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'no notation name'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== EOF_CHAR) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed md'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
if
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
}
$self
->{ct}->{notation} =
$nc
== 0x0000 ?
"\x{FFFD}"
:
chr
$nc
;
$self
->{state} = NOTATION_NAME_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== NOTATION_NAME_STATE) {
if
(
$is_space
->{
$nc
}) {
$self
->{state} = AFTER_MD_DEF_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x003E) {
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== EOF_CHAR) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed md'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
if
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
}
$self
->{ct}->{notation} .=
$nc
== 0x0000 ?
"\x{FFFD}"
:
chr
$nc
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== DOCTYPE_ENTITY_VALUE_DOUBLE_QUOTED_STATE) {
if
(
$nc
== 0x0022) {
$self
->{state} = AFTER_MD_DEF_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x0026) {
$self
->{prev_state} =
$state
;
$self
->{state} = ENTITY_VALUE_ENTITY_STATE;
$self
->{entity_add} = 0x0022;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== EOF_CHAR) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed entity value'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
redo
A;
}
else
{
if
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
}
$self
->{ct}->{value} .=
$nc
== 0x0000 ?
"\x{FFFD}"
:
chr
$nc
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== DOCTYPE_ENTITY_VALUE_SINGLE_QUOTED_STATE) {
if
(
$nc
== 0x0027) {
$self
->{state} = AFTER_MD_DEF_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x0026) {
$self
->{prev_state} =
$state
;
$self
->{state} = ENTITY_VALUE_ENTITY_STATE;
$self
->{entity_add} = 0x0027;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== EOF_CHAR) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed entity value'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
redo
A;
}
else
{
if
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
}
$self
->{ct}->{value} .=
$nc
== 0x0000 ?
"\x{FFFD}"
:
chr
$nc
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== ENTITY_VALUE_ENTITY_STATE) {
if
(
$is_space
->{
$nc
} or
{
0x003C
=> 1,
0x0026
=> 1, (EOF_CHAR) => 1,
$self
->{entity_add} => 1,
}->{
$nc
}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'bare ero'
,
line
=>
$self
->{line_prev},
column
=>
$self
->{column_prev}
+ (
$nc
== EOF_CHAR ? 1 : 0));
}
elsif
(
$nc
== 0x0023) {
$self
->{ca} =
$self
->{ct};
$self
->{state} = ENTITY_HASH_STATE;
$self
->{kwd} =
'#'
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
}
$self
->{ct}->{value} .=
'&'
;
$self
->{state} =
$self
->{prev_state};
redo
A;
}
elsif
(
$state
== AFTER_ELEMENT_NAME_STATE) {
if
(
$is_space
->{
$nc
}) {
$self
->{state} = BEFORE_ELEMENT_CONTENT_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x0028) {
$self
->{state} = AFTER_CM_GROUP_OPEN_STATE;
$self
->{ct}->{content} = [
'('
];
$self
->{group_depth} = 1;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x003E) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'no md def'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== EOF_CHAR) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed md'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
if
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
}
$self
->{ct}->{content} = [
$nc
== 0x0000 ?
"\x{FFFD}"
:
chr
$nc
];
$self
->{state} = CONTENT_KEYWORD_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== CONTENT_KEYWORD_STATE) {
if
(
$is_space
->{
$nc
}) {
$self
->{state} = AFTER_MD_DEF_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x003E) {
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== EOF_CHAR) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed md'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
if
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
}
$self
->{ct}->{content}->[-1] .=
$nc
== 0x0000 ?
"\x{FFFD}"
:
chr
$nc
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== AFTER_CM_GROUP_OPEN_STATE) {
if
(
$is_space
->{
$nc
}) {
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x0028) {
$self
->{group_depth}++;
push
@{
$self
->{ct}->{content}},
chr
$nc
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x007C or
$nc
== 0x002C) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'empty element name'
);
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x0029) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'empty element name'
);
push
@{
$self
->{ct}->{content}},
chr
$nc
;
$self
->{group_depth}--;
$self
->{state} = AFTER_CM_GROUP_CLOSE_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x003E) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed cm group'
);
push
@{
$self
->{ct}->{content}}, (
')'
) x
$self
->{group_depth};
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== EOF_CHAR) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed md'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
if
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
}
push
@{
$self
->{ct}->{content}},
$nc
== 0x0000 ?
"\x{FFFD}"
:
chr
$nc
;
$self
->{state} = CM_ELEMENT_NAME_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== CM_ELEMENT_NAME_STATE) {
if
(
$is_space
->{
$nc
}) {
$self
->{state} = AFTER_CM_ELEMENT_NAME_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x002A or
$nc
== 0x002B or
$nc
== 0x003F) {
push
@{
$self
->{ct}->{content}},
chr
$nc
;
$self
->{state} = AFTER_CM_ELEMENT_NAME_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x007C or
$nc
== 0x002C) {
push
@{
$self
->{ct}->{content}},
$nc
== 0x007C ?
' | '
:
', '
;
$self
->{state} = AFTER_CM_GROUP_OPEN_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x0029) {
$self
->{group_depth}--;
push
@{
$self
->{ct}->{content}},
chr
$nc
;
$self
->{state} = AFTER_CM_GROUP_CLOSE_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x003E) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed cm group'
);
push
@{
$self
->{ct}->{content}}, (
')'
) x
$self
->{group_depth};
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== EOF_CHAR) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed md'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
if
(
$nc
== 0x0000) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'NULL'
);
}
$self
->{ct}->{content}->[-1] .=
$nc
== 0x0000 ?
"\x{FFFD}"
:
chr
$nc
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== AFTER_CM_ELEMENT_NAME_STATE) {
if
(
$is_space
->{
$nc
}) {
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x007C or
$nc
== 0x002C) {
push
@{
$self
->{ct}->{content}},
$nc
== 0x007C ?
' | '
:
', '
;
$self
->{state} = AFTER_CM_GROUP_OPEN_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x0029) {
$self
->{group_depth}--;
push
@{
$self
->{ct}->{content}},
chr
$nc
;
$self
->{state} = AFTER_CM_GROUP_CLOSE_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x003E) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed cm group'
);
push
@{
$self
->{ct}->{content}}, (
')'
) x
$self
->{group_depth};
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== EOF_CHAR) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed md'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'after element name'
);
push
@{
$self
->{ct}->{content}}, (
')'
) x
$self
->{group_depth};
$self
->{state} = BOGUS_MD_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
elsif
(
$state
== AFTER_CM_GROUP_CLOSE_STATE) {
if
(
$is_space
->{
$nc
}) {
if
(
$self
->{group_depth}) {
$self
->{state} = AFTER_CM_ELEMENT_NAME_STATE;
}
else
{
$self
->{state} = AFTER_MD_DEF_STATE;
}
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x002A or
$nc
== 0x002B or
$nc
== 0x003F) {
push
@{
$self
->{ct}->{content}},
chr
$nc
;
if
(
$self
->{group_depth}) {
$self
->{state} = AFTER_CM_ELEMENT_NAME_STATE;
}
else
{
$self
->{state} = AFTER_MD_DEF_STATE;
}
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x0029) {
if
(
$self
->{group_depth}) {
$self
->{group_depth}--;
push
@{
$self
->{ct}->{content}},
chr
$nc
;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'string after md def'
);
$self
->{state} = BOGUS_MD_STATE;
redo
A;
}
}
elsif
(
$nc
== 0x003E) {
if
(
$self
->{group_depth}) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed cm group'
);
push
@{
$self
->{ct}->{content}}, (
')'
) x
$self
->{group_depth};
}
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== EOF_CHAR) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed md'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
if
(
$self
->{group_depth}) {
$self
->{state} = AFTER_CM_ELEMENT_NAME_STATE;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'string after md def'
);
$self
->{state} = BOGUS_MD_STATE;
}
redo
A;
}
}
elsif
(
$state
== AFTER_MD_DEF_STATE) {
if
(
$is_space
->{
$nc
}) {
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
elsif
(
$nc
== 0x003E) {
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== EOF_CHAR) {
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'unclosed md'
);
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
else
{
$self
->{parse_error}->(
level
=>
$self
->{level}->{must},
type
=>
'string after md def'
);
$self
->{state} = BOGUS_MD_STATE;
redo
A;
}
}
elsif
(
$state
== BOGUS_MD_STATE) {
if
(
$nc
== 0x003E) {
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
return
(
$self
->{ct});
redo
A;
}
elsif
(
$nc
== EOF_CHAR) {
$self
->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
redo
A;
}
else
{
if
(
$self
->{char_buffer_pos} <
length
$self
->{char_buffer}) {
$self
->{line_prev} =
$self
->{line};
$self
->{column_prev} =
$self
->{column};
$self
->{column}++;
$self
->{nc}
=
ord
substr
(
$self
->{char_buffer},
$self
->{char_buffer_pos}++, 1);
}
else
{
$self
->{set_nc}->(
$self
);
}
redo
A;
}
}
else
{
die
"$0: $state: Unknown state"
;
}
}
die
"$0: _get_next_token: unexpected case"
;
}
# A Perl module file must finish by evaluating to a true value; otherwise
# `require`/`use` of this file fails at load time.
1;