$VERSION
=
'1.02'
;
if
($] < 5.006) {
}
else
{
}
@ISA
= (
'XML::SAX::Base'
);
my
%int_ents
= (
amp
=>
'&'
,
lt
=>
'<'
,
gt
=>
'>'
,
quot
=>
'"'
,
apos
=>
"'"
,
);
sub
_parse_characterstream {
my
$self
=
shift
;
my
(
$fh
) =
@_
;
confess(
"CharacterStream is not yet correctly implemented"
);
my
$reader
= XML::SAX::PurePerl::Reader::Stream->new(
$fh
);
return
$self
->_parse(
$reader
);
}
sub
_parse_bytestream {
my
$self
=
shift
;
my
(
$fh
) =
@_
;
my
$reader
= XML::SAX::PurePerl::Reader::Stream->new(
$fh
);
return
$self
->_parse(
$reader
);
}
sub
_parse_string {
my
$self
=
shift
;
my
(
$str
) =
@_
;
my
$reader
= XML::SAX::PurePerl::Reader::String->new(
$str
);
return
$self
->_parse(
$reader
);
}
sub
_parse_systemid {
my
$self
=
shift
;
my
(
$uri
) =
@_
;
my
$reader
= XML::SAX::PurePerl::Reader::URI->new(
$uri
);
return
$self
->_parse(
$reader
);
}
sub
_parse {
my
(
$self
,
$reader
) =
@_
;
$reader
->public_id(
$self
->{ParseOptions}{Source}{PublicId});
$reader
->system_id(
$self
->{ParseOptions}{Source}{SystemId});
$self
->{NSHelper} = XML::NamespaceSupport->new({
xmlns
=> 1});
$self
->set_document_locator(
XML::SAX::DocumentLocator->new(
sub
{
$reader
->public_id },
sub
{
$reader
->system_id },
sub
{
$reader
->line },
sub
{
$reader
->column },
sub
{
$reader
->get_encoding },
sub
{
$reader
->get_xml_version },
),
);
$self
->start_document({});
if
(
defined
$self
->{ParseOptions}{Source}{Encoding}) {
$reader
->set_encoding(
$self
->{ParseOptions}{Source}{Encoding});
}
else
{
$self
->encoding_detect(
$reader
);
}
$self
->document(
$reader
);
return
$self
->end_document({});
}
sub
parser_error {
my
$self
=
shift
;
my
(
$error
,
$reader
) =
@_
;
my
$exception
= XML::SAX::Exception::Parse->new(
Message
=>
$error
,
ColumnNumber
=>
$reader
->column,
LineNumber
=>
$reader
->line,
PublicId
=>
$reader
->public_id,
SystemId
=>
$reader
->system_id,
);
$self
->fatal_error(
$exception
);
$exception
->throw;
}
sub
document {
my
(
$self
,
$reader
) =
@_
;
$self
->prolog(
$reader
);
$self
->element(
$reader
) ||
$self
->parser_error(
"Document requires an element"
,
$reader
);
while
(
length
(
$reader
->data)) {
$self
->Misc(
$reader
) ||
$self
->parser_error(
"Only Comments, PIs and whitespace allowed at end of document"
,
$reader
);
}
}
sub
prolog {
my
(
$self
,
$reader
) =
@_
;
$self
->XMLDecl(
$reader
);
1
while
(
$self
->Misc(
$reader
));
if
(
$self
->doctypedecl(
$reader
)) {
while
(
length
(
$reader
->data)) {
$self
->Misc(
$reader
) ||
last
;
}
}
}
sub
element {
my
(
$self
,
$reader
) =
@_
;
return
0
unless
$reader
->match(
'<'
);
my
$name
=
$self
->Name(
$reader
) ||
$self
->parser_error(
"Invalid element name"
,
$reader
);
my
%attribs
;
while
(
my
(
$k
,
$v
) =
$self
->Attribute(
$reader
) ) {
$attribs
{
$k
} =
$v
;
}
my
$have_namespaces
=
$self
->get_feature(Namespaces);
$self
->{NSHelper}->push_context;
my
@new_ns
;
if
(
$have_namespaces
) {
while
(
my
(
$k
,
$v
) =
each
%attribs
) {
if
(
$k
=~ m/^xmlns(:(.*))?$/) {
my
$prefix
= $2 ||
''
;
$self
->{NSHelper}->declare_prefix(
$prefix
,
$v
);
my
$ns
=
{
Prefix
=>
$prefix
,
NamespaceURI
=>
$v
,
};
push
@new_ns
,
$ns
;
$self
->SUPER::start_prefix_mapping(
$ns
);
}
}
}
my
%attrib_hash
;
while
(
my
(
$name
,
$value
) =
each
%attribs
) {
my
(
$ns
,
$prefix
,
$lname
);
if
(
$have_namespaces
) {
(
$ns
,
$prefix
,
$lname
) =
$self
->{NSHelper}->process_attribute_name(
$name
);
}
$ns
||=
''
;
$prefix
||=
''
;
$lname
||=
''
;
$attrib_hash
{
"{$ns}$lname"
} = {
Name
=>
$name
,
LocalName
=>
$lname
,
Prefix
=>
$prefix
,
NamespaceURI
=>
$ns
,
Value
=>
$value
,
};
}
%attribs
= ();
my
(
$ns
,
$prefix
,
$lname
);
if
(
$self
->get_feature(Namespaces)) {
(
$ns
,
$prefix
,
$lname
) =
$self
->{NSHelper}->process_element_name(
$name
);
}
else
{
$lname
=
$name
;
}
$ns
||=
''
;
$prefix
||=
''
;
$lname
||=
''
;
$self
->skip_whitespace(
$reader
);
my
$have_content
;
my
$data
=
$reader
->data(2);
if
(
$data
=~ /^\/>/) {
$reader
->move_along(2);
}
else
{
$data
=~ /^>/ or
$self
->parser_error(
"No close element tag"
,
$reader
);
$reader
->move_along(1);
$have_content
++;
}
my
$el
=
{
Name
=>
$name
,
LocalName
=>
$lname
,
Prefix
=>
$prefix
,
NamespaceURI
=>
$ns
,
Attributes
=> \
%attrib_hash
,
};
$self
->start_element(
$el
);
if
(
$have_content
) {
$self
->content(
$reader
);
my
$data
=
$reader
->data(2);
$data
=~ /^<\// or
$self
->parser_error(
"No close tag marker"
,
$reader
);
$reader
->move_along(2);
my
$end_name
=
$self
->Name(
$reader
);
$end_name
eq
$name
||
$self
->parser_error(
"End tag mismatch ($end_name != $name)"
,
$reader
);
$self
->skip_whitespace(
$reader
);
$reader
->match(
'>'
) or
$self
->parser_error(
"No close '>' on end tag"
,
$reader
);
}
my
%end_el
=
%$el
;
delete
$end_el
{Attributes};
$self
->end_element(\
%end_el
);
for
my
$ns
(
@new_ns
) {
$self
->end_prefix_mapping(
$ns
);
}
$self
->{NSHelper}->pop_context;
return
1;
}
sub
content {
my
(
$self
,
$reader
) =
@_
;
while
(1) {
$self
->CharData(
$reader
);
my
$data
=
$reader
->data(2);
if
(
$data
=~ /^<\//) {
return
1;
}
elsif
(
$data
=~ /^&/) {
$self
->Reference(
$reader
) or
$self
->parser_error(
"bare & not allowed in content"
,
$reader
);
next
;
}
elsif
(
$data
=~ /^<!/) {
(
$self
->CDSect(
$reader
)
or
$self
->Comment(
$reader
))
and
next
;
}
elsif
(
$data
=~ /^<\?/) {
$self
->PI(
$reader
) and
next
;
}
elsif
(
$data
=~ /^</) {
$self
->element(
$reader
) and
next
;
}
last
;
}
return
1;
}
sub
CDSect {
my
(
$self
,
$reader
) =
@_
;
my
$data
=
$reader
->data(9);
return
0
unless
$data
=~ /^<!\[CDATA\[/;
$reader
->move_along(9);
$self
->start_cdata({});
$data
=
$reader
->data;
while
(1) {
$self
->parser_error(
"EOF looking for CDATA section end"
,
$reader
)
unless
length
(
$data
);
if
(
$data
=~ /^(.*?)\]\]>/s) {
my
$chars
= $1;
$reader
->move_along(
length
(
$chars
) + 3);
$self
->characters({
Data
=>
$chars
});
last
;
}
else
{
$self
->characters({
Data
=>
$data
});
$reader
->move_along(
length
(
$data
));
$data
=
$reader
->data;
}
}
$self
->end_cdata({});
return
1;
}
sub
CharData {
my
(
$self
,
$reader
) =
@_
;
my
$data
=
$reader
->data;
while
(1) {
return
unless
length
(
$data
);
if
(
$data
=~ /^([^<&]*)[<&]/s) {
my
$chars
= $1;
$self
->parser_error(
"String ']]>' not allowed in character data"
,
$reader
)
if
$chars
=~ /\]\]>/;
$reader
->move_along(
length
(
$chars
));
$self
->characters({
Data
=>
$chars
})
if
length
(
$chars
);
last
;
}
else
{
$self
->characters({
Data
=>
$data
});
$reader
->move_along(
length
(
$data
));
$data
=
$reader
->data;
}
}
}
sub
Misc {
my
(
$self
,
$reader
) =
@_
;
if
(
$self
->Comment(
$reader
)) {
return
1;
}
elsif
(
$self
->PI(
$reader
)) {
return
1;
}
elsif
(
$self
->skip_whitespace(
$reader
)) {
return
1;
}
return
0;
}
sub
Reference {
my
(
$self
,
$reader
) =
@_
;
return
0
unless
$reader
->match(
'&'
);
my
$data
=
$reader
->data;
if
(
$data
=~ /^(
$data
=
$reader
->data(
length
(
$data
) + 6);
}
if
(
$data
=~ /^
my
$ref
= $1;
$reader
->move_along(
length
(
$ref
) + 3);
my
$char
= chr_ref(
hex
(
$ref
));
$self
->parser_error(
"Character reference &#$ref; refers to an illegal XML character ($char)"
,
$reader
)
unless
$char
=~ /
$SingleChar
/o;
$self
->characters({
Data
=>
$char
});
return
1;
}
elsif
(
$data
=~ /^
my
$ref
= $1;
$reader
->move_along(
length
(
$ref
) + 2);
my
$char
= chr_ref(
$ref
);
$self
->parser_error(
"Character reference &#$ref; refers to an illegal XML character ($char)"
,
$reader
)
unless
$char
=~ /
$SingleChar
/o;
$self
->characters({
Data
=>
$char
});
return
1;
}
else
{
my
$name
=
$self
->Name(
$reader
)
||
$self
->parser_error(
"Invalid name in entity"
,
$reader
);
$reader
->match(
';'
) or
$self
->parser_error(
"No semi-colon found after entity name"
,
$reader
);
if
(
$self
->_is_entity(
$name
)) {
if
(
$self
->_is_external(
$name
)) {
my
$value
=
$self
->_get_entity(
$name
);
my
$ent_reader
= XML::SAX::PurePerl::Reader::URI->new(
$value
);
$self
->encoding_detect(
$ent_reader
);
$self
->extParsedEnt(
$ent_reader
);
}
else
{
my
$value
=
$self
->_stringify_entity(
$name
);
my
$ent_reader
= XML::SAX::PurePerl::Reader::String->new(
$value
);
$self
->content(
$ent_reader
);
}
return
1;
}
elsif
(
$name
=~ /^(?:amp|gt|lt|quot|apos)$/) {
$self
->characters({
Data
=>
$int_ents
{
$name
} });
return
1;
}
else
{
$self
->parser_error(
"Undeclared entity"
,
$reader
);
}
}
}
sub
AttReference {
my
(
$self
,
$name
,
$reader
) =
@_
;
if
(
$name
=~ /^
my
$chr
= chr_ref(
hex
($1));
$chr
=~ /
$SingleChar
/o or
$self
->parser_error(
"Character reference '&$name;' refers to an illegal XML character"
,
$reader
);
return
$chr
;
}
elsif
(
$name
=~ /^
my
$chr
= chr_ref($1);
$chr
=~ /
$SingleChar
/o or
$self
->parser_error(
"Character reference '&$name;' refers to an illegal XML character"
,
$reader
);
return
$chr
;
}
else
{
if
(
$self
->_is_entity(
$name
)) {
if
(
$self
->_is_external(
$name
)) {
$self
->parser_error(
"No external entity references allowed in attribute values"
,
$reader
);
}
else
{
my
$value
=
$self
->_stringify_entity(
$name
);
return
$value
;
}
}
elsif
(
$name
=~ /^(?:amp|lt|gt|quot|apos)$/) {
return
$int_ents
{
$name
};
}
else
{
$self
->parser_error(
"Undeclared entity '$name'"
,
$reader
);
}
}
}
sub
extParsedEnt {
my
(
$self
,
$reader
) =
@_
;
$self
->TextDecl(
$reader
);
$self
->content(
$reader
);
}
sub
_is_external {
my
(
$self
,
$name
) =
@_
;
if
(
$self
->{ParseOptions}{external_entities}{
$name
}) {
return
1;
}
return
;
}
sub
_is_entity {
my
(
$self
,
$name
) =
@_
;
if
(
exists
$self
->{ParseOptions}{entities}{
$name
}) {
return
1;
}
return
0;
}
sub
_stringify_entity {
my
(
$self
,
$name
) =
@_
;
if
(
exists
$self
->{ParseOptions}{expanded_entity}{
$name
}) {
return
$self
->{ParseOptions}{expanded_entity}{
$name
};
}
my
$reader
= XML::SAX::PurePerl::Reader::URI->new(
$self
->{ParseOptions}{entities}{
$name
});
my
$ent
=
''
;
while
(1) {
my
$data
=
$reader
->data;
$ent
.=
$data
;
$reader
->move_along(
length
(
$data
)) or
last
;
}
return
$self
->{ParseOptions}{expanded_entity}{
$name
} =
$ent
;
}
sub
_get_entity {
my
(
$self
,
$name
) =
@_
;
return
$self
->{ParseOptions}{entities}{
$name
};
}
sub
skip_whitespace {
my
(
$self
,
$reader
) =
@_
;
my
$data
=
$reader
->data;
my
$found
= 0;
while
(
$data
=~ s/^([\x20\x0A\x0D\x09]*)//) {
last
unless
length
($1);
$found
++;
$reader
->move_along(
length
($1));
$data
=
$reader
->data;
}
return
$found
;
}
sub
Attribute {
my
(
$self
,
$reader
) =
@_
;
$self
->skip_whitespace(
$reader
) ||
return
;
my
$data
=
$reader
->data(2);
return
if
$data
=~ /^\/?>/;
if
(
my
$name
=
$self
->Name(
$reader
)) {
$self
->skip_whitespace(
$reader
);
$reader
->match(
'='
) or
$self
->parser_error(
"No '=' in Attribute"
,
$reader
);
$self
->skip_whitespace(
$reader
);
my
$value
=
$self
->AttValue(
$reader
);
if
(!
$self
->cdata_attrib(
$name
)) {
$value
=~ s/^\x20*//;
$value
=~ s/\x20*$//;
$value
=~ s/ {1,}/ /g;
}
return
$name
,
$value
;
}
return
;
}
sub
cdata_attrib {
return
1;
}
sub
AttValue {
my
(
$self
,
$reader
) =
@_
;
my
$quote
=
$self
->quote(
$reader
);
my
$value
=
''
;
while
(1) {
my
$data
=
$reader
->data;
$self
->parser_error(
"EOF found while looking for the end of attribute value"
,
$reader
)
unless
length
(
$data
);
if
(
$data
=~ /^([^
$quote
]*)
$quote
/) {
$reader
->move_along(
length
($1) + 1);
$value
.= $1;
last
;
}
else
{
$value
.=
$data
;
$reader
->move_along(
length
(
$data
));
}
}
if
(
$value
=~ /</) {
$self
->parser_error(
"< character not allowed in attribute values"
,
$reader
);
}
$value
=~ s/[\x09\x0A\x0D]/\x20/g;
$value
=~ s/&(
return
$value
;
}
sub
Comment {
my
(
$self
,
$reader
) =
@_
;
my
$data
=
$reader
->data(4);
if
(
$data
=~ /^<!--/) {
$reader
->move_along(4);
my
$comment_str
=
''
;
while
(1) {
my
$data
=
$reader
->data;
$self
->parser_error(
"End of data seen while looking for close comment marker"
,
$reader
)
unless
length
(
$data
);
if
(
$data
=~ /^(.*?)-->/s) {
$comment_str
.= $1;
$self
->parser_error(
"Invalid comment (dash)"
,
$reader
)
if
$comment_str
=~ /-$/;
$reader
->move_along(
length
($1) + 3);
last
;
}
else
{
$comment_str
.=
$data
;
$reader
->move_along(
length
(
$data
));
}
}
$self
->comment({
Data
=>
$comment_str
});
return
1;
}
return
0;
}
sub
PI {
my
(
$self
,
$reader
) =
@_
;
my
$data
=
$reader
->data(2);
if
(
$data
=~ /^<\?/) {
$reader
->move_along(2);
my
(
$target
);
$target
=
$self
->Name(
$reader
) ||
$self
->parser_error(
"PI has no target"
,
$reader
);
my
$pi_data
=
''
;
if
(
$self
->skip_whitespace(
$reader
)) {
while
(1) {
my
$data
=
$reader
->data;
$self
->parser_error(
"End of data seen while looking for close PI marker"
,
$reader
)
unless
length
(
$data
);
if
(
$data
=~ /^(.*?)\?>/s) {
$pi_data
.= $1;
$reader
->move_along(
length
($1) + 2);
last
;
}
else
{
$pi_data
.=
$data
;
$reader
->move_along(
length
(
$data
));
}
}
}
else
{
my
$data
=
$reader
->data(2);
$data
=~ /^\?>/ or
$self
->parser_error(
"PI closing sequence not found"
,
$reader
);
$reader
->move_along(2);
}
$self
->processing_instruction({
Target
=>
$target
,
Data
=>
$pi_data
});
return
1;
}
return
0;
}
sub
Name {
my
(
$self
,
$reader
) =
@_
;
my
$name
=
''
;
while
(1) {
my
$data
=
$reader
->data;
return
unless
length
(
$data
);
$data
=~ /^([^\s>\/&\?;=<\)\(\[\],\%\
$name
.= $1;
my
$len
=
length
($1);
$reader
->move_along(
$len
);
last
if
(
$len
!=
length
(
$data
));
}
return
unless
length
(
$name
);
$name
=~ /
$NameChar
/o or
$self
->parser_error(
"Name <$name> does not match NameChar production"
,
$reader
);
return
$name
;
}
sub
quote {
my
(
$self
,
$reader
) =
@_
;
my
$data
=
$reader
->data;
$data
=~ /^(['
"])/ or $self->parser_error("
Invalid quote token",
$reader
);
$reader
->move_along(1);
return
$1;
}
1;