use
5.008_001;
our
$VERSION
=
'0.15'
;
our
@EXPORT_OK
=
qw(selector_to_xpath)
;
*import
= \
&Exporter::import
;
sub
selector_to_xpath {
__PACKAGE__->new(
shift
)->to_xpath(
@_
);
}
my
$ident
=
qr/(?![0-9]|-[-0-9])[-_a-zA-Z0-9]+/
;
my
$reg
= {
element
=>
qr/^([#.]?)([a-z0-9\\*_-]*)((\|)([a-z0-9\\*_-]*))?/
i,
attr1
=>
qr/^\[ \s* ($ident) \s* \]/
x,
attr2
=>
qr/^\[ \s* ($ident) \s*
( [~|*^\$!]? = ) \s*
(?: ($ident) | "([^"]*)" | '([^']*)') \s* \] /
x,
badattr
=>
qr/^\[/
,
attrN
=>
qr/^:not\((.*?)\)/
i,
pseudo
=>
qr/^:([()a-z0-9_+-]+)/
i,
combinator
=>
qr/^(\s*[>+~\s](?!,))/
i,
comma
=>
qr/^\s*,\s*/
i,
};
sub
new {
my
(
$class
,
$exp
) =
@_
;
bless
{
expression
=>
$exp
},
$class
;
}
sub
selector {
my
$self
=
shift
;
$self
->{expression} =
shift
if
@_
;
$self
->{expression};
}
sub
convert_attribute_match {
my
(
$left
,
$op
,
$right
) =
@_
;
if
(
$op
eq
'!='
) {
"\@$left!='$right'"
;
}
elsif
(
$op
eq
'~='
) {
"contains(concat(' ', \@$left, ' '), ' $right ')"
;
}
elsif
(
$op
eq
'*='
) {
"contains(\@$left, '$right')"
;
}
elsif
(
$op
eq
'|='
) {
"\@$left='$right' or starts-with(\@$left, '$right-')"
;
}
elsif
(
$op
eq
'^='
) {
"starts-with(\@$left,'$^N')"
;
}
elsif
(
$op
eq
'$='
) {
"ends-with(\@$left,'$^N')"
;
}
else
{
"\@$left='$^N'"
;
}
};
sub
_generate_child {
my
(
$direction
,
$a
,
$b
) =
@_
;
if
(
$a
== 0) {
$b
--;
"[count($direction-sibling::*) = $b and parent::*]"
}
elsif
(
$a
> 0) {
return
"[not((count($direction-sibling::*)+1)<$b) and ((count($direction-sibling::*) + 1) - $b) mod $a = 0 and parent::*]"
}
else
{
$a
= -
$a
;
return
"[not((count($direction-sibling::*)+1)>$b) and (($b - (count($direction-sibling::*) + 1)) mod $a) = 0 and parent::*]"
};
};
sub
nth_child {
my
(
$a
,
$b
) =
@_
;
if
(
@_
== 1) {
(
$a
,
$b
) = (0,
$a
);
};
_generate_child(
'preceding'
,
$a
,
$b
);
};
sub
nth_last_child {
my
(
$a
,
$b
) =
@_
;
if
(
@_
== 1) {
(
$a
,
$b
) = (0,
$a
);
};
_generate_child(
'following'
,
$a
,
$b
);
};
sub
to_xpath {
my
$self
=
shift
;
my
$rule
=
$self
->{expression} or
return
;
my
%parms
=
@_
;
my
$root
=
$parms
{root} ||
'/'
;
my
@parts
= (
"$root/"
);
my
$last_rule
=
''
;
my
@next_parts
;
my
$tag
;
my
$wrote_tag
;
my
$tag_index
;
my
$root_index
= 0;
while
(
length
$rule
&&
$rule
ne
$last_rule
) {
$last_rule
=
$rule
;
$rule
=~ s/^\s*|\s*$//g;
last
unless
length
$rule
;
if
(
$rule
=~ /
$reg
->{combinator}/) {
$rule
=
"* $rule"
;
};
if
(
$rule
=~ s/
$reg
->{element}//) {
my
(
$id_class
,
$name
,
$lang
) = ($1,$2,$3);
if
(
@next_parts
) {
push
@parts
,
@next_parts
;
@next_parts
= ();
}
if
(
$id_class
eq
''
) {
$tag
=
$name
||
'*'
;
}
else
{
$tag
=
'*'
;
}
if
(
defined
$parms
{prefix} and not
$tag
=~ /[*:|]/) {
$tag
=
join
':'
,
$parms
{prefix},
$tag
;
}
if
(!
$wrote_tag
++) {
push
@parts
,
$tag
;
$tag_index
=
$#parts
;
};
if
(
$id_class
eq
'#'
) { # ID
push
@parts
,
"[\@id='$name']"
;
}
elsif
(
$id_class
eq
'.'
) {
push
@parts
,
"[contains(concat(' ', \@class, ' '), ' $name ')]"
;
};
};
if
(
$rule
=~ s/
$reg
->{attr2}//) {
push
@parts
,
"["
, convert_attribute_match( $1, $2, $^N ),
"]"
;
}
elsif
(
$rule
=~ s/
$reg
->{attr1}//) {
if
(!
$wrote_tag
++) {
push
@parts
,
'*'
;
$tag_index
=
$#parts
;
};
push
@parts
,
"[\@$1]"
;
}
elsif
(
$rule
=~
$reg
->{badattr}) {
Carp::croak
"Invalid attribute-value selector '$rule'"
;
}
if
(
$rule
=~ s/
$reg
->{attrN}//) {
my
$sub_rule
= $1;
if
(
$sub_rule
=~ s/
$reg
->{attr2}//) {
push
@parts
,
"[not("
, convert_attribute_match( $1, $2, $^N ),
")]"
;
}
elsif
(
$sub_rule
=~ s/
$reg
->{attr1}//) {
push
@parts
,
"[not(\@$1)]"
;
}
elsif
(
$rule
=~
$reg
->{badattr}) {
Carp::croak
"Invalid attribute-value selector '$rule'"
;
}
else
{
my
$xpath
= selector_to_xpath(
$sub_rule
);
$xpath
=~ s!^//!!;
push
@parts
,
"[not(self::$xpath)]"
;
}
}
while
(
$rule
=~ s/
$reg
->{pseudo}//) {
if
(
my
@expr
=
$self
->parse_pseudo($1, \
$rule
) ) {
push
@parts
,
@expr
;
}
elsif
( $1 eq
'first-child'
) {
push
@parts
, nth_child(1);
}
elsif
( $1 eq
'last-child'
) {
push
@parts
, nth_last_child(1);
}
elsif
( $1 eq
'only-child'
) {
push
@parts
, nth_child(1), nth_last_child(1);
}
elsif
($1 =~ /^lang\(([\w\-]+)\)$/) {
push
@parts
,
"[\@xml:lang='$1' or starts-with(\@xml:lang, '$1-')]"
;
}
elsif
($1 =~ /^nth-child\((\d+)\)$/) {
push
@parts
, nth_child($1);
}
elsif
($1 =~ /^nth-child\((\d+)n(?:\+(\d+))?\)$/) {
push
@parts
, nth_child($1, $2||0);
}
elsif
($1 =~ /^nth-
last
-child\((\d+)\)$/) {
push
@parts
, nth_last_child($1);
}
elsif
($1 =~ /^nth-
last
-child\((\d+)n(?:\+(\d+))?\)$/) {
push
@parts
, nth_last_child($1, $2||0);
}
elsif
($1 =~ /^first-of-type$/) {
push
@parts
,
"[1]"
;
}
elsif
($1 =~ /^nth-of-type\((\d+)\)$/) {
push
@parts
,
"[$1]"
;
}
elsif
($1 =~ /^contains\($/) {
$rule
=~ s/^\s*
"([^"
]*)"\s*\)//
or
die
"Malformed string in :contains(): '$rule'"
;
push
@parts
,
qq{[text()[contains(string(.),"$1")]]}
;
}
elsif
( $1 eq
'root'
) {
$parts
[
$root_index
] =
$root
;
}
elsif
( $1 eq
'empty'
) {
push
@parts
,
"[not(* or text())]"
;
}
else
{
Carp::croak
"Can't translate '$1' pseudo-class"
;
}
}
if
(
$rule
=~ s/
$reg
->{combinator}//) {
my
$match
= $1;
if
(
$match
=~ />/) {
push
@parts
,
"/"
;
}
elsif
(
$match
=~ /\+/) {
push
@parts
,
"/following-sibling::*[1]/self::"
;
$tag_index
=
$#parts
;
}
elsif
(
$match
=~ /\~/) {
push
@parts
,
"/following-sibling::"
;
}
elsif
(
$match
=~ /^\s*$/) {
push
@parts
,
"//"
}
else
{
die
"Weird combinator '$match'"
}
undef
$tag
;
undef
$wrote_tag
;
}
if
(
$rule
=~ s/
$reg
->{comma}//) {
push
@parts
,
" | "
,
"$root/"
;
$root_index
=
$#parts
;
undef
$tag
;
undef
$wrote_tag
;
}
}
return
join
''
,
@parts
;
}
sub
parse_pseudo {
}
1;