has
xml
=> (
is
=>
'ro'
,
isa
=>
'XML::LibXML::Document'
,
required
=> 1,
);
has
xpc
=> (
is
=>
'ro'
,
isa
=>
'XML::LibXML::XPathContext'
,
required
=> 1,
);
has
is_strict
=> (
is
=>
'ro'
,
isa
=>
'Bool'
,
required
=> 1,
);
sub
to_string {
my
$self
=
shift
;
return
$self
->xml->toString();
}
my
%XML_NS
= (
);
sub
new_from_xml {
my
$class
=
shift
;
my
$part_name
=
shift
;
my
$xml
=
shift
;
my
$is_strict
=
shift
;
my
$doc
= XML::LibXML->load_xml(
string
=>
$xml
);
my
$xpc
= XML::LibXML::XPathContext->new();
if
(
$is_strict
) {
$xpc
->registerNs(
'w'
=>
$XML_NS
{strict});
$xpc
->registerNs(
'r'
=>
$XML_NS
{relationships_strict});
}
else
{
$xpc
->registerNs(
'w'
=>
$XML_NS
{transitional});
$xpc
->registerNs(
'r'
=>
$XML_NS
{relationships});
}
return
$class
->new(
part_name
=>
$part_name
,
xml
=>
$doc
,
xpc
=>
$xpc
,
is_strict
=>
$is_strict
,
);
}
sub
_clone_run {
my
$self
=
shift
;
my
$run
=
shift
;
my
$new_run
=
$run
->cloneNode(0);
my
(
$run_props
) =
$self
->xpc->findnodes(
'./w:rPr'
,
$run
);
if
(
$run_props
) {
my
$new_props
=
$run_props
->cloneNode(1);
$new_run
->appendChild(
$new_props
);
}
return
$new_run
;
}
sub
find_text_nodes {
my
$self
=
shift
;
my
$regex
=
shift
;
my
$exclude_tables
=
shift
;
my
$text_element_query
=
$exclude_tables
?
'//w:r[child::w:t and not(ancestor::w:tbl)]'
:
'//w:r[child::w:t]'
;
my
@matching_nodes
;
my
$runs
=
$self
->xpc->findnodes(
$text_element_query
,
$self
->xml->documentElement);
$runs
->
foreach
(
sub
{
my
$run
=
shift
;
my
$run_was_split
= 0;
my
$text_nodes
=
$self
->xpc->findnodes(
'./w:t'
,
$run
);
$text_nodes
->
foreach
(
sub
{
my
$t
=
shift
;
my
$run
=
$t
->parentNode;
my
@parts
=
split
(/(
$regex
)/,
$t
->textContent);
return
if
@parts
== 1;
$run_was_split
= 1;
for
(
my
$i
=
$#parts
;
$i
>= 0;
$i
--) {
my
$part
=
$parts
[
$i
];
next
if
$part
eq
''
;
my
$new_run
=
$self
->_clone_run(
$run
);
my
$new_text
=
$t
->cloneNode(0);
$new_text
->setAttributeNS(XML_XML_NS,
'xml:space'
,
'preserve'
);
$new_text
->appendText(
$part
);
$new_text
->normalize();
$new_run
->appendChild(
$new_text
);
$run
->parentNode->insertAfter(
$new_run
,
$run
);
if
(
$i
% 2 != 0) {
push
@matching_nodes
,
$new_text
->childNodes;
}
}
});
$run
->parentNode->removeChild(
$run
)
if
$run_was_split
;
});
for
my
$part
(
$self
->referenced_parts) {
my
$nodes
=
$part
->find_text_nodes(
$regex
,
$exclude_tables
,
);
push
@matching_nodes
,
@$nodes
;
}
return
\
@matching_nodes
;
}
sub
style_text {
my
$self
=
shift
;
my
$regex
=
shift
;
my
%options
=
@_
;
my
$text_nodes
=
$self
->find_text_nodes(
$regex
,
delete
$options
{exclude_tables});
for
my
$text_node
(
@$text_nodes
) {
my
(
$run_props
) =
$self
->xpc->findnodes(
'ancestor::w:r[1]/w:rPr'
,
$text_node
);
if
(!
$run_props
) {
my
(
$run
) =
$self
->xpc->findnodes(
'ancestor::w:r[1]'
,
$text_node
);
my
$run_ns
=
$run
->namespaceURI;
$run_props
=
$self
->xml->createElement(
'rPr'
);
$run_props
->setNamespace(
$run_ns
=>
$run
->lookupNamespacePrefix(
$run_ns
));
$run
->insertBefore(
$run_props
,
$run
->firstChild);
}
if
(
exists
$options
{bold}) {
my
(
$bold
) =
$self
->xpc->findnodes(
'./w:b'
,
$run_props
);
if
(!
$bold
&&
$options
{bold}) {
$run_props
->addNewChild(
$run_props
->
namespaceURI
=>
'b'
);
}
if
(
$bold
&& !
$options
{bold}) {
$run_props
->removeChild(
$bold
);
}
}
if
(
exists
$options
{italic}) {
my
(
$italic
) =
$self
->xpc->findnodes(
'./w:i'
,
$run_props
);
if
(!
$italic
&&
$options
{italic}) {
$run_props
->addNewChild(
$run_props
->
namespaceURI
=>
'i'
);
}
if
(
$italic
&& !
$options
{italic}) {
$run_props
->removeChild(
$italic
);
}
}
if
(
exists
$options
{color}) {
my
$color_to_set
= _check_color_syntax(
$options
{color});
my
(
$color_element
) =
$self
->xpc->findnodes(
'./w:color'
,
$run_props
);
if
(!
$color_element
&&
$options
{color}) {
$color_element
=
$run_props
->addNewChild(
$run_props
->
namespaceURI
=>
'color'
);
}
if
(
$color_element
&&
$options
{color}) {
my
$ns
=
$color_element
->namespaceURI;
$color_element
->setAttributeNS(
$ns
,
'val'
=>
$color_to_set
);
$color_element
->removeAttributeNS(
$ns
=>
'themeColor'
);
$color_element
->removeAttributeNS(
$ns
=>
'themeShade'
);
$color_element
->removeAttributeNS(
$ns
=>
'themeTint'
);
}
if
(
$color_element
&& not
defined
$options
{color}) {
$run_props
->removeChild(
$color_element
);
}
}
if
(
exists
$options
{underline_style}) {
my
$underline_style
= _check_underline_style(
$options
{underline_style});
my
(
$underline_element
) =
$self
->xpc->findnodes(
'./w:u'
,
$run_props
);
if
(!
$underline_element
&&
$options
{underline_style}) {
$underline_element
=
$run_props
->addNewChild(
$run_props
->
namespaceURI
=>
'u'
);
}
if
(
$underline_element
&&
$options
{underline_style}) {
$underline_element
->setAttributeNS(
$underline_element
->namespaceURI,
'val'
=>
$options
{underline_style}
);
if
(
exists
$options
{underline_color}) {
if
(
defined
$options
{underline_color}) {
my
$color_to_set
= _check_color_syntax(
$options
{underline_color});
$underline_element
->setAttributeNS(
$underline_element
->namespaceURI,
'color'
=>
$color_to_set
,
);
}
else
{
$underline_element
->removeAttributeNS(
$underline_element
->
namespaceURI
=>
'color'
);
}
}
}
if
(
$underline_element
&& not
defined
$options
{underline_style}) {
$run_props
->removeChild(
$underline_element
);
}
}
}
return
scalar
@$text_nodes
;
}
sub
_check_color_syntax {
my
$color
=
shift
;
return
if
not
defined
$color
;
return
$color
if
$color
eq
'auto'
;
return
uc
(
$color
)
if
$color
=~ /^[0-9a-fA-F]{6}$/;
croak(
"Invalid color value: '$color'"
);
}
my
%underline_styles
= (
map
{
$_
=> 1 }
qw(
dash
dashDotDotHeavy
dashDotHeavy
dashedHeavy
dashLong
dashLongHeavy
dotDash
dotDotDash
dotted
dottedHeavy
double
none
single
thick
wave
wavyDouble
wavyHeavy
words
)
);
sub
_check_underline_style {
my
$underline_style
=
shift
;
return
if
not
defined
$underline_style
;
return
$underline_style
if
exists
$underline_styles
{
$underline_style
};
croak(
"Invalid underline style: '$underline_style'"
);
}
sub
remove_spellcheck_markers {
my
$self
=
shift
;
my
$spellcheck_nodes
=
$self
->xpc->findnodes(
'//w:proofErr'
,
$self
->xml->documentElement,
);
$spellcheck_nodes
->
foreach
(
sub
{
my
$node
=
shift
;
$node
->parentNode->removeChild(
$node
);
});
for
my
$part
(
$self
->referenced_parts) {
$part
->remove_spellcheck_markers();
}
return
;
}
sub
extract_words {
my
$self
=
shift
;
my
@words
;
my
$text_nodes
=
$self
->xpc->findnodes(
'//w:t'
,
$self
->xml->documentElement,
);
$text_nodes
->
foreach
(
sub
{
my
$node
=
shift
;
push
@words
,
grep
{ !/^\s*$/ }
split
(/[\W]+/,
$node
->textContent);
});
for
my
$part
(
$self
->referenced_parts) {
push
@words
, @{
$part
->extract_words() };
}
return
\
@words
;
}
sub
merge_runs {
my
$self
=
shift
;
my
$runs
=
$self
->xpc->findnodes(
'//w:p/w:r'
,
$self
->xml->documentElement,
);
my
$active_run
;
my
$active_run_last_child
;
my
$active_run_props
;
$runs
->
foreach
(
sub
{
my
$run
=
shift
;
if
( !
defined
$active_run
|| !
$run
->parentNode->isSameNode(
$active_run
->parentNode)
) {
undef
$active_run
;
undef
$active_run_last_child
;
undef
$active_run_props
;
}
my
(
$this_run_props
) =
$self
->xpc->findnodes(
'./w:rPr'
,
$run
);
$this_run_props
=
defined
$this_run_props
?
$this_run_props
->toString
:
''
;
if
(
defined
$active_run_props
&&
$this_run_props
eq
$active_run_props
) {
my
$children
=
$run
->childNodes;
$children
->
foreach
(
sub
{
my
$node
=
shift
;
return
if
$node
->nodeName eq
'w:rPr'
;
if
(
$active_run_last_child
&&
$active_run_last_child
->nodeName eq
'w:t'
&&
$node
->nodeName eq
'w:t'
) {
$node
->childNodes->
foreach
(
sub
{
my
$child
=
shift
;
$active_run_last_child
->appendChild(
$child
);
});
$node
->parentNode->removeChild(
$node
);
}
else
{
$active_run
->appendChild(
$node
);
if
(
$node
->isa(
'XML::LibXML::Element'
)
|| (
$node
->isa(
'XML::LibXML::Text'
) &&
$node
->textContent !~ /^\s*$/)
) {
$active_run_last_child
=
$node
;
}
}
});
$run
->parentNode->removeChild(
$run
);
}
else
{
$active_run
=
$run
;
$active_run_props
=
$this_run_props
;
$active_run_last_child
= first {
$_
->isa(
'XML::LibXML::Element'
)
&&
$_
->nodeName ne
'w:rPr'
}
$run
->childNodes->
reverse
->get_nodelist;
}
});
for
my
$part
(
$self
->referenced_parts) {
$part
->merge_runs();
}
return
;
}
sub
replace_text {
my
$self
=
shift
;
my
$search
=
shift
;
my
$replace
=
shift
;
my
$text_nodes
=
$self
->xpc->findnodes(
'//w:t'
,
$self
->xml->documentElement,
);
$text_nodes
->
foreach
(
sub
{
my
$text
=
shift
;
$text
->childNodes->
foreach
(
sub
{
my
$child
=
shift
;
return
unless
$child
->isa(
'XML::LibXML::Text'
);
$child
->replaceDataString(
$search
,
$replace
, 1);
});
});
for
my
$part
(
$self
->referenced_parts) {
$part
->replace_text(
$search
,
$replace
);
}
return
;
}
{
my
@EXPLICIT_REFERENCE_IDS
= (
'//w:footerReference/@r:id'
,
'//w:headerReference/@r:id'
,
);
my
%IMPLICIT_REFERENCE_ELEMENTS
= (
strict
=> {
},
transitional
=> {
},
);
sub
referenced_parts {
my
$self
=
shift
;
my
@parts
;
for
my
$ref_type
(
@EXPLICIT_REFERENCE_IDS
) {
my
$references
=
$self
->xpc->findnodes(
$ref_type
,
$self
->xml->documentElement,
);
$references
->
foreach
(
sub
{
my
$ref
=
shift
;
push
@parts
,
$self
->find_referenced_part_by_id(
$ref
->value);
});
}
my
$strict
=
$self
->is_strict ?
'strict'
:
'transitional'
;
for
my
$ref_type
(
keys
%{
$IMPLICIT_REFERENCE_ELEMENTS
{
$strict
} }) {
my
$references
=
$self
->xpc->findnodes(
$ref_type
,
$self
->xml->documentElement,
);
next
if
not
$references
->size();
push
@parts
,
$self
->find_referenced_part_by_type(
$IMPLICIT_REFERENCE_ELEMENTS
{
$strict
}{
$ref_type
}
);
}
return
@parts
;
}
}
__PACKAGE__->meta->make_immutable();