sub
new {
my
(
$class
,
%options
) =
@_
;
my
$self
=
$class
->SUPER::new(
%options
);
$$self
{permuted} =
$options
{permuted};
$$self
{
split
} =
$options
{
split
};
return
$self
; }
sub
toProcess {
my
(
$self
,
$doc
) =
@_
;
return
$doc
->findnodes(
'//ltx:index[not(ltx:indexlist)] | //ltx:glossary[not(ltx:glossarylist)]'
); }
sub
process {
my
(
$self
,
$doc
,
@lists
) =
@_
;
my
@newdocs
= ();
$doc
->addDate();
foreach
my
$list
(
@lists
) {
my
$tag
=
$doc
->getQName(
$list
);
if
(
$tag
eq
'ltx:index'
) {
my
(
$tree
,
$allphrases
) =
$self
->build_tree(
$doc
,
$list
);
if
(
$tree
) {
if
(
$$self
{
split
}) {
push
(
@newdocs
,
$self
->makeSubCollectionDocuments(
$doc
,
$list
,
map
{ (
$_
=>
$self
->makeIndexList(
$doc
,
$allphrases
,
$$tree
{subtrees}{
$_
})) }
keys
%{
$$tree
{subtrees} })); }
else
{
$doc
->addNodes(
$list
,
$self
->makeIndexList(
$doc
,
$allphrases
,
$tree
)); } } }
elsif
(
$tag
eq
'ltx:glossary'
) {
if
(
my
@entries
=
$self
->getGlossaryEntries(
$doc
,
$list
)) {
if
(
$$self
{
split
}) {
my
$split
= {};
foreach
my
$entry
(
@entries
) {
push
(@{
$$split
{
$$entry
{initial} } },
$entry
); }
push
(
@newdocs
,
$self
->makeSubCollectionDocuments(
$doc
,
$list
,
map
{ (
$_
=>
$self
->makeGlossaryList(
$doc
, @{
$$split
{
$_
} })) }
sort
keys
%$split
)); }
else
{
$doc
->addNodes(
$list
,
$self
->makeGlossaryList(
$doc
,
@entries
)); } } } }
my
@docs
=
map
{
$self
->rescan(
$_
); } (
@newdocs
?
@newdocs
: (
$doc
));
return
@docs
; }
sub
build_tree {
my
(
$self
,
$doc
,
$index
) =
@_
;
my
$defaultlistname
=
'idx'
;
my
$listname
=
'idx'
;
if
(
my
@keys
=
grep
{ /^INDEX:/ }
$$self
{db}->getKeys) {
NoteLog(
"MakeIndex: "
.
scalar
(
@keys
) .
" entries"
);
my
$id
=
$index
->getAttribute(
'xml:id'
);
my
$allphrases
= {};
my
$tree
= {
subtrees
=> {},
referrers
=> {},
id
=>
$id
,
parent
=>
undef
};
foreach
my
$key
(
@keys
) {
my
$entry
=
$$self
{db}->lookup(
$key
);
my
$phrases
=
$entry
->getValue(
'phrases'
);
my
@phrases
=
@$phrases
;
if
((
$entry
->getValue(
'inlist'
) ||
$defaultlistname
) ne
$listname
) {
next
; }
if
(!
scalar
(
@phrases
)) {
Warn(
'expected'
,
$key
,
undef
,
"Missing phrases in indexmark: '$key'"
);
next
; }
if
(
$$self
{permuted}) {
map
{
$self
->add_entry(
$doc
,
$allphrases
,
$tree
,
$entry
, @{
$_
}) }
cyclic_permute(
@phrases
); }
else
{
$self
->add_entry(
$doc
,
$allphrases
,
$tree
,
$entry
,
@phrases
); } }
return
(
$tree
,
$allphrases
); }
else
{
return
(
undef
,
undef
); } }
sub
add_entry {
my
(
$self
,
$doc
,
$allphrases
,
$tree
,
$entry
,
@phrases
) =
@_
;
if
(
$$self
{
split
}) {
my
$init
=
$doc
->initial(
$phrases
[0]->getAttribute(
'key'
));
my
$subtree
=
$$tree
{subtrees}{
$init
};
if
(!
$subtree
) {
$subtree
=
$$tree
{subtrees}{
$init
}
= {
phrase
=>
$init
,
subtrees
=> {},
referrers
=> {},
id
=>
$$tree
{id},
parent
=>
$tree
}; }
add_rec(
$doc
,
$allphrases
,
$subtree
,
$entry
,
@phrases
); }
else
{
add_rec(
$doc
,
$allphrases
,
$tree
,
$entry
,
@phrases
); }
return
; }
sub
add_rec {
my
(
$doc
,
$allphrases
,
$tree
,
$entry
,
@phrases
) =
@_
;
if
(
@phrases
) {
my
$phrase
=
shift
(
@phrases
);
my
$key
=
$phrase
->getAttribute(
'key'
);
my
$keyid
= getIndexKeyID(
$key
);
my
$subtree
=
$$tree
{subtrees}{
$key
};
if
(!
$subtree
) {
my
$id
=
$$tree
{id} .
'.'
.
$keyid
;
my
$fullkey
= (
$$tree
{key} ?
"$$tree{key}."
:
''
) .
$key
;
my
$phrasetext
= getIndexContentKey(
$phrase
);
my
$fullphrasetext
= (
$$tree
{fullphrasetext} ?
$$tree
{fullphrasetext} .
' '
:
''
)
.
$phrasetext
;
$$allphrases
{
$fullkey
} =
$id
;
$$allphrases
{
lc
(
$fullkey
) } =
$id
;
$$allphrases
{
$fullphrasetext
} =
$id
;
$$allphrases
{
lc
(
$fullphrasetext
) } =
$id
;
my
$phrasecopy
=
$doc
->cloneNode(
$phrase
);
$subtree
=
$$tree
{subtrees}{
$key
} = {
key
=>
$fullkey
,
id
=>
$id
,
phrase
=>
$phrasecopy
,
phrasetext
=>
$phrasetext
,
fullphrasetext
=>
$fullphrasetext
,
subtrees
=> {},
referrers
=> {},
parent
=>
$tree
};
}
add_rec(
$doc
,
$allphrases
,
$subtree
,
$entry
,
@phrases
); }
else
{
if
(
my
$seealso
=
$entry
->getValue(
'see_also'
)) {
$$tree
{see_also} =
$seealso
; }
if
(
my
$refs
=
$entry
->getValue(
'referrers'
)) {
map
{
$$tree
{referrers}{
$_
} =
$$refs
{
$_
} }
keys
%$refs
; } }
return
; }
sub
getIndexContentKey {
my
(
$node
) =
@_
;
my
$string
= (
ref
$node
?
$node
->textContent :
$node
);
$string
=~ s/^\s+//s;
$string
=~ s/\s+$//s;
$string
=~ s/\s+/ /gs;
$string
=~ s/\s*[\.\,\;]+$//s;
return
$string
; }
my
%GREEK_ASCII_MAP
= (
"\x{03B1}"
=>
'alpha'
,
"\x{03B2}"
=>
'beta'
,
"\x{03B3}"
=>
'gamma'
,
"\x{03B4}"
=>
'delta'
,
"\x{03F5}"
=>
'epsilon'
,
"\x{03B5}"
=>
'varepsilon'
,
"\x{03B6}"
=>
'zeta'
,
"\x{03B7}"
=>
'eta'
,
"\x{03B8}"
=>
'theta'
,
"\x{03D1}"
=>
'vartheta'
,
"\x{03B9}"
=>
'iota'
,
"\x{03BA}"
=>
'kappa'
,
"\x{03BB}"
=>
'lambda'
,
"\x{03BC}"
=>
'mu'
,
"\x{03BD}"
=>
'nu'
,
"\x{03BE}"
=>
'xi'
,
"\x{03C0}"
=>
'pi'
,
"\x{03D6}"
=>
'varpi'
,
"\x{03C1}"
=>
'rho'
,
"\x{03F1}"
=>
'varrho'
,
"\x{03C3}"
=>
'sigma'
,
"\x{03C2}"
=>
'varsigma'
,
"\x{03C4}"
=>
'tau'
,
"\x{03C5}"
=>
'upsilon'
,
"\x{03D5}"
=>
'phi'
,
"\x{03C6}"
=>
'varphi'
,
"\x{03C7}"
=>
'chi'
,
"\x{03C8}"
=>
'psi'
,
"\x{03C9}"
=>
'omega'
,
"\x{0393}"
=>
'Gamma'
,
"\x{0394}"
=>
'Delta'
,
"\x{0398}"
=>
'Theta'
,
"\x{039B}"
=>
'Lambda'
,
"\x{039E}"
=>
'Xi'
,
"\x{03A0}"
=>
'Pi'
,
"\x{03A3}"
=>
'Sigma'
,
"\x{03A5}"
=>
'Upsilon'
,
"\x{03A6}"
=>
'Phi'
,
"\x{03A8}"
=>
'Psi'
,
"\x{03A9}"
=>
'Omega'
);
my
$GREEK_RE
=
'('
.
join
(
'|'
,
sort
keys
%GREEK_ASCII_MAP
) .
')'
;
sub
getIndexKeyID {
my
(
$key
) =
@_
;
$key
=~ s/^\s+//s;
$key
=~ s/\s+$//s;
$key
= NFD(
$key
);
$key
=~ s/
$GREEK_RE
/
$GREEK_ASCII_MAP
{$1}/eg;
$key
= unidecode(
$key
);
$key
=~ s/[^a-zA-Z0-9]//g;
return
$key
; }
sub
permute {
my
(
@l
) =
@_
;
if
(
scalar
(
@l
) > 1) {
return
map
{ permute_aux(
$l
[
$_
],
@l
[0 ..
$_
- 1],
@l
[
$_
+ 1 ..
$#l
]) } 0 ..
$#l
; }
else
{
return
[
@l
]; } }
sub
permute_aux {
my
(
$first
,
@rest
) =
@_
;
return
map
{ [
$first
,
@$_
] } permute(
@rest
); }
sub
cyclic_permute {
my
(
@l
) =
@_
;
if
(
scalar
(
@l
) > 1) {
return
map
{ [
@l
[
$_
..
$#l
],
@l
[0 ..
$_
- 1]] } 0 ..
$#l
; }
else
{
return
[
@l
]; } }
sub
makeIndexList {
my
(
$self
,
$doc
,
$allphrases
,
$tree
) =
@_
;
my
$subtrees
=
$$tree
{subtrees};
if
(
my
@keys
=
$doc
->unisort(
keys
%$subtrees
)) {
return
[
'ltx:indexlist'
, {},
map
{
$self
->makeIndexEntry(
$doc
,
$allphrases
,
$$subtrees
{
$_
}) }
@keys
]; }
else
{
return
(); } }
sub
makeIndexEntry {
my
(
$self
,
$doc
,
$allphrases
,
$tree
) =
@_
;
my
$refs
=
$$tree
{referrers};
my
$seealso
=
$$tree
{see_also};
my
@links
= ();
if
(
keys
%$refs
) {
push
(
@links
, [
'ltx:text'
, {},
' '
],
$self
->combineIndexEntries(
$doc
,
$refs
)); }
if
(
$seealso
) {
my
%saw
= ();
foreach
my
$see
(
@$seealso
) {
push
(
@links
,
', '
);
if
(
my
$name
=
$see
->getAttribute(
'name'
)) {
push
(
@links
, [
'ltx:text'
, {
font
=>
'italic'
},
$name
,
' '
]); }
my
$phrase
= getIndexContentKey(
$see
);
if
(
my
@seelinks
=
$self
->seealsoSearch(
$doc
,
$allphrases
,
$tree
,
$see
)) {
push
(
@links
,
@seelinks
); }
else
{
Warn(
'expected'
,
$phrase
,
undef
,
"Missing index see-also term '$phrase'"
,
"(seen under $$tree{key})"
)
unless
$doc
->findnodes(
"descendant-or-self::ltx:ref"
,
$see
);
push
(
@links
, [
'ltx:text'
, {},
$see
->childNodes]); } } }
return
[
'ltx:indexentry'
, {
'xml:id'
=>
$$tree
{id} },
[
'ltx:indexphrase'
, {},
$doc
->trimChildNodes(
$$tree
{phrase})],
(
@links
? ([
'ltx:indexrefs'
, {},
@links
]) : ()),
$self
->makeIndexList(
$doc
,
$allphrases
,
$tree
)]; }
sub
alphacmp {
return
(
lc
(
$a
) cmp
lc
(
$b
)) || (
$a
cmp
$b
); }
sub
combineIndexEntries {
my
(
$self
,
$doc
,
$refs
) =
@_
;
my
@ids
=
sort
alphacmp
keys
%$refs
;
my
@links
= ();
while
(
@ids
) {
my
$id
=
shift
(
@ids
);
my
$entry
=
$$refs
{
$id
};
if
(
$$entry
{rangestart}) {
my
$startid
=
$id
;
my
$endid
=
$id
;
my
$lvl
= 1;
while
(
@ids
) {
$endid
=
shift
(
@ids
);
$lvl
--
if
$$refs
{
$endid
}{rangestart};
$lvl
--
if
$$refs
{
$endid
}{rangeend};
last
unless
$lvl
; }
push
(
@links
,
[
'ltx:text'
, {},
$self
->makeIndexRefs(
$doc
,
$startid
,
grep
{
$_
ne
'rangestart'
}
sort
keys
%$entry
),
"\x{2014}"
,
$self
->makeIndexRefs(
$doc
,
$endid
,
grep
{
$_
ne
'rangeend'
}
sort
keys
%{
$$refs
{
$endid
} })]); }
else
{
push
(
@links
,
$self
->makeIndexRefs(
$doc
,
$id
,
sort
keys
%$entry
)); } }
return
conjoin(
@links
); }
sub
makeIndexRefs {
my
(
$self
,
$doc
,
$id
,
@styles
) =
@_
;
return
(((
$styles
[0] ||
'normal'
) ne
'normal'
)
? [
'ltx:text'
, {
font
=>
$styles
[0] }, [
'ltx:ref'
, {
idref
=>
$id
,
show
=>
'typerefnum'
}]]
: [
'ltx:ref'
, {
idref
=>
$id
,
show
=>
'typerefnum'
}]); }
sub
seealsoSearch {
my
(
$self
,
$doc
,
$allphrases
,
$contexttree
,
$see
) =
@_
;
return
seealsoSearch_rec(
$doc
,
$allphrases
,
$contexttree
, seealsoPartition(
$doc
,
$see
)); }
sub
seealsoSearch_rec {
my
(
$doc
,
$allphrases
,
$contexttree
,
@parts
) =
@_
;
my
(
$link
,
@links
);
if
(
scalar
(
@parts
) < 1) {
return
(); }
elsif
(
scalar
(
@parts
) < 3) {
if
(
$link
= lookupSeealsoPhrase(
$doc
,
$allphrases
,
$contexttree
,
$parts
[0])) {
return
(
$link
, (
$parts
[1] ? cdr(
$parts
[1]) : ())); } }
elsif
(
@links
= seealsoSearch_rec(
$doc
,
$allphrases
,
$contexttree
,
seealsoJoin(
@parts
[0 .. 2]),
@parts
[3 ..
$#parts
])) {
return
@links
; }
elsif
((
$link
= lookupSeealsoPhrase(
$doc
,
$allphrases
,
$contexttree
,
$parts
[0]))
&& (
@links
= seealsoSearch_rec(
$doc
,
$allphrases
,
$contexttree
,
@parts
[2 ..
$#parts
]))) {
return
(
$link
, cdr(
$parts
[1]),
@links
); }
return
; }
sub
car {
return
$$_
[0][0]; }
sub
cdr {
my
(
$key
,
@xml
) = @{
$_
[0] };
return
@xml
; }
sub
seealsoJoin {
my
(
@parts
) =
@_
;
return
[getIndexContentKey(
join
(
''
,
map
{
$$_
[0] }
@parts
)),
map
{ cdr(
$_
) }
@parts
]; }
sub
lookupSeealsoPhrase {
my
(
$doc
,
$allphrases
,
$contexttree
,
$pair
) =
@_
;
my
(
$phrase
,
@xml
) =
@$pair
;
my
$pnc
=
$phrase
;
$pnc
=~ s/,\s*/ /sg;
my
$ps
=
$phrase
;
$ps
=~ s/(\w+)s\b/$1/sg;
my
$psnc
=
$ps
;
$psnc
=~ s/,\s*/ /sg;
my
$pnlvl
=
$phrase
;
$pnlvl
=~ s/,\s*/./sg;
foreach
my
$trial
(
$phrase
,
lc
(
$phrase
),
$pnc
,
lc
(
$pnc
),
$ps
,
lc
(
$ps
),
$psnc
,
lc
(
$psnc
),
$pnlvl
,
lc
(
$pnlvl
),
) {
my
$t
=
$contexttree
;
while
(
$t
) {
if
(
my
$id
=
$$allphrases
{ (
$$t
{fullphrasetext} ?
$$t
{fullphrasetext} .
" "
:
''
) .
$trial
}) {
return
[
'ltx:ref'
, {
idref
=>
$id
},
@xml
]; }
$t
=
$$t
{parent}; } }
return
; }
sub
seealsoPartition {
my
(
$doc
,
$see
) =
@_
;
my
@parts
= seealsoPartition_aux(
$doc
,
$see
);
my
@result
= (
shift
(
@parts
));
while
(
@parts
) {
my
$next
=
shift
(
@parts
);
my
$prev_is
= (
$result
[-1][0] =~ /^,?\s*(?:,|\.|\s+|\band\s+also|\band|\bor)\s*$/);
my
$next_is
= (
$$next
[0] =~ /^(?:,|\.|\s+|and\b|or\b)/);
if
(!(
$prev_is
xor
$next_is
)) {
my
(
$k
,
@x
) =
@$next
;
$result
[-1][0] .=
$k
;
push
(@{
$result
[-1] },
@x
); }
else
{
push
(
@result
,
$next
); } }
@parts
=
@result
;
@result
= (
shift
(
@parts
));
while
(
@parts
) {
my
$next
=
shift
(
@parts
);
if
((
$$next
[0] =~ /^\s+$/s) &&
scalar
(
@parts
)) {
my
(
$k1
,
@x1
) =
@$next
;
my
(
$k2
,
@x2
) = @{
shift
(
@parts
) };
$result
[-1][0] .=
$k1
.
$k2
;
push
(@{
$result
[-1] },
@x1
,
@x2
); }
else
{
push
(
@result
,
$next
); } }
return
@result
; }
sub
seealsoPartition_aux {
my
(
$doc
,
$see
) =
@_
;
my
@result
= ();
foreach
my
$ch
(
$see
->childNodes) {
my
$t
=
$ch
->nodeType;
if
(
$t
== XML_TEXT_NODE) {
my
$string
=
$ch
->textContent;
while
(
$string
) {
if
(
$string
=~ s/^(,|\.|\s+|and\s+also\b|and\b|or\b)//) {
push
(
@result
, [$1, $1]); }
elsif
(
$string
=~ s/^([^,\.\s]+)//) {
push
(
@result
, [getIndexContentKey($1), $1]); } }
push
(
@result
, [getIndexContentKey(
$string
),
$string
])
if
$string
; }
elsif
(
$t
!= XML_ELEMENT_NODE) { }
else
{
my
$tag
=
$doc
->getQName(
$ch
);
if
(
$tag
=~ /^(ltx:text|ltx:emph)$/) {
my
$attr
= {
map
{ (
$_
=>
$ch
->getAttribute(
$_
)) }
$ch
->attributes };
push
(
@result
,
map
{ [
$$_
[0], [
$tag
,
$attr
, cdr(
$_
)]] } seealsoPartition_aux(
$doc
,
$ch
)); }
else
{
push
(
@result
, [getIndexContentKey(
$ch
),
$ch
]); }
} }
return
@result
; }
sub
getGlossaryEntries {
my
(
$self
,
$doc
,
$glossary
) =
@_
;
my
$lists
=
$glossary
->getAttribute(
'lists'
) ||
''
;
return
()
unless
$lists
;
my
$glossary_id
=
$glossary
->getAttribute(
'xml:id'
) ||
''
;
my
%lists
=
map
{
$_
=> 1; }
split
(
','
,
$lists
);
my
@entries
= ();
foreach
my
$gkey
(
grep
{ /^GLOSSARY:/ }
$$self
{db}->getKeys) {
my
(
$ignore
,
$list
,
$key
) =
split
(
':'
,
$gkey
);
next
unless
$lists
{
$list
};
my
$gitem
=
$$self
{db}->lookup(
$gkey
);
my
$refs
=
$gitem
->getValue(
'referrers'
);
next
unless
$refs
&& %{
$refs
};
my
$term
=
$gitem
->getValue(
'phrase:name'
);
my
$desc
=
$gitem
->getValue(
'phrase:description'
);
my
$sortkey
=
$gitem
->getValue(
'phrase:sort'
) ||
$key
;
my
$initial
= (
$sortkey
=~ /^([a-zA-Z])/ ?
uc
($1) :
'*'
);
my
$id
=
$glossary_id
.
'.'
.
$key
;
push
(
@entries
,
{
initial
=>
$initial
,
sortkey
=>
$sortkey
,
formatted
=> [
'ltx:glossaryentry'
, {
lists
=>
$lists
,
'xml:id'
=>
$id
,
key
=>
$key
},
[
'ltx:glossaryphrase'
, {
role
=>
'label'
,
key
=>
$key
},
(
map
{
$doc
->cloneNode(
$_
,
'glo'
) }
$doc
->trimChildNodes(
$term
))],
[
'ltx:glossaryphrase'
, {
role
=>
'definition'
},
(
map
{
$doc
->cloneNode(
$_
,
'glo'
) }
$doc
->trimChildNodes(
$desc
))
]] });
}
return
@entries
; }
sub
makeGlossaryList {
my
(
$self
,
$doc
,
@entries
) =
@_
;
my
%hash
=
map
{ (
$$_
{sortkey} =>
$_
) }
@entries
;
my
@keys
=
$doc
->unisort(
keys
%hash
);
return
[
'ltx:glossarylist'
, {},
map
{
$hash
{
$_
}{formatted} }
@keys
]; }
sub
conjoin {
my
(
@items
) =
@_
;
my
@result
= ();
if
(
@items
) {
push
(
@result
,
shift
(
@items
));
while
(
@items
) {
push
(
@result
,
", "
,
shift
(
@items
)); } }
return
@result
; }
1;