BEGIN {
use
vars
qw($VERSION @ISA @EXPORT @EXPORT_OK)
;
@ISA
=
qw(Exporter)
;
@EXPORT
=
qw()
;
@EXPORT_OK
=
qw()
;
$VERSION
=
"1.49"
;
}
my
$errorString
=
""
;
my
$errorVal
= 0;
my
%pos_map
= (
'noun'
=>
'n'
,
'n'
=>
'n'
,
'1'
=>
'n'
,
''
=>
'n'
,
'verb'
=>
'v'
,
'v'
=>
'v'
,
'2'
=>
'v'
,
'adjective'
=>
'a'
,
'adj'
=>
'a'
,
'a'
=>
'a'
,
's'
=>
'a'
,
'3'
=>
'a'
,
'5'
=>
'a'
,
'adverb'
=>
'r'
,
'adv'
=>
'r'
,
'r'
=>
'r'
,
'4'
=>
'r'
);
my
%pos_num
= (
'noun'
=>
'1'
,
'n'
=>
'1'
,
'1'
=>
'1'
,
''
=>
'1'
,
'verb'
=>
'2'
,
'v'
=>
'2'
,
'2'
=>
'2'
,
'adjective'
=>
'3'
,
'adj'
=>
'3'
,
'a'
=>
'3'
,
's'
=>
'3'
,
'3'
=>
'3'
,
'adverb'
=>
'4'
,
'adv'
=>
'4'
,
'r'
=>
'4'
,
'4'
=>
'4'
);
my
%relNameSym
= (
'ants'
=> {
'!'
=>1},
'hype'
=> {
'@'
=>1},
'inst'
=> {
'@i'
=>1},
'hypes'
=> {
'@'
=>1,
'@i'
=>1},
'hypo'
=> {
'~'
=>1},
'hasi'
=> {
'~i'
=>1},
'hypos'
=> {
'~'
=>1,
'~i'
=>1},
'mmem'
=> {
'%m'
=>1},
'msub'
=> {
'%s'
=>1},
'mprt'
=> {
'%p'
=>1},
'mero'
=> {
'%m'
=>1,
'%s'
=>1,
'%p'
=>1},
'hmem'
=> {
'#m'
=>1},
'hsub'
=> {
'#s'
=>1},
'hprt'
=> {
'#p'
=>1},
'holo'
=> {
'#m'
=>1,
'#s'
=>1,
'#p'
=>1},
'attr'
=> {
'='
=>1},
'enta'
=> {
'*'
=>1},
'caus'
=> {
'>'
=>1},
'also'
=> {
'^'
=>1},
'vgrp'
=> {
'$'
=>1},
'sim'
=> {
'&'
=>1},
'part'
=> {
'<'
=>1},
'pert'
=> {
'\\'
=>1},
'deri'
=> {
'+'
=>1},
'domn'
=> {
';c'
=>1,
';r'
=>1,
';u'
=>1},
'dmnc'
=> {
';c'
=>1},
'dmnr'
=> {
';r'
=>1},
'dmnu'
=> {
';u'
=>1},
'domt'
=> {
'-c'
=>1,
'-r'
=>1,
'-u'
=>1},
'dmtc'
=> {
'-c'
=>1},
'dmtr'
=> {
'-r'
=>1},
'dmtu'
=> {
'-u'
=>1});
my
%relSymName
= (
'!'
=>
'ants'
,
'@'
=>
'hype'
,
'@i'
=>
'inst'
,
'~'
=>
'hypo'
,
'~i'
=>
'hasi'
,
'%m'
=>
'mmem'
,
'%s'
=>
'msub'
,
'%p'
=>
'mprt'
,
'#m'
=>
'hmem'
,
'#s'
=>
'hsub'
,
'#p'
=>
'hprt'
,
'='
=>
'attr'
,
'*'
=>
'enta'
,
'>'
=>
'caus'
,
'^'
=>
'also'
,
'$'
=>
'vgrp'
,
'&'
=>
'sim'
,
'<'
=>
'part'
,
'\\'
=>
'pert'
,
'-u'
=>
'dmtu'
,
'-r'
=>
'dmtr'
,
'-c'
=>
'dmtc'
,
';u'
=>
'dmnu'
,
';r'
=>
'dmnr'
,
';c'
=>
'dmnc'
);
my
%lexnames
= (
'00'
=>
'adj.all'
,
'01'
=>
'adj.pert'
,
'02'
=>
'adv.all'
,
'03'
=>
'noun.Tops'
,
'04'
=>
'noun.act'
,
'05'
=>
'noun.animal'
,
'06'
=>
'noun.artifact'
,
'07'
=>
'noun.attribute'
,
'08'
=>
'noun.body'
,
'09'
=>
'noun.cognition'
,
'10'
=>
'noun.communication'
,
'11'
=>
'noun.event'
,
'12'
=>
'noun.feeling'
,
'13'
=>
'noun.food'
,
'14'
=>
'noun.group'
,
'15'
=>
'noun.location'
,
'16'
=>
'noun.motive'
,
'17'
=>
'noun.object'
,
'18'
=>
'noun.person'
,
'19'
=>
'noun.phenomenon'
,
'20'
=>
'noun.plant'
,
'21'
=>
'noun.possession'
,
'22'
=>
'noun.process'
,
'23'
=>
'noun.quantity'
,
'24'
=>
'noun.relation'
,
'25'
=>
'noun.shape'
,
'26'
=>
'noun.state'
,
'27'
=>
'noun.substance'
,
'28'
=>
'noun.time'
,
'29'
=>
'verb.body'
,
'30'
=>
'verb.change'
,
'31'
=>
'verb.cognition'
,
'32'
=>
'verb.communication'
,
'33'
=>
'verb.competition'
,
'34'
=>
'verb.consumption'
,
'35'
=>
'verb.contact'
,
'36'
=>
'verb.creation'
,
'37'
=>
'verb.emotion'
,
'38'
=>
'verb.motion'
,
'39'
=>
'verb.perception'
,
'40'
=>
'verb.possession'
,
'41'
=>
'verb.social'
,
'42'
=>
'verb.stative'
,
'43'
=>
'verb.weather'
,
'44'
=>
'adj.ppl'
);
my
$lexnamesFile
=
"lexnames"
;
my
@excFile
= (
""
,
"noun.exc"
,
"verb.exc"
,
"adj.exc"
,
"adv.exc"
);
my
@indexFile
= (
""
,
"index.noun"
,
"index.verb"
,
"index.adj"
,
"index.adv"
);
my
@dataFile
= (
""
,
"data.noun"
,
"data.verb"
,
"data.adj"
,
"data.adv"
);
my
$wnHomeUnix
=
defined
(
$ENV
{
"WNHOME"
}) ?
$ENV
{
"WNHOME"
} :
"/usr/local/WordNet-3.0"
;
my
$wnHomePC
=
defined
(
$ENV
{
"WNHOME"
}) ?
$ENV
{
"WNHOME"
} :
"C:\\Program Files\\WordNet\\3.0"
;
my
$wnPrefixUnix
=
defined
(
$ENV
{
"WNSEARCHDIR"
}) ?
$ENV
{
"WNSEARCHDIR"
} :
"$wnHomeUnix/dict"
;
my
$wnPrefixPC
=
defined
(
$ENV
{
"WNSEARCHDIR"
}) ?
$ENV
{
"WNSEARCHDIR"
} :
"$wnHomePC\\dict"
;
END { }
sub
getResetError
{
my
$self
=
shift
;
my
$tmpString
=
$self
->{errorString};
my
$tmpVal
=
$self
->{errorVal};
$self
->{errorString} =
""
;
$self
->{errorVal} = 0;
return
(
$tmpString
,
$tmpVal
);
}
sub
lower
{
my
$word
=
shift
;
$word
=~
tr
/A-Z /a-z_/;
$word
=~ s/\(.*\)$//;
return
$word
;
}
sub
underscore
{
$_
[0] =~
tr
/ /_/;
return
$_
[0];
}
sub
delMarker
{
$_
[0] =~ s/\(.*\)$//;
return
$_
[0];
}
sub
_initialize
{
my
$self
=
shift
;
warn
"Loading WordNet data...\n"
if
(
$self
->{verbose});
my
$old_separator
= $/;
$/ =
"\n"
;
unless
(
$self
->{noload}) {
$self
->loadExclusions ();
}
$self
->loadIndex ();
$self
->openData ();
$self
->{errorString} =
""
;
$self
->{errorVal} =
""
;
warn
"Done.\n"
if
(
$self
->{verbose});
$/ =
$old_separator
;
}
sub
new
{
my
$class
=
shift
;
my
$self
= {};
bless
$self
,
$class
;
if
(
scalar
@_
== 1) {
$self
->{dir} =
shift
;
}
else
{
my
%params
=
@_
;
$self
->{dir} =
$params
{dir}
if
$params
{dir};
$self
->{verbose} =
$params
{verbose}
if
$params
{verbose};
$self
->{noload} =
$params
{noload}
if
$params
{noload};
}
warn
"Dir = "
,
$self
->{dir},
"\n"
if
(
$self
->{verbose});
warn
"Verbose = "
,
$self
->{verbose},
"\n"
if
(
$self
->{verbose});
warn
"Noload = "
,
$self
->{noload},
"\n"
if
(
$self
->{verbose});
if
(-e
$wnPrefixUnix
) {
$self
->{dir} ||=
$wnPrefixUnix
;
$self
->{dir} .=
"/"
if
$self
->{dir} !~ m|/$|;
}
elsif
(-e
$wnPrefixPC
) {
$self
->{dir} ||=
$wnPrefixPC
;
$self
->{dir} .=
"\\"
if
$self
->{dir} !~ m|\\$|;
}
$self
->_initialize ();
return
$self
;
}
sub
DESTROY
{
my
$self
=
shift
;
for
(
my
$i
=1;
$i
<= 4;
$i
++) {
undef
$self
->{data_fh}->[
$i
];
}
}
sub
loadExclusions
{
my
$self
=
shift
;
warn
"(loadExclusions)"
if
(
$self
->{verbose});
for
(
my
$i
=1;
$i
<= 4;
$i
++)
{
my
$file
=
$self
->{dir} .
"$excFile[$i]"
;
my
$fh
= new FileHandle(
$file
);
die
"Not able to open $file: $!"
if
(!
defined
(
$fh
));
while
(
my
$line
= <
$fh
>)
{
my
(
$exc
,
@word
) =
split
(/\s+/,
$line
);
next
if
(!
@word
);
$self
->{morph_exc}->[
$i
]->{
$exc
} ||= [];
push
@{
$self
->{morph_exc}->[
$i
]->{
$exc
}},
@word
;
}
}
}
sub
loadIndex
{
my
$self
=
shift
;
warn
"(loadIndex)"
if
(
$self
->{verbose});
for
(
my
$i
=1;
$i
<= 4;
$i
++)
{
my
$file
=
$self
->{dir} .
"$indexFile[$i]"
;
${
$self
->{indexFilePaths}}[
$i
] =
$file
;
if
(!
$self
->{noload})
{
my
$fh
=
$self
->_getIndexFH(
$pos_num
{
$i
});
my
$line
;
while
(
$line
= <
$fh
>) {
$self
->{version} = $1
if
(
$line
=~ m/WordNet (\S+)/);
last
if
(
$line
=~ m/^\S/);
}
while
(1) {
my
(
$lemma
,
$pos
,
$offsets
,
$sense_cnt
,
$p_cnt
) =
$self
->_parseIndexLine(
$line
);
$self
->{
"index"
}->[
$pos_num
{
$pos
}]->{
$lemma
} =
$offsets
;
$self
->{
"tagsense_cnt"
}->[
$pos_num
{
$pos
}]->{
$lemma
} =
$sense_cnt
;
$line
= <
$fh
>;
last
if
(!
$line
);
}
warn
"\n*** Version 1.6 of the WordNet database is no longer being supported as\n*** of QueryData 1.27. It may still work, but consider yourself warned.\n"
if
(
$self
->{version} eq
"1.6"
);
warn
"\n*** Version 1.7 of the WordNet database is no longer being supported as\n*** of QueryData 1.27. It may still work, but consider yourself warned.\n"
if
(
$self
->{version} eq
"1.7"
);
}
}
}
sub
openData
{
my
$self
=
shift
;
warn
"(openData)"
if
(
$self
->{verbose});
for
(
my
$i
=1;
$i
<= 4;
$i
++)
{
my
$file
=
$self
->{dir} .
"$dataFile[$i]"
;
${
$self
->{dataFilePaths}}[
$i
] =
$file
;
$self
->_getDataFH(
$i
);
}
}
sub
removeDuplicates
{
my
(
$self
,
$aref
) =
@_
;
warn
"(removeDupliates) array="
,
join
(
" "
, @{
$aref
}),
"\n"
if
(
$self
->{verbose});
my
$i
= 0;
while
(
$i
<
$#$aref
) {
if
(
grep
{
$_
eq ${
$aref
}[
$i
]} @{
$aref
}[
$i
+1 ..
$#$aref
] ) {
splice
@$aref
,
$i
, 1;
}
else
{
$i
++;
}
}
}
sub
tokenDetach
{
my
(
$self
,
$string
) =
@_
;
my
(
$word
,
$pos
,
$sense
) =
$string
=~ /^([^\
warn
"(forms) Sense number ignored\n"
if
(
defined
(
$sense
));
die
"(tokenDetach) bad part-of-speech: pos=$pos word=$word sense=$sense"
if
(!
defined
(
$pos
) or !
defined
(
$pos_num
{
$pos
}));
my
@detach
= (
$word
);
if
(
$pos_num
{
$pos
} == 1)
{
push
@detach
, $1
if
(
$word
=~ m/^(.+)s$/);
push
@detach
, $1
if
(
$word
=~ m/^(.+s)es$/);
push
@detach
, $1
if
(
$word
=~ m/^(.+x)es$/);
push
@detach
, $1
if
(
$word
=~ m/^(.+z)es$/);
push
@detach
, $1
if
(
$word
=~ m/^(.+ch)es$/);
push
@detach
, $1
if
(
$word
=~ m/^(.+sh)es$/);
push
@detach
, $1.
"man"
if
(
$word
=~ m/^(.+)men$/);
push
@detach
, $1.
"y"
if
(
$word
=~ m/^(.+)ies$/);
}
elsif
(
$pos_num
{
$pos
} == 2)
{
push
@detach
, $1
if
(
$word
=~ m/^(.+)s$/);
push
@detach
, $1.
"y"
if
(
$word
=~ m/^(.+)ies$/);
push
@detach
, $1
if
(
$word
=~ m/^(.+e)s$/);
push
@detach
, $1
if
(
$word
=~ m/^(.+)es$/);
push
@detach
, $1
if
(
$word
=~ m/^(.+e)d$/);
push
@detach
, $1
if
(
$word
=~ m/^(.+)ed$/);
push
@detach
, $1.
"e"
if
(
$word
=~ m/^(.+)ing$/);
push
@detach
, $1
if
(
$word
=~ m/^(.+)ing$/);
}
elsif
(
$pos_num
{
$pos
} == 3)
{
push
@detach
, $1
if
(
$word
=~ m/^(.+)er$/);
push
@detach
, $1
if
(
$word
=~ m/^(.+)est$/);
push
@detach
, $1
if
(
$word
=~ m/^(.+e)r$/);
push
@detach
, $1
if
(
$word
=~ m/^(.+e)st$/);
}
$self
->removeDuplicates(\
@detach
);
return
@detach
;
}
sub
_forms
{
my
(
$self
,
$word
,
$pos
) =
@_
;
my
$lword
= lower(
$word
);
warn
"(_forms) WORD=$word POS=$pos\n"
if
(
$self
->{verbose});
if
(
$self
->{noload}) {
if
(!
exists
$self
->{morph_exc}) {
$self
->loadExclusions();
}
}
if
(
defined
(
$self
->{morph_exc}->[
$pos
]->{
$lword
})) {
return
(
$word
, @{
$self
->{morph_exc}->[
$pos
]->{
$lword
}});
}
my
@token
=
split
(/[ _]/,
$word
);
return
tokenDetach (
$self
,
$token
[0].
"#"
.
$pos
)
if
(
@token
== 1);
my
@forms
;
for
(
my
$i
=0;
$i
<
@token
;
$i
++) {
push
@{
$forms
[
$i
]}, _forms (
$self
,
$token
[
$i
],
$pos
);
}
my
@rtn
;
my
@index
;
for
(
my
$i
=0;
$i
<
@token
;
$i
++) {
$index
[
$i
] = 0; }
while
(1) {
my
$colloc
=
$forms
[0]->[
$index
[0]];
for
(
my
$i
=1;
$i
<
@token
;
$i
++) {
$colloc
.=
"_"
.
$forms
[
$i
]->[
$index
[
$i
]];
}
push
@rtn
,
$colloc
;
my
$i
;
for
(
$i
=0;
$i
<
@token
;
$i
++) {
last
if
(++
$index
[
$i
] < @{
$forms
[
$i
]});
$index
[
$i
] = 0;
}
last
if
(
$i
>=
@token
);
}
return
@rtn
;
}
sub
forms
{
my
(
$self
,
$string
) =
@_
;
my
(
$word
,
$pos
,
$sense
) =
$string
=~ /^([^\
warn
"(forms) Sense number ignored\n"
if
(
defined
(
$sense
));
warn
"(forms) WORD=$word POS=$pos\n"
if
(
$self
->{verbose});
die
"(forms) Bad part-of-speech: pos=$pos"
if
(!
defined
(
$pos
) or !
defined
(
$pos_num
{
$pos
}));
my
@rtn
= _forms (
$self
,
$word
,
$pos_num
{
$pos
});
for
(
my
$i
=0;
$i
<
@rtn
; ++
$i
) {
$rtn
[
$i
] .=
"\#$pos"
;
}
return
@rtn
;
}
sub
getSensePointers
{
my
(
$self
,
$line
,
$ptr
) =
@_
;
warn
"(getSensePointers) ptr="
,
keys
(%{
$ptr
}),
" line=\"$line\"\n"
if
(
$self
->{verbose});
my
(
@rtn
,
$w_cnt
);
(
undef
,
undef
,
undef
,
$w_cnt
,
$line
) =
split
(/\s+/,
$line
, 5);
$w_cnt
=
hex
(
$w_cnt
);
for
(
my
$i
=0;
$i
<
$w_cnt
; ++
$i
) {
(
undef
,
undef
,
$line
) =
split
(/\s+/,
$line
, 3);
}
my
$p_cnt
;
(
$p_cnt
,
$line
) =
split
(/\s+/,
$line
, 2);
for
(
my
$i
=0;
$i
<
$p_cnt
; ++
$i
) {
my
(
$sym
,
$offset
,
$pos
,
$st
);
(
$sym
,
$offset
,
$pos
,
$st
,
$line
) =
split
(/\s+/,
$line
, 5);
push
@rtn
,
$self
->getSense(
$offset
,
$pos
)
if
(
hex
(
$st
)==0 and
defined
(
$ptr
->{
$sym
}));
}
return
@rtn
;
}
sub
getWordPointers
{
my
(
$self
,
$line
,
$ptr
,
$word
) =
@_
;
warn
"(getWordPointers) ptr="
,
keys
(%{
$ptr
}),
" word=$word line=\"$line\"\n"
if
(
$self
->{verbose});
my
$lword
= lower(
$word
);
my
(
@rtn
,
$w_cnt
);
(
undef
,
undef
,
undef
,
$w_cnt
,
$line
) =
split
(/\s+/,
$line
, 5);
$w_cnt
=
hex
(
$w_cnt
);
my
@word
;
for
(
my
$i
=0;
$i
<
$w_cnt
; ++
$i
) {
(
$word
[
$i
],
undef
,
$line
) =
split
(/\s+/,
$line
, 3);
}
my
$p_cnt
;
(
$p_cnt
,
$line
) =
split
(/\s+/,
$line
, 2);
for
(
my
$i
=0;
$i
<
$p_cnt
; ++
$i
) {
my
(
$sym
,
$offset
,
$pos
,
$st
);
(
$sym
,
$offset
,
$pos
,
$st
,
$line
) =
split
(/\s+/,
$line
, 5);
next
if
(!
$st
);
my
(
$src
,
$tgt
) = (
$st
=~ m/([0-9a-f]{2})([0-9a-f]{2})/);
push
@rtn
,
$self
->getWord(
$offset
,
$pos
,
hex
(
$tgt
))
if
(
defined
(
$ptr
->{
$sym
}) and (
$word
[
hex
(
$src
)-1] =~ m/
$lword
/i));
}
return
@rtn
;
}
sub
getAllSenses
{
my
(
$self
,
$offset
,
$pos
) =
@_
;
warn
"(getAllSenses) offset=$offset pos=$pos\n"
if
(
$self
->{verbose});
my
@rtn
;
my
$line
=
$self
->_dataLookup(
$pos
,
$offset
);
my
$w_cnt
;
(
undef
,
undef
,
undef
,
$w_cnt
,
$line
) =
split
(/\s+/,
$line
, 5);
$w_cnt
=
hex
(
$w_cnt
);
my
@words
;
for
(
my
$i
=0;
$i
<
$w_cnt
; ++
$i
) {
(
$words
[
$i
],
undef
,
$line
) =
split
(/\s+/,
$line
, 3);
}
foreach
my
$word
(
@words
) {
$word
= delMarker(
$word
);
my
$lword
= lower (
$word
);
my
@offArr
=
$self
->_indexOffsetLookup(
$lword
,
$pos
);
for
(
my
$i
=0;
$i
<
@offArr
;
$i
++) {
if
(
$offArr
[
$i
] ==
$offset
) {
push
@rtn
,
"$word\#$pos\#"
.(
$i
+1);
last
;
}
}
}
return
@rtn
;
}
sub
getSense
{
my
(
$self
,
$offset
,
$pos
) =
@_
;
warn
"(getSense) offset=$offset pos=$pos\n"
if
(
$self
->{verbose});
my
$line
=
$self
->_dataLookup(
$pos
,
$offset
);
my
(
$lexfn
,
$word
);
(
undef
,
$lexfn
,
undef
,
undef
,
$word
,
$line
) =
split
(/\s+/,
$line
, 6);
$word
= delMarker(
$word
);
my
$lword
= lower(
$word
);
my
@offArr
=
$self
->_indexOffsetLookup(
$word
,
$pos
);
for
(
my
$i
=0;
$i
<
@offArr
;
$i
++) {
return
"$word\#$pos\#"
.(
$i
+1)
if
(
$offArr
[
$i
] ==
$offset
);
}
die
"(getSense) Internal error: offset=$offset pos=$pos"
;
}
sub
_getIndexFH {
my
$self
=
shift
;
my
$pos
=
shift
;
my
$fh
=
$self
->{index_fh}->[
$pos_num
{
$pos
}] ||=
FileHandle->new ( ${
$self
->{indexFilePaths}}[
$pos_num
{
$pos
}] );
unless
(
$fh
) {
die
"Couldn't open index file: "
. ${
$self
->{indexFilePaths}}[
$pos_num
{
$pos
}];
}
return
$fh
;
}
sub
_getDataFH {
my
$self
=
shift
;
my
$pos
=
shift
;
my
$fh
=
$self
->{data_fh}->[
$pos_num
{
$pos
}] ||=
FileHandle->new ( ${
$self
->{dataFilePaths}}[
$pos_num
{
$pos
}] );
unless
(
$fh
) {
die
"Couldn't open data file: "
. ${
$self
->{indexFilePaths}}[
$pos_num
{
$pos
}];
}
return
$fh
;
}
sub
_indexOffsetLookup {
my
$self
=
shift
;
my
(
$word
,
$pos
,
$sense
) =
@_
;
my
$lword
= lower (
$word
);
if
(
$sense
) {
my
$offset
;
if
(
$self
->{noload}) {
my
$line
=
$self
->_indexLookup(
$pos
,
$lword
);
my
(
$lemma
,
$pos
,
$offsets
,
$sense_cnt
,
$p_cnt
) =
$self
->_parseIndexLine(
$line
);
$offset
=
$$offsets
[
$sense
- 1]
if
(
$lemma
eq
$lword
);
}
else
{
$offset
= (
unpack
"i*"
,
$self
->{
"index"
}->[
$pos_num
{
$pos
}]->{
$lword
})[
$sense
-1]
if
(
exists
$self
->{
"index"
}->[
$pos_num
{
$pos
}]->{
$lword
});
}
return
$offset
;
}
else
{
my
@offsets
= ();
if
(
$self
->{noload}) {
my
$line
=
$self
->_indexLookup(
$pos
,
$lword
);
my
(
$lemma
,
$pos
,
$offsets
,
$sense_cnt
,
$p_cnt
) =
$self
->_parseIndexLine(
$line
);
@offsets
=
@$offsets
if
(
$lemma
eq
$lword
);
}
else
{
if
(
defined
(
$self
->{
"index"
}->[
$pos_num
{
$pos
}]->{
$lword
})) {
@offsets
= (
unpack
"i*"
,
$self
->{
"index"
}->[
$pos_num
{
$pos
}]->{
$lword
});
}
}
return
@offsets
;
}
}
sub
_indexLookup {
my
$self
=
shift
;
my
(
$pos
,
$word
) =
@_
;
my
$fh
=
$self
->_getIndexFH(
$pos
);
look(
$fh
,
$word
, 0);
my
$line
= <
$fh
>;
return
$line
;
}
sub
_dataLookup {
my
$self
=
shift
;
my
(
$pos
,
$offset
) =
@_
;
my
$fh
=
$self
->_getDataFH(
$pos
);
seek
(
$fh
,
$offset
, 0);
my
$line
= <
$fh
>;
return
$line
;
}
sub
getWord
{
my
(
$self
,
$offset
,
$pos
,
$num
) =
@_
;
warn
"(getWord) offset=$offset pos=$pos num=$num"
if
(
$self
->{verbose});
my
$fh
=
$self
->_getDataFH(
$pos
);
seek
$fh
,
$offset
, 0;
my
$line
= <
$fh
>;
my
$w_cnt
;
(
undef
,
undef
,
undef
,
$w_cnt
,
$line
) =
split
(/\s+/,
$line
, 5);
$w_cnt
=
hex
(
$w_cnt
);
my
$word
;
for
(
my
$i
=0;
$i
<
$w_cnt
; ++
$i
) {
(
$word
,
undef
,
$line
) =
split
(/\s+/,
$line
, 3);
$word
= delMarker(
$word
);
last
if
(
$i
+1 ==
$num
);
}
my
$lword
= lower(
$word
);
my
@offArr
=
$self
->_indexOffsetLookup(
$lword
,
$pos
);;
for
(
my
$i
=0;
$i
<
@offArr
;
$i
++) {
return
"$word\#$pos\#"
.(
$i
+1)
if
(
$offArr
[
$i
] ==
$offset
);
}
die
"(getWord) Bad number: offset=$offset pos=$pos num=$num"
;
}
sub
offset
{
my
(
$self
,
$string
) =
@_
;
my
(
$word
,
$pos
,
$sense
)
=
$string
=~ /^([^\
warn
"(offset) WORD=$word POS=$pos SENSE=$sense\n"
if
(
$self
->{verbose});
if
(!
defined
(
$sense
)
or !
defined
(
$pos
)
or !
defined
(
$word
)
or !
defined
(
$pos_num
{
$pos
})) {
$self
->{errorVal} = 1;
$self
->{errorString} =
"One or more bogus arguments: offset($word,$pos,$sense)"
;
return
;
}
my
$lword
= lower(
$word
);
my
$res
=
$self
->_indexOffsetLookup(
$lword
,
$pos
,
$sense
);
return
$res
if
$res
;
$self
->{errorVal} = 2;
$self
->{errorString} =
"Index not initialized properly or `$word' not found in index"
;
return
;
}
sub
lexname
{
my
(
$self
,
$string
) =
@_
;
my
$offset
=
$self
->offset(
$string
);
my
(
$word
,
$pos
,
$sense
) =
$string
=~ /^([^\
warn
"(lexname) word=$word pos=$pos sense=$sense offset=$offset\n"
if
(
$self
->{verbose});
my
$line
=
$self
->_dataLookup(
$pos
,
$offset
);
my
(
undef
,
$lexfn
,
undef
) =
split
(/\s+/,
$line
, 3);
return
$lexnames
{
$lexfn
};
}
sub
frequency
{
my
(
$self
,
$string
) =
@_
;
my
(
$word
,
$pos
,
$sense
) =
$string
=~ /^([^\
unless
(
defined
$word
and
defined
$pos
and
defined
$sense
) {
croak
"(frequency) Query string is not a valid type (3) string"
;
}
warn
"(frequency) word=$word pos=$pos sense=$sense\n"
if
$self
->{verbose};
my
$cntfile
= File::Spec->catfile (
$self
->{dir} .
'cntlist.rev'
);
open
CFH,
"<$cntfile"
or
die
"Cannot open $cntfile: $!"
;
my
$position
= Search::Dict::look (
*CFH
,
"$word\%"
, 0, 0);
while
(<CFH>) {
if
(/^
$word
\%(\d+):[^ ]+ (\d+) (\d+)/) {
next
unless
$pos_map
{$1} eq
$pos
;
next
unless
$2 eq
$sense
;
close
CFH;
return
$3;
}
else
{
last
;
}
}
close
CFH;
return
0;
}
sub
querySense
{
my
$self
=
shift
;
my
$string
=
shift
;
warn
"(querySense) STRING=$string"
if
$self
->{verbose};
my
$old_separator
= $/;
$/ =
"\n"
;
my
@rtn
;
my
(
$word
,
$pos
,
$sense
) =
$string
=~ /^([^\
die
"(querySense) Bad query string: $string"
if
(!
defined
(
$word
));
my
$lword
= lower (
$word
);
die
"(querySense) Bad part-of-speech: $string"
if
(
defined
(
$pos
) && !
$pos_num
{
$pos
});
if
(
defined
(
$sense
)) {
my
$rel
=
shift
;
warn
"(querySense) WORD=$word POS=$pos SENSE=$sense RELATION=$rel\n"
if
(
$self
->{verbose});
die
"(querySense) Relation required: $string"
if
(!
defined
(
$rel
));
die
"(querySense) Bad relation: $rel"
if
(!
defined
(
$relNameSym
{
$rel
}) and !
defined
(
$relSymName
{
$rel
})
and (
$rel
ne
"glos"
) and (
$rel
ne
"syns"
));
$rel
=
$relSymName
{
$rel
}
if
(
defined
(
$relSymName
{
$rel
}));
my
$offset
=
$self
->_indexOffsetLookup(
$lword
,
$pos
,
$sense
);
my
$line
=
$self
->_dataLookup(
$pos
,
$offset
);
if
(!
$line
) {
die
"Line not found for offset $offset!"
;
}
if
(
$rel
eq
"glos"
) {
$line
=~ m/.*\|\s*(.*)$/;
$rtn
[0] = $1;
}
elsif
(
$rel
eq
"syns"
) {
@rtn
=
$self
->getAllSenses (
$offset
,
$pos
);
}
else
{
@rtn
=
$self
->getSensePointers(
$line
,
$relNameSym
{
$rel
});
}
}
elsif
(
defined
(
$pos
)) {
warn
"(querySense) WORD=$word POS=$pos\n"
if
(
$self
->{verbose});
my
@offsets
=
$self
->_indexOffsetLookup(
$lword
,
$pos
);
$word
= underscore(delMarker(
$word
));
for
(
my
$i
=0;
$i
<
@offsets
;
$i
++) {
push
@rtn
,
"$word\#$pos\#"
.(
$i
+1);
}
}
elsif
(
defined
(
$word
)) {
warn
"(querySense) WORD=$word\n"
if
(
$self
->{verbose});
$word
= underscore(delMarker(
$word
));
for
(
my
$i
=1;
$i
<= 4;
$i
++) {
my
(
$offset
) =
$self
->_indexOffsetLookup(
$lword
,
$i
);
push
@rtn
,
"$word\#"
.
$pos_map
{
$i
}
if
$offset
;
}
}
else
{
warn
"(querySense) no results being returned"
if
$self
->{verbose};
}
$/ =
$old_separator
;
return
@rtn
;
}
sub
queryWord
{
my
$self
=
shift
;
my
$string
=
shift
;
my
$old_separator
= $/;
$/ =
"\n"
;
my
@rtn
;
my
(
$word
,
$pos
,
$sense
) =
$string
=~ /^([^\
die
"(queryWord) Bad query string: $string"
if
(!
defined
(
$word
));
my
$lword
= lower (
$word
);
die
"(queryWord) Bad part-of-speech: $string"
if
(
defined
(
$pos
) && !
$pos_num
{
$pos
});
if
(
defined
(
$sense
)) {
my
$rel
=
shift
;
warn
"(queryWord) WORD=$word POS=$pos SENSE=$sense RELATION=$rel\n"
if
(
$self
->{verbose});
die
"(queryWord) Relation required: $string"
if
(!
defined
(
$rel
));
die
"(queryWord) Bad relation: $rel"
if
((!
defined
(
$relNameSym
{
$rel
}) and !
defined
(
$relSymName
{
$rel
})));
$rel
=
$relSymName
{
$rel
}
if
(
defined
(
$relSymName
{
$rel
}));
my
$offset
=
$self
->_indexOffsetLookup(
$lword
,
$pos
,
$sense
);
my
$line
=
$self
->_dataLookup(
$pos
,
$offset
);
push
@rtn
,
$self
->getWordPointers(
$line
,
$relNameSym
{
$rel
},
$word
);
}
elsif
(
defined
(
$pos
))
{
warn
"(queryWord) WORD=$word POS=$pos\n"
if
(
$self
->{verbose});
my
@offsets
=
$self
->_indexOffsetLookup(
$lword
,
$pos
);
$word
= underscore(delMarker(
$word
));
for
(
my
$i
=0;
$i
<
@offsets
;
$i
++) {
push
@rtn
,
"$word\#$pos\#"
.(
$i
+1);
}
}
else
{
print
STDERR
"(queryWord) WORD=$word\n"
if
(
$self
->{verbose});
$word
= underscore(delMarker(
$word
));
for
(
my
$i
=1;
$i
<= 4;
$i
++) {
my
$offset
=
$self
->_indexOffsetLookup(
$lword
,
$i
);
push
@rtn
,
"$word\#"
.
$pos_map
{
$i
}
if
$offset
;
}
}
$/ =
$old_separator
;
return
@rtn
;
}
sub
validForms
{
my
(
$self
,
$string
) =
@_
;
my
(
@possible_forms
,
@valid_forms
);
my
(
$word
,
$pos
,
$sense
) =
$string
=~ /^([^\
warn
"(valid_forms) Sense number ignored: $string\n"
if
(
defined
$sense
);
if
(!
defined
(
$pos
)) {
my
@rtn
;
push
@rtn
,
$self
->validForms(
$string
.
"#n"
);
push
@rtn
,
$self
->validForms(
$string
.
"#v"
);
push
@rtn
,
$self
->validForms(
$string
.
"#a"
);
push
@rtn
,
$self
->validForms(
$string
.
"#r"
);
return
@rtn
;
}
die
"(valid_forms) Invalid part-of-speech: $pos"
if
(!
defined
(
$pos_map
{
$pos
}));
@possible_forms
=
$self
->forms (
"$word#$pos"
);
@valid_forms
=
grep
$self
->querySense (
$_
),
@possible_forms
;
return
@valid_forms
;
}
sub
_parseIndexLine {
my
$self
=
shift
;
my
$line
=
shift
;
my
(
$lemma
,
$pos
,
$sense_cnt
,
$p_cnt
,
$rline
) =
split
(/\s+/,
$line
, 5);
for
(
my
$i
=0;
$i
<
$p_cnt
; ++
$i
) {
(
undef
,
$rline
) =
split
(/\s+/,
$rline
, 2);
}
my
(
undef
,
$tagsense_cnt
,
@offsets
) =
split
(/\s+/,
$rline
);
if
(
$self
->{noload}) {
return
(
$lemma
,
$pos
, \
@offsets
,
$tagsense_cnt
);
}
else
{
return
(
$lemma
,
$pos
, (
pack
"i*"
,
@offsets
),
$tagsense_cnt
);
}
}
sub
listAllWords
{
my
(
$self
,
$pos
) =
@_
;
if
(
$self
->{noload}) {
my
@words
;
my
$fh
=
$self
->_getIndexFH(
$pos
);
seek
(
$fh
, 0, 0);
for
my
$line
(<
$fh
>) {
next
if
(
$line
=~ m/^\s/);
my
(
$lemma
,
@rest
) =
$self
->_parseIndexLine(
$line
);
push
@words
,
$lemma
;
}
return
@words
;
}
else
{
return
keys
(%{
$self
->{
"index"
}->[
$pos_num
{
$pos
}]});
}
}
sub
level
{
my
(
$self
,
$word
) =
@_
;
my
$level
;
for
(
$level
=0;
$word
; ++
$level
)
{
(
$word
) =
$self
->querySense (
$word
,
"hype"
);
}
return
$level
;
}
sub
tagSenseCnt
{
my
(
$self
,
$string
) =
@_
;
my
(
$word
,
$pos
,
$sense
) =
$string
=~ /^([^\
warn
"(tagSenseCnt) Ignorning sense: $string"
if
(
defined
(
$sense
));
die
"Word and part-of-speech required word=$word pos=$pos"
if
(!
defined
(
$word
) or !
defined
(
$pos
) or !
defined
(
$pos_num
{
$pos
}));
my
$lword
= lower(
$word
);
return
$self
->_getTagSenseCnt(
$lword
,
$pos
);
}
sub
dataPath {
my
$self
=
shift
;
return
$self
->{dir};
}
sub
_getTagSenseCnt {
my
$self
=
shift
;
my
(
$lword
,
$pos
) =
@_
;
if
(
$self
->{noload}) {
my
$line
=
$self
->_indexLookup(
$pos
,
$lword
);
my
(
$lemma
,
$pos
,
$offsets
,
$tagsense_cnt
) =
$self
->_parseIndexLine(
$line
);
return
$tagsense_cnt
if
(
$lemma
eq
$lword
);
}
else
{
return
$self
->{
"tagsense_cnt"
}->[
$pos_num
{
$pos
}]->{
$lword
};
}
}
1;
Hide Show 226 lines of Pod