use
5.008004;
our
$VERSION
=
'0.07'
;
my
$STOPPED
;
my
%measure
;
my
$wn
;
use
constant
CONFIG
=>
$ENV
{ HOME }.
"/.wordnet-similarity"
;
sub
new
{
my
(
$class
) =
@_
;
my
$self
= {};
bless
$self
,
$class
;
}
sub
initialize
{
my
(
$self
) =
@_
;
$self
->{ wn } = WordNet::QueryData->new;
$self
->{ measure }{
"path"
} = WordNet::Similarity::path->new(
$self
->{ wn },CONFIG.
"/config-path.conf"
);
$self
->{ measure }{
"hso"
} = WordNet::Similarity::hso->new(
$self
->{ wn },CONFIG.
"/config-hso.conf"
);
$self
->{ measure }{
"lesk"
} = WordNet::Similarity::lesk->new(
$self
->{ wn });
$self
->{ measure }{
"lin"
} = WordNet::Similarity::lin->new(
$self
->{ wn });
$self
->{ measure }{
"random"
} = WordNet::Similarity::random->new(
$self
->{ wn });
$self
->{ measure }{
"wup"
} = WordNet::Similarity::wup->new(
$self
->{ wn },CONFIG.
"/config-wup.conf"
);
$self
->{ measure }{
"jcn"
} = WordNet::Similarity::jcn->new(
$self
->{ wn });
$self
->{ measure }{
"res"
} = WordNet::Similarity::res->new(
$self
->{ wn });
$self
->{ measure }{
"vector_pairs"
} = WordNet::Similarity::vector_pairs->new(
$self
->{ wn });
$self
->{ measure }{
"lch"
} = WordNet::Similarity::lch->new(
$self
->{ wn },CONFIG.
"/config-lch.conf"
);
}
sub
compute_similarity
{
my
(
$self
,
$word1
,
$word2
,
$measure_index
) =
@_
;
my
@allmeasures
= (
"hso"
,
"lch"
,
"lesk"
,
"lin"
,
"jcn"
,
"path"
,
"random"
,
"res"
,
"vector_pairs"
,
"wup"
);
my
@word1senses
=[];
my
@word2senses
=[];
if
(
$self
->{ STOPPED }==0)
{
@word1senses
=
$self
->find_allsenses(
$word1
);
}
if
(
$self
->{ STOPPED }==0)
{
@word2senses
=
$self
->find_allsenses(
$word2
);
}
my
$measurename
=
$allmeasures
[
$measure_index
-1];
my
$word1sense
;
my
$word2sense
;
my
%values
=();
my
%errors
=();
my
%measure
=();
my
%traces
=();
my
$module
;
my
$value
;
foreach
$word1sense
(
@word1senses
)
{
while
(Gtk2->events_pending)
{
Gtk2->main_iteration;
}
foreach
$word2sense
(
@word2senses
)
{
while
(Gtk2->events_pending)
{
Gtk2->main_iteration;
}
if
(
$self
->{ STOPPED }==0)
{
if
(
$measure_index
!= 0)
{
if
(
$self
->{ STOPPED } == 0)
{
$value
=
$self
->{ measure }{
$allmeasures
[
$measure_index
-1]}->getRelatedness(
$word1sense
,
$word2sense
);
my
(
$error
,
$errorString
) =
$self
->{ measure }{
$allmeasures
[
$measure_index
-1]}->getError();
if
(
$error
)
{
$values
{
$word1sense
}{
$word2sense
}=-1;
$errors
{
$word1sense
}{
$word2sense
}=
$errorString
;
}
else
{
$values
{
$word1sense
}{
$word2sense
}=
$value
;
$traces
{
$word1sense
}{
$word2sense
}{
$allmeasures
[
$measure_index
-1]}=
$self
->{ measure }{
$allmeasures
[
$measure_index
-1]}->getTraceString;
}
}
}
else
{
foreach
$module
(
@allmeasures
)
{
if
(
$self
->{ STOPPED } == 0)
{
$value
=
$self
->{ measure }{
$module
}->getRelatedness(
$word1sense
,
$word2sense
);
my
(
$error
,
$errorString
) =
$self
->{ measure }{
$module
}->getError();
if
(
$error
)
{
$values
{
$word1sense
}{
$word2sense
}{
$module
}=-1;
$errors
{
$word1sense
}{
$word2sense
}{
$module
}=
$errorString
;
}
else
{
$values
{
$word1sense
}{
$word2sense
}{
$module
}=
$value
;
$traces
{
$word1sense
}{
$word2sense
}{
$module
}=
$self
->{ measure }{
$module
}->getTraceString;
}
}
}
}
}
}
}
return
(\
%values
, \
%errors
,\
%traces
);
}
sub
find_allsenses
{
my
(
$self
,
$word
)=
@_
;
my
@temp
=
split
'#'
,
$word
;
my
$wordlevel
=
$#temp
+1;
my
$pos
;
my
@wordsenses
= ();
my
@wordsense
;
if
(
$wordlevel
==1)
{
@temp
=
$self
->{ wn }->queryWord(
$word
);
foreach
$pos
(
@temp
)
{
@wordsense
=
$self
->{ wn }->queryWord(
$pos
);
push
(
@wordsenses
,
@wordsense
);
@wordsense
= ();
}
}
elsif
(
$wordlevel
==2)
{
@wordsenses
=
$self
->{ wn }->queryWord(
$word
);
}
else
{
$wordsenses
[0]=
$word
}
return
@wordsenses
;
}
sub
convert_to_meta
{
my
(
$self
,
$word1
,
$word2
,
$tracestring
,
$measure
) =
@_
;
my
@trace
=
split
"\n"
,
$tracestring
;
my
$length
=
$#trace
;
my
$i
;
my
%uniquepaths
;
my
$path
;
my
$w2tree
;
my
@synsets
= ();
my
$synset
;
my
%lcs_path
;
my
%tree
;
my
$wtree
;
my
$alt_path
;
my
%alt_paths
;
my
%allpaths
;
my
$maxdepth
=0;
my
$trace_return
;
if
(
$measure
=~ /path/)
{
my
@paths
=
grep
/Shortest path/,
@trace
;
my
@pathlengths
=
grep
/Path
length
/,
@trace
;
my
@hypertrees
=
grep
/HyperTree/,
@trace
;
my
$pathlength
=
$pathlengths
[0];
foreach
$i
(0...
$#hypertrees
)
{
if
(
length
(
$hypertrees
[
$i
])>
$maxdepth
)
{
$maxdepth
=
length
(
$hypertrees
[
$i
]);
}
$hypertrees
[
$i
]=~ s/\
*Root
\*/Root/;
$hypertrees
[
$i
]=~ s/HyperTree: //;
}
foreach
$path
(
@paths
)
{
$path
=~ s/\
*Root
\*/Root/;
$path
=~ s/Shortest path: //;
if
(
length
(
$path
)>0)
{
$uniquepaths
{
$path
}=1;
$allpaths
{
$path
}=1;
}
}
my
@syns1
;
my
@syns2
;
my
$syn
;
my
$all_lcs
=
""
;
my
@word1tree
;
@syns1
=
$self
->{ wn }->querySense(
$word1
,
"syns"
);
foreach
$syn
(
@syns1
)
{
push
@word1tree
,
grep
(/\b
$syn
\b/,
@hypertrees
);
}
my
@word2tree
;
@syns2
=
$self
->{ wn }->querySense(
$word2
,
"syns"
);
foreach
$syn
(
@syns2
)
{
push
@word2tree
,
grep
(/\b
$syn
\b/,
@hypertrees
);
}
if
(
$#word2tree
==
$#hypertrees
)
{
@word2tree
= ();
foreach
$syn
(
@syns1
)
{
push
@word2tree
,
grep
(!/\b
$syn
\b/,
@hypertrees
);
}
}
if
(
$#word1tree
==
$#hypertrees
)
{
@word1tree
= ();
foreach
$syn
(
@syns2
)
{
push
@word1tree
,
grep
(!/\b
$syn
\b/,
@hypertrees
);
}
}
@pathlengths
= ();
@trace
=();
foreach
$path
(
keys
%uniquepaths
)
{
@synsets
=
split
" "
,
$path
;
PATH:
foreach
$w2tree
(
@word2tree
)
{
foreach
$synset
(
@synsets
)
{
if
(
$w2tree
=~/
$synset
/)
{
$lcs_path
{
$path
}{
$synset
}=1;
if
(
length
(
$all_lcs
) > 0)
{
$all_lcs
=
$synset
.
" "
.
$all_lcs
;
}
else
{
$all_lcs
=
$synset
}
last
PATH;
}
}
}
}
my
%lcs_root_paths
= ();
my
$lcs_root
;
foreach
$path
(
@hypertrees
)
{
$lcs_root
=
''
;
@synsets
=
split
" "
,
$path
;
foreach
$synset
(
@synsets
)
{
if
(
length
(
$lcs_root
)!=0 )
{
$lcs_root
=
$synset
.
" hypernym "
.
$lcs_root
;
}
else
{
$lcs_root
=
$synset
;
}
if
(
$all_lcs
=~/
$synset
/)
{
last
;
}
}
$lcs_root_paths
{
$lcs_root
}++;
$allpaths
{
$lcs_root
}=1;
$alt_paths
{
$lcs_root
}=1;
}
my
%w2_paths
=();
my
$w2_path
;
foreach
$path
(
keys
%uniquepaths
)
{
$w2_path
=
''
;
@synsets
=
split
" "
,
$path
;
foreach
$synset
(
reverse
@synsets
)
{
if
(
length
(
$w2_path
)!=0 )
{
$w2_path
=
$w2_path
.
" hypernym "
.
$synset
;
}
else
{
$w2_path
=
$synset
;
}
if
(
exists
$lcs_path
{
$path
}{
$synset
})
{
last
;
}
}
$w2_paths
{
$w2_path
}++;
}
my
%w1_paths
=();
my
$w1_path
;
foreach
$path
(
keys
%uniquepaths
)
{
$w1_path
=
''
;
@synsets
=
split
" "
,
$path
;
foreach
$synset
(
@synsets
)
{
if
(
length
(
$w1_path
)!=0 )
{
$w1_path
=
$w1_path
.
" hypernym "
.
$synset
;
}
else
{
$w1_path
=
$synset
;
}
if
(
exists
$lcs_path
{
$path
}{
$synset
})
{
$all_lcs
=
$synset
;
last
;
}
}
$w1_paths
{
$w1_path
}++;
}
my
$key
;
$trace_return
=
$measure
.
"\n"
;
my
$j
=0;
foreach
$key
(
keys
%w1_paths
)
{
if
(
$j
==0)
{
$trace_return
=
$trace_return
.
$key
;
}
else
{
$trace_return
=
$trace_return
.
" OR "
.
$key
;
}
$j
++;
}
$trace_return
=
$trace_return
.
"\n"
;
$j
=0;
foreach
$key
(
keys
%w2_paths
)
{
if
(
$j
==0)
{
$trace_return
=
$trace_return
.
$key
;
}
else
{
$trace_return
=
$trace_return
.
" OR "
.
$key
;
}
$j
++;
}
$trace_return
=
$trace_return
.
"\n"
;
foreach
$key
(
keys
%alt_paths
)
{
if
(
$key
=~ /
$all_lcs
/)
{
$trace_return
=
$trace_return
.
$key
.
"\n"
;
}
}
$trace_return
=
$trace_return
.
"Max Depth = "
.
$maxdepth
.
"\n"
;
$trace_return
=
$trace_return
.
$pathlength
.
"\n"
;
}
elsif
(
$measure
=~/wup/)
{
my
@depth
=
grep
/Depth/,
@trace
;
my
@word1_depth
;
my
$syn
;
my
@syns1
;
my
@syns2
;
@syns1
=
$self
->{ wn }->querySense(
$word1
,
"syns"
);
foreach
$syn
(
@syns1
)
{
push
@word1_depth
,
grep
/
$syn
/,
@depth
;
}
my
@word2_depth
;
@syns2
=
$self
->{ wn }->querySense(
$word2
,
"syns"
);
foreach
$syn
(
@syns2
)
{
push
@word2_depth
,
grep
/
$syn
/,
@depth
;
}
my
@lcs_depth
=
grep
/Lowest\sCommon\sSubsumers/,
@depth
;
my
@hypertrees
=
grep
/HyperTree/,
@trace
;
foreach
$i
(0...
$#hypertrees
)
{
if
(
length
(
$hypertrees
[
$i
])>
$maxdepth
)
{
$maxdepth
=
length
(
$hypertrees
[
$i
]);
}
$hypertrees
[
$i
]=~ s/\
*Root
\*/Root/;
$hypertrees
[
$i
]=~ s/HyperTree: //;
}
my
$w_tree
;
my
$tree
;
my
%trace_trees
;
foreach
$w_tree
(
@hypertrees
)
{
$tree
=
''
;
@synsets
=
split
" "
,
$w_tree
;
foreach
$synset
(
reverse
@synsets
)
{
if
(
length
(
$tree
)!=0 )
{
$tree
=
$tree
.
" hypernym "
.
$synset
;
}
else
{
$tree
=
$synset
;
}
}
$trace_trees
{
$tree
}++;
}
my
$lcs
=
$lcs_depth
[0];
my
$key
;
my
$temptree
=
join
" "
,
@hypertrees
;
$lcs
=~ s/Lowest\sCommon\sSubsumers:\s//;
$lcs
=~ s/\
*Root
\*/Root/;
$lcs
=~ s/\s\(Depth=\d\)//;
my
$depth_word1
;
my
$depth_word2
;
my
@temp
;
if
(
$#word1_depth
>0)
{
@temp
=
split
/=/,
$word1_depth
[0];
$depth_word1
=
$word1
.
" = "
.
$temp
[1];
}
else
{
foreach
$syn
(
@syns1
)
{
if
(
$temptree
=~ /
$syn
/)
{
$depth_word1
=
$syn
;
last
;
}
}
}
if
(
$#word2_depth
>0)
{
@temp
=
split
/=/,
$word2_depth
[0];
$depth_word2
=
$word2
.
" = "
.
$temp
[1];
}
else
{
foreach
$syn
(
@syns2
)
{
if
(
$temptree
=~/
$syn
/)
{
$depth_word2
=
$syn
;
last
;
}
}
}
@temp
=
split
/=/,
$lcs_depth
[0];
$temp
[1] =~ s/\)//;
$lcs
=
$lcs
.
" = "
.
$temp
[1];
$trace_return
=
$measure
.
"\n"
;
foreach
$key
(
keys
%trace_trees
)
{
$trace_return
=
$trace_return
.
$key
.
"\n"
;
}
$trace_return
=
$trace_return
.
$depth_word1
.
"\n"
;
$trace_return
=
$trace_return
.
$depth_word2
.
"\n"
;
$trace_return
=
$trace_return
.
$lcs
;
}
elsif
(
$measure
=~/lch/)
{
@trace
=
split
/Lowest\sCommon\sSubsumer\(s\):\s/,
$tracestring
;
my
@hypertrees
=
split
/\n/,
$trace
[0];
my
@lcs_temp
=
split
/\n/,
$trace
[1];
my
$key
;
my
@lcs_split
;
my
%lcs
;
foreach
$key
(
@lcs_temp
)
{
@lcs_split
=
split
" "
,
$key
;
$lcs
{
$lcs_split
[0]}=1;
}
foreach
$i
(0...
$#hypertrees
)
{
if
(
length
(
$hypertrees
[
$i
])>
$maxdepth
)
{
$maxdepth
=
length
(
$hypertrees
[
$i
]);
}
$hypertrees
[
$i
]=~ s/\
*Root
\*/Root/;
$hypertrees
[
$i
]=~ s/HyperTree: //;
}
my
@syns1
;
my
@syns2
;
my
$syn
;
my
@word1tree
;
@syns1
=
$self
->{ wn }->querySense(
$word1
,
"syns"
);
foreach
$syn
(
@syns1
)
{
push
@word1tree
,
grep
(/\b
$syn
\b/,
@hypertrees
);
}
my
@word2tree
;
@syns2
=
$self
->{ wn }->querySense(
$word2
,
"syns"
);
foreach
$syn
(
@syns2
)
{
push
@word2tree
,
grep
(/\b
$syn
\b/,
@hypertrees
);
}
if
(
$#word1tree
==
$#hypertrees
)
{
@word1tree
= ();
foreach
$syn
(
@syns1
)
{
push
@word1tree
,
grep
(!/\b
$syn
\b/,
@hypertrees
);
}
}
if
(
$#word2tree
==
$#hypertrees
)
{
@word2tree
= ();
foreach
$syn
(
@syns2
)
{
push
@word2tree
,
grep
(!/\b
$syn
\b/,
@hypertrees
);
}
}
my
%w1_paths
=();
my
$w1_path
;
my
$j
=0;
my
$all_lcs
=
""
;
foreach
$path
(
@word1tree
)
{
$w1_path
=
''
;
$j
=0;
@synsets
=
split
" "
,
$path
;
foreach
$synset
(
reverse
@synsets
)
{
$j
++;
if
(
length
(
$w1_path
)!=0 )
{
$w1_path
=
$w1_path
.
" hypernym "
.
$synset
;
}
else
{
$w1_path
=
$synset
;
}
if
(
exists
$lcs
{
$synset
})
{
$all_lcs
=
$synset
;
last
;
}
}
if
(
length
(
$all_lcs
)>0)
{
$w1_paths
{
$all_lcs
}{
$w1_path
}=
$j
;
}
else
{
$w1_paths
{
"Root#n#1"
}{
$w1_path
}=
$j
;
}
}
my
%w2_paths
=();
my
$w2_path
;
$j
=0;
foreach
$path
(
@word2tree
)
{
$w2_path
=
''
;
$j
=0;
@synsets
=
split
" "
,
$path
;
foreach
$synset
(
reverse
@synsets
)
{
$j
++;
if
(
length
(
$w2_path
)!=0 )
{
$w2_path
=
$w2_path
.
" hypernym "
.
$synset
;
}
else
{
$w2_path
=
$synset
;
}
if
(
exists
$lcs
{
$synset
})
{
$all_lcs
=
$synset
;
last
;
}
}
if
(
length
(
$all_lcs
)>0)
{
$w2_paths
{
$all_lcs
}{
$w2_path
}=
$j
;
}
else
{
$w2_paths
{
"Root#n#1"
}{
$w2_path
}=
$j
;
}
}
my
$lcs_synset
;
my
$length
= 100;
foreach
$lcs_synset
(
keys
%w1_paths
)
{
foreach
$w1_path
(
keys
%{
$w1_paths
{
$lcs_synset
}})
{
foreach
$w2_path
(
keys
%{
$w2_paths
{
$lcs_synset
}})
{
if
(
$length
>
$w1_paths
{
$lcs_synset
}{
$w1_path
}+
$w2_paths
{
$lcs_synset
}{
$w2_path
})
{
$path
=
$w1_path
.
"\n"
.
$w2_path
;
$length
=
$w1_paths
{
$lcs_synset
}{
$w1_path
}+
$w2_paths
{
$lcs_synset
}{
$w2_path
};
}
elsif
(
$length
==
$w1_paths
{
$lcs_synset
}{
$w1_path
}+
$w2_paths
{
$lcs_synset
}{
$w2_path
})
{
$path
=
$path
.
"\n"
.
$w1_path
.
"\n"
.
$w2_path
;
}
}
}
}
$trace_return
=
$measure
.
"\n"
;
$length
--;
$trace_return
=
$trace_return
.
$path
.
"\n"
.
$length
;
return
$trace_return
;
}
elsif
(
$measure
=~ /hso/)
{
my
$trace_return
=
"hso"
;
if
(
$tracestring
=~ /MedStrong\srelation\spath\.\.\./)
{
$tracestring
=~ s/\n//g;
@trace
=
split
/MedStrong\srelation\spath\.\.\./,
$tracestring
;
my
@path
;
foreach
$i
(1...
$#trace
)
{
chomp
$trace
[
$i
];
$path
[
$i
-1] =
$trace
[
$i
];
$path
[
$i
-1] =~ s/\[U\]/hyponym/g;
$path
[
$i
-1] =~ s/\[D\]/hypernym/g;
$path
[
$i
-1] =~ s/\[H\]/merynym/g;
$trace_return
=
$trace_return
.
"\n"
.
$path
[
$i
-1];
}
}
elsif
(
$tracestring
=~/Strong\sRel\s\(Synset\sMatch\)/)
{
$tracestring
=~ s/\n//g;
@trace
=
split
/Strong\sRel\s\(Synset\sMatch\)\s:\s/,
$tracestring
;
$trace_return
=
$trace_return
.
"\n"
.
$trace
[1];
}
else
{
$trace_return
=
$trace_return
.
"\n-1"
;
}
return
$trace_return
;
}
}
1;