#!/usr/bin/perl
use Getopt::Long;

# Parse the command line.  GetOptions is called without destination
# arguments, so each recognised option is stored in the package variable
# $opt_<name> (for example --infile FILE sets $opt_infile).
#
# --icfrequency takes a FILE argument: the help text documents it that way
# and its value is forwarded as a file name, so it is declared "=s" here.
GetOptions(
    "version",
    "help",
    "username=s",
    "password=s",
    "hostname=s",
    "database=s",
    "socket=s",
    "measure=s",
    "config=s",
    "infile=s",
    "matrix",
    "dbfile=s",
    "precision=s",
    "info",
    "allsenses",
    "forcerun",
    "debug",
    "verbose",
    "icfrequency=s",
    "icpropagation=s",
    "realtime",
    "stoplist=s",
    "debugfile=s",
    "vectormatrix=s",
    "vectorindex=s",
    "defraw",
    "dictfile=s",
    "t",
) or die("Please check the above mentioned option(s).\n");

# Internal trace switch for this script.  It is never set from the command
# line in this file; --debug only sets $opt_debug.
my $debug = 0;

#  --help: print the help text and stop
if (defined $opt_help) {
    $opt_help = 1;
    showHelp();
    exit;
}

#  --version: print the version banner and stop
if (defined $opt_version) {
    $opt_version = 1;
    showVersion();
    exit;
}

#  without --infile, the two terms/CUIs must be on the command line
if (!defined $opt_infile and scalar(@ARGV) < 2) {
    print STDERR "At least 2 terms or CUIs should be given on the \n";
    print STDERR "command line or use the --infile option\n";
    minimalUsageNotes();
    exit;
}

# File-level state shared by the subroutines below.  These must stay at
# file scope (and above the sub definitions) so the subs can see them.
my $icpropagation = "";
my $precision     = "";    # number of decimal places, set by setOptions
my $floatformat   = "";    # sprintf format built from $precision
my $database      = "";
my $hostname      = "";
my $socket        = "";
my $measure       = "";    # name of the selected measure
my $umls          = "";    # UMLS::Interface handle, set by loadUMLS
my $noscore       = "";    # formatted "no score" value, set by setOptions
my $infile        = "";
my @input_array   = ();    # input lines/pairs, filled by loadInput

#  main sequence: validate, configure, connect, load, compute
checkOptions();
setOptions();
loadUMLS();
my $meas = loadMeasures();
loadInput();
calculateSimilarity();
# Computes and prints the relatedness for every entry of @input_array.
#
# Reads the file-level @input_array, $umls, $meas, $measure, $floatformat,
# $noscore and $debug, and the option globals $opt_matrix, $opt_allsenses
# and $opt_info.  Takes no arguments; results are written to STDOUT.
#
# Without --matrix each entry is a "first<>second" pair and one result is
# printed per pair.  With --matrix every entry is compared with every entry
# and the scores are printed as rows of a matrix.
#
# Fixes relative to the previous version:
#   * both members of a pair are trimmed on both sides,
#   * --allsenses prints each pair's own score rather than the best score,
#   * the "no score" lines test $cui_flag1/$cui_flag2 (was a misspelled,
#     always-undefined $cuiflag1),
#   * loop and scratch variables are lexical, and @input_array is no longer
#     overwritten through the loop alias.
sub calculateSimilarity {

    if ($debug) { print STDERR "In calculateSimilarity\n"; }

    my $matrix_mode = defined $opt_matrix;

    #  matrix header row: the entries in column order
    if ($matrix_mode) { print "@input_array\n"; }

    # Returns (is_cui_flag, concept list) for one input.  Anything containing
    # C<digits> is treated as a CUI and kept only if the interface knows it.
    # NOTE(review): the CUI pattern is unanchored, so a term that merely
    # contains "C" followed by digits is also treated as a CUI.
    my $resolve_concepts = sub {
        my ($input) = @_;
        if ($input =~ /C[0-9]+/) {
            my @known = $umls->exists($input) ? ($input) : ();
            return (1, @known);
        }
        my @mapped = $umls->getConceptList($input);
        errorCheck($umls);
        return (0, @mapped);
    };

    # Returns the first term the interface lists for a CUI (undef if none).
    my $first_term = sub {
        my ($cui) = @_;
        my @terms = $umls->getTermList($cui);
        errorCheck($umls);
        return $terms[0];
    };

    my @secondary_array = @input_array;

    foreach my $entry (@input_array) {

        my $input1 = $entry;

        if (!$matrix_mode) {
            #  split the pair and trim both members on both sides
            my ($i1, $i2) = split /<>/, $entry;
            foreach my $member ($i1, $i2) {
                $member =~ s/^\s+//;
                $member =~ s/\s+$//;
            }
            $input1          = $i1;
            @secondary_array = ($i2);
        }
        else {
            #  row label
            print "$input1 ";
        }

        foreach my $input2 (@secondary_array) {

            if ($debug) { print STDERR "INPUT=> $input1 : $input2\n"; }

            my ($cui_flag1, @c1) = $resolve_concepts->($input1);
            my ($cui_flag2, @c2) = $resolve_concepts->($input2);

            #  for CUI inputs, look up a term to show next to the CUI
            my $t1 = $cui_flag1 ? $first_term->($input1) : $input1;
            my $t2 = $cui_flag2 ? $first_term->($input2) : $input2;

            if ($debug) {
                print STDERR "$input1:$t1 (@c1)\n";
                print STDERR "$input2:$t2 (@c2)\n";
            }

            #  score every concept pair
            my %similarityHash = ();
            foreach my $cui1 (@c1) {
                foreach my $cui2 (@c2) {
                    if ($debug) {
                        print STDERR "Obtaining similarity for $cui1 and $cui2\n";
                    }
                    my $value = $meas->getRelatedness($cui1, $cui2, $t1, $t2);
                    errorCheck($meas);
                    $similarityHash{$cui1}{$cui2} = sprintf $floatformat, $value;
                }
            }

            #  find the highest and lowest scoring pairs; ties on the
            #  maximum go to the pair that sorts last
            my ($max_cc1, $max_cc2, $max_score) = ("", "", 0);
            my ($min_cc1, $min_cc2, $min_score) = ("", "", 999);
            foreach my $concept1 (sort keys %similarityHash) {
                foreach my $concept2 (sort keys %{ $similarityHash{$concept1} }) {
                    my $pair_score = $similarityHash{$concept1}{$concept2};
                    if ($max_score <= $pair_score) {
                        ($max_score, $max_cc1, $max_cc2) = ($pair_score, $concept1, $concept2);
                    }
                    if ($min_score > $pair_score) {
                        ($min_score, $min_cc1, $min_cc2) = ($pair_score, $concept1, $concept2);
                    }
                }
            }

            #  the "nam" measure reports the lowest score, all others the highest
            my ($score, $cc1, $cc2) =
                ($measure eq "nam")
                ? ($min_score, $min_cc1, $min_cc2)
                : ($max_score, $max_cc1, $max_cc2);

            #  prints one scored line; the layout depends on which inputs were CUIs
            my $print_scored_pair = sub {
                my ($pair_score, $cui1, $cui2) = @_;
                if ($cui_flag1 and $cui_flag2) {
                    print "$pair_score<>$cui1($t1)<>$cui2($t2)\n";
                }
                elsif ($cui_flag1) {
                    print "$pair_score<>$t1($cui1)<>$input2($cui2)\n";
                }
                elsif ($cui_flag2) {
                    print "$pair_score<>$input1($cui1)<>$t2($cui2)\n";
                }
                else {
                    print "$pair_score<>$input1($cui1)<>$input2($cui2)\n";
                }
            };

            if ($matrix_mode) {
                print "$score ";
            }
            elsif (defined $opt_allsenses) {
                #  every concept pair with its own score
                foreach my $cui1 (sort keys %similarityHash) {
                    foreach my $cui2 (sort keys %{ $similarityHash{$cui1} }) {
                        $print_scored_pair->($similarityHash{$cui1}{$cui2}, $cui1, $cui2);
                    }
                }
            }
            elsif ($cc1 ne "" or $cc2 ne "") {
                $print_scored_pair->($score, $cc1, $cc2);
            }
            else {
                # No pair was selected.  This is also reached when every
                # score is negative, because the maximum starts at 0.
                if (@c1) {
                    foreach my $cui1 (@c1) {
                        if ($cui_flag1) { print "$noscore<>$cui1($t1)<>$input2\n"; }
                        else            { print "$noscore<>$t1($cui1)<>$input2\n"; }
                        if ($opt_info)  { print " => $input2 does not exist\n"; }
                    }
                }
                elsif (@c2) {
                    foreach my $cui2 (@c2) {
                        if ($cui_flag2) { print "$noscore<>$input1<>$cui2($t2)\n"; }
                        else            { print "$noscore<>$input1<>$t2($cui2)\n"; }
                        if ($opt_info)  { print " => $input1 does not exist\n"; }
                    }
                }
                else {
                    print "$noscore<>$input1<>$input2\n";
                    if ($opt_info) { print " => $input2 nor $input1 exist\n"; }
                }
            }
        }

        #  end of a matrix row
        if ($matrix_mode) { print "\n"; }
    }
}
# Fills the file-level @input_array with the inputs to compare.
#
# With --infile, reads the file named by $infile, skipping blank lines:
#   * with --matrix each line must contain a CUI (C followed by digits),
#   * otherwise each line must contain the "<>" pair separator.
# A line in the wrong format is reported on STDERR with its line number and
# the program exits.  Without --infile, the first two command-line
# arguments are shifted off @ARGV and stored as one "first<>second" pair.
#
# Fixes relative to the previous version: three-argument open on a lexical
# handle, the handle is closed, and the reported line number is the real
# one (it was always 1).
sub loadInput {

    if ($debug) { print STDERR "In loadInput\n"; }

    if (defined $opt_infile) {

        if ($debug) { print STDERR "FILE ($opt_infile) DEFINED\n"; }

        #  what a well-formed line looks like in each mode
        my $valid_line = defined $opt_matrix ? qr/C[0-9]+/ : qr/\<\>/;

        open(my $in_fh, '<', $infile)
            or die "Could not open file: $infile\n";

        while (my $line = <$in_fh>) {
            chomp $line;

            next if $line =~ /^\s*$/;

            if ($line =~ $valid_line) {
                push @input_array, $line;
                next;
            }

            #  $. is the line number of the line just read from $in_fh
            my $linecounter = $.;
            print STDERR "There is an error in the input file ($infile)\n";
            print STDERR "on line $linecounter. The input is not in the\n";
            print STDERR "correct format. Here is the input line:\n";
            print STDERR "$line\n\n";
            exit;
        }

        close($in_fh);
    }
    else {

        if ($debug) { print STDERR "Command Line terms/cuis defined\n"; }

        my $i1 = shift @ARGV;
        my $i2 = shift @ARGV;

        if ($debug) { print STDERR "INPUT: $i1 $i2\n"; }

        push @input_array, "$i1<>$i2";
    }
}
# Creates and returns the measure object selected by the file-level $measure.
#
# The class UMLS::Similarity::<measure> is loaded at run time and
# constructed with the file-level $umls handle.  The vector measure also
# receives a hash reference holding whichever of the vector-related options
# were given on the command line.
#
# Dies with "Unable to create measure object." if $measure is not one of
# the supported names or the constructor returns a false value, and with
# the object's own error string if getError() reports an error code.
sub loadMeasures {

    # Only these names may be turned into a module path; $measure comes from
    # the command line and must not select an arbitrary file.
    my %is_supported = map { $_ => 1 }
        qw(vector lch wup path cdist nam res jcn lin random);

    die "Unable to create measure object.\n" if !$is_supported{$measure};

    # Load the implementing module.  require does nothing if the module has
    # already been loaded.
    require "UMLS/Similarity/$measure.pm";
    my $class = "UMLS::Similarity::$measure";

    my $meas;
    if ($measure eq "vector") {

        #  forward only the vector options that were actually supplied
        my %supplied = (
            "dictfile"     => $opt_dictfile,
            "vectorindex"  => $opt_vectorindex,
            "debugfile"    => $opt_debugfile,
            "vectormatrix" => $opt_vectormatrix,
            "config"       => $opt_config,
            "defraw"       => $opt_defraw,
            "stoplist"     => $opt_stoplist,
        );
        my %vectoroptions = ();
        foreach my $name (keys %supplied) {
            $vectoroptions{$name} = $supplied{$name} if defined $supplied{$name};
        }

        $meas = $class->new($umls, \%vectoroptions);
    }
    else {
        $meas = $class->new($umls);
    }

    die "Unable to create measure object.\n" if (!$meas);

    my ($errCode, $errString) = $meas->getError();
    die "$errString\n" if ($errCode);

    return $meas;
}
# Creates the UMLS::Interface object and stores it in the file-level $umls.
#
# The constructor receives a hash reference built from the command-line
# options that were supplied.  The mysql connection settings (driver,
# database, username, password, hostname, socket) are added only when both
# --username and --password were given; database, hostname and socket come
# from the file-level variables filled in by setOptions.
#
# Dies if the object cannot be created or if getError() reports an error
# code right after construction.
sub loadUMLS {

    # Make sure the interface class is loaded.  require does nothing if the
    # module has already been loaded.
    require UMLS::Interface;

    my %option_hash = ();

    #  --t is passed on as a plain true value
    if (defined $opt_t) { $option_hash{"t"} = 1; }

    #  options forwarded unchanged when present
    my %supplied = (
        "config"        => $opt_config,
        "debug"         => $opt_debug,
        "forcerun"      => $opt_forcerun,
        "realtime"      => $opt_realtime,
        "verbose"       => $opt_verbose,
        "icpropagation" => $opt_icpropagation,
        "icfrequency"   => $opt_icfrequency,
    );
    foreach my $name (keys %supplied) {
        $option_hash{$name} = $supplied{$name} if defined $supplied{$name};
    }

    #  database connection settings
    if (defined $opt_username and defined $opt_password) {
        $option_hash{"driver"}   = "mysql";
        $option_hash{"database"} = $database;
        $option_hash{"username"} = $opt_username;
        $option_hash{"password"} = $opt_password;
        $option_hash{"hostname"} = $hostname;
        $option_hash{"socket"}   = $socket;
    }

    $umls = UMLS::Interface->new(\%option_hash);
    die "Unable to create UMLS::Interface object.\n" if (!$umls);

    my ($errCode, $errString) = $umls->getError();
    die "$errString\n" if ($errCode);

    # Kept from the original sequence.  NOTE(review): presumably a no-op
    # after the check above, unless getError() can change between calls.
    errorCheck($umls);
}
# Validates the combination of command-line options.
#
# Reads the $opt_* option globals.  On the first violated rule it prints an
# explanation to STDERR, calls minimalUsageNotes() and exits; if every rule
# passes it simply returns.  The rules are checked in this order:
#   1. --matrix requires --infile
#   2. --measure must be exactly one of the supported measure names
#   3. the vector measure requires --vectorindex and --vectormatrix
#   4. --dictfile, --debugfile, --vectormatrix and --vectorindex are only
#      allowed with the vector measure
#   5. res, lin and jcn require --icpropagation or --icfrequency
#   6. --icpropagation / --icfrequency are only allowed with res, lin, jcn
#   7. --icpropagation and --icfrequency may not be combined
#   8. --precision must be a non-negative integer
#
# Fixes relative to the previous version: the measure name is matched as a
# whole (it used to accept any string merely containing a measure name as a
# word), and the message for rule 7 now says "can not".
sub checkOptions {

    #  report the problem, show the usage line, and stop
    my $reject = sub {
        print STDERR @_;
        minimalUsageNotes();
        exit;
    };

    if (defined $opt_matrix and !defined $opt_infile) {
        $reject->("The file must be specified using the --infile option\n");
    }

    if (defined $opt_measure
        and $opt_measure !~ /\A(?:path|wup|lch|cdist|nam|vector|res|lin|random|jcn)\z/) {
        $reject->(
            "The measure ($opt_measure) is not defined for\n",
            "the UMLS-Similarity package at this time.\n\n",
        );
    }

    #  no --measure means neither the vector nor an IC measure was requested
    my $chosen    = defined $opt_measure ? $opt_measure : "";
    my $is_vector = ($chosen eq "vector");
    my $is_ic     = ($chosen eq "res" or $chosen eq "lin" or $chosen eq "jcn");

    if ($is_vector) {
        if (!defined $opt_vectorindex) {
            $reject->(
                "The --vectorindex and --vectormatrix option must be\n",
                "specified when using the vector measure. An example\n",
                "of the matrix and index files can be seen in the \n",
                "samples/ directory.\n\n",
            );
        }
        if (!defined $opt_vectormatrix) {
            $reject->(
                "The --vectorindex and --vectormatrix option must be\n",
                "specified when using the vector measure.An example\n",
                "of the index and matrix files can be seen in the\n",
                "samples/ directory. The vector-input.pl program can\n",
                "generate these files given your specific text. \n\n",
            );
        }
    }

    if (defined $opt_dictfile and !$is_vector) {
        $reject->(
            "The --dictfile option is only available\n",
            "when using the vector measure.\n\n",
        );
    }

    if (defined $opt_debugfile and !$is_vector) {
        $reject->(
            "The --debugfile option is only available\n",
            "when using the vector measure.\n\n",
        );
    }

    if (defined $opt_vectormatrix and defined $opt_vectorindex and !$is_vector) {
        $reject->(
            "The --vectormatrix and --vectorindex options are only\n",
            "available when using the vector measure.\n\n",
        );
    }

    if (defined $opt_vectormatrix and !$is_vector) {
        $reject->(
            "The --vectormatrix option is only available\n",
            "when using the vector measure. \n\n",
        );
    }

    if (defined $opt_vectorindex and !$is_vector) {
        $reject->(
            "The --vectorindex option is only available\n",
            "when using the vector measure.\n\n",
        );
    }

    my $has_ic_option = (defined $opt_icpropagation or defined $opt_icfrequency);

    if ($is_ic and !$has_ic_option) {
        $reject->(
            "The --icpropagation or --icfrequency option must be\n",
            "specified when using the res, lin or jcn measures.\n",
            "An example of the propagation file can be seen in\n",
            "the samples/ directory.\n\n",
        );
    }

    if ($has_ic_option and !$is_ic) {
        $reject->(
            "The --icpropagation or --icfrequency options\n",
            "may only be specified when using the res, lin\n",
            "or jcn measures.\n\n",
        );
    }

    if (defined $opt_icpropagation and defined $opt_icfrequency) {
        $reject->(
            "You can not specify both the --icpropagation and\n",
            "--icfrequency options at the same time.\n\n",
        );
    }

    if (defined $opt_precision and $opt_precision !~ /^\d+$/) {
        $reject->("Value for switch --precision should be integer >= 0\n");
    }
}
# Copies the command-line options into the file-level settings, applies
# defaults, and prints a summary of both to STDERR.
#
# Sets $infile, $precision (default 4), $floatformat ("%.<precision>f"),
# $noscore (-1 in that format) and $measure (default "path").  When
# --username is given it also sets $database (default "umls"), $hostname
# (default "localhost") and $socket (default "/tmp/mysql.sock").
#
# Fix relative to the previous version: the default socket path no longer
# ends in a newline character.
sub setOptions {

    if ($debug) { print STDERR "In setOptions\n"; }

    my $default = "";    # summary of the defaults that were applied
    my $set     = "";    # summary of what the user supplied

    $set .= " --icpropagation $opt_icpropagation\n" if defined $opt_icpropagation;
    $set .= " --icfrequency $opt_icfrequency\n"     if defined $opt_icfrequency;
    $set .= " --debugfile $opt_debugfile\n"         if defined $opt_debugfile;
    $set .= " --vectormatrix $opt_vectormatrix\n"   if defined $opt_vectormatrix;
    $set .= " --vectorindex $opt_vectorindex\n"     if defined $opt_vectorindex;
    $set .= " --dictfile $opt_dictfile\n"           if defined $opt_dictfile;
    $set .= " --defraw\n"                           if defined $opt_defraw;
    $set .= " --stoplist $opt_stoplist\n"           if defined $opt_stoplist;

    if (defined $opt_config) {
        #  $config is a package variable; the assignment is kept as it was
        $config = $opt_config;
        $set .= " --config $config\n";
    }

    if (defined $opt_infile) {
        $infile = $opt_infile;
        $set .= " --infile $opt_infile\n";
    }

    $set .= " --matrix\n" if defined $opt_matrix;

    #  number of decimal places in the printed scores
    if (defined $opt_precision) {
        $precision = $opt_precision;
        $set .= " --precision $precision\n";
    }
    else {
        $precision = 4;
        $default .= " --precision $precision\n";
    }

    #  sprintf format for scores, and the value printed when there is none
    $floatformat = '%.' . $precision . 'f';
    $noscore     = sprintf $floatformat, -1;

    #  database settings are only looked at when a user name was given
    if (defined $opt_username) {

        $set .= " --username $opt_username\n";

        #  never echo the real password
        $set .= " --password XXXXXXX\n" if defined $opt_password;

        if (defined $opt_database) {
            $database = $opt_database;
            $set .= " --database $database\n";
        }
        else {
            $database = "umls";
            $default .= " --database $database\n";
        }

        if (defined $opt_hostname) {
            $hostname = $opt_hostname;
            $set .= " --hostname $hostname\n";
        }
        else {
            $hostname = "localhost";
            $default .= " --hostname $hostname\n";
        }

        if (defined $opt_socket) {
            $socket = $opt_socket;
            $set .= " --socket $socket\n";
        }
        else {
            $socket = "/tmp/mysql.sock";
            $default .= " --socket $socket\n";
        }
    }

    if (defined $opt_measure) {
        $measure = $opt_measure;
        $set .= " --measure $measure\n";
    }
    else {
        $measure = "path";
        $default .= " --measure $measure\n";
    }

    $set .= " --realtime\n" if defined $opt_realtime;
    $set .= " --debug\n"    if defined $opt_debug;
    $set .= " --verbose\n"  if defined $opt_verbose;
    $set .= " --info\n"     if defined $opt_info;

    if ($default eq "") { $default = " No default settings\n"; }
    if ($set eq "")     { $set     = " No user defined settings\n"; }

    print STDERR "Default Settings:\n";
    print STDERR "$default\n";

    print STDERR "User Settings:\n";
    print STDERR "$set\n";
}
# Reports the error state of an object that provides getError().
#
# Parameter: the object to query.  getError() is expected to return
# (code, message).  A true code prints the message to STDERR; a code
# greater than 1 also ends the program.  The code and message are kept in
# lexical variables so they no longer leak into package globals.
sub errorCheck {
    my ($obj) = @_;

    my ($errCode, $errString) = $obj->getError();

    print STDERR "$errString\n" if ($errCode);
    exit if ($errCode > 1);
}
# Prints the one-line usage summary to STDOUT, then the pointer to --help
# (via askHelp), and ends the program.
sub minimalUsageNotes {
    my $usage_line = "Usage: umls-similarity.pl [OPTIONS] [TERM1 TERM2] [CUI1 CUI2]\n";

    print $usage_line;
    askHelp();

    exit;
}
# Prints the full help text to STDOUT.
#
# Changes relative to the previous version: the empty prototype is gone
# (the sub is only ever called with no arguments), the --measure entry lists
# every measure name the option check accepts, and the --socket database
# option is documented.
sub showHelp {

    my @help_lines = (
        "This is a utility that takes as input either two terms \n",
        "or two CUIs from the command line or a file and returns \n",
        "the similarity between the two using either Leacock and \n",
        "Chodorow, 1998 (lch), Wu and Palmer, 1994 (wup) or the \n",
        "basic path measure (path)\n\n",

        "Usage: umls-similarity.pl [OPTIONS] TERM1 TERM2\n\n",

        "General Options:\n\n",

        "--config FILE Configuration file\n\n",

        "--realtime This option finds the path and propagation\n",
        " information for relevant measures in realtime\n",
        " rather than building an index\n\n",

        "--forcerun This option will bypass any command \n",
        " prompts such as asking if you would \n",
        " like to continue with the index \n",
        " creation. \n\n",

        "--measure MEASURE The measure to use to calculate the\n",
        " semantic similarity. (DEFAULT: path)\n",
        " Supported measures: path, wup, lch, cdist, nam,\n",
        " vector, res, lin, jcn and random.\n\n",

        "--precision N Displays values upto N places of decimal.\n\n",

        "--allsenses This option prints out all the possible\n",
        " CUIs pairs and their semantic similarity\n",
        " score if one of the inputs is a term that\n",
        " maps to more than one CUI. Right now we \n",
        " return the CUIs that are the most similar.\n\n",

        "--version Prints the version number\n\n",

        "--help Prints this help message.\n\n",

        "\n\nInput Options: \n\n",

        "--infile FILE File containing TERM or CUI pairs\n\n",

        "--matrix This option returns a matrix of similarity\n",
        " scores given a file containing a list of \n",
        " CUIs. File is specified using --infile.\n\n",

        "\n\nDebug Options:\n\n",

        "--debug Sets the UMLS-Interface debug flag on\n",
        " for testing purposes\n\n",

        "--verbose This option prints out the path information\n",
        " to a file in your config directory.\n\n",

        "--info Displays information about a concept if\n",
        " it doesn't exist in the source.\n\n",

        "\n\nDatabase Options: \n\n",

        "--username STRING Username required to access mysql\n\n",

        "--password STRING Password required to access mysql\n\n",

        "--hostname STRING Hostname for mysql (DEFAULT: localhost)\n\n",

        "--database STRING Database contain UMLS (DEFAULT: umls)\n\n",

        "--socket STRING Socket for mysql (DEFAULT: /tmp/mysql.sock)\n\n",

        "\n\nIC Measure Options:\n\n",

        "--icpropagation FILE File containing the information content\n",
        " of the CUIs.\n\n",

        "--icfrequency FILE File containing the frequency counts\n",
        " of the CUIs.\n\n",

        "\n\nVector Measure Options:\n\n",

        "--vectormatrix FILE The matrix file containing the vector\n",
        " information for the vector measure.\n\n",

        "--vectorindex FILE The index file containing the vector\n",
        " information for the vector measure.\n\n",

        "--debugfile FILE This prints the vector information to file,\n",
        " FILE, for debugging purposes.\n\n",

        "--dictfile FILE This is a dictionary file for the vector measure.\n",
        " It contains the 'definitions' of a concept which\n",
        " would be used rather than the definitions from the\n",
        " UMLS\n\n",

        "--stoplist FILE A file containing a list of words to be excluded\n",
        " from the features in the vector method.\n\n",

        "--defraw This is a flag for the vector measure. The \n",
        " definitions used are 'cleaned'. If the --defraw\n",
        " flag is set they will not be cleaned. \n\n",
    );

    print join('', @help_lines);
}
# Prints the revision identifier followed by the copyright notice to STDOUT.
sub showVersion {
    my @banner = (
        #  single quotes keep the $Id ... $ keyword from being interpolated
        '$Id: umls-similarity.pl,v 1.37 2010/05/12 15:15:16 btmcinnes Exp $',
        "\nCopyright (c) 2008, Ted Pedersen & Bridget McInnes\n",
    );

    print join('', @banner);
}
# Prints a one-line pointer to the --help option on STDERR.
sub askHelp {
    my $hint = "Type umls-similarity.pl --help for help.\n";

    print STDERR $hint;
}