use
5.006;
our
@ISA
=
qw(Exporter)
;
our
%EXPORT_TAGS
= (
'all'
=> [
qw()
] );
our
@EXPORT_OK
= ( @{
$EXPORT_TAGS
{
'all'
} } );
our
@EXPORT
=
qw( &toword &install_dic )
;
our
$VERSION
=
'0.01'
;
sub
install_dic{
my
%opt
=(
yaml
=>
undef
,
name
=>
undef
);
if
(
ref
(
$_
[0]) eq
"HASH"
) {
%opt
= (
%opt
, %{
shift
(
@_
)}) } ;
my
(
$aff
,
@dic
)=
@_
;
my
$cpaff
=1;
if
(
$aff
=~ /^from:(.*)/){
$aff
= catfile(
$Lingua::Jspell::JSPELLLIB
,
"$1.aff"
) ;
$cpaff
=0;}
my
$ya
;
open
(F,
">__$$.dic"
) or
die
(
"Error 1: $!\n"
);
for
(
@dic
){
open
(G,
$_
) or
die
(
"Error 2($_): $!\n"
);
print
F <G>;
close
G;
}
close
F;
$ya
= LoadFile(
$opt
{yaml})
if
$opt
{yaml};
my
$name
=
$opt
{name} ||
$ya
->{META}{IDS}[0] ||
$dic
[0];
if
(
$opt
{hash}){ copy(
$opt
{hash},
"__$$.hash"
); }
else
{
system
(
"jbuild __$$.dic $aff __$$.hash"
); }
if
(
$opt
{irr}){
copy(
$opt
{irr},catfile(
$Lingua::Jspell::JSPELLLIB
,
$opt
{irr}))
or
warn
(
"Error 3: $!"
);
}
copy(
"__$$.hash"
,catfile(
$Lingua::Jspell::JSPELLLIB
,
"$name.hash"
))
or
warn
(
"Error 4: $!"
);
if
(
$cpaff
){
copy(
$aff
, catfile(
$Lingua::Jspell::JSPELLLIB
,
"$name.aff"
))
or
warn
(
"Error 5: $!"
);
}
if
(
$opt
{yaml}){
copy(
$opt
{yaml}, catfile(
$Lingua::Jspell::JSPELLLIB
,
"$name.yaml"
))
or
warn
(
"Error 6: $!"
);
for
(@{
$ya
->{META}{IDS}}){
copy(
"__$$.hash"
,catfile(
$Lingua::Jspell::JSPELLLIB
,
"$_.hash"
))
or
warn
(
"Error 7: $!"
);
}
}
unlink
(
"__$$.dic"
,
"__$$.hash"
,
"__$$.dic.cnt"
,
"__$$.dic.stat"
);
}
sub
init{
my
$file
=
shift
;
my
$self
= {
filename
=>
$file
};
open
F,
$file
or
die
"Cannot open file '$file': $!\n"
;
while
(<F>) {
$self
->{ shortcut}{$1} = $2
if
(m!^
$self
->{revshortcut}{$2} = $1
if
(m!^
}
close
F;
copy(
$file
,
"$file.old"
) or
die
(
"$! cant create $file.old\n"
);
return
bless
(
$self
);
}
sub
toword{ _data2line(
@_
) }
sub
modeach_word{
my
%opt
=(
rawfea
=> 0);
my
$dic
=
shift
;
if
(
ref
(
$_
[0]) eq
"HASH"
) {
%opt
= (
%opt
, %{
shift
(
@_
)}) } ;
my
$func
=
shift
;
open
DIC,
$dic
->{filename} or
die
(
"cannot open file"
);
open
NDIC,
">$dic->{filename}.new"
or
die
(
"cannot create file $!"
);
while
(<DIC>) {
if
(m!^
chomp
;
my
(
$word
,
$class
,
$flags
,
@r
) =
split
'/'
,
$_
;
my
@flags
= (
$flags
)?
split
(//,
$flags
):();
if
(not
$opt
{rawfea}){
my
@atts
;
if
(
$class
=~ /^\$/){
@atts
= (
special
=>
$class
)}
else
{
$class
=~ s/
@atts
= (
$class
)?
split
(/[,=]/,
$class
):();
}
my
%atts
;
if
(
@atts
% 2) {
%atts
= ();
}
else
{
%atts
=
@atts
;
}
print
NDIC
$func
->(
$word
,\
%atts
,\
@flags
,
@r
) ||
$_
;
}
else
{
print
NDIC
$func
->(
$word
,
$class
,\
@flags
,
@r
) ||
$_
;
}
print
NDIC
"\n"
;
}
close
DIC;
close
NDIC;
copy(
"$dic->{filename}.new"
,
$dic
->{filename});
}
sub
foreach_word {
my
%opt
=(
type
=>
"struct"
);
my
$dic
=
shift
;
if
(
ref
(
$_
[0]) eq
"HASH"
) {
%opt
= (
%opt
, %{
shift
(
@_
)}) } ;
my
$func
=
shift
;
open
DIC,
$dic
->{filename} or
die
(
"cannot open file"
);
while
(<DIC>) {
next
if
m!^
next
if
m!^\s*$!;
chomp
;
my
(
$word
,
$class
,
$flags
,
@r
) =
split
'/'
,
$_
;
if
(
$opt
{type} eq
"struct"
){
$class
=~ s/
my
@flags
= (
$flags
)?
split
(//,
$flags
):();
my
@atts
= (
$class
)?
split
(/[,=]/,
$class
):();
my
%atts
;
if
(
@atts
% 2) {
%atts
= ();
}
else
{
%atts
=
@atts
;
}
$func
->(
$word
,\
%atts
,\
@flags
,
@r
); }
elsif
(
$opt
{type} eq
"raw"
){
$func
->(
$_
); }
}
close
DIC;
}
sub
for_this_cat_I_want_only_these_flags {
my
$dic
=
shift
;
my
$cat
=
shift
;
$cat
=~ s/
my
$flags
=
shift
;
my
%flags
;
@flags
{
split
//,
$flags
}=1;
foreach_word(
$dic
,
sub
{
my
(
$w
,
$a
,
$f
) =
@_
;
my
%fs
=
%flags
;
my
$ct
=
$cat
;
my
$this_cat
=
$a
->{CAT} ||
"unknown"
;
if
(
$this_cat
eq
$ct
) {
my
$fl
;
for
$fl
(
@$f
) {
unless
(
exists
(
$fs
{
$fl
})) {
print
"$w from category '$cat' uses flag '$fl'\n"
;
}
}
}
});
}
sub
for_this_cat_I_dont_want_these_flags {
my
$dic
=
shift
;
my
$cat
=
shift
;
$cat
=~ s/
my
$flags
=
shift
;
my
%flags
;
@flags
{
split
//,
$flags
}=1;
foreach_word(
$dic
,
sub
{
my
(
$w
,
$a
,
$f
) =
@_
;
my
%fs
=
%flags
;
my
$ct
=
$cat
;
my
$this_cat
=
$a
->{CAT} ||
"unknown"
;
if
(
$this_cat
eq
$ct
) {
my
$fl
;
for
$fl
(
@$f
) {
if
(
exists
(
$fs
{
$fl
})) {
print
"$w from category '$cat' uses flag '$fl'\n"
;
}
}
}
});
}
sub
not_categorized {
my
$dic
=
shift
;
open
DIC,
$dic
->{filename} or
die
(
"Cannot open file"
);
while
(<DIC>) {
chomp
;
next
if
/^
next
if
/^\s*$/;
m{^([^/]+)/};
my
$word
= $1;
my
$cat
= $';
next
unless
(
$cat
=~ m!^/!);
print
"word '$word' doesn't have a categorie\n"
;
}
close
DIC;
}
sub
extra_words {
my
$dic
=
shift
;
my
%from
;
my
(
$r
,
$word
,
$fea
,
$fea1
,
$t
);
my
$jdic
= Lingua::Jspell->new(
"port"
);
open
DIC,
$dic
->{filename} or
die
(
"Cannot open file"
);
while
(<DIC>) {
chomp
;
next
if
/^
next
if
/^\s*$/;
m{^([^/]+)/};
$word
= $1;
my
@rads
=
$jdic
->rad(
$word
);
if
(
@rads
> 1) {
print
STDERR
"."
if
rand
> 0.99;
for
$r
(
@rads
) {
next
if
(
$r
eq
$word
);
for
$fea
(
$jdic
->fea(
$word
)) {
if
(
$fea
->{rad} eq
$word
) {
for
$fea1
(fea(
$r
)) {
if
(_same_cat(
$fea1
->{CAT},
$fea
->{CAT})) {
$from
{
$r
} = {
word
=>
$word
,
orig
=>
$fea1
,
dest
=>
$fea
};
}
}
}
}
}
}
}
close
DIC;
for
(
keys
%from
) {
if
(
$from
{
$from
{
$_
}{word}}{word}) {
print
"# warning: multiple dependence\n"
;
print
"# \t$_ => $from{$_}{word} => $from{$from{$_}{word}}{word}\n"
;
delete
(
$from
{
$_
});
}
else
{
print
"# from $_ you can get $from{$_}{word}\n"
;
print
"delete_word('$from{$_}{word}')\n"
;
}
}
}
sub
_same_cat {
my
(
$a
,
$b
) =
@_
;
if
(
defined
(
$a
) &&
defined
(
$b
)) {
return
(
$a
eq
$b
);
}
else
{
return
0;
}
}
sub
add_word {
my
$dict
=
shift
;
$dict
->_add_full_line(
map
{
my
$word
=
$_
->{word};
my
$flags
=
$_
->{flags};
my
$comment
=
$_
->{comment} ||
""
;
delete
(
$_
->{word});
delete
(
$_
->{flags});
delete
(
$_
->{comment});
my
%hash
=
%$_
;
my
$info
=
join
(
","
,
map
{
"$_=$hash{$_}"
}
keys
%hash
);
"$word/$info/$flags/$comment"
}
@_
);
}
sub
_add_full_line {
my
$dict
=
shift
;
my
%saw
=();
@saw
{
@_
} = ();
my
@v
;
open
DIC,
$dict
->{filename} or
die
(
"cannot open dictionary file"
);
open
NDIC,
">$dict->{filename}.new"
or
die
(
"cannot open new dictionary file"
);
while
(<DIC>) {
push
@v
,
$_
and
next
if
(/^
chomp
;
$saw
{
$_
} = 1;
}
print
NDIC
join
""
,
@v
;
print
NDIC
"\n\n"
;
print
NDIC
map
{/./ ? (
"$_\n"
):()}
sort
keys
%saw
;
close
DIC;
close
NDIC;
copy(
"$dict->{filename}.new"
,
$dict
->{filename});
}
sub
delete_word {
my
$dict
=
shift
;
my
$pal
=
shift
;
my
$t
;
open
DIC,
$dict
->{filename} or
die
(
"cannot open dictionary file"
);
open
NDIC,
">$dict->{filename}.new"
or
die
(
"cannot open new dictionary file"
);
while
(<DIC>) {
$t
= $1
if
/^(.+?)\//;
print
NDIC
unless
(
$t
=~/^
$pal
$/);
}
close
DIC;
close
NDIC;
copy(
"$dict->{filename}.new"
,
$dict
->{filename});
}
sub
add_flag {
my
$dic
=
shift
;
my
$flag
=
shift
;
my
%words
;
@words
{
@_
} = 1;
$dic
-> foreach_word(
sub
{
my
(
$w
,
$a
,
$f
) =
@_
;
my
%fs
;
@fs
{
@$f
}=1;
if
(
$words
{
$w
}) {
@fs
{
split
//,
$flag
}=1;;
print
_data2line(
$w
,
$a
,
join
(
""
,
keys
%fs
));
}
print
_data2line(
$w
,
$a
,
$f
);
});
}
sub
_data2line {
my
(
$word
,
$atts
,
$flags
,
@r
) =
@_
;
if
(
ref
$atts
){
return
"$word/"
.
join
(
","
,
map
{
"$_=$atts->{$_}"
}
keys
%$atts
).
"/"
.
join
(
""
,
grep
{/./}
@$flags
).
"/"
.
join
(
"/"
,
@r
);
}
else
{
return
"$word/"
.
$atts
.
"/"
.
join
(
""
,
grep
{/./}
@$flags
).
"/"
.
join
(
"/"
,
@r
);
}
}
1;