#!/usr/bin/perl -w
use lib qw(.);
use Lingua::LTS::Gfsm;
use Encode qw(encode decode);
use File::Basename qw(basename);
use Getopt::Long qw(:config no_ignore_case);
use IO::File;
use Pod::Usage;
use locale;
##------------------------------------------------------------------------------
## Constants & Globals
##------------------------------------------------------------------------------
##-- LTS analyzer instance; several of its fields are set directly by the
##   command-line parser further down
our $lts = Lingua::LTS::Gfsm->new(check_symbols=>1, tolower=>1, profile=>0);

##-- analyzer input files: labels, automaton, exception dictionary
##   (all unset until given on the command line)
our ($lts_labfile, $lts_fstfile, $lts_dictfile);

##-- query character encoding (unset: the query is not transcoded)
our $queryenc;

##-- program-level settings
our $progname = basename($0);
our $verbose  = 1;
##------------------------------------------------------------------------------
## Command-line
##------------------------------------------------------------------------------
##-- help flag: declared explicitly rather than springing into existence
##   as an undeclared package global
our $help = 0;

##-- parse options; an unparsable command line is a usage error
GetOptions(##-- General
           'help|h' => \$help,
           'verbose|v!' => \$verbose,
           ##-- Analysis Objects
           'labels|labs|lab|l|symbols|syms|sym|s=s' => \$lts_labfile,
           'fst|f|m=s' => \$lts_fstfile,
           'dictionary|dict|d=s' => \$lts_dictfile,
           ##-- Analysis Options
           'label-encoding|labencoding|labenc|le=s' => \$lts->{labenc},
           'query-encoding|queryenc|qenc|qe|q=s' => \$queryenc,
           'check-symbols|check|c!' => \$lts->{check_symbols},
           ##-- negatable flag, as documented in the SYNOPSIS (-tolower , -nolower):
           ##   with a mandatory string value, '-nolower' was rejected and a bare
           ##   '-tolower' consumed the following argument as its value
           'tolower|lower|L!' => \$lts->{tolower},
          )
  or pod2usage({-msg=>"Could not parse command-line options!", -exitval=>1, -verbose=>0});

pod2usage({-exitval=>0, -verbose=>0}) if ($help);
pod2usage({-msg=>"No query specified!", -exitval=>1, -verbose=>0}) if (!@ARGV);

##-- the automaton is mandatory: fail with a usage message instead of
##   attempting to load an undefined filename later on
pod2usage({-msg=>"No FST file specified!", -exitval=>1, -verbose=>0})
  if (!defined($lts_fstfile));

##-- labels default, documented in the SYNOPSIS as "basename(GFSMFILE).lab"
##   NOTE(review): interpreted here as GFSMFILE with its final extension
##   replaced by '.lab' (directory part kept) -- confirm this is the intent
if (!defined($lts_labfile)) {
  ($lts_labfile = $lts_fstfile) =~ s/\.[^\.\/\\]*$//;
  $lts_labfile .= '.lab';
}
##------------------------------------------------------------------------------
## Subs: regexify
##------------------------------------------------------------------------------
## $regex = regexify($str)
##  + Returns $str as an anchored regex literal of the form '/^...$/' in
##    which every regex metacharacter of $str is backslash-escaped, so the
##    result matches exactly the literal string $str.
##  + Besides the characters [ ] + * . ^ $ ( ) : ? this also escapes the
##    backslash itself, the alternation bar, curly braces, and the '/'
##    delimiter: left unescaped, these would alter the meaning of the
##    regex or terminate the '/.../' literal early.
##  + An undefined $str is treated as the empty string.
sub regexify {
  my $str = shift;
  $str = '' if (!defined($str));
  $str =~ s/([\\\/\[\]\{\}\+\*\.\^\$\(\)\:\?\|])/\\$1/g;
  return '/^'.$str.'$/';
}
##------------------------------------------------------------------------------
## MAIN
##------------------------------------------------------------------------------
##-- load: labels
$lts->loadLabels($lts_labfile)
  or die("$progname: load failed for labels '$lts_labfile': $!");

##-- load: fst
$lts->loadFst($lts_fstfile)
  or die("$progname: load failed for automaton '$lts_fstfile': $!");

##-- load: dict (optional)
if (defined($lts_dictfile)) {
  $lts->loadDict($lts_dictfile)
    or die("$progname: load failed for dictionary file '$lts_dictfile': $!");
}

##-- expand query: all command-line arguments form one space-separated query
our $query = join(' ', @ARGV);
$query = decode($queryenc,$query) if ($queryenc);

##-- rewrite each '$p~WORD' term as '$p=/^ANALYSIS$/', where ANALYSIS is the
##   regex-escaped result of the LTS analysis of WORD
$query =~ s{\$p\~([^\s\"\(\)\&\|]+)}{
  ##-- copy the capture before the call: arguments are aliased into the
  ##   callee's @_, so passing $1 itself would let any regex match performed
  ##   inside analyze() change the word while it is being analyzed
  my $word = $1;
  '$p='.regexify($lts->analyze($word))
}ge;

$query = encode($queryenc,$query) if ($queryenc);

##-- output: echo to STDERR when verbose, result to STDOUT
print STDERR "$progname: $query\n" if ($verbose);
print $query, "\n";
__END__
##------------------------------------------------------------------------------
## PODS
##------------------------------------------------------------------------------
=pod
=head1 NAME
ddc-expand-lts-query.perl - LTS-savvy DDC-query expander
=head1 SYNOPSIS
ddc-expand-lts-query.perl [OPTIONS] [QUERY...]
General Options:
-help
-verbose , -noverbose # do/don't echo the expanded query to STDERR (default=do)
LTS Analysis Objects:
-fst GFSMFILE # LTS analysis FST
-lab LABFILE # LTS analysis labels (default: basename(GFSMFILE).lab)
-dict DICTFILE # exception dictionary
LTS Analysis Options:
-labenc ENCODING # use ENCODING for labels
-queryenc ENCODING # use ENCODING for query
-check , -no-check # do/don't check for unknown symbols (default=do)
-tolower , -nolower # do/don't force input to lower case (default=do)
=cut
##------------------------------------------------------------------------------
## Options and Arguments
##------------------------------------------------------------------------------
=pod
=head1 OPTIONS AND ARGUMENTS
not yet written
=cut
##------------------------------------------------------------------------------
## Description
##------------------------------------------------------------------------------
=pod
=head1 DESCRIPTION
not yet written
=cut
##------------------------------------------------------------------------------
## Footer
##------------------------------------------------------------------------------
=pod
=head1 AUTHOR
Bryan Jurish E<lt>moocow@cpan.orgE<gt>
=cut