#!/usr/bin/perl -w

use strict;
use warnings;
use lib qw(.);
use Lingua::LTS::Gfsm;
use Encode qw(encode decode);
use File::Basename qw(basename);
use Getopt::Long qw(:config no_ignore_case);
use IO::File;
use Pod::Usage;

use locale;

##------------------------------------------------------------------------------
## Constants & Globals
##------------------------------------------------------------------------------

##-- analysis object
our $lts = Lingua::LTS::Gfsm->new(
                                  check_symbols=>1,
                                  tolower      =>1,
                                  profile      =>0,
                                 );

##-- analysis object: filenames
our $lts_labfile  = undef;
our $lts_fstfile  = undef;
our $lts_dictfile = undef;

##-- analysis options
our $queryenc = undef;

##-- program options
our $help     = 0;
our $verbose  = 1;
our $progname = basename($0);

##------------------------------------------------------------------------------
## Command-line
##------------------------------------------------------------------------------

##-- Bail out with a usage message on unknown or malformed options instead of
##   silently continuing with a partially-parsed command line.
GetOptions(##-- General
           'help|h'     => \$help,
           'verbose|v!' => \$verbose,

           ##-- Analysis Objects
           'labels|labs|lab|l|symbols|syms|sym|s=s' => \$lts_labfile,
           'fst|f|m=s'                              => \$lts_fstfile,
           'dictionary|dict|d=s'                    => \$lts_dictfile,

           ##-- Analysis Options
           'label-encoding|labencoding|labenc|le=s' => \$lts->{labenc},
           'query-encoding|queryenc|qenc|qe|q=s'    => \$queryenc,
           'check-symbols|check|c!'                 => \$lts->{check_symbols},

           ##-- tolower: ':1' makes the value optional (default 1), so the
           ##   documented bare '-tolower' switch works while the legacy
           ##   '-tolower 0' / '-tolower 1' form is still accepted.
           'tolower|lower|L:1' => \$lts->{tolower},

           ##-- ... and the documented negated forms ('-nolower' etc.)
           'notolower|no-tolower|nolower|no-lower|noL|no-L'
                               => sub { $lts->{tolower} = 0; },
          )
  or pod2usage({-exitval=>2, -verbose=>0});

pod2usage({-exitval=>0, -verbose=>0}) if ($help);
pod2usage({-msg=>"No query specified!", -exitval=>1, -verbose=>0}) if (!@ARGV);
pod2usage({-msg=>"No FST file specified!", -exitval=>1, -verbose=>0})
  if (!defined($lts_fstfile));

##-- default label file, as advertised in the SYNOPSIS.
##   NOTE(review): "basename(GFSMFILE).lab" is interpreted here as "GFSMFILE
##   with its final extension replaced by '.lab'" -- confirm against the
##   conventions used by the other LTS tools.
if (!defined($lts_labfile)) {
  ($lts_labfile = $lts_fstfile) =~ s/\.[^\.\/]*$//;
  $lts_labfile .= '.lab';
}

##------------------------------------------------------------------------------
## Subs: regexify
##------------------------------------------------------------------------------

## $regex = regexify($str)
##  + returns $str as an anchored regex literal '/^...$/' in which every
##    character that would otherwise act as a regex operator is
##    backslash-escaped
##  + besides the usual operators, the backslash itself, the '/' delimiter,
##    alternation '|' and the braces '{' '}' are escaped, so that such
##    characters in $str are matched literally and cannot terminate or
##    restructure the generated regex
##  + an undefined $str is treated as the empty string
sub regexify {
  my $str = shift;
  $str = '' if (!defined($str));
  $str =~ s/([\[\]\{\}\(\)\+\*\.\^\$\|\:\?\\\/])/\\$1/g;
  return '/^'.$str.'$/';
}

##------------------------------------------------------------------------------
## MAIN
##------------------------------------------------------------------------------

##-- load: labels
$lts->loadLabels($lts_labfile)
  or die("$progname: load failed for labels '$lts_labfile': $!");

##-- load: fst
$lts->loadFst($lts_fstfile)
  or die("$progname: load failed for automaton '$lts_fstfile': $!");

##-- load: dict
if (defined($lts_dictfile)) {
  $lts->loadDict($lts_dictfile)
    or die("$progname: load failed for dictionary file '$lts_dictfile': $!");
}

##-- expand query: each '$p~WORD' term is replaced by '$p=/^ANALYSIS$/',
##   where ANALYSIS is the (escaped) result of $lts->analyze(WORD)
our $query = join(' ', @ARGV);
$query = decode($queryenc,$query) if ($queryenc);
$query =~ s/\$p\~([^\s\"\(\)\&\|]+)/'$p='.regexify($lts->analyze($1))/ge;
$query = encode($queryenc,$query) if ($queryenc);

##-- report: expanded query goes to STDOUT (and to STDERR if verbose)
print STDERR "$progname: $query\n" if ($verbose);
print $query, "\n";

__END__

##------------------------------------------------------------------------------
## PODS
##------------------------------------------------------------------------------

=pod

=head1 NAME

ddc-expand-lts-query.perl - LTS-savvy DDC-query expander

=head1 SYNOPSIS

 ddc-expand-lts-query.perl [OPTIONS] [QUERY...]

 General Options:
  -help
  -verbose , -noverbose  # do/don't echo expanded query to STDERR (default=do)

 LTS Analysis Objects:
  -fst  GFSMFILE         # LTS analysis FST
  -lab  LABFILE          # LTS analysis labels (default: basename(GFSMFILE).lab)
  -dict DICTFILE         # exception dictionary

 LTS Analysis Options:
  -labenc   ENCODING     # use ENCODING for labels
  -queryenc ENCODING     # use ENCODING for query
  -check   , -no-check   # do/don't check for unknown symbols (default=do)
  -tolower , -nolower    # do/don't force input to lower case (default=do)

=cut

##------------------------------------------------------------------------------
## Options and Arguments
##------------------------------------------------------------------------------

=pod

=head1 OPTIONS AND ARGUMENTS

not yet written

=cut

##------------------------------------------------------------------------------
## Description
##------------------------------------------------------------------------------

=pod

=head1 DESCRIPTION

not yet written

=cut

##------------------------------------------------------------------------------
## Footer
##------------------------------------------------------------------------------

=pod

=head1 AUTHOR

Bryan Jurish E<lt>moocow@cpan.orgE<gt>

=cut