Why not adopt me?
This distribution is up for adoption!
If you're interested then please contact the PAUSE module admins via
email.
NAME
Plucene::Plugin::Analyzer::SnowballAnalyzer - Stemmed analyzer with Lingua::Stem::Snowball and Lingua::StopWords
DESCRIPTION
Filters StandardTokenizer with SnowballAnalyzer.
Set $Plucene::Plugin::Analyzer::SnowballAnalyzer::LANG to the language of your choice (see the Lingua::Stem::Snowball documentation for all available languages).
EXAMPLE
#!/usr/bin/perl
# Example: index two small documents with the Snowball analyzer, then run a
# parsed query against the index and print the title and filename of each hit.
use strict;
use warnings;

use Plucene;
use Plucene::Document;
use Plucene::Document::Field;
use Plucene::Index::Writer;
use Plucene::Plugin::Analyzer::SnowballAnalyzer;
use Plucene::Search::HitCollector;
use Plucene::Search::IndexSearcher;
use Plucene::QueryParser;

# Select the stemming / stop-word language before any analyzer is created.
$Plucene::Plugin::Analyzer::SnowballAnalyzer::LANG = 'fr';

my $db = "plucene_index";

# NOTE(review): the trailing 1 is presumably the "create a new index" flag --
# confirm against the Plucene::Index::Writer documentation.
my $writer = Plucene::Index::Writer->new(
    $db,
    Plucene::Plugin::Analyzer::SnowballAnalyzer->new(),
    1,
);

# First document.
my $doc = Plucene::Document->new();
$doc->add(Plucene::Document::Field->Keyword(filename => 'test2.html'));
$doc->add(Plucene::Document::Field->Text(title => 'Another file title'));
$doc->add(Plucene::Document::Field->UnStored(content => 'Nothing HERE. la folie...'));
$writer->add_document($doc);

# Second document (reuses the same lexical variable).
$doc = Plucene::Document->new();
$doc->add(Plucene::Document::Field->Keyword(filename => 'test2.html'));
$doc->add(Plucene::Document::Field->Text(title => 'Fichier en français'));
$doc->add(Plucene::Document::Field->UnStored(content => 'Vive le français! Je t\'aime tendrement...'));
$writer->add_document($doc);

$writer->optimize;

# Search the index just written, using the same analyzer for the query text.
my $searcher = Plucene::Search::IndexSearcher->new($db);
my $parser   = Plucene::QueryParser->new({
    analyzer => Plucene::Plugin::Analyzer::SnowballAnalyzer->new(),
    default  => 'content',
});
my $parsed = $parser->parse("la +france +TENDRE title:fichier");

# Collect every hit whose document can be fetched; a failed fetch is skipped
# (the eval swallows the exception and leaves $res undefined).
my @docs;
my $hc = Plucene::Search::HitCollector->new(
    collect => sub {
        my ($self, $doc, $score) = @_;
        my $res = eval { $searcher->doc($doc); };
        push @docs, $res if $res;
    },
);
$searcher->search_hc($parsed, $hc);

# Reduce each hit to the two fields that are printed. The leading + makes
# the inner braces an anonymous hash rather than a nested block.
my @results = map {
    +{
        filename => $_->get('filename')->string,
        title    => $_->get('title')->string,
    }
} @docs;

print "Results:\n";
foreach my $result (@results) {
    print "\t$result->{title} ($result->{filename})\n";
}
AUTHOR
Fabien POTENCIER, fabpot@cpan.org
LICENSE
You may distribute this code under the same terms as Plucene itself.
1 POD Error
The following errors were encountered while parsing the POD:
- Around line 51:
Non-ASCII character seen before =encoding in 'français'));'. Assuming CP1252