#Copyright barry king <barry@wyrdwright.com> and released under the GPL. #See http://www.gnu.org/licenses/gpl.html#TOC1 for details use 5.006; use strict; use warnings; no warnings qw(uninitialized); package Apache::Wyrd::Services::SearchParser; our $VERSION = '0.91'; =pod =head1 NAME Apache::Wyrd::Services::SearchParser - Object for performing logical word-searches =head1 SYNOPSIS use Apache::Wyrd::Services::SearchParser; sub key {'key'}; sub search { ..... } my $parser = Apache::Wyrd::Services::SearchParser->new($self); return $parser->parse('(search AND word) OR (web NOT page)'); =head1 DESCRIPTION Allows for logical parsing of a search using AND, OR, NOT and DIFF keywords. Designed to work with C<Apache::Wyrd::Services::Index>, but can work with another search object. Note that these keywords MUST be in upper-case to parse, otherwise they will be interpreted as the literal words. Any search object using the parser should implement a C<search()> method which does a word search against an index, returning an array of hashes. =head1 METHODS I<(format: (returns) name (arguments after self))> =over =item (Apache::Wyrd::Services::SearchParser) C<new> (objectref) Create a new parser object. The search object using the parser should pass itself as the argument, as C<parse> will call it's C<search> and C<key> methods. =cut sub new { my ($class, $creator) = @_; die "An object using $class must pass itself as an argument and must define the method search() which returns an array of hashrefs. It should also define the method key(), which returns the key (i.e. unique ID of the hashes in the array of hashrefs returned by search())" unless (UNIVERSAL::can($creator, 'search')); my $key = 'id'; $key = $creator->key if ($creator->can('key')); my $data = { creator => $creator, key => $key, counter => 0, hash => {} }; bless $data, $class; return $data; } =item (array) C<parse> (scalar, array) Parse accepts a phrase to parse for searching and an array which it will transparently pass to the C<search> method of the calling object. Returns an array of results derived from recursively calling C<search> and joining the results based on the logical operators. =cut sub parse { my ($self, $phrase, @options) = @_; #remove leading bogus operators $phrase =~ s/^\s*(AND|OR|DIFF)\s*//; #change leading NOTs to - $phrase =~ s/^\s*(NOT)\s*/-/; my $result = $self->recursive_parse($phrase, @options); return @{$self->{'hash'}->{$result}}; } sub recursive_parse { my ($self, $phrase, @options) = @_; my ($matched) = (1); while ($matched) {#first deal with parentheticals $matched = $phrase =~ s/\(([^\(]*?)\)/$self->recursive_parse($1)/e; } $matched = 1; while ($matched) {#then deal with nots $matched = $phrase =~ s/(\S+)\s+NOT\s+(\S+)/$self->negation($self->recursive_parse($1),$self->recursive_parse($2))/e; } $matched = 1; while ($matched) {#then deal with ands $matched = $phrase =~ s/(\S+)\s+AND\s+(\S+)/$self->intersection($self->recursive_parse($1),$self->recursive_parse($2))/e; } $matched = 1; while ($matched) {#then deal with diffs $matched = $phrase =~ s/(\S+)\s+DIFF\s+(\S+)/$self->difference($self->recursive_parse($1),$self->recursive_parse($2))/e; } $matched = 1; while ($matched) {#then deal with ors $matched = $phrase =~ s/(\S+)\s+OR\s+(\S+)/$self->union($self->recursive_parse($1),$self->recursive_parse($2))/e; } return $self->get_results($phrase, @options); } sub get_results { my ($self, $item, @options) = @_; return $item if ($item =~ /__RESULT_\d+__/); my $id = $self->new_id; $self->{'hash'}->{$id} = [$self->{'creator'}->search($item, @options)]; #print "item $item is $id\n"; return $id; } sub new_id { my $self=shift; return '__RESULT_' . $self->{'counter'}++ . '__'; } sub union { my ($self, $a, $b) = @_; my $id = new_id; $self->{'hash'}->{$id} = $self->join_sets('u', $self->{'key'}, $self->{'hash'}->{$a}, $self->{'hash'}->{$b}); #use Data::Dumper; #warn Dumper($self->{'hash'}->{$a}) . ' union ' . Dumper($self->{'hash'}->{$b}) . ' is ' . Dumper($self->{'hash'}->{$id}) . "\n"; return $id; } sub intersection { my ($self, $a, $b) = @_; my $id = new_id; $self->{'hash'}->{$id} = $self->join_sets('i', $self->{'key'}, $self->{'hash'}->{$a}, $self->{'hash'}->{$b}); #use Data::Dumper; #warn Dumper($self->{'hash'}->{$a}) . ' intersection ' . Dumper($self->{'hash'}->{$b}) . ' is ' . Dumper($self->{'hash'}->{$id}) . "\n"; return $id; } sub negation { my ($self, $a, $b) = @_; my $id = new_id; $self->{'hash'}->{$id} = $self->join_sets('n', $self->{'key'}, $self->{'hash'}->{$a}, $self->{'hash'}->{$b}); #use Data::Dumper; #warn Dumper($self->{'hash'}->{$a}) . ' negation ' . Dumper($self->{'hash'}->{$b}) . ' is ' . Dumper($self->{'hash'}->{$id}) . "\n"; return $id; } sub difference { my ($self, $a, $b) = @_; my $id = new_id; $self->{'hash'}->{$id} = $self->join_sets('d', $self->{'key'}, $self->{'hash'}->{$a}, $self->{'hash'}->{$b}); #use Data::Dumper; #warn Dumper($self->{'hash'}->{$a}) . ' difference ' . Dumper($self->{'hash'}->{$b}) . ' is ' . Dumper($self->{'hash'}->{$id}) . "\n"; return $id; } sub join_sets { my ($self, $type, $index, $a, $b) = @_; my (@intersection, @difference) = (); my (%count, %objects) = (); foreach my $e (@$a, @$b) { #note: this assumes the lists @$a and @$b are made of UNIQUE items $count{$e->{$index}}++; $objects{$e->{$index}} = $e; } return [values %objects] if ($type eq 'u'); if ($type eq 'n') { foreach my $e (@$b) { delete $objects{$e->{$index}}; } return [values %objects]; } foreach my $e (keys %count) { if ($count{$e} == 2) { push @intersection, $objects{$e}; } else { push @difference, $objects{$e}; } } if ($type eq 'i') { return \@intersection } return \@difference; } =pod =back =head1 BUGS/CAVEATS/RESERVED METHODS UNKNOWN =head1 AUTHOR Barry King E<lt>wyrd@nospam.wyrdwright.comE<gt> =head1 SEE ALSO =over =item Apache::Wyrd General-purpose HTML-embeddable perl object =back =head1 LICENSE Copyright 2002-2004 Wyrdwright, Inc. and licensed under the GNU GPL. See LICENSE under the documentation for C<Apache::Wyrd>. =cut 1;