#!/usr/local/bin/perl
use
vars
qw(@EXPORT @EXPORT_OK @ISA $debug)
;
@EXPORT
=
qw()
;
@EXPORT_OK
=
qw()
;
@ISA
=
qw(WWW::Search Exporter)
;
my
(
$debug
) = 0;
sub
native_setup_search {
my
(
$self
,
$native_query
,
$native_opt
) =
@_
;
my
(
$native_url
);
my
(
$default_native_url
) =
if
(
defined
(
$native_opt
)) {
$debug
= 1
if
(
$native_opt
->{
'search_debug'
});
if
(
$self
->{
'search_url'
} &&
$native_opt
->{
'search_args'
}) {
$native_url
=
$native_opt
->{
'search_url'
} .
"?"
.
$native_opt
->{
'search_args'
};
}
}
$native_url
=
$default_native_url
if
(!
$native_url
);
my
$how
=
$self
->{
'search_how'
};
if
(
defined
$how
) {
if
(
$how
=~ /any/) {
$native_url
=~ s/separator=[^&]+/separator=or/ig;
if
(
$native_url
!~ /separator=/) {
$native_url
.=
"&separator=or"
;
}
}
elsif
(
$how
=~ /all/) {
$native_url
=~ s/separator=[^&]+/separator=and/ig;
if
(
$native_url
!~ /separator=/) {
$native_url
.=
"&separator=and"
;
}
}
elsif
(
$how
=~ /phrase/) {
$native_url
=~ s/separator=[^&]+/separator=adj/ig;
if
(
$native_url
!~ /separator=/) {
$native_url
.=
"&separator=adj"
;
}
}
elsif
(
$how
=~ /boolean/) {
$native_url
=~ s/separator=[^&]+//ig;
}
}
$native_url
=~ s/
%s
/
$native_query
/g;
$self
->user_agent();
$self
->{_next_to_retrieve} = 0;
$self
->{_base_url} =
$self
->{_next_url} =
$native_url
;
}
sub
native_retrieve_some
{
my
(
$self
) =
@_
;
my
(
$hit
) = ();
my
(
$hits_found
) = 0;
return
undef
if
(!
defined
(
$self
->{_next_url}));
print
$self
->{_next_url} .
"\n"
if
(
$debug
);
my
$method
=
$self
->{
'search_method'
};
$method
=
'POST'
unless
$method
;
my
(
$response
) =
$self
->http_request(
$method
,
$self
->{_next_url});
if
(!
$response
->is_success) {
print
"Some problem\n"
if
(
$debug
);
return
undef
;
};
print
"Got something...\n"
if
(
$debug
);
my
$score
= 800;
my
$results
=
$response
->content();
if
(!
$results
) {
return
(
undef
);
}
my
(
@lines
) =
$self
->split_lines(
$results
);
my
(
$docs_found
);
my
(
$score_ratio
) = 0;
while
(
$#lines
> -1) {
$_
=
shift
(
@lines
);
s,\s+, ,g;
if
(m,(\d+) documents found,) {
$docs_found
= $1;
}
elsif
(m,^\s*(\d+) <A HREF=\"([^\"]+)\">(.*)</A>.*\s(\d+).*$,) {
if
(($1 > 0) && (
$score_ratio
== 0)) {
if
($1 > 900) {
$score_ratio
= 1000/$1;
}
else
{
$score_ratio
= (1000/$1) * .9;
}
}
my
(
$linkobj
) = new URI::URL $2,
$self
->{_next_url};
my
(
$hit
) = new WWW::SearchResult;
$hit
->add_url(
$linkobj
->
abs
->as_string);
$hit
->title($3);
$hit
->size($4);
$hit
->score($1 *
$score_ratio
);
push
(@{
$self
->{cache}},
$hit
);
}
}
$self
->approximate_result_count(
$docs_found
);
$self
->{_next_url} =
undef
;
return
(
$docs_found
);
my
(
$h
) = new HTML::TreeBuilder;
$h
->parse(
$results
);
for
(@{
$h
->extract_links(
qw(a)
) }) {
my
(
$link
,
$linkelem
) =
@$_
;
if
((
$linkelem
->parent->starttag() =~ /<P>/) &&
(
$linkelem
->parent->endtag() =~ m,</P>,)) {
my
(
$linkobj
) = new URI::URL
$link
,
$self
->{_next_url};
$hits_found
++;
my
(
$hit
) = new WWW::SearchResult;
$hit
->add_url(
$linkobj
->
abs
->as_string());
$hit
->title(
join
(
' '
,@{
$linkelem
->content}));
$hit
->score(
$score
);
$hit
->normalized_score(
$score
);
push
(@{
$self
->{cache}},
$hit
);
$score
=
int
(
$score
* .95);
}
}
$self
->approximate_result_count(
$hits_found
);
$self
->{_next_url} =
undef
;
return
(
$hits_found
);
}
1;