t/02_web_scraper.t - metacpan.org


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
              use strict;
use warnings;
use Test::More;
use HTML::TreeBuilder::LibXML;
# Web::Scraper is optional: skip the whole file when it cannot be loaded.
plan skip_all => "this test requires Web::Scraper" unless eval "use Web::Scraper; 1";

# One can_ok check plus two passes over the three assertions held in $code.
plan tests => 2*3+1;
can_ok 'HTML::TreeBuilder::LibXML', 'ignore_unknown';

# Fixture document: one top-level link and one link nested inside a <div>.
my $html = <<'...';
<html>
    <a href="http://wassr.jp/">wassr</a>
    <div>
        <a href="http://mixi.jp/">mixi</a>
    </div>
</html>
...

# Scraping code kept as a string. run() compiles it with a string eval, so
# nothing in it is parsed until after the skip_all guard above. Each
# assertion carries a name so a failure can be identified in the TAP output.
my $code = <<'...';
use Web::Scraper;
my $ret = scraper {
    process '//a', 'text[]', 'TEXT';
    process '//a', 'href[]', '@href';
    process '//div', 'div[]', scraper {
        process '//a', 'link[]', 'TEXT';
    };
}->scrape($html);
is_deeply($ret->{text}, ['wassr', 'mixi'], 'text of every <a>');
is_deeply($ret->{href}, ['http://wassr.jp/', 'http://mixi.jp/'], 'href of every <a>');
is_deeply($ret->{div}, [{link => ['mixi']}], 'link text from the nested scraper on <div>');
...

# First pass: before replace_original() has been called.
# run() is called with parentheses rather than as `&run;`, which would
# implicitly forward the caller's @_.
note 'before HTML::TreeBuilder::LibXML->replace_original';
run();

# Second pass: the same three assertions after replace_original().
HTML::TreeBuilder::LibXML->replace_original();
note 'after HTML::TreeBuilder::LibXML->replace_original';
run();
# Compile and execute the test snippet stored in the file-level $code.
# Any compile-time or run-time error raised by the snippet is rethrown
# unchanged so that it aborts the test script instead of being ignored.
sub run {
    eval $code;

    # The failure is detected through $@ rather than through eval's
    # return value.
    if (my $error = $@) {
        die $error;
    }
}

	Global
`s`	Focus search bar
`?`	Bring up this help dialog

	GitHub
`g` `p`	Go to pull requests
`g` `i`	Go to GitHub issues (only if GitHub is the preferred repository)

	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse

	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)