TODO - metacpan.org


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
              1. More documentation is needed.
2. Some interfaces may change in the future.
----------------------- cut of sources
        # Record details of the response that was just received on the bot
        # object, one hash slot per value.
        # NOTE(review): $res looks like an HTTP::Response and $url like the
        # address that was requested -- both are set outside this cut; confirm.
        $self->{Url_Last} = $url;
        $self->{Url_Base} = $res->base->as_string;
        $self->{Url_Code} = $res->code;
        $self->{Url_Title} = $res->headers->title;
        # Url_Content_Type is the value get_url_ok compares against
        # 'text/html' and 'text/plain'.
        $self->{Url_Content_Type} = $res->headers->content_type;
        $self->{Url_Last_Modified} = $res->headers->last_modified;
        # The same Last-Modified value, passed through HTTP::Date::time2iso.
        $self->{Url_Last_Modified_Iso} = HTTP::Date::time2iso($res->headers->last_modified);
$res->content
$res->is_success
# get_url_textok: the input mixes \n and \r\n line ends and contains an HTML
# comment that is broken across lines.
# NOTE(review): the expected string starts with "<ln> <Test>" (with a space);
# get_url_textok itself prepends "<ln>" without a space, so the space would
# have to come from de_code or be a typo -- confirm.
$str="<Test>\nThis is a Test\r\n;<!-- Abc\r \n-->";
is($bot->get_url_textok($str), "<ln> <Test><ln>This is a Test<ln>;<!-- Abc--><ln>", "function get_url_text_ok");
# NOTE(review): recreate() and $testdir are defined outside this cut;
# presumably this builds a fresh bot for the following checks -- confirm.
$bot=&recreate($testdir);
# parse_empty: the heredoc holds a multi-line comment with markup inside it,
# a <script> element and a run of several spaces.
$str=<<STR;
Before <!---
Test of mark <font>FONTS</font>
---> Inner
<script src="Test">display();</script>
Another   Space
STR
# Turn every line break into the <ln> marker before calling the parser.
$str=~s/(\r\n|\n|\r)/<ln>/g;
# In the expected string the comment and the script element are gone and
# "Another   Space" has a single space.
is($bot->parse_empty($str), "Before Inner<ln><ln>Another Space<ln>", "function parse_empty");
# parse_leadingspace with REMOVE_LEADING_SPACES switched on: the expected
# string has two leading spaces fewer on every line than the input.
$bot->{REMOVE_LEADING_SPACES}=1;
$str="<ln>    a good<ln>  dog<ln>  must<ln>  be<ln>  good.<ln>";
is($bot->parse_leadingspace($str), "<ln>  a good<ln>dog<ln>must<ln>be<ln>good.<ln>", "function parse_leadingspace");
# parse_innerspace is exercised through the Chinese subclass of the bot.
use WWW::BookBot::Chinese;
$bot=new WWW::BookBot::Chinese;
$bot->initialize;
# parse_innerspace returns its input unchanged unless this flag is true.
$bot->{TEXTREMOVEINNERSPACE}=1;
# Letter-spaced words ("A b o u t", "w o n d e r f u l", "i d e a.") are
# separated from each other by two spaces.
# NOTE(review): the non-ASCII bytes in the input look like double-byte Chinese
# text shown in a single-byte encoding, and the expected string below does not
# contain them -- confirm both literals against the real test file.
$str="A b o u t  a  ÖÐgreat ¹ú ÊÇ Ò» ¸ö and  w o n d e r f u l  i d e a.<ln>";
is($bot->parse_innerspace($str), "About a great and wonderful idea.<ln>", "function parse_innerspace");
#print Dumper($bot);
#-----------------------------------------------
# get_url_ok: prepare received contents
#-----------------------------------------------
# Dispatches on the stored Url_Content_Type: 'text/html' and 'text/plain'
# go to get_url_textok, every other type goes to get_url_binok.
# $str                  contents
#---RETURN contents
#-----------------------------------------------
sub get_url_ok {
        my $self = shift;
        my $type = $self->{Url_Content_Type};
        my $is_text = ($type eq 'text/html' or $type eq 'text/plain');

        # The remaining arguments are handed on untouched.
        return $self->get_url_textok(@_) if $is_text;
        return $self->get_url_binok(@_);
}
#-----------------------------------------------
# get_url_textok: prepare received text contents by replace \r\n as <ln>
#-----------------------------------------------
# The contents are first passed through de_code. Every \r\n, \r or \n then
# becomes the marker <ln>, and <ln> markers that fall inside a <...> tag are
# removed so the tag ends up on one line. The result is wrapped in a leading
# and a trailing <ln>.
# $str                  contents
#---RETURN contents
#-----------------------------------------------
sub get_url_textok {
        my $self = shift;
        my $text = $self->de_code($_[0]);
        for ($text) {
                s/(\r\n|\r|\n)/<ln>/g;                          # line ends -> <ln>
                s/(<[^<>]*)(?: *<ln>)+([^<>]*>)/$1$2/g;         # prepare <> in different lines
        }
        return '<ln>' . $text . '<ln>';
}
#-----------------------------------------------
# parse_img: parse images
#-----------------------------------------------
# Scans the html contents for <img ...> tags, takes the src attribute of
# each one, makes it absolute with url_rel2abs and reports it through
# parse_img_ok. Tag and attribute names are matched case-insensitively.
# Tags without a usable src are skipped.
# $base                 base url
# $contents             html contents
#-----------------------------------------------
sub parse_img {
        my $self = shift;
        my $base = shift;
        # $_[0] is scanned in place with /g, so the contents are not copied.
        while($_[0]=~/<img([^<>]*>)/ig) {
                my $attrs=$1;
                my $src='';
                # (?<![\w-]) keeps "src" from matching inside longer attribute
                # names such as lowsrc= or data-src=.
                if( $attrs=~/(?<![\w-])src\s*=\s*"([^">]*)"/i ) {
                        $src=$1;
                } elsif( $attrs=~/(?<![\w-])src\s*=\s*'([^'>]*)'/i ) {
                        $src=$1;
                } elsif( $attrs=~/(?<![\w-])src\s*=\s*([^\s>]*)/i ) {
                        # unquoted value: runs up to the next blank or the closing >
                        $src=$1;
                }
                # An empty src would resolve to the base url itself, which is
                # not an image, so it is not reported.
                next if $src eq '';
                $self->parse_img_ok($self->url_rel2abs($src, $base));
        }
}
#-----------------------------------------------
# parse_img_ok: found good image url
#-----------------------------------------------
# Called by parse_img with the result of url_rel2abs for an image it found.
# Currently an unfinished stub: it unpacks its argument and does nothing
# with it.
# $url                  image url
#-----------------------------------------------
sub parse_img_ok {
        my $self = shift;
        my ($url) = @_;
        # not finished
}
application/octet-stream
text/plain
# Turn the pattern text held in $p1 into the source of a Perl statement,
#     $str=~/^(?:PATTERN)$/x
# so that the pattern must match the whole of $str (the /x flag lets the
# pattern contain blanks).
$p1="\$str=~\/^(?:$p1\)\$/x";
# Run that statement; when it matches, $str is replaced by $found.
# NOTE(review): this is a string eval -- it is only safe while the pattern
# text comes from a trusted place; $found is set outside this cut.
$str=$found if eval($p1);
        # NOTE(review): parse_removespace is defined outside this cut; going by
        # its name it tidies up blanks first -- confirm.
        $str=$self->parse_removespace($str);
        # Re-flow only when the text contains an indented line that is
        # followed by two lines which are not indented.
        if( $str=~/\n +[^ \n][^\n]*\n[^ \n][^\n]*\n[^ \n][^\n]*\n/sg ) {
                # "\n " -> <br>, "\n" -> no use
                # A line break followed by indentation is kept, marked as <br>.
                $str=~s/\n +(?=[^ \n])/<br> /sg;
                # Every other line break is dropped, joining the lines.
                $str=~s/\n//sg;
                # The kept breaks are turned back into real line breaks.
                $str=~s/<br>/\n/g;
        }
#-----------------------------------------------
# parse_innerspace: remove inner space
#-----------------------------------------------
# Joins letter-spaced text such as "w o r d" back into "word".
# Nothing is changed unless TEXTREMOVEINNERSPACE is true and the contents
# hold at least 7 "character + blank" units in a row. The contents are then
# split at <ln> markers and at runs of 2 or more blanks; each run of blanks
# becomes a single blank, and a piece made up only of single characters
# separated by single blanks has its blanks removed.
# Pattern_Mark is used as regex source for marks that may follow a
# character (NOTE(review): presumably punctuation -- it is set elsewhere).
# $content              contents
#---RETURN contents
#-----------------------------------------------
sub parse_innerspace {
        my $self = shift;
        return $_[0] if not $self->{TEXTREMOVEINNERSPACE};

        my $mark = $self->{Pattern_Mark};
        # Cheap pre-check: 7 consecutive "character, optional marks, blank" units.
        my $detector = "[^ <>]($mark)* " x 7;
        return $_[0] if not $_[0]=~/$detector/;

        my $str = "";
        # The capture group in the split pattern keeps the separators in the
        # list, so <ln> markers and runs of blanks are seen as pieces too.
        foreach my $piece (split /(<ln>| {2,})/, $_[0]) {
                if( $piece=~/^  / ) {
                        # a run of blanks between words -> one blank
                        $str .= " ";
                        next;
                }
                $piece=~s/ //g if $piece=~/^[^ ]($mark)*( [^ ]($mark)*)+$/;
                $str .= $piece;
        }
        return $str;
}
WWW::BookBot is a bot to fetch web e-texts organized as catalogs, books and chapters. It can fetch multiple HTML pages and reformat them into a single, readable text file.
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	Go to GitHub issues (only if GitHub is the preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)