NAME
Novel::Robot::Parser 小说站点解析引擎
INIT
site 支持的小说站点名称
晋江:Jjwxc
豆豆:Dddbbb
努努:Nunu
书农:Shunong
爱尚:Asxs
落秋:Luoqiu
顶点:Dingdian
new
初始化解析模块
my $url = 'http://www.jjwxc.net/onebook.php?novelid=2456';
#直接指定站点
my $parser = Novel::Robot::Parser->new( site => 'Jjwxc' );
#或者:通过url自动检测站点(与直接指定站点二选一)
my $parser = Novel::Robot::Parser->new( site => $url );
$parser->get_index_ref($url);
INDEX FUNCTION
get_index_ref 获取目录页信息
my $index_ref = $parser->get_index_ref($index_url, %opt);
parse_index 解析目录页
my $index_ref = $parser->parse_index($index_html_ref);
update_chapter_id 更新章节id
$parser->update_chapter_id($index_ref);
update_chapter_num 更新章节数
$parser->update_chapter_num($index_ref);
CHAPTER FUNCTION
get_chapter_ref 获取章节页信息
my $chapter_url = 'http://www.jjwxc.net/onebook.php?novelid=2456&chapterid=2';
my $chapter_ref = $parser->get_chapter_ref($chapter_url, 2);
parse_chapter 解析章节页
my $chapter_ref = $parser->parse_chapter($chapter_html_ref);
WRITER FUNCTION
get_writer_ref 获取作者页信息
my $writer_url = 'http://www.jjwxc.net/oneauthor.php?authorid=3243';
my $writer_ref = $parser->get_writer_ref($writer_url);
parse_writer 解析作者页
my $writer_ref = $parser->parse_writer($writer_html_ref);
QUERY FUNCTION
get_query_ref 获取查询结果
my $query_type = '作者';
my $query_value = '顾漫';
my $query_ref = $parser->get_query_ref($query_type, $query_value);
make_query_request 构造查询请求(返回查询url及post数据)
#查询类型: $type
#查询关键字:$keyword
my ($query_url, $post_data) =
$parser->make_query_request( $type, $keyword );
parse_query 解析查询结果
my $query_ref = $parser->parse_query($query_html_ref);
parse_query_result_urls 解析查询结果中的分页url
my $query_urls_ref = $parser->parse_query_result_urls($query_html_ref);
OTHER FUNCTION
get_inner_html 获取html元素的innerHTML
my $inner_html = $parser->get_inner_html($element);
format_abs_url 批量将url转换成绝对路径
$parser->format_abs_url($index_ref->{chapter_info}, $index_ref->{index_url});
$parser->format_abs_url($index_ref->{more_book_info}, $index_ref->{index_url});
$parser->format_abs_url($query_urls_ref, $query_url);