NAME
WWW::Flatten - Flatten web pages deeply and make them portable
SYNOPSIS
use strict;
use warnings;
use utf8;
use 5.010;
use Mojo::URL;
use WWW::Flatten;

# Directory handed to the crawler as `basedir`. Create it only when it is
# missing so the script can be re-run, and fail loudly if creation fails.
my $basedir = './github/';
unless (-d $basedir) {
    mkdir($basedir) or die "Cannot create $basedir: $!";
}

# File extensions accepted by the is_target callback below.
my $ext_regex = qr{\.(css|png|gif|jpeg|jpg|pdf|js|json)$}i;

my $bot = WWW::Flatten->new(
    basedir       => $basedir,
    max_conn      => 1,
    wait_per_host => 3,
    peeping_port  => 3000,
    depth         => 3,
    filenames     => {
        'https://github.com' => 'index.html',
    },

    # Returns 1 for URLs whose path ends in a listed extension or whose host
    # is assets-cdn.github.com, and 0 otherwise.
    is_target => sub {
        my $uri = Mojo::URL->new(shift->resolved_uri);

        # Match the path only: the regex is anchored at the end, so matching
        # the whole URL would miss assets such as "style.css?v=2".
        return 1 if $uri->path =~ $ext_regex;

        # host may be undefined (e.g. a URL without an authority part).
        return 1 if ($uri->host // '') eq 'assets-cdn.github.com';

        return 0;
    },

    # Hands back the given URL as a Mojo::URL object, otherwise unchanged.
    normalize => sub {
        my $uri = Mojo::URL->new(shift);
        return $uri;
    }
);

$bot->crawl;
DESCRIPTION
WWW::Flatten is a web crawling tool for freezing pages into standalone files.
This software is considered to be alpha quality and isn't recommended for regular usage.
ATTRIBUTES
METHODS
AUTHOR
Sugama Keita, <sugama@jamadam.com>
COPYRIGHT AND LICENSE
Copyright (C) jamadam
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.