#!perl
use strict;
use warnings;
use Config;
use Cwd qw( realpath );
use File::Basename ();
use File::Path 'mkpath';
use File::Spec;
use Getopt::Long;
use Pod::Usage;
# must cleanup the dance floor long before Dancer starts moving:
# everything in this BEGIN block runs at compile time, i.e. before the
# "use Dancer" statement further down is processed.
my %option;

BEGIN {

    # some defaults
    %option = (index => 'index.html');

    # command-line parameters
    GetOptions(
        \%option,        'application=s', 'include|I=s', 'environment=s',
        'destination=s', 'index=s',       'log=s',       'vhost=s',
        'help',          'manual'
    ) or pod2usage(-verbose => 1);

    # simple on-line help
    pod2usage(-verbose => 1) if $option{help};
    pod2usage(-verbose => 2) if $option{manual};

    # add include dirs to @INC
    # (--include takes a list separated by the platform's path separator)
    my $path_sep = $Config::Config{path_sep} || ';';
    unshift @INC, split /\Q$path_sep\E/, $option{include}
        if defined $option{include};

    # load the application
    die "no app" if !defined $option{application};

    # the name is interpolated into a string eval just below, so refuse
    # anything that is not a plain Perl package name
    die "Invalid application name '$option{application}'"
        if $option{application} !~ /\A[A-Za-z_]\w*(?:::\w+)*\z/;
    eval "use $option{application}; 1;"
        or die "Unable to load application '$option{application}': $@";

    # compute the location of the application:
    # turn Every::Package::Separator into a / to obtain the %INC key
    # (the /g matters for names with more than two components)
    (my $module = $option{application} . '.pm') =~ s{::}{/}g;

    # an already set DANCER_APPDIR wins; otherwise use the parent of the
    # directory that holds the application's module file
    if (!$ENV{DANCER_APPDIR}) {
        die "Unable to locate '$module' in \%INC"
            if !defined $INC{$module};
        $ENV{DANCER_APPDIR} = realpath(
            File::Spec->catdir(File::Basename::dirname($INC{$module}), '..'));
    }
}
# Dancer hits the stage
# NOTE(review): URI and HTTP::Headers are used below but never loaded by
# this script; presumably Dancer pulls them in -- confirm.
use Dancer;
set environment => $option{environment} || 'production';
set log         => $option{log}         || 'warning';

# some basic verification
my $wwwdocs = $option{destination};
die "Destination not defined" if !defined $wwwdocs;
die "Invalid destination '$wwwdocs'" if !-d $wwwdocs;

# I'm just hanging on to my friend's purse
# ($log holds the report line for the current URL; it is printed and reset
# by the continue block, whichever way the iteration ends)
my $log = '';

# read the URL list (files named on the command line, or STDIN), one URL
# per line, and replay each of them as a GET request against the application
URL:
while (my $line = <>) {

    # ignore blank lines and comments
    next URL if $line =~ /^\s*(?:#|$)/;
    chomp $line;

    # default values: a URL that never reaches the application is a 500
    my $url    = URI->new($line);
    my $status = 500;
    $log = "$status $url";

    # require an absolute path
    # (some URI classes, e.g. mailto:, have no path method at all)
    next URL if !$url->can('path') || $url->path !~ m{^/};

    # fake a minimal environment
    local $ENV{SERVER_NAME} = 'localhost';
    local $ENV{SERVER_PORT} = '80';
    local $ENV{HTTP_HOST}   = eval { $url->host } || $option{vhost}
        if defined $option{vhost};

    # create a new request object
    # strip everything but the path
    my $request = Dancer::Request->new_for_request(GET => $url->path);
    $request->headers(
        HTTP::Headers->new(
            Accept => '*/*',
            $url->can('host_port') ? (Host => $url->host_port) : (),
        )
    );

    # obtain a response: elements 0 and 2 are used as status and body
    my $response = Dancer::Handler->handle_request($request);
    ($status, my $content) = @{$response}[0, 2];
    $log = "$status $url";

    # save successes to the appropriate file
    if ($status eq '200') {

        # absolute paths have the empty string as their first path_segment
        my (undef, @segments) = $url->path_segments;

        # create a vhost directory if required
        unshift @segments, eval { $url->host } || $option{vhost}
            if defined $option{vhost};

        # assume directory
        push @segments, $option{index} if $segments[-1] !~ /\./;

        # collect the body before touching the disk, so that a body we
        # cannot handle does not leave an empty (or truncated) file behind
        my @body;
        if (ref $content eq 'ARRAY') {
            @body = @$content;
        }
        elsif (ref $content eq 'GLOB') {
            @body = <$content>;
        }
        elsif (eval { $content->can('getlines') }) {
            @body = $content->getlines;
        }
        else {
            die "Don't know how to handle $content";
        }

        # generate target file name, and the directory that contains it
        my $file = File::Spec->catfile($wwwdocs, @segments);
        my $dir =
            File::Spec->catdir($wwwdocs, @segments[0 .. $#segments - 1]);

        # ensure the subdirectory exists
        mkpath $dir if !-e $dir;

        # copy content to the file, byte for byte: responses may be binary
        open my $fh, '>', $file or die "Can't open $file for writing: $!";
        binmode $fh;
        print {$fh} @body or die "Can't write to $file: $!";

        # finish (buffered write errors only show up on close)
        close $fh or die "Can't close $file: $!";
        my $bytes = -s $file;
        $log .= " => $file [$bytes]";
    }
}
continue {
    print "$log\n" if $log;
    $log = '';
}
__END__
=head1 NAME
wallflower - Sorry I can't dance, I'm hanging on to my friend's purse
=head1 SYNOPSIS
wallflower [options]
=head1 OPTIONS
--application <name> Name of the Dancer application
--destination <path> Destination directory for the files
--include <path> Library paths to include
--environment <name> Application environment (default: production)
--index <filename> Default name for index file (default: index.html)
--log <level> Dancer log level (default: warning)
--vhost <vhost> Default vhost, enforce vhost dir creation
--help Print a short online help and exit
--manual Print the full manual page and exit
=head1 DESCRIPTION
B<wallflower> turns your Dancer application into a static web site.
While not suitable for all applications, there are a number of use cases
where this makes sense. Most web sites are in essence static. Without a
way for users to update information on the site (via forms, comments, etc.)
the only changes in the web site come from sources that you control
(including the database) and that are accessible in your development
environment.
Using Dancer for a static web site actually makes a lot of sense,
just because it gives you access to all the features of the framework
for that site. Think of it as I<extreme caching>.
So, forms could be processed on your development server
(e.g. to update a local database), and the pages to be I<published>
would be a subset of all the URLs that the application supports.
Turning such an application into a real static site (a set of pages
to upload to a static web server) is just a matter of generating all
possible URLs for the static site and saving them to files.
B<wallflower> does exactly that. It reads a list of URLs, strips their
query strings, turns them into C<GET> requests and saves the
response body to a file whose name matches the request pathinfo.
B<wallflower> is not a generic offline browsing tool.
=head1 EXAMPLE
The web site created by C<dancer -a mywebapp> is the perfect example.
The complete list of URLs needed to view the site is:
/
/css
/css/error.css
/css/style.css
/favicon.ico
/images/perldancer-bg.jpg
/images/perldancer.jpg
/javascripts/jquery.js
Passing this list to B<wallflower> gives the following result:
$ wallflower -a mywebapp -d /tmp/output urls.txt
200 / => /tmp/output/index.html [5257]
200 /css/error.css => /tmp/output/css/error.css [1210]
200 /css/style.css => /tmp/output/css/style.css [2972]
200 /favicon.ico => /tmp/output/favicon.ico [1406]
200 /images/perldancer-bg.jpg => /tmp/output/images/perldancer-bg.jpg [7125]
200 /images/perldancer.jpg => /tmp/output/images/perldancer.jpg [2240]
200 /javascripts/jquery.js => /tmp/output/javascripts/jquery.js [72174]
Note that a URL whose path ends with a C</> or with a name without an extension
will be considered to be a directory, and have the default "index" filename
appended.
Any URL resulting in a status different than 200 will be logged,
but not saved:
404 /css
500 foo/bar
=head1 AUTHOR
Philippe Bruhat (BooK)
=head1 LICENSE
This program is free software and is published under the same
terms as Perl itself.
=cut