#!/opt/bin/perl =head1 NAME perlstrip - make perl sources smaller by removing comments, pod, whitespace... =head1 SYNOPSIS perlstrip file... perlstrip --size --cache file... -v verbose --size optimise for size (also: -s) --cache use a cache (also: -c) --cache-dir path cache directory to use --output path output file for next input file (also: -o) WARNING: like its counterpart strip, by default files are overwritten! =head1 DESCRIPTION This program can be used to reduce the filesize of perl sources, for example, before bundling them using L<PAR>, L<App::Staticperl>, or when using them in a situation where disk space is premium (such as within firmwares, boot floppies etc.). It does this by removing unnecessary whitespace, commands, POD and a few other things. By default, it works through the list of files given on the commandline, strips them and writes them again. This can be influenced via switches. =head2 OPTIONS =over 4 =item C<--verbose> =item C<-v> Increases verbosity - highly recommended for interactive use. =item C<--output> path =item C<-o> path Write the stripped contents of the I<following> files on the commandline to this path, instead of overwriting the original file. This can be used to copy files instead of (destructively) overwriting them, by specifying C<-o> before each file, e.g.: perlstrip -o strippedfile origsource =item C<--size> =item C<-s> Optimise for size instead of for compressibility - the default is to optimise the file for later compression. Optimising for size makes the raw file size smaller, but might compress a bit worse. =item C<--cache> =item C<-c> Stripping files can take a very long time. When this option is enabled, then C<perlstrip> will cache larger files in a special directory (default: F<~/.cache/perlstrip>), to improve repeated calls to strip sources. This is mainly useful when you strip a whole perl installation repeatedly. =item C<--cache-dir> path Enables caching, but uses an alternative cache directory (which will be created if it doesn't exist yet). =back =head1 SEE ALSO L<App::Staticperl>, L<Perl::Squish>. =head1 AUTHOR Marc Lehmann <schmorp@schmorp.de> http://software.schmorp.de/pkg/staticperl.html =cut use Perl::Strip; use Getopt::Long; use common::sense; our $VERBOSE; our $OPTIMISE_SIZE; our $CACHE; our $CACHEDIR; our $OUTPUT; $|=1; sub usage { require Pod::Usage; Pod::Usage::pod2usage (-output => *STDOUT, -verbose => 1, -exitval => 1, -noperldoc => 1); } sub process { my $file = shift; # GetOotions apparently wraps us into an eval :/ eval { my @cache; if (defined $CACHEDIR) { @cache = (cache => $CACHEDIR); } elsif ($CACHE) { mkdir "$ENV{HOME}/.cache"; @cache = (cache => "$ENV{HOME}/.cache/perlstrip"); } print "$file... " if $VERBOSE; my $output = defined $OUTPUT ? $OUTPUT : $file; my $src = do { open my $fh, "<:perlio", $file or die "$file: $!\n"; local $/; <$fh> }; printf "%d ", length $src if $VERBOSE; $src = (new Perl::Strip @cache, optimise_size => $OPTIMISE_SIZE)->strip ($src); printf "to %d bytes... ", length $src if $VERBOSE; print $output eq $file ? "writing... " : "saving as $output... " if $VERBOSE; open my $fh, ">:perlio", "$output~" or die "$output~: $!\n"; length $src == syswrite $fh, $src or die "$output~: $!\n"; close $fh; rename "$output~", $output; print "ok\n" if $VERBOSE; }; if ($@) { print STDERR "$@\n"; exit 2; } } @ARGV or usage; Getopt::Long::Configure ("bundling", "no_auto_abbrev", "no_ignore_case"); GetOptions "cache|c" => \$CACHE, "cache-dir=s" => \$CACHEDIR, "verbose|v" => sub { ++$VERBOSE }, "quiet|q" => sub { --$VERBOSE }, "size|s" => \$OPTIMISE_SIZE, "output|o=s" => \$OUTPUT, "<>" => \&process, or usage; __END__ die "cannot specify both --app and --perl\n" if $PERL and defined $APP; # required for @INC loading, unfortunately trace_module "PerlIO::scalar"; ############################################################################# # apply include/exclude { my %pmi; for (@incext) { my ($inc, $glob) = @$_; my @match = grep /$glob/, keys %pm; if ($inc) { # include @pmi{@match} = delete @pm{@match}; print "applying include $glob - protected ", (scalar @match), " files.\n" if $VERBOSE >= 5; } else { # exclude delete @pm{@match}; print "applying exclude $glob - removed ", (scalar @match), " files.\n" if $VERBOSE >= 5; } } my @pmi = keys %pmi; @pm{@pmi} = delete @pmi{@pmi}; } ############################################################################# # scan for AutoLoader, static archives and other dependencies sub scan_al { my ($auto, $autodir) = @_; my $ix = "$autodir/autosplit.ix"; print "processing autoload index for '$auto'\n" if $VERBOSE >= 6; $pm{"$auto/autosplit.ix"} = $ix; open my $fh, "<:perlio", $ix or die "$ix: $!"; my $package; while (<$fh>) { if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) { my $al = "auto/$package/$1.al"; my $inc = find_inc $al; defined $inc or die "$al: autoload file not found, but should be there.\n"; $pm{$al} = $inc; print "found autoload function '$al'\n" if $VERBOSE >= 6; } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) { ($package = $1) =~ s/::/\//g; } elsif (/^\s*(?:#|1?\s*;?\s*$)/) { # nop } else { warn "WARNING: $ix: unparsable line, please report: $_"; } } } for my $pm (keys %pm) { if ($pm =~ /^(.*)\.pm$/) { my $auto = "auto/$1"; my $autodir = find_inc $auto; if (defined $autodir && -d $autodir) { # AutoLoader scan_al $auto, $autodir if -f "$autodir/autosplit.ix"; # extralibs.ld if (open my $fh, "<:perlio", "$autodir/extralibs.ld") { print "found extralibs for $pm\n" if $VERBOSE >= 6; local $/; $extralibs .= " " . <$fh>; } $pm =~ /([^\/]+).pm$/ or die "$pm: unable to match last component"; my $base = $1; # static ext if (-f "$autodir/$base$Config{_a}") { print "found static archive for $pm\n" if $VERBOSE >= 3; push @libs, "$autodir/$base$Config{_a}"; push @static_ext, $pm; } # dynamic object die "ERROR: found shared object - can't link statically ($_)\n" if -f "$autodir/$base.$Config{dlext}"; if ($PACKLIST && open my $fh, "<:perlio", "$autodir/.packlist") { print "found .packlist for $pm\n" if $VERBOSE >= 3; while (<$fh>) { chomp; s/ .*$//; # newer-style .packlists might contain key=value pairs # only include certain files (.al, .ix, .pm, .pl) if (/\.(pm|pl|al|ix)$/) { for my $inc (@INC) { # in addition, we only add files that are below some @INC path $inc =~ s/\/*$/\//; if ($inc eq substr $_, 0, length $inc) { my $base = substr $_, length $inc; $pm{$base} = $_; print "+ added .packlist dependency $base\n" if $VERBOSE >= 3; } last; } } } } } } } ############################################################################# print "processing bundle files (try more -v power if you get bored waiting here)...\n" if $VERBOSE >= 1; my $data; my @index; my @order = sort { length $a <=> length $b or $a cmp $b } keys %pm; # sorting by name - better compression, but needs more metadata # sorting by length - faster lookup # usually, the metadata overhead beats the loss through compression for my $pm (@order) { my $path = $pm{$pm}; 128 > length $pm or die "ERROR: $pm: path too long (only 128 octets supported)\n"; my $src = ref $path ? $$path : do { open my $pm, "<", $path or die "$path: $!"; local $/; <$pm> }; my $size = length $src; unless ($pmbin{$pm}) { # only do this unless the file is binary if ($pm =~ /^auto\/POSIX\/[^\/]+\.al$/) { if ($src =~ /^ unimpl \"/m) { print "$pm: skipping (raises runtime error only).\n" if $VERBOSE >= 3; next; } } $src = cache +($STRIP eq "ppi" ? "$UNISTRIP,$OPTIMISE_SIZE" : undef), $src, sub { if ($UNISTRIP && $pm =~ /^unicore\/.*\.pl$/) { print "applying unicore stripping $pm\n" if $VERBOSE >= 6; # special stripping for unicore swashes and properties # much more could be done by going binary $src =~ s{ (^return\ <<'END';\n) (.*?\n) (END(?:\n|\Z)) }{ my ($pre, $data, $post) = ($1, $2, $3); for ($data) { s/^([0-9a-fA-F]+)\t([0-9a-fA-F]+)\t/sprintf "%X\t%X", hex $1, hex $2/gem if $OPTIMISE_SIZE; # s{ # ^([0-9a-fA-F]+)\t([0-9a-fA-F]*)\t # }{ # # ww - smaller filesize, UU - compress better # pack "C0UU", # hex $1, # length $2 ? (hex $2) - (hex $1) : 0 # }gemx; s/#.*\n/\n/mg; s/\s+\n/\n/mg; } "$pre$data$post" }smex; } if ($STRIP =~ /ppi/i) { require PPI; if (my $ppi = PPI::Document->new (\$src)) { $ppi->prune ("PPI::Token::Comment"); $ppi->prune ("PPI::Token::Pod"); # prune END stuff for (my $last = $ppi->last_element; $last; ) { my $prev = $last->previous_token; if ($last->isa (PPI::Token::Whitespace::)) { $last->delete; } elsif ($last->isa (PPI::Statement::End::)) { $last->delete; last; } elsif ($last->isa (PPI::Token::Pod::)) { $last->delete; } else { last; } $last = $prev; } # prune some but not all insignificant whitespace for my $ws (@{ $ppi->find (PPI::Token::Whitespace::) }) { my $prev = $ws->previous_token; my $next = $ws->next_token; if (!$prev || !$next) { $ws->delete; } else { if ( $next->isa (PPI::Token::Operator::) && $next->{content} =~ /^(?:,|=|!|!=|==|=>)$/ # no ., because of digits. == float or $prev->isa (PPI::Token::Operator::) && $prev->{content} =~ /^(?:,|=|\.|!|!=|==|=>)$/ or $prev->isa (PPI::Token::Structure::) or ($OPTIMISE_SIZE && ($prev->isa (PPI::Token::Word::) && (PPI::Token::Symbol:: eq ref $next || $next->isa (PPI::Structure::Block::) || $next->isa (PPI::Structure::List::) || $next->isa (PPI::Structure::Condition::))) ) ) { $ws->delete; } elsif ($prev->isa (PPI::Token::Whitespace::)) { $ws->{content} = ' '; $prev->delete; } else { $ws->{content} = ' '; } } } # prune whitespace around blocks if ($OPTIMISE_SIZE) { # these usually decrease size, but decrease compressability more for my $struct (PPI::Structure::Block::, PPI::Structure::Condition::) { for my $node (@{ $ppi->find ($struct) }) { my $n1 = $node->first_token; my $n2 = $n1->previous_token; $n1->delete if $n1->isa (PPI::Token::Whitespace::); $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::); my $n1 = $node->last_token; my $n2 = $n1->next_token; $n1->delete if $n1->isa (PPI::Token::Whitespace::); $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::); } } for my $node (@{ $ppi->find (PPI::Structure::List::) }) { my $n1 = $node->first_token; $n1->delete if $n1->isa (PPI::Token::Whitespace::); my $n1 = $node->last_token; $n1->delete if $n1->isa (PPI::Token::Whitespace::); } } # reformat qw() lists which often have lots of whitespace for my $node (@{ $ppi->find (PPI::Token::QuoteLike::Words::) }) { if ($node->{content} =~ /^qw(.)(.*)(.)$/s) { my ($a, $qw, $b) = ($1, $2, $3); $qw =~ s/^\s+//; $qw =~ s/\s+$//; $qw =~ s/\s+/ /g; $node->{content} = "qw$a$qw$b"; } } $src = $ppi->serialize; } else { warn "WARNING: $pm{$pm}: PPI failed to parse this file\n"; } } elsif ($STRIP =~ /pod/i && $pm ne "Opcode.pm") { # opcode parses its own pod require Pod::Strip; my $stripper = Pod::Strip->new; my $out; $stripper->output_string (\$out); $stripper->parse_string_document ($src) or die; $src = $out; } if ($VERIFY && $pm =~ /\.pm$/ && $pm ne "Opcode.pm") { if (open my $fh, "-|") { <$fh>; } else { eval "#line 1 \"$pm\"\n$src" or warn "\n\n\n$pm\n\n$src\n$@\n\n\n"; exit 0; } } $src }; # if ($pm eq "Opcode.pm") { # open my $fh, ">x" or die; print $fh $src;#d# # exit 1; # } } print "adding $pm (original size $size, stored size ", length $src, ")\n" if $VERBOSE >= 2; push @index, ((length $pm) << 25) | length $data; $data .= $pm . $src; } length $data < 2**25 or die "ERROR: bundle too large (only 32MB supported)\n"; my $varpfx = "bundle_" . substr +(Digest::MD5::md5_hex $data), 0, 16; ############################################################################# # output print "generating $PREFIX.h... " if $VERBOSE >= 1; { open my $fh, ">", "$PREFIX.h" or die "$PREFIX.h: $!\n"; print $fh <<EOF; /* do not edit, automatically created by mkstaticbundle */ #include <EXTERN.h> #include <perl.h> #include <XSUB.h> /* public API */ EXTERN_C PerlInterpreter *staticperl; EXTERN_C void staticperl_xs_init (pTHX); EXTERN_C void staticperl_init (void); EXTERN_C void staticperl_cleanup (void); EOF } print "\n" if $VERBOSE >= 1; ############################################################################# # output print "generating $PREFIX.c... " if $VERBOSE >= 1; open my $fh, ">", "$PREFIX.c" or die "$PREFIX.c: $!\n"; print $fh <<EOF; /* do not edit, automatically created by mkstaticbundle */ #include "bundle.h" /* public API */ PerlInterpreter *staticperl; EOF ############################################################################# # bundle data my $count = @index; print $fh <<EOF; #include "bundle.h" /* bundle data */ static const U32 $varpfx\_count = $count; static const U32 $varpfx\_index [$count + 1] = { EOF my $col; for (@index) { printf $fh "0x%08x,", $_; print $fh "\n" unless ++$col % 10; } printf $fh "0x%08x\n};\n", (length $data); print $fh "static const char $varpfx\_data [] =\n"; dump_string $fh, $data; print $fh ";\n\n"; ############################################################################# # bootstrap # boot file for staticperl # this file will be eval'ed at initialisation time my $bootstrap = ' BEGIN { package ' . $PACKAGE . '; PerlIO::scalar->bootstrap; @INC = sub { my $data = find "$_[1]" or return; $INC{$_[1]} = $_[1]; open my $fh, "<", \$data; $fh }; } '; $bootstrap .= "require '//boot';" if exists $pm{"//boot"}; $bootstrap =~ s/\s+/ /g; $bootstrap =~ s/(\W) /$1/g; $bootstrap =~ s/ (\W)/$1/g; print $fh "const char bootstrap [] = "; dump_string $fh, $bootstrap; print $fh ";\n\n"; print $fh <<EOF; /* search all bundles for the given file, using binary search */ XS(find) { dXSARGS; if (items != 1) Perl_croak (aTHX_ "Usage: $PACKAGE\::find (\$path)"); { STRLEN namelen; char *name = SvPV (ST (0), namelen); SV *res = 0; int l = 0, r = $varpfx\_count; while (l <= r) { int m = (l + r) >> 1; U32 idx = $varpfx\_index [m]; int comp = namelen - (idx >> 25); if (!comp) { int ofs = idx & 0x1FFFFFFU; comp = memcmp (name, $varpfx\_data + ofs, namelen); if (!comp) { /* found */ int ofs2 = $varpfx\_index [m + 1] & 0x1FFFFFFU; ofs += namelen; res = newSVpvn ($varpfx\_data + ofs, ofs2 - ofs); goto found; } } if (comp < 0) r = m - 1; else l = m + 1; } XSRETURN (0); found: ST (0) = res; sv_2mortal (ST (0)); } XSRETURN (1); } /* list all files in the bundle */ XS(list) { dXSARGS; if (items != 0) Perl_croak (aTHX_ "Usage: $PACKAGE\::list"); { int i; EXTEND (SP, $varpfx\_count); for (i = 0; i < $varpfx\_count; ++i) { U32 idx = $varpfx\_index [i]; PUSHs (newSVpvn ($varpfx\_data + (idx & 0x1FFFFFFU), idx >> 25)); } } XSRETURN ($varpfx\_count); } EOF ############################################################################# # xs_init print $fh <<EOF; void staticperl_xs_init (pTHX) { EOF @static_ext = ("DynaLoader", sort @static_ext); # prototypes for (@static_ext) { s/\.pm$//; (my $cname = $_) =~ s/\//__/g; print $fh " EXTERN_C void boot_$cname (pTHX_ CV* cv);\n"; } print $fh <<EOF; char *file = __FILE__; dXSUB_SYS; newXSproto ("$PACKAGE\::find", find, file, "\$"); newXSproto ("$PACKAGE\::list", list, file, ""); EOF # calls for (@static_ext) { s/\.pm$//; (my $cname = $_) =~ s/\//__/g; (my $pname = $_) =~ s/\//::/g; my $bootstrap = $pname eq "DynaLoader" ? "boot" : "bootstrap"; print $fh " newXS (\"$pname\::$bootstrap\", boot_$cname, file);\n"; } print $fh <<EOF; Perl_av_create_and_unshift_one (&PL_preambleav, newSVpv (bootstrap, sizeof (bootstrap) - 1)); } EOF ############################################################################# # optional perl_init/perl_destroy if ($APP) { print $fh <<EOF; int main (int argc, char *argv []) { extern char **environ; int exitstatus; static char *args[] = { "staticperl", "-e", "0" }; PERL_SYS_INIT3 (&argc, &argv, &environ); staticperl = perl_alloc (); perl_construct (staticperl); PL_exit_flags |= PERL_EXIT_DESTRUCT_END; exitstatus = perl_parse (staticperl, staticperl_xs_init, sizeof (args) / sizeof (*args), args, environ); if (!exitstatus) perl_run (staticperl); exitstatus = perl_destruct (staticperl); perl_free (staticperl); PERL_SYS_TERM (); return exitstatus; } EOF } elsif ($PERL) { print $fh <<EOF; int main (int argc, char *argv []) { extern char **environ; int exitstatus; PERL_SYS_INIT3 (&argc, &argv, &environ); staticperl = perl_alloc (); perl_construct (staticperl); PL_exit_flags |= PERL_EXIT_DESTRUCT_END; exitstatus = perl_parse (staticperl, staticperl_xs_init, argc, argv, environ); if (!exitstatus) perl_run (staticperl); exitstatus = perl_destruct (staticperl); perl_free (staticperl); PERL_SYS_TERM (); return exitstatus; } EOF } else { print $fh <<EOF; EXTERN_C void staticperl_init (void) { extern char **environ; int argc = sizeof (args) / sizeof (args [0]); char **argv = args; static char *args[] = { "staticperl", "-e", "0" }; PERL_SYS_INIT3 (&argc, &argv, &environ); staticperl = perl_alloc (); perl_construct (staticperl); PL_origalen = 1; PL_exit_flags |= PERL_EXIT_DESTRUCT_END; perl_parse (staticperl, staticperl_xs_init, argc, argv, environ); perl_run (staticperl); } EXTERN_C void staticperl_cleanup (void) { perl_destruct (staticperl); perl_free (staticperl); staticperl = 0; PERL_SYS_TERM (); } EOF } print -s "$PREFIX.c", " octets (", (length $data) , " data octets).\n\n" if $VERBOSE >= 1; ############################################################################# # libs, cflags { print "generating $PREFIX.ccopts... " if $VERBOSE >= 1; my $str = "$Config{ccflags} $Config{optimize} $Config{cppflags} -I$Config{archlibexp}/CORE"; $str =~ s/([\(\)])/\\$1/g; open my $fh, ">$PREFIX.ccopts" or die "$PREFIX.ccopts: $!"; print $fh $str; print "$str\n\n" if $VERBOSE >= 1; } { print "generating $PREFIX.ldopts... "; my $str = $STATIC ? "-static " : ""; $str .= "$Config{ccdlflags} $Config{ldflags} @libs $Config{archlibexp}/CORE/$Config{libperl} $Config{perllibs}"; my %seen; $str .= " $_" for grep !$seen{$_}++, ($extralibs =~ /(\S+)/g); for (@staticlibs) { $str =~ s/(^|\s) (-l\Q$_\E) ($|\s)/$1-Wl,-Bstatic $2 -Wl,-Bdynamic$3/gx; } $str =~ s/([\(\)])/\\$1/g; open my $fh, ">$PREFIX.ldopts" or die "$PREFIX.ldopts: $!"; print $fh $str; print "$str\n\n" if $VERBOSE >= 1; } if ($PERL or defined $APP) { $APP = "perl" unless defined $APP; print "building $APP...\n" if $VERBOSE >= 1; system "$Config{cc} \$(cat bundle.ccopts\) -o \Q$APP\E bundle.c \$(cat bundle.ldopts\)"; unlink "$PREFIX.$_" for qw(ccopts ldopts c h); print "\n" if $VERBOSE >= 1; }