From Code to Community: Sponsoring The Perl and Raku Conference 2025 Learn more

##----------------------------------------------------------------------------
## Apache2 Server Side Include Parser - ~/lib/Apache2/SSI/Common.pm
## Version v0.1.2
## Copyright(c) 2022 DEGUEST Pte. Ltd.
## Author: Jacques Deguest <jack@deguest.jp>
## Created 2021/01/13
## Modified 2024/09/04
## All rights reserved
##
## This program is free software; you can redistribute it and/or modify it
## under the same terms as Perl itself.
##----------------------------------------------------------------------------
BEGIN
{
use strict;
use warnings;
use vars qw( $VERSION $OS2SEP $DIR_SEP );
use File::Spec ();
use IO::File;
use Scalar::Util ();
use URI;
our $VERSION = 'v0.1.2';
# perlport
our $OS2SEP =
{
amigaos => '/',
android => '/',
aix => '/',
bsdos => '/',
beos => '/',
bitrig => '/',
cygwin => '/',
darwin => '/',
dec_osf => '/',
dgux => '/',
dos => "\\",
dragonfly => '/',
dynixptx => '/',
freebsd => '/',
gnu => '/',
gnukfreebsd => '/',
haiku => '/',
hpux => '/',
interix => '/',
iphoneos => '/',
irix => '/',
linux => '/',
machten => '/',
macos => ':',
midnightbsd => '/',
minix => '/',
mirbsd => '/',
mswin32 => "\\",
msys => '/',
netbsd => '/',
netware => "\\",
next => '/',
nto => '/',
openbsd => '/',
os2 => '/',
# Extended Binary Coded Decimal Interchange Code
os390 => '/',
os400 => '/',
qnx => '/',
riscos => '.',
sco => '/',
sco_sv => '/',
solaris => '/',
sunos => '/',
svr4 => '/',
svr5 => '/',
symbian => "\\",
unicos => '/',
unicosmk => '/',
vms => '/',
vos => '>',
win32 => "\\",
};
our $DIR_SEP = $OS2SEP->{ lc( $^O ) };
};
use strict;
# RFC 3986 section 5.2.4
# This is aimed for web URI initially, but is also used for filesystems in a simple way
sub collapse_dots
{
my $self = shift( @_ );
my $path = shift( @_ );
my $opts = $self->_get_args_as_hash( @_ );
# To avoid warnings
$opts->{separator} //= '';
# A path separator is provided when dealing with filesystem and not web URI
# We use this to know what to return and how to behave
my $sep = length( $opts->{separator} ) ? $opts->{separator} : '/';
return( '' ) if( !length( $path ) );
my $u = $opts->{separator} ? URI::file->new( $path ) : URI->new( $path );
my( @callinfo ) = caller;
$path = $opts->{separator} ? $u->file( $^O ) : $u->path;
my @new = ();
my $len = length( $path );
# "If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer"
if( substr( $path, 0, 2 ) eq ".${sep}" )
{
substr( $path, 0, 2 ) = '';
}
elsif( substr( $path, 0, 3 ) eq "..${sep}" )
{
substr( $path, 0, 3 ) = '';
}
# "if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer"
elsif( substr( $path, 0, 3 ) eq "${sep}.${sep}" )
{
substr( $path, 0, 3 ) = $sep;
}
elsif( substr( $path, 0, 2 ) eq "${sep}." && 2 == $len )
{
substr( $path, 0, 2 ) = $sep;
}
elsif( $path eq '..' || $path eq '.' )
{
$path = '';
}
elsif( $path eq $sep )
{
return( $u );
}
# -1 is used to ensure trailing blank entries do not get removed
my @segments = split( "\Q$sep\E", $path, -1 );
for( my $i = 0; $i < scalar( @segments ); $i++ )
{
my $segment = $segments[$i];
# "if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer"
if( $segment eq '..' )
{
pop( @new );
}
elsif( $segment eq '.' )
{
next;
}
else
{
push( @new, ( defined( $segment ) ? $segment : '' ) );
}
}
# Finally, the output buffer is returned as the result of remove_dot_segments.
my $new_path = join( $sep, @new );
# substr( $new_path, 0, 0 ) = $sep unless( substr( $new_path, 0, 1 ) eq '/' );
substr( $new_path, 0, 0 ) = $sep unless( File::Spec->file_name_is_absolute( $new_path ) );
if( $opts->{separator} )
{
$u = URI::file->new( $new_path );
}
else
{
$u->path( $new_path );
}
return( $u );
}
# Credits: Path::Tiny
sub slurp
{
my $self = shift( @_ );
my $args = {};
no warnings 'uninitialized';
$args = Scalar::Util::reftype( $_[0] ) eq 'HASH'
? shift( @_ )
: !( scalar( @_ ) % 2 )
? { @_ }
: {};
my $file = $args->{filename} || $args->{file} || $self->filename;
return( $self->error( "No filename found." ) ) if( !length( $file ) );
my $binmode = $args->{binmode} // '';
local $@;
# try-catch
my $rv = eval
{
my $fh = IO::File->new( "<$file" ) ||
return( $self->error( "Unable to open file \"$file\" in read mode: $!" ) );
$fh->binmode( $binmode ) if( length( $binmode ) );
my $size;
if( $binmode eq ':unix' && ( $size = -s( $fh ) ) )
{
my $buf;
$fh->read( $buf, $size );
return( $buf );
}
else
{
local $/;
return( scalar( <$fh> ) );
}
};
if( $@ )
{
return( $self->error( "An error occured while trying to open and read file \"$file\": $@" ) );
}
return( $rv );
}
sub slurp_utf8
{
my $self = shift( @_ );
my $args = {};
no warnings 'uninitialized';
$args = Scalar::Util::reftype( $_[0] ) eq 'HASH'
? shift( @_ )
: !( scalar( @_ ) % 2 )
? { @_ }
: {};
$args->{binmode} = ':utf8';
my $file = $args->{filename} || $args->{file} || $self->filename;
return( $self->error( "No filename found." ) ) if( !length( $file ) );
$args->{filename} = $file;
return( $self->slurp( $args ) );
}
1;
# NOTE: POD
__END__
=encoding utf-8
=head1 NAME
Apache2::SSI::Common - Apache2 Server Side Include Common Resources
=head1 VERSION
v0.1.2
=head1 SYNOPSIS
use parent qw( Apache2::SSI::Common );
=head1 DESCRIPTION
There is no specific api for this. This module contains only common resources used by other modules in this distribution.
=head1 METHODS
=head2 collapse_dots
Provided with an uri, and this will resolve the path and removing the dots, such as C<.> and C<..> and return an L<URI> object.
This is done as per the L<RFC 3986 section 5.2.4 algorithm|https://tools.ietf.org/html/rfc3986#page-33>
my $uri = $ssi->collapse_dots( '/../a/b/../c/./d.html' );
# would become /a/c/d.html
my $uri = $ssi->collapse_dots( '/../a/b/../c/./d.html?foo=../bar' );
# would become /a/c/d.html?foo=../bar
$uri->query # foo=../bar
=head2 slurp
It returns the content of the L</filename>
it takes an hash reference of parameters:
=over 4
=item I<binmode>
my $content = $uri->slurp({ binmode => ':utf-8' });
=back
It will return undef and sets an L<Module::Generic/error> if there is no L</filename> value set or if the file cannot be opened.
=head2 slurp_utf8
It returns the content of the file L</filename> utf-8 decoded.
This is equivalent to:
my $content = $uri->slurp({ binmode => ':utf8' });
C<:utf8> is slightly a bit more lax than C<:utf-8>, so it you want strict utf8, you can do:
my $content = $uri->slurp({ binmode => ':utf-8' });
=head1 AUTHOR
Jacques Deguest E<lt>F<jack@deguest.jp>E<gt>
CPAN ID: jdeguest
=head1 SEE ALSO
L<Apache2::SSI::File>, L<Apache2::SSI::URI>
=head1 COPYRIGHT & LICENSE
Copyright (c) 2020-2021 DEGUEST Pte. Ltd.
You can use, copy, modify and redistribute this package and associated
files under the same terms as Perl itself.
=cut