From Code to Community: Sponsoring The Perl and Raku Conference 2025 Learn more

#! /bin/false
# Copyright (C) 2016-2020 Guido Flohr <guido.flohr@cantanea.com>,
# all rights reserved.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
package Qgoda::Util;
$Qgoda::Util::VERSION = '0.9.8';
use strict;
use File::Path qw(make_path);
use File::Basename qw(fileparse);
use Locale::TextDomain qw(qgoda);
use Scalar::Util qw(reftype looks_like_number);
use Encode 2.12;
use File::Find ();
use Data::Walk 2.00;
use Storable qw(freeze);
use URI::Escape qw(uri_escape_utf8);
use base 'Exporter';
use vars qw(@EXPORT_OK);
@EXPORT_OK = qw(empty read_file write_file yaml_error front_matter lowercase
expand_perl_format read_body merge_data interpolate
normalize_path strip_suffix
perl_identifier perl_class class2module
slugify html_escape unmarkup globstar trim
flatten2hash is_archive archive_extender collect_defaults
canonical purify safe_yaml_load
escape_link blength qstrftime tt2_args_merge);
sub js_unescape($);
sub tokenize($$);
sub evaluate($$);
sub lookup($$);
sub _globstar($;$);
my $unsafe_for_links = "^A-Za-z0-9\-\._~/";
sub empty(;$) {
my ($what) = @_;
$what = $_ if !@_;
return if defined $what && length $what;
return 1;
}
sub read_file($) {
my ($filename) = @_;
my $fh = IO::File->new;
open $fh, "<", $filename or return;
local $/;
my $data = <$fh>;
$fh->close;
return $data;
}
sub front_matter($) {
my ($filename) = @_;
my $fh = IO::File->new;
open $fh, "<", $filename or return;
undef $!;
my $first_line = <$fh>;
return if empty $first_line;
return if $first_line !~ /---[ \t]*\n$/o;
my $front_matter = '';
while (1) {
my $line = <$fh>;
return if !defined $line;
return $front_matter if $line =~ /---[ \t]*\n$/o;
$front_matter .= $line;
}
return;
}
sub read_body($$) {
my ($filename, $placeholder) = @_;
my $fh = IO::File->new;
open $fh, "<:encoding(UTF-8)", $filename or die "$!\n";
undef $!;
my $first_line = <$fh>;
die __"File is empty!\n" if empty $first_line;
die __"File does not begin with '---'!\n" if $first_line !~ /---[ \t]*\n$/o;
my $lines = 1;
while (1) {
++$lines;
my $line = <$fh>;
die __"Unterminated front matter!\n" if !defined $line;
last if $line =~ /---[ \t]*\n$/o;
}
local $/;
my $front_matter = "$placeholder" x $lines;
return $front_matter . <$fh>;
}
sub write_file($$) {
my ($path, $data) = @_;
my (undef, $directory) = fileparse $path;
make_path $directory unless -e $directory;
my $octets;
if (Encode::is_utf8($data)) {
my $handle_malformed = sub {
my $replacement = sprintf "{{+%04X}}", shift;
warn "malformed multi-byte sequence, search for '$replacement' in output file\n";
return $replacement;
};
$octets = Encode::encode('UTF-8', $data, $handle_malformed);
} else {
$octets = $data;
}
open my $fh, ">", $path or return;
$fh->print($octets) or return;
$fh->close or return;
return 1;
}
sub yaml_error {
my ($filename, $error) = @_;
my @lines = split /\n/, $error;
pop @lines;
return "$filename: " . join "\n", @lines;
}
sub lowercase($) {
my ($str) = @_;
return lc $str;
}
sub merge_data {
my ($data, $overlay) = @_;
# Return $overlay if it is of a different type than $data.
my $equal_ref = sub {
my ($x, $y) = @_;
return if !ref $x;
return if !ref $y;
my $ref_x = reftype $x;
my $ref_y = reftype $y;
return $ref_x eq $ref_y;
};
return $overlay if !$equal_ref->($overlay, $data);
return $overlay if 'ARRAY' eq reftype $overlay;
my $merger;
$merger = sub {
my ($d, $o) = @_;
foreach my $key (keys %$d) {
if (exists $o->{$key}) {
if (!$equal_ref->($d->{$key}, $o->{$key})) {
eval { $d->{$key} = $o->{$key}; };
} elsif (UNIVERSAL::isa($d->{$key}, 'HASH')) {
$merger->($d->{$key}, $o->{$key});
} else {
$d->{$key} = $o->{$key};
}
}
}
foreach my $key (keys %$o) {
if (!exists $d->{$key}) {
$d->{$key} = $o->{$key};
}
}
};
$merger->($data, $overlay);
return $data;
}
sub interpolate($$) {
my ($string, $data) = @_;
$data ||= {};
my $type = reftype $data;
if ($type ne 'ARRAY' && $type ne 'HASH') {
$type = 'HASH';
$data = {};
}
my $result = '';
while ($string =~ s/^([^\{]*)\{//) {
$result .= $1;
my ($remainder, @tokens) = tokenize $string, $type;
# Syntax errors can be handled in different ways.
# You can handle it gracefully and either leave
# everything uninterpolated, or you could replace the
# faulty string with the emtpy string or you can throw an
# exception. We just throw an exception.
die "syntax error before: '$remainder'\n" if !@tokens;
my $value = evaluate \@tokens, $data;
$result .= $value if defined $value;
$string = $remainder;
}
return $result . $string;
}
sub normalize_path($;$) {
my ($dir, $trailing_slash) = @_;
$dir =~ s{[\\/]+}{/}g;
$dir =~ s{/$}{} unless $trailing_slash;
return $dir;
}
sub strip_suffix($) {
my ($filename) = @_;
my @parts = split /\./, $filename;
my @suffixes;
while (@parts > 1) {
last if $parts[-1] =~ /[^a-zA-Z0-9]/;
unshift @suffixes, pop @parts
}
my $basename = join '.', @parts;
return $basename, grep { /./ } @suffixes;
}
##############################################################################
# The methods below are not exported.
##############################################################################
sub tokenize($$) {
my ($string, $type) = @_;
my @tokens;
my $depth = 0;
while (1) {
$string =~ s/^[ \t\r\n]+//;
last if !length $string;
last if $string =~ s/^\}//;
my $last = @tokens ? $tokens[-1]->[0] : '[';
if ($last eq '.') {
# Only variables are allowed but they are interpreted as
# a quoted string. We will repair that later, however.
return $string unless $string =~ s/^([^\[\]\}\.]+)//;
push @tokens, ['v', $1];
} elsif ($last eq 'v' || $last eq ']') {
# Only brackets or a dot are allowed. Everything else is a
# syntax error.
return $string unless $string =~ s/^([\[\]\.])//;
if ('[' eq $1) {
++$depth;
push @tokens, ['[' => ''];
} elsif (']' eq $1) {
--$depth;
return "]$string" if $depth < 0;
push @tokens, [']' => ''];
} else {
# A dot.
push @tokens, ['.', ''];
}
} elsif ($last eq '[') {
# At the beginning or after an opening bracket only quoted
# strings are allowed. Everything but a quoted string is
# treated as a variable.
if ($string =~ s/^(["'])([^\\\1]*(?:\\.[^\\\1]*)*)\1//) {
push @tokens, ['q', $2];
} elsif ($string =~ s/^([^\[\]\}\.]+)//) {
push @tokens, ['v', $1];
} elsif (!@tokens && $string =~ s/^\[//) {
# Special case. We want to allow starting an expression
# with an opening bracket so that you can write something
# like ["key with special characters"].
push @tokens, ['[', ''];
} else {
return $string;
}
} else {
# The last token was a quoted string (because all other
# possibilities are handled above. The only legal token after
# a quoted string is the closing bracket.
return $string unless $string =~ s/^]//;
push @tokens, [']', ''];
}
}
# Bracket not closed.
return '}' if $depth;
# We may have a trailing dot in our expression. We check that now
# and change the type of "variables" following a dot to a quoted
# string.
#
# We also must repair the type for "variables" that look like numbers
# and are enclosed in angle brackets. Only in this case they are
# treated like numbers. And numbers are the same as quoted strings
# for our purposes.
# If they are exactly between two brackets they are numbers, otherwise
# we try them as variables.
for (my $i = 0; $i < @tokens; ++$i) {
if ('.' eq $tokens[$i]->[0]) {
return $string if $i >= $#tokens;
$tokens[++$i]->[0] = 'q';
} elsif ('[' eq $tokens[$i]->[0]
&& 'v' eq $tokens[$i + 1]->[0]
&& ']' eq $tokens[$i + 2]->[0]
&& $tokens[$i + 1]->[1] =~ /^[-+]?(?:0|[1-9][0-9]*)$/) {
# Change the type to a quoted string.
$tokens[$i + 1]->[0] = 'q';
# And shorten the loop again.
$i = $i + 2;
}
}
return $string, @tokens;
}
sub evaluate($$) {
my ($tokens, $data) = @_;
my $cursor = $data;
while (@$tokens) {
my $token = shift @$tokens;
my ($toktype, $value) = @$token;
if ('[' eq $toktype) {
# We have to recurse.
my $key = evaluate $tokens, $data;
$cursor = lookup $cursor, $key;
} elsif (']' eq $toktype) {
return $cursor;
} elsif ('.' eq $toktype) {
$token = shift @$tokens;
$cursor = lookup $cursor, $token->[1];
} elsif ('v' eq $toktype) {
$cursor = lookup $cursor, $value;
} elsif ('q' eq $toktype) {
$cursor = $value;
} else {
die "unknown token type '$toktype'";
}
}
return $cursor;
}
sub lookup($$) {
my ($data, $key) = @_;
my $type = reftype $data;
if ('HASH' eq $type) {
return $data->{$key};
} elsif ('ARRAY' eq $type) {
return $data->[$key];
} else {
return;
}
}
sub js_unescape($) {
my ($string) = @_;
my %escapes = (
"\n" => '',
0 => "\000", # Note that octal escapes are not supported!
b => "\x08",
f => "\x0c",
n => "\x0a",
r => "\x0d",
t => "\x09",
v => "\x0b",
"'" => "'",
'"' => '"',
'\\' => '\\',
);
$string =~ s/
\\
(
x[0-9a-fA-F]{2}
|
u[0-9a-fA-F]{4}
|
u\{[0-9a-fA-F]+\}
|
.
)
/
if (exists $escapes{$1}) {
$escapes{$1}
} elsif (1 == length $1) {
$1;
} elsif ('x' eq substr $1, 0, 1) {
chr oct '0' . $1;
} elsif ('u' eq substr $1, 0, 1) {
if ('u{' eq substr $1, 0, 2) {
my $code = substr $1, 0, 2;
$code =~ s{^0+}{};
$code ||= '0';
chr oct '0x' . $code;
} else {
chr oct '0x' . substr $1, 1;
}
}
/xegs;
return $string;
}
sub perl_identifier($) {
my ($name) = @_;
return $name =~ /^[_a-zA-Z][_0-9a-zA-Z]*$/o;
}
sub perl_class($) {
my ($name) = @_;
return $name =~ /^[_a-zA-Z][_0-9a-zA-Z]*(?:::[_a-zA-Z][_0-9a-zA-Z]*)*$/o;
}
sub class2module($) {
my ($class) = @_;
$class =~ s{(?:::|')}{/}g;
return $class . '.pm';
}
sub slugify($;$) {
my ($string, $locale) = @_;
return '' if !defined $string;
Encode::_utf8_on($string);
require Text::Unidecode;
my $slug = lc Text::Unidecode::unidecode($string);
# We only allow alphanumerical characters, the dot, the hyphen and the underscore.
# Everything else gets converted into hyphens, and sequences of hyphens
# are condensed into one.
$slug =~ s/[\x00-\x2c\x2f\x3a-\x5e\x60\x7b-\x7f]/-/g;
$slug =~ s/--+/-/g;
$slug =~ s/^-//;
$slug =~ s/-$//;
$slug = '-' if !length $slug;
return $slug;
}
sub html_escape($) {
my ($string) = @_;
return '' if !defined $string;
my %escapes = (
'"' => '&#34;',
"&" => '&#38;',
"'" => '&#39;',
"<" => '&#60;',
">" => '&#62;',
);
$string =~ s/(["&'<>])/$escapes{$1}/gs;
return $string;
}
sub unmarkup($) {
my ($string) = @_;
return '' if !defined $string;
require HTML::Parser;
my $escaped = '';
my $text_handler = sub {
my ($string) = @_;
$escaped .= $string;
};
my $parser = HTML::Parser->new(api_version => 3,
text_h => [$text_handler, 'text'],
marked_sections => 1);
$parser->parse($string);
$parser->eof;
return $escaped;
}
sub trim($) {
my ($string) = @_;
$string =~ s{^[ \x09-\x0d]+}{};
$string =~ s{[ \x09-\x0d]+$}{};
return $string;
}
sub flatten2hash {
my ($data) = @_;
my @path;
my @types;
my %flat;
my $postprocess = sub {
# Remove the last path component.
pop @path;
pop @types;
};
# The wanted function for Data::Walk.
my $wanted = sub {
++$path[-1] if 'a' eq $types[-1];
my $reftype = reftype $_ || '';
if ('HASH' eq $reftype) {
if (!keys %$_) {
$flat{join '.', @path} = $_;
}
push @types, 'h';
push @path, '';
} elsif ('ARRAY' eq $reftype) {
if (!@$_) {
$flat{join '.', @path} = $_;
}
push @types, 'a';
push @path, -1;
} else {
$reftype = '';
}
if ('HASH' eq $Data::Walk::type) {
if (defined $Data::Walk::key) {
# Value.
if (!ref $_) {
$flat{join '.', @path} = $_;
}
} elsif (!ref $_) {
# Key.
if (/\./) {
$path[-1] = 'INVALID';
} else {
$path[-1] = $_;
}
}
} elsif (!$reftype) {
$flat{join '.', @path} = $_;
}
};
walk {
wanted => $wanted,
postprocess => $postprocess,
}, $data;
return \%flat;
}
# Should better be called looks_like_archive.
my @archive_types = (
'tar',
'tar.gz', 'tgz',
'zip',
'tar.bz2', 'tbz',
'tar.xz', 'txz'
);
my $archive_re = join '|', map { quotemeta } @archive_types;
sub is_archive($) {
my ($path) = @_;
return if $path !~ /\.(?:$archive_re)$/i;
return 1;
}
sub archive_extender($) {
my ($path) = @_;
return if $path !~ /(\.(?:$archive_re))/i;
return lc $1;
}
sub collect_defaults($$) {
my ($path, $rules) = @_;
my $vars = {};
foreach my $rule (@$rules) {
my ($matcher, $values) = @$rule;
merge_data $vars, $values if $matcher->matchInclude($path);
}
return $vars;
}
sub canonical {
my ($obj) = @_;
local $Storable::canonical = 1;
return freeze $obj;
}
sub purify {
my ($data) = @_;
my $type = reftype $data;
die "only hashes and arrays supported"
if ($type ne 'HASH' && $type ne 'ARRAY');
my @stack = ([$type, []]);
my $preprocess = sub {
if ('HASH' eq $Data::Walk::type) {
push @stack, [HASH => []];
} else {
push @stack, [ARRAY => []];
}
return @_;
};
my $postprocess = sub {
my $item = pop @stack;
my ($type, $store) = @$item;
if ('HASH' eq $type) {
$store = {@$store};
}
my $current = $stack[-1]->[1];
push @$current, $store;
};
my $wanted = sub {
if (ref $_) {
my $reftype = reftype $_;
if ('HASH' eq $reftype || 'ARRAY' eq $reftype) {
return;
}
}
my $store = $stack[-1]->[1];
push @$store, "$_";
};
walk { wanted => $wanted, preprocess => $preprocess,
postprocess => $postprocess }, $data;
my $item = pop @stack;
$type = $item->[0];
return $item->[1]->[0];
}
# Maybe we want to remove the utf-8 flag from the returned yaml data again ...
sub safe_yaml_load {
my ($yaml) = @_;
return YAML::XS::Load($yaml);
}
sub escape_link {
my $link = shift;
$link = '' if empty $link;
return uri_escape_utf8 $link, $unsafe_for_links;
}
sub blength {
my ($scalar) = @_;
return length $scalar if !Encode::is_utf8($scalar);
Encode::_utf8_off($scalar);
my $blength = length $scalar;
Encode::_utf8_on($scalar);
return $blength;
}
sub qstrftime($;$$$) {
my ($format, $date, $lingua, $markup) = @_;
my ($open, $close) = $markup ? ("<$markup>", "</$markup>") : ("", "");
my %converters = (
de => sub { shift . '.' },
en => sub {
my ($mday) = @_;
my $last_digit = ($mday >= 11 && $mday <= 13) ? 0
: substr $mday, -1, 1;
if (1 == $last_digit) {
return "${mday}${open}st${close}";
} elsif (2 == $last_digit) {
return "${mday}${open}nd${close}";
} elsif (3 == $last_digit) {
return "${mday}${open}rd${close}";
} else {
return "${mday}${open}th${close}";
}
},
fr => sub {
my ($mday) = @_;
if (1 == $mday) {
return "${mday}${open}er${close}";
} else {
shift;
}
},
);
if (!defined $lingua) {
$lingua = POSIX::setlocale(POSIX::LC_TIME()) || '';
# FIXME! This will not work under Windows. But we can use a mapping
# tabel from Locale::Util.
}
$lingua = lc substr $lingua, 0, 2;
my $handler = $converters{$lingua} || sub { shift };
my @then = localtime $date;
my $mday = $then[3];
# The handler will probably never be called more than once. No need to
# cache the result.
$format =~ s/\%([#\%])/$1 eq '%' ? '%%' : $handler->($mday)/ge;
return POSIX::strftime($format, localtime $date);
}
sub tt2_args_merge($$$$) {
my ($global_args, $global_conf, $local_args, $local_conf) = @_;
my @args = @$global_args;
my %conf = %$global_conf;
foreach my $arg (@$local_args) {
if ($arg =~ /^-(.*)/) {
@args = grep { $_ ne $1 } @args;
} else {
push @args, $arg;
}
}
while (my ($key, $value) = each %$local_conf) {
$conf{$key} = $value;
}
return \@args, \%conf;
}
1;