#! /bin/false # Copyright (C) 2016-2018 Guido Flohr <guido.flohr@cantanea.com>, # all rights reserved. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. package Qgoda::Util; $Qgoda::Util::VERSION = 'v0.9.3'; use strict; use IO::File; use File::Path qw(make_path); use File::Basename qw(fileparse); use Locale::TextDomain qw(qgoda); use Scalar::Util qw(reftype looks_like_number); use Encode 2.12; use File::Find (); use Data::Walk 2.00; use Storable qw(freeze); use YAML::XS; use URI::Escape qw(uri_escape_utf8); use base 'Exporter'; use vars qw(@EXPORT_OK); @EXPORT_OK = qw(empty read_file write_file yaml_error front_matter lowercase expand_perl_format read_body merge_data interpolate normalize_path strip_suffix perl_identifier perl_class class2module slugify html_escape unmarkup globstar trim flatten2hash is_archive archive_extender collect_defaults canonical purify safe_yaml_load escape_link blength qstrftime tt2_args_merge); sub js_unescape($); sub tokenize($$); sub evaluate($$); sub lookup($$); sub _globstar($;$); my $unsafe_for_links = "^A-Za-z0-9\-\._~/"; sub empty(;$) { my ($what) = @_; $what = $_ if !@_; return if defined $what && length $what; return 1; } sub read_file($) { my ($filename) = @_; my $fh = IO::File->new; open $fh, "<", $filename or return; local $/; my $data = <$fh>; $fh->close; return $data; } sub front_matter($) { my ($filename) = @_; my $fh = IO::File->new; open $fh, "<", $filename or return; undef $!; my $first_line = <$fh>; return if empty $first_line; return if $first_line !~ /---[ \t]*\n$/o; my $front_matter = ''; while (1) { my $line = <$fh>; return if !defined $line; return $front_matter if $line =~ /---[ \t]*\n$/o; $front_matter .= $line; } return; } sub read_body($$) { my ($filename, $placeholder) = @_; my $fh = IO::File->new; open $fh, "<", $filename or return; undef $!; my $first_line = <$fh>; return if empty $first_line; return if $first_line !~ /---[ \t]*\n$/o; my $lines = 1; while (1) { ++$lines; my $line = <$fh>; return if !defined $line; last if $line =~ /---[ \t]*\n$/o; } local $/; my $front_matter = "$placeholder" x $lines; return $front_matter . <$fh>; } sub write_file($$) { my ($path, $data) = @_; my (undef, $directory) = fileparse $path; make_path $directory unless -e $directory; my $octets; if (Encode::is_utf8($data)) { my $handle_malformed = sub { my $replacement = sprintf "{{+%04X}}", shift; warn "malformed multi-byte sequence, search for '$replacement' in output file\n"; return $replacement; }; $octets = Encode::encode('UTF-8', $data, $handle_malformed); } else { $octets = $data; } open my $fh, ">", $path or return; $fh->print($octets) or return; $fh->close or return; return 1; } sub yaml_error { my ($filename, $error) = @_; my @lines = split /\n/, $error; pop @lines; return "$filename: " . join "\n", @lines; } sub lowercase($) { my ($str) = @_; return lc $str; } sub merge_data { my ($data, $overlay) = @_; # Return $overlay if it is of a different type than $data. my $equal_ref = sub { my ($x, $y) = @_; return if !ref $x; return if !ref $y; my $ref_x = reftype $x; my $ref_y = reftype $y; return $ref_x eq $ref_y; }; return $overlay if !$equal_ref->($overlay, $data); return $overlay if 'ARRAY' eq reftype $overlay; my $merger; $merger = sub { my ($d, $o) = @_; foreach my $key (keys %$d) { if (exists $o->{$key}) { if (!$equal_ref->($d->{$key}, $o->{$key})) { eval { $d->{$key} = $o->{$key}; }; } elsif (UNIVERSAL::isa($d->{$key}, 'HASH')) { $merger->($d->{$key}, $o->{$key}); } else { $d->{$key} = $o->{$key}; } } } foreach my $key (keys %$o) { if (!exists $d->{$key}) { $d->{$key} = $o->{$key}; } } }; $merger->($data, $overlay); return $data; } sub interpolate($$) { my ($string, $data) = @_; $data ||= {}; my $type = reftype $data; if ($type ne 'ARRAY' && $type ne 'HASH') { $type = 'HASH'; $data = {}; } my $result = ''; while ($string =~ s/^([^\{]*)\{//) { $result .= $1; my ($remainder, @tokens) = tokenize $string, $type; # Syntax errors can be handled in different ways. # You can handle it gracefully and either leave # everything uninterpolated, or you could replace the # faulty string with the emtpy string or you can throw an # exception. We just throw an exception. die "syntax error before: '$remainder'\n" if !@tokens; my $value = evaluate \@tokens, $data; $result .= $value if defined $value; $string = $remainder; } return $result . $string; } sub normalize_path($;$) { my ($dir, $trailing_slash) = @_; $dir =~ s{[\\/]+}{/}g; $dir =~ s{/$}{} unless $trailing_slash; return $dir; } sub strip_suffix($) { my ($filename) = @_; my @parts = split /\./, $filename; my @suffixes; while (@parts > 1) { last if $parts[-1] =~ /[^a-zA-Z0-9]/; unshift @suffixes, pop @parts } my $basename = join '.', @parts; return $basename, grep { /./ } @suffixes; } ############################################################################## # The methods below are not exported. ############################################################################## sub tokenize($$) { my ($string, $type) = @_; my @tokens; my $depth = 0; while (1) { $string =~ s/^[ \t\r\n]+//; last if !length $string; last if $string =~ s/^\}//; my $last = @tokens ? $tokens[-1]->[0] : '['; if ($last eq '.') { # Only variables are allowed but they are interpreted as # a quoted string. We will repair that later, however. return $string unless $string =~ s/^([^\[\]\}\.]+)//; push @tokens, ['v', $1]; } elsif ($last eq 'v' || $last eq ']') { # Only brackets or a dot are allowed. Everything else is a # syntax error. return $string unless $string =~ s/^([\[\]\.])//; if ('[' eq $1) { ++$depth; push @tokens, ['[' => '']; } elsif (']' eq $1) { --$depth; return "]$string" if $depth < 0; push @tokens, [']' => '']; } else { # A dot. push @tokens, ['.', '']; } } elsif ($last eq '[') { # At the beginning or after an opening bracket only quoted # strings are allowed. Everything but a quoted string is # treated as a variable. if ($string =~ s/^(["'])([^\\\1]*(?:\\.[^\\\1]*)*)\1//) { push @tokens, ['q', $2]; } elsif ($string =~ s/^([^\[\]\}\.]+)//) { push @tokens, ['v', $1]; } elsif (!@tokens && $string =~ s/^\[//) { # Special case. We want to allow starting an expression # with an opening bracket so that you can write something # like ["key with special characters"]. push @tokens, ['[', '']; } else { return $string; } } else { # The last token was a quoted string (because all other # possibilities are handled above. The only legal token after # a quoted string is the closing bracket. return $string unless $string =~ s/^]//; push @tokens, [']', '']; } } # Bracket not closed. return '}' if $depth; # We may have a trailing dot in our expression. We check that now # and change the type of "variables" following a dot to a quoted # string. # # We also must repair the type for "variables" that look like numbers # and are enclosed in angle brackets. Only in this case they are # treated like numbers. And numbers are the same as quoted strings # for our purposes. # If they are exactly between two brackets they are numbers, otherwise # we try them as variables. for (my $i = 0; $i < @tokens; ++$i) { if ('.' eq $tokens[$i]->[0]) { return $string if $i >= $#tokens; $tokens[++$i]->[0] = 'q'; } elsif ('[' eq $tokens[$i]->[0] && 'v' eq $tokens[$i + 1]->[0] && ']' eq $tokens[$i + 2]->[0] && $tokens[$i + 1]->[1] =~ /^[-+]?(?:0|[1-9][0-9]*)$/) { # Change the type to a quoted string. $tokens[$i + 1]->[0] = 'q'; # And shorten the loop again. $i = $i + 2; } } return $string, @tokens; } sub evaluate($$) { my ($tokens, $data) = @_; my $cursor = $data; while (@$tokens) { my $token = shift @$tokens; my ($toktype, $value) = @$token; if ('[' eq $toktype) { # We have to recurse. my $key = evaluate $tokens, $data; $cursor = lookup $cursor, $key; } elsif (']' eq $toktype) { return $cursor; } elsif ('.' eq $toktype) { $token = shift @$tokens; $cursor = lookup $cursor, $token->[1]; } elsif ('v' eq $toktype) { $cursor = lookup $cursor, $value; } elsif ('q' eq $toktype) { $cursor = $value; } else { die "unknown token type '$toktype'"; } } return $cursor; } sub lookup($$) { my ($data, $key) = @_; my $type = reftype $data; if ('HASH' eq $type) { return $data->{$key}; } elsif ('ARRAY' eq $type) { return $data->[$key]; } else { return; } } sub js_unescape($) { my ($string) = @_; my %escapes = ( "\n" => '', 0 => "\000", # Note that octal escapes are not supported! b => "\x08", f => "\x0c", n => "\x0a", r => "\x0d", t => "\x09", v => "\x0b", "'" => "'", '"' => '"', '\\' => '\\', ); $string =~ s/ \\ ( x[0-9a-fA-F]{2} | u[0-9a-fA-F]{4} | u\{[0-9a-fA-F]+\} | . ) / if (exists $escapes{$1}) { $escapes{$1} } elsif (1 == length $1) { $1; } elsif ('x' eq substr $1, 0, 1) { chr oct '0' . $1; } elsif ('u' eq substr $1, 0, 1) { if ('u{' eq substr $1, 0, 2) { my $code = substr $1, 0, 2; $code =~ s{^0+}{}; $code ||= '0'; chr oct '0x' . $code; } else { chr oct '0x' . substr $1, 1; } } /xegs; return $string; } sub perl_identifier($) { my ($name) = @_; return $name =~ /^[_a-zA-Z][_0-9a-zA-Z]*$/o; } sub perl_class($) { my ($name) = @_; return $name =~ /^[_a-zA-Z][_0-9a-zA-Z]*(?:::[_a-zA-Z][_0-9a-zA-Z]*)*$/o; } sub class2module($) { my ($class) = @_; $class =~ s{(?:::|')}{/}g; return $class . '.pm'; } sub slugify($;$) { my ($string, $locale) = @_; return '' if !defined $string; Encode::_utf8_on($string); require Text::Unidecode; my $slug = lc Text::Unidecode::unidecode($string); # We only allow alphanumerical characters, the dot, the hyphen and the underscore. # Everything else gets converted into hyphens, and sequences of hyphens # are condensed into one. $slug =~ s/[\x00-\x2c\x2f\x3a-\x5e\x60\x7b-\x7f]/-/g; $slug =~ s/--+/-/g; $slug =~ s/^-//; $slug =~ s/-$//; $slug = '-' if !length $slug; return $slug; } sub html_escape($) { my ($string) = @_; return '' if !defined $string; my %escapes = ( '"' => '"', "&" => '&', "'" => ''', "<" => '<', ">" => '>', ); $string =~ s/(["&'<>])/$escapes{$1}/gs; return $string; } sub unmarkup($) { my ($string) = @_; return '' if !defined $string; require HTML::Parser; my $escaped = ''; my $text_handler = sub { my ($string) = @_; $escaped .= $string; }; my $parser = HTML::Parser->new(api_version => 3, text_h => [$text_handler, 'text'], marked_sections => 1); $parser->parse($string); $parser->eof; return $escaped; } sub trim($) { my ($string) = @_; $string =~ s{^[ \x09-\x0d]+}{}; $string =~ s{[ \x09-\x0d]+$}{}; return $string; } sub flatten2hash { my ($data) = @_; my @path; my @types; my %flat; my $postprocess = sub { # Remove the last path component. pop @path; pop @types; }; # The wanted function for Data::Walk. my $wanted = sub { ++$path[-1] if 'a' eq $types[-1]; my $reftype = reftype $_ || ''; if ('HASH' eq $reftype) { if (!keys %$_) { $flat{join '.', @path} = $_; } push @types, 'h'; push @path, ''; } elsif ('ARRAY' eq $reftype) { if (!@$_) { $flat{join '.', @path} = $_; } push @types, 'a'; push @path, -1; } else { $reftype = ''; } if ('HASH' eq $Data::Walk::type) { if (defined $Data::Walk::key) { # Value. if (!ref $_) { $flat{join '.', @path} = $_; } } elsif (!ref $_) { # Key. if (/\./) { $path[-1] = 'INVALID'; } else { $path[-1] = $_; } } } elsif (!$reftype) { $flat{join '.', @path} = $_; } }; walk { wanted => $wanted, postprocess => $postprocess, }, $data; return \%flat; } # Should better be called looks_like_archive. my @archive_types = ( 'tar', 'tar.gz', 'tgz', 'zip', 'tar.bz2', 'tbz', 'tar.xz', 'txz' ); my $archive_re = join '|', map { quotemeta } @archive_types; sub is_archive($) { my ($path) = @_; return if $path !~ /\.(?:$archive_re)$/i; return 1; } sub archive_extender($) { my ($path) = @_; return if $path !~ /(\.(?:$archive_re))/i; return lc $1; } sub collect_defaults($$) { my ($path, $rules) = @_; my $vars = {}; foreach my $rule (@$rules) { my ($matcher, $values) = @$rule; merge_data $vars, $values if $matcher->matchInclude($path); } return $vars; } sub canonical { my ($obj) = @_; local $Storable::canonical = 1; return freeze $obj; } sub purify { my ($data) = @_; my $type = reftype $data; die "only hashes and arrays supported" if ($type ne 'HASH' && $type ne 'ARRAY'); my @stack = ([$type, []]); my $preprocess = sub { if ('HASH' eq $Data::Walk::type) { push @stack, [HASH => []]; } else { push @stack, [ARRAY => []]; } return @_; }; my $postprocess = sub { my $item = pop @stack; my ($type, $store) = @$item; if ('HASH' eq $type) { $store = {@$store}; } my $current = $stack[-1]->[1]; push @$current, $store; }; my $wanted = sub { if (ref $_) { my $reftype = reftype $_; if ('HASH' eq $reftype || 'ARRAY' eq $reftype) { return; } } my $store = $stack[-1]->[1]; push @$store, "$_"; }; walk { wanted => $wanted, preprocess => $preprocess, postprocess => $postprocess }, $data; my $item = pop @stack; $type = $item->[0]; return $item->[1]->[0]; } # Maybe we want to remove the utf-8 flag from the returned yaml data again ... sub safe_yaml_load { my ($yaml) = @_; return YAML::XS::Load($yaml); } sub escape_link { my $link = shift; $link = '' if empty $link; return uri_escape_utf8 $link, $unsafe_for_links; } sub blength { my ($scalar) = @_; return length $scalar if !Encode::is_utf8($scalar); Encode::_utf8_off($scalar); my $blength = length $scalar; Encode::_utf8_on($scalar); return $blength; } sub qstrftime($;$$$) { my ($format, $date, $lingua, $markup) = @_; my ($open, $close) = $markup ? ("<$markup>", "</$markup>") : ("", ""); my %converters = ( de => sub { shift . '.' }, en => sub { my ($mday) = @_; my $last_digit = ($mday >= 11 && $mday <= 13) ? 0 : substr $mday, -1, 1; if (1 == $last_digit) { return "${mday}${open}st${close}"; } elsif (2 == $last_digit) { return "${mday}${open}nd${close}"; } elsif (3 == $last_digit) { return "${mday}${open}rd${close}"; } else { return "${mday}${open}th${close}"; } }, fr => sub { my ($mday) = @_; if (1 == $mday) { return "${mday}${open}er${close}"; } else { shift; } }, ); if (!defined $lingua) { $lingua = POSIX::setlocale(POSIX::LC_TIME()) || ''; # FIXME! This will not work under Windows. But we can use a mapping # tabel from Locale::Util. } $lingua = lc substr $lingua, 0, 2; my $handler = $converters{$lingua} || sub { shift }; my @then = localtime $date; my $mday = $then[3]; # The handler will probably never be called more than once. No need to # cache the result. $format =~ s/\%([#\%])/$1 eq '%' ? '%%' : $handler->($mday)/ge; return POSIX::strftime($format, localtime $date); } sub tt2_args_merge($$$$) { my ($global_args, $global_conf, $local_args, $local_conf) = @_; my @args = @$global_args; my %conf = %$global_conf; foreach my $arg (@$local_args) { if ($arg =~ /^-(.*)/) { @args = grep { $_ ne $1 } @args; } else { push @args, $arg; } } while (my ($key, $value) = each %$local_conf) { $conf{$key} = $value; } return \@args, \%conf; } 1;