From Code to Community: Sponsoring The Perl and Raku Conference 2025 Learn more

#!perl
#
# compile_encoding
#
# Copyright (C) 1998 Clark Cooper. All rights reserved.
# Copyright (C) 2007-2008, 2014 Steve Hay. All rights reserved.
#
# This script is free software; you can redistribute it and/or modify it under
# the same terms as Perl itself, i.e. under the terms of either the GNU General
# Public License or the Artistic License, as specified in the LICENCE file.
#
use 5.008001;
use strict;
my $Usage=<<'End_of_Usage;';
Usage is:
compile_encoding [-h] [-o output_file] input_file
Compiles the input XML encmap file into a binary encoding file usable
by XML::Parser.
-h Print this message.
-o output_file Put compiled binary into given output file. By
default, a file that has the same basename as
the input file, but with an extension of .enc
is the output.
End_of_Usage;
package Pfxmap;
use fields qw(min max map explen);
sub new {
my $class = shift;
no strict 'refs'; ## no critic (TestingAndDebugging::ProhibitNoStrict)
my $pfxmap = fields::new($class);
while (@_) {
my $key = shift;
$pfxmap->{$key} = shift;
}
$pfxmap;
}
package main;
use integer;
################################################################
# See the encoding.h file in the top level XML::Encoding directory
# to see the format of generated file
my $magic = 0xfeebface;
my $namelength = 40;
my $ofile;
while (defined($ARGV[0]) and $ARGV[0] =~ /^-/) {
my $opt = shift;
if ($opt eq '-o') {
$ofile = shift;
}
elsif ($opt eq '-h') {
print $Usage;
exit 1;
}
else {
$! = 2;
die "Unrecognized option: $opt\n$Usage";
}
}
my $infile = shift;
if (not defined($infile)) {
$! = 3;
die "Encmap XML file not provided\n$Usage";
}
unless (defined($ofile)) {
my $base = $infile;
$base =~ s!^.*/!!;
if ($base =~ /(.*)\.xml$/i) {
$base = $1;
}
$ofile = $base . '.enc';
}
# Do initializations
my @firstbyte;
$#firstbyte = 255;
my $pfxcount = 0;
my $totcount = 0;
my @stack = ();
my $pfxlenref;
my $currmap = new Pfxmap(min => 255, max => 0, map => \@firstbyte);
my $p = new XML::Encoding(ErrorContext => 2,
ExpatRequired => 1,
PushPrefixFcn => \&push_prefix,
PopPrefixFcn => \&pop_prefix,
RangeSetFcn => \&range_set
);
my $name = $p->parsefile($infile);
if (length($name) > $namelength) {
$! = 4;
die "Encoding name too long (> $namelength)\n";
}
my @prefixes;
my $maplen = 0;
my $pflen = 0;
if ($pfxcount) {
push(@prefixes, $currmap);
$currmap->{map} = [];
$maplen = $totcount + $currmap->{max} - $currmap->{min} + 1;
$pflen = $pfxcount + 1;
}
my $i;
for ($i = 0; $i < 256; $i++) {
if (defined($firstbyte[$i])) {
if ($pfxcount) {
$currmap->{map}->[$i] = $firstbyte[$i];
$firstbyte[$i] = - ($firstbyte[$i]->{explen} + 1)
if ref($firstbyte[$i]);
}
}
else {
$firstbyte[$i] = $i < 128 ? $i : -1;
}
}
my $enc;
open($enc, '>', $ofile) or do {
$! = 5;
die "Couldn't open $ofile for writing:\n$!\n";
};
binmode($enc);
#Note the use of network order packings
print $enc pack("Na${namelength}nnN256",
$magic, $name, $pflen, $maplen, @firstbyte);
my @map = ();
my $head = 0;
while (@prefixes) {
my $pfxmap = shift @prefixes;
$head++;
my $len = $pfxmap->{max} - $pfxmap->{min} + 1;
my $mapstart = @map;
my $ispfx = '';
vec($ispfx, 255, 1) = 0;
my $ischar = '';
vec($ischar, 255, 1) = 0;
for ($i = $pfxmap->{min}; $i <= $pfxmap->{max}; $i++) {
my $entry = $pfxmap->{map}->[$i];
if (defined($entry)) {
if (ref($entry)) {
my $pfxent = $entry;
$entry = $head + @prefixes;
push(@prefixes, $pfxent);
vec($ispfx, $i, 1) = 1;
}
else {
vec($ischar, $i, 1) = 1;
}
}
else {
$entry = 0xFFFF;
}
push(@map, $entry);
}
print $enc pack('CCn', $pfxmap->{min}, $len, $mapstart), $ispfx, $ischar;
}
if (@map) {
my $packlist = 'n' . int(@map);
print $enc pack($packlist, @map);
}
close($enc);
exit 0;
################
## End main
################
sub push_prefix {
my ($byte) = @_;
return "Prefix too long"
if (@stack >= 3);
return "Different lengths for same first byte"
if (defined($pfxlenref)
and defined($$pfxlenref)
and $$pfxlenref < @stack);
my $pfxmap = $currmap->{map}->[$byte];
if (defined($pfxmap)) {
return "Prefix already mapped to a character"
unless ref($pfxmap);
# Remove what we've already added in for this prefix so we don't
# count it twice
$totcount -= $pfxmap->{max} - $pfxmap->{min} + 1;
}
else {
$pfxmap = new Pfxmap(min => 255, max => 0, map => []);
$currmap->{map}->[$byte] = $pfxmap;
}
unless (@stack) {
$pfxlenref = \$pfxmap->{explen};
}
$currmap->{min} = $byte
if $byte < $currmap->{min};
$currmap->{max} = $byte
if $byte > $currmap->{max};
$pfxcount++;
push(@stack, $currmap);
$currmap = $pfxmap;
return;
} # End push_prefix
sub pop_prefix {
return "Attempt to pop un-pushed prefix"
unless (@stack);
my $count = $currmap->{max} - $currmap->{min} + 1;
return "Empty prefix not allowed"
unless $count > 0;
$totcount += $count;
$currmap = pop(@stack);
$pfxlenref = undef
unless @stack;
return;
} # End pop_prefix
sub range_set {
my ($byte, $uni, $len) = @_;
my $limit = $byte + $len;
return "Range too long"
if $limit > 256;
if (defined($pfxlenref)) {
if (defined($$pfxlenref)) {
return "Different for same 1st byte"
unless $$pfxlenref == @stack;
}
else {
$$pfxlenref = @stack;
}
}
my $i;
for ($i = $byte; $i < $limit; $i++, $uni++) {
return "Byte already mapped"
if defined($currmap->{map}->[$i]);
$currmap->{map}->[$i] = $uni;
}
$currmap->{min} = $byte
if $byte < $currmap->{min};
$currmap->{max} = $limit - 1
if $limit >= $currmap->{max};
return;
} # End range_set
__END__
=head1 NAME
compile_encoding - compile XML encmap into a binary encoded file for XML::Parser
=head1 SYNOPSIS
compile_encoding [-h] [-o <output_file>] <input_file>
=head1 DESCRIPTION
B<compile_encoding> compiles an input XML encmap file into a binary encoded file
usable by L<XML::Parser|XML::Parser(3pm)>.
=head1 ARGUMENTS
=over 4
=item E<lt>input_fileE<gt>
The XML encmap file to compile.
=back
=head1 OPTIONS
=over 4
=item B<-o E<lt>output_fileE<gt>>
Put compiled binary into given output file. By default, a file that has the
same basename as the input file, but with an extension of F<.enc> is output.
=item B<-h>
Print usage information.
=back
=head1 EXIT STATUS
0 The script exited normally.
1 The script exited after printing the help.
2 Invalid command-line arguments.
>2 An error occurred.
=head1 KNOWN BUGS
I<None>.
=head1 SEE ALSO
L<make_encmap(1)>,
L<XML::Encoding(3pm)>,
L<XML::Parser(3pm)>.
=head1 AUTHOR
Clark Cooper E<lt>L<coopercc@netheaven.com|mailto:coopercc@netheaven.com>E<gt>.
Steve Hay E<lt>L<shay@cpan.org|mailto:shay@cpan.org>E<gt> is now maintaining
XML::Encoding as of version 2.00.
This manual page was written by Daniel Leidert
E<lt>L<daniel.leidert@wgdd.de|mailto:daniel.leidert@wgdd.de>E<gt> for the Debian
project (but may be used by others).
=head1 COPYRIGHT
Copyright (C) 1998 Clark Cooper. All rights reserved.
Copyright (C) 2007-2008, 2014 Steve Hay. All rights reserved.
=head1 LICENCE
This script is free software; you can redistribute it and/or modify it under the
same terms as Perl itself, i.e. under the terms of either the GNU General Public
License or the Artistic License, as specified in the F<LICENCE> file.
=head1 VERSION
Version 2.11
=head1 DATE
08 Dec 2020
=head1 HISTORY
See the F<Changes> file.
=cut
# Tell Emacs that this is really a perl script
# Local Variables:
# mode:perl
# End: