#!perl
#
# compile_encoding
#
# Copyright (C) 1998 Clark Cooper.  All rights reserved.
# Copyright (C) 2007-2008, 2014 Steve Hay.  All rights reserved.
#
# This script is free software; you can redistribute it and/or modify it under
# the same terms as Perl itself, i.e. under the terms of either the GNU General
# Public License or the Artistic License, as specified in the LICENCE file.
#

use 5.008001;

use strict;
use warnings;

my $Usage=<<'End_of_Usage;';
Usage is:
   compile_encoding [-h] [-o output_file] input_file

Compiles the input XML encmap file into a binary encoding file usable
by XML::Parser.

  -h                    Print this message.

  -o output_file        Put compiled binary into given output file. By
                        default, a file that has the same basename as
                        the input file, but with an extension of .enc
                        is the output.
End_of_Usage;

package Pfxmap;
use fields qw(min max map explen);

sub new {
  my $class = shift;
  no strict 'refs'; ## no critic (TestingAndDebugging::ProhibitNoStrict)
  my $pfxmap = fields::new($class);

  while (@_) {
    my $key = shift;
    $pfxmap->{$key} = shift;
  }

  $pfxmap;
}

package main;

use XML::Encoding;
use integer;

################################################################
# See the encoding.h file in the top level XML::Encoding directory
# to see the format of generated file

my $magic = 0xfeebface;
my $namelength = 40;

my $ofile;

while (defined($ARGV[0]) and $ARGV[0] =~ /^-/) {
  my $opt = shift;

  if ($opt eq '-o') {
    $ofile = shift;
  }
  elsif ($opt eq '-h') {
    print $Usage;
    exit 1;
  }
  else {
    $! = 2;
    die "Unrecognized option: $opt\n$Usage";
  }
}

my $infile = shift;

if (not defined($infile)) {
  $! = 3;
  die "Encmap XML file not provided\n$Usage";
}

unless (defined($ofile)) {
  my $base = $infile;
  $base =~ s!^.*/!!;

  if ($base =~ /(.*)\.xml$/i) {
    $base = $1;
  }

  $ofile = $base . '.enc';
}

# Do initializations

my @firstbyte;
$#firstbyte = 255;

my $pfxcount = 0;
my $totcount = 0;
my @stack = ();
my $pfxlenref;

my $currmap = new Pfxmap(min => 255, max => 0, map => \@firstbyte);

my $p = new XML::Encoding(ErrorContext  => 2,
                          ExpatRequired => 1,
                          PushPrefixFcn => \&push_prefix,
                          PopPrefixFcn  => \&pop_prefix,
                          RangeSetFcn   => \&range_set
                          );

my $name = $p->parsefile($infile);


if (length($name) > $namelength) {
  $! = 4;
  die "Encoding name too long (> $namelength)\n";
}

my @prefixes;
my $maplen = 0;
my $pflen = 0;

if ($pfxcount) {
  push(@prefixes, $currmap);
  $currmap->{map} = [];
  $maplen = $totcount + $currmap->{max} - $currmap->{min} + 1;
  $pflen = $pfxcount + 1;
}

my $i;
for ($i = 0; $i < 256; $i++) {
  if (defined($firstbyte[$i])) {
    if ($pfxcount) {
      $currmap->{map}->[$i] = $firstbyte[$i];
      $firstbyte[$i] = - ($firstbyte[$i]->{explen} + 1)
        if ref($firstbyte[$i]);
    }
  }
  else {
    $firstbyte[$i] = $i < 128 ? $i : -1;
  }
}

my $enc;
open($enc, '>', $ofile) or do {
  $! = 5;
  die "Couldn't open $ofile for writing:\n$!\n";
};
binmode($enc);

#Note the use of network order packings

print $enc pack("Na${namelength}nnN256",
                $magic, $name, $pflen, $maplen, @firstbyte);

my @map = ();
my $head = 0;

while (@prefixes) {
  my $pfxmap = shift @prefixes;
  $head++;

  my $len = $pfxmap->{max} - $pfxmap->{min} + 1;
  my $mapstart = @map;
  my $ispfx = '';
  vec($ispfx, 255, 1) = 0;
  my $ischar = '';
  vec($ischar, 255, 1) = 0;

  for ($i = $pfxmap->{min}; $i <= $pfxmap->{max}; $i++) {
    my $entry = $pfxmap->{map}->[$i];

    if (defined($entry)) {
      if (ref($entry)) {
        my $pfxent = $entry;
        $entry = $head + @prefixes;
        push(@prefixes, $pfxent);
        vec($ispfx, $i, 1) = 1;
      }
      else {
        vec($ischar, $i, 1) = 1;
      }
    }
    else {
      $entry = 0xFFFF;
    }

    push(@map, $entry);
  }

  print $enc pack('CCn', $pfxmap->{min}, $len, $mapstart), $ispfx, $ischar;
}

if (@map) {
  my $packlist = 'n' . int(@map);
  print $enc pack($packlist, @map);
}

close($enc);

exit 0;

################
## End main
################

sub push_prefix {
  my ($byte) = @_;

  return "Prefix too long"
    if (@stack >= 3);

  return "Different lengths for same first byte"
    if (defined($pfxlenref)
        and defined($$pfxlenref)
        and $$pfxlenref < @stack);

  my $pfxmap = $currmap->{map}->[$byte];

  if (defined($pfxmap)) {
    return "Prefix already mapped to a character"
      unless ref($pfxmap);

    # Remove what we've already added in for this prefix so we don't
    # count it twice

    $totcount -= $pfxmap->{max} - $pfxmap->{min} + 1;
  }
  else {
    $pfxmap = new Pfxmap(min => 255, max => 0, map => []);
    $currmap->{map}->[$byte] = $pfxmap;
  }

  unless (@stack) {
    $pfxlenref = \$pfxmap->{explen};
  }

  $currmap->{min} = $byte
    if $byte < $currmap->{min};

  $currmap->{max} = $byte
    if $byte > $currmap->{max};

  $pfxcount++;

  push(@stack, $currmap);
  $currmap = $pfxmap;
  return;
}  # End push_prefix

sub pop_prefix {
  return "Attempt to pop un-pushed prefix"
    unless (@stack);

  my $count = $currmap->{max} - $currmap->{min} + 1;

  return "Empty prefix not allowed"
    unless $count > 0;

  $totcount += $count;

  $currmap = pop(@stack);
  $pfxlenref = undef
    unless @stack;
  return;
}  # End pop_prefix

sub range_set {
  my ($byte, $uni, $len) = @_;
  my $limit = $byte + $len;

  return "Range too long"
    if $limit > 256;

  if (defined($pfxlenref)) {
    if (defined($$pfxlenref)) {
      return "Different for same 1st byte"
        unless $$pfxlenref == @stack;
    }
    else {
      $$pfxlenref = @stack;
    }
  }

  my $i;
  for ($i = $byte; $i < $limit; $i++, $uni++) {
    return "Byte already mapped"
      if defined($currmap->{map}->[$i]);

    $currmap->{map}->[$i] = $uni;
  }

  $currmap->{min} = $byte
    if $byte < $currmap->{min};

  $currmap->{max} = $limit - 1
    if $limit >= $currmap->{max};

  return;
}  # End range_set

__END__

=head1 NAME

compile_encoding - compile XML encmap into a binary encoded file for XML::Parser

=head1 SYNOPSIS

    compile_encoding [-h] [-o <output_file>] <input_file>

=head1 DESCRIPTION

B<compile_encoding> compiles an input XML encmap file into a binary encoded file
usable by L<XML::Parser|XML::Parser(3pm)>.

=head1 ARGUMENTS

=over 4

=item E<lt>input_fileE<gt>

The XML encmap file to compile.

=back

=head1 OPTIONS

=over 4

=item B<-o E<lt>output_fileE<gt>>

Put compiled binary into given output file.  By default, a file that has the
same basename as the input file, but with an extension of F<.enc> is output.

=item B<-h>

Print usage information.

=back

=head1 EXIT STATUS

    0   The script exited normally.

    1   The script exited after printing the help.

    2   Invalid command-line arguments.

    >2  An error occurred.

=head1 KNOWN BUGS

I<None>.

=head1 SEE ALSO

L<make_encmap(1)>,
L<XML::Encoding(3pm)>,
L<XML::Parser(3pm)>.

=head1 AUTHOR

Clark Cooper E<lt>L<coopercc@netheaven.com|mailto:coopercc@netheaven.com>E<gt>.

Steve Hay E<lt>L<shay@cpan.org|mailto:shay@cpan.org>E<gt> is now maintaining
XML::Encoding as of version 2.00.

This manual page was written by Daniel Leidert
E<lt>L<daniel.leidert@wgdd.de|mailto:daniel.leidert@wgdd.de>E<gt> for the Debian
project (but may be used by others).

=head1 COPYRIGHT

Copyright (C) 1998 Clark Cooper.  All rights reserved.
Copyright (C) 2007-2008, 2014 Steve Hay.  All rights reserved.

=head1 LICENCE

This script is free software; you can redistribute it and/or modify it under the
same terms as Perl itself, i.e. under the terms of either the GNU General Public
License or the Artistic License, as specified in the F<LICENCE> file.

=head1 VERSION

Version 2.11

=head1 DATE

08 Dec 2020

=head1 HISTORY

See the F<Changes> file.

=cut

# Tell Emacs that this is really a perl script
# Local Variables:
# mode:perl
# End: