#   Title:      TTFWIDTH.PL
#   Author:     M. Hosken
#   Description: Write out character width, etc. information from a TTF file
#                in either SF or CSV format.
# 1.2.0     25-MAR-1998     Tidy up to package with the rest

require 'getopts.pl';
# Parse the command-line switches.  Getopts (from getopts.pl) is expected to
# set $opt_q, $opt_s, $opt_u, $opt_z and $opt_p, which the rest of the script
# tests.  It is called with "&" rather than the obsolete "do SUBROUTINE(LIST)"
# form, which newer Perls no longer treat as a subroutine call.
&Getopts("qsuzp:");

# The input file is mandatory: without it, print the usage text and stop.
if (!defined $ARGV[0])
    {
    die 'TTFWIDTH [-q] [-s] [-u] [-z] [-p plat.spec] <infile> [<outfile>]

v1.2.0, 25-Mar-1998  (c) Martin_Hosken@sil.org
    
Generates character size information for each character to either Standard
Format or Comma Separated Variables format.  Essential for sorting out those
typographical variants.
    -u  unicode as key rather than calculated 8-bit code
    -q  suppress advisory output
    -s  output in standard format
    -p  plat.spec set platform and specific ids of character map
    -z  debug
';
    }

# print "TTFWIDTH v1.1: Freeware, (c) M. Hosken\n" if (!defined $opt_q);

# Open the font file for binary reading.  The three-argument form of open
# keeps the file name separate from the mode, so a name containing leading or
# trailing whitespace, ">" or "|" is opened literally instead of being
# interpreted as part of the mode.
open(INFILE, "<", $ARGV[0])
        || die("Unable to open \"$ARGV[0]\" for reading: $!");
binmode INFILE;

# Output goes to the named file if one was given, otherwise to a duplicate of
# standard output.
if (defined $ARGV[1])
    {
    open(OUTFILE, ">", $ARGV[1])
            || die "Unable to open \"$ARGV[1]\" for writing: $!";
    }
else
    {
    open(OUTFILE, ">&STDOUT") || die "Can't dup STDOUT";
    }

# For the most part, we don't need all the information in the font, so it
# isn't parsed.  Secondly, no checks are made that all the essential tables
# are present.  Trivial, but then the tables are essential and the font
# would not work without them, so they will be there (famous last words).

# Read the 12-byte file header, then one 16-byte directory entry per table,
# remembering each table's file offset in %dir keyed by its 4-character name.
die "reading header" if (read(INFILE, $head, 12) != 12);
($ver, $numtab) = unpack("Nn", $head);
# print "ver = $ver\nnumtab = $numtab\n";
$i = 0;
while ($i < $numtab)
    {
    die "reading table directory" if (read(INFILE, $tab, 16) != 16);
    # entry layout used here: 4-byte name, 4 bytes skipped, 4-byte offset
    ($name, $offset) = unpack("a4x4N", $tab);
#    printf "name = \"$name\", offset = %X\n", $offset;
    $dir{$name} = $offset;
    $i++;
    }

# trawl the world to get all those essential numbers from the various strange
# tables that they are spread around.

# process the "head" table
# Picks up two 16-bit values: $h_em (at byte 18, reported below as the em box
# size) and $h_longloc (at byte 50, later used to choose between 4-byte and
# 2-byte "loca" entries).
seek(INFILE, $dir{"head"}, 0);
read(INFILE, $h_data, 54) == 54 || die "reading head table";
($h_em, $h_longloc) = unpack("x18nx30n", $h_data);

# process the "maxp" table
# $m_num (16-bit value at byte 4) is the number of glyphs.  The read is
# checked like every other table read, so a truncated file is reported
# instead of silently producing an empty glyph count.
seek(INFILE, $dir{"maxp"}, 0);
read(INFILE, $m_data, 6) == 6 || die "reading maxp table";
($m_num) = unpack("x4n", $m_data);
print "The em box is $h_em units square.\nThere are $m_num glyphs\n"
    if (!defined $opt_q);

# process the "hhea" table
# $h_numh (16-bit value at byte 34) is the number of full entries that will
# be read from the "hmtx" table.
seek(INFILE, $dir{"hhea"}, 0);
read(INFILE, $h_data, 36) == 36 || die "reading hhea table";
($h_numh) = unpack("x34n", $h_data);
undef $h_data;

# process the "cmap" table
# contains the mappings of unicode to glyph number
#
# The table starts with a directory of (platform id, specific id, offset)
# entries.  Pick the entry to use:
#   - with "-p plat.spec" both the platform id and the specific (encoding) id
#     must match what the user asked for;
#   - without -p, the first entry for platform 3 is taken whatever its
#     encoding, so that both UGL (encoding 1) and other platform-3 fonts
#     are found.
seek(INFILE, $dir{"cmap"}, 0);
read(INFILE, $head, 4) == 4 || die "reading cmap header";
($c_ver, $c_n) = unpack("nn", $head);
if (defined $opt_p && $opt_p =~ m/^([0-9]+)[.]([0-9]+)/)
    {
    $c_tid = $1;
    $c_tenc = $2;
    $c_anyenc = 0;          # user named the encoding: honour it
    }
else
    {
    $c_tid = 3;
    $c_tenc = 1;
    $c_anyenc = 1;          # default: any encoding of platform 3 will do
    }
for ($i = 0; $i < $c_n; $i++)
    {
    read(INFILE, $c_info, 8) == 8 || die "reading cmap dir entry";
    ($c_id, $c_enc, $c_offset) = unpack("nnN", $c_info);
                                     # found the encoding we want
    last if ($c_id == $c_tid && ($c_anyenc || $c_enc == $c_tenc));
    }
# Nothing matched: carry on with the last directory entry read, keyed by
# Unicode value rather than by 8-bit code.
if ($i >= $c_n)
    {
    print STDERR "Can't find required encoding, using Unicode instead.\n";
    $opt_u = 1;
    }
if (!defined $opt_q)
    {
    print "font mapping Microsoft id = $c_id, encoding = $c_enc\n";
    print "    (encoding => " . ($c_enc == 1 ? "UGL coding"
            : "unknown or symbol") . ")\n";
    }

if ($c_enc == 1)
    {
# Microsoft UGL coding (8-bit to unicode mapping table)
# Entry n holds the Unicode value for 8-bit code n + 32; 0 means the code
# has no character assigned.
    (@c_enc) = (32 .. 126, 0, 0, 0, 0x201a, 0x192, 0x201e, 0x2026, 0x2020,
                0x2021, 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0, 0, 0, 0,
                0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x02dc,
                0x2122, 0x0161, 0x203a, 0x0153, 0, 0, 0x0178,
                160 .. 255);
    }
# Comma separated output begins with a header row naming the columns;
# standard format output (-s) has no header.  The two leading columns only
# exist when records are keyed by 8-bit code rather than by Unicode (-u).
unless (defined $opt_s)
    {
#    print OUTFILE "\"Em box\",\"$h_em\"\n";
    print OUTFILE "Code, Char, " unless (defined $opt_u);
    print OUTFILE join(", ", "Unicode", "Glyph", "AdvWidth", "LSdBearing",
                             "Xmin", "Xmax", "Ymin", "Ymax", "XCentre"), "\n";
    }

# Collect the metrics and print them.  In Unicode mode a font with more than
# $big glyphs is processed in several passes of at most $big characters each,
# with the per-pass arrays released in between; everything else is done in a
# single pass.
$big = 512;
if (!defined $opt_u || $m_num <= $big)
    {
    $low = -1;              # -1: getdata takes every code in one go
    &getdata();
    &printdata();
    }
else
    {
    $low = 0;               # getdata only takes codes above $low and moves
    $c_count = $big;        # $low on when it stops after $big characters
    do
        {
        &getdata();
        &printdata();
        undef @c_uni;
        undef @l_offsets;
        undef @h_adw;
        undef @h_lsb;
        undef @g_xmin;
        undef @g_xmax;
        undef @g_ymin;
        undef @g_ymax;
        undef @map;
        } while ($c_count >= $big);     # a short pass means we reached the end
    }
close(OUTFILE);
close(INFILE);


# printdata: write one record per character to OUTFILE from the arrays filled
# in by getdata (@h_adw, @h_lsb, @g_xmin, @g_xmax, @g_ymin, @g_ymax).
#
# Takes no arguments and returns nothing useful; everything is passed in
# globals:
#   $opt_u  defined: records are keyed by Unicode value.  $i then runs over
#           the indices of @c_uni (glyph numbers) and $j = $map[$i] is the
#           slot holding that glyph's metrics.
#           undefined: records are keyed by 8-bit code.  $i runs over the
#           codes 32..255 and $j is the glyph number found through @c_map
#           (via the @c_enc code-to-Unicode table when $c_enc == 1).
#   $opt_s  defined: standard format (\marker value lines, blank line after
#           each record); undefined: comma separated values.
sub printdata
{
for ($i = (defined $opt_u ? 0 : 32); $i <= (defined $opt_u ? $#c_uni : 255);
        $i++)
    {
    if (defined $opt_u)
        {
        next if ($c_uni[$i] == 0);      # glyph has no character mapped to it
        $j = $map[$i];
        }
    else
        {
        $j = ($c_enc == 1) ? $c_map[$c_enc[$i - 32]] : $c_map[$i];
        next if ($j == 0);              # code has no glyph in this font
        }
    # value reported as the X centre column
    $o_cnt = $h_lsb[$j] + ($g_xmax[$j] - $g_xmin[$j]) / 2;
    $o_centre = $h_adw[$j] - $o_cnt;
    $o_centre = -$o_centre if ($o_cnt < 0);
    if (defined $opt_s)
        {
        if (!defined $opt_u)
            {
            # @c_enc starts at code 32, and the glyph number is $j ($i is
            # the 8-bit code here) - the same values the CSV branch prints.
            printf OUTFILE "\\code %d\n\\char %c\n\\uni 0x%04x\n\\glyph %d\n"
                . "\\adw %d\n\\lsb %d\n",
                $i, $i, ($c_enc == 1) ? $c_enc[$i - 32] : $i + 0xf000, $j,
                $h_adw[$j], $h_lsb[$j];
            }
        else
            {
            printf OUTFILE "\\code 0x%04x\n\\glyph %d\n\\adw %d\n\\lsb %d \n",
                $c_uni[$i], $i, $h_adw[$j], $h_lsb[$j];
            }
        printf OUTFILE "\\xmin %d\n\\xmax %d\n\\ymin %d\n\\ymax %d\n\\xcent %d\n\n",
            $g_xmin[$j], $g_xmax[$j], $g_ymin[$j], $g_ymax[$j], $o_centre;
        }
    else
        {
        if (!defined $opt_u)
            {
            # the character column needs CSV quoting for '"' and ','
            if ($i == 34)
                { $o_s = "\"" x 4; }
            elsif ($i == 44)
                { $o_s = "\",\""; }
            else
                { $o_s = sprintf("%c", $i); }
            printf OUTFILE "%d,%s,0x%04X,%d,",
                $i, $o_s, ($c_enc == 1) ? $c_enc[$i - 32] : $i + 0xf000, $j;
            }
        else
            {
            printf OUTFILE "0x%04X,%d,", $c_uni[$i], $i;
            }
        printf OUTFILE "%d,%d,%d,%d,%d,%d,%d\n",
            $h_adw[$j], $h_lsb[$j], $g_xmin[$j],
            $g_xmax[$j], $g_ymin[$j], $g_ymax[$j], $o_centre;
        }
    }
}

# getdata: read the character map and the per-glyph metrics from INFILE.
#
# Takes no arguments and returns nothing useful; everything is passed in
# globals.
# Reads:  %dir, $c_offset (cmap subtable to use), $c_enc, $opt_u, $opt_z,
#         $h_longloc, $m_num, $h_numh, $big and $low.
# Fills:  @c_uni   character code for each glyph number (0/undef = unmapped)
#         @c_map   glyph number for each code (8-bit mode only)
#         @map     glyph number -> slot used in the metric arrays below
#                  (identity in 8-bit mode, running count in Unicode mode)
#         @l_offsets  offset of the glyph in "glyf", or -1 if it has no data
#         @h_adw, @h_lsb                       advance width, left bearing
#         @g_xmin, @g_ymin, @g_xmax, @g_ymax   glyph bounding box
#         $c_count number of characters taken (Unicode mode)
# When $low is -1 every code is taken.  Otherwise only codes above $low are
# taken, at most $big of them, and $low is moved on to the last code taken so
# that the caller can call again for the next batch.
sub getdata
{
seek(INFILE, $dir{"cmap"} + $c_offset, 0);
read(INFILE, $c_head, 6) == 6 || die "reading cmap table header";
($c_fmt, $c_len, $c_ver) = unpack("nnn", $c_head);
die "Incorrect encoding format $c_fmt, should be 4" if ($c_fmt != 4);
read(INFILE, $c_head, 8) == 8 || die "reading cmap table header part 2";
($c_segs) = unpack("n", $c_head);       # stored as twice the segment count
$c_segs = $c_segs / 2;
# now read the real meat of the table
read(INFILE, $c_data, 2 * $c_segs) == 2 * $c_segs || die "reading cmap_end data";
(@c_ends) = unpack("n" x $c_segs, $c_data);
# two bytes of padding sit between the end codes and the start codes
read(INFILE, $c_data, 2 * $c_segs + 2) == 2 * $c_segs + 2
        || die "reading cmap_start data";
(@c_starts) = unpack("xx" . "n" x $c_segs, $c_data);
read(INFILE, $c_data, 2 * $c_segs) == 2 * $c_segs || die "reading cmap_deltas";
(@c_deltas) = unpack("n" x $c_segs, $c_data);
read(INFILE, $c_data, 2 * $c_segs) == 2 * $c_segs || die "reading cmap_ranges";
(@c_ranges) = unpack("n" x $c_segs, $c_data);
undef $c_data;
# whatever is left of the subtable (16 header/padding bytes and four arrays
# of $c_segs entries have been consumed) is the glyph id array
$c_idlen = $c_len - $c_segs * 8 - 16;
$c_idlen = 0 if ($c_idlen < 0);         # bad length field: no id array
$num = read(INFILE, $c_idarray, $c_idlen);
(@c_idarray) = unpack("n" x int($num / 2), $c_idarray);
undef $c_idarray;
# convert range type information into per-code information.  Creates mapping
# table (@c_map) to convert code to glyph, and @c_uni for glyph to code.
# The final segment is not walked.
$c_count = 0;
cmap:
for ($i = 0; $i < $c_segs - 1; $i++)
    {
    for ($j = $c_starts[$i]; $j <= $c_ends[$i]; $j++)
        {
        if ($low == -1 || $j > $low)
            {
                        # calculate glyph number
            if ($c_ranges[$i] != 0)
                {
                # the range offset is relative to its own position in the
                # ranges array, hence the "- $c_segs + $i"
                $c_k = $c_ranges[$i]/2 + $j - $c_starts[$i] - $c_segs + $i;
                # an index outside the array means "no glyph" (a negative
                # subscript would otherwise count from the end)
                $index = ($c_k >= 0 && $c_k <= $#c_idarray)
                        ? $c_idarray[$c_k] : 0;
                # a non-zero entry is offset by the segment's delta
                $index = ($index + $c_deltas[$i]) % 65536 if ($index != 0);
                }
            else
                {
                # glyph = code + delta, modulo 65536.  Taking the modulus
                # handles both "negative" deltas (stored as 65536 - n) and
                # sums that wrap past 65535, e.g. codes in the 0xf000 range.
                $index = ($j + $c_deltas[$i]) % 65536;
                }
            if (!defined $opt_u)
                {
                $c_map[$j - ($c_enc == 1 ? 0 : 0xf000)] = $index;
                $map[$index] = $index;
                }
            else
                {
                next if ($index == 0);
                $c_count++;
                $map[$index] = $c_count;
                }
            $c_uni[$index] = $j;
            if ($low > -1 && $c_count >= $big)
                {
                # batch is full: remember where to resume and stop
                $low = $j;
                last cmap;
                }
            }
        }
    }
print STDERR "$c_count " if (defined $opt_z);
undef @c_deltas;
undef @c_ranges;
undef @c_starts;
undef @c_ends;
print STDERR "1" if (defined $opt_z);
# generate the locations of each glyph

# process the "loca" table
# $m_num + 1 entries, 4 bytes each when $h_longloc is 1, otherwise 2 bytes
$l_size = ($h_longloc == 1 ? 4 : 2) * ($m_num + 1);
seek(INFILE, $dir{"loca"}, 0);
read(INFILE, $l_data, $l_size) == $l_size || die "reading loca table";
(@l_offs) = unpack(($h_longloc == 1 ? "N" : "n") x ($m_num + 1), $l_data);
undef $l_data;
for ($i = 0; $i <= $m_num; $i++)
    {
    if ($c_uni[$i])
        {
        $j = $map[$i];
        if ($i != $m_num && $l_offs[$i] == $l_offs[$i+1])
            {
            # same offset as the next glyph: no outline data.  The -1
            # marker must stay exactly -1 (never doubled) because the
            # "glyf" pass below tests for it.
            $l_offsets[$j] = -1;
            }
        else
            {
            # the short format offset is doubled to get the byte offset
            $l_offsets[$j] = $l_offs[$i] * ($h_longloc == 0 ? 2 : 1);
            }
        }
    }
undef @l_offs;
print STDERR "2" if (defined $opt_z);
# get the horizontal metrics (advance width and left side bearing)

# process the "hmtx" table
seek(INFILE, $dir{"hmtx"}, 0);
read(INFILE, $h_data, 4 * $h_numh) == 4 * $h_numh || die "reading hmtx table";
(@h_temp) = unpack("n" x (2 * $h_numh), $h_data);
undef $h_data;
for ($i = 0; $i < $h_numh; $i++)
    {
    $h_ladw = $h_temp[$i * 2];          # remembered for the glyphs below
    if ($c_uni[$i])
        {
        $h_adw[$map[$i]] = $h_ladw;
        $h_lsb[$map[$i]] = $h_temp[$i * 2 + 1];
        }
    }
if ($h_numh < $m_num)       # for monospaced fonts
    {
    # the remaining glyphs share the last advance width and only have a
    # left side bearing each
    read(INFILE, $h_data, 2 * ($m_num - $h_numh)) == 2 * ($m_num - $h_numh)
            || die "reading hmtx table bearings";
    @h_temp = unpack("n" x ($m_num - $h_numh), $h_data);
    for ($i = $h_numh; $i < $m_num; $i++)
        {
        if ($c_uni[$i])
            {
            $h_adw[$map[$i]] = $h_ladw;
            $h_lsb[$map[$i]] = $h_temp[$i - $h_numh];
            }
        }
    }
# The left side bearing is a signed 16-bit value read as unsigned: convert
# it (32768 and up are the negative values).  The advance width is an
# unsigned quantity and is left as read.
for ($i = 0; $i <= $m_num; $i++)
    {
    if ($c_uni[$i])
        {
        $j = $map[$i];
        $h_lsb[$j] -= 65536 if ($h_lsb[$j] >= 32768);
        }
    }
undef @h_temp;
print STDERR "3" if (defined $opt_z);

# process the "glyf" table to get the character bounding box dimensions
for ($i = 0; $i <= $m_num; $i++)
    {
    next if (!$c_uni[$i]);              # glyph is not mapped
    $j = $map[$i];
    next if ($l_offsets[$j] == -1);     # glyph has no outline data
    seek(INFILE, $dir{"glyf"} + $l_offsets[$j], 0);
    read(INFILE, $g_data, 10) == 10 || die "reading glyph $i";
    # skip the first 16-bit field, then xmin, ymin, xmax, ymax
    ($g_xmin[$j], $g_ymin[$j], $g_xmax[$j], $g_ymax[$j])
            = unpack("x2nnnn", $g_data);
    # all four are signed 16-bit values read as unsigned
    $g_xmin[$j] -= 65536 if ($g_xmin[$j] >= 32768);
    $g_ymin[$j] -= 65536 if ($g_ymin[$j] >= 32768);
    $g_xmax[$j] -= 65536 if ($g_xmax[$j] >= 32768);
    $g_ymax[$j] -= 65536 if ($g_ymax[$j] >= 32768);
    }
print STDERR "4\n" if (defined $opt_z);
}