# -*- mode: Perl -*-
# /=====================================================================\ #
# |  utf8.def                                                           | #
# | Implementation for LaTeXML                                          | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;

#**********************************************************************
# Override the LaTeX distribution's implementation.
# (1) I don't know if we want (or need) to get into reassembling bytes to decode utf8.
# (2) why bother?
#**********************************************************************
# Undo the disabling of the control characters (0-8, 0xB, 0xE-0x1E) and of the
# upper-half characters (128-255): each of them gets catcode OTHER again.
foreach my $byte (0x00 .. 0x08, 0x0B, 0x0E .. 0x1E, 0x80 .. 0xFF) {
  AssignCatcode(pack('C', $byte), CC_OTHER); }

# Clear the input encoding value ...
AssignValue('INPUT_ENCODING', undef);

# ... and set the Perl-level input encoding, so that the Mouth simply
# passes the utf8 through.
AssignValue('PERL_INPUT_ENCODING', 'UTF8');

# Also add the uc and lc code definitions:
# Precomputed case-mapping table for accented Latin letters.
# Each row is: [ uppercase character, its code point (decimal),
#                lowercase character, its code point (decimal) ].
# The rows were produced by the generator script kept in the comment near the
# end of this file, grouped by base letter A..Z.
# Some rows occur more than once (e.g. \x{00C1} appears twice); a repeated row
# carries exactly the same four values as its first occurrence.
# NOTE(review): presumably repeats come from different combining accents that
# normalize to the same composed character -- confirm against the generator.
our @PRECOMPUTED_UC_LC = (
  ["\x{00C1}",   193,  "\x{00E1}",   225],
  ["\x{00C2}",   194,  "\x{00E2}",   226],
  ["\x{00C3}",   195,  "\x{00E3}",   227],
  ["\x{00100}",  256,  "\x{00101}",  257],
  ["\x{00102}",  258,  "\x{00103}",  259],
  ["\x{00226}",  550,  "\x{00227}",  551],
  ["\x{00C4}",   196,  "\x{00E4}",   228],
  ["\x{001EA2}", 7842, "\x{001EA3}", 7843],
  ["\x{00C5}",   197,  "\x{00E5}",   229],
  ["\x{001CD}",  461,  "\x{001CE}",  462],
  ["\x{00200}",  512,  "\x{00201}",  513],
  ["\x{00202}",  514,  "\x{00203}",  515],
  ["\x{001EA0}", 7840, "\x{001EA1}", 7841],
  ["\x{001E00}", 7680, "\x{001E01}", 7681],
  ["\x{00104}",  260,  "\x{00105}",  261],
  ["\x{00C0}",   192,  "\x{00E0}",   224],
  ["\x{00C1}",   193,  "\x{00E1}",   225],
  ["\x{001E02}", 7682, "\x{001E03}", 7683],
  ["\x{001E04}", 7684, "\x{001E05}", 7685],
  ["\x{001E06}", 7686, "\x{001E07}", 7687],
  ["\x{00106}",  262,  "\x{00107}",  263],
  ["\x{00108}",  264,  "\x{00109}",  265],
  ["\x{0010A}",  266,  "\x{0010B}",  267],
  ["\x{0010C}",  268,  "\x{0010D}",  269],
  ["\x{00C7}",   199,  "\x{00E7}",   231],
  ["\x{00106}",  262,  "\x{00107}",  263],
  ["\x{001E0A}", 7690, "\x{001E0B}", 7691],
  ["\x{0010E}",  270,  "\x{0010F}",  271],
  ["\x{001E0C}", 7692, "\x{001E0D}", 7693],
  ["\x{001E10}", 7696, "\x{001E11}", 7697],
  ["\x{001E12}", 7698, "\x{001E13}", 7699],
  ["\x{001E0E}", 7694, "\x{001E0F}", 7695],
  ["\x{00C8}",   200,  "\x{00E8}",   232],
  ["\x{00C9}",   201,  "\x{00E9}",   233],
  ["\x{00CA}",   202,  "\x{00EA}",   234],
  ["\x{001EBC}", 7868, "\x{001EBD}", 7869],
  ["\x{00112}",  274,  "\x{00113}",  275],
  ["\x{00114}",  276,  "\x{00115}",  277],
  ["\x{00116}",  278,  "\x{00117}",  279],
  ["\x{00CB}",   203,  "\x{00EB}",   235],
  ["\x{001EBA}", 7866, "\x{001EBB}", 7867],
  ["\x{0011A}",  282,  "\x{0011B}",  283],
  ["\x{00204}",  516,  "\x{00205}",  517],
  ["\x{00206}",  518,  "\x{00207}",  519],
  ["\x{001EB8}", 7864, "\x{001EB9}", 7865],
  ["\x{00228}",  552,  "\x{00229}",  553],
  ["\x{00118}",  280,  "\x{00119}",  281],
  ["\x{001E18}", 7704, "\x{001E19}", 7705],
  ["\x{001E1A}", 7706, "\x{001E1B}", 7707],
  ["\x{00C8}",   200,  "\x{00E8}",   232],
  ["\x{00C9}",   201,  "\x{00E9}",   233],
  ["\x{001E1E}", 7710, "\x{001E1F}", 7711],
  ["\x{001F4}",  500,  "\x{001F5}",  501],
  ["\x{0011C}",  284,  "\x{0011D}",  285],
  ["\x{001E20}", 7712, "\x{001E21}", 7713],
  ["\x{0011E}",  286,  "\x{0011F}",  287],
  ["\x{00120}",  288,  "\x{00121}",  289],
  ["\x{001E6}",  486,  "\x{001E7}",  487],
  ["\x{00122}",  290,  "\x{00123}",  291],
  ["\x{001F4}",  500,  "\x{001F5}",  501],
  ["\x{00124}",  292,  "\x{00125}",  293],
  ["\x{001E22}", 7714, "\x{001E23}", 7715],
  ["\x{001E26}", 7718, "\x{001E27}", 7719],
  ["\x{0021E}",  542,  "\x{0021F}",  543],
  ["\x{001E24}", 7716, "\x{001E25}", 7717],
  ["\x{001E28}", 7720, "\x{001E29}", 7721],
  ["\x{001E2A}", 7722, "\x{001E2B}", 7723],
  ["\x{00CC}",   204,  "\x{00EC}",   236],
  ["\x{00CD}",   205,  "\x{00ED}",   237],
  ["\x{00CE}",   206,  "\x{00EE}",   238],
  ["\x{00128}",  296,  "\x{00129}",  297],
  ["\x{0012A}",  298,  "\x{0012B}",  299],
  ["\x{0012C}",  300,  "\x{0012D}",  301],
  ["\x{00CF}",   207,  "\x{00EF}",   239],
  ["\x{001EC8}", 7880, "\x{001EC9}", 7881],
  ["\x{001CF}",  463,  "\x{001D0}",  464],
  ["\x{00208}",  520,  "\x{00209}",  521],
  ["\x{0020A}",  522,  "\x{0020B}",  523],
  ["\x{001ECA}", 7882, "\x{001ECB}", 7883],
  ["\x{0012E}",  302,  "\x{0012F}",  303],
  ["\x{001E2C}", 7724, "\x{001E2D}", 7725],
  ["\x{00CC}",   204,  "\x{00EC}",   236],
  ["\x{00CD}",   205,  "\x{00ED}",   237],
  ["\x{001E2E}", 7726, "\x{001E2F}", 7727],
  ["\x{00134}",  308,  "\x{00135}",  309],
  ["\x{001E30}", 7728, "\x{001E31}", 7729],
  ["\x{001E8}",  488,  "\x{001E9}",  489],
  ["\x{001E32}", 7730, "\x{001E33}", 7731],
  ["\x{00136}",  310,  "\x{00137}",  311],
  ["\x{001E34}", 7732, "\x{001E35}", 7733],
  ["\x{001E30}", 7728, "\x{001E31}", 7729],
  ["\x{00139}",  313,  "\x{0013A}",  314],
  ["\x{0013D}",  317,  "\x{0013E}",  318],
  ["\x{001E36}", 7734, "\x{001E37}", 7735],
  ["\x{0013B}",  315,  "\x{0013C}",  316],
  ["\x{001E3C}", 7740, "\x{001E3D}", 7741],
  ["\x{001E3A}", 7738, "\x{001E3B}", 7739],
  ["\x{00139}",  313,  "\x{0013A}",  314],
  ["\x{001E3E}", 7742, "\x{001E3F}", 7743],
  ["\x{001E40}", 7744, "\x{001E41}", 7745],
  ["\x{001E42}", 7746, "\x{001E43}", 7747],
  ["\x{001E3E}", 7742, "\x{001E3F}", 7743],
  ["\x{001F8}",  504,  "\x{001F9}",  505],
  ["\x{00143}",  323,  "\x{00144}",  324],
  ["\x{00D1}",   209,  "\x{00F1}",   241],
  ["\x{001E44}", 7748, "\x{001E45}", 7749],
  ["\x{00147}",  327,  "\x{00148}",  328],
  ["\x{001E46}", 7750, "\x{001E47}", 7751],
  ["\x{00145}",  325,  "\x{00146}",  326],
  ["\x{001E4A}", 7754, "\x{001E4B}", 7755],
  ["\x{001E48}", 7752, "\x{001E49}", 7753],
  ["\x{001F8}",  504,  "\x{001F9}",  505],
  ["\x{00143}",  323,  "\x{00144}",  324],
  ["\x{00D2}",   210,  "\x{00F2}",   242],
  ["\x{00D3}",   211,  "\x{00F3}",   243],
  ["\x{00D4}",   212,  "\x{00F4}",   244],
  ["\x{00D5}",   213,  "\x{00F5}",   245],
  ["\x{0014C}",  332,  "\x{0014D}",  333],
  ["\x{0014E}",  334,  "\x{0014F}",  335],
  ["\x{0022E}",  558,  "\x{0022F}",  559],
  ["\x{00D6}",   214,  "\x{00F6}",   246],
  ["\x{001ECE}", 7886, "\x{001ECF}", 7887],
  ["\x{00150}",  336,  "\x{00151}",  337],
  ["\x{001D1}",  465,  "\x{001D2}",  466],
  ["\x{0020C}",  524,  "\x{0020D}",  525],
  ["\x{0020E}",  526,  "\x{0020F}",  527],
  ["\x{001A0}",  416,  "\x{001A1}",  417],
  ["\x{001ECC}", 7884, "\x{001ECD}", 7885],
  ["\x{001EA}",  490,  "\x{001EB}",  491],
  ["\x{00D2}",   210,  "\x{00F2}",   242],
  ["\x{00D3}",   211,  "\x{00F3}",   243],
  ["\x{001E54}", 7764, "\x{001E55}", 7765],
  ["\x{001E56}", 7766, "\x{001E57}", 7767],
  ["\x{001E54}", 7764, "\x{001E55}", 7765],
  ["\x{00154}",  340,  "\x{00155}",  341],
  ["\x{001E58}", 7768, "\x{001E59}", 7769],
  ["\x{00158}",  344,  "\x{00159}",  345],
  ["\x{00210}",  528,  "\x{00211}",  529],
  ["\x{00212}",  530,  "\x{00213}",  531],
  ["\x{001E5A}", 7770, "\x{001E5B}", 7771],
  ["\x{00156}",  342,  "\x{00157}",  343],
  ["\x{001E5E}", 7774, "\x{001E5F}", 7775],
  ["\x{00154}",  340,  "\x{00155}",  341],
  ["\x{0015A}",  346,  "\x{0015B}",  347],
  ["\x{0015C}",  348,  "\x{0015D}",  349],
  ["\x{001E60}", 7776, "\x{001E61}", 7777],
  ["\x{00160}",  352,  "\x{00161}",  353],
  ["\x{001E62}", 7778, "\x{001E63}", 7779],
  ["\x{00218}",  536,  "\x{00219}",  537],
  ["\x{0015E}",  350,  "\x{0015F}",  351],
  ["\x{0015A}",  346,  "\x{0015B}",  347],
  ["\x{001E6A}", 7786, "\x{001E6B}", 7787],
  ["\x{00164}",  356,  "\x{00165}",  357],
  ["\x{001E6C}", 7788, "\x{001E6D}", 7789],
  ["\x{0021A}",  538,  "\x{0021B}",  539],
  ["\x{00162}",  354,  "\x{00163}",  355],
  ["\x{001E70}", 7792, "\x{001E71}", 7793],
  ["\x{001E6E}", 7790, "\x{001E6F}", 7791],
  ["\x{00D9}",   217,  "\x{00F9}",   249],
  ["\x{00DA}",   218,  "\x{00FA}",   250],
  ["\x{00DB}",   219,  "\x{00FB}",   251],
  ["\x{00168}",  360,  "\x{00169}",  361],
  ["\x{0016A}",  362,  "\x{0016B}",  363],
  ["\x{0016C}",  364,  "\x{0016D}",  365],
  ["\x{00DC}",   220,  "\x{00FC}",   252],
  ["\x{001EE6}", 7910, "\x{001EE7}", 7911],
  ["\x{0016E}",  366,  "\x{0016F}",  367],
  ["\x{00170}",  368,  "\x{00171}",  369],
  ["\x{001D3}",  467,  "\x{001D4}",  468],
  ["\x{00214}",  532,  "\x{00215}",  533],
  ["\x{00216}",  534,  "\x{00217}",  535],
  ["\x{001AF}",  431,  "\x{001B0}",  432],
  ["\x{001EE4}", 7908, "\x{001EE5}", 7909],
  ["\x{001E72}", 7794, "\x{001E73}", 7795],
  ["\x{00172}",  370,  "\x{00173}",  371],
  ["\x{001E76}", 7798, "\x{001E77}", 7799],
  ["\x{001E74}", 7796, "\x{001E75}", 7797],
  ["\x{00D9}",   217,  "\x{00F9}",   249],
  ["\x{00DA}",   218,  "\x{00FA}",   250],
  ["\x{001D7}",  471,  "\x{001D8}",  472],
  ["\x{001E7C}", 7804, "\x{001E7D}", 7805],
  ["\x{001E7E}", 7806, "\x{001E7F}", 7807],
  ["\x{001E80}", 7808, "\x{001E81}", 7809],
  ["\x{001E82}", 7810, "\x{001E83}", 7811],
  ["\x{00174}",  372,  "\x{00175}",  373],
  ["\x{001E86}", 7814, "\x{001E87}", 7815],
  ["\x{001E84}", 7812, "\x{001E85}", 7813],
  ["\x{001E88}", 7816, "\x{001E89}", 7817],
  ["\x{001E80}", 7808, "\x{001E81}", 7809],
  ["\x{001E82}", 7810, "\x{001E83}", 7811],
  ["\x{001E8A}", 7818, "\x{001E8B}", 7819],
  ["\x{001E8C}", 7820, "\x{001E8D}", 7821],
  ["\x{001EF2}", 7922, "\x{001EF3}", 7923],
  ["\x{00DD}",   221,  "\x{00FD}",   253],
  ["\x{00176}",  374,  "\x{00177}",  375],
  ["\x{001EF8}", 7928, "\x{001EF9}", 7929],
  ["\x{00232}",  562,  "\x{00233}",  563],
  ["\x{001E8E}", 7822, "\x{001E8F}", 7823],
  ["\x{00178}",  376,  "\x{00FF}",   255],
  ["\x{001EF6}", 7926, "\x{001EF7}", 7927],
  ["\x{001EF4}", 7924, "\x{001EF5}", 7925],
  ["\x{001EF2}", 7922, "\x{001EF3}", 7923],
  ["\x{00DD}",   221,  "\x{00FD}",   253],
  ["\x{00179}",  377,  "\x{0017A}",  378],
  ["\x{001E90}", 7824, "\x{001E91}", 7825],
  ["\x{0017B}",  379,  "\x{0017C}",  380],
  ["\x{0017D}",  381,  "\x{0017E}",  382],
  ["\x{001E92}", 7826, "\x{001E93}", 7827],
  ["\x{001E94}", 7828, "\x{001E95}", 7829],
  ["\x{00179}",  377,  "\x{0017A}",  378]
);

# Register the case mappings globally: for every table entry, both the
# uppercase and the lowercase character get the entry's lowercase code as
# their lccode and the entry's uppercase code as their uccode.
foreach my $entry (@PRECOMPUTED_UC_LC) {
  my ($uc_char, $uc_code, $lc_char, $lc_code) = @{$entry};
  # Uppercase character first, then lowercase; lccode before uccode for each.
  foreach my $char ($uc_char, $lc_char) {
    $STATE->assignLCcode($char, $lc_code, 'global');
    $STATE->assignUCcode($char, $uc_code, 'global');
  }
}

# The @PRECOMPUTED_UC_LC table above was obtained via:
#
# our @SUPPORTED_ACCENTS = (
#   "\N{COMBINING GRAVE ACCENT}",
#   "\N{COMBINING ACUTE ACCENT}",
#   "\N{COMBINING CIRCUMFLEX ACCENT}",
#   "\N{COMBINING TILDE}",
#   "\N{COMBINING MACRON}",
#   "\N{COMBINING OVERLINE}",
#   "\N{COMBINING BREVE}",
#   "\N{COMBINING DOT ABOVE}",
#   "\N{COMBINING DIAERESIS}",
#   "\N{COMBINING HOOK ABOVE}",
#   "\N{COMBINING RING ABOVE}",
#   "\N{COMBINING DOUBLE ACUTE ACCENT}",
#   "\N{COMBINING CARON}",
#   "\N{COMBINING VERTICAL LINE ABOVE}",
#   "\N{COMBINING DOUBLE VERTICAL LINE ABOVE}",
#   "\N{COMBINING DOUBLE GRAVE ACCENT}",
#   "\N{COMBINING CANDRABINDU}",
#   "\N{COMBINING INVERTED BREVE}",
#   "\N{COMBINING TURNED COMMA ABOVE}",
#   "\N{COMBINING COMMA ABOVE}",
#   "\N{COMBINING REVERSED COMMA ABOVE}",
#   "\N{COMBINING COMMA ABOVE RIGHT}",
#   "\N{COMBINING GRAVE ACCENT BELOW}",
#   "\N{COMBINING ACUTE ACCENT BELOW}",
#   "\N{COMBINING LEFT TACK BELOW}",
#   "\N{COMBINING RIGHT TACK BELOW}",
#   "\N{COMBINING LEFT ANGLE ABOVE}",
#   "\N{COMBINING HORN}",
#   "\N{COMBINING LEFT HALF RING BELOW}",
#   "\N{COMBINING UP TACK BELOW}",
#   "\N{COMBINING DOWN TACK BELOW}",
#   "\N{COMBINING PLUS SIGN BELOW}",
#   "\N{COMBINING MINUS SIGN BELOW}",
#   "\N{COMBINING PALATALIZED HOOK BELOW}",
#   "\N{COMBINING RETROFLEX HOOK BELOW}",
#   "\N{COMBINING DOT BELOW}",
#   "\N{COMBINING DIAERESIS BELOW}",
#   "\N{COMBINING RING BELOW}",
#   "\N{COMBINING COMMA BELOW}",
#   "\N{COMBINING CEDILLA}",
#   "\N{COMBINING OGONEK}",
#   "\N{COMBINING VERTICAL LINE BELOW}",
#   "\N{COMBINING BRIDGE BELOW}",
#   "\N{COMBINING INVERTED DOUBLE ARCH BELOW}",
#   "\N{COMBINING CARON BELOW}",
#   "\N{COMBINING CIRCUMFLEX ACCENT BELOW}",
#   "\N{COMBINING BREVE BELOW}",
#   "\N{COMBINING INVERTED BREVE BELOW}",
#   "\N{COMBINING TILDE BELOW}",
#   "\N{COMBINING MACRON BELOW}",
#   "\N{COMBINING LOW LINE}",
#   "\N{COMBINING DOUBLE LOW LINE}",
#   "\N{COMBINING TILDE OVERLAY}",
#   "\N{COMBINING SHORT STROKE OVERLAY}",
#   "\N{COMBINING LONG STROKE OVERLAY}",
#   "\N{COMBINING SHORT SOLIDUS OVERLAY}",
#   "\N{COMBINING LONG SOLIDUS OVERLAY}",
#   "\N{COMBINING RIGHT HALF RING BELOW}",
#   "\N{COMBINING INVERTED BRIDGE BELOW}",
#   "\N{COMBINING SQUARE BELOW}",
#   "\N{COMBINING SEAGULL BELOW}",
#   "\N{COMBINING X ABOVE}",
#   "\N{COMBINING VERTICAL TILDE}",
#   "\N{COMBINING DOUBLE OVERLINE}",
#   "\N{COMBINING GRAVE TONE MARK}",
#   "\N{COMBINING ACUTE TONE MARK}",
#   "\N{COMBINING GREEK PERISPOMENI}",
#   "\N{COMBINING GREEK KORONIS}",
#   "\N{COMBINING GREEK DIALYTIKA TONOS}",
#   "\N{COMBINING GREEK YPOGEGRAMMENI}",
#   "\N{COMBINING BRIDGE ABOVE}",
#   "\N{COMBINING EQUALS SIGN BELOW}",
#   "\N{COMBINING DOUBLE VERTICAL LINE BELOW}",
#   "\N{COMBINING LEFT ANGLE BELOW}",
#   "\N{COMBINING NOT TILDE ABOVE}",
#   "\N{COMBINING HOMOTHETIC ABOVE}",
#   "\N{COMBINING ALMOST EQUAL TO ABOVE}",
#   "\N{COMBINING LEFT RIGHT ARROW BELOW}",
#   "\N{COMBINING UPWARDS ARROW BELOW}",
#   "\N{COMBINING GRAPHEME JOINER}",
#   "\N{COMBINING RIGHT ARROWHEAD ABOVE}",
#   "\N{COMBINING LEFT HALF RING ABOVE}",
#   "\N{COMBINING FERMATA}",
#   "\N{COMBINING X BELOW}",
#   "\N{COMBINING LEFT ARROWHEAD BELOW}",
#   "\N{COMBINING RIGHT ARROWHEAD BELOW}",
#   "\N{COMBINING RIGHT ARROWHEAD AND UP ARROWHEAD BELOW}",
#   "\N{COMBINING RIGHT HALF RING ABOVE}",
#   "\N{COMBINING DOT ABOVE RIGHT}",
#   "\N{COMBINING ASTERISK BELOW}",
#   "\N{COMBINING DOUBLE RING BELOW}",
#   "\N{COMBINING ZIGZAG ABOVE}",
#   "\N{COMBINING DOUBLE BREVE BELOW}",
#   "\N{COMBINING DOUBLE BREVE}",
#   "\N{COMBINING DOUBLE MACRON}",
#   "\N{COMBINING DOUBLE MACRON BELOW}",
#   "\N{COMBINING DOUBLE TILDE}",
#   "\N{COMBINING DOUBLE INVERTED BREVE}",
#   "\N{COMBINING DOUBLE RIGHTWARDS ARROW BELOW}",
#   "\N{COMBINING LATIN SMALL LETTER A}",
#   "\N{COMBINING LATIN SMALL LETTER E}",
#   "\N{COMBINING LATIN SMALL LETTER I}",
#   "\N{COMBINING LATIN SMALL LETTER O}",
#   "\N{COMBINING LATIN SMALL LETTER U}",
#   "\N{COMBINING LATIN SMALL LETTER C}",
#   "\N{COMBINING LATIN SMALL LETTER D}",
#   "\N{COMBINING LATIN SMALL LETTER H}",
#   "\N{COMBINING LATIN SMALL LETTER M}",
#   "\N{COMBINING LATIN SMALL LETTER R}",
#   "\N{COMBINING LATIN SMALL LETTER T}",
#   "\N{COMBINING LATIN SMALL LETTER V}",
#   "\N{COMBINING LATIN SMALL LETTER X}");
#
# use Unicode::Normalize;
# foreach my $letter (ord('A') .. ord('Z')) {
#   foreach my $accent(@SUPPORTED_ACCENTS) {
#     my $upper = chr($letter);
#     $upper = NFC("$upper$accent");
#     my $lower = chr($letter + 0x20);
#     $lower = NFC("$lower$accent");
#     # Only handle standard characters that have combined normal forms (Perl NFC)
#     if (length($upper) > 1 || length($lower) > 1) { next; }
#     my $upper_code = ord($upper);
#     my $lower_code = ord($lower);
#     my $upper_hex = sprintf("00%X", $upper_code);
#     my $lower_hex = sprintf("00%X", $lower_code);
#     print "[\"\\x{$upper_hex}\", $upper_code, \"\\x{$lower_hex}\", $lower_code],\n";
#  } }
#
#**********************************************************************
1;