# -*- mode: Perl -*-
# /=====================================================================\ #
# | utf8.def | #
# | Implementation for LaTeXML | #
# |=====================================================================| #
# | Part of LaTeXML: | #
# | Public domain software, produced as part of work done by the | #
# | United States Government & not subject to copyright in the US. | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov> #_# | #
# | http://dlmf.nist.gov/LaTeXML/ (o o) | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;
#**********************************************************************
# Override the LaTeX distribution's implementation.
# (1) I don't know if we want (or need) to get into reassembling bytes to decode utf8.
# (2) why bother?
#**********************************************************************
# Undo the disabling of the control characters (0-8, 0xB, 0xE-0x1E) and of
# all the upper-half codes (128-255): each one gets catcode OTHER again.
for my $byte (0 .. 8, 0xB, 0xE .. 0x1E, 128 .. 255) {
  AssignCatcode(pack('C', $byte), CC_OTHER);
}
# No TeX-level input encoding is in effect ...
AssignValue('INPUT_ENCODING', undef);
# ... and the Mouth is set to simply pass the utf8 through.
AssignValue('PERL_INPUT_ENCODING', 'UTF8');
# Also add the uc and lc code definitions.
# Each row is [uppercase char, uppercase code point, lowercase char, lowercase code point];
# the decimal code points are the values of the adjacent "\x{...}" escapes.
# Some rows occur more than once; the repeats are identical, so processing
# them again merely re-assigns the same values.
our @PRECOMPUTED_UC_LC = (
["\x{00C1}", 193, "\x{00E1}", 225],
["\x{00C2}", 194, "\x{00E2}", 226],
["\x{00C3}", 195, "\x{00E3}", 227],
["\x{00100}", 256, "\x{00101}", 257],
["\x{00102}", 258, "\x{00103}", 259],
["\x{00226}", 550, "\x{00227}", 551],
["\x{00C4}", 196, "\x{00E4}", 228],
["\x{001EA2}", 7842, "\x{001EA3}", 7843],
["\x{00C5}", 197, "\x{00E5}", 229],
["\x{001CD}", 461, "\x{001CE}", 462],
["\x{00200}", 512, "\x{00201}", 513],
["\x{00202}", 514, "\x{00203}", 515],
["\x{001EA0}", 7840, "\x{001EA1}", 7841],
["\x{001E00}", 7680, "\x{001E01}", 7681],
["\x{00104}", 260, "\x{00105}", 261],
["\x{00C0}", 192, "\x{00E0}", 224],
["\x{00C1}", 193, "\x{00E1}", 225],
["\x{001E02}", 7682, "\x{001E03}", 7683],
["\x{001E04}", 7684, "\x{001E05}", 7685],
["\x{001E06}", 7686, "\x{001E07}", 7687],
["\x{00106}", 262, "\x{00107}", 263],
["\x{00108}", 264, "\x{00109}", 265],
["\x{0010A}", 266, "\x{0010B}", 267],
["\x{0010C}", 268, "\x{0010D}", 269],
["\x{00C7}", 199, "\x{00E7}", 231],
["\x{00106}", 262, "\x{00107}", 263],
["\x{001E0A}", 7690, "\x{001E0B}", 7691],
["\x{0010E}", 270, "\x{0010F}", 271],
["\x{001E0C}", 7692, "\x{001E0D}", 7693],
["\x{001E10}", 7696, "\x{001E11}", 7697],
["\x{001E12}", 7698, "\x{001E13}", 7699],
["\x{001E0E}", 7694, "\x{001E0F}", 7695],
["\x{00C8}", 200, "\x{00E8}", 232],
["\x{00C9}", 201, "\x{00E9}", 233],
["\x{00CA}", 202, "\x{00EA}", 234],
["\x{001EBC}", 7868, "\x{001EBD}", 7869],
["\x{00112}", 274, "\x{00113}", 275],
["\x{00114}", 276, "\x{00115}", 277],
["\x{00116}", 278, "\x{00117}", 279],
["\x{00CB}", 203, "\x{00EB}", 235],
["\x{001EBA}", 7866, "\x{001EBB}", 7867],
["\x{0011A}", 282, "\x{0011B}", 283],
["\x{00204}", 516, "\x{00205}", 517],
["\x{00206}", 518, "\x{00207}", 519],
["\x{001EB8}", 7864, "\x{001EB9}", 7865],
["\x{00228}", 552, "\x{00229}", 553],
["\x{00118}", 280, "\x{00119}", 281],
["\x{001E18}", 7704, "\x{001E19}", 7705],
["\x{001E1A}", 7706, "\x{001E1B}", 7707],
["\x{00C8}", 200, "\x{00E8}", 232],
["\x{00C9}", 201, "\x{00E9}", 233],
["\x{001E1E}", 7710, "\x{001E1F}", 7711],
["\x{001F4}", 500, "\x{001F5}", 501],
["\x{0011C}", 284, "\x{0011D}", 285],
["\x{001E20}", 7712, "\x{001E21}", 7713],
["\x{0011E}", 286, "\x{0011F}", 287],
["\x{00120}", 288, "\x{00121}", 289],
["\x{001E6}", 486, "\x{001E7}", 487],
["\x{00122}", 290, "\x{00123}", 291],
["\x{001F4}", 500, "\x{001F5}", 501],
["\x{00124}", 292, "\x{00125}", 293],
["\x{001E22}", 7714, "\x{001E23}", 7715],
["\x{001E26}", 7718, "\x{001E27}", 7719],
["\x{0021E}", 542, "\x{0021F}", 543],
["\x{001E24}", 7716, "\x{001E25}", 7717],
["\x{001E28}", 7720, "\x{001E29}", 7721],
["\x{001E2A}", 7722, "\x{001E2B}", 7723],
["\x{00CC}", 204, "\x{00EC}", 236],
["\x{00CD}", 205, "\x{00ED}", 237],
["\x{00CE}", 206, "\x{00EE}", 238],
["\x{00128}", 296, "\x{00129}", 297],
["\x{0012A}", 298, "\x{0012B}", 299],
["\x{0012C}", 300, "\x{0012D}", 301],
["\x{00CF}", 207, "\x{00EF}", 239],
["\x{001EC8}", 7880, "\x{001EC9}", 7881],
["\x{001CF}", 463, "\x{001D0}", 464],
["\x{00208}", 520, "\x{00209}", 521],
["\x{0020A}", 522, "\x{0020B}", 523],
["\x{001ECA}", 7882, "\x{001ECB}", 7883],
["\x{0012E}", 302, "\x{0012F}", 303],
["\x{001E2C}", 7724, "\x{001E2D}", 7725],
["\x{00CC}", 204, "\x{00EC}", 236],
["\x{00CD}", 205, "\x{00ED}", 237],
["\x{001E2E}", 7726, "\x{001E2F}", 7727],
["\x{00134}", 308, "\x{00135}", 309],
["\x{001E30}", 7728, "\x{001E31}", 7729],
["\x{001E8}", 488, "\x{001E9}", 489],
["\x{001E32}", 7730, "\x{001E33}", 7731],
["\x{00136}", 310, "\x{00137}", 311],
["\x{001E34}", 7732, "\x{001E35}", 7733],
["\x{001E30}", 7728, "\x{001E31}", 7729],
["\x{00139}", 313, "\x{0013A}", 314],
["\x{0013D}", 317, "\x{0013E}", 318],
["\x{001E36}", 7734, "\x{001E37}", 7735],
["\x{0013B}", 315, "\x{0013C}", 316],
["\x{001E3C}", 7740, "\x{001E3D}", 7741],
["\x{001E3A}", 7738, "\x{001E3B}", 7739],
["\x{00139}", 313, "\x{0013A}", 314],
["\x{001E3E}", 7742, "\x{001E3F}", 7743],
["\x{001E40}", 7744, "\x{001E41}", 7745],
["\x{001E42}", 7746, "\x{001E43}", 7747],
["\x{001E3E}", 7742, "\x{001E3F}", 7743],
["\x{001F8}", 504, "\x{001F9}", 505],
["\x{00143}", 323, "\x{00144}", 324],
["\x{00D1}", 209, "\x{00F1}", 241],
["\x{001E44}", 7748, "\x{001E45}", 7749],
["\x{00147}", 327, "\x{00148}", 328],
["\x{001E46}", 7750, "\x{001E47}", 7751],
["\x{00145}", 325, "\x{00146}", 326],
["\x{001E4A}", 7754, "\x{001E4B}", 7755],
["\x{001E48}", 7752, "\x{001E49}", 7753],
["\x{001F8}", 504, "\x{001F9}", 505],
["\x{00143}", 323, "\x{00144}", 324],
["\x{00D2}", 210, "\x{00F2}", 242],
["\x{00D3}", 211, "\x{00F3}", 243],
["\x{00D4}", 212, "\x{00F4}", 244],
["\x{00D5}", 213, "\x{00F5}", 245],
["\x{0014C}", 332, "\x{0014D}", 333],
["\x{0014E}", 334, "\x{0014F}", 335],
["\x{0022E}", 558, "\x{0022F}", 559],
["\x{00D6}", 214, "\x{00F6}", 246],
["\x{001ECE}", 7886, "\x{001ECF}", 7887],
["\x{00150}", 336, "\x{00151}", 337],
["\x{001D1}", 465, "\x{001D2}", 466],
["\x{0020C}", 524, "\x{0020D}", 525],
["\x{0020E}", 526, "\x{0020F}", 527],
["\x{001A0}", 416, "\x{001A1}", 417],
["\x{001ECC}", 7884, "\x{001ECD}", 7885],
["\x{001EA}", 490, "\x{001EB}", 491],
["\x{00D2}", 210, "\x{00F2}", 242],
["\x{00D3}", 211, "\x{00F3}", 243],
["\x{001E54}", 7764, "\x{001E55}", 7765],
["\x{001E56}", 7766, "\x{001E57}", 7767],
["\x{001E54}", 7764, "\x{001E55}", 7765],
["\x{00154}", 340, "\x{00155}", 341],
["\x{001E58}", 7768, "\x{001E59}", 7769],
["\x{00158}", 344, "\x{00159}", 345],
["\x{00210}", 528, "\x{00211}", 529],
["\x{00212}", 530, "\x{00213}", 531],
["\x{001E5A}", 7770, "\x{001E5B}", 7771],
["\x{00156}", 342, "\x{00157}", 343],
["\x{001E5E}", 7774, "\x{001E5F}", 7775],
["\x{00154}", 340, "\x{00155}", 341],
["\x{0015A}", 346, "\x{0015B}", 347],
["\x{0015C}", 348, "\x{0015D}", 349],
["\x{001E60}", 7776, "\x{001E61}", 7777],
["\x{00160}", 352, "\x{00161}", 353],
["\x{001E62}", 7778, "\x{001E63}", 7779],
["\x{00218}", 536, "\x{00219}", 537],
["\x{0015E}", 350, "\x{0015F}", 351],
["\x{0015A}", 346, "\x{0015B}", 347],
["\x{001E6A}", 7786, "\x{001E6B}", 7787],
["\x{00164}", 356, "\x{00165}", 357],
["\x{001E6C}", 7788, "\x{001E6D}", 7789],
["\x{0021A}", 538, "\x{0021B}", 539],
["\x{00162}", 354, "\x{00163}", 355],
["\x{001E70}", 7792, "\x{001E71}", 7793],
["\x{001E6E}", 7790, "\x{001E6F}", 7791],
["\x{00D9}", 217, "\x{00F9}", 249],
["\x{00DA}", 218, "\x{00FA}", 250],
["\x{00DB}", 219, "\x{00FB}", 251],
["\x{00168}", 360, "\x{00169}", 361],
["\x{0016A}", 362, "\x{0016B}", 363],
["\x{0016C}", 364, "\x{0016D}", 365],
["\x{00DC}", 220, "\x{00FC}", 252],
["\x{001EE6}", 7910, "\x{001EE7}", 7911],
["\x{0016E}", 366, "\x{0016F}", 367],
["\x{00170}", 368, "\x{00171}", 369],
["\x{001D3}", 467, "\x{001D4}", 468],
["\x{00214}", 532, "\x{00215}", 533],
["\x{00216}", 534, "\x{00217}", 535],
["\x{001AF}", 431, "\x{001B0}", 432],
["\x{001EE4}", 7908, "\x{001EE5}", 7909],
["\x{001E72}", 7794, "\x{001E73}", 7795],
["\x{00172}", 370, "\x{00173}", 371],
["\x{001E76}", 7798, "\x{001E77}", 7799],
["\x{001E74}", 7796, "\x{001E75}", 7797],
["\x{00D9}", 217, "\x{00F9}", 249],
["\x{00DA}", 218, "\x{00FA}", 250],
["\x{001D7}", 471, "\x{001D8}", 472],
["\x{001E7C}", 7804, "\x{001E7D}", 7805],
["\x{001E7E}", 7806, "\x{001E7F}", 7807],
["\x{001E80}", 7808, "\x{001E81}", 7809],
["\x{001E82}", 7810, "\x{001E83}", 7811],
["\x{00174}", 372, "\x{00175}", 373],
["\x{001E86}", 7814, "\x{001E87}", 7815],
["\x{001E84}", 7812, "\x{001E85}", 7813],
["\x{001E88}", 7816, "\x{001E89}", 7817],
["\x{001E80}", 7808, "\x{001E81}", 7809],
["\x{001E82}", 7810, "\x{001E83}", 7811],
["\x{001E8A}", 7818, "\x{001E8B}", 7819],
["\x{001E8C}", 7820, "\x{001E8D}", 7821],
["\x{001EF2}", 7922, "\x{001EF3}", 7923],
["\x{00DD}", 221, "\x{00FD}", 253],
["\x{00176}", 374, "\x{00177}", 375],
["\x{001EF8}", 7928, "\x{001EF9}", 7929],
["\x{00232}", 562, "\x{00233}", 563],
["\x{001E8E}", 7822, "\x{001E8F}", 7823],
["\x{00178}", 376, "\x{00FF}", 255],
["\x{001EF6}", 7926, "\x{001EF7}", 7927],
["\x{001EF4}", 7924, "\x{001EF5}", 7925],
["\x{001EF2}", 7922, "\x{001EF3}", 7923],
["\x{00DD}", 221, "\x{00FD}", 253],
["\x{00179}", 377, "\x{0017A}", 378],
["\x{001E90}", 7824, "\x{001E91}", 7825],
["\x{0017B}", 379, "\x{0017C}", 380],
["\x{0017D}", 381, "\x{0017E}", 382],
["\x{001E92}", 7826, "\x{001E93}", 7827],
["\x{001E94}", 7828, "\x{001E95}", 7829],
["\x{00179}", 377, "\x{0017A}", 378]
);
# Register the case codes of every precomputed pair, globally.
# Both members of a pair receive the same mapping: the lccode is the
# lowercase code point and the uccode is the uppercase code point.
foreach my $pair (@PRECOMPUTED_UC_LC) {
  my ($uc_char, $uc_code, $lc_char, $lc_code) = @{$pair};
  # Uppercase character first, then lowercase; LC before UC for each.
  foreach my $char ($uc_char, $lc_char) {
    $STATE->assignLCcode($char, $lc_code, 'global');
    $STATE->assignUCcode($char, $uc_code, 'global');
  }
}
# The @PRECOMPUTED_UC_LC table above was obtained via the following
# (commented-out) generator script:
#
# our @SUPPORTED_ACCENTS = (
# "\N{COMBINING GRAVE ACCENT}",
# "\N{COMBINING ACUTE ACCENT}",
# "\N{COMBINING CIRCUMFLEX ACCENT}",
# "\N{COMBINING TILDE}",
# "\N{COMBINING MACRON}",
# "\N{COMBINING OVERLINE}",
# "\N{COMBINING BREVE}",
# "\N{COMBINING DOT ABOVE}",
# "\N{COMBINING DIAERESIS}",
# "\N{COMBINING HOOK ABOVE}",
# "\N{COMBINING RING ABOVE}",
# "\N{COMBINING DOUBLE ACUTE ACCENT}",
# "\N{COMBINING CARON}",
# "\N{COMBINING VERTICAL LINE ABOVE}",
# "\N{COMBINING DOUBLE VERTICAL LINE ABOVE}",
# "\N{COMBINING DOUBLE GRAVE ACCENT}",
# "\N{COMBINING CANDRABINDU}",
# "\N{COMBINING INVERTED BREVE}",
# "\N{COMBINING TURNED COMMA ABOVE}",
# "\N{COMBINING COMMA ABOVE}",
# "\N{COMBINING REVERSED COMMA ABOVE}",
# "\N{COMBINING COMMA ABOVE RIGHT}",
# "\N{COMBINING GRAVE ACCENT BELOW}",
# "\N{COMBINING ACUTE ACCENT BELOW}",
# "\N{COMBINING LEFT TACK BELOW}",
# "\N{COMBINING RIGHT TACK BELOW}",
# "\N{COMBINING LEFT ANGLE ABOVE}",
# "\N{COMBINING HORN}",
# "\N{COMBINING LEFT HALF RING BELOW}",
# "\N{COMBINING UP TACK BELOW}",
# "\N{COMBINING DOWN TACK BELOW}",
# "\N{COMBINING PLUS SIGN BELOW}",
# "\N{COMBINING MINUS SIGN BELOW}",
# "\N{COMBINING PALATALIZED HOOK BELOW}",
# "\N{COMBINING RETROFLEX HOOK BELOW}",
# "\N{COMBINING DOT BELOW}",
# "\N{COMBINING DIAERESIS BELOW}",
# "\N{COMBINING RING BELOW}",
# "\N{COMBINING COMMA BELOW}",
# "\N{COMBINING CEDILLA}",
# "\N{COMBINING OGONEK}",
# "\N{COMBINING VERTICAL LINE BELOW}",
# "\N{COMBINING BRIDGE BELOW}",
# "\N{COMBINING INVERTED DOUBLE ARCH BELOW}",
# "\N{COMBINING CARON BELOW}",
# "\N{COMBINING CIRCUMFLEX ACCENT BELOW}",
# "\N{COMBINING BREVE BELOW}",
# "\N{COMBINING INVERTED BREVE BELOW}",
# "\N{COMBINING TILDE BELOW}",
# "\N{COMBINING MACRON BELOW}",
# "\N{COMBINING LOW LINE}",
# "\N{COMBINING DOUBLE LOW LINE}",
# "\N{COMBINING TILDE OVERLAY}",
# "\N{COMBINING SHORT STROKE OVERLAY}",
# "\N{COMBINING LONG STROKE OVERLAY}",
# "\N{COMBINING SHORT SOLIDUS OVERLAY}",
# "\N{COMBINING LONG SOLIDUS OVERLAY}",
# "\N{COMBINING RIGHT HALF RING BELOW}",
# "\N{COMBINING INVERTED BRIDGE BELOW}",
# "\N{COMBINING SQUARE BELOW}",
# "\N{COMBINING SEAGULL BELOW}",
# "\N{COMBINING X ABOVE}",
# "\N{COMBINING VERTICAL TILDE}",
# "\N{COMBINING DOUBLE OVERLINE}",
# "\N{COMBINING GRAVE TONE MARK}",
# "\N{COMBINING ACUTE TONE MARK}",
# "\N{COMBINING GREEK PERISPOMENI}",
# "\N{COMBINING GREEK KORONIS}",
# "\N{COMBINING GREEK DIALYTIKA TONOS}",
# "\N{COMBINING GREEK YPOGEGRAMMENI}",
# "\N{COMBINING BRIDGE ABOVE}",
# "\N{COMBINING EQUALS SIGN BELOW}",
# "\N{COMBINING DOUBLE VERTICAL LINE BELOW}",
# "\N{COMBINING LEFT ANGLE BELOW}",
# "\N{COMBINING NOT TILDE ABOVE}",
# "\N{COMBINING HOMOTHETIC ABOVE}",
# "\N{COMBINING ALMOST EQUAL TO ABOVE}",
# "\N{COMBINING LEFT RIGHT ARROW BELOW}",
# "\N{COMBINING UPWARDS ARROW BELOW}",
# "\N{COMBINING GRAPHEME JOINER}",
# "\N{COMBINING RIGHT ARROWHEAD ABOVE}",
# "\N{COMBINING LEFT HALF RING ABOVE}",
# "\N{COMBINING FERMATA}",
# "\N{COMBINING X BELOW}",
# "\N{COMBINING LEFT ARROWHEAD BELOW}",
# "\N{COMBINING RIGHT ARROWHEAD BELOW}",
# "\N{COMBINING RIGHT ARROWHEAD AND UP ARROWHEAD BELOW}",
# "\N{COMBINING RIGHT HALF RING ABOVE}",
# "\N{COMBINING DOT ABOVE RIGHT}",
# "\N{COMBINING ASTERISK BELOW}",
# "\N{COMBINING DOUBLE RING BELOW}",
# "\N{COMBINING ZIGZAG ABOVE}",
# "\N{COMBINING DOUBLE BREVE BELOW}",
# "\N{COMBINING DOUBLE BREVE}",
# "\N{COMBINING DOUBLE MACRON}",
# "\N{COMBINING DOUBLE MACRON BELOW}",
# "\N{COMBINING DOUBLE TILDE}",
# "\N{COMBINING DOUBLE INVERTED BREVE}",
# "\N{COMBINING DOUBLE RIGHTWARDS ARROW BELOW}",
# "\N{COMBINING LATIN SMALL LETTER A}",
# "\N{COMBINING LATIN SMALL LETTER E}",
# "\N{COMBINING LATIN SMALL LETTER I}",
# "\N{COMBINING LATIN SMALL LETTER O}",
# "\N{COMBINING LATIN SMALL LETTER U}",
# "\N{COMBINING LATIN SMALL LETTER C}",
# "\N{COMBINING LATIN SMALL LETTER D}",
# "\N{COMBINING LATIN SMALL LETTER H}",
# "\N{COMBINING LATIN SMALL LETTER M}",
# "\N{COMBINING LATIN SMALL LETTER R}",
# "\N{COMBINING LATIN SMALL LETTER T}",
# "\N{COMBINING LATIN SMALL LETTER V}",
# "\N{COMBINING LATIN SMALL LETTER X}");
#
# use Unicode::Normalize;
# foreach my $letter (ord('A') .. ord('Z')) {
# foreach my $accent(@SUPPORTED_ACCENTS) {
# my $upper = chr($letter);
# $upper = NFC("$upper$accent");
# my $lower = chr($letter + 0x20);
# $lower = NFC("$lower$accent");
# # Only handle standard characters that have combined normal forms (Perl NFC)
# if (length($upper) > 1 || length($lower) > 1) { next; }
# my $upper_code = ord($upper);
# my $lower_code = ord($lower);
# my $upper_hex = sprintf("00%X", $upper_code);
# my $lower_hex = sprintf("00%X", $lower_code);
# print "[\"\\x{$upper_hex}\", $upper_code, \"\\x{$lower_hex}\", $lower_code],\n";
# } }
#
#**********************************************************************
1;