# -*- mode: Perl -*-
# /=====================================================================\ #
# | TeX | #
# | Core TeX Implementation for LaTeXML | #
# |=====================================================================| #
# | Part of LaTeXML: | #
# | Public domain software, produced as part of work done by the | #
# | United States Government & not subject to copyright in the US. | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov> #_# | #
# | http://dlmf.nist.gov/LaTeXML/ (o o) | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;
use Unicode::Normalize;
use LaTeXML::Util::Pathname;
use List::Util qw(min max);
# Register the XML namespace prefixes known to the core, in this order.
# The m: and xhtml: namespaces are not directly used, but let's stake out the ground.
foreach my $ns (
  [ltx   => "http://dlmf.nist.gov/LaTeXML"],
  [svg   => "http://www.w3.org/2000/svg"],
  [xlink => "http://www.w3.org/1999/xlink"],    # Needed for SVG
  [m     => "http://www.w3.org/1998/Math/MathML"],
  [xhtml => "http://www.w3.org/1999/xhtml"]) {
  my ($prefix, $uri) = @$ns;
  RegisterNamespace($prefix => $uri); }
# \@empty is defined with an empty token list.
DefMacroI("\\\@empty", undef, Tokens());
#======================================================================
# Core ID functionality.
#======================================================================
# DOCUMENTID is the ID of the document
# AND prefixes IDs on all other elements.
{
  # \thedocument@ID holds the value of DOCUMENTID when one is set,
  # otherwise it is simply \@empty.
  my $document_id = LookupValue('DOCUMENTID');
  if (!$document_id) {
    Let('\thedocument@ID', '\@empty'); }
  else {
    # Wrap in T_OTHER so funny chars don't screw up (no space!)
    DefMacroI('\thedocument@ID', undef, T_OTHER($document_id)); }
}
NewCounter('@XMARG', 'document', idprefix => 'XM');
# Optionally, add ID's to ALL nodes.
# By default, this is OFF;
# Set to 1 (or \usepackage[ids]{latexml}) to enable.
# Set to 0 (or \usepackage[noids]{latexml}) to disable.
#### AssignValue(GENERATE_IDS=>1,'global');
Tag('ltx:*', afterOpen => sub {
    my ($document, $node) = @_;
    # If GENERATE_IDS is true, an ID is generated for EVERY element, except:
    #  * ltx:document, which only gets an id from an EXPLICIT \thedocument@ID;
    #  * ltx:XMWrap (No auto-generated id on wrap???)
    my $qname    = $document->getNodeQName($node);
    my $excluded = ($qname eq 'ltx:document') || ($qname eq 'ltx:XMWrap');
    if (!$excluded && LookupValue('GENERATE_IDS')) {
      GenerateID(@_); } });
#======================================================================
# Whenever an ltx:document element is opened, ProcessPendingResources is invoked.
Tag('ltx:document', afterOpen => \&ProcessPendingResources);
# Request the default stylesheet resource.
RequireResource('LaTeXML.css');
#======================================================================
# The default "initial context" for XML+RDFa specifies some default
# terms and prefixes, but no default vocabulary.
# Ought to have a default for @vocab, but settable?
# can we detect use of simple "term"s in attributes so we know whether we need @vocab?
# Ought to have a default set of prefixes from RDFa Core,
# but allow prefixes to be added.
# Probably ought to scan rdf attributes for all uses of prefixes,
# and include them in @prefix
# The following prefixes are listed in http://www.w3.org/2011/rdfa-context/rdfa-1.1
{
  # Default RDFa prefix => vocabulary URI table;
  # each entry is recorded in the 'RDFa_prefixes' mapping.
  my %rdf_prefixes = (
    cc      => "http://creativecommons.org/ns#",
    ctag    => "http://commontag.org/ns#",
    dc      => "http://purl.org/dc/terms/",
    dcterms => "http://purl.org/dc/terms/",
    ical    => "http://www.w3.org/2002/12/cal/icaltzd#",
    foaf    => "http://xmlns.com/foaf/0.1/",
    gr      => "http://purl.org/goodrelations/v1#",
    grddl   => "http://www.w3.org/2003/g/data-view#",
    ma      => "http://www.w3.org/ns/ma-ont#",
    og      => "http://ogp.me/ns#",
    owl     => "http://www.w3.org/2002/07/owl#",
    rdf     => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    rdfa    => "http://www.w3.org/ns/rdfa#",
    rdfs    => "http://www.w3.org/2000/01/rdf-schema#",
    rev     => "http://purl.org/stuff/rev#",
    rif     => "http://www.w3.org/2007/rif#",
    rr      => "http://www.w3.org/ns/r2rml#",
    schema  => "http://schema.org/",
    sioc    => "http://rdfs.org/sioc/ns#",
    skos    => "http://www.w3.org/2004/02/skos/core#",
    skosxl  => "http://www.w3.org/2008/05/skos-xl#",
    v       => "http://rdf.data-vocabulary.org/#",
    vcard   => "http://www.w3.org/2006/vcard/ns#",
    void    => "http://rdfs.org/ns/void#",
    xhv     => "http://www.w3.org/1999/xhtml/vocab#",
    xml     => "http://www.w3.org/XML/1998/namespace",
    xsd     => "http://www.w3.org/2001/XMLSchema#",
    wdr     => "http://www.w3.org/2007/05/powder#",
    wdrs    => "http://www.w3.org/2007/05/powder-s#",
  );
  while (my ($prefix, $uri) = each %rdf_prefixes) {
    AssignMapping('RDFa_prefixes', $prefix => $uri); }
}
#**********************************************************************
# CORE TeX; Built-in commands.
#**********************************************************************
#======================================================================
# Define parsers for standard parameter types.
# Plain: read a regular argument with readArg.
# If an inner parameter object ($inner) is supplied, the argument is reparsed through it.
# Reversion puts the (reverted) argument back between braces.
DefParameterType('Plain', sub {
    my ($gullet, $inner) = @_;
    my $arg = $gullet->readArg;
    ($arg) = $inner->reparseArgument($gullet, $arg) if $inner;
    return $arg; },
  reversion => sub {
    my ($arg, $inner) = @_;
    my @body = ($inner ? $inner->revertArguments($arg) : Revert($arg));
    return (T_BEGIN, @body, T_END); });
# Optional: read a LaTeX-style optional argument with readOptional.
# A missing (false) argument is replaced by $default when one was given;
# otherwise, if $inner is supplied, the argument is reparsed through it.
# Reversion yields [ ... ] for a true argument, and nothing otherwise.
DefParameterType('Optional', sub {
    my ($gullet, $default, $inner) = @_;
    my $arg = $gullet->readOptional;
    if (!$arg && $default) {
      return $default; }
    ($arg) = $inner->reparseArgument($gullet, $arg) if $inner;
    return $arg; },
  optional  => 1,
  reversion => sub {
    my ($arg, $default, $inner) = @_;
    return () unless $arg;
    my @body = ($inner ? $inner->revertArguments($arg) : Revert($arg));
    return (T_OTHER('['), @body, T_OTHER(']')); });
# This is a peculiar type of argument of the form
# <general text> = <filler>{<balanced text><right brace>
# however, <filler> does get expanded while searching for the initial {
# which IS required in contrast to a general argument; ie a single token is not correct.
# GeneralText: reads an expanded token, which must be an opening brace,
# then returns the balanced text that follows it.
# If the brace is missing, an error is signalled and whatever was read is returned.
DefParameterType('GeneralText', sub {
    my ($gullet) = @_;
    my $open = $gullet->readXToken;
    # Other readers in this file treat the result of readXToken as possibly undefined
    # (eg. when input runs out), so guard before calling a method on it;
    # this way the error below is reported, rather than dying.
    if ($open && $open->equals(T_BEGIN)) {
      return $gullet->readBalanced; }
    else {
      Error('expected', '{', $gullet,
        "Expected <general text> here");
      return $open; } });
# Until: read tokens up to the given delimiter (using readUntil).
# Reversion is the reverted argument followed by the reverted delimiter.
DefParameterType('Until', sub {
    my ($gullet, $delimiter) = @_;
    return $gullet->readUntil($delimiter); },
  reversion => sub {
    my ($arg, $delimiter) = @_;
    return (Revert($arg), Revert($delimiter)); });
# Skip any spaces, but don't contribute an argument.
DefParameterType('SkipSpaces', sub {
    my ($gullet) = @_;
    $gullet->skipSpaces;
    return 1; },
  novalue => 1);
# Likewise, but using skip1Space.
DefParameterType('Skip1Space', sub {
    my ($gullet) = @_;
    $gullet->skip1Space;
    return 1; },
  novalue => 1);
# The next token, as is.
DefParameterType('Token', sub {
    my ($gullet) = @_;
    return $gullet->readToken; });
# The next token, after expanding any expandable ones.
DefParameterType('XToken', sub {
    my ($gullet) = @_;
    return $gullet->readXToken; });
# A number
DefParameterType('Number', sub {
    my ($gullet) = @_;
    return $gullet->readNumber; });
# A floating point number
DefParameterType('Float', sub {
    my ($gullet) = @_;
    return $gullet->readFloat; });
# Skip spaces, then read a float from $gullet;
# when readFloat gives a false result, Float(0) is returned instead.
sub ReadFloat {
  my ($gullet) = @_;
  $gullet->skipSpaces;
  my $float = $gullet->readFloat;
  return ($float ? $float : Float(0)); }
# Read a dimension
DefParameterType('Dimension', sub {
    my ($gullet) = @_;
    return $gullet->readDimension; });
# A Glue (aka skip)
DefParameterType('Glue', sub {
    my ($gullet) = @_;
    return $gullet->readGlue; });
# A MuDimension (math)
DefParameterType('MuDimension', sub {
    my ($gullet) = @_;
    return $gullet->readMuDimension; });
# A MuGlue (math)
DefParameterType('MuGlue', sub {
    my ($gullet) = @_;
    return $gullet->readMuGlue; });
# Read until the next (balanced) open brace {
# used for the last TeX-style delimited argument
DefParameterType('UntilBrace', sub { return $_[0]->readUntilBrace; });
# Yet another special case: Require a { but do not read it!!!
# Signals an error unless the next token is an opening brace; the brace is only peeked at.
# The reader always yields T_BEGIN, and the type is declared with novalue.
DefParameterType('RequireBrace', sub {
    my ($gullet) = @_;
    Error('expected', '{', $gullet, "Expected a { here")
      unless $gullet->ifNext(T_BEGIN);
    return T_BEGIN; },
  novalue => 1);
# XUntil: collect expanded tokens until the (single) delimiter token is seen.
# Braced groups are copied as balanced units; tokens that have a definition are
# replaced by their Invocation (with arguments read from the gullet).
DefParameterType('XUntil', sub {
    my ($gullet, $until) = @_;
    ($until) = $until->unlist;    # Make sure it's a single token!!!
    my @collected = ();
    while (my $tok = $gullet->readXToken(0)) {
      last if $tok->equals($until);
      if ($tok->getCatcode == CC_BEGIN) {
        push(@collected, $tok, $gullet->readBalanced->unlist, T_END);
        next; }
      my $defn = LookupDefinition($tok);
      push(@collected, ($defn ? Invocation($tok, $defn->readArguments($gullet)) : $tok)); }
    return Tokens(@collected); });
# This is sorta like readbalanced, but expands as it goes.
# This appears to be needed by certain primitives (eg. \noalign ?)
# and maybe what we should be using for some Digested ??
# Expanded: read one expanded token; if it opens a group, keep reading expanded
# tokens (tracking brace depth) until that group is closed.
# Returns all tokens read, including the enclosing braces.
DefParameterType('Expanded', sub {
    my ($gullet) = @_;
    my $token = $gullet->readXToken(0);
    # Nothing left to read: return an empty token list, rather than
    # calling a method on an undefined value (compare the check in Digested).
    return Tokens() unless defined $token;
    my @tokens = ($token);
    if ($token->getCatcode == CC_BEGIN) {
      my $level = 1;
      while ($token = $gullet->readXToken(0)) {
        push(@tokens, $token);
        my $cc = $$token[1];    # direct access to the token's second slot, compared as a catcode
        if ($cc == CC_END) {
          $level--;
          last unless $level; }
        elsif ($cc == CC_BEGIN) {
          $level++; } } }
    return Tokens(@tokens); });
# Read a matching keyword, eg. Match:=
DefParameterType('Match', sub {
    my ($gullet, @choices) = @_;
    return $gullet->readMatch(@choices); });
# A keyword; eg. Keyword:to
# (like Match, but ignores catcodes)
DefParameterType('Keyword', sub {
    my ($gullet, @choices) = @_;
    return $gullet->readKeyword(@choices); });
# Balanced material (?)
DefParameterType('Balanced', sub {
    my ($gullet) = @_;
    return $gullet->readBalanced; });
# Read a Semiverbatim argument; ie w/ most catcodes neutralized.
DefParameterType('Semiverbatim', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  semiverbatim => 1,
  reversion    => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# A LaTeX-style optional argument (ie. in []), with the contents read as Semiverbatim.
DefParameterType('OptionalSemiverbatim', sub {
    my ($gullet) = @_;
    return $gullet->readOptional; },
  semiverbatim => 1,
  optional     => 1,
  reversion    => sub {
    my ($arg) = @_;
    return () unless $arg;
    return (T_OTHER('['), Revert($arg), T_OTHER(']')); });
# Be careful here: if % appears before the initial {, it's still a comment!
# Also, note that non-typewriter fonts will mess up some chars on digestion!
# Verbatim: read up to the opening brace with normal catcodes in effect,
# then read the balanced body inside StartSemiverbatim('%', '\\') ... EndSemiverbatim.
# The argument is digested within its own group, with the typewriter font family merged in.
DefParameterType('Verbatim', sub {
    my ($gullet) = @_;
    $gullet->readUntil(T_BEGIN);
    StartSemiverbatim('%', '\\');
    my $body = $gullet->readBalanced();
    EndSemiverbatim();
    return $body; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(family => 'typewriter'); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# Read an argument that will not be digested.
DefParameterType('Undigested', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  undigested => 1,
  reversion  => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# A LaTeX-style optional argument (ie. in []) that will not be digested.
DefParameterType('OptionalUndigested', sub {
    my ($gullet) = @_;
    return $gullet->readOptional; },
  undigested => 1,
  optional   => 1,
  reversion  => sub {
    my ($arg) = @_;
    return () unless $arg;
    return (T_OTHER('['), Revert($arg), T_OTHER(']')); });
# A keyword value (KeyVals) that will not be digested.
DefParameterType('UndigestedKey', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  undigested => 1);
# Read a token as used when defining it, ie. it may be enclosed in braces.
# DefToken: as long as the token read is an opening brace, read the balanced contents,
# discard space tokens, take the first remaining token as the candidate
# and push the other tokens back onto the gullet.
DefParameterType('DefToken', sub {
    my ($gullet) = @_;
    my $token = $gullet->readToken;
    while ($token && $token->equals(T_BEGIN)) {
      my ($first, @rest) = grep { !$_->equals(T_SPACE) } $gullet->readBalanced->unlist;
      $gullet->unread(@rest);
      $token = $first; }
    return $token; },
  undigested => 1);
# Read a variable, ie. a token (after expansion) that is a writable register.
# Variable: the value is an array ref [definition, arguments...] for the register
# named by the next expanded token; the register must not be read-only.
# Otherwise an error is signalled and the value is undef.
DefParameterType('Variable', sub {
    my ($gullet) = @_;
    my $token    = $gullet->readXToken;
    my $defn     = $token && LookupDefinition($token);
    my $writable = (defined $defn) && $defn->isRegister && !$defn->isReadonly;
    if (!$writable) {
      Error('expected', '<variable>', $gullet,
        "A <variable> was supposed to be here", "Got " . Stringify($token));
      return undef; }
    return [$defn, $defn->readArguments($gullet)]; },
  reversion => sub {
    my ($pair) = @_;
    my ($defn, @args) = @{$pair};
    return $defn->invocation(@args); });
# Same, but not necessarily writable
# Register: the value is an array ref [definition, arguments...] for the register
# named by the next expanded token (read-only registers are accepted).
# Otherwise an error is signalled and the value is undef.
DefParameterType('Register', sub {
    my ($gullet)    = @_;
    my $token       = $gullet->readXToken;
    my $defn        = $token && LookupDefinition($token);
    my $is_register = (defined $defn) && $defn->isRegister;
    if (!$is_register) {
      Error('expected', '<register>', $gullet,
        "A <register> was supposed to be here", "Got " . Stringify($token));
      return undef; }
    return [$defn, $defn->readArguments($gullet)]; },
  reversion => sub {
    my ($pair) = @_;
    my ($defn, @args) = @{$pair};
    return $defn->invocation(@args); });
# TeXFileName: collect expanded tokens until a space, end-of-line, comment
# or control sequence token is met (or the input runs out).
# If the collected tokens are wrapped in { }, this is taken as LaTeX-style
# {filename} usage: the braces are stripped and the LaTeX pool is loaded (once).
DefParameterType('TeXFileName', sub {
    my ($gullet) = @_;
    my ($token, $cc, @tokens) = ();
    while (($token = $gullet->readXToken(0))
      && (($cc = $token->getCatcode) != CC_SPACE) && ($cc != CC_EOL) && ($cc != CC_COMMENT) && ($cc != CC_CS)) {
      push(@tokens, $token); }
    # Put the terminating token back, unless it is a space, end-of-line or comment.
    # When the input ran out there is no terminating token: skip this entirely,
    # so that we neither unread an undefined value nor test an unset (or stale) $cc.
    if (defined $token) {
      $gullet->unread($token) unless ($cc == CC_SPACE) || ($cc == CC_EOL) || ($cc == CC_COMMENT); }
    if (@tokens
      && ($tokens[0]->getCatcode == CC_BEGIN)
      && ($tokens[-1]->getCatcode == CC_END)) {
      # A begin-end wrapper indicates latex style {filename} use,
      # so first unwrap,
      @tokens = @tokens[1 .. $#tokens - 1];
      # then load latex, and proceed
      if (!LookupValue('LaTeX.pool_loaded')) {    # if already loaded, DONT redefine!
        LoadPool("LaTeX"); } }
    return Tokens(@tokens); });
# A LaTeX style directory List
# DirectoryList: expects { {dir} , {dir} ... } and returns a LaTeXML::Core::Array of the items.
# An error is signalled if the opening or closing brace is missing.
DefParameterType('DirectoryList', sub {
    my ($gullet) = @_;
    if (!$gullet->ifNext(T_BEGIN)) {
      return Error('expected', 'DirectoryList', $gullet, "A DirectoryList was supposed to be here"); }
    $gullet->readToken;    # step over the opening brace
    my @dirs = ();
    while ($gullet->ifNext(T_BEGIN)) {
      # Should these be Semiverbatim ??
      push(@dirs, $gullet->readArg);
      $gullet->readMatch(T_OTHER(',')); }
    if (!$gullet->ifNext(T_END)) {
      Error('expected', '}', $gullet, "A closing } was supposed to be here"); }
    else {
      $gullet->readToken; }
    return LaTeXML::Core::Array->new(
      open     => T_BEGIN, close     => T_END,
      itemopen => T_BEGIN, itemclose => T_END,
      type     => LaTeXML::Package::parseParameters(ToString("Semiverbatim"), "CommaList")->[0],
      values   => [@dirs]); });
# This reads a Box as needed by \raise, \lower, \moveleft, \moveright.
# Hopefully there are no issues with the box being digested
# as part of the reader???
# MoveableBox: skip spaces, then let the stomach invoke the next expanded token.
# The first item that results is the value; any further items are discarded.
# An error is signalled unless that item is a Whatsit whose definition
# is named \hbox, \vbox or \vtop.
DefParameterType('MoveableBox', sub {
    my ($gullet) = @_;
    $gullet->skipSpaces;
    my ($box) = $STATE->getStomach->invokeToken($gullet->readXToken);
    # NOTE: the pattern must not contain an empty alternative (as in "\vbox||\vtop"),
    # or else an empty control sequence name would be accepted as a box.
    my $is_box = $box && $box->isa('LaTeXML::Core::Whatsit')
      && ($box->getDefinition->getCSName =~ /^\\(?:hbox|vbox|vtop)$/);
    Error('expected', '<box>', $gullet,
      "A <box> was supposed to be here", "Got " . Stringify($box))
      unless $is_box;
    return $box; });
# Read a parenthesis delimited argument.
# Note that this does NOT balance () within the argument.
# If the next expanded token stringifies to '(', the value is everything up to the next ')'.
# Otherwise the token (if any) is put back, and the value is undef.
DefParameterType('BalancedParen', sub {
    my ($gullet) = @_;
    my $next     = $gullet->readXToken;
    my $is_token = ref $next;
    return $gullet->readUntil(T_OTHER(')')) if $is_token && (ToString($next) eq '(');
    $gullet->unread($next) if $is_token;
    return undef; },
  reversion => sub {
    my ($arg) = @_;
    return (T_OTHER('('), Revert($arg), T_OTHER(')')); });
# Read a digested argument.
# The usual parameter (generally written as {}) gets
# tokenized and digested in separate stages (like TeX),
# and so it is tokenized w/o recognizing any special macros within (eg. \url).
# This parameter gets digested until the (required) opening { is balanced.
# It is useful when the content would usually need to have been \protect'd
# in order to correctly deal with catcodes.
DefParameterType('Digested', sub {
    my ($gullet) = @_;
    $gullet->skipSpaces;
    # Record the math state BEFORE digesting anything.
    my $ismath = $STATE->lookupValue('IN_MATH');
    # Find the first expanded token that is not a space.
    my $token = $gullet->readXToken(0);
    while (defined $token && ($token->getCatcode == CC_SPACE)) {
      $token = $gullet->readXToken(0); }
    my @boxes = ();
    if (defined $token) {
      if ($token->equals(T_BEGIN)) {
        # Braced: digest the opening brace, then the body, and drop the body's last item.
        Digest($token);
        @boxes = $STATE->getStomach->digestNextBody();
        pop(@boxes); }
      else {
        # Otherwise, just invoke the single token.
        @boxes = $STATE->getStomach->invokeToken($token); } }
    # In most (all?) cases, we're really looking for a single Whatsit here...
    my @kept = grep { ref $_ ne 'LaTeXML::Core::Comment' } @boxes;
    return List(@kept, mode => ($ismath ? 'math' : 'text')); },
  undigested => 1,    # since _already_ digested.
  reversion  => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# A variation: Digest until we encounter a given token!
# DigestUntil: digest material up to the given (single) token;
# comments are removed and the rest is returned as a List in the mode (math or text)
# that was in effect before digestion.
DefParameterType('DigestUntil', sub {
    my ($gullet, $until) = @_;
    ($until) = $until->unlist;    # Make sure it's a single token!!!
    $gullet->skipSpaces;
    my $mode  = ($STATE->lookupValue('IN_MATH') ? 'math' : 'text');
    my @boxes = $STATE->getStomach->digestNextBody($until);
    return List((grep { ref $_ ne 'LaTeXML::Core::Comment' } @boxes), mode => $mode); },
  undigested => 1,    # since _already_ digested.
  reversion  => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# Reads until the current group has ended.
# This is useful for environment-like constructs,
# particularly alignments (which may or may not be actual environments),
# but which need special treatment of some of their content
# as the expansion is carried out.
DefParameterType('DigestedBody', sub {
    my ($gullet) = @_;
    # Note the mode (math or text) BEFORE digesting the body.
    my $mode  = ($STATE->lookupValue('IN_MATH') ? 'math' : 'text');
    my @boxes = $STATE->getStomach->digestNextBody();
    # In most (all?) cases, we're really looking for a single Whatsit here...
    return List((grep { ref $_ ne 'LaTeXML::Core::Comment' } @boxes), mode => $mode); },
  undigested => 1);
# In addition to the standard TeX Dimension, there are various LaTeX constructs
# (particularly, the LaTeX picture environment, and the various pstricks packages)
# that take a different sort of length. They differ in two ways.
# (1) They do not accept a comma as decimal separator
# (they generally use it to separate coordinates), and
# (2) They accept a plain float which is scaled against a Dimension register.
# Actually, there are two subcases:
# (a) picture accepts a float, which is scaled against \unitlength
# (b) pstricks accepts a float, and optionally a unit,
# If the unit is omitted, it is relative to \psxunit or \psyunit.
# How to capture these ?
## DefParameterType('Length', sub {
## my($gullet,$unit)=@_;
# CommaList expects something like {balancedstuff,...}
# The value is a LaTeXML::Core::Array of the items.
# An optional item type ($type) is parsed into a parameter object, which is then
# used to reparse each item (braced form), or to read the single item (unbraced form).
DefParameterType('CommaList', sub {
    my ($gullet, $type) = @_;
    my $typedef = $type && LaTeXML::Package::parseParameters(ToString($type), "CommaList")->[0];
    my @items   = ();
    if (!$gullet->ifNext(T_BEGIN)) {
      # If no brace, just read one item or token, but still make Array!
      push(@items, ($typedef ? $typedef->readArguments($gullet, "CommaList")
          : ($gullet->readToken))); }
    else {
      $gullet->readToken;    # step over the opening brace
      my @current = ();
      my $comma   = T_OTHER(',');
      while (my $token = $gullet->readToken) {
        if ($token->equals(T_END)) {    # closing brace: finish the last item
          push(@items, Tokens(@current));
          last; }
        if ($token->equals($comma)) {    # comma: finish this item, start another
          push(@items, Tokens(@current));
          @current = ();
          next; }
        if ($token->equals(T_BEGIN)) {    # nested group: copy it as a unit
          push(@current, $token, $gullet->readBalanced->unlist, T_END);
          next; }
        push(@current, $token); }
      if ($typedef) {
        my @reparsed = ();
        foreach my $item (@items) {
          my ($value) = $typedef->reparseArgument($gullet, $item);
          push(@reparsed, $value); }
        @items = @reparsed; } }
    return LaTeXML::Core::Array->new(open => T_BEGIN, close => T_END, type => $typedef,
      values => [@items]); });
# RequiredKeyVals: key-value pairs within braces, for the given keyset.
# If the opening brace is missing, an error is signalled and an empty KeyVals is returned.
DefParameterType('RequiredKeyVals', sub {
    my ($gullet, $keyset) = @_;
    my $setname = ToString($keyset);
    unless ($gullet->ifNext(T_BEGIN)) {
      Error('expected', '{', $gullet, "Missing keyval arguments");
      return (LaTeXML::Core::KeyVals->new($setname, [], open => T_BEGIN, close => T_END)); }
    return (LaTeXML::Core::KeyVals::readKeyVals($gullet, $setname, T_END)); });
# OptionalKeyVals: key-value pairs within [ ], for the given keyset;
# the value is undef when no [ follows.
DefParameterType('OptionalKeyVals', sub {
    my ($gullet, $keyset) = @_;
    my $setname = ToString($keyset);
    return undef unless $gullet->ifNext(T_OTHER('['));
    return (LaTeXML::Core::KeyVals::readKeyVals($gullet, $setname, T_OTHER(']'))); },
  optional  => 1,
  reversion => sub {
    my ($arg) = @_;
    return () unless $arg;
    return (T_OTHER('['), Revert($arg), T_OTHER(']')); });
# Not sure that this is the most elegant solution, but...
# What I'd really like are some sort of parameter modifiers, mathstyle, font... until...?
# The four explicit math style parameter types differ only in the mathstyle
# that is merged into the font; each digests its argument within its own group.
foreach my $entry (
  [DisplayStyle      => 'display'],
  [TextStyle         => 'text'],
  [ScriptStyle       => 'script'],
  [ScriptscriptStyle => 'scriptscript']) {
  my ($typename, $mathstyle) = @$entry;
  DefParameterType($typename, sub {
      my ($gullet) = @_;
      return $gullet->readArg; },
    beforeDigest => sub {
      my ($stomach) = @_;
      $stomach->bgroup;
      return MergeFont(mathstyle => $mathstyle); },
    afterDigest => sub {
      my ($stomach) = @_;
      return $stomach->egroup; },
    reversion => sub {
      my ($arg) = @_;
      return (T_BEGIN, Revert($arg), T_END); }); }
# Perverse naming convention: not script style, but in the style of a script relative to current.
DefParameterType('InScriptStyle', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(scripted => 1); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# NOTE: the various parameter features don't combine easily!!
# I need a ScriptStyleUntil for \root!!!
# I also need to redo fractions using these new types....
# An optional ([]) argument, digested in its own group with the font marked as scripted.
DefParameterType('OptionalInScriptStyle', sub {
    my ($gullet) = @_;
    return $gullet->readOptional; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(scripted => 1); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  optional  => 1,
  reversion => sub {
    my ($arg) = @_;
    return () unless $arg;
    return (T_OTHER('['), Revert($arg), T_OTHER(']')); });
# A regular argument, digested in its own group with the font marked as being in a fraction.
DefParameterType('InFractionStyle', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(fraction => 1); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
#**********************************************************************
# LaTeX has a very particular notion of "Undefined",
# so let's get that squared away at the outset; it's useful for TeX, too!
# Naturally, it uses \csname to check, which ends up DEFINING the possibly undefined macro as \relax
# \@ifundefined{name}{if}{else}: builds the control sequence \name from the expanded name;
# expands to the else branch when it is defined; otherwise \name is first made
# equivalent to \relax, and the if branch is used.
DefMacro('\@ifundefined{}{}{}', sub {
    my ($gullet, $name, $if, $else) = @_;
    my $cs = T_CS('\\' . ToString(Expand($name)));
    return $else->unlist if IsDefined($cs);
    Let($cs, '\relax');    # Yuck, but traditional!
    return $if->unlist; });
# True if $token may be (re)defined: it either has no meaning, or means the same as \relax,
# AND its name (without the leading backslash) is not 'relax' and does not start with 'end'.
sub isDefinable {
  my ($token) = @_;
  my $meaning = LookupMeaning($token);
  (my $name = $token->getString) =~ s/^\\//;
  my $is_free = (!defined $meaning) || ($meaning eq LookupMeaning(T_CS('\relax')));
  return ($is_free && ($name ne 'relax') && ($name !~ /^end/)); }
#**********************************************************************
# Expandable Primitives
# See The TeXBook, Ch. 20, Definitions (also called Macros) pp. 212--215
#**********************************************************************
#======================================================================
# Should complain if we aren't actually evaluating an \if
# The following special cases are built-in to Definition
# These are declared without a test (undef).
foreach my $builtin ('\else', '\or', '\fi', '\ifcase Number') {
  DefConditional($builtin, undef); }
# Compare $u and $v numerically, according to the relational token $rel.
#   $u, $v : plain numbers, or objects (references), which are replaced by their valueOf;
#   $rel   : '<' (or \@@<), '=', or '>' (or \@@>).
# Returns the result of the numeric comparison; for any other $rel,
# an error is signalled and nothing (false) is returned.
sub compare {
  my ($u, $rel, $v) = @_;
  $u = $u->valueOf if ref $u;
  $v = $v->valueOf if ref $v;
  if ($rel->equals(T_OTHER('<')) || $rel->equals(T_CS('\@@<'))) {
    return $u < $v; }
  elsif ($rel->equals(T_OTHER('='))) {
    return $u == $v; }
  elsif ($rel->equals(T_OTHER('>')) || $rel->equals(T_CS('\@@>'))) {
    return $u > $v; }
  else {
    Error('expected', '<relationaltoken>', $STATE->getStomach->getGullet,
      "Expected a relational token for comparison", "Got " . Stringify($rel));
    return; } }
# Numeric and dimension tests delegate to compare(); \ifodd tests the remainder modulo 2.
DefConditional('\ifnum Number Token Number', sub {
    my (undef, $left, $rel, $right) = @_;
    return compare($left, $rel, $right); });
DefConditional('\ifdim Dimension Token Dimension', sub {
    my (undef, $left, $rel, $right) = @_;
    return compare($left, $rel, $right); });
DefConditional('\ifodd Number', sub {
    my (undef, $number) = @_;
    return $number->valueOf % 2; });
# NOTE: We don't KNOW if we're in vertical, horizontal or inner mode!!!!!!!
DefConditionalI('\ifvmode', undef, sub { return 0; });
DefConditionalI('\ifhmode', undef, sub { return 0; });
DefConditionalI('\ifinner', undef, sub { return 0; });
DefConditionalI('\ifmmode', undef, sub { return LookupValue('IN_MATH'); });
# \if compares the character codes, \ifcat the category codes, of two expanded tokens;
# \ifx compares two (unexpanded) tokens using XEquals.
DefConditional('\if XToken XToken', sub {
    my (undef, $x, $y) = @_;
    return $x->getCharcode == $y->getCharcode; });
DefConditional('\ifcat XToken XToken', sub {
    my (undef, $x, $y) = @_;
    return $x->getCatcode == $y->getCatcode; });
DefConditional('\ifx Token Token', sub {
    my (undef, $x, $y) = @_;
    return XEquals($x, $y); });
# Kinda rough: We don't really keep track of modes as carefully as TeX does.
# We'll assume that a box is horizontal if there's anything at all,
# but it's not a vbox (!?!?)
# Classify the contents of the box register numbered $boxnum (an object providing valueOf).
# Returns nothing if the register is empty (void), 'vbox' if it holds a Whatsit
# whose definition is that of \vbox, and 'hbox' for anything else.
sub classify_box {
  my ($boxnum) = @_;
  my $box = LookupValue('box' . $boxnum->valueOf);
  if (!$box) {
    return; }
  elsif ($box->isa('LaTeXML::Core::Whatsit') && ($box->getDefinition eq LookupDefinition(T_CS('\vbox')))) {
    return 'vbox'; }
  else {
    return 'hbox'; } }
DefConditional('\ifvoid Number', sub { !classify_box($_[1]); });
# classify_box gives undef for a void box; default to '' so that the string
# comparison does not trigger an "uninitialized value" warning.
DefConditional('\ifhbox Number', sub { (classify_box($_[1]) || '') eq 'hbox'; });
DefConditional('\ifvbox Number', sub { (classify_box($_[1]) || '') eq 'vbox'; });
# Constant conditionals: the test for \iftrue always yields 1, the one for \iffalse always 0.
DefConditionalI('\iftrue', undef, sub { 1; });
DefConditionalI('\iffalse', undef, sub { 0; });
#======================================================================
# This makes \relax disappear completely after digestion
# (which seems most TeX like).
DefPrimitive('\relax', sub { return (); });
## However, this keeps a box, so it can appear in UnTeX
### DefPrimitive('\relax',undef);
## But if you do that, you've got to watch out since it usually
## shouldn't be a box; See the isRelax code in handleScripts, below
DefMacro('\number Number', sub {
    my ($gullet, $number) = @_;
    return Explode($number->valueOf); });
# define it here (only approximately), since it's already useful.
Let('\protect', '\relax');
#======================================================================
# \romannumeral passes the number's value to roman(); \string explodes the token's string.
DefMacro('\romannumeral Number', sub {
    my ($gullet, $number) = @_;
    return roman($number->valueOf); });
DefMacro('\string Token', sub {
    my ($gullet, $token) = @_;
    return ExplodeText($token->getString); });
DefMacroI('\jobname', undef, Tokens());    # Set to the filename by initialization
DefMacroI('\fontname', undef, sub { return Explode("fontname not implemented"); });
# Descriptive phrases, indexed by category code, as used by \meaning.
our @CATCODE_MEANING = (
  "the escape character",         # 0
  "begin-group character",        # 1
  "end-group character",          # 2
  "math shift character",         # 3
  "alignment tab character",      # 4
  "end-of-line character",        # 5
  "macro parameter character",    # 6
  "superscript character",        # 7
  "subscript character",          # 8
  "ignored character",            # 9
  "blank space",                  # 10
  "the letter",                   # 11
  "the character",                # 12
  "active character",             # 13
  "comment character",            # 14
  "invalid character");           # 15
# Not sure about this yet...
# \meaning<token>: expands to the exploded text describing the token's meaning.
# A character token is described by the phrase from @CATCODE_MEANING followed by the character;
# an expandable definition as "macro:<parameters>-><expansion>";
# when there is no meaning, or none of the cases below applies, the text is "undefined".
DefMacro('\meaning Token', sub {
my ($gullet, $tok) = @_;
my $meaning = 'undefined';
# An alignment token is described as itself; anything else by its looked-up meaning.
if (my $definition = (Equals($tok, T_ALIGN) ? $tok : LookupMeaning($tok))) {
# $type is the class of the meaning, with the leading 'LaTeXML::' removed.
my $type = ref $definition;
$type =~ s/^LaTeXML:://;
# I don't understand what this is about
# (primitives are replaced by what getCS returns, and $type is recomputed from that)
if (lc($type) =~ /primitive/) {
$definition = $definition->getCS;
$type = ref $definition;
$type =~ s/^LaTeXML:://; }
# Conditionals and constructors are likewise replaced by what getCS returns.
if (lc($type) =~ /con(ditional|structor)/) {
$definition = $definition->getCS;
$type = ref $definition;
$type =~ s/^LaTeXML:://; }
if (lc($type) =~ /token/) {
# Token: "<catcode description> <char>"; a literal space is used for a space token.
my $cc = $definition->getCatcode;
my $char = $definition->getString;
$meaning = ($CATCODE_MEANING[$cc] || '') . ' ' . ($cc == CC_SPACE ? ' ' : $char); }
elsif (lc($type) =~ /expandable/) {
my $expansion = $definition->getExpansion;
my $ltxps = $definition->getParameters;
my @params;
my $argcount = 0;
if (defined $ltxps) {
@params = $ltxps->getParameters;
$argcount = $ltxps->getNumArgs;
}
my $sp;
# Copy each parameter's spec, stripping any leading "word:" prefix.
my @specparts = map { (($sp = $_->{spec}) =~ s/^(\w+):// ? $sp : $sp) } @params;
# Append #1, #2, ... to the specs (modified in place), for at most $argcount of them.
my $arg = 1;
foreach (@specparts) {
last if ($arg > $argcount);
$_ .= "#$arg";
$arg++; }
# Join the specs, then remove every "{}" and every "Token" from the result.
my $spec = join("", @specparts);
$spec =~ s/\{\}//g;
$spec =~ s/Token//g;
$meaning = "macro:" . ToString($spec) . "->" . ToString($expansion); }
Explode($meaning); }
else {
Explode('undefined'); } });
# CSName: reads expanded tokens up to \endcsname and builds the control sequence
# named by the concatenation of their strings (space tokens contribute a single ' ').
# A control sequence met along the way signals an error (either 'unexpected' if it
# has a definition, or 'undefined' if not), but its string is still included in the name.
DefParameterType('CSName', sub {
    my ($gullet) = @_;
    my $token;
    my $cs = '\\';
    my $s;
    while (($token = $gullet->readXToken(0)) && (($s = $token->getString) ne '\endcsname')) {
      my $cc = $token->getCatcode;
      if ($cc == CC_CS) {
        if (defined $STATE->lookupDefinition($token)) {
          # NOTE: the backslashes must be doubled within a double-quoted string;
          # otherwise \c... and \e are taken as escape sequences, garbling the message.
          Error('unexpected', $token, $gullet,
            "The control sequence " . ToString($token) . " should not appear between \\csname and \\endcsname"); }
        else {
          Error('undefined', $token, $gullet, "The token " . Stringify($token) . " is not defined"); } }
      # Keep newlines from having \n!
      $cs .= ($cc == CC_SPACE ? ' ' : $s); }
    return T_CS($cs); });
# \csname...\endcsname: expands to the control sequence that was built;
# if it has no meaning yet, it is first made equivalent to \relax.
DefMacro('\csname CSName', sub {
    my ($gullet, $cs) = @_;
    if (!defined LookupMeaning($cs)) {
      Let($cs, '\relax'); }
    return $cs; });
# A \endcsname that gets digested was not consumed by any \csname: report it.
DefPrimitive('\endcsname', sub {
    my ($stomach) = @_;
    my @context = $stomach->getGullet->showUnexpected;
    Error('unexpected', '\endcsname', $stomach, "Extra \\endcsname", @context);
    return; });
# \expandafter<token><token>: if the second token is expandable, it is expanded ONCE ONLY,
# and the result is the first token followed by that expansion;
# otherwise both tokens are returned unchanged.
DefMacro('\expandafter Token Token', sub {
    my ($gullet, $first, $second) = @_;
    my $defn = LookupDefinition($second);
    return ($first, $second) unless defined($defn) && $defn->isExpandable;
    # Note that IF expandafter ends up expanding a \the in an \edef,
    # that it Overrides the implicit noexpand that \edef would normally use for \the!!
    local $LaTeXML::NOEXPAND_THE = undef;
    my $expansion = $defn->invoke($gullet);
    return ($first, ($expansion ? @{$expansion} : ())); });
# Insert magic token that Gullet knows not to expand the next one.
DefMacroI('\noexpand', undef, Token('', CC_NOTEXPANDED));
# The mark macros are all defined with an empty token list.
foreach my $mark ('\topmark', '\firstmark', '\botmark', '\splitfirstmark', '\splitbotmark') {
  DefMacroI($mark, undef, Tokens()); }
DefMacro('\input TeXFileName', sub {
    my ($gullet, $filename) = @_;
    return Input($filename); });
# Note that TeX doesn't actually close the mouth;
# it just flushes it so that it will close the next time it's read!
DefMacroI('\endinput', undef, sub {
    my ($gullet) = @_;
    $gullet->flushMouth;
    return; });
# \the<internal quantity>
# Expands to the tokens representing the current value of the register.
# A Tokens-valued register gives its tokens as is; any other value is
# converted to a string and exploded.
DefMacro('\the Register', sub {
    my ($gullet, $variable) = @_;
    return () unless $variable;
    my ($defn, @args) = @$variable;
    my $type = $defn->isRegister;
    unless ($type) {
      my $cs = ToString($defn->getCS);
      Error('unexpected', "\\the$cs", $gullet, "You can't use $cs after \\the");
      return (); }
    my $value = $defn->valueOf(@args);
    ## In all cases, these should be OTHER, except for space. (!?)
    my @tokens;
    if ($type eq 'Tokens') {
      @tokens = ($value ? $value->unlist : ()); }
    else {
      @tokens = Explode(ToString($value)); }
    # When NOEXPAND_THE is set, the tokens are neutralized
    # (see the note about \edef within \expandafter, which unsets it).
    if ($LaTeXML::NOEXPAND_THE) {
      @tokens = $gullet->neutralizeTokens(@tokens); }
    return @tokens; });
#**********************************************************************
# Primitives
# See The TeXBook, Chapter 24, Summary of Vertical Mode
# and Chapter 25, Summary of Horizontal Mode.
# Parsing of basic types (pp.268--271) is (mostly) handled in Gullet.pm
#**********************************************************************
#======================================================================
# Registers & Parameters
# See Chapter 24, Summary of Vertical Mode
# Define a whole mess of useless registers here ...
# Values are from Appendix B, pp. 348-349 (for whatever it's worth)
#======================================================================
#======================================================================
# Integer registers; TeXBook p. 272-273
# \tracingmacros and \tracingcommands are not stored as ordinary register values:
# their getters and setters read and write the state values TRACINGMACROS and
# TRACINGCOMMANDS.  Both getters fall back to 0 while the state value is still
# unset, so reading either register always yields a well-defined Number
# (previously \tracingcommands would build Number(undef) before its first assignment).
DefRegister('\tracingmacros', Number(0),
  getter => sub { Number(LookupValue('TRACINGMACROS') || 0); },
  setter => sub { AssignValue(TRACINGMACROS => $_[0]->valueOf); });
DefRegister('\tracingcommands', Number(0),
  getter => sub { Number(LookupValue('TRACINGCOMMANDS') || 0); },
  setter => sub { AssignValue(TRACINGCOMMANDS => $_[0]->valueOf); });
{
  # Initial values of the integer parameters, keyed by name (without the backslash).
  # Each becomes a plain integer register.
  my %int_defaults = (
    pretolerance         => 100,   tolerance           => 200,  hbadness            => 1000, vbadness => 1000,
    linepenalty          => 10,    hyphenpenalty       => 50,   exhyphenpenalty     => 50,
    binoppenalty         => 700,   relpenalty          => 500,
    clubpenalty          => 150,   widowpenalty        => 150,  displaywidowpenalty => 50,
    brokenpenalty        => 100,   predisplaypenalty   => 10000,
    postdisplaypenalty   => 0,     interlinepenalty    => 0,
    floatingpenalty      => 0,     outputpenalty       => 0,
    doublehyphendemerits => 10000, finalhyphendemerits => 5000, adjdemerits         => 10000,
    looseness            => 0,     pausing             => 0,
    holdinginserts       => 0,     tracingonline       => 0,    tracingstats        => 0,
    tracingparagraphs    => 0,     tracingpages        => 0,    tracingoutput       => 0,
    tracinglostchars     => 1,
    tracingrestores      => 0,     language            => 0,    uchyph              => 1, lefthyphenmin => 0,
    righthyphenmin       => 0,     globaldefs          => 0,    defaulthyphenchar   => ord('-'), defaultskewchar => -1,
    escapechar           => 0,     endlinechar         => 0,    newlinechar         => -1, maxdeadcycles => 0, hangafter => 0,
    fam                  => -1,    mag                 => 1000, magnification       => 1000, delimiterfactor => 0,
    time                 => 0,     day                 => 0,    month               => 0, year => 0,
    showboxbreadth       => 5,     showboxdepth        => 3,    errorcontextlines   => 5);
  while (my ($name, $default) = each %int_defaults) {
    DefRegister("\\$name", Number($default)); }
}
# Most of these are ignored, but...
# \tracingall expands to a sequence of assignments turning every tracing register up.
DefMacro('\tracingall',
  join(' ',
    '\tracingonline=1',          '\tracingcommands=2',      '\tracingstats=2',
    '\tracingpages=1',           '\tracingoutput=1',        '\tracinglostchars=1',
    '\tracingmacros=2',          '\tracingparagraphs=1',    '\tracingrestores=1',
    '\showboxbreadth=\maxdimen', '\showboxdepth=\maxdimen', '\errorstopmode'));
# Initialize \day, \month, \year & \time (minutes since midnight) from the local clock.
# This may mess up Daemon state?
{ my (undef, $minute, $hour, $day, $month, $year) = localtime();
  AssignValue('\day'   => Number($day),                 'global');
  AssignValue('\month' => Number($month + 1),           'global');    # localtime months are 0-based
  AssignValue('\year'  => Number(1900 + $year),         'global');    # localtime years count from 1900
  AssignValue('\time'  => Number(60 * $hour + $minute), 'global'); }
our @MonthNames = qw(
  January February March     April   May      June
  July    August   September October November December);
# Return a string for today's date, formatted like "January 1, 2000",
# built from the current values of \month, \day and \year.
sub today {
  my ($month, $day, $year) = map { LookupValue($_)->valueOf } ('\month', '\day', '\year');
  return $MonthNames[$month - 1] . " " . $day . ', ' . $year; }
# Read-only Integer registers (all initially 0)
foreach my $name (qw(lastpenalty inputlineno badness)) {
  DefRegister("\\$name", Number(0), readonly => 1); }
# Special integer registers (?) (all initially 0)
# <special integer> = \spacefactor | \prevgraf | \deadcycles | \insertpenalties
foreach my $name (qw(spacefactor prevgraf deadcycles insertpenalties)) {
  DefRegister("\\$name", Number(0)); }
#======================================================================
# Dimen registers; TeXBook p. 274
{
  # Initial values of the dimension parameters, keyed by name (without the backslash).
  my %dimen_defaults = (
    hfuzz              => '0.1pt', vfuzz              => '0.1pt', overfullrule => '5pt',
    emergencystretch   => 0,
    hsize              => '6.5in', vsize              => '8.9in',
    maxdepth           => '4pt',   splitmaxdepth      => '16383.99999pt', boxmaxdepth => '16383.99999pt',
    lineskiplimit      => 0,
    delimitershortfall => '5pt',   nulldelimiterspace => '1.2pt', scriptspace => '0.5pt',
    mathsurround       => 0,
    predisplaysize     => 0,       displaywidth       => 0, displayindent => 0, parindent => '20pt',
    hangindent         => 0,       hoffset            => 0, voffset       => 0,);
  while (my ($name, $default) = each %dimen_defaults) {
    DefRegister("\\$name", Dimension($default)); }
}
# Read-only dimension registers (initially 0).
foreach my $name (qw(lastkern)) {
  DefRegister("\\$name", Dimension(0), readonly => 1); }
# Special dimension registers (?) (all initially 0)
# <special dimen> = \prevdepth | \pagegoal | \pagetotal | \pagestretch | \pagefilstretch
# | \pagefillstretch | \pagefilllstretch | \pageshrink | \pagedepth
foreach my $name (qw(prevdepth pagegoal pagetotal pagestretch pagefilstretch
  pagefillstretch pagefilllstretch pageshrink pagedepth)) {
  DefRegister("\\$name", Dimension(0)); }
#======================================================================
# Glue registers; TeXBook p.274
{
  # Initial values of the glue parameters, keyed by name (without the backslash).
  my %glue_defaults = (
    baselineskip          => '12pt', lineskip => '1pt',
    parskip               => '0pt plus 1pt',
    abovedisplayskip      => '12pt plus 3pt minus 9pt',
    abovedisplayshortskip => '0pt plus 3pt',
    belowdisplayskip      => '12pt plus 3pt minus 9pt',
    belowdisplayshortskip => '0pt plus 3pt',
    leftskip              => 0, rightskip  => 0, topskip    => '10pt', splittopskip => '10pt',
    tabskip               => 0, spaceskip  => 0, xspaceskip => 0, parfillskip => '0pt plus 1fil');
  while (my ($name, $default) = each %glue_defaults) {
    DefRegister("\\$name", Glue($default)); }
}
#======================================================================
# MuGlue registers; TeXBook p.274
# The math-spacing parameters hold <muglue> (measured in mu), so their values
# must be MuGlue objects, just like the \muskip registers; building them with
# Glue() would hand a `mu' specification to the ordinary glue constructor.
{
  my %mparms = (
    thinmuskip  => '3mu',
    medmuskip   => '4mu plus 2mu minus 4mu',
    thickmuskip => '5mu plus 5mu');
  foreach my $p (keys %mparms) {
    DefRegister("\\$p", MuGlue($mparms{$p})); }
}
#======================================================================
# Token registers; TeXBook p.275
# All token parameters start out empty.
# TeX's ninth token parameter is \errhelp; \everyhelp is not a TeX primitive,
# but it is still defined here so anything that came to rely on it keeps working.
{
  my @tparms = qw(output everypar everymath everydisplay everyhbox everyvbox
    everyjob everycr errhelp everyhelp);
  foreach my $p (@tparms) {
    DefRegister("\\$p", Tokens()); }
}
#======================================================================
# Assignment, TeXBook Ch.24, p.275
#======================================================================
# <assignment> = <non-macro assignment> | <macro assignment>
#======================================================================
# Macros
# See Chapter 24, p.275-276
# <macro assignment> = <definition> | <prefix><macro assignment>
# <definition> = <def><control sequence><definition text>
# <def> = \def | \gdef | \edef | \xdef
# <definition text> = <parameter text><left brace><balanced text><right brace>
# Convert the <parameter text> of a \def-style definition into a parameter list.
#   $cs     : the control sequence being defined (used only in the error message)
#   $params : the tokens found between the control sequence and the opening brace
# Returns a LaTeXML::Core::Parameters object, or undef when there are no parameters.
# Raises a Fatal error when the numbered parameters (#1, #2, ...) are out of order.
sub parseDefParameters {
my ($cs, $params) = @_;
my @tokens = $params->unlist;
# Now, recognize parameters and delimiters.
my @params = ();
# $n counts the numbered parameters seen so far.
my $n = 0;
while (@tokens) {
my $t = shift(@tokens);
if ($t->getCatcode == CC_PARAM) {
if (!@tokens) { # Special case: lone # NOT following a numbered parameter
# Note that we require a { to appear next, but do NOT read it!
push(@params, LaTeXML::Core::Parameter->new('RequireBrace', 'RequireBrace')); }
else {
# The token after # must be the digit of the next parameter in sequence.
$n++; $t = shift(@tokens);
Fatal('expected', "#$n", $STATE->getStomach,
"Parameters for '" . ToString($cs) . "' not in order in " . ToString($params))
unless (defined $t) && ($n == (ord($t->getString) - ord('0')));
# Check for delimiting text following the parameter #n
my @delim = ();
# $pc is the catcode of the previous delimiter token, $cc that of the current one.
my ($pc, $cc) = (-1, 0);
while (@tokens && (($cc = $tokens[0]->getCatcode) != CC_PARAM)) {
my $d = shift(@tokens);
push(@delim, $d) unless $cc == $pc && $cc == CC_SPACE; # BUT collapse whitespace!
$pc = $cc; }
# Found text that marks the end of the parameter
if (@delim) {
my $expected = Tokens(@delim);
push(@params, LaTeXML::Core::Parameter->new('Until',
'Until:' . ToString($expected),
extra => [$expected])); }
# Special case: trailing sole # => delimited by next opening brace.
elsif ((scalar(@tokens) == 1) && ($tokens[0]->getCatcode == CC_PARAM)) {
shift(@tokens);
push(@params, LaTeXML::Core::Parameter->new('UntilBrace', 'UntilBrace')); }
# Nothing? Just a plain parameter.
else {
push(@params, LaTeXML::Core::Parameter->new('Plain', '{}')); } } }
else {
# Initial delimiting text is required.
# Gather everything up to the next parameter character as literal text to match.
my @lit = ($t);
while (@tokens && ($tokens[0]->getCatcode != CC_PARAM)) {
push(@lit, shift(@tokens)); }
my $expected = Tokens(@lit);
push(@params, LaTeXML::Core::Parameter->new('Match',
'Match:' . ToString($expected),
extra => [$expected],
novalue => 1)); }
}
return (@params ? LaTeXML::Core::Parameters->new(@params) : undef); }
# Common implementation of \def, \gdef, \edef and \xdef.
#   $globally : true to install the definition globally
#   $expanded : true to expand the body at definition time (\edef, \xdef)
#   $gullet   : whatever the invoking primitive received as its first argument;
#               only used as the locator for error messages
#   $cs, $params, $body : the token being defined, its parameter text and body
# Installs an Expandable definition; always returns nothing.
sub do_def {
  my ($globally, $expanded, $gullet, $cs, $params, $body) = @_;
  unless ($cs) {
    Error('expected', 'Token', $gullet, "Expected definition token");
    return; }
  unless ($params) {
    Error('misdefined', $cs, $gullet, "Expected definition parameter list");
    return; }
  $params = parseDefParameters($cs, $params);
  if ($expanded) {
    # While expanding the body, the tokens produced by \the are not expanded further
    # (see \the); the setting is confined to this block.
    local $LaTeXML::NOEXPAND_THE = 1;
    $body = Expand($body); }
  my $scope = ($globally ? 'global' : undef);
  $STATE->installDefinition(LaTeXML::Core::Definition::Expandable->new($cs, $params, $body),
    $scope);
  AfterAssignment();
  return; }
# The four definition primitives differ only in the (global, expanded) flags
# they hand to do_def.
foreach my $spec (['\def', 0, 0], ['\gdef', 1, 0], ['\edef', 0, 1], ['\xdef', 1, 1]) {
  my ($name, $globally, $expanded) = @$spec;
  DefPrimitive($name . ' SkipSpaces Token UntilBrace {}',
    sub { do_def($globally, $expanded, @_); },
    locked => 1); }
# <prefix> = \global | \long | \outer
# Each prefix merely records itself in the state; see Stomach.pm for how prefixes are used.
foreach my $prefix (qw(global long outer)) {
  DefPrimitiveI("\\$prefix", undef,
    sub { $STATE->setPrefix($prefix); return; },
    isPrefix => 1); }
#======================================================================
# Non-Macro assignments; TeXBook Ch.24, pp 276--277
# <non-macro assignment> = <simple assignment> | \global <non-macro assignment>
# <filler> = <optional spaces> | <filler>\relax<optional spaces>
# <general text> = <filler>{<balanced text><right brace>
# <simple assignment> = <variable assignment> | <arithmetic>
# | <code assignment> | <let assignment> | <shorthand definition>
# | <fontdef token> | <family assignment> | <shape assignment>
# | \read <number> to <optional spaces><control sequence>
# | \setbox<8bit><equals><filler><box>
# | \font <control sequence><equals><file name><at clause>
# | <global assignment>
# <variable assignment> = <integer variable><equals><number>
# | <dimen variable><equals><dimen>
# | <glue variable><equals><glue>
# | <muglue variable><equals><muglue>
# | <token variable><equals><general text>
# | <token variable><equals><token variable>
# <at clause> = at <dimen> | scaled <number> | <optional spaces>
# <code assignment> = <codename><8bit><equals><number>
# \font<control sequence>=<file name><at clause>
# After the file name, an optional `at <dimen>' and/or `scaled <number>' clause is read.
# The decoded font properties are stored under 'fontinfo_<cs>' and <cs> becomes
# a font-switching primitive.
DefPrimitive('\font Token SkipMatch:= SkipSpaces TeXFileName', sub {
    my ($stomach, $cs, $name) = @_;
    my $gullet = $stomach->getGullet;
    my $csname = ToString($cs);
    $name = ToString($name);
    my $at     = ($gullet->readKeyword('at')     ? $gullet->readDimension->ptValue      : undef);
    my $scaled = ($gullet->readKeyword('scaled') ? $gullet->readNumber->valueOf / 1000 : undef);
    my %props  = LaTeXML::Common::Font::decodeFontname($name, $at, $scaled);
    unless (keys %props) {    # Failed?
      Info('unexpected', $name, $stomach, "Unrecognized font name '$name'",
        "Font switch macro " . $csname . " will have no effect"); }
    $gullet->skipSpaces;
    AssignValue('fontinfo_' . $csname => {%props});
    DefPrimitiveI($cs, undef, undef, font => {%props});
});
# Not sure what this should be...
DefPrimitiveI('\nullfont', undef, undef, font => { family => 'nullfont' });
# The numbered register banks, each paired with the default value of its type.
foreach my $bank (
  ['\count Number',  Number(0)],
  ['\dimen Number',  Dimension(0)],
  ['\skip Number',   Glue(0)],
  ['\muskip Number', MuGlue(0)],
  ['\toks Number',   Tokens()]) {
  my ($prototype, $default) = @$bank;
  DefRegister($prototype => $default); }
# <integer variable> = <integer parameter> | <countdef token> | \count<8bit>
# <dimen var> = <dimen parameter> | <dimendef token> | \dimen<8bit>
# <glue variable> = <glue parameter> | <skipdef token> | \skip<8bit>
# <muglue variable> = <muglue parameter> | <muskipdef token> | \muskip<8bit>
# <arithmetic> = \advance <integer variable><optional by><number>
# | \advance <dimen variable><optional by><dimen>
# | \advance <glue variable><optional by><glue>
# | \advance <muglue variable><optional by><muglue>
# | \multiply <numeric variable><optional by><number>
# | \divide <numeric variable><optional by><number>
# \advance<variable><optional by><value>
# The increment is read according to the register's own type, then added to the current value.
DefPrimitive('\advance Variable SkipKeyword:by', sub {
    my ($stomach, $var) = @_;
    return () unless $var;
    my ($defn, @args) = @$var;
    local $LaTeXML::CURRENT_TOKEN = $defn;
    my $current   = $defn->valueOf(@args);
    my $increment = $stomach->getGullet->readValue($defn->isRegister);
    $defn->setValue($current->add($increment), @args); });
# \multiply<numeric variable><optional by><number>
DefPrimitive('\multiply Variable SkipKeyword:by Number', sub {
    my ($stomach, $var, $scale) = @_;
    return () unless $var;
    my ($defn, @args) = @$var;
    my $current = $defn->valueOf(@args);
    $defn->setValue($current->multiply($scale->valueOf), @args); });
# \divide<numeric variable><optional by><number>
# Division is carried out as multiplication by the reciprocal;
# a zero divisor is reported and then treated as 1.
# NOTE(review): 1/$divisor is a floating point value; whether the result matches
# TeX's integer division in every case depends on how multiply() rounds -- confirm.
DefPrimitive('\divide Variable SkipKeyword:by Number', sub {
    my ($stomach, $var, $scale) = @_;
    return () unless $var;
    my ($defn, @args) = @$var;
    my $divisor = $scale->valueOf;
    if ($divisor == 0) {
      Error('misdefined', $scale, $stomach, "Illegal \\divide by 0; assuming 1");
      $divisor = 1; }
    my $current = $defn->valueOf(@args);
    $defn->setValue($current->multiply(1 / $divisor), @args); });
# <let assignment> = \futurelet <control sequence><token><token>
# | \let<control sequence><equals><one optional space><token>
# \let<cs>=<token>: give <cs> the current meaning of <token>.
DefPrimitive('\let Token SkipMatch:= Skip1Space Token', sub {
    my ($stomach, $target, $source) = @_;
    Let($target, $source);
    return; });
# \futurelet<cs><token1><token2>: <cs> is \let to <token2>,
# then both tokens are put back to be processed, <token1> first.
DefMacro('\futurelet Token Token Token', sub {
    my ($gullet, $cs, $token1, $token2) = @_;
    Let($cs, $token2);
    return ($token1, $token2); });
# <shorthand definition> = \chardef<control sequence><equals><8bit>
# | \mathchardef <control sequence><equals><15bit>
# | <registerdef><control sequence><equals><8bit>
# <registerdef> = \countdef | \dimendef | \skipdef | \muskipdef | \toksdef
# See below for \chardef & \mathchardef
# The <registerdef> primitives: \countdef, \dimendef, \skipdef, \muskipdef and \toksdef.
# Each defines <cs> as a register whose value is looked up and assigned under
# the key "<bank><number>" (eg. '\count12'); an unset key reads as the type's default.
# The third entry of each row builds a fresh default value of the appropriate type.
foreach my $spec (
  ['\countdef',  '\count',  sub { Number(0); }],
  ['\dimendef',  '\dimen',  sub { Dimension(0); }],
  ['\skipdef',   '\skip',   sub { Glue(0); }],
  ['\muskipdef', '\muskip', sub { MuGlue(0); }],
  ['\toksdef',   '\toks',   sub { Tokens(); }]) {
  my ($definer, $bank, $default) = @$spec;
  DefPrimitive($definer . ' Token SkipMatch:= Number', sub {
      my ($stomach, $cs, $num) = @_;
      my $key = $bank . $num->valueOf;
      DefRegisterI($cs, undef, $default->(),
        getter => sub { LookupValue($key) || $default->(); },
        setter => sub { AssignValue($key => $_[0]); });
      AfterAssignment(); }); }
# NOTE: Get all these handled as registers
# <internal integer> = <integer parameter> | <special integer> | \lastpenalty
# | <countdef token> | \count<8bit> | <codename><8bit>
# | <chardef token> | <mathchardef token> | \parshape | \inputlineno
# | \hyphenchar<font> | \skewchar<font> | \badness
DefRegister('\lastpenalty', Number(0), readonly => 1);
# \parshape !?!??
# Reads the count n and then n pairs of dimensions, all of which are discarded.
DefPrimitive('\parshape SkipMatch:= Number', sub {
    my ($stomach, $n) = @_;
    my $remaining = $n->valueOf;
    my $gullet    = $stomach->getGullet;
    while ($remaining-- > 0) {
      $gullet->readDimension;
      $gullet->readDimension; }
    # we _could_ conceivably store this somewhere for some attempt at stylistic purpose...
    return; });
#DefRegister('\inputlineno',Number(0),
# readonly=>1,
# getter=>{ Number($stomach->getGullet->getMouth????? ->lineno); });
DefRegister('\badness', Number(0), readonly => 1);
# <codename> = \catcode | \mathcode | \lccode | \uccode | \sfcode | \delcode
# In each case the register argument is a character code; getters receive it as
# their only argument, setters receive (new value, character code).
DefRegister('\catcode Number', Number(0),
  getter => sub {
    my ($char) = @_;
    my $catcode = LookupCatcode(chr($char->valueOf));
    Number(defined $catcode ? $catcode : CC_OTHER); },
  setter => sub {
    my ($value, $char) = @_;
    AssignCatcode(chr($char->valueOf) => $value->valueOf); });
# # Only used for active math characters, so far
DefRegister('\mathcode Number', Number(0),
  getter => sub {
    my ($char) = @_;
    my $codepoint = $char->valueOf;
    my $mathcode  = $STATE->lookupMathcode(chr($codepoint));
    Number(defined $mathcode ? $mathcode : $codepoint); },    # defaults to the char's code itself(?)
  setter => sub {
    my ($value, $char) = @_;
    $STATE->assignMathcode(chr($char->valueOf) => $value->valueOf); });
# The remaining tables share one shape: look up via $STATE->lookup<Kind>,
# default to 0 when nothing is recorded, assign via $STATE->assign<Kind>.
# \sfcode and \delcode are not used anywhere (yet).
foreach my $table (['\sfcode', 'SFcode'], ['\lccode', 'LCcode'],
  ['\uccode', 'UCcode'], ['\delcode', 'Delcode']) {
  my ($cs, $kind) = @$table;
  my ($lookup, $assign) = ("lookup$kind", "assign$kind");
  DefRegister("$cs Number", Number(0),
    getter => sub {
      my ($char) = @_;
      my $code = $STATE->$lookup(chr($char->valueOf));
      Number(defined $code ? $code : 0); },
    setter => sub {
      my ($value, $char) = @_;
      $STATE->$assign(chr($char->valueOf) => $value->valueOf); }); }
# Initialize the lower/uppercase tables for the letters A-Z and a-z.
# Remember, we're assigning a NUMBER (codepoint) to a CHARACTER!
foreach my $upper (ord('A') .. ord('Z')) {
  my $lower = $upper + 0x20;    # the matching lowercase letter
  $STATE->assignLCcode(chr($upper), $lower, 'global');
  $STATE->assignUCcode(chr($upper), $upper, 'global');
  $STATE->assignLCcode(chr($lower), $lower, 'global');
  $STATE->assignUCcode(chr($lower), $upper, 'global'); }
# Stub definitions ???
# \hyphenchar and \skewchar each take one braced-style argument here ({})
# and are plain registers; nothing in these definitions ties the value to a font.
DefRegister('\hyphenchar{}', Number(ord('-')));
DefRegister('\skewchar{}', Number(0)); # no idea what the default is here
# \hyphenation reads its argument and expands to nothing.
DefMacro('\hyphenation GeneralText', Tokens());
# <font> = <fontdef token> | \font | <family member>
# <family member> = <font range><4bit>
# <font range> = \textfont | \scriptfont | \scriptscriptfont
# Doubtful that we can do anything useful with these.
# These look essentially like Registers, although Knuth doesn't call them that.
# \textfont, \scriptfont & \scriptscriptfont: one register per math size.
# The font for family <n> is kept under 'fontinfo_<n>_<size>'; assignments are always global.
foreach my $style (['text', '\tenrm'], ['script', '\sevenrm'], ['scriptscript', '\fiverm']) {
  my ($size, $default) = @$style;
  DefRegister("\\${size}font Number" => T_CS($default),
    getter => sub {
      my ($fam) = @_;
      LookupValue('fontinfo_' . $fam->valueOf . '_' . $size); },
    setter => sub {
      my ($font, $fam) = @_;
      AssignValue('fontinfo_' . $fam->valueOf . '_' . $size => $font, 'global'); }); }
# <internal dimen> = <dimen parameter> | <special dimen> | \lastkern
# | <dimendef token> | \dimen<8bit> | <box dimension><8bit> | \fontdimen<number><font>
DefRegister('\lastkern' => Dimension(0), readonly => 1);
# <box dimension> = \ht | \wd | \dp
# Each reads or writes one dimension of whatever is stored under 'box<n>';
# an empty box reads as 0 and ignores assignments.
foreach my $dim (['\ht', 'Height'], ['\wd', 'Width'], ['\dp', 'Depth']) {
  my ($cs, $what) = @$dim;
  my ($get, $set) = ("get$what", "set$what");
  DefRegister("$cs Number", Dimension(0),
    getter => sub {
      my ($n) = @_;
      my $box = LookupValue('box' . $n->valueOf);
      return ($box ? $box->$get : Dimension(0)); },
    setter => sub {
      my ($value, $n) = @_;
      my $box = LookupValue('box' . $n->valueOf);
      $box->$set($value) if $box;
      return; }); }
#DefRegister('\fontdimen Number TeXFileName'=>Dimension(0));
# \fontdimen<number><font>: only a few parameters get a non-zero value,
# and the font argument plays no part in the result.
DefRegister('\fontdimen Number Token' => Dimension(0),
  getter => sub {
    my ($param) = @_;
    my $p = ToString($param);
    return ($p == 2 ? Dimension('0.5em')    # interword space
      : $p == 5 ? Dimension('1ex')          # x-height
      : $p == 6 ? Dimension('1em')          # quad width
      :           Dimension(0)); });
# Could be handled by setting dimensions whenever the box itself is set?
# <internal glue> = <glue parameter> | \lastskip | <skipdef token> | \skip<8bit>
DefRegister('\lastskip' => Glue(0), readonly => 1);
# <internal muglue> = <muglue parameter> | \lastskip | <muskipdef token> | \muskip<8bit>
# <family assignment> = <family member><equals><font>
# <shape assignment> = \parshape<equals><number><shape dimensions>
# <shape dimensions> is 2n <dimen>
# <global assignment> = <font assignment> | <hyphenation assignment>
# | <box size assignment> | <interaction mode assignment>
# | <intimate assignment>
# <font assignment> = \fontdimen <number><font><equals><dimen>
# | \hyphenchar<font><equals><number> | \skewchar<font><equals><number>
# <hyphenation assignment> = \hyphenation<general text>
# | \patterns<general text>
# <box size assignment> = <box dimension><8bit><equals><dimen>
# <interaction mode assignment> = \errorstopmode | \scrollmode | \nonstopmode | \batchmode
# These are no-ops; Basically, LaTeXML runs in scrollmode
foreach my $mode (qw(errorstopmode scrollmode nonstopmode batchmode)) {
  DefPrimitiveI("\\$mode", undef, undef); }
# <intimate assignment> = <special integer><equals><number>
# | <special dimension><equals><dimen>
# \fontencoding{<name>} hands over to a primitive which expands the name and,
# provided the font map for that encoding loads, merges it into the current font.
DefMacro('\fontencoding{}', '\@@@fontencoding{#1}');
DefPrimitive('\@@@fontencoding{}', sub {
    my ($stomach, $encoding) = @_;
    my $name = ToString(Expand($encoding));
    MergeFont(encoding => $name) if LoadFontMap($name);
    return; });
# Both \f@encoding and \cf@encoding expand to the current font's encoding name.
foreach my $cs ('\f@encoding', '\cf@encoding') {
  DefMacroI($cs, undef, sub { ExplodeText(LookupValue('font')->getEncoding); }); }
# Used for SemiVerbatim text
# 128 entries: positions 0x00-0x1F and 0x7F are undefined,
# each position 0x20-0x7E maps to the ASCII character with that code.
DeclareFontMap('ASCII',
  [(undef) x 32, (map { chr($_) } 0x20 .. 0x7E), undef]);
# Note that several entries are used for accents, and in practice will actually
# be used in something like an m:mover; thus they needn't (shouldn't?) be "small"
# There are also some questions about which choices are best
# grave & acute accents (entry 0x12 & 0x13) (often typed using 0x60 & 0x27)
# are probably best using U+60(grave accent) & U+B4(acute accent)
# but could be U+2035 (reversed prime) & U+2032 (prime). (particularly for math?)
# [we do use these for \prime, however!]
# or U+02CB (modifier letter grave accent) & U+02CA (modifier letter acute accent)
# Similarly, hat & tilde (entries 0x5E & 0x7E)
# typed using ^ (0x5E circumflex accent) & ~ (0x7E tilde)
# are probably best just sticking with U+5E & U+7E
# but could be U+02C6 (modifier letter circumflex accent) U+02DC (small tilde)
# [Note that generally we're using codepoints characterized as "modifier letter"
# only when no other spacing point is available.]
# Font map for the OT1 encoding: 128 entries, 8 per row,
# giving the Unicode string used for each position 0x00-0x7F.
# (A second OT1 map, specific to the typewriter family, follows this one.)
DeclareFontMap('OT1',
["\x{0393}", "\x{0394}", "\x{0398}", "\x{039B}", "\x{039E}", "\x{03A0}", "\x{03A3}", "\x{03A5}",
"\x{03A6}", "\x{03A8}", "\x{03A9}", "\x{FB00}", "\x{FB01}", "\x{FB02}", "\x{FB03}", "\x{FB04}",
"\x{0131}", "\x{0237}", UTF(0x60), UTF(0xB4), "\x{02C7}", "\x{02D8}", UTF(0xAF), "\x{02DA}",
UTF(0xB8), UTF(0xDF), UTF(0xE6), "\x{0153}", UTF(0xF8), UTF(0xC6), "\x{152}", UTF(0xD8),
UTF(0xA0) . "\x{0335}", '!', "\x{201D}", '#', '$', '%', '&', "\x{2019}",
'(', ')', '*', '+', ',', '-', '.', '/',
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', ':', ';', UTF(0xA1), '=', UTF(0xBF), '?',
'@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
'X', 'Y', 'Z', '[', "\x{201C}", ']', "^", "\x{02D9}",
"\x{2018}", 'a', 'b', 'c', 'd', 'e', 'f', 'g',
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
'x', 'y', 'z', "\x{2013}", "\x{2014}", "\x{02DD}", UTF(0x7E), UTF(0xA8)]);
# Variant of the OT1 font map declared with family => 'typewriter'.
# It differs from the general OT1 map in rows 0x08-0x0F (arrows, quote and inverted
# punctuation instead of ligatures) and in keeping the plain ASCII characters for
# positions such as ", ', <, >, \, _, `, {, |, } and ~.
DeclareFontMap('OT1',
["\x{0393}", "\x{0394}", "\x{0398}", "\x{039B}", "\x{039E}", "\x{03A0}", "\x{03A3}", "\x{03A5}",
"\x{03A6}", "\x{03A8}", "\x{03A9}", "\x{2191}", "\x{2193}", "'", UTF(0xA1), UTF(0xBF),
"\x{0131}", "\x{0237}", UTF(0x60), UTF(0xB4), "\x{02C7}", "\x{02D8}", UTF(0xAF), "\x{02DA}",
UTF(0xB8), UTF(0xDF), UTF(0xE6), "\x{0153}", UTF(0xF8), UTF(0xC6), "\x{152}", UTF(0xD8),
"\x{2423}", '!', "\"", '#', '$', '%', '&', "'",
'(', ')', '*', '+', ',', '-', '.', '/',
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', ':', ';', "<", '=', ">", '?',
'@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
'X', 'Y', 'Z', '[', "\\", ']', "^", "_",
"`", 'a', 'b', 'c', 'd', 'e', 'f', 'g',
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
'x', 'y', 'z', "{", "|", "}", "~", UTF(0xA8)],
family => 'typewriter');
# Font map for the OML (math italic) encoding: 128 entries, 8 per row.
# The comment above each row names the glyphs found at those positions.
DeclareFontMap('OML',
[ # \Gamma \Delta \Theta \Lambda \Xi \Pi \Sigma \Upsilon
"\x{0393}", "\x{0394}", "\x{0398}", "\x{039B}", "\x{039E}", "\x{03A0}", "\x{03A3}", "\x{03A5}",
# \Phi \Psi \Omega alpha beta gamma delta epsilon
"\x{03A6}", "\x{03A8}", "\x{03A9}", "\x{03B1}", "\x{03B2}", "\x{03B3}", "\x{03B4}", "\x{03F5}",
# zeta eta theta iota kappa lambda mu nu
"\x{03B6}", "\x{03B7}", "\x{03B8}", "\x{03B9}", "\x{03BA}", "\x{03BB}", "\x{03BC}", "\x{03BD}",
# xi pi rho sigma tau upsilon phi chi
"\x{03BE}", "\x{03C0}", "\x{03C1}", "\x{03C3}", "\x{03C4}", "\x{03C5}", "\x{03D5}", "\x{03C7}",
# psi omega varepsilon vartheta varpi varrho varsigma varphi
"\x{03C8}", "\x{03C9}", "\x{03B5}", "\x{03D1}", "\x{03D6}", "\x{03F1}", "\x{03C2}", "\x{03C6}",
# l.harp.up l.harp.dn r.harp.up r.harp.dn lhook rhook rt.tri lf.tri
"\x{21BC}", "\x{21BD}", "\x{21C0}", "\x{21C1}", "\x{2E26}", "\x{2E27}", "\x{25B7}", "\x{25C1}",
# old style numerals! (no separate codepoints ?)
# 0 1 2 3 4 5 6 7
'0', '1', '2', '3', '4', '5', '6', '7',
# 8 9 . , < / > star
'8', '9', '.', ',', UTF(0x3C), UTF(0x2F), UTF(0x3E), "\x{22C6}",
# partial A B C D E F G
"\x{2202}", 'A', 'B', 'C', 'D', 'E', 'F', 'G',
# H I J K L M N O
'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
# P Q R S T U V W
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
# X Y Z flat natural sharp smile frown
'X', 'Y', 'Z', "\x{266D}", "\x{266E}", "\x{266F}", "\x{2323}", "\x{2322}",
# ell a b c d e f g
"\x{2113}", 'a', 'b', 'c', 'd', 'e', 'f', 'g',
# h i j k l m n o
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
# p q r s t u v w
'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
# x y z dotless i dotless j weier-p arrow acc. inv.breve
# NOTE(review): the dotless j slot holds a plain "j" here, whereas the OT1 map uses U+0237 -- confirm intent.
'x', 'y', 'z', "\x{0131}", "j", "\x{2118}", "\x{2192}", UTF(0xA0) . "\x{0311}"]);
# Font map for the OMS (math symbols) encoding: 128 entries, 8 per row.
# The comment above each row names the symbols found at those positions;
# the calligraphic capitals use the Unicode script letters.
DeclareFontMap('OMS',
[ #minus dot times ast divide diamond plus-minus minus-plus
"-", "\x{22C5}", UTF(0xD7), "\x{2217}", UTF(0xF7), "\x{22C4}", UTF(0xB1), "\x{2213}",
# oplus ominus otimes oslash odot bigcirc circ bullet
"\x{2295}", "\x{2296}", "\x{2297}", "\x{2298}", "\x{2299}", "\x{25CB}", "\x{2218}", "\x{2219}",
# asymp equiv subseteq supseteq leq geq preceq succeq
"\x{224D}", "\x{2261}", "\x{2286}", "\x{2287}", "\x{2264}", "\x{2265}", "\x{2AAF}", "\x{2AB0}",
# sim approx subset supset ll gg prec succ
"\x{223C}", "\x{2248}", "\x{2282}", "\x{2283}", "\x{226A}", "\x{226B}", "\x{227A}", "\x{227B}",
# leftarrow rightarrow uparrow downarrow leftrightar nearrow searrow simeq
"\x{2190}", "\x{2192}", "\x{2191}", "\x{2193}", "\x{2194}", "\x{2197}", "\x{2198}", "\x{2243}",
# Leftarrow Rightarrow Uparrow Downarrow Leftrightar nwarrow swarrow propto
"\x{21D0}", "\x{21D2}", "\x{21D1}", "\x{21D3}", "\x{21D4}", "\x{2196}", "\x{2199}", "\x{221D}",
# prime infty in ni bigtri.up bigtri.dn slash mapsto
"\x{2032}", "\x{221E}", "\x{2208}", "\x{220B}", "\x{25B3}", "\x{25BD}", "/", "\x{21A6}",
# forall exists not emptyset Re Im top bot
"\x{2200}", "\x{2203}", UTF(0xAC), "\x{2205}", "\x{211C}", "\x{2111}", "\x{22A4}", "\x{22A5}",
# aleph cal A cal B cal C cal D cal E cal F cal G
"\x{2135}", "\x{1D49C}", "\x{212C}", "\x{1D49E}", "\x{1D49F}", "\x{2130}", "\x{2131}", "\x{1D4A2}",
# cal H cal I cal J cal K cal L cal M cal N cal O
"\x{210B}", "\x{2110}", "\x{1D4A5}", "\x{1D4A6}", "\x{2112}", "\x{2133}", "\x{1D4A9}", "\x{1D4AA}",
# cal P cal Q cal R cal S cal T cal U cal V cal W
"\x{1D4AB}", "\x{1D4AC}", "\x{211B}", "\x{1D4AE}", "\x{1D4AF}", "\x{1D4B0}", "\x{1D4B1}", "\x{1D4B2}",
# cal X cal Y cal Z cup cap uplus wedge vee
"\x{1D4B3}", "\x{1D4B4}", "\x{1D4B5}", "\x{222A}", "\x{2229}", "\x{228C}", "\x{2227}", "\x{2228}",
# vdash dashv lfloor rfloor lceil rceil lbrace rbrace
"\x{22A2}", "\x{22A3}", "\x{230A}", "\x{230B}", "\x{2308}", "\x{2309}", "{", "}",
# langle rangle | \| updownarrow Updownarrow backslash wr
"\x{27E8}", "\x{27E9}", "|", "\x{2225}", "\x{2195}", "\x{21D5}", UTF(0x5C), "\x{2240}",
# surd amalg nabla int sqcup sqcap sqsubseteq sqsupseteq
"\x{221A}", "\x{2210}", "\x{2207}", "\x{222B}", "\x{2294}", "\x{2293}", "\x{2291}", "\x{2292}",
# section dagger ddagger para clubsuit diam.suit heartsuit spadesuit
UTF(0xA7), "\x{2020}", "\x{2021}", UTF(0xB6), "\x{2663}", "\x{2662}", "\x{2661}", "\x{2660}"]);
# Font map for the OMX (math extension) encoding, 8 entries per row.
# Several positions repeat the same character (the same delimiter or operator
# appears more than once in the table), and positions with no entry are undef.
DeclareFontMap('OMX',
[ # ( ) [ ] lfloor rfloor lceil rceil
"(", ")", "[", "]", "\x{230A}", "\x{230B}", "\x{2308}", "\x{2309}",
#lbrace rbrace langle rangle | || / \
"{", "}", "\x{27E8}", "\x{27E9}", "|", "\x{2225}", "/", UTF(0x5C),
"(", ")", "(", ")", "[", "]", "\x{230A}", "\x{230B}",
"\x{2308}", "\x{2309}", "{", "}", "\x{27E8}", "\x{27E9}", "/", UTF(0x5C),
"(", ")", "[", "]", "\x{230A}", "\x{230B}", "\x{2308}", "\x{2309}",
"{", "}", "\x{27E8}", "\x{27E9}", "/", UTF(0x5C), "/", UTF(0x5C),
# next two rows are just fragments
# l.up.paren r.up.paren l.up.brak r.up.brak l.bot.brak r.bot.brak l.brak.ext r.brak.ext
"\x{239B}", "\x{239E}", "\x{23A1}", "\x{23A4}", "\x{23A3}", "\x{23A6}", "\x{23A2}", "\x{23A5}",
# l.up.brace r.up.brace l.bot.brace r.bot.brace l.brace.mid r.brace.mid brace.ext v.arrow.ext
"\x{23A7}", "\x{23AB}", "\x{23A9}", "\x{23AD}", "\x{23A8}", "\x{23AC}", "\x{23AA}", "\x{23D0}",
# l.bot.paren r.bot.paren l.paren.ext r.paren.ext
"\x{239D}", "\x{23A0}", "\x{239C}", "\x{239F}", "\x{27E8}", "\x{27E9}", "\x{2294}", "\x{2294}",
"\x{222E}", "\x{222E}", "\x{2299}", "\x{2299}", "\x{2295}", "\x{2295}", "\x{2297}", "\x{2297}",
"\x{2211}", "\x{220F}", "\x{222B}", "\x{22C3}", "\x{22C2}", "\x{228C}", "\x{2227}", "\x{2228}",
"\x{2211}", "\x{220F}", "\x{222B}", "\x{22C3}", "\x{22C2}", "\x{228C}", "\x{2227}", "\x{2228}",
"\x{2210}", "\x{2210}", UTF(0x5E), UTF(0x5E), UTF(0x5E), UTF(0x7E), UTF(0x7E), UTF(0x7E),
"[", "]", "\x{230A}", "\x{230B}", "\x{2308}", "\x{2309}", "{", "}",
# [missing rad frags] double arrow ext.
"\x{23B7}", "\x{23B7}", "\x{23B7}", "\x{23B7}", "\x{23B7}", undef, undef, undef,
# [missing tips for horizontal curly braces]
"\x{2191}", "\x{2193}", undef, undef, undef, undef, "\x{21D1}", "\x{21D3}"]);
# \char<number>: make a Box from the result of FontDecode on <number>,
# passing the \char invocation (with its argument) as the Box's fourth argument.
# Earlier approach, kept for reference:
#   $_[0]->invokeToken(T_OTHER(chr($_[1]->valueOf)));
DefPrimitive('\char Number', sub {
    my ($stomach, $number) = @_;
    return Box(FontDecode($number->valueOf), undef, undef,
      Invocation(T_CS('\char'), $number)); });
# \chardef<cs>=<number>: almost like a register, but different.
# A hidden primitive \@chardef@<csname> is defined to build the same Box that
# \char<number> would; then a CharDef definition tying <cs>, <number> and the
# hidden primitive together is installed, and AfterAssignment is run.
DefPrimitive('\chardef Token SkipMatch:= Number', sub {
    my ($stomach, $cs, $number) = @_;
    my $hidden  = T_CS('\@chardef@' . $cs->getCSName);
    my $typeset = sub {
      Box(FontDecode($number->valueOf), undef, undef, Invocation(T_CS('\char'), $number)); };
    DefPrimitiveI($hidden, undef, $typeset);
    $STATE->installDefinition(LaTeXML::Core::Definition::CharDef->new($cs, $number, $hidden));
    AfterAssignment();
    return; });
# Role associated with each math class, indexed by class number;
# undef where the class by itself determines no role.
our @mathclassrole = (undef, 'BIGOP', 'BINOP', 'RELOP', 'OPEN', 'CLOSE', 'PUNCT', undef);

# Is this "fontinfo" stuff sufficient to maintain a math font "family" ??
# What we're really after is a connection to a font encoding mapping.
#
# decodeMathChar($n) splits a math character code, read as
#     class*4096 + family*256 + position,
# and returns the list (role, glyph):
#   role  comes from @mathclassrole for the class; when that is undef, the 'role'
#         stored under 'math_token_attributes_<char>' is used, if there is such an entry;
#   glyph is FontDecode(position, encoding) when the family's font has fontinfo
#         with an encoding, and otherwise simply chr(position).
sub decodeMathChar {
  my ($mathcode) = @_;
  my $class    = int($mathcode / 0x1000);
  my $within   = $mathcode % 0x1000;
  my $fam      = int($within / 0x100);
  my $position = $within % 0x100;
  # The family's font: the first of the text, script, scriptscript entries that is set.
  my $font;
  foreach my $size (qw(text script scriptscript)) {
    last if $font = LookupValue('fontinfo_' . $fam . '_' . $size); }
  my $char = chr($position);
  # If no specific class, Lookup properties from a DefMath?
  my $charinfo = LookupValue('math_token_attributes_' . $char);
  my $fontinfo = LookupValue('fontinfo_' . ToString($font));
  my $role     = $mathclassrole[$class];
  if ((!defined $role) && $charinfo) {
    $role = $$charinfo{role}; }
  return ($role,
    ($fontinfo && $$fontinfo{encoding} ? FontDecode($position, $$fontinfo{encoding}) : $char)); }
# \mathchar<number>: an ltx:XMTok holding the decoded glyph (nothing if there is no glyph).
# After digestion, the number is decoded with decodeMathChar; the role is recorded
# when defined, and glyph plus a font specialized for that glyph when the glyph is true.
DefConstructor('\mathchar Number',
  "?#glyph(<ltx:XMTok role='#role'>#glyph</ltx:XMTok>)",
  sizer       => '#1',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $mathcode = $whatsit->getArg(1)->valueOf;
    my ($role, $glyph) = decodeMathChar($mathcode);
    $whatsit->setProperty(role => $role) if defined $role;
    if ($glyph) {
      $whatsit->setProperty(glyph => $glyph);
      $whatsit->setProperty(font  => LookupValue('font')->specialize($glyph)); }
    return; });
# \mathchardef<cs>=<number>: almost like a register, but different.
# The number is decoded (see decodeMathChar) into a role and a glyph.  When there is a
# glyph, a hidden constructor \@mathchardef@<csname> producing the ltx:XMTok is defined;
# in every case a CharDef definition for <cs> is installed (with the hidden cs, or undef
# when there is no glyph), and AfterAssignment is run.
DefPrimitive('\mathchardef Token SkipMatch:= Number', sub {
    my ($stomach, $newcs, $value) = @_;
    my $csname = $newcs->getCSName;
    my ($role, $glyph) = decodeMathChar($value->valueOf);
    # Test for a glyph by definedness, NOT truth: the digit "0" is a legitimate glyph
    # but is false in Perl, and would otherwise leak the string "0" in as the internal cs.
    my $hasglyph   = (defined $glyph) && ($glyph ne '');
    my $internalcs = ($hasglyph ? T_CS('\@mathchardef@' . $csname) : undef);
    DefConstructorI($internalcs, undef, "<ltx:XMTok role='#role'>#glyph</ltx:XMTok>",
      sizer      => '#1',
      properties => { role => $role, glyph => $glyph,
        font => sub { LookupValue('font')->specialize($glyph); } },
      #    reversion => '\mathchar' . $value->valueOf . '\relax',
      # revert to the plain glyph if it seems plausible? (this helps \left,\right, eg)
      reversion => (ord($glyph) < 128 ? $glyph : '\mathchar' . $value->valueOf . '\relax'),
    ) if $hasglyph;
    $STATE->installDefinition(LaTeXML::Core::Definition::CharDef->new($newcs, $value, $internalcs));
    AfterAssignment();
    return; });
# \mathaccent<number><digested material>: an ltx:XMApp applying an OVERACCENT token,
# whose content is the glyph decoded from <number>, to the second argument.
# Only the glyph from decodeMathChar is used; the role is ignored.
DefConstructor('\mathaccent Number Digested',
  "<ltx:XMApp><ltx:XMTok role='OVERACCENT'>#glyph</ltx:XMTok><ltx:XMArg>#2</ltx:XMArg></ltx:XMApp>",
  sizer       => '#1',    # Close enough?
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $mathcode = $whatsit->getArg(1)->valueOf;
    my (undef, $glyph) = decodeMathChar($mathcode);
    if ($glyph) {
      $whatsit->setProperty(glyph => $glyph);
      $whatsit->setProperty(font  => LookupValue('font')->specialize($glyph)); }
    return; });
# <box> = \box <8bit> | \copy <8bit> | \lastbox | \vsplit <8bit> to <dimen>
# | \hbox <box specification>{<horizontal mode material>}
# | \vbox <box specification>{<vertical mode material>}
# | \vtop <box specification>{<vertical mode material>}
# <box specification> = to <dimen><filler> | spread <dimen><filler> | <filler>
# \setbox<number>=\hbox to <dimen>{<horizontal mode material>}
# \setbox<number>=<box>: digest whatever the next (expanded) token produces and store
# the first resulting item as the value 'box<number>'; any further items produced by
# that digestion are returned.  The assignment is global iff the 'global' prefix was set.
DefPrimitive('\setbox Number SkipMatch:=', sub {
    my ($stomach, $number) = @_;
    no warnings 'recursion';
    my $box = 'box' . $number->valueOf;    # the key under which the box is stored
    # If there is any afterAssignment tokens, move them over so BoxContents parameter will use them
    if (my $token = LookupValue('afterAssignment')) {
      AssignValue('afterAssignment' => undef, 'global');
      AssignValue('BeforeNextBox'   => $token); }
    # Save global flag, since we're digesting to get the box content, which resets the flag!
    # Should afterDigest be responsible for resetting flags?
    my $scope = $STATE->getPrefix('global') && 'global';
    $STATE->clearPrefixes;    # before invoke, below; we've saved the only relevant one (global)
    my ($stuff, @rest) = $stomach->invokeToken($stomach->getGullet->readXToken);
    AssignValue($box => $stuff, $scope);
    return @rest; });
# \box<number>: return the unlisted contents stored under 'box<number>' (nothing if
# the register holds nothing), after resetting the stored value to undef.
DefPrimitive('\box Number', sub {
    my ($stomach, $number) = @_;
    my $register = 'box' . $number->valueOf;
    my $contents = LookupValue($register);
    AssignValue($register, undef);
    return ($contents ? $contents->unlist : ()); });
# \copy<number>: return the unlisted contents stored under 'box<number>' (nothing if
# the register holds nothing); the stored value is left in place.
DefPrimitive('\copy Number', sub {
    my ($stomach, $number) = @_;
    my $contents = LookupValue('box' . $number->valueOf);
    return ($contents ? $contents->unlist : ()); });
# Revert one keyword-valued property of $whatsit: when the property named $keyword
# has a true value, return the exploded keyword followed by the reverted value;
# otherwise return the empty list.
sub revert_spec {
  my ($whatsit, $keyword) = @_;
  my $value = $whatsit->getProperty($keyword);
  return () unless $value;
  return (Explode($keyword), Revert($value)); }
# Parameter type for a <box specification>: "to <dimen>" or "spread <dimen>" (see the grammar above).
# When one of the two keywords is read, the result is a KeyVals object built from
# the pair [keyword, dimension read after it].
# NOTE(review): when neither keyword is present the closure simply falls out of the `if`
# (no explicit return); presumably that counts as "argument absent", the parameter being
# declared optional -- confirm against DefParameterType.
DefParameterType('BoxSpecification', sub {
my ($gullet) = @_;
if (my $key = $gullet->readKeyword('to', 'spread')) {
LaTeXML::Core::KeyVals->new(undef, [$key, $gullet->readDimension]); } },
optional => 1, undigested => 1);
# Risky: I think this needs to be digested as a body to work like TeX (?)
# but the parameter thinks it's just parsing from the gullet...
#
# readBoxContents($gullet, $everybox) reads and digests the braced contents of a box:
#  * tokens are discarded up to and including the first one Equal to T_BEGIN
#    ({ or \bgroup), or until no more tokens can be read;
#  * a token saved under 'BeforeNextBox' (possibly from \afterassignment) is cleared,
#    globally, and pushed back on the input;
#  * the tokens of $everybox (from \everyhbox or \everyvbox), if given, are pushed back too;
#  * T_BEGIN is then invoked on the stomach, and the first item it yields is returned.
sub readBoxContents {
  my ($gullet, $everybox) = @_;
  while (my $skipped = $gullet->readToken) {    # Skip till { or \bgroup
    last if Equals($skipped, T_BEGIN); }
  my $pending = LookupValue('BeforeNextBox');
  if ($pending) {
    AssignValue(BeforeNextBox => undef, 'global');
    $gullet->unread($pending); }
  if ($everybox) {
    $gullet->unread($everybox->unlist); }
  my ($contents) = $STATE->getStomach->invokeToken(T_BEGIN);
  return $contents; }
# Parameter types for the braced contents of horizontal and vertical boxes.
# Both delegate to readBoxContents, passing the current value of \everyhbox or
# \everyvbox respectively; the result is marked undigested since it already IS digested.
DefParameterType('HBoxContents', sub {
    my ($gullet) = @_;
    readBoxContents($gullet, LookupValue('\everyhbox')); },
  undigested => 1);
DefParameterType('VBoxContents', sub {
    my ($gullet) = @_;
    readBoxContents($gullet, LookupValue('\everyvbox')); },
  undigested => 1);
# DefParameterType('BoxContents',sub {
# my($gullet)=@_;
# my $t;
# while(($t=$gullet->readToken) && !Equals($t,T_BEGIN)){} # Skip till { or \bgroup
# my($contents,@stuff) = $STATE->getStomach->invokeToken(T_BEGIN);
# $contents; },
# undigested=>1); # Cause it already is digested!
# This re-binds a number of important control sequences to their default text binding.
# This is useful within common boxing or footnote macros that can appear within
# alignments or special environments that have redefined many of these.
# Three lists of [token, replacement] pairs, applied with Let by reenterTextMode:
#   TEXT_MODE_BINDINGS  is applied in either case (initially empty);
#   HTEXT_MODE_BINDINGS is applied unless reenterTextMode is asked for vertical mode;
#   VTEXT_MODE_BINDINGS is applied when it is.
AssignValue(TEXT_MODE_BINDINGS => []);
AssignValue(HTEXT_MODE_BINDINGS => []);
AssignValue(VTEXT_MODE_BINDINGS => []);
# The math-shift token ($) is rebound differently for the two cases.
PushValue(HTEXT_MODE_BINDINGS => [T_MATH, T_CS('\@dollar@in@textmode')]);
PushValue(VTEXT_MODE_BINDINGS => [T_MATH, T_CS('\@dollar@in@normalmode')]);
###PushValue(TEXT_MODE_BINDINGS => [T_CS('\centerline'), T_CS('\relax')]);
# Re-bind control sequences to their text-mode meanings.
# Each [token, replacement] pair in VTEXT_MODE_BINDINGS (when $verticalmode is true)
# or HTEXT_MODE_BINDINGS (otherwise), and then each pair in TEXT_MODE_BINDINGS,
# is applied with Let.  Returns nothing.
sub reenterTextMode {
  my ($verticalmode) = @_;
  my $modekey = ($verticalmode ? 'VTEXT_MODE_BINDINGS' : 'HTEXT_MODE_BINDINGS');
  # A plain loop: the bindings are applied purely for their side effect.
  foreach my $binding (@{ LookupValue($modekey) }, @{ LookupValue('TEXT_MODE_BINDINGS') }) {
    Let($$binding[0], $$binding[1]); }
  return; }
# Safe hash-reference lookup: returns $thing itself when it is false (e.g. undef),
# and otherwise the value stored under $key in the hash that $thing refers to.
sub REF {
  my ($hashref, $field) = @_;
  return $hashref unless $hashref;
  return $$hashref{$field}; }
# \hbox<box specification>{<horizontal mode material>}
# Construction: opens an ltx:p when the current element carries the '_vertical_mode_'
# attribute (set by insertBlock), otherwise an ltx:text; gives it the 'width' property,
# if any; absorbs the digested contents and closes the node again.
# Digestion: text mode, bounded; the width is set from "to <dimen>", or from the
# contents' width plus "spread <dimen>".
DefConstructor('\hbox BoxSpecification HBoxContents', sub {
    #   "<ltx:text width='#width' _noautoclose='1'>#2</ltx:text>",
    my ($document, $spec, $contents, %props) = @_;
    my $current = $document->getElement;
    # What is the CORRECT (& general) way to ask whether we're in "vertical mode"??
    #    my $vmode = $tag eq 'ltx:inline-block'; # ie, explicitly \vbox !?!?!?!
    my $vmode  = $current && $current->getAttribute('_vertical_mode_');
    my $newtag = ($vmode ? 'ltx:p' : 'ltx:text');
    my $node   = $document->openElement($newtag, _noautoclose => 1,
      width => $props{width});
    $document->absorb($contents);
    $document->closeNode($node); },
  mode  => 'text', bounded => 1,
  sizer => '#2',
  # Workaround for $ in alignment; an explicit \hbox gives us a normal $.
  # And also things like \centerline that will end up bumping up to block level!
  beforeDigest => sub { reenterTextMode(); },
  afterDigest  => sub {
    my ($stomach, $whatsit) = @_;
    my $spec = $whatsit->getArg(1);
    my $box  = $whatsit->getArg(2);
    if (my $w = GetKeyVal($spec, 'to')) {
      $whatsit->setWidth($w); }
    elsif (my $s = GetKeyVal($spec, 'spread')) {
      $whatsit->setWidth($box->getWidth->add($s)); }
    return; });
# This attempts to be a generalized vbox construction;
# It tries to figure out whether an ltx:inline-block or ltx:para is needed,
# and attempts to figure out whether sequences of the inserted content
# need to be explicitly wrapped in some kind of block element (presumably ltx:p).
# It returns the inserted inner blocks,
# whether or not they got wrapped by that ltx:inline-block; which it DOESN'T TELL YOU ABOUT!
#
# Arguments:
#   $document  : the document under construction;
#   $contents  : the digested material to absorb;
#   %blockattr : attributes for the wrapper element; entries with false values are dropped.
# Returns the list of inserted nodes, after runs of 'Inline' nodes have been wrapped
# in ltx:p and ltx:break nodes have been removed.
sub insertBlock {
  my ($document, $contents, %blockattr) = @_;
  # Create something like:
  #  "<ltx:inline-block vattach='$vattach' height='#height'>#2</ltx:inline-block>"
  my $model   = $document->getModel;
  my $context = $document->getElement;    # Where we originally start inserting.
      # If we're "explicitly" asking for a vbox, or can't currently put an ltx:p in
      # (which would be a typical block mode item), or if the current point accepts raw text,
      # then we'd better open an inline-block.
      # [A thought is to close any open ltx:p, or create & delete one, just to "prime" it.
      # but those seem a bad idea, because we may want a vbox inside a p, or....]
  my $newblock = undef;
  my $unwrap   = 0;
  # Discard attributes that were passed without a (true) value.
  foreach my $name (keys %blockattr) {
    delete $blockattr{$name} unless $blockattr{$name}; }
  my $hasattr = scalar(keys %blockattr);
  if ($hasattr || !$document->canContainSomehow($context, 'ltx:p') || $document->canContain($context, '#PCDATA')) {
    # [If we DONT open, we lose the vattach!]
    $newblock = $document->openElement('ltx:inline-block', '_autoclose' => 1, %blockattr); }
  elsif ($document->canContainSomehow($context, 'ltx:para')) {
    $newblock = $document->openElement('ltx:para', '_autoclose' => 1, %blockattr); }
  #  else {
  #    $unwrap = 1;
  #    $newblock = $document->openElement('ltx:inline-block',%blockattr); }
  # Insert the content for the block, and reduce
  $document->setAttribute($document->getElement, '_vertical_mode_' => 1);   # HACK!!!! (see \hbox)
  my @nodes = $document->filterChildren($document->filterDeletions($document->absorb($contents)));
  # Scan the inserted nodes, wrapping sequences of Inline items with a ltx:p
  my @newnodes = ();
  while (@nodes) {
    if ($model->getNodeQName($nodes[0]) eq 'ltx:break') {    # ltx:break are superfluous, now.
      $document->removeNode(shift(@nodes));
      next; }
    my @n;                                                   # Collect up sequences of Inline
    while (@nodes && ($model->isInSchemaClass('Inline', $nodes[0]))) {
      push(@n, shift(@nodes)); }
    if (@n) {
      push(@newnodes, $document->wrapNodes('ltx:p', @n)); }
    else {
      push(@newnodes, shift(@nodes)); } }
  # If we've inserted a wrapper element, close all open elements up to its parent
  # It may have auto-opened some element to contain it, but leave that open for following material
  # Otherwise, close everything back up to the originally open element (but only if still open!)
  if ($newblock) {
    $document->closeToNode($newblock->parentNode, 1); }
  else {
    $document->closeToNode($context, 1); }
  # Check if the ltx:inline-block container is really needed.
  if ($newblock) {
    my @rows = $newblock->childNodes;
    if (scalar(@rows) < 1) {    # Insertion came up empty?
      $document->removeNode($newblock); }    # then remove the new block entirely
    elsif ($unwrap ||
      ((scalar(@rows) == 1)                                         # Else only 1 item inside, then flatten
        && $document->canContain($newblock->parentNode, $rows[0])    # if allowed.
        && (!$hasattr || !grep { !$document->canHaveAttribute($rows[0], $_) } keys %blockattr))) {
      # Transfer the wrapper's attributes to the single row before removing the wrapper.
      foreach my $name (keys %blockattr) {
        $document->setAttribute($rows[0], $name => $blockattr{$name}); }
      $document->unwrapNodes($newblock); } }
  # And return the list of "rows" in the box (in case they need attributes....)
  return @newnodes; }
# \vbox<box specification>{<vertical mode material>}
# Construction hands the contents to insertBlock with vattach='bottom'.
# Digestion (text mode): the height is set from "to <dimen>", or else from the
# contents' height plus "spread <dimen>".
DefConstructor('\vbox BoxSpecification VBoxContents', sub {
    my ($document, $spec, $contents, %props) = @_;
    my @rows = insertBlock($document, $contents, vattach => 'bottom'); },
  sizer       => '#2',
  mode        => 'text',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $spec = $whatsit->getArg(1);
    my $box  = $whatsit->getArg(2);
    my $to   = GetKeyVal($spec, 'to');
    if ($to) {
      $whatsit->setHeight($to); }
    else {
      my $spread = GetKeyVal($spec, 'spread');
      $whatsit->setHeight($box->getHeight->add($spread)) if $spread; }
    return; });
# \vtop<box specification>{<vertical mode material>}
# Construction hands the contents to insertBlock with vattach='top'.
# Digestion (text mode): the height is set from "to <dimen>", or else from the
# contents' height plus "spread <dimen>".
DefConstructor('\vtop BoxSpecification VBoxContents', sub {
    my ($document, $spec, $contents, %props) = @_;
    insertBlock($document, $contents, vattach => 'top'); },
  sizer       => '#2',
  mode        => 'text',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $spec = $whatsit->getArg(1);
    my $box  = $whatsit->getArg(2);
    my $to   = GetKeyVal($spec, 'to');
    if ($to) {
      $whatsit->setHeight($to); }
    else {
      my $spread = GetKeyVal($spec, 'spread');
      $whatsit->setHeight($box->getHeight->add($spread)) if $spread; }
    return; });
# Parameter type for a <rule specification>: any sequence of
# "width <dimen>", "height <dimen>" and "depth <dimen>".
# Keywords are read for as long as one is found; a repeated keyword overrides the
# earlier value.  The result is a KeyVals object built from the collected pairs.
DefParameterType('RuleSpecification', sub {
    my ($gullet) = @_;
    my %dimensions = ();
    while (my $name = $gullet->readKeyword('width', 'height', 'depth')) {
      $dimensions{$name} = $gullet->readDimension; }
    return LaTeXML::Core::KeyVals->new(undef, [%dimensions]); },
  optional => 1, undigested => 1);
# \vrule<rule specification>
# Produces nothing when marked invisible; produces an ltx:rule only when marked
# isVerticalRule.  After digestion:
#  * each of width, height, depth that was given is recorded as a property;
#  * inside an alignment ('Alignment' is set), the rule is marked isVerticalRule when no
#    height and width were given, or its height (less depth) exceeds 20pt, or exceeds
#    3 times its width;
#  * outside an alignment, a rule of width 0 is marked invisible;
#  * the current font's color is recorded unless it is 'black'.
DefConstructor('\vrule RuleSpecification',
  "?#invisible()(?#isVerticalRule()"
    . "(<ltx:rule height='&GetKeyVal(#1,height)' depth='&GetKeyVal(#1,depth)'"
    . " width='&GetKeyVal(#1,width)' color='#color'/>))",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $dims = $whatsit->getArg(1);
    my %pt;    # the given dimensions, in points
    foreach my $name (qw(width height depth)) {
      my $dimen = GetKeyVal($dims, $name);
      next unless $dimen;
      $whatsit->setProperty($name => $dimen);
      $pt{$name} = $dimen->ptValue; }
    my ($w, $h) = @pt{qw(width height)};
    $h -= $pt{depth} if $h && $pt{depth};    # - ??
    my $alignment = LookupValue('Alignment');
    if ($alignment) {
      my $unspecified = (!defined $h) && (!defined $w);
      my $tall        = (defined $h) && ($h > 20);
      my $slender     = (defined $h) && (defined $w) && ($h > 3 * $w);
      if ($unspecified || $tall || $slender) {
        # This isXxxxRule property is to determine if it is used for separating rules within alignments
        $whatsit->setProperty(isVerticalRule => 1); } }
    elsif ((defined $w) && ($w == 0)) {
      $whatsit->setProperty(invisible => 1); }
    my $color = LookupValue('font')->getColor;
    $whatsit->setProperty(color => $color) if $color && ($color ne 'black');
    return; });
# \hrule<rule specification>
# Produces an ltx:rule only when marked isHorizontalRule.  After digestion:
#  * each of width, height, depth that was given is recorded as a property;
#  * inside an alignment ('Alignment' is set), when no height and width were given, or the
#    width exceeds 20pt, or exceeds 3 times the height (less depth), a 't' line is added
#    to the alignment and the rule is marked isHorizontalRule;
#  * outside an alignment, a missing width defaults to '100%' and a missing height to '1px'
#    (set on the specification itself);
#  * the current font's color is recorded unless it is 'black'.
DefConstructor('\hrule RuleSpecification',
  "?#isHorizontalRule()"
    . "(<ltx:rule height='&GetKeyVal(#1,height)' depth='&GetKeyVal(#1,depth)'"
    . " width='&GetKeyVal(#1,width)' color='#color'/>)",
  #            properties=>{isHorizontalRule=>1});
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $dims = $whatsit->getArg(1);
    my %pt;    # the given dimensions, in points
    foreach my $name (qw(width height depth)) {
      my $dimen = GetKeyVal($dims, $name);
      next unless $dimen;
      $whatsit->setProperty($name => $dimen);
      $pt{$name} = $dimen->ptValue; }
    my ($w, $h) = @pt{qw(width height)};
    $h -= $pt{depth} if $h && $pt{depth};    # - ??
    my $alignment = LookupValue('Alignment');
    if ($alignment) {
      # What is the intended logic here?
      my $unspecified = (!defined $h) && (!defined $w);
      my $wide        = (defined $w) && ($w > 20);
      my $flat        = (defined $h) && (defined $w) && ($w > 3 * $h);
      if ($unspecified || $wide || $flat) {
        # This isXxxxRule property is to determine if it is used for separating rules within alignments
        $alignment->addLine('t');
        $whatsit->setProperty(isHorizontalRule => 1); } }
    else {
      $dims->setValue(width  => '100%') unless defined $w;
      $dims->setValue(height => '1px')  unless defined $h; }
    my $color = LookupValue('font')->getColor;
    $whatsit->setProperty(color => $color) if $color && ($color ne 'black');
    return; });
#======================================================================
# Remaining Mode independent primitives in Ch.24, pp.279-280
# \relax was done as expandable (isn't that right?)
# }
# Note, we don't bother making sure begingroup is ended by endgroup.
# These define the handler for { } (or anything of catcode BEGIN, END)
# These are actually TeX primitives, but we treat them as a Whatsit so they
# remain in the constructed tree.
#DefConstructor('{','#body', beforeDigest=>sub{$_[0]->bgroup;}, captureBody=>1);
# Handler for { (anything of catcode BEGIN): begin a group, digest the following body,
# and return a List made of a Box for the opening brace followed by the body.
# The List's mode is 'math' or 'text' according to IN_MATH as it was BEFORE the body
# was digested.
DefPrimitive('{', sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    my $opener = Box(undef, undef, undef, T_BEGIN);
    my $mode   = ($STATE->lookupValue('IN_MATH') ? 'math' : 'text');
    my @body   = $stomach->digestNextBody();
    return List($opener, @body, mode => $mode); });
#DefConstructor('}', '', beforeDigest=>sub{$_[0]->egroup;});
# Handler for } (anything of catcode END): end the group and return a Box for the
# closing brace, carrying the font that was current just BEFORE the group ended.
DefPrimitive('}', sub {
    my ($stomach) = @_;
    my $font = LookupValue('font');
    $stomach->egroup;
    return Box(undef, $font, undef, T_END); });
# These are for those screwy cases where you need to create a group like box,
# more than just bgroup, egroup,
# BUT you DON'T want extra {, } showing up in any untex-ing.
# Grouping constructors whose braces do not appear in the reversion:
# \@hidden@bgroup begins a group before digestion, captures the body, constructs
# just that body, and reverts to the reverted body; \@hidden@egroup ends the group
# after digestion, constructs nothing and reverts to nothing.
DefConstructor('\@hidden@bgroup', '#body',
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup; },
  captureBody => 1,
  reversion   => sub {
    my ($whatsit) = @_;
    Revert($whatsit->getProperty('body')); });
DefConstructor('\@hidden@egroup', '',
  afterDigest => sub {
    my ($stomach) = @_;
    $stomach->egroup; },
  reversion => '');
# \begingroup and \endgroup simply call the stomach's begingroup and endgroup.
DefPrimitive('\begingroup', sub {
    my ($stomach) = @_;
    $stomach->begingroup; });
DefPrimitive('\endgroup', sub {
    my ($stomach) = @_;
    $stomach->endgroup; });
# Debugging aids; Ignored!
# Each is declared with its argument pattern but an undef body, i.e. no action is
# attached here (presumably the arguments are still read and then dropped -- confirm).
DefPrimitive('\show Token', undef);
DefPrimitive('\showbox Number', undef);
DefPrimitive('\showlists', undef);
DefPrimitive('\showthe Token', undef);
# DefPrimitive('\shipout ??
# \ignorespaces: no action beyond the SkipSpaces in its argument pattern.
DefPrimitive('\ignorespaces SkipSpaces', undef);
# \afterassignment saves ONE token (globally!) to execute after the next assignment;
# a later \afterassignment replaces the saved token.
DefPrimitive('\afterassignment Token', sub {
    my ($stomach, $token) = @_;
    AssignValue(afterAssignment => $token, 'global'); });
# \aftergroup saves ALL tokens (from repeated calls) to be executed IN ORDER after the next egroup or }
DefPrimitive('\aftergroup Token', sub {
    my ($stomach, $token) = @_;
    PushValue(afterGroup => $token); });
# \uppercase<general text>, \lowercase<general text>
# Uppercase a single token: when the state's uccode for the token's string is defined
# and nonzero, return a new token for that character code with the same catcode;
# otherwise return the token unchanged.
sub ucToken {
  my ($token) = @_;
  my $code = $STATE->lookupUCcode($token->getString);
  return $token unless (defined $code) && ($code != 0);
  return Token(chr($code), $token->getCatcode); }
# Lowercase a single token: when the state's lccode for the token's string is defined
# and nonzero, return a new token for that character code with the same catcode;
# otherwise return the token unchanged.
sub lcToken {
  my ($token) = @_;
  my $code = $STATE->lookupLCcode($token->getString);
  return $token unless (defined $code) && ($code != 0);
  return Token(chr($code), $token->getCatcode); }
# \uppercase<general text> and \lowercase<general text>: expand to the argument's
# tokens, each passed through ucToken or lcToken respectively.
DefMacro('\uppercase GeneralText', sub {
    my ($gullet, $tokens) = @_;
    my @converted = ();
    foreach my $token ($tokens->unlist) {
      push(@converted, ucToken($token)); }
    return @converted; });
DefMacro('\lowercase GeneralText', sub {
    my ($gullet, $tokens) = @_;
    my @converted = ();
    foreach my $token ($tokens->unlist) {
      push(@converted, lcToken($token)); }
    return @converted; });
# \message{...}: print the expanded argument, and a newline, on STDERR,
# but only when VERBOSITY is greater than -1.
DefPrimitive('\message{}', sub {
    my ($stomach, $stuff) = @_;
    if (LookupValue('VERBOSITY') > -1) {
      print STDERR ToString(Expand($stuff)) . "\n"; }
    return; });
# \errhelp: a token register, initially empty.
DefRegister('\errhelp' => Tokens());
# \errmessage{...}: print "<expanded argument>: <expansion of \the\errhelp>" on STDERR,
# regardless of VERBOSITY.
DefPrimitive('\errmessage{}', sub {
    my ($stomach, $stuff) = @_;
    my $message = ToString(Expand($stuff));
    my $help    = ToString(Expand(Tokens(T_CS('\the'), T_CS('\errhelp'))));
    print STDERR $message . ": " . $help . "\n";
    return; });
# TeX I/O primitives
# \openin<number>=<filename>: when FindFile locates the file, create a Mouth on it
# (handing over any content cached under '<path>_contents') and store that mouth,
# globally, under 'input_file:<number>'.  Nothing is stored when the file is not found.
DefPrimitive('\openin Number SkipMatch:= SkipSpaces TeXFileName', sub {
    my ($stomach, $port, $filename) = @_;
    # possibly should close $port if it's already been opened?
    my $portname = ToString($port);
    my $name     = ToString($filename);
    # Rely on FindFile to enforce any access restrictions
    my $path = FindFile($name);
    return unless $path;
    my $mouth = LaTeXML::Core::Mouth->create($path,
      content => LookupValue($path . '_contents'));
    AssignValue('input_file:' . $portname => $mouth, 'global');
    return; });
# \closein<number>: finish the mouth stored under 'input_file:<number>', if any,
# and globally clear that entry.
DefPrimitive('\closein Number', sub {
    my ($stomach, $port) = @_;
    my $key   = 'input_file:' . ToString($port);
    my $mouth = LookupValue($key);
    if ($mouth) {
      $mouth->finish;
      AssignValue($key => undef, 'global'); }
    return; });
# \read<number> to <token>: define <token> as a parameterless macro whose body is the
# next "line" of tokens read from the mouth stored under 'input_file:<number>'.
# A line ends at a newline space token met at brace depth 0 (that token is kept in the body).
# When no tokens are available the macro is defined as \par.
# Nothing at all is done when no mouth is stored for that port (see \openin).
DefPrimitive('\read Number SkipKeyword:to SkipSpaces Token', sub {
my ($stomach, $port, $token) = @_;
$port = ToString($port);
if (my $mouth = LookupValue('input_file:' . $port)) {
# The group confines the PRESERVE_NEWLINES setting to the reading done here.
$stomach->bgroup;
AssignValue(PRESERVE_NEWLINES => 2);
my @tokens = ();
# NOTE(review): $EOL is not used below.
my $EOL = Token("\n", CC_SPACE);
my ($t, $cc, $level) = (undef, 0, 0);
while ($t = $mouth->readToken) {
push(@tokens, $t);
# At this point $cc is still the catcode of the PREVIOUS token: the brace depth is
# brought up to date one token late, but always before the newline test that follows it.
$level++ if $cc == CC_BEGIN;
$level-- if $cc == CC_END;
# The test also updates $cc to the current token's catcode (the assignment comes first
# in the condition, so it always happens).
last if ((($cc = $t->getCatcode) == CC_SPACE) && ($t->getString eq "\n")) && !$level; }
$stomach->egroup;
# I'm unclear whether there should be a trailing $EOL on the last line, but apparently not?
@tokens = (T_CS('\par')) unless @tokens; # trailing blank line
DefMacroI($token, undef, Tokens(@tokens)); }
return; });
# \ifeof<number>: true when no mouth is stored under 'input_file:<number>',
# or when the stored mouth has no more input.
DefConditional('\ifeof Number', sub {
    my ($gullet, $port) = @_;
    my $mouth = LookupValue('input_file:' . ToString($port));
    return 1 unless $mouth;
    return !$mouth->hasMoreInput; });
# For output files, we'll write the data to a cached internal copy
# rather than to the actual file system.
# \openout<number>=<filename>: remember, globally, the file name under
# 'output_file:<number>' and start an empty cached copy under '<filename>_contents'.
DefPrimitive('\openout Number SkipMatch:= SkipSpaces TeXFileName', sub {
    my ($stomach, $port, $filename) = @_;
    my $portname = ToString($port);
    my $name     = ToString($filename);
    AssignValue('output_file:' . $portname => $name, 'global');
    AssignValue($name . '_contents'       => "",    'global');
    return; });
# \closeout<number>: globally forget the file name for that port
# (the cached contents are left alone).
DefPrimitive('\closeout Number', sub {
    my ($stomach, $port) = @_;
    my $portname = ToString($port);
    AssignValue('output_file:' . $portname => undef, 'global');
    return; });
# \write<number>{...}: when a file name is stored under 'output_file:<number>', append
# the UnTeX'd argument plus a newline to that file's cached contents (globally);
# otherwise print the same text on STDERR.
DefPrimitive('\write Number {}', sub {
    my ($stomach, $port, $tokens) = @_;
    my $filename = LookupValue('output_file:' . ToString($port));
    if (!$filename) {
      print STDERR UnTeX($tokens) . "\n"; }
    else {
      my $handle   = $filename . '_contents';
      my $contents = LookupValue($handle);
      AssignValue($handle => $contents . UnTeX($tokens) . "\n", 'global'); }
    return; });
# Since we don't paginate, we're effectively always "shipping out",
# so all operations are \immediate
# \immediate is declared with an undef body: nothing needs doing, for the reason given above.
DefPrimitive('\immediate', undef);
#======================================================================
# Remaining semi- Vertical Mode primitives in Ch.24, pp.280--281
# All of the following are declared with their argument pattern and an undef body,
# i.e. no action is attached to them here.
DefPrimitive('\special {}', undef);
DefPrimitive('\penalty Number', undef);
DefPrimitive('\kern Dimension', undef);
DefPrimitive('\mkern MuGlue', undef);
DefPrimitiveI('\unpenalty', undef, undef);
DefPrimitiveI('\unkern', undef, undef);
## Worrisome, but...
# \unskip: pop items off the end of @LaTeXML::LIST for as long as the last item
# is true and IsEmpty; returns nothing.
DefPrimitiveI('\unskip', undef, sub {
    my ($stomach) = @_;
    while (my $last = $LaTeXML::LIST[-1]) {
      last unless IsEmpty($last);
      pop(@LaTeXML::LIST); }
    return; });
# \mark{...}: declared with an undef body; no action is attached.
DefPrimitive('\mark{}', undef);
# \insert<8bit><filler>{<vertical mode material>}
# Only the number is named in the argument pattern; the braced material that follows
# is not an argument of this primitive.
DefPrimitive('\insert Number', undef); # Just let the insertion get processed(?)
# \vadjust<filler>{<vertical mode material>}
# Note: \vadjust ignores in vertical mode...
# is it sufficient to just clear the macro to avoid recursion?
# (we don't track horizontal/vertical mode)
# \LTX@vadjust@afterpar initially expands to a redefinition of itself with an empty body;
# \vadjust appends its material to this macro.
DefMacroI('\LTX@vadjust@afterpar', undef, '\def\LTX@vadjust@afterpar{}');
# \LTX@clear@vadjust@afterpar expands to a definition that restores
# \LTX@vadjust@afterpar to that same self-clearing initial form.
DefMacroI('\LTX@clear@vadjust@afterpar', undef, '\def\LTX@vadjust@afterpar{\def\LTX@vadjust@afterpar{}}');
# \vadjust{...}: append the argument's tokens to the macro \LTX@vadjust@afterpar;
# nothing is produced at this point.
DefPrimitive('\vadjust {}', sub {
    my ($stomach, $material) = @_;
    AddToMacro('\LTX@vadjust@afterpar', $material->unlist);
    return; });
#======================================================================
# Remaining Vertical Mode primitives in Ch.24, pp.281--283
# \vskip<glue>, \vfil, \vfill, \vss, \vfilneg
# <leaders> = \leaders | \cleaders | \xleaders
# <box or rule> = <box> | <vertical rule> | <horizontal rule>
# <vertical rule> = \vrule<rule specification>
# <horizontal rule> = \hrule<rule specification>
# <rule specification> = <optional spaces> | <rule dimension><rule specification>
# <rule dimension> = width <dimen> | height <dimen> | depth <dimen>
# Stuff to ignore for now...
# Vertical fills and leaders: each is declared as a primitive with an undef body.
foreach my $ignored (qw(\vfil \vfill \vss \vfilneg \leaders \cleaders \xleaders)) {
  DefPrimitive($ignored, undef); }
# \moveleft<dimen><box>, \moveright<dimen><box>
# \moveleft<dimen><box> and \moveright<dimen><box>: wrap the box in an ltx:text
# whose xoffset is the property 'x'; that property is set after digestion to the
# dimension multiplied by -1 for \moveleft, and to the dimension itself for \moveright.
DefConstructor('\moveleft Dimension MoveableBox',
  "<ltx:text xoffset='#x' _noautoclose='1'>#2</ltx:text>",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    $whatsit->setProperty(x => $whatsit->getArg(1)->multiply(-1)); });
DefConstructor('\moveright Dimension MoveableBox',
  "<ltx:text xoffset='#x' _noautoclose='1'>#2</ltx:text>",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    $whatsit->setProperty(x => $whatsit->getArg(1)); });
# \unvbox<8bit>, \unvcopy<8bit>
# \unvbox<number>: return the unlisted contents stored under 'box<number>' (nothing when
# the stored value is undefined), after resetting the stored value to undef.
DefPrimitive('\unvbox Number', sub {
    my ($stomach, $number) = @_;
    my $register = 'box' . $number->valueOf;
    my $contents = LookupValue($register);
    AssignValue($register, undef);
    return (defined $contents ? $contents->unlist : ()); });
# \unvcopy<number>: the same, but the stored value is left in place.
DefPrimitive('\unvcopy Number', sub {
    my ($stomach, $number) = @_;
    my $contents = LookupValue('box' . $number->valueOf);
    return (defined $contents ? $contents->unlist : ()); });
#======================================================================
# If this is the right solution...
# then we also should put the desired spacing on a style attribute?!?!?!
# \vskip<glue>: a skip of more than 10pt closes the current ltx:para when that is
# closeable, or else inserts an ltx:break when one can be opened; smaller skips
# construct nothing.  The whatsit is marked as (vertical) space.
DefConstructor('\vskip Glue', sub {
    my ($document, $length) = @_;
    my $points = $length->ptValue;
    return unless $points > 10;    # Or what!?!?!?!
    if ($document->isCloseable('ltx:para')) {
      $document->closeElement('ltx:para'); }
    elsif ($document->isOpenable('ltx:break')) {
      $document->insertElement('ltx:break'); }
    return; },
  properties => { isSpace => 1, isVerticalSpace => 1 });
#======================================================================
# Basic alignment support needed by most environments & commands.
#======================================================================
# Run trimNodeWhitespace on every ltx:td after it is closed.
Tag('ltx:td', afterClose => \&trimNodeWhitespace);
#----------------------------------------------------------------------
# Primitive column types;
# This is really LaTeX, but the mechanisms are used behind-the-scenes here, too.
# |  puts a \vrule between columns.
DefColumnType('|', sub {
    my $template = $LaTeXML::BUILD_TEMPLATE;
    $template->addBetweenColumn(T_CS('\vrule'));
    return; });
# l  adds a column with \hfil after the cell's content.
DefColumnType('l', sub {
    my $template = $LaTeXML::BUILD_TEMPLATE;
    $template->addColumn(after => Tokens(T_CS('\hfil')));
    return; });
# c  adds a column with \hfil both before and after the cell's content.
DefColumnType('c', sub {
    my $template = $LaTeXML::BUILD_TEMPLATE;
    $template->addColumn(
      before => Tokens(T_CS('\hfil')),
      after  => Tokens(T_CS('\hfil')));
    return; });
# r  adds a column with \hfil before the cell's content.
DefColumnType('r', sub {
    my $template = $LaTeXML::BUILD_TEMPLATE;
    $template->addColumn(before => Tokens(T_CS('\hfil')));
    return; });
# This collects paragraph text, like \hbox, but for use within alignment cells;
# no ltx:text wrapper is needed, since it is within a cell.
# and it handles $ and & appropriately
# \tabularcell@hbox{...}: constructs just its contents (no wrapper element),
# digested in bounded text mode.
DefConstructor('\tabularcell@hbox HBoxContents',
  "#1",
  mode => 'text', bounded => 1,
  # Workaround for $ in alignment; an explicit \hbox gives us a normal $.
  # And also things like \centerline that will end up bumping up to block level!
  beforeDigest => sub {
    ## reenterTextMode();  # BUT NOT \\\\ !!!!!!
    Let(T_MATH,        '\@dollar@in@textmode');
    Let('\centerline', '\relax'); },
  # Override nowrap on right,left,center cells:
  # the current element gets class ltx_wrap unless its align attribute is 'justify'.
  afterConstruct => sub {
    my ($document) = @_;
    my $cell = $document->getElement;
    $document->addClass($cell, 'ltx_wrap') unless ($cell->getAttribute('align') || '') eq 'justify'; });
# p{width}  adds a justified column of the given width, whose content is wrapped
#           in \tabularcell@hbox{ ... }.
DefColumnType('p{Dimension}', sub {
    my ($gullet, $width) = @_;
    $LaTeXML::BUILD_TEMPLATE->addColumn(
      before => Tokens(T_CS('\tabularcell@hbox'), T_BEGIN),
      after  => Tokens(T_END),
      align  => 'justify',
      width  => $width);
    return; });
# *{n}{pattern}  expands to n copies of the pattern's tokens.
DefColumnType('*{Number}{}', sub {
    my ($gullet, $n, $pattern) = @_;
    my @repeated = ();
    foreach my $copy (1 .. $n->valueOf) {
      push(@repeated, $pattern->unlist); }
    return @repeated; });
# @{filler}  puts the filler's tokens between columns.
DefColumnType('@{}', sub {
    my ($gullet, $filler) = @_;
    $LaTeXML::BUILD_TEMPLATE->addBetweenColumn($filler->unlist);
    return; });
#----------------------------------------------------------------------
# This is where ALL(?) alignments start & finish
# \@open@alignment will be the object representing the entire alignment!
# \@start@alignment opens, from the outside in: alignment, row, column, inner column.
DefMacroI('\@start@alignment', undef,
'\@open@alignment\@open@row\@open@column\@open@inner@column');
# \@finish@alignment closes the same four levels in the reverse order.
DefMacroI('\@finish@alignment', undef,
'\@close@inner@column\@close@column\@close@row\@close@alignment');
#----------------------------------------------------------------------
# These are to be bound to &, \span, \cr and \\
# The macro layer expands into appropriate begin & end markers for rows & columns;
# The constructor layer carries out any side effect and records a token for reversion.
# \@alignment@align (for &): close the inner column and the column, leave a marker,
# then open a new column and inner column.
DefMacroI('\@alignment@align', undef,
'\@close@inner@column\@close@column'
. '\@alignment@align@marker'
. '\@open@column\@open@inner@column');
# The marker constructs nothing; it only reverts to &.
DefConstructorI('\@alignment@align@marker', undef, '', reversion => '&');
#DefMacro('\@alignment@span',
# \span: close and reopen only the INNER column, leaving a marker in between;
# the enclosing column stays open.
DefMacroI('\span', undef,
'\@close@inner@column'
. '\@alignment@span@marker'
. '\@open@inner@column');
# The marker constructs nothing, reverts to \span, has size 0 and carries the
# alignmentSkippable property.
DefConstructorI('\@alignment@span@marker', undef, '', reversion => '\span',
sizer => 0,
properties => { alignmentSkippable => 1 });
# \omit constructs nothing; it only carries the alignmentSkippable property.
DefConstructorI('\omit', undef, '', properties => { alignmentSkippable => 1 });
# \@alignment@cr (for \cr): peek at the next expanded token (it is read, then put back).
# When that token is } or \egroup, expand to \@finish@alignment; otherwise close the
# current inner column, column and row, leave a \cr marker, and open a new row,
# column and inner column.
DefMacroI('\@alignment@cr', undef, sub {
    my ($gullet) = @_;
    my $next = $gullet->readXToken;
    $gullet->unread($next);
    # SPECIAL CASE for endings of \halign (& friends).
    # We need the appropriate ending, to close the row/col/etc, but only see a }!!
    my $ending = Equals($next, T_END) || Equals($next, T_CS('\egroup'));    # Ending an \halign?
    return (T_CS('\@finish@alignment')) if $ending;
    return (T_CS('\@close@inner@column'), T_CS('\@close@column'), T_CS('\@close@row'),
      T_CS('\@alignment@cr@marker'),
      T_CS('\@open@row'), T_CS('\@open@column'), T_CS('\@open@inner@column')); });
# The \cr marker constructs nothing; it only reverts to \cr.
DefConstructorI('\@alignment@cr@marker', undef, '', reversion => '\cr');
# Outside of alignments \cr and \crcr just construct a newline.
DefConstructorI('\default@cr', undef, "\n"); # Default binding.
Let('\cr', '\default@cr');
Let('\crcr', '\cr');
# NOTE that this does NOT skip spaces before * or []!!!!!
# As if: \@alignment@newline OptionalMatch:* [Dimension]
# Read the optional * and [...] that may follow \\, WITHOUT skipping spaces before either.
# Returns ($star, $optional): $star is 1 when a * was present (undef otherwise);
# $optional is what was read up to the closing ] when a [ was present (undef otherwise).
# A token that was read but turned out to be neither is put back.
sub readNewlineArgs {
  my ($gullet) = @_;
  my ($star, $optional);
  my $peek = $gullet->readToken;
  if ($peek && $peek->equals(T_OTHER('*'))) {
    $star = 1;
    $peek = $gullet->readToken; }
  if ($peek && $peek->equals(T_OTHER('['))) {
    $optional = $gullet->readUntil(T_OTHER(']')); }
  elsif ($peek) {
    $gullet->unread($peek); }
  return ($star, $optional); }
# The next two macros are for binding to \\
# one version does NOT skip spaces (esp. newline!) before * and []; the other DOES
# We need to be careful which one is used in which place.
# [LaTeX's tabular, eqnarray DO skip;
# some (all?) ams environments do NOT skip]
# What about halign? What should be the default?
# Variant that does NOT skip spaces: the optional * and [...] are consumed by
# readNewlineArgs and their values discarded.
# Expands to: close inner column, column and row; a newline marker; open row, column
# and inner column.
DefMacroI('\@alignment@newline@noskip', undef, sub {
my ($gullet) = @_;
readNewlineArgs($gullet);
(T_CS('\@close@inner@column'), T_CS('\@close@column'), T_CS('\@close@row'),
T_CS('\@alignment@newline@marker'),
T_CS('\@open@row'), T_CS('\@open@column'), T_CS('\@open@inner@column')); });
# Variant whose optional * and [Dimension] are read through the macro's own argument
# pattern; their values are not used.  Same expansion as the variant above.
DefMacro('\@alignment@newline OptionalMatch:* [Dimension]', sub {
my ($gullet) = @_;
(T_CS('\@close@inner@column'), T_CS('\@close@column'), T_CS('\@close@row'),
T_CS('\@alignment@newline@marker'),
T_CS('\@open@row'), T_CS('\@open@column'), T_CS('\@open@inner@column')); });
# The marker constructs nothing; it reverts to \\ followed by T_CR.
DefConstructorI('\@alignment@newline@marker', undef, '', reversion => Tokens(T_CS("\\\\"), T_CR));
# \@alignment@hline (aliased to \hline): constructs nothing itself; after digestion it
# adds a 't' line to the current alignment, when there is one, and it is marked as a
# horizontal rule.
DefConstructorI('\@alignment@hline', undef, '',
afterDigest => sub {
if (my $alignment = LookupValue('Alignment')) {
$alignment->addLine('t'); } },
properties => { isHorizontalRule => 1 },
alias => '\hline');
# Special forms for $ appearing within alignments.
# Note that $ within a math alignment (eg array environment),
# switches to text mode! There's no $$ for display math.
# This is the "normal" case: $ appearing with an alignment that is in text mode.
# It's just like regular $, except it doesn't look for $$ (no display math).
# $ within a text-mode alignment: toggles inline math only (never display math),
# ending it when IN_MATH is set and beginning it otherwise.
DefPrimitiveI('\@dollar@in@textmode', undef, sub {
    my ($stomach) = @_;
    my $toggle = (LookupValue('IN_MATH') ? '\@@ENDINLINEMATH' : '\@@BEGININLINEMATH');
    $stomach->invokeToken(T_CS($toggle)); });
# This one is for $ appearing within an alignment that's already math.
# This should switch to text mode (because it's balancing the hidden $
# wrapping each alignment cell!!!!!!)
# However, it should be like a normal $ if it's inside something like \mbox
# that itself makes a text box!!!!!!
# Thus, we need to know at what boxing level we started the last math or text.
# This is all complicated by the need to know _how_ we got into or out of math mode!
# Gawd, this is awful!
# NOTE: Probably the most "Right" thing to do would be to process
# alignments in text mode only (like TeX), sneaking $'s in where needed,
# but then afterwards, morph them into math arrays?
# This would be complicated by the need to hide these $ from untex.
# $ within an alignment that is already math.
# 'MATH_ALIGN_$_BEGUN' records the boxing level (plus one) at which such a $ last began
# something.  When it matches the current boxing level, this $ ENDS that something
# (inline math if IN_MATH, else inline text) and the record is reset to 0 afterwards;
# otherwise this $ BEGINS something (inline text if IN_MATH, else inline math), the
# record being set beforehand.
DefPrimitiveI('\@dollar@in@mathmode', undef, sub {
    my ($stomach) = @_;
    my $level = $stomach->getBoxingLevel;
    if ((LookupValue('MATH_ALIGN_$_BEGUN') || 0) != $level) {
      AssignValue('MATH_ALIGN_$_BEGUN' => $level + 1);    # Note that we've begun something
      my $begin = (LookupValue('IN_MATH')                 # If we're "still" in math
        ? '\@@BEGININLINETEXT' : '\@@BEGININLINEMATH');
      return $stomach->invokeToken(T_CS($begin)); }
    # Otherwise, we've begun making _something_ with $.
    my $end = (LookupValue('IN_MATH')                     # But we're somehow in math?
      ? '\@@ENDINLINEMATH' : '\@@ENDINLINETEXT');
    my @boxes = $stomach->invokeToken(T_CS($end));
    AssignValue('MATH_ALIGN_$_BEGUN' => 0);    # Reset this AFTER finishing the something
    return @boxes; });
# Begin a stretch of text inside math: enters 'text' mode before digestion,
# captures the following body, and wraps it in ltx:XMText.  Aliased to '$'.
DefConstructorI('\@@BEGININLINETEXT', undef,
"<ltx:XMText>"
. "#body"
. "</ltx:XMText>",
alias => '$', beforeDigest => sub { $_[0]->beginMode('text'); }, captureBody => 1);
# Matching end: constructs nothing, just leaves 'text' mode.  Also aliased to '$'.
DefConstructorI('\@@ENDINLINETEXT', undef, "", alias => '$',
beforeDigest => sub { $_[0]->endMode('text'); });
# Globally set EQUATIONROW_NUMBER to 0 (used by \noalign below so the
# generated row is not numbered -- presumably consulted by equation-row code elsewhere).
DefPrimitiveI('\@LTX@nonumber', undef, sub { AssignValue(EQUATIONROW_NUMBER => 0, 'global'); });
# \noalign{} provides vertical material that doesn't get aligned.
# This could be a bunch of text that would be treated like AMS' \intertext,
# OR (more commonly) it might be more or less empty, \vspace,\hline etc.
# In the latter case, we DON'T want the tr/td even with colspan!!!
# Unfortunately, the timing is wrong to remove them (until the Alignment is processing)
# MOREOVER, there're odd cases (\displaylines) where we apparently should be in an alignment,
# but aren't, so more punting is in order!
# Note that \noalign processes (at least expands) its argument as it reads it;
# See the peculiar construct in LaTeX for \hline and \@xhline
# Inside an alignment: put the (expanded) argument, wrapped in \@@LTX@noalign,
# into a single left-aligned \@multicolumn spanning \@alignment@ncolumns columns,
# suppress numbering of that row, and end the row.
# Outside an alignment: just emit the argument.
DefMacro('\noalign Expanded',
'\if@in@alignment'
. '\@multicolumn{\@alignment@ncolumns}{l}{\@@LTX@noalign{#1}}\@LTX@nonumber\@alignment@newline'
. '\else#1\fi');
# This just processes the argument in text mode, but notices whether it is "empty" or not.
# If so, tell the current row that it can safely be collapsed later on.
# \@@LTX@noalign{material}
# Digests the material in text mode and inserts it with class ltx_intertext.
# After digestion, the material is classified: if it consists solely of fills,
# rules, skippable items, comments, spaces or empty boxes, the current row of
# the Alignment (if any) is flagged {empty}, and the whatsit itself is marked
# alignmentSkippable, so the row can be collapsed later.
DefConstructor('\@@LTX@noalign{}', sub {
    my ($doc, $content) = @_;
    # Prefer a paragraph; fall back to inline text where ltx:p cannot be opened.
    my $wrapper = 'ltx:text';
    $wrapper = 'ltx:p' if $doc->isOpenable('ltx:p');
    $doc->insertElement($wrapper, $content, class => 'ltx_intertext'); },
  mode        => 'text',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    # Assume there is nothing substantial until a box proves otherwise.
    my $skippable = 1;
    foreach my $item ($whatsit->getArg(1)->unlist) {
      # None of these kinds of box deserves a paragraph of its own.
      next if $item->getProperty('isFill')
        || $item->getProperty('isVerticalRule')
        || $item->getProperty('isHorizontalRule')    # we need to put this somewhere?
        || $item->getProperty('alignmentSkippable')
        || (ref $item eq 'LaTeXML::Core::Comment')
        || $item->getProperty('isSpace')
        || IsEmpty($item);
      # Anything else is real content; no need to look further.
      $skippable = 0;
      last; }
    my $alignment = LookupValue('Alignment');
    $alignment->currentRow->{empty} = $skippable if $alignment;
    $whatsit->setProperty(alignmentSkippable => $skippable);
    return; });
# \hidewidth expands to nothing here.
DefMacroI('\hidewidth', undef, Tokens());
# \multispan{n} expands to \omit followed by (n-1) copies of \span\omit,
# i.e. one cell spanning n columns with the template omitted.
DefMacro('\multispan{Number}', sub {
my ($gullet, $span) = @_;
$span = $span->valueOf;
(T_CS('\omit'), map { (T_CS('\span'), T_CS('\omit')) } 1 .. $span - 1); });
# Register whose getter reports the number of columns in the current Alignment's
# template, or 0 when no Alignment is bound.
# NOTE(review): the default value is Dimension(0) although the getter returns a
# Number -- confirm that the Dimension register type is intended.
DefRegisterI('\@alignment@ncolumns', undef, Dimension(0),
getter => sub {
if (my $alignment = LookupValue('Alignment')) {
Number(scalar($alignment->getTemplate->columns)); }
else { Number(0); } });
# Register whose getter reports the current column number of the current Alignment,
# or 0 when no Alignment is bound.  (Same Dimension/Number note as above.)
DefRegisterI('\@alignment@column', undef, Dimension(0),
getter => sub {
if (my $alignment = LookupValue('Alignment')) {
Number($alignment->currentColumnNumber); }
else { Number(0); } });
# \@multicolumn{n}{template}{content}
# Expands to a cell spanning n columns whose own template is omitted, with the
# before/after tokens of the FIRST column of the given template placed around
# the content instead.
DefMacro('\@multicolumn {Number} AlignmentTemplate {}', sub {
my ($gullet, $span, $template, $tokens) = @_;
# Only column 1 of the supplied template is used.
my $column = $template->column(1);
$span = $span->valueOf;
# First part, like \multispan
(T_CS('\omit'), (map { (T_CS('\span'), T_CS('\omit')) } 1 .. $span - 1),
# Next part, just put the template in-line, since it's only used once.
($column ? beforeCellUnlist($$column{before}) : ()),
$tokens->unlist,
($column ? afterCellUnlist($$column{after}) : ())); });
# True exactly when an Alignment object is currently bound in STATE.
DefConditionalI('\if@in@alignment', undef, sub { LookupValue('Alignment'); });
# This is the primary idiom for creating Alignment structures
# (\halign, tabular, matrix, even eqnarray)
# Along with Let bindings that redefine various things inside the body,
# the important thing is to create an Alignment object (a specialized Whatsit).
# Unlike other Whatsits, we need to bind it in STATE as Alignment
# so that we can get access to it, setting rows, columns, borders, ...
# Also, it holds bits of code which are Constructor analogs,
# responsible for opening & closing the elements for container, rows & columns.
#
# A typical alignment would be defined as a macro like:
# \foo{} ==> \@@foo{\@start@alignment#1\@finish@alignment}
# where \@@foo is a constructor with '#1' as the pattern.
# To create the actual Alignment object, \@@foo should invoke
# alignmentBindings in beforeDigest, or perhaps put
# a CS similar to \@alignment@bindings before \@start@alignment.
# Obviously a bit more involved for environments, but similar.
# alignmentBindings($template, $mode, %properties)
# Creates a LaTeXML::Core::Alignment for $template, binds it in STATE as
# 'Alignment', and redirects the alignment-related control sequences
# (&, \\, \tabularnewline, \cr, \crcr, \hline, $ and the row open/close hooks)
# to their alignment-aware implementations.
#   $template   : the column template handed to the Alignment.
#   $mode       : optional; defaults to the current MODE.  A mode ending in
#                 "math" selects XMArray/XMRow/XMCell, otherwise tabular/tr/td.
#   %properties : stored on the Alignment under 'properties'.
# Returns nothing.
sub alignmentBindings {
  my ($template, $mode, %properties) = @_;
  $mode ||= LookupValue('MODE');
  my $ismath = $mode =~ /math$/;
  # Element names for the container, its rows and its cells.
  my ($container, $rowtype, $coltype) = ($ismath
    ? ('ltx:XMArray', 'ltx:XMRow', 'ltx:XMCell')
    : ('ltx:tabular', 'ltx:tr',    'ltx:td'));
  # Factories for the constructor-like callbacks held by the Alignment:
  # each receives the document first, and openers pass along any further arguments.
  my $opener = sub {
    my ($tag) = @_;
    return sub { my ($doc, @rest) = @_; $doc->openElement($tag, @rest); }; };
  my $closer = sub {
    my ($tag) = @_;
    return sub { $_[0]->closeElement($tag); }; };
  my $alignment = LaTeXML::Core::Alignment->new(
    template       => $template,
    openContainer  => $opener->($container),
    closeContainer => $closer->($container),
    openRow        => $opener->($rowtype),
    closeRow       => $closer->($rowtype),
    openColumn     => $opener->($coltype),
    closeColumn    => $closer->($coltype),
    isMath         => $ismath,
    properties     => {%properties});
  AssignValue(Alignment => $alignment);
  # Redirect the tokens that have special meaning inside an alignment.
  my @bindings = (
    [T_ALIGN,           '\@alignment@align'],
    ["\\\\",            '\@alignment@newline'],
    ['\tabularnewline', '\@alignment@newline'],
    ['\cr',             '\@alignment@cr'],
    ['\crcr',           '\@alignment@cr'],
    ['\hline',          '\@alignment@hline'],
    [T_MATH,            ($ismath ? '\@dollar@in@mathmode' : '\@dollar@in@textmode')],
    ['\@open@row',      '\default@open@row'],
    ['\@close@row',     '\default@close@row']);
  foreach my $pair (@bindings) {
    Let($$pair[0], $$pair[1]); }
  return; }
# TeX-level entry to alignmentBindings: takes an alignment template and an
# optional mode, and passes both straight through (no extra properties).
DefPrimitive('\@alignment@bindings AlignmentTemplate []', sub {
my ($stomach, $template, $mode) = @_;
alignmentBindings($template, $mode); });
# Utility, not really TeX, but used by LaTeX, AmSTeX...
# Convert a vertical positioning, optional argument.
# t = "top", b = "bottom"; default is "middle".
# Note that the default for vattach attribute is "baseline".
# translateAttachment($pos)
# Maps a vertical positioning argument to an attachment name:
# 't' => 'top', 'b' => 'bottom', anything else (including a missing or
# false argument) => 'middle'.  A true $pos is stringified with ToString first.
sub translateAttachment {
  my ($pos) = @_;
  my %attachment_for = (t => 'top', b => 'bottom');
  my $key = ($pos ? ToString($pos) : '');
  return $attachment_for{$key} || 'middle'; }
#----------------------------------------------------------------------
# To recognize where rows & columns start and stop, we need to
# recognize things that have expanded into &, \cr, etc.
# Additionally, \span creates a single column out of several.
#----------------------------------------------------------------------
# Overall Alignment;
# Closes the group opened by \@open@alignment's beforeDigest.
DefPrimitive('\@close@alignment', sub { $_[0]->egroup; });
# This makes the Alignment object act as if it were the Whatsit.
# ie. the alignment gets absorbed into the document, is sized, etc.
# But it still reverts to whatever stuff was digested.
# Sequence: open a group, digest the body (skipping leading spaces), then
# store the current Alignment on the whatsit as property 'alignment' (which the
# "#alignment" constructor and sizer refer to) and hand the whatsit to the
# Alignment as its body.
# NOTE(review): afterDigest assumes an Alignment is bound; there is no guard
# against LookupValue('Alignment') being undefined.
DefConstructor('\@open@alignment SkipSpaces DigestedBody',
"#alignment",
reversion => '#1',
sizer => '#alignment',
beforeDigest => sub { $_[0]->bgroup; },
afterDigest => sub {
my ($stomach, $whatsit) = @_;
my $alignment = LookupValue('Alignment');
$whatsit->setProperty(alignment => $alignment);
$alignment->setBody($whatsit); });
#----------------------------------------------------------------------
# Row; The content is stuffed into the Alignment, so we don't construct anything here.
##DefMacroI('\default@close@row',undef,'\@row@after\@@default@close@row');
##DefMacroI('\default@open@row',undef,'\@@default@open@row\@row@before');
# Default row hooks; alignmentBindings Lets \@open@row / \@close@row to these.
DefMacroI('\default@close@row', undef, '\@@default@close@row');
DefMacroI('\default@open@row', undef, '\@@default@open@row');
# Redefinable hooks, empty by default.  They are not part of the row's token
# stream; instead they are digested separately and attached to the Alignment
# via addBeforeRow / addAfterRow below.
DefMacroI('\@row@before', undef, '');
DefMacroI('\@row@after', undef, '');
# End of a row: digest \@row@after into the Alignment (if any), then close the
# group that \@@default@open@row opened.
DefPrimitive('\@@default@close@row', sub {
if (my $alignment = LookupValue('Alignment')) {
$alignment->addAfterRow(Digest(T_CS('\@row@after'))); }
$_[0]->egroup; });
# Start of a row: open a group, start a new row in the Alignment (if any) and
# digest \@row@before into it; then the row's body is digested.
# The constructor pattern is empty: the row content is held by the Alignment.
DefConstructor('\@@default@open@row SkipSpaces DigestedBody',
"",
reversion => '#1',
beforeDigest => sub {
$_[0]->bgroup;
if (my $alignment = LookupValue('Alignment')) {
$alignment->newRow;
$alignment->addBeforeRow(Digest(T_CS('\@row@before'))); }
return; });
#----------------------------------------------------------------------
# Column
# Here, a column represents 1 or more "inner columns".
# inner columns can be separated by \span's yielding a single column with
# colspan > 1. Also, inner columns recognize \omit which removes the
# before & after tokens which would otherwise wrap the inner column.
# Globally set/clear IN_TABULAR_HEAD; \@@open@column consults it to mark header cells.
DefPrimitiveI('\@tabular@begin@heading', undef, sub { AssignValue(IN_TABULAR_HEAD => 1, 'global'); });
DefPrimitiveI('\@tabular@end@heading', undef, sub { AssignValue(IN_TABULAR_HEAD => 0, 'global'); });
# A column is bracketed by \@@open@column ... \@@close@column, with the
# (by default empty) hooks \@column@before / \@column@after just inside.
DefMacroI('\@close@column', undef, '\@column@after\@@close@column');
DefMacroI('\@open@column', undef, '\@@open@column\@column@before');
DefMacroI('\@column@before', undef, '');
DefMacroI('\@column@after', undef, '');
# Closes the group opened by \@@open@column's beforeDigest.
DefPrimitiveI('\@@close@column', undef, sub { $_[0]->egroup; });
# Digests one (possibly spanning) column and records what was found on the
# Alignment's column spec.
# beforeDigest remembers the column number in effect before the body is digested
# (alignmentStartColumn) and opens a group; the body may advance the Alignment
# through several inner columns (see \@@open@inner@column).
# The primitive then inspects the digested boxes to infer alignment, borders and
# emptiness, stores them in the spec of the first column covered, marks the
# remaining covered columns as skipped, and returns the (reassembled) boxes.
DefPrimitive('\@@open@column SkipSpaces DigestedBody', sub {
my ($stomach, $boxes) = @_;
my $alignment = LookupValue('Alignment');
return () unless $alignment; # ??
# $n0 = first column covered by this cell; $n1 = last column covered.
my $n0 = LookupValue('alignmentStartColumn') + 1;
my $n1 = $alignment->currentColumnNumber;
my $colspec = $alignment->getColumn($n0);
my $align = $$colspec{align} || 'left';
my $border = '';
# Peel off any boxes from both sides until we get the "meat" of the column.
# from this we can establish borders, alignment and emptiness.
# But we, of course, immediately put them back...
# (Fills and vertical rules are NOT put back; everything else peeled is saved.)
my @boxes = $boxes->unlist;
my @saveleft = ();
my @saveright = ();
# Left side: a leading fill pushes the content right and ends the scan;
# a vertical rule contributes a left border.
while (@boxes) {
if ($boxes[0]->getProperty('isFill')) {
$align = 'right'; shift(@boxes); last; }
elsif ($boxes[0]->getProperty('isVerticalRule')) {
$border .= 'l'; shift(@boxes); }
elsif ($boxes[0]->getProperty('isHorizontalRule')) {
push(@saveleft, shift(@boxes)); }
elsif ($boxes[0]->getProperty('alignmentSkippable')) {
push(@saveleft, shift(@boxes)); }
elsif (ref $boxes[0] eq 'LaTeXML::Core::Comment') {
push(@saveleft, shift(@boxes)); }
elsif ($boxes[0]->getProperty('isSpace')) {
push(@saveleft, shift(@boxes)); }
elsif (IsEmpty($boxes[0])) {
push(@saveleft, shift(@boxes)); }
else {
last; } }
# Right side: a trailing fill turns 'right' (fill on both sides) into 'center',
# and otherwise leaves the alignment as it was; a vertical rule contributes a right border.
while (@boxes) {
if ($boxes[-1]->getProperty('isFill')) {
if ($align eq 'right') { $align = 'center'; }
pop(@boxes); last; }
elsif ($boxes[-1]->getProperty('isVerticalRule')) {
$border .= 'r'; pop(@boxes); }
elsif ($boxes[-1]->getProperty('isHorizontalRule')) {
unshift(@saveright, pop(@boxes)); }
elsif ($boxes[-1]->getProperty('alignmentSkippable')) {
unshift(@saveright, pop(@boxes)); }
elsif (ref $boxes[-1] eq 'LaTeXML::Core::Comment') {
unshift(@saveright, pop(@boxes)); }
elsif ($boxes[-1]->getProperty('isSpace')) {
unshift(@saveright, pop(@boxes)); }
elsif (IsEmpty($boxes[-1])) {
unshift(@saveright, pop(@boxes)); }
else {
last; } }
# A width on the column spec is only kept for justified columns.
delete $$colspec{width} unless $align eq 'justify';
# Empty means nothing is left once the ignorable boxes are peeled away.
my $empty = scalar(@boxes) == 0;
$align = undef if $empty;
@boxes = (@saveleft, @boxes, @saveright);
$boxes = List(@boxes, mode => ($boxes->isMath ? 'math' : 'text'));
# record relevant info in the Alignment.
$$colspec{align} = $align;
# Borders found here are appended to any border already on the spec.
$$colspec{border} = $border = ($$colspec{border} || '') . $border;
$$colspec{boxes} = $boxes;
$$colspec{colspan} = $n1 - $n0 + 1;
$$colspec{empty} = 1 if $empty;
if (LookupValue('IN_TABULAR_HEAD') || LookupValue('IN_TABULAR_FOOT')) {
$$colspec{thead}{column} = 1; }
# The columns absorbed by a span are marked so they can be skipped later.
for (my $i = $n0 + 1 ; $i <= $n1 ; $i++) {
my $c = $alignment->getColumn($i);
$$c{skipped} = 1 if $c; }
# $stomach->egroup;
$boxes; },
beforeDigest => sub {
if (my $alignment = LookupValue('Alignment')) {
AssignValue(alignmentStartColumn => $alignment->currentColumnNumber); }
$_[0]->bgroup; });
# ALIGNMENT_LINE_COMMANDS lists the tokens that \@@open@inner@column executes
# and sets aside while it scans the start of a cell looking for \omit.
AssignValue(ALIGNMENT_LINE_COMMANDS => []);
PushValue(ALIGNMENT_LINE_COMMANDS => T_CS('\hline'));
PushValue(ALIGNMENT_LINE_COMMANDS => T_CS('\cline'));
PushValue(ALIGNMENT_LINE_COMMANDS => T_CS('\label'));
# An inner column is bracketed by \@@open@inner@column ... \@@close@inner@column.
# On closing, trailing space is eaten first, then the column's "after" tokens follow.
DefMacroI('\@open@inner@column', undef, '\@@open@inner@column\@inner@column@before');
DefMacroI('\@close@inner@column', undef, '\@@eat@space\@inner@column@after\@@close@inner@column');
DefMacroI('\@inner@column@before', undef, '');
# Expands to the current column's "after" template tokens (reordered by
# afterCellUnlist), or to nothing if there is no Alignment or current column.
DefMacroI('\@inner@column@after', undef, sub {
my $alignment = LookupValue('Alignment');
my $column = $alignment && $alignment->currentColumn;
($column ? afterCellUnlist($$column{after}) : ()); });
# Closes the group opened by \@@open@inner@column.
DefPrimitiveI('\@@close@inner@column', undef, sub { $_[0]->egroup; });
# Starts one inner column: opens a group, advances the Alignment to its next
# column, looks ahead (with expansion) for a leading \omit, pushes the column's
# "before" template tokens back onto the input, and then digests the column body.
# Returns the boxes from any line commands met during the look-ahead, followed
# by the digested body.
DefPrimitiveI('\@@open@inner@column', undef, sub {
my ($stomach) = @_;
my $alignment = LookupValue('Alignment');
$stomach->bgroup;
return () unless $alignment; # Presumably will already be reporting (many) errors...
my $colspec = $alignment->nextColumn;
my $gullet = $stomach->getGullet;
my @lines = ();
my @line_tokens = @{ LookupValue('ALIGNMENT_LINE_COMMANDS') };
my @savedtokens = ();
$$colspec{empty} = 0; # Assume the column isn't empty
# Scan for leading \omit, skipping over (& saving) \hline.
while (my $tok = $gullet->readXToken(0)) {
if ($tok->equals(T_SPACE)) { } # Skip leading space
elsif (grep { $tok->equals($_) } @line_tokens) { # Save line commands
# The line command is executed right away; its boxes are emitted ahead of the body.
push(@lines, $stomach->invokeToken($tok)); }
elsif (Equals($tok, T_BEGIN)) { # Crazy... seems { doesn't "block" \omit!
push(@savedtokens, $tok); }
else {
if (Equals($tok, T_CS('\omit'))) { # \omit removes the before/after tokens for this column.
$$colspec{before} = $$colspec{after} = Tokens(); }
## If we find \@@eat@space, we're at end of the columns content, so consider it empty
elsif (Equals($tok, T_CS('\@@eat@space'))) { # Reached the end of the cell before any content, so the column is empty.
$$colspec{empty} = 1; }
# Whatever stopped the scan goes back to be processed normally.
$gullet->unread($tok); last; } }
# Put back any braces skipped over, then the "before" tokens in front of them
# (unread prepends, so the "before" tokens will be read first).
$gullet->unread(@savedtokens);
$gullet->unread(beforeCellUnlist($$colspec{before}));
(@lines, $STATE->getStomach->digestNextBody()); });
# NOTE: Watch here for problems with alignments.
# The previous version threw away too much stuff (esp. metadata).
# This one, I think, is more careful.
# The issue is that it should throw away spaces (or things like spaces?)
# so that various omit/span/fill/etc is properly recognized when analyzing columns.
# \@@eat@space
# Trims the tail of the list of boxes currently being accumulated
# (@LaTeXML::LIST): working backwards from the end, boxes that are empty
# (per IsEmpty) are discarded, while boxes marked alignmentSkippable or isFill
# are kept; the scan stops at the first box that is neither.
# The kept boxes are then put back at the end of the list IN THEIR ORIGINAL
# ORDER, so that fills and metadata keep their relative positions for the
# later column analysis.  Returns nothing.
DefPrimitiveI('\@@eat@space', undef, sub {
    my $box;
    my @save = ();
    while ($box = $LaTeXML::LIST[-1]) {
      if ($box->getProperty('alignmentSkippable')
        || $box->getProperty('isFill')) {
        # We are walking backwards, so prepend (not append) to preserve order.
        unshift(@save, pop(@LaTeXML::LIST)); }
      elsif (IsEmpty($box)) {
        pop(@LaTeXML::LIST); }
      else {
        last; } }
    push(@LaTeXML::LIST, @save);
    return; });
# Yet more special case hacking. Sometimes the order of tokens works for
# TeX, but confuses us... In particular the order of $ and \hfil!
# \@open@column is too late, since the stuff is already digested.
# Could _almost_ handle the extractions here, but there are several
# rule operators that digest into whatsits with certain properties...
# beforeCellUnlist($tokens)
# Returns the tokens of a column's "before" template as a list, reordered so
# that any run of \hfil immediately FOLLOWING a math-shift ($) is moved in
# front of that $.  Returns an empty list when $tokens is false.
sub beforeCellUnlist {
  my ($tokens) = @_;
  return () unless $tokens;
  my @pending = $tokens->unlist;
  my @result  = ();
  while (my $tok = shift(@pending)) {
    if (Equals($tok, T_MATH)) {
      # Let every \hfil directly after the $ jump ahead of it.
      while (@pending && Equals($pending[0], T_CS('\hfil'))) {
        push(@result, shift(@pending)); } }
    push(@result, $tok); }
  return @result; }
# afterCellUnlist($tokens)
# Mirror image of beforeCellUnlist for a column's "after" template: returns
# the tokens as a list, reordered so that any run of \hfil immediately
# PRECEDING a math-shift ($) is moved behind that $.
# Returns an empty list when $tokens is false.
sub afterCellUnlist {
  my ($tokens) = @_;
  return () unless $tokens;
  my @pending = $tokens->unlist;
  my @result  = ();
  # Work from the end of the template towards its start.
  while (my $tok = pop(@pending)) {
    if (Equals($tok, T_MATH)) {
      # Let every \hfil directly before the $ drop behind it.
      while (@pending && Equals($pending[-1], T_CS('\hfil'))) {
        unshift(@result, pop(@pending)); } }
    unshift(@result, $tok); }
  return @result; }
#----------------------------------------------------------------------
# \halign, See Chap.22
# \halign <box specification> { <template> \cr <rows> }
# All the work happens in afterDigest, once the box specification has been read:
#  1. read the opening brace and the template tokens up to \cr;
#  2. convert the template into a LaTeXML::Core::Alignment::Template;
#  3. establish the alignment bindings and digest the body as the whatsit's body.
# The constructor itself merely emits the digested body.
DefConstructor('\halign BoxSpecification',
  '#body',
  reversion   => '\halign #1{#2\cr#3}',
  bounded     => 1,
  sizer       => '#1',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $gullet = $stomach->getGullet;
    my $t      = $gullet->readNonSpace;
    Error('expected', '\bgroup', $stomach, "Missing \\halign box") unless Equals($t, T_BEGIN);
    # Read the template up till something equivalent to \cr
    my @template = ();
    # Only expand certain things; See TeX book p.238
    while (($t = $gullet->readToken(0)) && !Equals($t, T_CS('\cr'))) {
      if ($t->equals(T_CS('\tabskip'))) {    # Read the tabskip assignment
        $gullet->readKeyword('=');
        $gullet->readGlue; }                 # Discard! In principle, should store in template!
      elsif ($t->equals(T_CS('\span'))) {    # ex-span-ded next token.
        $gullet->unread($gullet->readXToken(0)); }
      else {
        push(@template, $t); } }
    # Convert the template
    my $ismath  = $STATE->lookupValue('IN_MATH');
    my $before  = 1;                         # true if we're before a # in current column
    my @pre     = ();                        # tokens before the # of the current column
    my @post    = ();                        # tokens after the # of the current column
    my @cols    = ();                        # column specs collected so far
    my @nonreps = ();                        # columns preceding a "repeat" marker (&&)
    foreach my $t (@template, T_ALIGN) {     # put & at end, to save column!
      if ($t->equals(T_PARAM)) {
        $before = 0; }
      elsif ($t->equals(T_ALIGN)) {
        if ($before) { @nonreps = @cols; @cols = (); }    # A & while we're before a column means Repeated columns
        else {                                            # Finished column spec; add it
              # Try some magic for math, so we can create a valid math matrix (maybe!)
              # DAMN \halign can't be in math, anyway.
              # So, to get a matrix, we'll have to rewrite the alignment!
          if ($ismath) {
            push(@pre, T_MATH); unshift(@post, T_MATH); }
          push(@cols, { before => Tokens(stripDupMath(beforeCellUnlist(Tokens(@pre)))),
              after => Tokens(stripDupMath(afterCellUnlist(Tokens(@post)))) });
          @pre = @post = (); $before = 1; } }
      # Any other token belongs to the current part; leading spaces are dropped.
      elsif ($before) {
        push(@pre, $t) if @pre || !$t->equals(T_SPACE); }
      else {
        push(@post, $t) if @post || !$t->equals(T_SPACE); } }
    my $template = LaTeXML::Core::Alignment::Template->new((@nonreps ?
          (columns => [@nonreps], repeated => [@cols])
        : (columns => [@cols])));
    # print STDERR "Template = ".Stringify(Tokens(@template))."\n => ".$template->show."\n";
    # Now read & digest the body.
    # Note that the body MUST end with a \cr, and that we've made Special Arrangements
    # with \alignment@cr to recognize the end of the \halign
    # and sneak a \@finish@alignment in!!!!!
    # (otherwise none of the row/column/alignment constructors know when to end, as written)
    my $spec = $whatsit->getArg(1);
    # The "to" key of the box specification, if any, becomes the width attribute.
    alignmentBindings($template, undef,
      attributes => {
        width => orNull(GetKeyVal($spec, 'to')) });
    $stomach->bgroup;    # This will be closed by the \halign's closing }
    $gullet->unread(T_CS('\@start@alignment'));
    $whatsit->setBody($stomach->digestNextBody, undef);    # extra undef as dummy "trailer"
    # A "spread" key widens the whatsit beyond the body's own width.
    if (my $s = GetKeyVal($spec, 'spread')) {
      $whatsit->setWidth($whatsit->getBody->getWidth->add($s)); }
    return; });
# Cleanup the pre & post tokens for halign columns in math mode.
# If a pair of $..$ enclose stuff that is "OK" in math mode, we don't need the $.
# Note that the 1st $ is switching OUT of math mode!
# stripDupMath(@tokens)
# Returns @tokens with "empty" math-shift pairs removed: the $ tokens are
# paired up from the right (if their number is odd, the leftmost one is left
# unpaired), and each pair whose two $ are directly adjacent is deleted.
sub stripDupMath {
  my @tokens  = @_;
  my @dollars = grep { Equals($tokens[$_], T_MATH) } 0 .. $#tokens;
  shift(@dollars) if @dollars % 2;    # Get pairs!
  # Visit the pairs right-to-left, so removals never disturb positions still to be used.
  for (my $i = $#dollars ; $i > 0 ; $i -= 2) {
    my ($open, $close) = @dollars[$i - 1, $i];
    splice(@tokens, $open, 2) if $close == $open + 1; }
  return @tokens; }
# "Initialized" alignment; presets spacing, but since we're ignoring it anyway...
# \ialign is simply an alias of \halign here.
Let('\ialign', '\halign');
# Overlapping alignments ???
# \oalign{body}: the body is bracketed by \@start@alignment ... \@finish@alignment
# and handed to \@@oalign, which sets up the alignment bindings with the
# template 'l' before digesting, in text mode.
DefMacro('\oalign{}',
'\@@oalign{\@start@alignment#1\@finish@alignment}');
DefConstructor('\@@oalign{}',
'#1',
reversion => '\oalign{#1}', bounded => 1, mode => 'text',
beforeDigest => sub { alignmentBindings('l'); });
# This is actually different; the lines should lie on top of each other.
# How should this be represented?
# For now \ooalign is handled exactly like \oalign (apart from its reversion).
DefMacro('\ooalign{}',
'\@@ooalign{\@start@alignment#1\@finish@alignment}');
DefConstructor('\@@ooalign{}',
'#1',
reversion => '\ooalign{#1}', bounded => 1, mode => 'text',
beforeDigest => sub { alignmentBindings('l'); });
#----------------------------------------------------------------------
# These determine whether the _next_ paragraph gets indented!
# thus it needs \par to check whether such indentation has been set.
# Both record a class in next_para_class; \par's afterDigest picks it up and clears it.
DefPrimitiveI('\indent', undef, sub { AssignValue(next_para_class => 'ltx_indent'); });
DefPrimitiveI('\noindent', undef, sub { AssignValue(next_para_class => 'ltx_noindent'); });
# <ltx:para> represents a Logical Paragraph, whereas <ltx:p> is a `physical paragraph'.
# A para can contain both p and displayed equations and such.
# Remember; \par _closes_, not opens, paragraphs!
# Here, we want to close both an open p and para (if either are open).
# Digestion (afterDigest): in the preamble, just flag the whatsit as inPreamble.
# Otherwise transfer any pending next_para_class (from \indent/\noindent) to the
# whatsit's 'class' property, clear it, and digest the \LTX@vadjust@afterpar hooks.
# Construction: nothing in the preamble; otherwise close an open ltx:p, set the
# class on the ltx:para about to close (if that is the current element), and
# then close the ltx:para.
# Marked alignmentSkippable so it does not count as cell content in alignments.
DefConstructorI('\par', undef, sub {
my ($document, %props) = @_;
if ($props{inPreamble}) { }
else {
$document->maybeCloseElement('ltx:p');
if (my $c = $props{class}) {
my $node = $document->getElement;
if ($node && $document->getNodeQName($node) eq 'ltx:para') { # Only set on the para about to close!
$document->setAttribute($node, class => $c); } }
$document->maybeCloseElement('ltx:para'); } },
afterDigest => sub {
if (LookupValue('inPreamble')) {
$_[1]->setProperty(inPreamble => 1); }
else {
if (my $c = LookupValue('next_para_class')) {
$_[1]->setProperty(class => $c);
AssignValue(next_para_class => undef); }
# NOTE(review): the result of this Digest is not used here; presumably the
# tokens act through side effects -- confirm.
Digest(Tokens(T_CS('\LTX@vadjust@afterpar'),
T_CS('\LTX@clear@vadjust@afterpar'))); } },
properties => { alignmentSkippable => 1 },
alias => "\\par\n");
# OTOH, sometimes \par is just a minimalistic "start a new line"
# This should be closer for those cases.
# Close an open ltx:p if there is one; failing that, insert an ltx:break,
# provided the current node can contain one.  Otherwise do nothing.
DefConstructorI('\inner@par', undef, sub {
if ($_[0]->maybeCloseElement('ltx:p')) { }
elsif ($_[0]->canContain($_[0]->getNode, 'ltx:break')) {
$_[0]->insertElement('ltx:break'); } });
# Logical paragraphs open and close automatically as needed.
Tag('ltx:para', autoClose => 1, autoOpen => 1);
# trimNodeWhitespace($document, $node)
# Trims whitespace from both edges of $node's content: first the leading
# edge, then the trailing edge.  Returns nothing.
sub trimNodeWhitespace {
  my ($document, $node) = @_;
  foreach my $trimmer (\&trimNodeLeftWhitespace, \&trimNodeRightWhitespace) {
    $trimmer->($document, $node); }
  return; }
# trimNodeLeftWhitespace($document, $node)
# Removes leading plain spaces from the first text found by following the
# chain of first children down from $node.  Only ' ' is removed (deliberately
# not \s, so that a non-breaking space survives).  Returns nothing.
sub trimNodeLeftWhitespace {
  my ($document, $node) = @_;
  my @kids = $node->childNodes;
  return unless @kids;
  my $first = $kids[0];
  my $kind  = $first->nodeType;
  if ($kind == XML_ELEMENT_NODE) {
    # Descend: the leading text may be nested inside this element.
    trimNodeLeftWhitespace($document, $first); }
  elsif ($kind == XML_TEXT_NODE) {
    my $text = $first->data;
    # with some trepidation, I don't think we want to trim nbsp!
    $first->setData($text) if $text =~ s/^ +//; }
  return; }
# trimNodeRightWhitespace($document, $node)
# Removes trailing whitespace (anything matching \s) from the last text found
# by following the chain of last children down from $node.  Returns nothing.
sub trimNodeRightWhitespace {
  my ($document, $node) = @_;
  my @kids = $node->childNodes;
  return unless @kids;
  my $last = $kids[-1];
  my $kind = $last->nodeType;
  if ($kind == XML_ELEMENT_NODE) {
    # Descend: the trailing text may be nested inside this element.
    trimNodeRightWhitespace($document, $last); }
  elsif ($kind == XML_TEXT_NODE) {
    my $text = $last->data;
    $last->setData($text) if $text =~ s/\s+$//; }
  return; }
# Physical paragraphs open and close automatically; when one closes, whitespace
# at both edges of its content is trimmed.
Tag('ltx:p', autoClose => 1, autoOpen => 1, afterClose => \&trimNodeWhitespace);
# \dump ???
# \end flushes the gullet (presumably discarding whatever input remains -- confirm
# against Gullet::flush) and produces nothing.
DefPrimitiveI('\end', undef, sub { $_[0]->getGullet->flush; return; });
#======================================================================
# Horizontal Mode primitives in Ch.25, pp.285--287
# The following cause tex to start a new paragraph -- they switch to horizontal mode.
# <horizontal command> = <letter> | <other> | \char | <chardef token>
# | \noboundary | \unhbox | \unhcopy | \valign | \vrule
# | \hskip | \hfil | \hfill | \hss | \hfilneg
# | \accent | \discretionary | \- | \<space> | $
# a candidate for use by \hskip, \hspace, etc... ?
# DimensionToSpaces($dimen)
# Approximates a horizontal dimension by a Box of space characters.
# The dimension (in points) is divided by the current font's size, taken as
# 1em, and the result is mapped as follows:
#   < 0.01 em : nothing at all (this includes negative amounts)
#   < 0.17 em : U+2006
#   < 0.30 em : U+2005
#   < 0.40 em : U+2004
#   otherwise : int((ems + 0.3) / 0.333) copies of U+00A0
sub DimensionToSpaces {
  my ($dimen) = @_;
  my $em_size = LookupValue('font')->getSize;    # 1 em
  my $points  = $dimen->ptValue;
  my $ems     = $points / $em_size;
  return if $ems < 0.01;
  return Box("\x{2006}") if $ems < 0.17;         # 6/em
  return Box("\x{2005}") if $ems < 0.30;         # 4/em
  return Box("\x{2004}") if $ems < 0.40;         # 3/em (same as nbsp?)
  # Wider than that: a run of non-breaking spaces.
  my $count = int(($ems + 0.3) / 0.333);         # 10pts per space...?
  return Box((UTF(0xA0) x $count)); }
# \noboundary does nothing here.
DefPrimitiveI('\noboundary', undef, undef);
# \hskip and \mskip dispatch on math mode to a math or a text variant.
DefMacro('\hskip Glue', '\ifmmode\@math@hskip #1\relax\else\@text@hskip #1\relax\fi');
DefMacro('\mskip MuGlue', '\ifmmode\@math@mskip #1\relax\else\@text@mskip #1\relax\fi');
# In math: an ltx:XMHint carrying the width; the whatsit is marked isSpace.
DefConstructor('\@math@hskip Glue',
"<ltx:XMHint width='#1'/>",
alias => '\hskip',
properties => sub { (width => $_[1], isSpace => 1); }
);
# In text: approximate the skip by space characters (see DimensionToSpaces).
DefPrimitive('\@text@hskip Glue', sub {
my ($stomach, $length) = @_;
DimensionToSpaces($length); },
alias => '\hskip');
# Same pair again for \mskip, taking MuGlue.
DefConstructor('\@math@mskip MuGlue',
"<ltx:XMHint width='#1'/>",
alias => '\mskip',
properties => sub { (width => $_[1], isSpace => 1); }
);
DefPrimitive('\@text@mskip MuGlue', sub {
my ($stomach, $length) = @_;
DimensionToSpaces($length); },
alias => '\mskip');
# \hss does nothing here.
DefPrimitiveI('\hss', undef, undef);
# \hfil and \hfill: a named ltx:XMHint in math, a single space in text.
# The isFill property is what the alignment code looks for when inferring
# how a column is aligned.
DefConstructorI('\hfil', undef, "?#isMath(<ltx:XMHint name='hfil'/>)( )",
properties => { isSpace => 1, isFill => 1 });
DefConstructorI('\hfill', undef, "?#isMath(<ltx:XMHint name='hfill'/>)( )",
properties => { isSpace => 1, isFill => 1 });
# \hfilneg does nothing here.
DefPrimitiveI('\hfilneg', undef, undef);
# \lower <dimen> <box>
# \raise <dimen> <box>
# But <box> apparently must really explicitly be an \hbox, \vbox or \vtop (?)
# OR something that expands into one!!
# \lower <dimen> <box>: wraps the box in ltx:text with yoffset set to the
# NEGATED dimension (property y is computed after digestion).
DefConstructor('\lower Dimension MoveableBox',
"<ltx:text yoffset='#y' _noautoclose='1'>#2</ltx:text>",
afterDigest => sub {
$_[1]->setProperty(y => $_[1]->getArg(1)->multiply(-1)); });
# \raise <dimen> <box>: same, with yoffset set to the dimension as given.
DefConstructor('\raise Dimension MoveableBox',
"<ltx:text yoffset='#y' _noautoclose='1'>#2</ltx:text>",
afterDigest => sub {
$_[1]->setProperty(y => $_[1]->getArg(1)); });
# \unhbox<8bit>, \unhcopy<8bit>
# Box registers are stored in STATE under the key 'box<n>'.
# \unhbox<n>: return the unlisted contents of box n (nothing if unset) and
# clear the register.
DefPrimitive('\unhbox Number', sub {
my $box = 'box' . $_[1]->valueOf;
my $stuff = LookupValue($box);
AssignValue($box, undef);
(defined $stuff ? $stuff->unlist : ()); });
# \unhcopy<n>: same, but the register is left intact.
DefPrimitive('\unhcopy Number', sub {
my $box = 'box' . $_[1]->valueOf;
my $stuff = LookupValue($box);
(defined $stuff ? $stuff->unlist : ()); });
# \vrule
# \valign ???
# \vspace{len} is just \vskip len, terminated by \relax.
DefMacro('\vspace{}', '\vskip#1\relax');
# \indent, \noindent, \par; see above.
# Only the third argument (the no-break text) is kept.
DefMacro('\discretionary{}{}{}', '#3'); # No hyphenation here!
# \- does nothing here.
DefPrimitiveI('\-', undef, undef);
# \setlanguage reads its number and does nothing with it.
DefPrimitive('\setlanguage Number', undef);
#======================================================================
# Math mode stuff
# See TeXBook Ch.26
#======================================================================
# Decide whether we're going into or out of math, inline or display.
Tag('ltx:XMText', autoOpen => 1, autoClose => 1);
# The conventional $: chooses which math hook to invoke from the current MODE.
#   display_math : "$$" ends the display; a lone "$" is reported as an error and ignored.
#   inline_math  : "$" ends the inline math.
#   otherwise    : "$$" begins display math; a single "$" begins inline math.
DefPrimitiveI(T_MATH, undef, sub {
my ($stomach) = @_;
my $gullet = $stomach->getGullet;
my $mode = LookupValue('MODE');
# Default, unless one of the cases below applies.
my $op = '\@@BEGININLINEMATH';
if ($mode eq 'display_math') {
if ($gullet->ifNext(T_MATH)) {
# Consume the second $ of the closing $$.
$gullet->readToken;
$op = '\@@ENDDISPLAYMATH'; }
else {
# Avoid a Fatal, but we're likely in trouble.
# Should we switch to text mode? (LaTeX normally wouldn't)
# Did we miss something, so that we should already have been in text mode? Possibly...
Error('expected', '$', $stomach,
"Missing \$ closing display math.",
"Ignoring; expect to be in wrong math/text mode.");
$op = undef; } }
elsif ($mode eq 'inline_math') {
$op = '\@@ENDINLINEMATH'; }
# elsif(!LookupValue('Alignment') && $gullet->ifNext(T_MATH)){
elsif ($gullet->ifNext(T_MATH)) {
# Consume the second $ of the opening $$.
$gullet->readToken;
$op = '\@@BEGINDISPLAYMATH'; }
$stomach->invokeToken(T_CS($op)) if $op; });
# Let this be the default, conventional $
Let(T_CS('\@dollar@in@normalmode'), T_MATH);
# Effectively these are the math hooks, redefine these to do what you want with math?
# Begin display math: enters display_math mode before digestion, captures the
# following body, and wraps it as ltx:equation/ltx:Math[mode=display]/ltx:XMath.
DefConstructorI('\@@BEGINDISPLAYMATH', undef,
"<ltx:equation>"
. "<ltx:Math mode='display'>"
. "<ltx:XMath>"
. "#body"
. "</ltx:XMath>"
. "</ltx:Math>"
. "</ltx:equation>",
alias => '$$',
beforeDigest => sub { $_[0]->beginMode('display_math'); },
captureBody => 1);
# End display math: constructs nothing, just leaves display_math mode.
DefConstructorI('\@@ENDDISPLAYMATH', undef, "", alias => '$$',
beforeDigest => sub { $_[0]->endMode('display_math'); });
# Begin inline math: enters inline_math mode before digestion, captures the
# following body, and wraps it as ltx:Math[mode=inline]/ltx:XMath.
DefConstructorI('\@@BEGININLINEMATH', undef,
"<ltx:Math mode='inline'>"
. "<ltx:XMath>"
. "#body"
. "</ltx:XMath>"
. "</ltx:Math>",
alias => '$', beforeDigest => sub { $_[0]->beginMode('inline_math'); }, captureBody => 1);
# End inline math: constructs nothing, just leaves inline_math mode.
DefConstructorI('\@@ENDINLINEMATH', undef, "", alias => '$',
beforeDigest => sub { $_[0]->endMode('inline_math'); });
# Add the TeX code from the object that created this node,
# unless it has already been recorded on another node.
# add_TeX($document, $node, $thing)
# Records on $node the TeX source of $thing, the object that created it:
# attribute 'tex' gets the presentation form and, if it differs, 'content-tex'
# gets the content form.  Only acts when $thing is a LaTeXML::Core::Whatsit
# that has not been recorded before; the whatsit is then flagged '_added_tex'.
# Returns nothing.
sub add_TeX {
  my ($document, $node, $thing) = @_;
  return unless $thing;
  return unless ref($thing) eq 'LaTeXML::Core::Whatsit';
  return if $thing->getProperty('_added_tex');
  # Untex twice, once per dual branch; the setting is restored on return.
  local $LaTeXML::DUAL_BRANCH = 'presentation';
  my $presentation = UnTeX($thing);
  $LaTeXML::DUAL_BRANCH = 'content';
  my $content = UnTeX($thing);
  $document->setAttribute($node, tex => $presentation);
  if ($content ne $presentation) {
    $document->setAttribute($node, 'content-tex' => $content); }
  $thing->setProperty('_added_tex', 1);
  return; }
# Same as add_TeX, but add the code from the body of the object.
# add_body_TeX($document, $node, $thing)
# Like add_TeX, but records the TeX source of $thing's 'body' property rather
# than of $thing itself.  If there is no body, no attribute is set; either
# way $thing is flagged '_added_body_tex' so it is handled only once.
# Returns nothing.
sub add_body_TeX {
  my ($document, $node, $thing) = @_;
  return unless $thing && !$thing->getProperty('_added_body_tex');
  my $body = $thing->getProperty('body');
  if (defined $body) {
    # Untex twice, once per dual branch; the setting is restored after this block.
    local $LaTeXML::DUAL_BRANCH = 'presentation';
    my $presentation = UnTeX($body);
    $LaTeXML::DUAL_BRANCH = 'content';
    my $content = UnTeX($body);
    $document->setAttribute($node, tex => $presentation);
    if ($content ne $presentation) {
      $document->setAttribute($node, 'content-tex' => $content); } }
  $thing->setProperty('_added_body_tex', 1);
  return; }
# When an ltx:Math closes: record the TeX of its body, and tidy up its content.
# NOTE(review): presumably the two afterClose handlers accumulate and run in
# this order -- confirm against Tag's handling of repeated afterClose.
Tag('ltx:Math', afterClose => \&add_body_TeX);
Tag('ltx:Math', afterClose => \&cleanup_Math);
# Cleanup ltx:Math elements; particularly if they aren't "really" math.
# But record the oddity with class=ltx_markedasmath
# cleanup_Math($document, $mathnode)
# Called when an ltx:Math closes.
# If the XMath has any element child other than XMText, it is genuine math:
# only the XMTexts within it are tidied (see cleanup_XMText_outer).
# Otherwise the Math is not really math at all: it is replaced by the contents
# of its XMTexts, each turned into an element if necessary and tagged with
# class ltx_markedasmath to record where it came from.
# Returns nothing.
sub cleanup_Math {
  my ($document, $mathnode) = @_;
  if ($document->findnodes('ltx:XMath/ltx:*[local-name() != "XMText"]', $mathnode)) {
    # Real math; cleanup any remaining XMTexts.
    cleanup_XMText_outer($document, $mathnode);
    return; }
  # Math ONLY containing XMath/XMText: unwrap down to the contents of the XMText's.
  my @xmaths   = $mathnode->childNodes;
  my @xmtexts  = map { $_->childNodes } @xmaths;
  my @contents = map { $_->childNodes } @xmtexts;
  my @pieces   = map {
    my $piece = $_;
    # Make sure we've got an element to hang the class on.
    $piece = $document->wrapNodes('ltx:text', $piece) if $piece->nodeType != XML_ELEMENT_NODE;
    $document->addClass($piece, 'ltx_markedasmath');
    $piece; } @contents;
  # Replace the whole Math with the pieces.
  $document->replaceNode($mathnode, @pieces);
  return; }
# Here's for an inverse case: when an XMText isn't "really" just text
# if it only contains a Math, OR a tabular with only Math in the cells?
# First case: pull it back into the math, but in an XMWrap to isolate it for parsing.
# Should we just pull any mixed text math up or only a single Math?
# For the tabular case, convert it to an XMArray.
# Note that normally, we'd do afterClose on ltx:XMText,
# but since the ltx:XMText closes before the outer ltx:Math,
# we would keep cleanup_Math from recognizing the trivial case of
# a single ltx:tabular in an equation (perverse, but people do that).
# So, we put this one on ltx:Math also, and scan for any contained XMText to fixup.
# cleanup_XMText_outer($document, $mathnode)
# Applies cleanup_XMText to every ltx:XMText found anywhere below $mathnode.
# Returns nothing.
sub cleanup_XMText_outer {
  my ($document, $mathnode) = @_;
  my @xmtexts = $document->findnodes('descendant::ltx:XMText', $mathnode);
  for my $xmtext (@xmtexts) {
    cleanup_XMText($document, $xmtext); }
  return; }
# Simplify a single ltx:XMText node ($textnode) of $document, in three steps:
#  (1) Strip redundant wrappers: as long as the XMText has exactly one child and that
#      child is an ltx:text, an ltx:p, or an ltx:inline-block with a single element child,
#      copy the child's font and attributes onto the XMText and unwrap the child.
#  (2) If what remains is a single ltx:Math, rename the XMText to ltx:XMWrap
#      (to isolate it for parsing) and replace the Math by its grandchildren
#      (ie. the contents of its XMath).
#  (3) Otherwise, if what remains is a single ltx:tabular, turn it into
#      ltx:XMArray/ltx:XMRow/ltx:XMCell and unwrap the XMText itself.
# Returns nothing; the document is modified in place.
sub cleanup_XMText {
  my ($document, $textnode) = @_;
  # We're really only interested in reducing nested math, right?
  # But actually also collapsing ltx:XMText/ltx:text
  my $model = $document->getModel;
  my @children;
  # Apply "outer" simplifications: remove ltx:text, ltx:p or ltx:inline-block wrappings.
  # Note that @children is refreshed on every test, so after the loop it holds
  # the children of the (possibly unwrapped) XMText.
  while ((@children = $textnode->childNodes) && (scalar(@children) == 1)
    && $document->findnodes('ltx:text'
        . ' | ltx:inline-block[count(*)=1]'
        . ' | ltx:p',
      $textnode)) {
    my $child = $children[0];
    $document->setNodeFont($textnode, $document->getNodeFont($child));
    foreach my $attr ($child->attributes) {    # Copy the child's attributes (should Merge!!)
      $textnode->setAttribute($attr->nodeName => $attr->getValue); }
    $document->unwrapNodes($child); }
  # Now apply a simplifying rule for nested Math:
  # If the XMText contains a single Math, pull its content up.
  if ((scalar(@children) == 1) && $document->findnodes('ltx:Math', $textnode)) {
    # Replace XMText by XMWrap/* (this should preserve the parse?)
    $textnode = $document->renameNode($textnode, 'ltx:XMWrap');
    $document->replaceNode($children[0], map { $_->childNodes } $children[0]->childNodes); }
  # # # RISKY!!!! If SOME nodes are math...
  # # # pull the whole sequence up, unwrap the math and putting the rest back in XMText.
  # # # Even with the XMWrap, this seems to wreak havoc on parsing and structure?
  # # if($document->findnodes('ltx:Math',$textnode)){
  # #   # Replace XMText by XMWrap/* (this should preserve the parse?)
  # #   $textnode=$document->renameNode($textnode,'ltx:XMWrap');
  # #   foreach my $child (@children){
  # #     if($model->getNodeQName($child) eq 'ltx:Math'){
  # #       $document->replaceNode($child,map($_->childNodes,$child->childNodes)); }
  # #     else {
  # #       $document->wrapNodes('ltx:XMText',$child); }}}
  # If a single tabular that ONLY(?) contains Math, turn into an XMArray
  # Well, a tabular REALLY shouldn't be in math;
  # How much math should determine the switch?
  # [will alignment attributes be lost?]
  elsif ((scalar(@children) == 1) && ($model->getNodeQName($children[0]) eq 'ltx:tabular')
    ## Should we ALWAYS do this, or just for some minimal amount of math???
    ##   && !$document->findnodes('ltx:tabular/ltx:tr/ltx:td/text()'
    ##                            .' | ltx:tabular/ltx:tbody/ltx:tr/ltx:td/text()'
    ##                            .' | ltx:tabular/ltx:tr/ltx:td[not(ltx:Math)]'
    ##                            .' | ltx:tabular/ltx:tbody/ltx:tr/ltx:td[not(ltx:Math)]',
    ##                            $textnode)
  ) {
    # First step is remove any ltx:tbody from the tabular!
    foreach my $tb ($document->findnodes('ltx:tabular/ltx:tbody', $textnode)) {
      $document->unwrapNodes($tb); }
    # Now, we can start replacing tabular=>XMArray, tr=>XMRow, td=>XMCell
    my $table = $document->renameNode($children[0], 'ltx:XMArray');
    foreach my $row ($table->childNodes) {
      $row = $document->renameNode($row, 'ltx:XMRow');
      foreach my $cell ($row->childNodes) {
        $cell = $document->renameNode($cell, 'ltx:XMCell');
        foreach my $m ($cell->childNodes) {
          if ($model->getNodeQName($m) eq 'ltx:Math') {    # Math cell, unwrap the Math/XMath layer
            $document->replaceNode($m, map { $_->childNodes } $m->childNodes); }
          else {    # Otherwise, wrap whatever it is in an XMText
            $document->wrapNodes('ltx:XMText', $m); }
    } } }
    # And now we don't need the XMText any more.
    $document->unwrapNodes($textnode); }
  return; }
#**********************************************************************
# Support for MathFork.
#**********************************************************************
# [Note: this block of code seems like it belongs somewhere else]
# A MathFork supports document-level alignment of math,
# by collecting equations into an equationgroup. Each equation can contain
# one or more MathFork structures which separate the semantically meaningful
# equation (if possible) from the collection of rows and/or column fragments
# for alignment. The goal is to be able to present the aligned structure
# composed of various mathematical fragments in a grid, and yet still represent
# the (presumably) meaningful complete formula.
#
# The structure looks like
# <MathFork>
# <Math><XMath>...</XMath></Math>
# <MathBranch>..</MathBranch>
# </MathFork>
# The initial, "main", Math will contain a complete formula (hopefully).
# The MathBranch will typically contain one or more <tr>, each of which
# contains one or more <td>, each of which contains a <Math> representing
# a cell of the aligned structure.
#======================================================================
# openMathFork($document,$equation) will add a MathFork structure
# to the given $equation, and return ($main, $branch)
# where $main is the initial <ltx:Math> and $branch is the <ltx:MathBranch>.
# You'll probably want to be adding Stuff to one or both of $main & $branch.
# Most typically, you'll be finding math fragments that you've found in the
# current content of $equation and adding them into both $main & $branch
# using addColumnToMathFork.
# Open ltx:MathFork/(ltx:Math/ltx:XMath, ltx:MathBranch) inside $equation.
# The main ltx:Math is given an (initially empty) MathWhatsit as its box.
# Returns the pair ($main, $branch): the ltx:Math and the ltx:MathBranch nodes.
sub openMathFork {
  my ($document, $equation) = @_;
  my $mathfork = $document->openElementAt($equation, 'ltx:MathFork');
  # Main branch first: a Math that starts out EMPTY, holding an XMath.
  my $mainmath = $document->openElementAt($mathfork, 'ltx:Math', _box => MathWhatsit());
  my $content  = $document->openElementAt($mainmath, 'ltx:XMath');    # opened for effect only
  # Then the branch which will hold the aligned rows & columns.
  my $mathbranch = $document->openElementAt($mathfork, 'ltx:MathBranch');
  return ($mainmath, $mathbranch); }
# Close the appropriate elements of an ltx:MathFork created with openMathFork.
# Close the elements that openMathFork opened: the ltx:MathBranch, then the first
# child element of $main (the ltx:XMath), then $main itself, and finally the
# last ltx:MathFork child of $equation.  Returns nothing.
sub closeMathFork {
  my ($document, $equation, $main, $branch) = @_;
  # Close from the inside out.
  foreach my $node ($branch, $document->getFirstChildElement($main), $main) {
    $document->closeElementAt($node); }
  # We do NOT use $main->parentNode to find the fork:
  # Sometimes we end up with a DocumentFragment as parent of $main????!?!?!?!
  # So, more defensively, look the MathFork up again from the equation.
  my @forks = $document->findnodes('ltx:MathFork', $equation);
  my $lastfork = $forks[-1];
  $document->closeElementAt($lastfork);
  return; }
# Create an inline math Whatsit from a list of math Boxes, Lists or Whatsits.
# Note that we unwrap @hidden@bgroup's (!) and normalize \displaystyle (!)
# This is primarily useful for synthesizing the Box for a newly created ltx:Math
# that is synthesized from other math content within a ltx:MathFork.
# Build an inline-math Whatsit (\@@BEGININLINEMATH ... \@@ENDINLINEMATH) whose body
# is the given boxes, after:
#   * dropping false items and flattening each remaining item with unlist;
#   * replacing any \@hidden@bgroup Whatsit by the contents of its body;
#   * if any item reverts to \displaystyle, keeping only the first such item, moved to the front.
# The locator is taken from the first item that provides one.
sub MathWhatsit {
  my (@items) = @_;
  my $hidden_group = LookupDefinition(T_CS('\@hidden@bgroup'));
  # Flatten the incoming boxes/lists, then open up the hidden groups.
  my @flattened = map { $_->unlist } grep { $_ } @items;
  my @boxes     = ();
  foreach my $box (@flattened) {
    if ((ref $box eq 'LaTeXML::Core::Whatsit') && ($box->getDefinition eq $hidden_group)) {
      push(@boxes, $box->getBody->unlist); }
    else {
      push(@boxes, $box); } }
  # Use the first locator we can find.
  my $locator = undef;
  foreach my $box (@boxes) {
    $locator = $box->getLocator;
    last if $locator; }
  # Normalize \displaystyle: at most one, and at the start.
  my @displaystyles = grep { UnTeX($_) eq '\displaystyle' } @boxes;
  if (@displaystyles) {
    my @others = grep { UnTeX($_) ne '\displaystyle' } @boxes;
    @boxes = ($displaystyles[0], @others); }
  return LaTeXML::Core::Whatsit->new(LookupDefinition(T_CS('\@@BEGININLINEMATH')), [],
    body    => List(@boxes, mode => 'math'),
    trailer => T_CS('\@@ENDINLINEMATH'),
    locator => $locator, isMath => 1); }
#======================================================================
# Add a new table column (ltx:td) into the ltx:MathBranch of a ltx:MathFork.
# The insertion point will be at $inbranch, presumably an ltx:tr
# [created in the ltx:MathBranch using: $document->openElementAt($branch,'ltx:tr'); ]
# The content of $cell (an ltx:_Capture_) is typically a single ltx:Math.
# (but occasionally mixed math & ltx:text; some cases may need more semantic analysis?)
# The content of $cell will be MOVED into the new column (ltx:td), (w/ ID's intact)
# and CLONED (w/modified ID's) onto the end of the first child of the main branch, $main,
# of the ltx:MathFork [A Math Whatsit is also synthesized for the main branch, for TeX, etc!].
# Thus, the collection of rows/columns fragments represents the alignment,
# while the main branch synthesizes the (presumed) semantic whole.
# The now-empty $cell is then removed from its parent & the document.
# addColumnToMathFork($document, $main, $inbranch, $cell)
#   $main     : the main ltx:Math of the MathFork (its first child element receives the clones)
#   $inbranch : the node under which the new ltx:td is opened
#   $cell     : the ltx:_Capture_ node whose children are distributed
# Opens an ltx:td at $inbranch (copying align & colspan from $cell), detaches $cell,
# and for each child of $cell: clones it into the main branch (with ID suffix '.mf'),
# extends the box recorded on $main, and then appends the node itself to the td.
# Returns nothing.
sub addColumnToMathFork {
my ($document, $main, $inbranch, $cell) = @_;
my $td = $document->openElementAt($inbranch, 'ltx:td');
# Carry the alignment attributes of the captured cell over to the new column.
if (my $align = $cell->getAttribute('align')) {
$document->setAttribute($td, align => $align); }
if (my $colspan = $cell->getAttribute('colspan')) {
$document->setAttribute($td, colspan => $colspan); }
# Remove the _Capture_ from the document; parts will get cloned &/or reinserted
$cell->unbindNode;
# Usually, we will have captured a single ltx:Math node, but occasionally text?
foreach my $node ($cell->childNodes) {
# Add a Clone of the cell's contents to the main branch (This will get modified id's)
# The suffix is localized per iteration, so it is in effect for everything in this loop body.
local $LaTeXML::Core::Document::ID_SUFFIX = '.mf';
# Usually, an ltx:Math element will be the complete content of the _Capture_ (cell)
my $type = $document->getNodeQName($node);
# $box stays undefined when there is nothing to add to the main branch's box.
my $box;
if ($type eq 'ltx:Math') {
if (my $xmath = $document->getFirstChildElement($node)) {
# But we CLONE the contents of its ltx:XMath onto the end of
# the main ltx:Math/ltx:XMath (under $main), modifying id's along the way.
$document->appendClone($document->getFirstChildElement($main),
$document->getChildElements($xmath));
# Add the boxes from this cell to the previously collected ones in the main branch.
$box = $document->getNodeBox($node)->getBody; } }
# The next two cases are unusual (slightly unexpected?),
# typically coming from abused eqnarrays? May need more analysis preceding the MathFork'ing!
elsif ($type eq 'ltx:text') {
# Empty text contributes nothing at all: not even an (empty) entry in the td.
next if $node->textContent eq '';
# Non-empty text is cloned into the main branch, wrapped in an ltx:XMText.
my $txt = $document->openElementAt($document->getFirstChildElement($main), 'ltx:XMText');
$document->appendClone($txt, $node);
$document->closeElementAt($txt);
$box = $document->getNodeBox($node); }
elsif ($type eq '#PCDATA') {
my $string = $node->textContent;
next if $string eq '';
# Raw character data is copied as text into a new ltx:XMText, with a freshly made Box.
my $txt = $document->openElementAt($document->getFirstChildElement($main), 'ltx:XMText');
$txt->appendText($string);
$document->closeElementAt($txt);
$box = Box($string); }
else {
# Anything else is only warned about; note that it is still moved into the td below.
Warn('unexpected', $type, $cell,
"Don't know how to synthesize equation with $type in column"); }
# Add the boxes from this cell to the previously collected ones in the main branch.
if ($box) {
my $composed = MathWhatsit($document->getNodeBox($main)->getBody, $box);
$document->setNodeBox($main, $composed);
$document->setNodeBox($main->firstChild, $composed); } # And also to the XMath element!
# Finally MOVE (really copy) the node from the _Capture_ ($cell) to the td (in the fork)
# this keeps the same IDs as original; (& appendTree may remove id's from $node!)
$document->unRecordNodeIDs($node);
$document->appendTree($td, $node); }
# We can now remove the _Capture_ (and anything still in it?)
# [already done above, before the loop]
# $cell->unbindNode;
$document->closeElementAt($td);
return; }
#======================================================================
# Higher level support for equationgroups
# equationgroups hold a collection of equations
# each of which will likely have MathFork within that separates
# the complete semantic expression from a collection of rows & column cells.
# The latter are used to present an aligned set of equations;
# the former hopefully will be useful for the math....?
#
# Typically, there will be some sort of alignment macros, using &
# that will be set up to INITIALLY build an arrangement like:
# <equationgroup>
# <equation><_Capture_>cellmath</_Capture_>...</equation>
# that is, an <equation> for each row.
# Afterwards, we can analyze the cells and determine how the cells and/or rows
# will be divided up into "real" equations, and insert some MathFork's to reflect.
# For example, the <equationgroup> represents a whole eqnarray,
# and (initially, at least) the rows are represented as <equation>'s.
# Some analysis hopefully allows us to recognize which rows belong together.
# Given an ltx:equationgroup containing several ltx:equations (representing rows),
# equationgroupJoinRows combines one or more of those rows into a
# semantically meaningful equation and sets up the appropriate MathForks within.
# This is typically useful for eqnarray, after you have analyzed
# which subsequences of ltx:equations actually correspond to single semantic equations.
# equationgroupJoinRows($document, $equationgroup, @equations)
# Replace the row-equations @equations (children of $equationgroup) by a single new
# ltx:equation, inserted before the first of them, containing one ltx:MathFork.
#  * The labels of all rows are concatenated (space separated) onto the new equation;
#    for xml:id, refnum, frefnum, rrefnum and _ID_counter_m_ the LAST row carrying
#    the attribute wins.
#  * Each old equation is detached and becomes one ltx:tr in the MathBranch, with an
#    ltx:td for each of its ltx:_Capture_ cells (see addColumnToMathFork).
# Returns nothing.
sub equationgroupJoinRows {
  my ($document, $equationgroup, @equations) = @_;
  # Make a new equation, with a single MathFork container
  my $equation = $document->openElementAt($equationgroup, 'ltx:equation');
  $equationgroup->insertBefore($equation, $equations[0]);    # Move to correct position.
  # move labels, id, refnum to new equation
  my ($labels, $id, $refnum, $frefnum, $rrefnum, $idctr);
  foreach my $eq (@equations) {
    if (my $l = $eq->getAttribute('labels')) {
      $labels = ($labels ? "$labels $l" : $l); }
    $id = $eq->getAttribute('xml:id') if $eq->hasAttribute('xml:id');
    $eq->removeAttribute('xml:id') if $id;
    $refnum  = $eq->getAttribute('refnum')         if $eq->hasAttribute('refnum');
    $frefnum = $eq->getAttribute('frefnum')        if $eq->hasAttribute('frefnum');
    $rrefnum = $eq->getAttribute('rrefnum')        if $eq->hasAttribute('rrefnum');
    $idctr   = $eq->getAttribute('_ID_counter_m_') if $eq->hasAttribute('_ID_counter_m_'); }
  $document->unRecordID($id) if $id;
  $document->setAttribute($equation, labels   => $labels) if $labels;
  $document->setAttribute($equation, 'xml:id' => $id)     if $id;
  # Reference numbers are tested for defined & non-empty, rather than for truth,
  # so that an equation numbered "0" keeps its number.
  my %numbering = (refnum => $refnum, frefnum => $frefnum, rrefnum => $rrefnum);
  foreach my $name (qw(refnum frefnum rrefnum)) {
    my $value = $numbering{$name};
    $document->setAttribute($equation, $name => $value)
      if defined($value) && ($value ne ''); }
  $document->setAttribute($equation, '_ID_counter_m_' => $idctr) if $idctr;
  # Scan equations to see which ones likely are continuations of previous
  my ($main, $branch) = openMathFork($document, $equation);
  foreach my $eq (@equations) {
    # remove equation; parts will be added in by adding to mathfork (hopefully taking care of ids)
    $eq->unbindNode;
    my $tr    = $document->openElementAt($branch, 'ltx:tr');
    my @cells = $document->findnodes('ltx:_Capture_', $eq);
    # A row without any captured cells simply yields an empty ltx:tr
    # (there is then no first cell to examine for the lefteqn class).
    $document->setAttribute($tr, class => 'ltx_eqn_lefteqn')
      if @cells && (($cells[0]->getAttribute('class') || '') =~ /\blefteqn\b/);
    foreach my $cell (@cells) {
      addColumnToMathFork($document, $main, $tr, $cell); }
    $document->closeElementAt($tr); }
  closeMathFork($document, $equation, $main, $branch);
  $document->closeElementAt($equation);
  return; }
# Given an equation generated in an equationgroup,
# collect each $ncols columns into a MathFork structure,
# with the formatted portion being the columns.
# This is typically useful for AMS's align structures,
# which contain several columns, each pair of which represent a semantic equation.
# equationgroupJoinCols($document, $ncols, $equation)
# Distribute the ltx:_Capture_ cells of $equation over MathForks:
# a new MathFork is opened for every $ncols-th cell (closing the previous one),
# and each cell is added as a column directly to that fork's MathBranch.
# Returns nothing.
sub equationgroupJoinCols {
  my ($document, $ncols, $equation) = @_;
  my ($main, $branch);
  my $ncells = 0;
  foreach my $cell ($document->findnodes('ltx:_Capture_', $equation)) {
    next unless $document->getNodeQName($cell) =~ /(.*?:)?_Capture_$/;
    my $starts_group = (($ncells % $ncols) == 0);
    $ncells++;
    if ($starts_group) {    # Create new MathFork every $ncols cells.
      closeMathFork($document, $equation, $main, $branch) if $main;
      ($main, $branch) = openMathFork($document, $equation); }
    addColumnToMathFork($document, $main, $branch, $cell); }
  # Close whichever fork is still open (if any cells were seen at all).
  closeMathFork($document, $equation, $main, $branch) if $main;
  return; }
#**********************************************************************
# \vcenter is simply made a synonym for \vbox.
# NOTE(review): presumably this means no vertical centering is attempted -- confirm.
Let('\vcenter', '\vbox');
# \eqno & \leqno are really bizarre.
# They should seemingly digest until $ (or while still in math mode),
# and use that stuff as the reference number.
# However, since people abuse this, and we're really not quite TeX,
# we really can't do it Right.
# Even a \begin{array} ends up expanding into a $ !!!
# \eqno: collect the (expanded) tokens that follow, up to something that looks like
# the end of the math or of an environment, and hand them to \@@eqno as the tag.
# The terminating token is put back after the \@@eqno invocation.
# If the input runs out before any terminator is seen, that is reported as Fatal
# (the test has to come AFTER the loop: within the loop the token is always defined).
DefMacroI('\eqno', undef, sub {
    my ($gullet) = @_;
    my @stuff = ();
    # This is risky!!!
    while (my $t = $gullet->readXToken(0)) {
      # What do I need to explicitly list here!?!?!? UGGH!
      if (Equals($t, T_MATH) || Equals($t, T_CS('\]'))
        || Equals($t, T_CS('\begingroup'))          # Totally wrong, but to catch expanded environments
        || (ToString($t) =~ /^\\(?:begin|end)\{/)    # any sort of environ begin or end???
                                                     # This seems needed within AmSTeX environs
        || Equals($t, T_CS('\@@close@inner@column'))
      ) {
        #  || Equals($t,T_CS('\end{equation}'))|| Equals($t,T_CS('\end{equation*}'))){
        return (Invocation(T_CS('\@@eqno'), Tokens(@stuff)), $t); }
      push(@stuff, $t); }
    # No more tokens, and no terminator was found.
    Fatal('unexpected', '\eqno', $gullet, "Fell of the end reading tag for \\eqno!");
    return; });
# \leqno is treated exactly like \eqno.
Let('\leqno', '\eqno');
# Revert to nothing, since it really doesn't belong in the TeX string(?)
# \@@eqno{tag}: records the tag as the refnum attribute of the current node.
# It reverts to nothing, since it really doesn't belong in the TeX string(?)
# The tag is stringified and then, in this order, one enclosing \hbox{...},
# one enclosing {...} and one enclosing (...) are stripped.
DefConstructor('\@@eqno{}', "^ refnum='#eqnum'",
  reversion   => '',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $tag = ToString($whatsit->getArg(1));
    # special purpose cleanup!?!?!
    foreach my $wrapper (qr/^\\hbox\{(.*)\}$/, qr/^\{(.*)\}$/, qr/^\((.*)\)$/) {
      $tag =~ s/$wrapper/$1/; }
    $whatsit->setProperties(eqnum => $tag); });
#======================================================================
# Scripts are a bit of a strange beast, with respect to when the arguments
# are processed, and what kind of object should be created.
#
# While scripts look like they take a normal TeX argument, they really
# take the next BOX (AFTER expansion & digestion)! Thus, while
# a^\frac{b}{c} and a^\mathcal{B}
# DO work in TeX, other things like
# a^\sqrt{3} or a^\acute{b}
# DO NOT! (Hint: consider the expansions)
# Note that with
# \def\xyz{xyz}
# a^\xyz => a^{x}yz
# So, we try to mimic, but note that our boxes don't correspond 100% to TeX's
#
# Normally, sub/super scripts should be turned into a sort of postfix operator:
# The parser will attach the script to the appropriate preceding object.
# However, there are a few special cases involving empty boxes {}.
# If the argument is an empty box $x^{}$, the whole script should just disappear.
# If the PRECEDING box is {} (as in ${}^{p}$), a sort of `floating' script should be created.
# This may combine, in the parser, with the following object to generate
# a prescript.
# Note that this is also being used by alignment.
# IsEmpty($box): true if $box is "empty" for the purposes of scripts & alignments:
#   * a false value, or a Comment;
#   * a Box whose string is undefined or only whitespace;
#   * a List all of whose items are empty;
#   * a Whatsit with the isSpace property, or a T_BEGIN group whose body items are all empty.
# Anything else yields 0.
sub IsEmpty {
  my ($box) = @_;
  return 1 if !$box;
  my $class = ref $box;
  return 1 if $class eq 'LaTeXML::Core::Comment';
  if ($class eq 'LaTeXML::Core::Box') {
    my $string = $box->getString;
    return (!defined $string) || ($string =~ /^\s*$/); }
  if ($class eq 'LaTeXML::Core::List') {
    return !grep { !IsEmpty($_) } $box->unlist; }
  if ($class eq 'LaTeXML::Core::Whatsit') {
    return 1 if $box->getProperty('isSpace');    # A space-like Whatsit
    # A braced group containing nothing (or only empty things).
    return 1 if ($box->getDefinition eq $STATE->lookupDefinition(T_BEGIN))
      && !grep { !IsEmpty($_) } $box->getBody->unlist; }
  return 0; }
# odd...
# # If we don't make \relax disappear, we'll sometimes need to test for it.
# # Or, alternatively, introduce a notion of isBox such that \relax is NOT one!
# sub isRelax {
# my($box)=@_;
# my $ref = ref $box;
# $box && ($ref eq 'LaTeXML::Core::Box')
# && Equals(T_CS('\relax'),$box->revert); }
# my $is=
# (!$box
# || ($ref eq 'LaTeXML::Core::Comment')
# || $box->getProperty('isSpace') # A space-like Whatsit
# || (($ref =~ /^LaTeXML::(Math)?Box$/) && ($box->getString =~ /^\s*$/)) # Space plain box
# || (($ref eq 'LaTeXML::Core::Whatsit') # Whatsit with nothing in it.
# && ($box->getDefinition eq $STATE->lookupDefinition(T_BEGIN))
# && !grep(!IsEmpty($_), $box->getBody->unlist)));
# ##print STDERR "Box ".Stringify($box)." is empty: ".($is ? "yes":"no")."\n";
# $is;
# }
# Remember a "safe" way to test a script Whatsit.
# Returns [ (FLOATING|POST) , (SUBSCRIPT|SUPERSCRIPT) ] or nothing
# IsScript($object): a "safe" test for a script Whatsit.
# If $object is a Whatsit whose definition's CS name is \@@(FLOATING|POST)(SUBSCRIPT|SUPERSCRIPT),
# returns the array ref [ (FLOATING|POST), (SUBSCRIPT|SUPERSCRIPT) ]; otherwise returns nothing.
sub IsScript {
  my ($object) = @_;
  return unless (ref $object eq 'LaTeXML::Core::Whatsit');
  # careful w/alias in getCSName: go through the definition's own CS.
  my $csname = $object->getDefinition->getCS->getCSName;
  if ($csname =~ /^\\@@(FLOATING|POST)(SUBSCRIPT|SUPERSCRIPT)$/) {
    return [$1, $2]; }
  return; }
# scriptHandler($stomach, $op): digestion handler behind ^ and _.
#   $op is the kind of script; it is compared against 'SUPERSCRIPT' (anything else is
#   treated as a subscript in the non-math fallback) and appended to the \@@FLOATING or
#   \@@POST prefix to name the Whatsit's definition.
# In math (ie. when the current font has a mathstyle):
#   looks back through the already digested boxes in @LaTeXML::LIST to choose between a
#   floating and a post script, digests following tokens until some boxes result,
#   and returns those boxes with the first one replaced by the script Whatsit
#   (or simply dropped, if it IsEmpty).
# Outside of math: reports an Error and returns a Box for the ^ or _ character.
sub scriptHandler {
my ($stomach, $op) = @_;
my $gullet = $stomach->getGullet;
$gullet->skipSpaces;
# Remember the current font; it is restored after the script has been digested.
my $font = LookupValue('font');
my $style = $font->getMathstyle;
# Boxes popped off @LaTeXML::LIST during the look-back which must be put back afterwards.
my @putback = ();
my $nscripts = 0;
if (defined $style) {
# Default is a floating script, unless we find something to attach to.
my $cs = '\@@FLOATING' . $op;
my ($prevscript, $prevspace, $base);
# Check preceding boxes to determine possible attachment (floating vs post),
# and whether there are conflicting preceding scripts, which is an error
# Note that this analysis has to be done now (or sometime like it) before grouping lists go away;
# Parsing is too late!
while (my $prev = pop(@LaTeXML::LIST)) {
if ((ref $prev eq 'LaTeXML::Core::Whatsit') && $prev->getProperty('isSpace')) {
$prevspace = 1; # a space avoids double-scripts
unshift(@putback, $prev); # put back? assuming it will add rpadding to previous???
next; }
elsif (IsEmpty($prev)) { # If empty, the script floats, can't conflict, but don't put back
last; }
elsif (my $prevop = IsScript($prev)) {
unshift(@putback, $prev);
if ($$prevop[1] eq $op) { # Whoops, duplicated; better use FLOATING
Error('unexpected', "double-" . lc($$prevop[1]), $stomach, "Double " . lc($$prevop[1]))
unless $prevspace;
$cs = '\@@FLOATING' . $op;
last; }
else { # Else, is OK (so far) assume POST (it will stack previous script)
$prevscript = $prev; # we'll overlap the width of the previous.
$cs = '\@@POST' . $op; }
# Stop looking back once a second preceding script has been seen.
last if ++$nscripts > 1; }
else {
# We found something "normal", so assume we'll attach to it, and we're done.
$base = $prev;
unshift(@putback, $prev);
$cs = '\@@POST' . $op;
last; } }
# Restore the boxes we looked at (an empty box that ended the scan is NOT restored).
push(@LaTeXML::LIST, @putback);
MergeFont(scripted => 1);
# Now, get following boxes (may have to process several tokens!)
my @stuff = ();
while (my $tok = $gullet->readXToken(0)) {
@stuff = $stomach->invokeToken($tok);
last if @stuff; }
Fatal('expected', '{', $stomach, "Missing { in sub/super-script argument", $gullet->showUnexpected)
unless @stuff;
my $script = shift(@stuff); # ONLY the first box is the script!
my $level = $stomach->getBoxingLevel;
# The script Whatsit goes in front of any remaining boxes; an empty script just disappears.
unshift(@stuff,
LaTeXML::Core::Whatsit->new(LookupDefinition(T_CS($cs)), [$script],
locator => $gullet->getLocator,
font => $script->getFont, isMath => 1,
level => $level,
base => $base, # for sizing/positioning
prevscript => $prevscript))
unless IsEmpty($script);
AssignValue(font => $font); # revert
return @stuff; }
else { # Non math use of _ ??
my $c = (($op eq 'SUPERSCRIPT') ? '^' : '_');
Error('unexpected', $c, $stomach, "Script $c can only appear in math mode");
return Box($c, undef, undef, (($op eq 'SUPERSCRIPT') ? T_SUPER : T_SUB));
} }
# The superscript and subscript character tokens are both digested by scriptHandler.
DefPrimitiveI(T_SUPER, undef, sub {
    my ($stomach) = @_;
    return scriptHandler($stomach, 'SUPERSCRIPT'); });
DefPrimitiveI(T_SUB, undef, sub {
    my ($stomach) = @_;
    return scriptHandler($stomach, 'SUBSCRIPT'); });
# The `argument' to a sub or superscript will typically be processed as a box,
# and either has braces, or is something that results in a single box.
# When we revert these, we DON'T want to wrap extra braces around, because they'll accumulate;
# at the least they're ugly; in some applications they affect "round trip" processing.
# OTOH, direct use of \@@POSTSUPERSCRIPT, etal, MAY need to have extra braces around them.
# So, when reverting, we're going to a bit of extra trouble to make sure we have ONE set
# of braces, but no extras!! [Worry about lists of lists...]
# revertScript($script): revert $script to tokens such that there is ONE set of braces
# around them.  If the reverted tokens start with a T_BEGIN and scanning for its
# matching T_END consumes all of the tokens, they are returned unchanged;
# otherwise (including when there are no tokens at all) they are wrapped in T_BEGIN ... T_END.
sub revertScript {
  my ($script) = @_;
  my @tokens    = $script->revert;
  my @remaining = @tokens;
  my $depth     = 0;
  # Only worth scanning if we start with an open brace.
  if (Equals($remaining[0], T_BEGIN)) {
    shift(@remaining);
    $depth = 1; }
  # Consume tokens until the initial brace has been balanced (or we run out).
  while (@remaining && $depth) {
    my $token = shift(@remaining);
    if    (Equals($token, T_BEGIN)) { $depth++; }
    elsif (Equals($token, T_END))   { $depth--; } }
  return @tokens if @tokens && !@remaining;
  return (T_BEGIN, @tokens, T_END); }
# Compute the 'advance' of this script.
# can we do this before parsing? we can do the advance or something.... Hmmmm.
# * Need to know scriptpos (mid or post) to determine position.
# * need to know sub/super
# scriptSizer($script, $base, $prev, $op, $pos): the extra (width, height, depth), as
# Dimensions, that a script adds.
#   $script : the script box; its size is scaled by 0.8 (HACK!)
#   $base   : the base box, if any; else the nominal size of the current font is used
#   $prev   : a preceding script, if any (only its width is used, and only when not 'mid')
#   $op     : 'SUPERSCRIPT' adds height; anything else adds depth
#   $pos    : 'mid' or 'post'; defaults to the base's scriptpos property, else 'post'
# NOTE: Currently, the mathstyle is NOT reflected in the font of the script!!!!
# Or is it now ?????   [unless it's different from the 'expected' style!!!]
sub scriptSizer {
  my ($script, $base, $prev, $op, $pos) = @_;
  my ($ws, $hs, $ds) = map { $_->valueOf } $script->getSize;
  $_ *= 0.8 foreach ($ws, $hs, $ds);    # HACK!@!!
  my ($wb, $hb, $db) = map { $_->valueOf } ($base ? $base->getSize
    : LookupValue('font')->getNominalSize);
  # Fishing for the scriptpos on the base (if any)
  if (!defined $pos) {
    $pos = $base->getProperty('scriptpos') if defined $base;
    $pos = 'post' if !defined $pos; }
  my $is_super = ($op eq 'SUPERSCRIPT');
  my ($w, $h, $d) = (0, 0, 0);
  if ($pos eq 'mid') {
    $w = max(0, $ws - $wb);    # as if max width of base & script
    if ($is_super) { $h = $hb + $ds + $hs; }
    else           { $d = $db + $hs + $ds; } }
  else {
    my $wp = ($prev && $prev->getWidth) || 0;    # as if max of width & prev script's width
    $w = max(0, $ws - $wp);
    if ($is_super) { $h = $hb + $hs / 2; }
    else           { $d = $hs / 2 + $ds; } }
  $_ = Dimension($_) foreach ($w, $h, $d);
  return ($w, $h, $d); }
# NOTE: When reverting these, revertScript takes care that the script gets exactly ONE set of braces.
# The four script Whatsits created by scriptHandler.
# Each produces an ltx:XMApp with the corresponding role, whose scriptpos is the
# Whatsit's level property, and a single ltx:XMArg holding the script.
# POST scripts revert to ^ or _ followed by the script;
# FLOATING ones are preceded by an empty group {}.
# POST scripts are sized relative to their recorded base & previous script;
# FLOATING ones have neither.
DefConstructor('\@@POSTSUPERSCRIPT InScriptStyle',
  "<ltx:XMApp role='POSTSUPERSCRIPT' scriptpos='#level'>"
    . "<ltx:XMArg rule='Superscript'>#1</ltx:XMArg>"
    . "</ltx:XMApp>",
  reversion => sub {
    my ($whatsit, $script) = @_;
    return (T_SUPER, revertScript($script)); },
  sizer => sub {
    my ($whatsit) = @_;
    return scriptSizer($whatsit->getArg(1), $whatsit->getProperty('base'),
      $whatsit->getProperty('prevscript'), 'SUPERSCRIPT', 'post'); });
DefConstructor('\@@POSTSUBSCRIPT InScriptStyle',
  "<ltx:XMApp role='POSTSUBSCRIPT' scriptpos='#level'>"
    . "<ltx:XMArg rule='Subscript'>#1</ltx:XMArg>"
    . "</ltx:XMApp>",
  reversion => sub {
    my ($whatsit, $script) = @_;
    return (T_SUB, revertScript($script)); },
  sizer => sub {
    my ($whatsit) = @_;
    return scriptSizer($whatsit->getArg(1), $whatsit->getProperty('base'),
      $whatsit->getProperty('prevscript'), 'SUBSCRIPT', 'post'); });
DefConstructor('\@@FLOATINGSUPERSCRIPT InScriptStyle',
  "<ltx:XMApp role='FLOATSUPERSCRIPT' scriptpos='#level'>"
    . "<ltx:XMArg rule='Superscript'>#1</ltx:XMArg>"
    . "</ltx:XMApp>",
  reversion => sub {
    my ($whatsit, $script) = @_;
    return (T_BEGIN, T_END, T_SUPER, revertScript($script)); },
  sizer => sub {
    my ($whatsit) = @_;
    return scriptSizer($whatsit->getArg(1), undef, undef, 'SUPERSCRIPT', 'post'); });
DefConstructor('\@@FLOATINGSUBSCRIPT InScriptStyle',
  "<ltx:XMApp role='FLOATSUBSCRIPT' scriptpos='#level'>"
    . "<ltx:XMArg rule='Subscript'>#1</ltx:XMArg>"
    . "</ltx:XMApp>",
  reversion => sub {
    my ($whatsit, $script) = @_;
    return (T_BEGIN, T_END, T_SUB, revertScript($script)); },
  sizer => sub {
    my ($whatsit) = @_;
    return scriptSizer($whatsit->getArg(1), undef, undef, 'SUBSCRIPT', 'post'); });
## NOTE: Strictly speaking, these two should examine the scriptpos
# attribute of the base (it may have \limits,\nolimits)
## OR, alternatively, there could be separate \@OVERSCRIPT/\@UNDERSCRIPT macros ?
# \@SUPERSCRIPT{base}{script} and \@SUBSCRIPT{base}{script}: explicitly attach a script to a base.
# If the script (#2) is present, an ltx:XMApp of a SUPERSCRIPTOP (or SUBSCRIPTOP) token,
# the base and the script is built; otherwise only the base is inserted.
# They revert to {base}^script (or {base}_script), or just the base when the script is empty.
# The scriptpos property is "post" followed by the current boxing level.
DefConstructor('\@SUPERSCRIPT{} InScriptStyle',
  "?#2(<ltx:XMApp>"
    . "<ltx:XMTok role='SUPERSCRIPTOP' scriptpos='#scriptpos'/>"
    . "<ltx:XMArg>#1</ltx:XMArg>"
    . "<ltx:XMArg rule='Superscript'>#2</ltx:XMArg>"
    . "</ltx:XMApp>)"
    . "(#1)",
  reversion => sub {
    my ($whatsit, $base, $script) = @_;
    if ($script && $script->unlist) {
      return (T_BEGIN, Revert($base), T_END, T_SUPER, revertScript($script)); }
    return Revert($base); },
  properties => sub {
    my ($stomach) = @_;
    return (scriptpos => "post" . $stomach->getBoxingLevel); },
  sizer => sub {
    my ($whatsit) = @_;
    return scriptSizer($whatsit->getArg(2), $whatsit->getArg(1), undef, 'SUPERSCRIPT', 'post'); });
DefConstructor('\@SUBSCRIPT{} InScriptStyle',
  "?#2(<ltx:XMApp>"
    . "<ltx:XMTok role='SUBSCRIPTOP' scriptpos='#scriptpos'/>"
    . "<ltx:XMArg>#1</ltx:XMArg>"
    . "<ltx:XMArg rule='Subscript'>#2</ltx:XMArg>"
    . "</ltx:XMApp>)"
    . "(#1)",
  reversion => sub {
    my ($whatsit, $base, $script) = @_;
    if ($script && $script->unlist) {
      return (T_BEGIN, Revert($base), T_END, T_SUB, revertScript($script)); }
    return Revert($base); },
  properties => sub {
    my ($stomach) = @_;
    return (scriptpos => "post" . $stomach->getBoxingLevel); },
  sizer => sub {
    my ($whatsit) = @_;
    return scriptSizer($whatsit->getArg(2), $whatsit->getArg(1), undef, 'SUBSCRIPT', 'post'); });
# An active ' (in math only): expands to a superscript containing one \prime for each
# consecutive ', followed by the content of an immediately following superscript (if any).
DefMacroI('\'', undef, sub {
    my ($gullet) = @_;
    # The ' that invoked us counts as the first prime.
    my @primes = (T_CS('\prime'));
    # Collect up all ', converting to \prime
    while ($gullet->ifNext(T_OTHER('\''))) {
      $gullet->readToken;
      push(@primes, T_CS('\prime')); }
    # Combine with any following superscript!
    # However, this is semantically screwed up!
    # We really need to set up separate superscripts, but at same level!
    if ($gullet->ifNext(T_SUPER)) {
      $gullet->readToken;
      push(@primes, $gullet->readArg->unlist); }
    return (T_SUPER, T_BEGIN, @primes, T_END); },
  mathactive => 1);    # Only in math!
#======================================================================
# \choose & friends, also need VERY special argument handling
# After digesting the \choose (or whatever), grab the previous and following material
# and store as args in the whatsit.
# Increment the mathstyle stored in any boxes & whatsits.
# The tricky part is to know when NOT to increment!
# \displaystyle, constructors that set their own specific style,...
# And, any collateral adjustments that had been done in digestion depending on mathstyle
# WONT be adjusted!
# We don't have a clear API to find the displayable Boxes within;
# and we don't have a good handle on grouping...
# ARGH!!!!!!!!! RETHINK!!!!!!
# adjustMathstyle($outerstyle, $adjusted, @boxes)
# Walk @boxes, adjusting the mathstyle of each Box & Whatsit (via adjustMathStyle_internal)
# and recursing into Lists and into the args AND property values of Whatsits.
#   $adjusted is a hash ref recording what has been visited, so nothing is adjusted twice.
# Skipped: undefined items, items already seen, anything that is not a box object,
# and boxes having the own_mathstyle property.
# On meeting a box with the explicit_mathstyle property, the REST of this list is abandoned.
# Returns nothing.
sub adjustMathstyle {
  my ($outerstyle, $adjusted, @boxes) = @_;
  foreach my $box (@boxes) {
    next if (!defined $box) || $adjusted->{$box};
    $adjusted->{$box} = 1;    # since we do args AND props, be careful not to adjust twice!
    my $class = ref $box;
    next if !$class || !$class->isaBox;
    return if $box->getProperty('explicit_mathstyle');
    next   if $box->getProperty('own_mathstyle');
    if ($class eq 'LaTeXML::Core::List') {
      adjustMathstyle($outerstyle, $adjusted, $box->unlist); }
    elsif ($class eq 'LaTeXML::Core::Box') {
      adjustMathStyle_internal($outerstyle, $box); }
    elsif ($class eq 'LaTeXML::Core::Whatsit') {
      # The contents are adjusted relative to the Whatsit's new style, if it recorded one.
      my $innerstyle = adjustMathStyle_internal($outerstyle, $box) || $outerstyle;
      # now recurse on contained boxes (args AND properties!)
      adjustMathstyle($innerstyle, $adjusted, $box->getArgs);
      adjustMathstyle($innerstyle, $adjusted, values %{ $box->getPropertiesRef }); } }
  return; }
# Heuristic:
# we're wanting to adjust the style AS IF the numerator had been already in the next mathstyle
# This isn't the same as just shifting the mathstyle!
# we're sorta trying to infer WHY the box has a given style...?
# $mathstyle_adjust_map{outer style}{style recorded on the box} => style the box should get.
our %mathstyle_adjust_map = (
  display => {
    display      => 'text',
    text         => 'script',
    script       => 'script',
    scriptscript => 'scriptscript' },
  text => {
    display      => 'text',
    text         => 'script',
    script       => 'scriptscript',
    scriptscript => 'scriptscript' },
  script => {
    display      => 'display',
    text         => 'text',
    script       => 'scriptscript',
    scriptscript => 'scriptscript' },
  scriptscript => {
    display      => 'display',
    text         => 'text',
    script       => 'scriptscript',
    scriptscript => 'scriptscript' });

# adjustMathStyle_internal($outerstyle, $box)
# If $box has a font, replace it by one whose mathstyle is looked up in
# %mathstyle_adjust_map (a false $outerstyle or font mathstyle counts as 'display').
# If the box also has a (true) mathstyle property, that is updated as well and the
# new style is returned; in all other cases nothing is returned.
sub adjustMathStyle_internal {
  my ($outerstyle, $box) = @_;
  $outerstyle ||= 'display';
  my $font = $box->getFont;
  return unless $font;
  my $origstyle = $font->getMathstyle || 'display';
  my $newstyle  = $mathstyle_adjust_map{$outerstyle}{$origstyle};
  $box->setFont($font->merge(mathstyle => $newstyle));
  # And adjust the recorded property, but only if one was recorded.
  return unless $box->getProperty('mathstyle');
  $box->setProperty(mathstyle => $newstyle);
  return $newstyle; }
# fracSizer($numerator, $denominator): size of a fraction-like stack, as (width, height, depth):
#   width  = the larger of the two widths;
#   depth  = half of the denominator's total height;
#   height = the numerator's total height plus that depth.
sub fracSizer {
  my ($numerator, $denominator) = @_;
  my $width  = $numerator->getWidth->larger($denominator->getWidth);
  my $below  = $denominator->getTotalHeight;
  my $depth  = $below->multiply(0.5);
  my $above  = $numerator->getTotalHeight;
  my $height = $above->add($depth);
  return ($width, $height, $depth); }
# \lx@generalized@over{reversion}{keyvals}{top}{bottom}
# keyvals: role,meaning, left,right, thickness
# Argument 1 (Undigested) holds the tokens that the whatsit reverts to, between top & bottom.
# Argument 2 (RequiredKeyVals) is read for: role, meaning, thickness, left & right.
# The top and bottom are NOT arguments: they are gathered in afterDigest and stored as
# the properties top & bottom.
# When left or right is given (needXMDual), the result is an ltx:XMDual whose first branch
# applies references (XMRef) to the operator and its two arguments, and whose second
# branch is an ltx:XMWrap of #left, the ltx:XMApp, and #right.
# Otherwise, only the ltx:XMApp is generated.
DefConstructor('\lx@generalized@over Undigested RequiredKeyVals',
"?#needXMDual("
. "<ltx:XMDual>"
. "<ltx:XMApp>"
. "<ltx:XMRef _xmkey='#xmkey0'/>"
. "<ltx:XMRef _xmkey='#xmkey1'/>"
. "<ltx:XMRef _xmkey='#xmkey2'/>"
. "</ltx:XMApp>"
. "<ltx:XMWrap>"
. "#left)()"
. "<ltx:XMApp>"
. "<ltx:XMTok _xmkey='#xmkey0' role='#role' meaning='#meaning' mathstyle='#mathstyle' thickness='#thickness'/>"
. "<ltx:XMArg _xmkey='#xmkey1'>#top</ltx:XMArg>"
. "<ltx:XMArg _xmkey='#xmkey2'>#bottom</ltx:XMArg>"
. "</ltx:XMApp>"
. "?#needXMDual(#right"
. "</ltx:XMWrap>"
. "</ltx:XMDual>)()",
afterDigest => sub {
my ($stomach, $whatsit) = @_;
my $kv = $whatsit->getArg(2);
# Really, we want the mathstyle that was in effect BEFORE the group starting the numerator!
# (there could be a \displaystyle INSIDE the numerator, but that's not the one we want)
# Of course the group that started the numerator may be the start of the Math, itself!
# AND, the numerator, which was already digested, needs its mathstyle ADJUSTED
my $font = ($STATE->isValueBound('MODE', 0) # Last stack frame was a mode switch!?!?!
? $STATE->lookupValue('font') # then just use whatever font we've got
: ($STATE->isValueBound('font', 0) # else if font was set in numerator
&& $STATE->valueInFrame('font', 1))
|| $STATE->lookupValue('font') # then just use whatever font we've got
);
my $style = $font->getMathstyle;
my $role = ToString($kv->getValue('role'));
my $meaning = ToString($kv->getValue('meaning'));
my $thickness = ToString($kv->getValue('thickness'));
# Defaults: the role is FRACOP; the meaning is divide, unless the thickness is exactly '0pt'.
$role = 'FRACOP' unless $role;
$meaning = 'divide' if (!$meaning) && ($thickness ne '0pt');
# Unfortunately, the numerator's already digested! We have to adjust its mathstyle
# The boxes taken back from the stomach become the top.
my @top = $stomach->regurgitate;
# really have to pass +/-1, +/-2 etc..!
adjustMathstyle($style, {}, @top);
MergeFont(fraction => 1);
# Everything that follows, up to whatever closes the current list, is the bottom.
my @bot = $stomach->digestNextBody();
my $closing = pop(@bot); # We'll leave whatever closed the list (endmath, endgroup...)
$whatsit->setProperties(
top => List(@top, mode => 'math'),
bottom => List(@bot, mode => 'math'),
role => $role,
meaning => $meaning,
thickness => $thickness,
mathstyle => $style);
# With delimiters, we'll need the XMDual form, and keys for the XMRef's to refer to.
if ($kv->getValue('left') || $kv->getValue('right')) {
$whatsit->setProperties(needXMDual => 1,
xmkey0 => LaTeXML::Package::getXMArgID(),
xmkey1 => LaTeXML::Package::getXMArgID(),
xmkey2 => LaTeXML::Package::getXMArgID()); }
return $closing; }, # and leave the closing bit, whatever it is.
# All of the keyvals (including left & right) start out as properties of the whatsit.
properties => sub { %{ $_[2]->getKeyVals }; },
sizer => sub { fracSizer($_[0]->getProperty('top'), $_[0]->getProperty('bottom')); },
# Reverts to the infix form: top, then the tokens of argument 1, then bottom.
reversion => sub {
my ($whatsit) = @_;
(Revert($whatsit->getProperty('top')),
$whatsit->getArg(1)->unlist,
Revert($whatsit->getProperty('bottom'))); });
# Plain TeX infix fraction-like operators.
# Each macro expands to \lx@generalized@over{<tokens>}{<keyvals>}:
#   the first argument repeats the tokens that invoked the macro,
#   the second is a key=value list using the keys
#   meaning, thickness, left and right.
# The delimiter-taking variants read two Tokens and wrap them as \@left / \@right.
DefMacro('\choose',
'\lx@generalized@over{\choose}{meaning=binomial,thickness=0pt,left=\@left(,right=\@right)}');
DefMacro('\brace',
'\lx@generalized@over{\brace}{thickness=0pt,left=\@left\{,right=\@right\}}');
DefMacro('\brack',
'\lx@generalized@over{\brack}{thickness=0pt,left=\@left[,right=\@right]}');
# \atop passes only thickness=0pt (no meaning key).
DefMacro('\atop',
'\lx@generalized@over{\atop}{thickness=0pt}');
DefMacro('\atopwithdelims Token Token',
'\lx@generalized@over{\atopwithdelims #1 #2}{thickness=0pt,left={\@left#1},right={\@right#2}}');
DefMacro('\over',
'\lx@generalized@over{\over}{meaning=divide}');
DefMacro('\overwithdelims Token Token',
'\lx@generalized@over{\overwithdelims #1 #2}{left={\@left#1},right={\@right#2},meaning=divide}');
# My thinking was that this is a "fraction" providing the dimension is > 0!
# The Dimension argument is forwarded as the thickness key.
DefMacro('\above Dimension',
'\lx@generalized@over{\above #1}{meaning=divide,thickness=#1}');
DefMacro('\abovewithdelims Token Token Dimension',
'\lx@generalized@over{\abovewithdelims #1 #2 #3}{left={\@left#1},right={\@right#2},meaning=divide,thickness=#3}');
#======================================================================
# \cal selects the calligraphic family, resetting series and shape.
DefPrimitiveI('\cal', undef, undef,
  font => { family => 'caligraphic', series => 'medium', shape => 'upright' });

# In principle, <ltx:emph> would be nice markup for emphasis.
# Unfortunately, TeX treats \em as a plain font switch, so something like
#    \em et.al. \rm more stuff
# works in TeX, but with no explicit {} an <ltx:emph> would stay open.
# Making that work would require font merging to notice (and close) any
# open <ltx:emph>.  So \em only toggles the shape of the current font:
# italic becomes 'normal', anything else becomes italic.
DefPrimitiveI('\em', undef, undef,
  beforeDigest => sub {
    my $current = LookupValue('font');
    my $toggled = ($current->getShape eq 'italic' ? 'normal' : 'italic');
    AssignValue(font => $current->merge(shape => $toggled), 'local'); });

# Change the math font while still in text: both primitives refuse math mode
# (forbidMath) and locally rebind 'mathfont' with forcebold switched on or off.
DefPrimitiveI('\boldmath', undef, undef,
  beforeDigest => sub {
    my $bolded = LookupValue('mathfont')->merge(forcebold => 1);
    AssignValue(mathfont => $bolded, 'local'); },
  forbidMath => 1);
DefPrimitiveI('\unboldmath', undef, undef,
  beforeDigest => sub {
    my $unbolded = LookupValue('mathfont')->merge(forcebold => 0);
    AssignValue(mathfont => $unbolded, 'local'); },
  forbidMath => 1);
#======================================================================
# Alignments
# & gives an error except within the right context
# (which should redefine it!)
# The default definition below only reports an 'unexpected' Error for the stray "&".
DefConstructorI('&', undef, sub { Error('unexpected', '&', $_[0], "Stray alignment \"&\""); });
#**********************************************************************
# Plain; Extracted from Appendix B.
#**********************************************************************
#======================================================================
# TeX Book, Appendix B, p. 344
#======================================================================
# \dospecials ??
# Normally, the content branch contains the pure structure and meaning of a construct,
# and the presentation is generated from lower level TeX macros that only concern
# themselves with how to display the object.
# Nevertheless, it is sometimes useful to know where the tokens in the presentation branch
# came from; particularly what their presumed "meaning" is.
# For example, when search-indexing pmml, or providing links to definitions from the pmml.
#
# The following constructor (see how it's used in DefMath) adds meaning attributes
# wherever it seems sensible on the presentation branch, after it has been generated.
# \@ASSERT@MEANING{meaning}{content}
# Constructs (and reverts to) just the content, #2.  Once it has been built,
# the stringified first argument is handed to addMeaningRec together with the
# document's current node.
DefConstructor('\@ASSERT@MEANING{}{}', '#2',
  reversion      => '#2',
  afterConstruct => sub {
    my ($document, $whatsit) = @_;
    # The current node should be the wrapper that was just added.
    my $wrapper = $document->getNode;
    addMeaningRec($document, $wrapper, ToString($whatsit->getArg(1)));
    return $wrapper; });
#======================================================================
# Properties for plain characters.
# These are allowed in plain text, but need to act a bit special in math.
# Each entry gives a plain character a grammatical role and, where one is
# listed, a meaning.  The fences and the vertical bar are marked stretchy => 'false'.
DefMathI('=', undef, '=', role => 'RELOP', meaning => 'equals');
DefMathI('+', undef, '+', role => 'ADDOP', meaning => 'plus');
DefMathI('-', undef, '-', role => 'ADDOP', meaning => 'minus');
DefMathI('*', undef, '*', role => 'MULOP', meaning => 'times');
DefMathI('/', undef, '/', role => 'MULOP', meaning => 'divide');
DefMathI('!', undef, '!', role => 'POSTFIX', meaning => 'factorial');
# Punctuation: role only, no meaning.
DefMathI(',', undef, ',', role => 'PUNCT');
DefMathI('.', undef, '.', role => 'PERIOD');
DefMathI(';', undef, ';', role => 'PUNCT');
# Fences.
DefMathI('(', undef, '(', role => 'OPEN', stretchy => 'false');
DefMathI(')', undef, ')', role => 'CLOSE', stretchy => 'false');
DefMathI('[', undef, '[', role => 'OPEN', stretchy => 'false');
DefMathI(']', undef, ']', role => 'CLOSE', stretchy => 'false');
DefMathI('|', undef, '|', role => 'VERTBAR', stretchy => 'false');
DefMathI(':', undef, ':', role => 'METARELOP', name => 'colon'); # Seems like good default role
DefMathI('<', undef, '<', role => 'RELOP', meaning => 'less-than');
DefMathI('>', undef, '>', role => 'RELOP', meaning => 'greater-than');
# NOTE: Need to evolve Ligatures to be easier to write.
# rough draft of tool to make ligatures more sane to write...
# It is tempting to handle these with macros,
# But that tends to run afoul of tricky packages like babel that make : active as well!
# Even using mathactive doesn't help.
# TestNode($node, $qname, $content, %attrib)
# Returns true when $node
#   * is defined/true,
#   * has qualified name $qname (according to the current document's model),
#   * has text content equal to $content (skipped when $content is undef), and
#   * carries every attribute in %attrib with exactly the given value.
# A missing attribute (or an undef expected value) is compared as the empty
# string, so the comparison never triggers "uninitialized value" warnings.
sub TestNode {
  my ($node, $qname, $content, %attrib) = @_;
  return $node
    && ($LaTeXML::DOCUMENT->getModel->getNodeQName($node) eq $qname)
    && ((!defined $content) || (($node->textContent || '') eq $content))
    && !grep {
    my $actual = $node->getAttribute($_);
    my $wanted = $attrib{$_};
    (defined $actual ? $actual : '') ne (defined $wanted ? $wanted : '')
    } keys %attrib; }
# Simple string ligatures: the key is the sequence of characters to recognize,
# the value is the replacement text; role and meaning describe the result.
# Recognize !!
DefMathLigature("!!" => "!!", role => 'POSTFIX', meaning => 'double-factorial');
# Recognize :=
DefMathLigature(":=" => ":=", role => 'RELOP', meaning => 'assign');
# Recognize: <=, << and <<<
# Note that in <<<, the leading << will already have been converted to U+226A (much-less-than)!
DefMathLigature("<="        => "\x{2264}", role => 'RELOP', meaning => 'less-than-or-equals');
DefMathLigature("<<"        => "\x{226A}", role => 'RELOP', meaning => 'much-less-than');
DefMathLigature("\x{226A}<" => "\x{22D8}", role => 'RELOP', meaning => 'very-much-less-than');
# Recognize: >=, >> and >>>
# Likewise, in >>> the leading >> has already become U+226B (much-greater-than).
DefMathLigature(">=" => "\x{2265}", role => 'RELOP', meaning => 'greater-than-or-equals');
DefMathLigature(">>" => "\x{226B}", role => 'RELOP', meaning => 'much-greater-than');
# U+22D9 is VERY MUCH GREATER-THAN (U+22D8 is the less-than counterpart).
DefMathLigature("\x{226B}>" => "\x{22D9}", role => 'RELOP', meaning => 'very-much-greater-than');
#======================================================================
# Combine letters, when the fonts are right. (sorta related to mathcode)
# well, maybe a letter followed by letters & digits?
# Matcher-style ligature: called with the document and a candidate (last) node.
# Only acts when the node's font reports isSticky.  It then walks backwards
# through previousSibling for as long as each node
#   * is an ltx:XMTok,
#   * has a font equal to the starting node's font,
#   * has no 'name' attribute,
#   * has role UNKNOWN or NUMBER (a missing role counts as UNKNOWN), and
#   * leaves the accumulated text purely alphanumeric.
# Result: ($n, $string, role => 'UNKNOWN', meaning => undef) when more than one
# node was collected and the string starts with a letter; otherwise undef.
# NOTE(review): when the font is not sticky the sub falls off the end of the
# 'if' and returns the (false) condition value; presumably DefMathLigature
# treats any false result as "no match" -- confirm.
DefMathLigature(matcher => sub { my ($document, $node) = @_;
# NOTE(review): @chars is never used below.
my @chars = ();
my $font = $document->getNodeFont($node);
if ($font->isSticky) {
# $n counts accepted nodes; $string is the last accepted text;
# $s is the tentative text including the node currently being tested.
my $n = 0;
my $string = '';
my $s = '';
while ($node
&& ($document->getModel->getNodeQName($node) eq 'ltx:XMTok')
&& ($document->getNodeFont($node)->equals($font))
&& (!$node->hasAttribute('name'))
&& ((($node->getAttribute('role') || 'UNKNOWN') eq 'UNKNOWN')
|| (($node->getAttribute('role') || 'UNKNOWN') eq 'NUMBER'))
# Prepend this node's text (we are scanning right-to-left) and test the whole.
&& (($s = $node->textContent . $s) =~ /^[0-9a-zA-Z]+$/)) {
$n++; $string = $s;
$node = $node->previousSibling; }
(($string =~ /^[a-zA-Z]/) && ($n > 1) ? ($n, $string, role => 'UNKNOWN', meaning => undef) : undef);
} });
#======================================================================
# Combine digits in math.
# Every digit is a math token with role NUMBER whose meaning is the digit itself.
foreach my $digit (qw(0 1 2 3 4 5 6 7 8 9)) {
DefMathI($digit, undef, $digit, role => 'NUMBER', meaning => $digit); }
# Would probably be best to collapse all XMHint/spaces at the earliest stage.
# Maps an XMHint name to the character used for it inside a combined number
# (see the number ligature, which looks hints up by their 'name' attribute).
# negthinspace maps to the empty string.
our %space_chars = (negthinspace => '', thinspace => "\x{2009}",
medspace => "\x{2005}", thickspace => "\x{2004}");
# This is getting out-of-hand;
# (1) this gets done after document build, so we query the document/node for language
# rather than using something specified during digestion (eg. macros, roles...)
# (2) the way we've specified the decimal & thousands separators (language dependent)
# is completely insufficient; should leverage numprint or babel or ... ?
# (3) the way we're detecting the chars is a mess: a mix of string content & role!
# If we could accommodate multiple roles, maybe a separate role could be set on the tokens
# (a period could be a PERIOD or a DECIMAL_SEPARATOR, eg)
# Language code => character used as decimal / thousands separator.
# Languages not listed fall back to '.' and ',' respectively.
my %decimal_separator   = (en => '.', de => ',', fr => ',', nl => ',', pt => ',', es => ',');
my %thousands_separator = (en => ',', de => '.', fr => '.', nl => '.', pt => '.', es => '.');

# Matcher-style ligature that combines digits, separators and small spaces
# into a single NUMBER token.
# Called with the document and a candidate (last) node; scans BACKWARDS through
# previousSibling, building
#   $string : the text of the combined token (separators & spaces included)
#   $number : the normalized meaning (digits, with '.' as decimal separator;
#             thousands separators and spaces omitted)
# Returns ($n, $string, meaning => $number, role => 'NUMBER') when more than one
# node was consumed and $number contains a digit; otherwise a false value.
DefMathLigature(matcher => sub { my ($document, $node) = @_;
    my $lang = $document->getNodeLanguage($node);
    $lang =~ s/-\w+$// if $lang;    # strip off region code, if any.
    my $dec  = ($lang && $decimal_separator{$lang})   || '.';
    my $thou = ($lang && $thousands_separator{$lang}) || ',';
    # Only when the decimal separator is '.' may a token be recognized by its
    # PERIOD role alone; otherwise there is no role to test (empty string).
    my $decrole = ($dec eq '.' ? 'PERIOD' : '');
    #  my $skip = Dimension('5mu')->valueOf;
    my ($n, $string, $number, $w) = (0, '', '', 0);
    # NOTE: We're scanning chars from END!
    while ($node) {
      my $qn = $document->getModel->getNodeQName($node);
      if ($qn =~ /^(ltx:XMTok|ltx:XMWrap)$/) {
        my $r    = ($node->getAttribute('role') || '');
        my $text = $node->textContent;
        if ($r eq 'NUMBER') {
          $string = $text . $string;
          $number = $node->getAttribute('meaning') . $number; }
        elsif (!$n) {    # any following cases are not allowed as LAST char
          last; }
        # if thousands separator (but NOT simultaneously PUNCT!!!! Be paranoid about lists)
        elsif (($text eq $thou) && ($r ne 'PUNCT')) {
          $string = $text . $string; }    # Add to string, but omit from number
        # if decimal separator, turn it into "standard" "."
        # The role test is guarded: with an empty $decrole, a bare ($r eq $decrole)
        # would accept EVERY role-less token as a decimal point.
        elsif (($text eq $dec) || ($decrole && ($r eq $decrole))) {    # was $r eq 'PERIOD'
          $string = $node->textContent . $string;
          $number = '.' . $number; }
        else {
          last; } }
      # OR if XMHint with 0 <= width <= thickmuskip (5mu == ?)
      elsif ($qn eq 'ltx:XMHint') {
        ## if (($w = $node->getAttribute('width')) && ($w=Dimension($w)->valueOf) && ($w >= 0) && ($w <= $skip)) {
        ##   $string = $text . $string; } # Add to string, but omit from number
        my $s;
        # NOTE(review): this is a truth test, so a hint whose %space_chars entry
        # is the empty string (negthinspace) ends the scan like an unknown hint.
        if (($s = $node->getAttribute('name')) && ($s = $space_chars{$s})) {
          $string = $s . $string; }
        else {
          last; } }
      else {
        last; }
      $n++; $node = $node->previousSibling; }
    if (($n > 1) && ($number =~ /\d/)) {
      ($n, $string, meaning => $number, role => 'NUMBER'); } });
# This needs to be applied AFTER numbers have been resolved!
# If we have a non-negative integer (no signs, decimals,...)
# followed by a fraction dividing two non-negative integers,
# Figure it's a mixed fraction --- ADDING the fraction to the number, not multiplying!
# select: [xpath, 2] -- the xpath picks an XMTok with role NUMBER whose meaning
#   consists only of digits and whose immediately following sibling is an XMApp
#   whose 1st child is an XMTok with meaning "divide" and whose 2nd and 3rd
#   children are each either such a digits-only NUMBER token, or an XMArg with
#   a single child leading to one.
#   NOTE(review): the trailing 2 presumably is the number of sibling nodes
#   handed to 'replace' (the number and the fraction) -- confirm in DefRewrite.
# replace: wraps both nodes in a new XMApp headed by an invisible plus.
DefRewrite(select => ['descendant-or-self::ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]'
. '[ following-sibling::*[1][self::ltx:XMApp]'
. ' [child::*[1][self::ltx:XMTok[@meaning="divide"]]]'
. ' [child::*[2]['
. 'self::ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]'
. 'or self::ltx:XMArg[count(child::*)=1]/ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]'
. ']]'
. ' [child::*[3]['
. 'self::ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]'
. 'or self::ltx:XMArg[count(child::*)=1]/ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]'
. ']]'
. ']',
2],
replace => sub { my ($document, $number, $frac) = @_;
# The new application and its operator reuse the box recorded for the number.
my $box = $document->getNodeBox($number);
$document->openElement('ltx:XMApp', _box => $box);
$document->insertMathToken("\x{2064}", # Invisible Plus!
meaning => 'plus', role => "ADDOP", _box => $box);
# Move the original number and fraction nodes in as the operands.
$document->getNode->appendChild($number);
$document->getNode->appendChild($frac);
$document->closeElement('ltx:XMApp'); });
#======================================================================
# TeX Book, Appendix B, p. 345
# Named numeric constants, defined by feeding raw TeX to the processor:
# \chardef / \mathchardef constants, plus \m@ne as count register 21 set to -1.
# (The here-document is quoted with '...', so Perl does no interpolation.)
RawTeX(<<'EoTeX');
\chardef\active=13
\chardef\@ne=1
\chardef\tw@=2
\chardef\thr@@=3
\chardef\sixt@@n=16
\chardef\@cclv=255
\mathchardef\@cclvi=256
\mathchardef\@m=1000
\mathchardef\@M=10000
\mathchardef\@MM=20000
\countdef\m@ne=21\relax
\m@ne=-1
EoTeX
#======================================================================
# TeX Book, Appendix B, p. 346
# Scratch registers: names bound to fixed count/toks/skip/dimen register numbers.
RawTeX(<<'EoTeX');
\countdef\count@=255
\toksdef\toks@=0
\skipdef\skip@=0
\dimendef\dimen@=0
\dimendef\dimen@i=1
\dimendef\dimen@ii=2
EoTeX
# Various \count's are set; should we?
#======================================================================
# TeX Book, Appendix B, p. 347
# \wlog ??
# From plain.tex
# Register allocation: each \newXXX defines the given control sequence as a
# register (no parameters) holding a zero/empty value of the matching type.
DefPrimitive('\newcount Token', sub {
    my ($stomach, $cs) = @_;
    DefRegisterI($cs, undef, Number(0)); });
DefPrimitive('\newdimen Token', sub {
    my ($stomach, $cs) = @_;
    DefRegisterI($cs, undef, Dimension(0)); });
DefPrimitive('\newskip Token', sub {
    my ($stomach, $cs) = @_;
    DefRegisterI($cs, undef, Glue(0)); });
DefPrimitive('\newmuskip Token', sub {
    my ($stomach, $cs) = @_;
    DefRegisterI($cs, undef, MuGlue(0)); });

# Boxes are numbered from the running counter 'allocated_boxes':
# the counter is bumped globally, the value "box<N>" starts as an empty List,
# and the control sequence becomes a register holding the number N.
AssignValue(allocated_boxes => 0);
DefPrimitive('\newbox Token', sub {
    my ($stomach, $cs) = @_;
    my $index = LookupValue('allocated_boxes');
    AssignValue(allocated_boxes => $index + 1, 'global');
    AssignValue("box$index", List());
    DefRegisterI($cs, undef, Number($index)); });

# \newhelp stores the help tokens as a value under the control sequence's name.
DefPrimitive('\newhelp Token {}', sub {
    my ($stomach, $cs, $message) = @_;
    AssignValue(ToString($cs) => $message); });
DefPrimitive('\newtoks Token', sub {
    my ($stomach, $cs) = @_;
    DefRegisterI($cs, undef, Tokens()); });
# the next 4 actually work by doing a \chardef instead of \countdef, etc.
# which means they actually work quite differently
# \allocationnumber holds the number handed out by the most recent \alloc@@.
DefRegister('\allocationnumber' => Number(0));

# \alloc@@{type}: per-type counter kept under the value 'allocation @<type>'.
# Stores the current count into \allocationnumber and globally increments the
# counter for next time.  A stored object is reduced to its plain value first.
DefMacro('\alloc@@ {}', sub {
    my ($gullet, $type) = @_;
    my $key     = 'allocation @' . ToString($type);
    my $stored  = LookupValue($key);
    my $current = (!$stored ? '0' : (ref $stored ? $stored->valueOf : $stored));
    AssignValue($key => $current + 1, 'global');
    AssignValue('\allocationnumber' => Number($current), 'global'); });

# Each of these allocates a number of its own type, then \chardef's the token to it.
DefMacro('\newread Token',     '\alloc@@{read}\global\chardef#1=\allocationnumber');
DefMacro('\newwrite Token',    '\alloc@@{write}\global\chardef#1=\allocationnumber');
DefMacro('\newfam Token',      '\alloc@@{fam}\global\chardef#1=\allocationnumber');
DefMacro('\newlanguage Token', '\alloc@@{language}\global\chardef#1=\allocationnumber');

# This implementation is quite wrong
# (it merely defines the token as a count register holding 0).
DefPrimitive('\newinsert Token', sub {
    my ($stomach, $cs) = @_;
    DefRegisterI($cs, undef, Number(0)); });
# \alloc@, \ch@ck
# TeX plain uses \newdimen, etc. for these.
# Is there any advantage to that?
# Standard dimension/glue constants, defined directly as registers.
# The numeric arguments are multiples of 65536.
# NOTE(review): presumably scaled points (65536sp = 1pt), matching \p@ below -- confirm.
DefRegister('\maxdimen', Dimension(16383.99999 * 65536));
DefRegister('\hideskip', Glue(-1000 * 65536, '1fill'));
DefRegister('\centering', Glue('0pt plus 1000pt minus 1000pt'));
DefRegister('\p@', Dimension(65536));
DefRegister('\z@', Dimension(0));
DefRegister('\z@skip', Glue(0, 0, 0));
# First approximation. till I figure out \newbox
RawTeX('\newbox\voidb@x');
#======================================================================
# TeX Book, Appendix B, p. 348
# \newif\iffoo : declare a new conditional for the given token.
# Expands to nothing; the definition happens as a side effect.
DefMacro('\newif DefToken', sub {
    my (undef, $conditional) = @_;
    DefConditionalI($conditional, undef);
    return; });
# See the section Registers & Parameters, above for setting default values.
#======================================================================
# TeX Book, Appendix B, p. 349
# See the section Registers & Parameters, above for setting default values.
# These are originally defined with \newskip, etc
# Default vertical skip amounts and baseline parameters.
DefRegister('\smallskipamount' => Glue('3pt plus1pt minus1pt'));
DefRegister('\medskipamount' => Glue('6pt plus2pt minus2pt'));
DefRegister('\bigskipamount' => Glue('12pt plus4pt minus4pt'));
DefRegister('\normalbaselineskip' => Glue('12pt'));
DefRegister('\normallineskip' => Glue('1pt'));
DefRegister('\normallineskiplimit' => Dimension('0pt'));
DefRegister('\jot' => Dimension('3pt'));
# Snapshot of \jot's value at this point, kept under a separate name.
DefRegister('\lx@default@jot' => LookupRegister('\jot'));
DefRegister('\interdisplaylinepenalty' => Number(100));
DefRegister('\interfootnotelinepenalty' => Number(100));
# Magnification steps: \magstep{n} expands to the digits of $mags[n].
DefMacroI('\magstephalf', undef, '1095');
our @mags = (1000, 1200, 1440, 1728, 2074, 2488);
# NOTE(review): the argument is used as an array index without a range check;
# an index outside 0..5 yields undef here -- confirm how Explode handles that.
DefMacro('\magstep{}', sub { Explode($mags[ToString($_[1])]); });
#======================================================================
# TeX Book, Appendix B, p. 350
# Font stuff ...
# Plain TeX's standard font declarations, extra families, and the
# text/script/scriptscript font assignments for math families 0-3.
# Each \fiveXX font loads the 5pt design size (so \fivei is cmmi5, used as
# \scriptscriptfont1 below), parallel to \fiverm, \fivesy and \fivebf.
# (The here-document is quoted with '...', so Perl does no interpolation.)
RawTeX(<<'EoTeX');
\font\tenrm=cmr10
\font\sevenrm=cmr7
\font\fiverm=cmr5
\font\teni=cmmi10
\font\seveni=cmmi7
\font\fivei=cmmi5
\font\tensy=cmsy10
\font\sevensy=cmsy7
\font\fivesy=cmsy5
\font\tenex=cmex10
\font\tenbf=cmbx10
\font\sevenbf=cmbx7
\font\fivebf=cmbx5
\font\tensl=cmsl10
\font\tentt=cmtt10
\font\tenit=cmti10
\newfam\itfam
\newfam\slfam
\newfam\bffam
\newfam\ttfam
\textfont0=\tenrm\scriptfont0=\sevenrm\scriptscriptfont0=\fiverm
\textfont1=\teni\scriptfont1=\seveni\scriptscriptfont1=\fivei
\textfont2=\tensy\scriptfont2=\sevensy\scriptscriptfont2=\fivesy
\textfont3=\tenex
EoTeX
# Note: \newfam in math should be font switching(?)
#======================================================================
# TeX Book, Appendix B, p. 351
# Old style font styles.
# The trick is to create an empty Whatsit preserved till assimilation (for reversion'ing)
# but to change the current font used in boxes.
# (some of these were defined on different pages? or even latex...)
# ltx:text elements may be opened and closed automatically as needed.
Tag('ltx:text', autoOpen => 1, autoClose => 1);
# Note that these, unlike \rmfamily, should set the other attributes to the defaults!
# Hence every switch below specifies family, series AND shape.
DefPrimitiveI('\rm', undef, undef,
font => { family => 'serif', series => 'medium', shape => 'upright' });
DefPrimitiveI('\sf', undef, undef,
font => { family => 'sansserif', series => 'medium', shape => 'upright' });
DefPrimitiveI('\bf', undef, undef,
font => { series => 'bold', family => 'serif', shape => 'upright' });
DefPrimitiveI('\it', undef, undef,
font => { shape => 'italic', family => 'serif', series => 'medium' });
DefPrimitiveI('\tt', undef, undef,
font => { family => 'typewriter', series => 'medium', shape => 'upright' });
# No effect in math for the following 2 ?
DefPrimitiveI('\sl', undef, undef,
font => { shape => 'slanted', family => 'serif', series => 'medium' });
DefPrimitiveI('\sc', undef, undef,
font => { shape => 'smallcaps', family => 'serif', series => 'medium' });
# Ideally, we should set these sizes from class files
# The size switches change only the font size; values are relative to a
# nominal size of 10.
AssignValue(NOMINAL_FONT_SIZE => 10);
DefPrimitiveI('\tiny', undef, undef, font => { size => 5 });
DefPrimitiveI('\scriptsize', undef, undef, font => { size => 7 });
DefPrimitiveI('\footnotesize', undef, undef, font => { size => 8 });
DefPrimitiveI('\small', undef, undef, font => { size => 9 });
DefPrimitiveI('\normalsize', undef, undef, font => { size => 10 });
DefPrimitiveI('\large', undef, undef, font => { size => 12 });
DefPrimitiveI('\Large', undef, undef, font => { size => 14.4 });
DefPrimitiveI('\LARGE', undef, undef, font => { size => 17.28 });
DefPrimitiveI('\huge', undef, undef, font => { size => 20.74 });
DefPrimitiveI('\Huge', undef, undef, font => { size => 29.8 });
# \mit is only allowed in math (requireMath) and selects the italic family.
DefPrimitiveI('\mit', undef, undef, requireMath => 1, font => { family => 'italic' });
# Defined with no replacement: accepted, but they do nothing here.
DefPrimitiveI('\frenchspacing', undef, undef);
DefPrimitiveI('\nonfrenchspacing', undef, undef);
# \normalbaselines resets the three line-spacing registers from their \normal... values.
DefMacroI('\normalbaselines', undef,
'\lineskip=\normallineskip\baselineskip=\normalbaselineskip\lineskiplimit=\normallineskiplimit');
DefMacroI('\space', undef, Tokens(T_SPACE));
DefMacroI('\lq', undef, "`");
DefMacroI('\rq', undef, "'");
Let('\empty', '\@empty');
DefMacroI('\null', undef, '\hbox{}');
# Implicit begin-group / end-group characters.
Let('\bgroup', T_BEGIN);
Let('\egroup', T_END);
Let('\endgraf', '\par');
# NOTE(review): \endline is first \let to \cr and then immediately redefined
# as a primitive with no replacement; presumably the later definition wins -- confirm
# which of the two is intended.
Let('\endline', '\cr');
DefPrimitiveI('\endline', undef, undef);
# Use \r for the newline from TeX!!!
DefMacroI("\\\r", undef, '\ '); # \<cr> == \<space> Interesting (see latex.ltx)
Let(T_ACTIVE("\r"), '\par'); # (or is this just LaTeX?)
Let("\\\t", "\\\r"); # \<tab> == \<space>, also
#======================================================================
# TeX Book, Appendix B, p. 352
# \obeyspaces: make the space character active (catcode 13) and bind the
# active space to \space.  Returns nothing to the digested output.
DefPrimitiveI('\obeyspaces', undef, sub {
AssignCatcode(" " => 13);
Let(T_ACTIVE(" "), '\space');
return });
# Curiously enough, " " (a space) is ALREADY defined to be the same as "\space"
# EVEN before it is made active. (see p.380)
Let(T_ACTIVE(" "), '\space');
# \obeylines: make carriage return active and bind it to \@break,
# which constructs an <ltx:break/> element.
DefPrimitiveI('\obeylines', undef, sub {
AssignCatcode("\r" => 13);
Let(T_ACTIVE("\r"), '\@break'); # More appropriate than \par, I think?
return });
DefConstructor('\@break', "<ltx:break/>");
# \loop ... \repeat, defined in raw TeX.
RawTeX(<<'EoTeX');
\def\loop#1\repeat{\def\body{#1}\iterate}
\def\iterate{\body \let\next=\iterate \else\let\next=\relax\fi \next}
\let\repeat=\fi
EoTeX
# Spacing commands follow a common pattern: the public macro dispatches on
# \ifmmode to a math constructor (an ltx:XMHint carrying name and width)
# or to a text primitive (a Unicode space character); both are aliased
# back to the public name.
DefMacroI('\enskip', undef, '\ifmmode\@math@enskip\else\@text@enskip\fi');
DefConstructorI('\@math@enskip', undef,
"<ltx:XMHint name='enskip' width='#width'/>",
alias => '\enskip',
properties => { isSpace => 1, width => sub { Dimension('0.5em'); } });
DefPrimitiveI('\@text@enskip', undef, "\x{2002}", alias => '\enskip');
# \enspace: half-em space.  In math it becomes an ltx:XMHint (0.5em wide),
# in text the Unicode EN SPACE (U+2002).
# The math branch must name the constructor defined just below, \@math@enspace.
DefMacroI('\enspace', undef, '\ifmmode\@math@enspace\else\@text@enspace\fi');
DefConstructorI('\@math@enspace', undef,
  "<ltx:XMHint name='enskip' width='#width'/>",
  alias      => '\enspace',
  properties => { isSpace => 1, width => sub { Dimension('0.5em'); } });
DefPrimitiveI('\@text@enspace', undef, "\x{2002}", alias => '\enspace');
# \quad: 1em.  Math: ltx:XMHint; text: U+2003 (EM SPACE).
DefMacroI('\quad', undef, '\ifmmode\@math@quad\else\@text@quad\fi');
DefConstructorI('\@math@quad', undef,
"<ltx:XMHint name='quad' width='#width'/>",
alias => '\quad',
properties => { isSpace => 1, width => sub { Dimension('1em'); } });
DefPrimitiveI('\@text@quad', undef, "\x{2003}", alias => '\quad');
# Conceivably should be treated as punctuation! (but maybe even \quad should !?!)
# \qquad: 2em.  Text form is two EM SPACE characters.
DefMacroI('\qquad', undef, '\ifmmode\@math@qquad\else\@text@qquad\fi');
DefConstructorI('\@math@qquad', undef,
"<ltx:XMHint name='qquad' width='#width'/>",
alias => '\qquad',
properties => { isSpace => 1, width => sub { Dimension('2em'); } });
DefPrimitiveI('\@text@qquad', undef, "\x{2003}\x{2003}", alias => '\qquad');
# \thinspace: 0.16667em.  Text form is U+2009 (THIN SPACE).
DefMacroI('\thinspace', undef, '\ifmmode\@math@thinspace\else\@text@thinspace\fi');
DefConstructorI('\@math@thinspace', undef,
"<ltx:XMHint name='thinspace' width='#width'/>",
alias => '\thinspace',
properties => { isSpace => 1, width => sub { Dimension('0.16667em'); } });
DefPrimitiveI('\@text@thinspace', undef, "\x{2009}", alias => '\thinspace');
# \negthinspace: -0.16667em.  The text form produces the empty string.
DefMacroI('\negthinspace', undef, '\ifmmode\@math@negthinspace\else\@text@negthinspace\fi');
DefConstructorI('\@math@negthinspace', undef,
"<ltx:XMHint name='negthinspace' width='#width'/>",
alias => '\negthinspace',
properties => { isSpace => 1, width => sub { Dimension('-0.16667em'); } });
DefPrimitiveI('\@text@negthinspace', undef, "", alias => '\negthinspace');
# \hglue: in math an ltx:XMHint whose width is the Glue argument;
# otherwise a single EM SPACE regardless of the amount.
DefConstructor('\hglue Glue', "?#isMath(<ltx:XMHint name='hglue' width='#width'/>)(\x{2003})",
properties => sub { (isSpace => 1, width => $_[1]); });
# Vertical glue and interline controls are parsed but given no replacement.
DefPrimitive('\vglue Glue', undef);
DefPrimitiveI('\topglue', undef, undef);
DefPrimitiveI('\nointerlineskip', undef, undef);
DefPrimitiveI('\offinterlineskip', undef, undef);
# The named vertical skips expand to \vskip with the corresponding amount register.
DefMacroI('\smallskip', undef, '\vskip\smallskipamount');
DefMacroI('\medskip', undef, '\vskip\medskipamount');
DefMacroI('\bigskip', undef, '\vskip\bigskipamount');
#======================================================================
# TeX Book, Appendix B, p. 353
# Line-breaking hints are accepted but given no replacement.
DefPrimitiveI('\break', undef, undef);
DefPrimitiveI('\nobreak', undef, undef);
DefPrimitiveI('\allowbreak', undef, undef);
# Non-breaking space: U+00A0 in text, an ltx:XMHint (0.333em) in math;
# both are aliased to ~, and ~ itself expands to \nobreakspace{}.
DefMacroI('\nobreakspace', undef, '\ifmmode\math@nobreakspace\else\text@nobreakspace\fi');
DefPrimitiveI('\text@nobreakspace', undef, UTF(0xA0), alias => '~');
DefConstructorI('\math@nobreakspace', undef,
"<ltx:XMHint name='nobreakspace' width='#width'/>",
properties => { isSpace => 1, width => sub { Dimension('0.333em'); } },
alias => '~');
DefMacro("~", '\nobreakspace{}');
DefMacroI('\slash', undef, '/');
DefPrimitiveI('\filbreak', undef, undef);
# Page/paragraph break requests reduce to \par, with \eject and \supereject
# additionally inserting an <ltx:pagination role='newpage'/> marker.
DefMacroI('\goodbreak', undef, '\par');
DefMacroI('\eject', undef, '\par\LTX@newpage');
Let('\newpage', '\eject');
DefConstructorI('\LTX@newpage', undef, "^<ltx:pagination role='newpage'/>");
DefMacroI('\supereject', undef, '\par\LTX@newpage');
DefPrimitiveI('\removelastskip', undef, undef);
DefMacroI('\smallbreak', undef, '\par');
DefMacroI('\medbreak', undef, '\par');
DefMacroI('\bigbreak', undef, '\par');
DefMacroI('\line', undef, '\hbox to \hsize');
# \leftline, \rightline and \centerline each take one argument and delegate
# to alignLine with the matching alignment keyword.  'bounded' confines any
# assignments made while digesting the argument.
DefConstructor('\leftline{}', sub {
    my ($document, $content) = @_;
    alignLine($document, $content, 'left'); },
  bounded => 1);
DefConstructor('\rightline{}', sub {
    my ($document, $content) = @_;
    alignLine($document, $content, 'right'); },
  bounded => 1);
DefConstructor('\centerline{}', sub {
    my ($document, $content) = @_;
    alignLine($document, $content, 'center'); },
  bounded => 1);
# alignLine($document, $line, $alignment)
# Inserts $line into $document with the CSS class 'ltx_align_<alignment>':
#   * as an ltx:p when a paragraph can be opened here;
#   * otherwise as an ltx:text followed by an ltx:break, when text can be opened;
#   * otherwise the line is simply absorbed, without any alignment class.
# Returns nothing.
sub alignLine {
  my ($document, $line, $alignment) = @_;
  if ($document->isOpenable('ltx:p')) {
    $document->insertElement('ltx:p', $line, class => 'ltx_align_' . $alignment);
    return; }
  if ($document->isOpenable('ltx:text')) {
    $document->insertElement('ltx:text', $line, class => 'ltx_align_' . $alignment);
    $document->insertElement('ltx:break');
    return; }
  $document->absorb($line);
  return; }
# These should be 0 width, but perhaps also shifted?
# Both laps are approximated identically: the content in a zero-width \hbox.
DefMacro('\llap{}', '\hbox to 0pt{#1}');
DefMacro('\rlap{}', '\hbox to 0pt{#1}');
DefMacroI('\m@th', undef, '\mathsurround=0pt ');
# \strutbox
# \strut expands to nothing; \strutbox is still allocated as a box register.
DefMacroI('\strut', undef, Tokens());
RawTeX('\newbox\strutbox');
#======================================================================
# TeX Book, Appendix B. p. 354
# TODO: Not yet done!!
# tabbing stuff!!!
DefMacroI('\settabs', undef, undef);
#======================================================================
# TeX Book, Appendix B. p. 355
DefPrimitive('\hang', undef);
# TODO: \item, \itemitem not done!
# This could probably be adopted from LaTeX, if the <itemize> could auto-open
# and close!
# For now, these three just pass their argument through unchanged.
DefConstructor('\item{}', '#1');
DefConstructor('\itemitem{}', '#1');
DefMacro('\textindent{}', '#1');
# Conceivably this should enclose the next para in a block?
# Or add attribute to it? Or...
DefPrimitiveI('\narrower', undef, undef);
#----------------------------------------------------------------------
# General support for Front Matter.
# Not (yet) used by TeX (finish plain?)
# But provides support for LaTeX (and other formats?) for handling frontmatter.
#
# The idea is to accumulate any frontmatter material (title, author,...)
# rather than directly drop it into the digested stream.
# When we begin constructing the document, all accumulated material is output.
# See LaTeX.ltxml for usage.
# Note: could be circumstances where you'd want modular frontmatter?
# (ie. frontmatter for each sectional unit)
# The accumulated frontmatter: a hash mapping each tag to a list of entries,
# where each entry is an array [tag, attributes-hash-or-undef, content, ...].
AssignValue(frontmatter => {}, 'global');

# Add a new frontmatter item that will be enclosed in <$tag %attr>...</$tag>
# The content is the result of digesting $tokens.
#   \@add@frontmatter[keys]{tag}[attributes]{content}
# keys can have
#   replace (to replace the current entry, if any)
#   ifnew   (only add if no previous entry)
# Nothing is added to the digested output.
DefPrimitive('\@add@frontmatter OptionalKeyVals {} OptionalKeyVals {}', sub {
    my ($stomach, $keys, $tag, $attr, $tokens) = @_;
    my $frontmatter = LookupValue('frontmatter');
    $tag = ToString($tag);
    if ($keys && $keys->hasKey('replace') && $$frontmatter{$tag}) {    # if replace and previous entries
      $$frontmatter{$tag} = []; }                                      # Remove previous entries
    if ($keys && $keys->hasKey('ifnew') && $$frontmatter{$tag}) {      # if ifnew and previous entries
      return; }                                                        # Skip this one.
    # Digest this as if we're already in the document body!
    # inPreamble is only switched off once we know that we will digest something,
    # so that the early return above cannot leave it cleared.
    my $inpreamble = LookupValue('inPreamble');
    AssignValue(inPreamble => 0);
    # Be careful since the contents may also want to add frontmatter
    # (which should be inside or after this one!)
    # So, we append this entry before digesting
    my $entry = [$tag, undef, 'to-be-filled-in'];
    push(@{ $$frontmatter{$tag} }, $entry);
    if ($attr) {
      $$entry[1] = { $attr->beDigested($stomach)->getHash }; }
    $$entry[2] = Digest(Tokens(T_BEGIN, $tokens, T_END));
    AssignValue(inPreamble => $inpreamble);
    return; });
# Append a piece of data to an existing frontmatter item that is contained in <$tag>
# If $label is given, look for an item which has label=>$label,
# otherwise, just append to the last item in $tag.
# \@add@to@frontmatter{tag}[label]{content}
# \@add@to@frontmatter{tag}[label]{content}
# Digests the content (inside a group, with inPreamble temporarily cleared)
# and appends the result to an existing frontmatter entry for the tag:
#   * with a label: the first entry whose attributes carry that label;
#   * without a label: the last entry recorded for the tag.
# When no suitable entry exists, a new entry is created instead
# (carrying the label as its only attribute, if one was given).
DefPrimitive('\@add@to@frontmatter {} [] {}', sub {
    my ($stomach, $tag, $label, $tokens) = @_;
    $tag   = ToString($tag);
    $label = ToString($label) if $label;
    my $frontmatter = LookupValue('frontmatter');
    my $inpreamble  = LookupValue('inPreamble');
    AssignValue(inPreamble => 0);
    my $datum = Digest(Tokens(T_BEGIN, $tokens, T_END));
    AssignValue(inPreamble => $inpreamble);
    if ($label) {
      foreach my $item (@{ $$frontmatter{$tag} || [] }) {
        # Work on a copy of the attributes slot, so that an entry
        # without attributes is left untouched by the lookup.
        my $attributes = $$item[1];
        next if $label ne ($$attributes{label} || '');
        push(@$item, $datum);
        return; } }
    elsif (my $entries = $$frontmatter{$tag}) {
      push(@{ $$entries[-1] }, $datum);
      return; }
    # Nothing to append to: start a fresh entry.
    my $newattributes = ($label ? { label => $label } : undef);
    push(@{ $$frontmatter{$tag} }, [$tag, $newattributes, $datum]);
    return; });
# This is called by afterOpen (by default on <ltx:document>) to
# output any frontmatter that was accumulated.
# The well-known frontmatter tags, in the order in which they are output.
my @frontmatter_elements = (qw(ltx:title ltx:toctitle ltx:subtitle
    ltx:creator ltx:date
    ltx:abstract ltx:keywords ltx:classification ltx:acknowledgements));
my %frontmatter_elements = map { ($_ => 1) } @frontmatter_elements;

# insertFrontMatter($document)
# Outputs all accumulated frontmatter entries into $document:
# first the well-known tags in the fixed order above, then any other tags
# in sorted order (so that the output does not depend on hash ordering).
# Each entry [tag, attributes, content...] becomes an element <tag attributes>
# that absorbs each piece of content.  When the element allows a 'font'
# attribute, the font of the first content item is recorded (and forced).
# Returns nothing.
sub insertFrontMatter {
  my ($document) = @_;
  my $frontmatter = LookupValue('frontmatter');
  my @others = sort { $a cmp $b } grep { !$frontmatter_elements{$_} } keys %$frontmatter;
  foreach my $key (@frontmatter_elements, @others) {
    if (my $list = $$frontmatter{$key}) {
      # Dubious, but assures that frontmatter appears in text mode...
      local $LaTeXML::BOX = Box('', $STATE->lookupValue('font'), '', T_SPACE);
      foreach my $item (@$list) {
        my ($tag, $attr, @stuff) = @$item;
        $document->openElement($tag, ($attr ? %$attr : ()),
          (scalar(@stuff) && $document->canHaveAttribute($tag, 'font')
            ? (font => $stuff[0]->getFont, _force_font => 'true') : ()));
        foreach my $piece (@stuff) {
          $document->absorb($piece); }
        $document->closeElement($tag); } } }
  return; }

# Run when <ltx:document> is opened (late in the afterOpen sequence).
Tag('ltx:document', 'afterOpen:late' => \&insertFrontMatter);
# Maintain a list of classes that apply to the document root.
# This might involve global style options, like leqno.
# When <ltx:document> is opened, attach the space-separated keys of the
# DOCUMENT_CLASSES mapping as classes on the root (skipped when there are none).
Tag('ltx:document', 'afterOpen:late' => sub {
    my ($document, $root) = @_;
    my $classes = join(' ', LookupMappingKeys('DOCUMENT_CLASSES'));
    $document->addClass($root, $classes) if $classes; });
# \beginsection <title>\par : opens a section (which is NOT closed here)
# whose title is everything up to the \par.
DefConstructor('\beginsection Until:\par',
"<ltx:section><ltx:title>#1</ltx:title>");
# POSSIBLY #1 is a name or reference number and #2 is the theorem TITLE
# If so, how do we know when the theorem ends?
# \proclaim <title>. <body>\par : the parameters are parsed from the TeX-style
# pattern '#1. #2\par'.  The title and its font are passed to the
# template as the properties 'title' and 'titlefont'.
DefConstructorI('\proclaim', parseDefParameters('\proclaim', Tokenize('#1. #2\par')),
"<ltx:theorem>"
. "<ltx:title font='#titlefont' _force_font='true' >#title</ltx:title>"
. "#2"
. "</ltx:theorem>",
properties => sub {
my $title = $_[1];
(title => $title, titlefont => $title->getFont); });
#======================================================================
# Support for reference numbers, references to reference numbers, etc.
# The design reflects LaTeX needs, more than TeX, but support starts here!
# The lowest level reference number for an object is typically \the<counter>
# for whatever counter is keeping track of the object.
# This will typically go into the refnum attribute.
# However, the way that number is displayed as part of the object,
# as well as the way that number is shown when the object is referenced,
# can be different from that plain refnum.
# \lx@fnum@@{type} Gets the formatted form of the refnum
# LaTeX defines \fnum@table, \fnum@figure for some types
# but \labelenumi, etc for others; we'll bind the latter to \fnum@enumi for simplicity.
# Otherwise, we'll construct a formatted number using a name prefix and the number.
# This allows for localization by defining eg. \tablename
DefMacro('\lx@fnum@@{}',
'\@ifundefined{fnum@#1}{\lx@@fnum@@{#1}}{\csname fnum@#1\endcsname}');
# \lx@fnum@toc@{type} calls \fnum@toc@<type>, if defined, else composes using \lx@@fnum@@{type}
DefMacro('\lx@fnum@toc@@{}',
'\@ifundefined{fnum@toc@#1}{\csname the#1\endcsname}{\csname fnum@toc@#1\endcsname}');
#[An abbreviated form seems potentially useful... (ie. \chapter@abbr)]
# \lx@@fnum@@{type}: compose a formatted number from \<type>name (when defined)
# and \the<type>.  A non-breaking space is put between the two parts when both
# are non-empty and the name ends in a word character.
DefPrimitive('\lx@@fnum@@ {}', sub {
    my ($stomach, $type) = @_;
    my $typename = ToString($type);
    # Digest the name prefix only if \<type>name has a definition.
    my $name_cs = T_CS('\\' . $typename . 'name');
    my @name    = ();
    @name = Digest($name_cs)->unlist if LookupDefinition($name_cs);
    my @num = Digest(T_CS('\the' . $typename))->unlist;
    # String form of the last box of the name (false when there is no such string).
    my $tail = @name && $name[-1]->can('getString') && $name[-1]->getString;
    my @separator = ();
    if (scalar(@name) && scalar(@num) && ($tail =~ /\w/)) {
      @separator = (Digest(T_CS('\text@nobreakspace'))); }
    return (@name, @separator, @num); });
# \lx@refnum@@{type} Gets the referencing form of the refnum.
# Look for a \refnum@<type> and use it; otherwise fall back to \lx@fnum@@{type}.
# NOTE(review): this comment previously said the fallback is "just \thetype";
# the code below uses \lx@fnum@@{type} -- confirm which of the two is intended.
DefMacro('\lx@refnum@@{}',
'\@ifundefined{refnum@#1}{\lx@fnum@@{#1}}{\csname refnum@#1\endcsname}');
#======================================================================
# TeX Book, Appendix B. p. 356
# Layout switches that are accepted but given no replacement (undef) here.
# (\raggedleft is actually LaTeX.)
foreach my $cs ('\raggedright', '\raggedleft', '\ttraggedright', '\leavevmode') {
  DefPrimitiveI($cs, undef, undef); }
#----------------------------------------------------------------------
# Actually from LaTeX; Table 3.2. Non-English Symbols, p.39
# The following shouldn't appear in math.
# Each row is [control sequence, replacement character]; every row is defined
# as a primitive without parameters.
foreach my $entry (
  ['\OE', "\x{0152}"],    # LATIN CAPITAL LIGATURE OE
  ['\oe', "\x{0153}"],    # LATIN SMALL LIGATURE OE
  ['\AE', UTF(0xC6)],     # LATIN CAPITAL LETTER AE
  ['\ae', UTF(0xE6)],     # LATIN SMALL LETTER AE
  ['\AA', UTF(0xC5)],     # LATIN CAPITAL LETTER A WITH RING ABOVE
  ['\aa', UTF(0xE5)],     # LATIN SMALL LETTER A WITH RING ABOVE
  ['\O',  UTF(0xD8)],     # LATIN CAPITAL LETTER O WITH STROKE
  ['\o',  UTF(0xF8)],     # LATIN SMALL LETTER O WITH STROKE
  ['\L',  "\x{0141}"],    # LATIN CAPITAL LETTER L WITH STROKE
  ['\l',  "\x{0142}"],    # LATIN SMALL LETTER L WITH STROKE
  ['\ss', UTF(0xDF)],     # LATIN SMALL LETTER SHARP S
  # apparently the rest can appear in math.
  ['\dag',       "\x{2020}"],    # DAGGER
  ['\ddag',      "\x{2021}"],    # DOUBLE DAGGER
  ['\S',         UTF(0xa7)],     # SECTION SIGN
  ['\P',         UTF(0xB6)],     # PILCROW SIGN
  ['\copyright', UTF(0xA9)],     # COPYRIGHT SIGN
  ['\pounds',    UTF(0xA3)],     # POUND SIGN
  ) {
  my ($cs, $char) = @$entry;
  DefPrimitiveI($cs, undef, $char); }
#----------------------------------------------------------------------
# Accents. LaTeX Table 3.1, p.38
#----------------------------------------------------------------------
# All of TeX's accents can (sorta) be handled by Unicode's combining accents
# (which follow the character to be accented).
# We'll let unicode normalization do the combination, if needed.
# Also, note that \t is intended to combine multiple chars, but it appears to
# work (via mozilla !?) best when the combining char is after the 1st char.
# Further, the accents \d and \b seem to center the under dot or bar under multiple
# chars --- how should this be handled in Unicode?
# Since people sometimes try to get fancy by using an empty argument,
# for each, I'm providing the combining code and an equivalent(?) spacing one.
# (doesn't look quite the same to use a combining char after a space)
# Create a box applying an accent to a letter
# Hopefully, we'll get a Box from digestion with a plain string.
# Then we can apply combining accents to it.
# applyAccent($stomach, $letter, $combiningchar, $standalonechar, $reversion)
#   $letter         : the material to be accented; it is digested here.
#   $combiningchar  : combining accent inserted after the first character.
#   $standalonechar : used alone when the digested text is empty or only whitespace.
#   $reversion      : passed through to the resulting Box.
# Returns a Box with the font and locator of the digested letter.
sub applyAccent {
  my ($stomach, $letter, $combiningchar, $standalonechar, $reversion) = @_;
  my $digested = Digest($letter);
  my $locator  = $digested->getLocator;
  my $font     = $digested->getFont;
  my $text     = $digested->toString;
  $text =~ tr/\x{0131}\x{0237}/ij/;    # dotless i,j => dotted i,j
  $text =~ s/\s/ /g;                   # any whitespace => plain space
  # Nothing to put the accent on: use the standalone form of the accent.
  if ($text =~ /^\s*$/) {
    return Box($standalonechar, $font, $locator, $reversion); }
  # Otherwise the combining char follows the first character; NFC composes if possible.
  my $first = substr($text, 0, 1);
  my $rest  = substr($text, 1);
  return Box(NFC($first . $combiningchar . $rest), $font, $locator, $reversion); }
# Defines an accent command using a combining char that follows the
# 1st char of the argument. In cases where there is no argument, $standalonechar is used.
# DefAccent($accent, $combiningchar, $standalonechar, %options)
#   Options: above => bool, below => bool.  When neither is given, 'above' is assumed.
# Records standalone => combining in exactly one of the mappings
# 'accent_combiner_above' / 'accent_combiner_below' (consulted by \accent),
# then defines $accent as a text-mode primitive taking one argument.
sub DefAccent {
  my ($accent, $combiningchar, $standalonechar, %options) = @_;
  my $is_above = $options{above};
  $is_above = 1 unless defined($is_above) || $options{below};
  my $mapping = ($is_above ? 'accent_combiner_above' : 'accent_combiner_below');
  AssignMapping($mapping, $standalonechar => $combiningchar);
  my $handler = sub {
    my ($stomach, $letter) = @_;
    applyAccent($stomach, $letter, $combiningchar, $standalonechar, Invocation($accent, $letter)); };
  DefPrimitive($accent . "{}", $handler, mode => 'text');
  return; }
# Each row holds the arguments of one DefAccent call:
#   [control sequence, combining char, standalone char, options...]
foreach my $accent (
  ['\`',          "\x{0300}", UTF(0x60)],     # COMBINING GRAVE ACCENT & GRAVE ACCENT
  ["\\'",         "\x{0301}", UTF(0xB4)],     # COMBINING ACUTE ACCENT & ACUTE ACCENT
  ['\^',          "\x{0302}", UTF(0x5E)],     # COMBINING CIRCUMFLEX ACCENT & CIRCUMFLEX ACCENT
  ['\"',          "\x{0308}", UTF(0xA8)],     # COMBINING DIAERESIS & DIAERESIS
  ['\~',          "\x{0303}", "~"],           # COMBINING TILDE
  ['\=',          "\x{0304}", UTF(0xAF)],     # COMBINING MACRON & MACRON
  ['\.',          "\x{0307}", "\x{02D9}"],    # COMBINING DOT ABOVE & DOT ABOVE
  ['\u',          "\x{0306}", "\x{02D8}"],    # COMBINING BREVE & BREVE
  ['\v',          "\x{030C}", "\x{02C7}"],    # COMBINING CARON & CARON
  ['\@ringaccent', "\x{030A}", "o"],          # COMBINING RING ABOVE & non-combining
  ['\r',          "\x{030A}", "o"],           # COMBINING RING ABOVE & non-combining
  ['\H',          "\x{030B}", "\x{02DD}"],    # COMBINING DOUBLE ACUTE ACCENT & non-combining
  ['\c', "\x{0327}", UTF(0xB8), below => 1],  # COMBINING CEDILLA & CEDILLA
  # NOTE: The next two get defined for math, as well; See below
  ['\@text@daccent', "\x{0323}", '.',       below => 1],    # COMBINING DOT BELOW & DOT (?)
  ['\@text@baccent', "\x{0331}", UTF(0xAF), below => 1],    # COMBINING MACRON BELOW & MACRON
  ['\t', "\x{0361}", "-"],    # COMBINING DOUBLE INVERTED BREVE & ???? What????
  # this one's actually defined in mathscinet.sty, but just stick it here!
  ['\lfhook', "\x{0326}", ",", below => 1],    # COMBINING COMMA BELOW
  ) {
  DefAccent(@$accent); }
# I doubt that latter covers multiple chars...?
#DefAccent('\bar',"\x{0304}", ?); # COMBINING MACRON or is this the longer overbar?
# This will fail if there really are "assignments" after the number!
# We're given a number pointing into the font, from which we can derive the standalone char.
# From that, we want to figure out the combining character, but there could be one for
# both the above & below cases! We'll prefer the above case.
# \accent <number> {letter}
# The number is decoded to the standalone accent character (through the font's
# encoding when one is recorded, else chr), which is then looked up in the
# mappings filled by DefAccent to find the matching combining character.
DefPrimitive('\accent Number {}', sub {
    my ($stomach, $num, $letter) = @_;
    my $code     = $num->valueOf;
    my $fam      = 0;                                               # ?
    my $font     = LookupValue('fontinfo_' . $fam . '_text');
    my $fontinfo = LookupValue('fontinfo_' . ToString($font));
    my $encoding = $fontinfo && $$fontinfo{encoding};
    my $acc       = ($encoding ? FontDecode($code, $encoding) : chr($code));
    my $reversion = Invocation(T_CS('\accent'), $num, $letter);
    # NOTE: REVERSE LOOKUP in above accent list for the non-spacing accent char
    # BUT, \accent always (?) makes an above type accent... doesn't it?
    my $combiner = LookupMapping('accent_combiner_above', $acc)
      || LookupMapping('accent_combiner_below', $acc);
    if ($combiner) {
      return applyAccent($stomach, $letter, $combiner, $acc, $reversion); }
    # Unknown accent: warn, and pass the letter through unaccented.
    Warn('unexpected', "accent$code", $stomach, "Accent '$code' not recognized");
    return Box(ToString($letter), undef, undef, $reversion); });
# Note that these two apparently work in Math? BUT the argument is treated as text!!!
# \d and \b dispatch on the current mode to a math or a text variant.
DefMacro('\d{}', '\ifmmode\@math@daccent{#1}\else\@text@daccent{#1}\fi');
DefMacro('\b{}', '\ifmmode\@math@baccent{#1}\else\@text@baccent{#1}\fi');
# The two math variants differ only in name, alias and the under-accent character:
#   [prototype, alias, accent character]
foreach my $underaccent (
  ['\@math@daccent {}', '\d', "\x{22c5}"],
  ['\@math@baccent {}', '\b', UTF(0xAF)]) {
  my ($prototype, $alias, $mark) = @$underaccent;
  DefConstructor($prototype,
    "<ltx:XMApp><ltx:XMTok role='UNDERACCENT'>" . $mark . "</ltx:XMTok>"
      . "?#textarg(<ltx:XMText>#textarg</ltx:XMText>)(<ltx:XMArg>#matharg</ltx:XMArg>)"
      . "</ltx:XMApp>",
    mode        => 'text', alias => $alias,
    afterDigest => sub {
      my ($stomach, $whatsit) = @_;
      my $content = $whatsit->getArg(1);
      # The argument is digested in text mode; record it under whichever
      # property the template should use.
      if (!$content->isMath) {
        $whatsit->setProperty(textarg => $content); }
      else {
        $whatsit->setProperty(matharg => $content->getBody); }
      return; }); }
#======================================================================
# TeX Book, Appendix B. p. 357
# The filler commands are defined as primitives with no replacement.
foreach my $filler (qw(\hrulefill \dotfill \rightarrowfill \leftarrowfill
  \upbracefill \downbracefill)) {
  DefPrimitive($filler, undef); }
# Alternate names.
Let('\bye', '\end');
Let('\sp',  T_SUPER);
Let('\sb',  T_SUB);
# \, : thin space.
# Text mode yields U+2009; math mode yields an XMHint whose width is the value
# stored under '\thinmuskip'.  The dispatching macro picks one by \ifmmode.
DefPrimitiveI('\@text@thinmuskip', undef, "\x{2009}", alias => '\,');
DefConstructorI('\@math@thinmuskip', undef,
  "<ltx:XMHint name='thinspace' width='#width'/>",
  alias      => '\,',
  properties => {
    width   => sub { return LookupValue('\thinmuskip'); },
    isSpace => 1 });
DefMacroI('\,', undef, '\ifmmode\@math@thinmuskip\else\@text@thinmuskip\fi');
# \! : negative thin space.
# Text mode contributes an empty string; math mode yields an XMHint whose width
# is the negation of the value stored under '\thinmuskip'.
DefMacroI('\!', undef, '\ifmmode\@math@negthinmuskip\else\@text@negthinmuskip\fi');
DefConstructorI('\@math@negthinmuskip', undef,
  "<ltx:XMHint name='negthinspace' width='#width'/>",
  alias      => '\!',
  properties => { isSpace => 1,
    width => sub {
      # Guard the method call: if no value is stored under '\thinmuskip',
      # leave the width undefined rather than dying on undef->negate.
      # NOTE(review): whether such a value is always assigned is not visible
      # from this part of the file -- confirm.
      my $thin = LookupValue('\thinmuskip');
      return (defined $thin ? $thin->negate : undef); } });
DefPrimitiveI('\@text@negthinmuskip', undef, "", alias => '\!');
# \> : medium space.  Dispatches by \ifmmode: the math variant emits an XMHint whose
# width is the value stored under '\medmuskip'; the text variant contributes an empty string.
DefMacroI('\>', undef, '\ifmmode\@math@medmuskip\else\@text@medmuskip\fi');
DefConstructorI('\@math@medmuskip', undef,
"<ltx:XMHint name='medspace' width='#width'/>",
alias => '\>',
properties => { isSpace => 1,
width => sub { LookupValue('\medmuskip'); } });
DefPrimitiveI('\@text@medmuskip', undef, "", alias => '\>');
# \; : thick space.  The math variant's width is the value stored under '\thickmuskip';
# the text variant yields U+2004.
DefMacroI('\;', undef, '\ifmmode\@math@thickmuskip\else\@text@thickmuskip\fi');
DefConstructorI('\@math@thickmuskip', undef,
"<ltx:XMHint name='thickspace' width='#width'/>",
alias => '\;',
properties => { isSpace => 1,
width => sub { LookupValue('\thickmuskip'); } });
DefPrimitiveI('\@text@thickmuskip', undef, "\x{2004}", alias => '\;');
# \: is the same as \> (this must come after \> has been defined).
Let('\:', '\>');
# "\ " (control space): the math variant is a 'medspace' hint of width 0.5em;
# the text variant yields U+00A0.
DefMacroI('\ ', undef, '\ifmmode\@math@nbspace\else\@text@nbspace\fi');
DefConstructorI('\@math@nbspace', undef,
"<ltx:XMHint name='medspace' width='#width'/>",
alias => '\ ',
properties => { isSpace => 1,
width => sub { Dimension('0.5em'); } });
DefPrimitiveI('\@text@nbspace', undef, UTF(0xA0), alias => '\ ');
# Backslash followed by a TAB character: the math variant is a 'medspace' hint of
# width 1em; the text variant yields U+00A0.
DefMacroI("\\\t", undef, '\ifmmode\@math@tab\else\@text@tab\fi');
DefConstructorI('\@math@tab', undef, # Tab!!
"<ltx:XMHint name='medspace' width='#width'/>",
alias => "\\\t", # TAB
properties => { isSpace => 1,
width => sub { Dimension('1em'); } });
DefPrimitiveI('\@text@tab', undef, UTF(0xA0), alias => "\\\t"); # TAB!!! What else?
# \/ : italic correction.  The math variant emits a width-less 'italiccorr' hint;
# the text variant contributes an empty string.
DefMacroI('\/', undef, '\ifmmode\@math@italiccorr\else\@text@italiccorr\fi');
DefConstructorI('\@math@italiccorr', undef,
"<ltx:XMHint name='italiccorr'/>",
alias => '\/',
properties => { isSpace => 1 });
DefPrimitiveI('\@text@italiccorr', undef, "", alias => '\/');
# What kind of magic might allow \mskip to translate these back into the above?
# The three muskip parameters are defined here as macros expanding to the
# textual glue specification.
DefMacroI('\thinmuskip', undef, "3 mu");
DefMacroI('\medmuskip', undef, "4mu plus 2mu minus 4mu");
DefMacroI('\thickmuskip', undef, "5mu plus 5mu");
#======================================================================
# TeX Book, Appendix B. p. 358
#----------------------------------------------------------------------
# Actually from LaTeX; Table 3.3, Greek, p.41
#----------------------------------------------------------------------
# Greek letters: each row is [control sequence, Unicode character];
# none carries a role or meaning.
foreach my $greek (
  # lowercase (and variant forms)
  ['\alpha',      "\x{03B1}"],
  ['\beta',       "\x{03B2}"],
  ['\gamma',      "\x{03B3}"],
  ['\delta',      "\x{03B4}"],
  ['\epsilon',    "\x{03F5}"],
  ['\varepsilon', "\x{03B5}"],
  ['\zeta',       "\x{03B6}"],
  ['\eta',        "\x{03B7}"],
  ['\theta',      "\x{03B8}"],
  ['\vartheta',   "\x{03D1}"],
  ['\iota',       "\x{03B9}"],
  ['\kappa',      "\x{03BA}"],
  ['\lambda',     "\x{03BB}"],
  ['\mu',         "\x{03BC}"],
  ['\nu',         "\x{03BD}"],
  ['\xi',         "\x{03BE}"],
  ['\pi',         "\x{03C0}"],
  ['\varpi',      "\x{03D6}"],
  ['\rho',        "\x{03C1}"],
  ['\varrho',     "\x{03F1}"],
  ['\sigma',      "\x{03C3}"],
  ['\varsigma',   "\x{03C2}"],
  ['\tau',        "\x{03C4}"],
  ['\upsilon',    "\x{03C5}"],
  ['\phi',        "\x{03D5}"],
  ['\varphi',     "\x{03C6}"],
  ['\chi',        "\x{03C7}"],
  ['\psi',        "\x{03C8}"],
  ['\omega',      "\x{03C9}"],
  # uppercase
  ['\Gamma',   "\x{0393}"],
  ['\Delta',   "\x{0394}"],
  ['\Theta',   "\x{0398}"],
  ['\Lambda',  "\x{039B}"],
  ['\Xi',      "\x{039E}"],
  ['\Pi',      "\x{03A0}"],
  ['\Sigma',   "\x{03A3}"],
  ['\Upsilon', "\x{03A5}"],
  ['\Phi',     "\x{03A6}"],
  ['\Psi',     "\x{03A8}"],
  ['\Omega',   "\x{03A9}"],
  ) {
  my ($cs, $char) = @$greek;
  DefMathI($cs, undef, $char); }
#----------------------------------------------------------------------
# Actually from LaTeX; Table 3.7. Miscellaneous Symbols, p.43
#----------------------------------------------------------------------
# Some should be differential operators, qualifiers, ...
# Miscellaneous symbols: each row is
#   [control sequence, character, DefMathI options...]
foreach my $symbol (
  ['\aleph', "\x{2135}"],
  ['\hbar',  "\x{210F}", role => 'ID', meaning => 'Planck-constant-over-2-pi'],
  ['\imath', "\x{0131}"],
  ['\jmath', "\x{0237}"],
  ['\ell',   "\x{2113}"],
  ['\wp',    "\x{2118}", meaning => 'Weierstrass-p'],
  ['\Re',    "\x{211C}", role => 'OPFUNCTION', meaning => 'real-part'],
  ['\Im',    "\x{2111}", role => 'OPFUNCTION', meaning => 'imaginary-part'],
  ['\mho',   "\x{2127}"],
  ['\prime', "\x{2032}", role => 'SUPOP', locked => 1],
  ['\emptyset', "\x{2205}", role => 'ID',       meaning => 'empty-set'],
  ['\nabla',    "\x{2207}", role => 'OPERATOR'],
  ['\surd',     "\x{221A}", role => 'OPERATOR', meaning => 'square-root'],
  ['\top',      "\x{22A4}", role => 'ADDOP',    meaning => 'top'],
  ['\bot',      "\x{22A5}", role => 'ADDOP',    meaning => 'bottom'],
  ['\|',        "\x{2225}", role => 'VERTBAR', name => '||', meaning => 'parallel-to'],
  ['\angle',    "\x{2220}"],
  # NOTE: This is probably the wrong role.
  # Also, should probably carry info about Binding for OpenMath
  ['\forall', "\x{2200}", role => 'BIGOP', meaning => 'for-all'],
  ['\exists', "\x{2203}", role => 'BIGOP', meaning => 'exists'],
  ['\neg',    UTF(0xAC),  role => 'FUNCTION', meaning => 'not'],
  ['\lnot',   UTF(0xAC),  role => 'FUNCTION', meaning => 'not'],
  ['\flat',    "\x{266D}"],
  ['\natural', "\x{266E}"],
  ['\sharp',   "\x{266F}"],
  ['\backslash', UTF(0x5C),  role => 'MULOP'],
  ['\partial',   "\x{2202}", role => 'OPERATOR', meaning => 'partial-differential'],
  ['\infty',     "\x{221E}", role => 'ID',       meaning => 'infinity'],
  ['\Box',         "\x{25A1}"],
  ['\Diamond',     "\x{25C7}"],
  ['\triangle',    "\x{25B3}"],
  ['\clubsuit',    "\x{2663}"],
  ['\diamondsuit', "\x{2662}"],
  ['\heartsuit',   "\x{2661}"],
  ['\spadesuit',   "\x{2660}"],
  ) {
  my ($cs, $char, @options) = @$symbol;
  DefMathI($cs, undef, $char, @options); }
#----------------------------------------------------------------------
# \smallint: an integral sign fixed to text style and font size 9; script
# placement is decided by doScriptpos.
DefMath('\smallint', "\x{222B}",    # INTEGRAL
  role      => 'INTOP',
  meaning   => 'integral',
  mathstyle => 'text',
  scriptpos => \&doScriptpos,
  font      => { size => 9 });
#----------------------------------------------------------------------
# Actually LaTeX; Table 3.8. Variable-sized Symbols, p.44.
#----------------------------------------------------------------------
# Script position for an operator: 'mid' when the current font's math style is
# 'display', otherwise 'post'.
sub doScriptpos {
  my $mathstyle = LookupValue('font')->getMathstyle;
  return 'mid' if $mathstyle eq 'display';
  return 'post'; }
# Math style for a variable-sized operator: 'display' when the current font's
# math style is 'display', otherwise 'text'.
sub doVariablesizeOp {
  my $mathstyle = LookupValue('font')->getMathstyle;
  return 'display' if $mathstyle eq 'display';
  return 'text'; }
# Variable-sized operators.  Each row is
#   [control sequence, character, role, meaning (or undef), movable scripts?, Big font?]
# All of them take their math style from doVariablesizeOp.  "movable scripts"
# adds scriptpos => \&doScriptpos; "Big font" adds font => { size => 'Big' }.
foreach my $operator (
  ['\sum',    "\x{2211}", 'SUMOP', 'sum',              1, 0],
  ['\prod',   "\x{220F}", 'SUMOP', 'product',          1, 0],
  ['\coprod', "\x{2210}", 'SUMOP', 'coproduct',        1, 0],
  ['\int',    "\x{222B}", 'INTOP', 'integral',         0, 0],
  ['\oint',   "\x{222E}", 'INTOP', 'contour-integral', 0, 0],
  ['\bigcap',    "\x{22C2}", 'SUMOP', 'intersection',         1, 1],
  ['\bigcup',    "\x{22C3}", 'SUMOP', 'union',                1, 1],
  ['\bigsqcup',  "\x{2294}", 'SUMOP', 'square-union',         1, 1],
  ['\bigvee',    "\x{22C1}", 'SUMOP', 'or',                   1, 1],
  ['\bigwedge',  "\x{22C0}", 'SUMOP', 'and',                  1, 1],
  ['\bigodot',   "\x{2299}", 'SUMOP', undef,                  1, 1],    #meaning=> ?
  ['\bigotimes', "\x{2297}", 'SUMOP', 'tensor-product',       1, 1],
  ['\bigoplus',  "\x{2295}", 'SUMOP', 'direct-sum',           1, 1],
  ['\biguplus',  "\x{228E}", 'SUMOP', 'symmetric-difference', 1, 1],
  ) {
  my ($cs, $char, $role, $meaning, $movable, $big) = @$operator;
  DefMathI($cs, undef, $char,
    role => $role,
    ($movable         ? (scriptpos => \&doScriptpos) : ()),
    (defined $meaning ? (meaning   => $meaning)      : ()),
    mathstyle => \&doVariablesizeOp,
    ($big ? (font => { size => 'Big' }) : ())); }
# \limits: set scriptpos='mid' on the last child element of the current element
# (or on the current element itself when it has no child element).
DefConstructorI('\limits', undef, sub {
    my ($document) = @_;
    my $current = $document->getElement;
    my $target  = $document->getLastChildElement($current) || $current;
    $document->setAttribute($target, scriptpos => 'mid'); });
# \nolimits: drop scriptpos from the last child element of the current element
# (or from the current element itself when it has no child element).
# The default is 'post', so removing the attribute is enough.
DefConstructorI('\nolimits', undef, sub {
    my ($document) = @_;
    my $current = $document->getElement;
    my $target  = $document->getLastChildElement($current) || $current;
    $target->removeAttribute('scriptpos'); });
# \displaylimits: behaves like \limits when the math style recorded at digestion
# time is 'display', and like \nolimits otherwise.
DefConstructorI('\displaylimits', undef, sub {
    my ($document, %props) = @_;
    my $current = $document->getElement;
    my $target  = $document->getLastChildElement($current) || $current;
    my $style   = $props{mathstyle} || 'text';
    if ($style ne 'display') {
      $target->removeAttribute('scriptpos'); }
    else {
      $document->setAttribute($target, scriptpos => 'mid'); } },
  # Capture the math style in effect when the command is digested.
  properties => sub { return (mathstyle => LookupValue('font')->getMathstyle); });
#----------------------------------------------------------------------
# Actually from LaTeX; Table 3.4. Binary Operation Symbols, p.42
#----------------------------------------------------------------------
# Binary operators: each row is [control sequence, character, role, meaning];
# the meaning is omitted where none is assigned.
foreach my $binop (
  ['\pm',     UTF(0xB1),  'ADDOP', 'plus-or-minus'],
  ['\mp',     "\x{2213}", 'ADDOP', 'minus-or-plus'],
  ['\times',  UTF(0xD7),  'MULOP', 'times'],
  ['\div',    UTF(0xF7),  'MULOP', 'divide'],
  ['\ast',    "\x{2217}", 'MULOP'],
  ['\star',   "\x{22C6}", 'MULOP'],
  ['\circ',   "\x{2218}", 'MULOP', 'compose'],
  ['\bullet', "\x{2219}", 'MULOP'],
  ['\cdot',   "\x{22C5}", 'MULOP'],
  ## , meaning=>'inner-product'); that's pushing it a bit far...
  # Need to classify set operations more carefully....
  ['\cap',      "\x{2229}", 'ADDOP', 'intersection'],
  ['\cup',      "\x{222A}", 'ADDOP', 'union'],
  ['\uplus',    "\x{228E}", 'ADDOP'],
  ['\sqcap',    "\x{2293}", 'ADDOP', 'square-intersection'],
  ['\sqcup',    "\x{2294}", 'ADDOP', 'square-union'],
  ['\vee',      "\x{2228}", 'ADDOP', 'or'],
  ['\lor',      "\x{2228}", 'ADDOP', 'or'],
  ['\wedge',    "\x{2227}", 'ADDOP', 'and'],
  ['\land',     "\x{2227}", 'ADDOP', 'and'],
  ['\setminus', "\x{2216}", 'ADDOP', 'set-minus'],
  ['\wr',       "\x{2240}", 'MULOP'],
  # Should this block be ADDOP or something else?
  ['\diamond',         "\x{22C4}", 'ADDOP'],
  ['\bigtriangleup',   "\x{25B3}", 'ADDOP'],
  ['\bigtriangledown', "\x{25BD}", 'ADDOP'],
  ['\triangleleft',    "\x{25C1}", 'ADDOP'],
  ['\triangleright',   "\x{25B7}", 'ADDOP'],
  ['\lhd',   "\x{22B2}", 'ADDOP', 'subgroup-of'],
  ['\rhd',   "\x{22B3}", 'ADDOP', 'contains-as-subgroup'],
  ['\unlhd', "\x{22B4}", 'ADDOP', 'subgroup-of-or-equals'],
  ['\unrhd', "\x{22B5}", 'ADDOP', 'contains-as-subgroup-or-equals'],
  ['\oplus',   "\x{2295}", 'ADDOP', 'direct-sum'],
  ['\ominus',  "\x{2296}", 'ADDOP', 'symmetric-difference'],
  ['\otimes',  "\x{2297}", 'MULOP', 'tensor-product'],
  ['\oslash',  "\x{2298}", 'MULOP'],
  ['\odot',    "\x{2299}", 'MULOP', 'direct-product'],
  ['\bigcirc', "\x{25CB}", 'MULOP'],
  ['\dagger',  "\x{2020}", 'MULOP'],
  ['\ddagger', "\x{2021}", 'MULOP'],
  ['\amalg',   "\x{2210}", 'MULOP', 'coproduct'],
  ) {
  my ($cs, $char, $role, $meaning) = @$binop;
  DefMathI($cs, undef, $char,
    role => $role,
    (defined $meaning ? (meaning => $meaning) : ())); }
#----------------------------------------------------------------------
# LaTeX; Table 3.5. Relation Symbols, p.43
#----------------------------------------------------------------------
# Relation symbols: each row is [control sequence, character, role, meaning];
# the meaning is omitted where none is assigned.
foreach my $relation (
  ['\leq',        "\x{2264}", 'RELOP',     'less-than-or-equals'],
  ['\prec',       "\x{227A}", 'RELOP',     'precedes'],
  ['\preceq',     "\x{2AAF}", 'RELOP',     'precedes-or-equals'],
  ['\ll',         "\x{226A}", 'RELOP',     'much-less-than'],
  ['\subset',     "\x{2282}", 'RELOP',     'subset-of'],
  ['\subseteq',   "\x{2286}", 'RELOP',     'subset-of-or-equals'],
  ['\sqsubset',   "\x{228F}", 'RELOP',     'square-image-of'],
  ['\sqsubseteq', "\x{2291}", 'RELOP',     'square-image-of-or-equals'],
  ['\in',         "\x{2208}", 'RELOP',     'element-of'],
  ['\vdash',      "\x{22A2}", 'METARELOP', 'proves'],
  ['\geq',        "\x{2265}", 'RELOP',     'greater-than-or-equals'],
  ['\succ',       "\x{227B}", 'RELOP',     'succeeds'],
  ['\succeq',     "\x{2AB0}", 'RELOP',     'succeeds-or-equals'],
  ['\gg',         "\x{226B}", 'RELOP',     'much-greater-than'],
  ['\supset',     "\x{2283}", 'RELOP',     'superset-of'],
  ['\supseteq',   "\x{2287}", 'RELOP',     'superset-of-or-equals'],
  ['\sqsupset',   "\x{2290}", 'RELOP',     'square-original-of'],
  ['\sqsupseteq', "\x{2292}", 'RELOP',     'square-original-of-or-equals'],
  ['\ni',         "\x{220B}", 'RELOP',     'contains'],
  ['\dashv',      "\x{22A3}", 'METARELOP', 'does-not-prove'],
  # I have the impression that "identical" is a stronger notion than "equivalence".
  # Note that the unicode here is called "Identical To",
  # and that the notion of "equivalent to" usually involves the tilde operator.
  ['\equiv',  "\x{2261}", 'RELOP', 'equivalent-to'],
  ['\sim',    "\x{223C}", 'RELOP', 'similar-to'],
  ['\simeq',  "\x{2243}", 'RELOP', 'similar-to-or-equals'],
  ['\asymp',  "\x{224D}", 'RELOP', 'asymptotically-equals'],
  ['\approx', "\x{2248}", 'RELOP', 'approximately-equals'],
  ['\cong',   "\x{2245}", 'RELOP', 'approximately-equals'],
  ['\neq',    "\x{2260}", 'RELOP', 'not-equals'],
  ['\doteq',  "\x{2250}", 'RELOP', 'approaches-limit'],
  ['\notin',  "\x{2209}", 'RELOP', 'not-element-of'],
  ['\models', "\x{22A7}", 'RELOP', 'models'],
  ['\perp',   "\x{27C2}", 'RELOP', 'perpendicular-to'],
  ['\mid',      "\x{2223}", 'VERTBAR'],    # DIVIDES (RELOP?) ?? well, sometimes...
  ['\parallel', "\x{2225}", 'VERTBAR', 'parallel-to'],
  ['\bowtie', "\x{22C8}", 'RELOP'],            # BOWTIE
  ['\Join',   "\x{2A1D}", 'RELOP', 'join'],
  ['\smile',  "\x{2323}", 'RELOP'],            # SMILE
  ['\frown',  "\x{2322}", 'RELOP'],            # FROWN
  ['\propto', "\x{221D}", 'RELOP', 'proportional-to'],
  ) {
  my ($cs, $char, $role, $meaning) = @$relation;
  DefMathI($cs, undef, $char,
    role => $role,
    (defined $meaning ? (meaning => $meaning) : ())); }
# TeX defines these as alternate names...
Let('\le', '\leq');
Let('\ge', '\geq');
Let('\ne', '\neq');
# And it defines some others as alternate names, but they seem to
# potentially imply slightly different meanings??? Leave them out for now..
#----------------------------------------------------------------------
# Not; (Is fullwidth solidus appropriate for when \not appears in isolation?)
# \not on its own is shown as a fullwidth solidus; the rewrite rule keyed on this
# character and meaning='not' combines it with whatever follows.
DefMathI('\not', undef, "\x{FF0F}",
  meaning => 'not',
  role    => 'OPFUNCTION');
# Match negations of many operators.
# Maps the character of an operator to the single Unicode character for its
# negation.  Operators not listed here are negated by the \not rewrite rule by
# appending U+0338 (COMBINING LONG SOLIDUS OVERLAY) instead.
# Every key must be the character used by the corresponding DefMathI.
our %NOTS = ('=' => "\x{2260}", '<' => "\x{226E}", '>' => "\x{226F}",
  "\x{2208}" => "\x{2209}",                                #\in=>\notin
  "\x{2264}" => "\x{2270}", "\x{2265}" => "\x{2271}",    # Less eq, greater eq.
  "\x{227A}" => "\x{2280}", "\x{227B}" => "\x{2281}",    # prec, succ
  "\x{2AAF}" => "\x{22E0}", "\x{2AB0}" => "\x{22E1}",    # preceq, succeq
  "\x{2282}" => "\x{2284}", "\x{2283}" => "\x{2285}",    # subset, supset
  "\x{2286}" => "\x{2288}", "\x{2287}" => "\x{2289}",    # subseteq, supseteq
  # U+22E3 is NOT SQUARE ORIGINAL OF OR EQUAL TO, so it negates U+2292
  # (\sqsupseteq), not U+2290 (\sqsupset, which has no precomposed negation).
  "\x{2291}" => "\x{22E2}", "\x{2292}" => "\x{22E3}",    # sqsubseteq, sqsupseteq
  "\x{2261}" => "\x{2262}",                                # equiv
  "\x{224D}" => "\x{226D}", "\x{2248}" => "\x{2249}",    # asymp, approx
  "\x{22B2}" => "\x{22EA}", "\x{22B3}" => "\x{22EB}",    # lhd, rhd
  "\x{22B4}" => "\x{22EC}", "\x{22B5}" => "\x{22ED}",    # unlhd, unrhd
  "\x{2203}" => "\x{2204}",                                # Exists
);
# For a \not operator that is followed by anything, concoct an appropriate not or cancellation.
# The rule selects an XMTok whose text is U+FF0F with meaning='not' and which has a
# following sibling; the replacement sub receives the document, that token ($not)
# and the node after it ($thing).
# NOTE(review): the 2 in `select` is presumably the number of consecutive nodes
# handed to the replacement -- confirm against DefRewrite.
DefRewrite(select => ["descendant-or-self::ltx:XMTok[text()='\x{FF0F}' and \@meaning='not']"
. "[ following-sibling::*]", 2],
replace => sub {
my ($doc, $not, $thing) = @_;
# $text is the token's text content when $thing is an ltx:XMTok, otherwise a false value.
my $text = ($doc->getModel->getNodeQName($thing) eq 'ltx:XMTok')
&& $thing->textContent;
if ((!defined $text) || (length($text) != 1)) { # Not simple char token.
# Build <XMApp> containing an ENCLOSE token (updiagonalstrike, meaning 'not') followed by $thing.
my $box = $doc->getNodeBox($not);
$doc->openElement('ltx:XMApp', _box => $box); # Wrap with a cancel op
my $strike = $doc->insertMathToken(undef, role => 'ENCLOSE', enclose => 'updiagonalstrike',
meaning => 'not', _box => $box);
# Transfer the xml:id of the disappearing \not token to the new strike token.
if (my $id = $not->getAttribute('xml:id')) {
$not->removeAttribute('xml:id');
$doc->unRecordID($id);
$doc->setAttribute($strike, 'xml:id' => $id); }
$doc->getNode->appendChild($thing);
$doc->closeElement('ltx:XMApp'); }
else {
# For simple tokens, we'll modify the relevant content & attributes
# [children removed, id's presumably ignorable]
map { $_->unbindNode() } $thing->childNodes;
# Use the dedicated negated character from %NOTS when there is one,
# else the original character followed by U+0338.
my $new = defined $NOTS{$text} ? $NOTS{$text} : $text . "\x{0338}";
$thing->appendText($new);
# Prefix an existing meaning, and the name (or, lacking one, the original text), with "not-".
if (my $meaning = $thing->getAttribute('meaning')) {
$doc->setAttribute($thing, meaning => "not-$meaning"); }
if (my $name = $thing->getAttribute('name') || $text) {
$doc->setAttribute($thing, name => "not-$name"); }
# and put the node back in
$doc->getNode->appendChild($thing);
# Since the <not> element is disappearing, if it had an id that was referenced...!?!?
# Any XMRef pointing at the \not token's id is removed.
if (my $id = $not->getAttribute('xml:id')) {
foreach my $n ($doc->findnodes("descendant-or-self::ltx:XMRef[\@idref='$id']")) {
$doc->removeNode($n); } } # ? Hopefully this is safe.
} });
#----------------------------------------------------------------------
# \joinrel
# The single and double bars used to build extended relations; both are RELOPs. (???)
foreach my $bar (['\relbar', "-"], ['\Relbar', "="]) {
  my ($cs, $char) = @$bar;
  DefMathI($cs, undef, $char, role => 'RELOP'); }
# \joinrel is \mathrel{\mkern-3\mu}
# Ah, but the Effect is to join 2 "relations" into one!
# \joinrel: take the item most recently added to the current list ($left) and the
# next thing the input produces ($right), and wrap them in a \@@joinrel whatsit.
# Returns nothing when there is no left item or when the input yields nothing.
DefPrimitiveI('\joinrel', undef, sub {
    my ($stomach, $op) = @_;
    my $gullet = $stomach->getGullet;
    $gullet->skipSpaces;
    my $left = $LaTeXML::LIST[-1];
    return () if !$left;    # Nothing there?... I guess this becomes a no-op???
    pop(@LaTeXML::LIST);
    # Keep reading and invoking tokens until one of them produces something.
    my @produced = ();
    while (!@produced) {
      my $token = $gullet->readXToken(0);
      last unless $token;
      @produced = $stomach->invokeToken($token); }
    return () unless @produced;    # no-op ????
    my $right  = shift(@produced);
    my $joined = LaTeXML::Core::Whatsit->new(LookupDefinition(T_CS('\@@joinrel')), [$left, $right],
      locator => $gullet->getLocator,
      font    => $right->getFont, isMath => 1);
    # Anything else produced along with $right is returned ahead of the joined whatsit.
    return (@produced, $joined); });
# \@@joinrel{left}{right}: constructor created by \joinrel.
# Absorbs both operands; then, if the last two child elements of the current
# node are BOTH ltx:XMTok, replaces them by a single ltx:XMTok whose content
# is the two text contents joined.  The role of the new token is the common
# role if both agree, otherwise ARROW if either was an ARROW, else RELOP.
# If either of the two nodes is not a simple token they are left untouched
# (previously a single XMTok was enough to trigger the merge, which flattened
# the other, structured, node down to its text content).
DefConstructor('\@@joinrel{}{}', sub {
    my ($document, $left, $right) = @_;
    $document->absorb($left);
    $document->absorb($right);
    # Now if last 2 items are XMTok, replace by a single token with joined content (& attr?)
    my $node  = $document->getNode;
    my @nodes = $document->getChildElements($node);
    if (scalar(@nodes) >= 2) {
      my @rels = ($nodes[-2], $nodes[-1]);
      # Require that NEITHER node is something other than an XMTok.
      if (!grep { $document->getNodeQName($_) ne 'ltx:XMTok' } @rels) {
        my %roles = ();
        foreach my $rel (@rels) {
          my $r = $rel->getAttribute('role');
          # A missing role is recorded as '' (as an undef hash key would be), without the warning.
          $roles{ defined $r ? $r : '' } = 1; }
        my $role = (scalar(keys %roles) == 1 ? [keys %roles]->[0] : ($roles{ARROW} ? 'ARROW' : 'RELOP'));
        foreach my $rel (@rels) {
          $node->removeChild($rel); }
        $document->insertElement('ltx:XMTok', [map { $_->textContent } @rels], role => $role);
      } } },
  reversion => '#1\joinrel #2');
#----------------------------------------------------------------------
# LaTeX; Table 3.6. Arrow Symbols, p.43
#----------------------------------------------------------------------
# Arrows get treated somewhat like relations (or meta-relations),
# but it's hard to associate any particular "meaning" to them.
# Table of arrow symbols: [control sequence, glyph, role, extra DefMathI options...].
# Each row is declared with DefMathI in the order listed.
foreach my $arrow (
  ['\leftarrow',          "\x{2190}", 'ARROW'],                           # LEFTWARDS ARROW
  ['\Leftarrow',          "\x{21D0}", 'ARROW'],                           # LEFTWARDS DOUBLE ARROW
  ['\rightarrow',         "\x{2192}", 'ARROW'],                           # RIGHTWARDS ARROW
  ['\Rightarrow',         "\x{21D2}", 'ARROW'],                           # RIGHTWARDS DOUBLE ARROW
  ['\leftrightarrow',     "\x{2194}", 'METARELOP'],                       # LEFT RIGHT ARROW
  ['\Leftrightarrow',     "\x{21D4}", 'METARELOP'],                       # LEFT RIGHT DOUBLE ARROW
  ['\iff',                "\x{21D4}", 'METARELOP', meaning => 'iff'],     # LEFT RIGHT DOUBLE ARROW
  ['\mapsto',             "\x{21A6}", 'ARROW', meaning => 'maps-to'],
  ['\hookleftarrow',      "\x{21A9}", 'ARROW'],                           # LEFTWARDS ARROW WITH HOOK
  ['\leftharpoonup',      "\x{21BC}", 'ARROW'],                           # LEFTWARDS HARPOON WITH BARB UPWARDS
  ['\leftharpoondown',    "\x{21BD}", 'ARROW'],                           # LEFTWARDS HARPOON WITH BARB DOWNWARDS
  ['\rightleftharpoons',  "\x{21CC}", 'METARELOP'],                       # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
  ['\longleftarrow',      "\x{27F5}", 'ARROW'],                           # LONG LEFTWARDS ARROW
  ['\Longleftarrow',      "\x{27F8}", 'ARROW'],                           # LONG LEFTWARDS DOUBLE ARROW
  ['\longrightarrow',     "\x{27F6}", 'ARROW'],                           # LONG RIGHTWARDS ARROW
  ['\Longrightarrow',     "\x{27F9}", 'ARROW'],                           # LONG RIGHTWARDS DOUBLE ARROW
  ['\longleftrightarrow', "\x{27F7}", 'METARELOP'],                       # LONG LEFT RIGHT ARROW
  ['\Longleftrightarrow', "\x{27FA}", 'METARELOP'],                       # LONG LEFT RIGHT DOUBLE ARROW
  ['\longmapsto',         "\x{27FC}", 'ARROW'],                           # LONG RIGHTWARDS ARROW FROM BAR
  ['\hookrightarrow',     "\x{21AA}", 'ARROW'],                           # RIGHTWARDS ARROW WITH HOOK
  ['\rightharpoonup',     "\x{21C0}", 'ARROW'],                           # RIGHTWARDS HARPOON WITH BARB UPWARDS
  ['\rightharpoondown',   "\x{21C1}", 'ARROW'],                           # RIGHTWARDS HARPOON WITH BARB DOWNWARDS
  ['\leadsto',            "\x{219D}", 'ARROW', meaning => 'leads-to'],
  ['\uparrow',            "\x{2191}", 'ARROW'],                           # UPWARDS ARROW
  ['\Uparrow',            "\x{21D1}", 'ARROW'],                           # UPWARDS DOUBLE ARROW
  ['\downarrow',          "\x{2193}", 'ARROW'],                           # DOWNWARDS ARROW
  ['\Downarrow',          "\x{21D3}", 'ARROW'],                           # DOWNWARDS DOUBLE ARROW
  ['\updownarrow',        "\x{2195}", 'ARROW'],                           # UP DOWN ARROW
  ['\Updownarrow',        "\x{21D5}", 'ARROW'],                           # UP DOWN DOUBLE ARROW
  ['\nearrow',            "\x{2197}", 'ARROW'],                           # NORTH EAST ARROW
  ['\searrow',            "\x{2198}", 'ARROW'],                           # SOUTH EAST ARROW
  ['\swarrow',            "\x{2199}", 'ARROW'],                           # SOUTH WEST ARROW
  ['\nwarrow',            "\x{2196}", 'ARROW'],                           # NORTH WEST ARROW
) {
  my ($cs, $glyph, $role, @extra) = @$arrow;
  DefMathI($cs, undef, $glyph, role => $role, @extra); }
# \mapstochar (3237), \lhook(312C), \rhook(312D)
# These are really wrong; I can't find the right Unicode Glyphs.
# These are only fragments intended to be assembled into meaningful(?) symbols.
# Glyph fragments (no role assigned); trailing comments give the TeX codes.
foreach my $fragment (
  ['\mapstochar', "\x{2E20}"],    # TeX 3237
  ['\lhook',      "\x{2E26}"],    # TeX 312C
  ['\rhook',      "\x{2E27}"],    # TeX 312D
) {
  my ($cs, $glyph) = @$fragment;
  DefMathI($cs, undef, $glyph); }
#======================================================================
# TeX Book, Appendix B. p. 359
# Ah, since \ldots can appear in text and math....
# \ldots: in math, an ltx:XMTok (role ID) holding U+2026; in text, the bare character.
# Since this is not a DefMath, the math font is computed by hand in `properties`.
DefConstructorI('\ldots', undef,
  "?#isMath(<ltx:XMTok name='ldots' font='#font' role='ID'>\x{2026}</ltx:XMTok>)(\x{2026})",
  properties => sub {
    # No font property is supplied outside of math.
    return () unless LookupValue('IN_MATH');
    my $upright = LookupValue('font')->merge(family => 'serif',
      series => 'medium', shape => 'upright');
    return (font => $upright->specialize("\x{2026}")); });
# And so can \vdots
# \vdots: in math, an ltx:XMTok (role ID) holding U+22EE; in text, the bare character.
# Since this is not a DefMath, the math font is computed by hand in `properties`.
DefConstructorI('\vdots', undef,
  "?#isMath(<ltx:XMTok name='vdots' font='#font' role='ID'>\x{22EE}</ltx:XMTok>)(\x{22EE})",
  properties => sub {
    # No font property is supplied outside of math.
    return () unless LookupValue('IN_MATH');
    my $upright = LookupValue('font')->merge(family => 'serif',
      series => 'medium', shape => 'upright');
    return (font => $upright->specialize("\x{22EE}")); });
# But not these!
# Math-only symbols: [control sequence, glyph, role].
foreach my $symbol (
  ['\cdots', "\x{22EF}", 'ID'],           # MIDLINE HORIZONTAL ELLIPSIS
  ['\ddots', "\x{22F1}", 'ID'],           # DOWN RIGHT DIAGONAL ELLIPSIS
  ['\colon', ':',        'METARELOP'],    # Seems like good default role
) {
  my ($cs, $glyph, $role) = @$symbol;
  DefMathI($cs, undef, $glyph, role => $role); }
# Note that amsmath redefines \dots to be `smart'.
# Aha, also can be in text...
# \dots: in math, an ltx:XMTok (role ID) holding U+2026; in text, the bare character.
# Since this is not a DefMath, the math font is computed by hand in `properties`.
DefConstructorI('\dots', undef,
  "?#isMath(<ltx:XMTok name='dots' font='#font' role='ID'>\x{2026}</ltx:XMTok>)(\x{2026})",
  properties => sub {
    # No font property is supplied outside of math.
    return () unless LookupValue('IN_MATH');
    my $upright = LookupValue('font')->merge(family => 'serif',
      series => 'medium', shape => 'upright');
    return (font => $upright->specialize("\x{2026}")); });
# And while we're at it...
# Pretest for XMath to keep from interpreting math that the DOM may not allow!!
##DefMathRewrite(xpath=>'descendant-or-self::ltx:XMath',match=>'\cdot\cdot\cdot',replace=>'\cdots');
# Math ligature: three consecutive U+22C5 (DOT OPERATOR) characters map to U+22EF, with role ID and name 'cdots'.
DefMathLigature("\x{22C5}\x{22C5}\x{22C5}" => "\x{22EF}", role => 'ID', name => 'cdots');
# Text ligature: "..." maps to U+2026, subject to the font test (family is not 'typewriter').
DefLigature(qr{\.\.\.}, "\x{2026}", fontTest => sub { $_[0]->getFamily ne 'typewriter'; }); # ldots
#DefMathRewrite(xpath=>'descendant-or-self::ltx:XMath',match=>'...',replace=>'\ldots');
# Math ligature: "..." maps to U+2026, with role ID and name 'ldots'.
DefMathLigature("..." => "\x{2026}", role => 'ID', name => 'ldots');
#----------------------------------------------------------------------
# Math Accents.
#----------------------------------------------------------------------
# LaTeX; Table 3.11. Math Mode Accents, p.50.
# Are these all TeX (or LaTeX)?
# Note that most of these should NOT be stretchy, by default!
# Table of math accents: [name, accent glyph, operator role, extra DefMath options...].
# Every accent takes a single Digested argument; most are explicitly non-stretchy.
foreach my $accent (
  ['hat',        UTF(0x5E),  'OVERACCENT',  operator_stretchy => 'false'],
  ['check',      "\x{02C7}", 'OVERACCENT',  operator_stretchy => 'false'],    # CARON
  ['breve',      "\x{02D8}", 'OVERACCENT'],                                   # BREVE
  ['acute',      UTF(0xB4),  'OVERACCENT'],                                   # ACUTE ACCENT
  ['grave',      UTF(0x60),  'OVERACCENT'],                                   # GRAVE ACCENT
  ['tilde',      UTF(0x7E),  'OVERACCENT',  operator_stretchy => 'false'],    # TILDE
  ['bar',        UTF(0xAF),  'OVERACCENT',  operator_stretchy => 'false'],    # MACRON
  ['vec',        "\x{2192}", 'OVERACCENT',  operator_stretchy => 'false'],    # RIGHTWARDS ARROW
  ['dot',        "\x{02D9}", 'OVERACCENT'],                                   # DOT ABOVE
  ['ddot',       UTF(0xA8),  'OVERACCENT'],                                   # DIAERESIS
  ['overline',   UTF(0xAF),  'OVERACCENT'],                                   # MACRON
  ['overbrace',  "\x{23DE}", 'OVERACCENT',  scriptpos => 'mid'],              # TOP CURLY BRACKET
  ['widehat',    UTF(0x5E),  'OVERACCENT'],     # CIRCUMFLEX ACCENT [plain? also amsfonts]
  ['widetilde',  UTF(0x7E),  'OVERACCENT'],     # TILDE [plain? also amsfonts]
  ['underbrace', "\x{23DF}", 'UNDERACCENT', scriptpos => 'mid'],              # BOTTOM CURLY BRACKET
) {
  my ($name, $glyph, $role, @extra) = @$accent;
  DefMath('\\' . $name . ' Digested', $glyph, operator_role => $role, @extra); }
# NOTE that all the above accents REQUIRE math mode
# EXCEPT underline, overrightarrow and overleftarrow!
# Math-mode implementation of \underline (an UNDERACCENT); reverts as \underline via `alias`.
DefMath('\math@underline{}', UTF(0xAF), operator_role => 'UNDERACCENT',
name => 'underline', alias => '\underline');
# Text-mode implementation of \underline: an ltx:text with framed='underline'.
DefConstructor('\text@underline{}', "<ltx:text framed='underline' _noautoclose='1'>#1</ltx:text>");
# Math-mode implementations of the over-arrows (OVERACCENTs), aliased to their public names.
DefMath('\math@overrightarrow{}', "\x{2192}", operator_role => 'OVERACCENT',
name => 'overrightarrow', alias => '\overrightarrow');
DefMath('\math@overleftarrow{}', "\x{2190}", operator_role => 'OVERACCENT',
name => 'overleftarrow', alias => '\overleftarrow');
# Public macros dispatch on \ifmmode to the math or text implementation.
# Careful: Use \protect so that it doesn't expand too early in alignments, etc.
DefMacro('\underline{}', '\protect\ifmmode\math@underline{#1}\else\text@underline{#1}\fi');
# \underbar is simply made equivalent to \underline (must follow the definition above).
Let('\underbar', '\underline'); # Will anyone notice?
# Outside math mode, the over-arrows wrap their math implementation in $...$.
DefMacro('\overrightarrow{}', '\protect\ifmmode\math@overrightarrow{#1}\else$\math@overrightarrow{#1}$\fi');
DefMacro('\overleftarrow{}', '\protect\ifmmode\math@overleftarrow{#1}\else$\math@overleftarrow{#1}$\fi');
#----------------------------------------------------------------------
# LaTeX; Table 3.10. Delimiters, p.47
#----------------------------------------------------------------------
# The meaning of OPEN/CLOSE tends to depend upon the pairing,
# rather than the individual tokens.
# This meaning is handled in MathParser (for now)
# \{ and \} dispatch on \ifmmode to a math or a text implementation.
DefMacroI('\{', undef, '\ifmmode\lx@math@lbrace\else\lx@text@lbrace\fi');
DefMacroI('\}', undef, '\ifmmode\lx@math@rbrace\else\lx@text@rbrace\fi');
# Math versions: non-stretchy OPEN/CLOSE tokens, reverting as \{ and \}.
DefMathI('\lx@math@lbrace', undef, '{', role => 'OPEN', stretchy => 'false', alias => '\{');
DefMathI('\lx@math@rbrace', undef, '}', role => 'CLOSE', stretchy => 'false', alias => '\}');
# Text versions: plain characters set in an upright font (font change bounded to the character).
DefPrimitiveI('\lx@text@lbrace', undef, '{', alias => '\{',
# font => { specialize => "{" });
font => { shape => 'upright' }, bounded => 1); # Since not DefMath!
DefPrimitiveI('\lx@text@rbrace', undef, '}', alias => '\}',
# font => { specialize => "}" }); # Since not DefMath!
font => { shape => 'upright' }, bounded => 1); # Since not DefMath!
# Named synonyms; these Let's must follow the definitions of \{ and \} above.
Let('\lbrace', '\{');
Let('\lbrack', T_OTHER('['));
Let('\rbrace', '\}');
Let('\rbrack', T_OTHER(']'));
# Paired fences: [control sequence, glyph, role]; all are non-stretchy by default.
# Note: We should be using 27E8,27E9, which are "mathematical", not 2329,232A
foreach my $fence (
  ['\lceil',  "\x{2308}", 'OPEN'],     # LEFT CEILING
  ['\rceil',  "\x{2309}", 'CLOSE'],    # RIGHT CEILING
  ['\lfloor', "\x{230A}", 'OPEN'],     # LEFT FLOOR
  ['\rfloor', "\x{230B}", 'CLOSE'],    # RIGHT FLOOR
  ['\langle', "\x{27E8}", 'OPEN'],     # LEFT-POINTING ANGLE BRACKET
  ['\rangle', "\x{27E9}", 'CLOSE'],    # RIGHT-POINTING ANGLE BRACKET
) {
  my ($cs, $glyph, $role) = @$fence;
  DefMathI($cs, undef, $glyph, role => $role, stretchy => 'false'); }
# Not sure these should be defined here, or latex, or even latex compat mode.
# Bold delimiters: [control sequence, glyph, remaining DefMathI options...].
foreach my $bold (
  ['\lgroup',    "(", role => 'OPEN',  stretchy => 'false'],
  ['\rgroup',    ")", role => 'CLOSE', stretchy => 'false'],
  ['\bracevert', "|", role => 'VERTBAR'],
) {
  my ($cs, $glyph, @attributes) = @$bold;
  DefMathI($cs, undef, $glyph, font => { series => 'bold' }, @attributes); }
## DefMath('\lmoustache',"???", font=>{series=>'bold'}, role=>'OPEN');
## DefMath('\rmoustache',"???", font=>{series=>'bold'}, role=>'OPEN');
# TeX marks some symbols as delimiters which can be used with \left,\right,
# but many of which have different grammatical roles otherwise, eg. arrows, <, >.
# Short of setting up TeX's complicated encoding machinery, I need an explicit
# mapping. Unfortunately, this doesn't (yet) support people declaring their own delimiters!
# This duplicates in slightly different way what DefMath has put together.
# Map from the delimiter as written after \left/\right to a record with:
#   char  : the character to output
#   lrole : role when used with \left;  rrole : role when used with \right
#   name  : token name (only present for the named delimiters)
# Rows below are [key, char, lrole, rrole, name?].
our %DELIMITER_MAP = map {
  my ($key, $char, $lrole, $rrole, $name) = @$_;
  ($key => { char => $char, lrole => $lrole, rrole => $rrole,
      (defined $name ? (name => $name) : ()) });
} (
  ['(',             "(",        'OPEN',    'CLOSE'],
  [')',             ")",        'OPEN',    'CLOSE'],
  ['[',             "[",        'OPEN',    'CLOSE'],
  [']',             "]",        'OPEN',    'CLOSE'],
  ['\{',            "{",        'OPEN',    'CLOSE'],
  ['\}',            "}",        'OPEN',    'CLOSE'],
  ['\lfloor',       "\x{230A}", 'OPEN',    'CLOSE',   'lfloor'],
  ['\rfloor',       "\x{230B}", 'OPEN',    'CLOSE',   'rfloor'],
  ['\lceil',        "\x{2308}", 'OPEN',    'CLOSE',   'lceil'],
  ['\rceil',        "\x{2309}", 'OPEN',    'CLOSE',   'rceil'],
  ['\langle',       "\x{27E8}", 'OPEN',    'CLOSE',   'langle'],
  ['\rangle',       "\x{27E9}", 'OPEN',    'CLOSE',   'rangle'],
  ['<',             "\x{27E8}", 'OPEN',    'CLOSE',   'langle'],
  ['>',             "\x{27E9}", 'OPEN',    'CLOSE',   'rangle'],
  ['/',             "/",        'MULOP',   'MULOP'],
  ['\backslash',    UTF(0x5C),  'MULOP',   'MULOP',   'backslash'],
  ['|',             "|",        'VERTBAR', 'VERTBAR'],
  ['\|',            "\x{2225}", 'VERTBAR', 'VERTBAR'],
  ['\uparrow',      "\x{2191}", 'OPEN',    'CLOSE',   'uparrow'],        # ??
  ['\Uparrow',      "\x{21D1}", 'OPEN',    'CLOSE',   'Uparrow'],        # ??
  ['\downarrow',    "\x{2193}", 'OPEN',    'CLOSE',   'downarrow'],      # ??
  ['\Downarrow',    "\x{21D3}", 'OPEN',    'CLOSE',   'Downarrow'],      # ??
  ['\updownarrow',  "\x{2195}", 'OPEN',    'CLOSE',   'updownarrow'],    # ??
  ['\Updownarrow',  "\x{21D5}", 'OPEN',    'CLOSE',   'Updownarrow'],    # ??
);
# With new treatment of Simple Symbols as just Box's with assigned attributes,
# we're not getting whatsits, and so we're not looking them up the same way!!!
# TEMPORARILY (?) hack the Delimiter map
# Also index every record by the character it produces, so that a delimiter
# that arrives as its already-converted character is still found.
{
  my @records = values %DELIMITER_MAP;
  foreach my $record (@records) {
    $DELIMITER_MAP{ $record->{char} } = $record; } }
# lookup_delimiter($delim)
#   Returns the %DELIMITER_MAP record (hash ref with char, lrole, rrole and
#   possibly name) for the given delimiter string, or undef if it is unknown.
sub lookup_delimiter {
  my ($key) = @_;
  my $record = $DELIMITER_MAP{$key};
  return $record; }
# This is a little messier than you'd think.
# These effectively create a group between the \left,\right.
# And this also gives us a single list of things to parse separately.
# Since \left,\right are TeX primitives and must be paired up,
# we use a bit of macro trickery to simulate.
# [The \@hidden@bgroup/egroup keep from putting a {} into the UnTeX]
# HOWEVER, an additional complication is that it is a common mistake to omit the balancing \right!
# Using an \egroup (or hidden) makes it hard to recover, so use a special egroup
# \left<delim> expands to \@left<delim> followed by a hidden group opener.
DefMacro('\left XToken', '\@left #1\@hidden@bgroup');
# Like \@hidden@egroup, but softer about missing \left
# Produces no output and reverts to nothing; after digestion it either closes
# the group, or reports an error when the current frame does not look like one
# opened by \left (a mode switch, or a group flagged groupNonBoxing).
DefConstructor('\right@hidden@egroup', '',
afterDigest => sub {
my ($stomach) = @_;
if ($STATE->isValueBound('MODE', 0) # Last stack frame was a mode switch!?!?!
|| $STATE->lookupValue('groupNonBoxing')) { # or group was opened with \begingroup
Error('unexpected', '\right', undef, "Unbalanced \\right, no balancing \\left."); }
else {
$stomach->egroup; } },
reversion => '');
# \right<delim> first closes the group, then emits \@right<delim>.
DefMacro('\right XToken', '\right@hidden@egroup\@right #1');
# \@left<delim>: the opening delimiter of a \left...\right pair (reverts as \left).
# Output is one of:
#   - a stretchy ltx:XMTok, when the delimiter is known to %DELIMITER_MAP;
#   - an ltx:XMHint, when the delimiter is '.';
#   - otherwise the argument itself (marked stretchy if its role is OPEN,
#     else a warning is issued).
DefConstructor('\@left Token',
  "?#char(<ltx:XMTok role='#role' name='#name' stretchy='#stretchy'>#char</ltx:XMTok>)"
    . "(?#hint(<ltx:XMHint/>)(#1))",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $token = $whatsit->getArg(1);
    my $key   = ToString($token);
    # The null delimiter.
    if ($key eq '.') {
      $whatsit->setProperty(hint => 1);
      return; }
    # A delimiter from the explicit table: use its left-hand role.
    if (my $known = $DELIMITER_MAP{$key}) {
      $whatsit->setProperties(role => $known->{lrole},
        char     => $known->{char},
        name     => $known->{name},
        stretchy => 'true');
      $whatsit->setFont($token->getFont());
      return; }
    # Something else that already declares itself an opening delimiter.
    if (($token->getProperty('role') || '') eq 'OPEN') {
      $token->setProperty(stretchy => 'true');
      return; }
    Warn('unexpected', $key, $stomach,
      "Missing delimiter; '.' inserted");
    return; },
  alias => '\left');
# \@right<delim>: the closing delimiter of a \left...\right pair (reverts as \right).
# Output is one of:
#   - a stretchy ltx:XMTok, when the delimiter is known to %DELIMITER_MAP
#     (using the entry's right-hand role);
#   - an ltx:XMHint, when the delimiter is '.';
#   - otherwise the argument itself (marked stretchy if its role is CLOSE,
#     else a warning is issued).
DefConstructor('\@right Token',
  "?#char(<ltx:XMTok role='#role' name='#name' stretchy='#stretchy'>#char</ltx:XMTok>)"
    . "(?#hint(<ltx:XMHint/>)(#1))",
  afterDigest => sub { my ($stomach, $whatsit) = @_;
    my $arg   = $whatsit->getArg(1);
    my $delim = ToString($arg);
    if ($delim eq '.') {
      $whatsit->setProperty(hint => 1); }
    elsif (my $entry = $DELIMITER_MAP{$delim}) {
      $whatsit->setProperties(role => $$entry{rrole},
        char     => $$entry{char},
        name     => $$entry{name},
        stretchy => 'true');
      $whatsit->setFont($arg->getFont()); }
    elsif (($arg->getProperty('role') || '') eq 'CLOSE') {
      $arg->setProperty(stretchy => 'true'); }
    else {
      # Message now matches the one issued by \@left (a stray ')' was removed).
      Warn('unexpected', $delim, $stomach,
        "Missing delimiter; '.' inserted"); }
    return; },
  alias => '\right');
# These originally had Token as parameter, rather than {}..... Why?
# Note that in TeX, \big{((} will only enlarge the 1st paren!!!
# \big, \Big, \bigg, \Bigg: pass the argument through with the font size set
# to the like-named size (the change is bounded to the argument).
foreach my $size (qw(big Big bigg Bigg)) {
  DefConstructor('\\' . $size . ' {}', '#1', bounded => 1, font => { size => $size }); }
# addDelimiterRole($document, $role)
#   Sets role=$role on the last child element of the current node (or on the
#   current node itself when it has no child element), but only when that
#   node currently has a delimiter-like role (OPEN, MIDDLE, CLOSE, VERTBAR)
#   or no role at all; any other existing role is left alone.
#   Returns nothing.
sub addDelimiterRole {
  my ($document, $role) = @_;
  my $parent = $document->getNode;
  my $target = $document->getLastChildElement($parent) || $parent;
  return unless $target;
  # Non-element nodes, and elements without a role, count as '<none>'.
  my $existing = '<none>';
  if ($target->nodeType == XML_ELEMENT_NODE) {
    $existing = $target->getAttribute('role') || '<none>'; }
  ## Maybe we shouldn't switch VERTBAR ?
  ## The catch is that occasionally people use a single \Bigl (or whatever)
  ## where they should have used a \Big
  if ($existing =~ /^(OPEN|MIDDLE|CLOSE|VERTBAR|<none>)$/) {
    $document->setAttribute($target, role => $role); }
  return; }
# The "m" versions are defined in e-Tex and other places.
# The sized delimiters with an explicit side: for each size, the l/m/r variants
# behave like the plain sized command and afterwards mark the constructed
# delimiter as OPEN, MIDDLE or CLOSE (see addDelimiterRole).
foreach my $size (qw(big Big bigg Bigg)) {
  foreach my $side ([l => 'OPEN'], [m => 'MIDDLE'], [r => 'CLOSE']) {
    my ($suffix, $role) = @$side;
    DefConstructor('\\' . $size . $suffix . ' {}', '#1', bounded => 1, font => { size => $size },
      afterConstruct => sub { addDelimiterRole($_[0], $role); }); } }
# \vert is the plain '|' character; \Vert is made equivalent to \| (whose definition is not in this section).
Let('\vert', T_OTHER('|'));
Let('\Vert', '\|');
#======================================================================
# TeX Book, Appendix B. p. 360
# \choose, et al, already handled above.
# Note that in TeX, all 4 args get digested(!)
# and the choice is made when absorbing!
# \mathchoice{display}{text}{script}{scriptscript}
#   All four alternatives are digested; the math style in effect at digestion
#   is recorded as the `mathstyle` property, and at construction only the
#   matching alternative is absorbed (anything other than display/text/script
#   selects the scriptscript alternative).
DefConstructor('\mathchoice Digested Digested Digested Digested', sub {
    my ($document, $display, $text, $script, $scriptscript, %props) = @_;
    my $style = $props{mathstyle};
    my $chosen;
    if    ($style eq 'display') { $chosen = $display; }
    elsif ($style eq 'text')    { $chosen = $text; }
    elsif ($style eq 'script')  { $chosen = $script; }
    else                        { $chosen = $scriptscript; }
    $document->absorb($chosen); },
  properties => { mathstyle => sub { LookupValue('font')->getMathstyle; } });
# \mathpalette{cmd}{arg}: a \mathchoice whose four branches are
# {cmd\<style>{arg}} for display, text, script and scriptscript style, in that order.
DefMacro('\mathpalette{}{}',
  '\mathchoice'
    . join('', map { '{#1\\' . $_ . '{#2}}' }
      qw(displaystyle textstyle scriptstyle scriptscriptstyle)));
# \phantom, \hphantom, \vphantom: in math, an ltx:XMHint named after the command;
# in text, the argument wrapped in an ltx:text of class ltx_phantom. # !?!?!?!
foreach my $kind (qw(phantom hphantom vphantom)) {
  DefConstructor('\\' . $kind . '{}',
    "?#isMath(<ltx:XMHint name='$kind'/>)"
      . "(<ltx:text class='ltx_phantom'>#1</ltx:text>)",
    properties => { isSpace => 1 }); }
# \mathstrut: an ltx:XMHint in math, nothing at all in text.
DefConstructor('\mathstrut',
  "?#isMath(<ltx:XMHint name='mathstrut'/>)()",
  properties => { isSpace => 1 });
# \smash simply passes its argument through.   well, what?
DefConstructor('\smash{}', "#1");
#======================================================================
# TeX Book, Appendix B. p. 361
# This is actually LaTeX's definition, but let's just do it this way.
# \sqrt[n]{x}: with the optional argument, an application of 'nth-root' to (n, x);
# without it, an application of 'square-root' to x.
{
  my $nth_root = "<ltx:XMApp><ltx:XMTok meaning='nth-root'/>"
    . "<ltx:XMArg>#1</ltx:XMArg><ltx:XMArg>#2</ltx:XMArg></ltx:XMApp>";
  my $square_root = "<ltx:XMApp><ltx:XMTok meaning='square-root'/>"
    . "<ltx:XMArg>#2</ltx:XMArg></ltx:XMApp>";
  DefConstructor('\sqrt OptionalInScriptStyle Digested',
    "?#1(" . $nth_root . ")(" . $square_root . ")"); }
# \root n \of {x}: an application of 'nth-root' to (n, x).
DefConstructor('\root Until:\of {}',
  join('',
    "<ltx:XMApp>",
    "<ltx:XMTok meaning='nth-root'/>",
    "<ltx:XMArg>#1</ltx:XMArg>",
    "<ltx:XMArg>#2</ltx:XMArg>",
    "</ltx:XMApp>"));
#----------------------------------------------------------------------
# LaTeX; Table 3.9. Log-like Functions, p.44.
#----------------------------------------------------------------------
# NOTE: Classifying some as TRIGFUNCTION might clarify 'pi' ambiguities ?
# Table of log-like functions:
#   [name, role, meaning (or undef), uses doScriptpos (or false), displayed text (defaults to name)]
foreach my $function (
  ['arccos', 'OPFUNCTION',   'inverse-cosine'],
  ['arcsin', 'OPFUNCTION',   'inverse-sine'],
  ['arctan', 'OPFUNCTION',   'inverse-tangent'],
  ['arg',    'OPFUNCTION',   'argument'],
  ['cos',    'TRIGFUNCTION', 'cosine'],
  ['cosh',   'TRIGFUNCTION', 'hyperbolic-cosine'],
  ['cot',    'TRIGFUNCTION', 'cotangent'],
  ['coth',   'TRIGFUNCTION', 'hyperbolic-cotangent'],
  ['csc',    'TRIGFUNCTION', 'cosecant'],
  ['deg',    'OPFUNCTION',   'degree'],
  ['det',    'LIMITOP',      'determinant', 1],
  ['dim',    'LIMITOP',      'dimension'],
  ['exp',    'OPFUNCTION',   'exponential'],
  ['gcd',    'OPFUNCTION',   'gcd', 1],
  ['hom',    'OPFUNCTION'],
  ['inf',    'LIMITOP',      'infimum', 1],
  ['ker',    'OPFUNCTION',   'kernel'],
  ['lg',     'OPFUNCTION'],
  ['lim',    'LIMITOP',      'limit', 1],
  ['liminf', 'LIMITOP',      'limit-infimum', 1, "lim inf"],
  ['limsup', 'LIMITOP',      'limit-supremum', 1, "lim sup"],
  ['ln',     'OPFUNCTION',   'natural-logarithm'],
  ['log',    'OPFUNCTION',   'logarithm'],
  ['max',    'OPFUNCTION',   'maximum', 1],
  ['min',    'OPFUNCTION',   'minimum', 1],
  ['Pr',     'OPFUNCTION',   undef, 1],
  ['sec',    'TRIGFUNCTION', 'secant'],
  ['sin',    'TRIGFUNCTION', 'sine'],
  ['sinh',   'TRIGFUNCTION', 'hyperbolic-sine'],
  ['sup',    'LIMITOP',      'supremum', 1],
  ['tan',    'TRIGFUNCTION', 'tangent'],
  ['tanh',   'TRIGFUNCTION', 'hyperbolic-tangent'],
) {
  my ($name, $role, $meaning, $limits, $text) = @$function;
  DefMathI('\\' . $name, undef, (defined $text ? $text : $name),
    role => $role,
    (defined $meaning ? (meaning   => $meaning)      : ()),
    ($limits          ? (scriptpos => \&doScriptpos) : ())); }
#----------------------------------------------------------------------
# Modulo
# \pmod{n}: presented as spacing followed by "(mod n)"; role MODIFIER, no meaning assigned.
DefMath('\pmod{}', '\;\;(\mathop{{\rm mod}} #1)', role => 'MODIFIER'); # , meaning=>'modulo');
# \bmod: the binary "mod", with role MODIFIEROP and meaning 'modulo'.
DefMath('\bmod', 'mod', role => 'MODIFIEROP', meaning => 'modulo');
#======================================================================
# TeX Book, Appendix B. p. 362
#----------------------------------------------------------------------
# Matrices; Generalized
# The delimiters around a matrix may simply be notational, or for readability,
# and don't affect the "meaning" of the array structure as a matrix.
# In that case, we'll use an XMDual to indicate the content is simply the matrix,
# but the presentation includes the delimiters.
# HOWEVER, the delimiters may also signify an OPERATION on the matrix
# in which case the application & meaning of that operator must be supplied.
# keys are
# name : the name of the environment (for reversion)
# datameaning: the (presumed) meaning of the array construct (typically 'matrix')
# delimitermeaning : the meaning of an operator to be applied to the array (if any, eg, 'norm')
# style : typically \displaystyle, \textstyle...
# left : TeX code for left of matrix
# right : TeX code for right
# ncolumns : the number of columns (default is not limited)
# alignment : alignment of the columns; a value starting with l, c or r (default is c)
# The 'style' key is kept undigested, since it is inserted as tokens into each column.
DefKeyVal('lx@GEN', 'style', 'UndigestedKey');
# \lx@gen@matrix@bindings{keyvals}
#   Opens a group (closed by \lx@end@gen@matrix) and establishes math alignment
#   bindings for a generalized matrix.  Keys used here:
#     style       : tokens prepended (hidden) to each cell; default \textstyle
#     alignment   : l, c or r (default c); selects which side(s) get \hfil
#     ncolumns    : if given (non-zero), exactly that many columns are created;
#                   otherwise a single, repeated column spec is used
#     datameaning : stored as the 'meaning' attribute of the alignment
DefPrimitive('\lx@gen@matrix@bindings RequiredKeyVals:lx@GEN', sub {
    my ($stomach, $kv) = @_;
    $stomach->bgroup;
    my $style = $kv->getValue('style') || T_CS('\textstyle');
    my $align = ToString($kv->getValue('alignment')) || 'c';
    # We really should be using ReadAlignmentTemplate (LaTeXML::Core::Alignment)
    # but we'd have to convert it to a repeating spec somehow.
    my @colspec = (before => Tokens(($align =~ /^(?:c|r)/ ? (T_CS('\hfil')) : ()),
        Invocation(T_CS('\@hidden'), $style)),
      after => Tokens(($align =~ /^(?:c|l)/ ? (T_CS('\hfil')) : ())));
    my $ncols = ToString($kv->getValue('ncolumns'));
    # NOTE: the unary + is required.  At the start of a statement (as inside a
    # map block) a bare `{ @colspec }` is parsed as a nested BLOCK, not an
    # anonymous hash, which would flatten the specs into one list of
    # keys and values instead of one hash ref per column.
    alignmentBindings(LaTeXML::Core::Alignment::Template->new(
        ($ncols ? (columns => [map { +{@colspec} } 1 .. $ncols])
          : (repeated => [{@colspec}]))),
      'math',
      attributes => { meaning => $kv->getValue('datameaning') }); });
# Closes the group that \lx@gen@matrix@bindings opened.
DefPrimitive('\lx@end@gen@matrix', sub { $_[0]->egroup; });
# \lx@gen@plain@matrix{keyvals}{body}: set up the bindings, construct the matrix
# from the body (wrapped as an alignment), then close the group.
# The keyvals (#1) are passed both to the bindings and to the constructor.
DefMacro('\lx@gen@plain@matrix{}{}',
'\lx@gen@matrix@bindings{#1}'
. '\lx@gen@plain@matrix@{#1}{\@start@alignment#2\@finish@alignment}'
. '\lx@end@gen@matrix');
# The delimiters on a matrix are presumably just for notation or readability (not an operator);
# the array data itself is the matrix.
# \lx@gen@plain@matrix@{keyvals}{alignment}
#   Without delimiters, simply outputs the alignment (#2).
#   With a left and/or right delimiter, outputs an ltx:XMDual whose content
#   branch is a reference to the array (wrapped in an application of
#   `delimitermeaning` when that key is given) and whose presentation branch
#   is an ltx:XMWrap of left delimiter, the array, and right delimiter.
#   All of the keyvals are made available as whatsit properties.
DefConstructor('\lx@gen@plain@matrix@ RequiredKeyVals:lx@GEN {}',
  "?#needXMDual(<ltx:XMDual>"
    . "?#delimitermeaning(<ltx:XMApp><ltx:XMTok meaning='#delimitermeaning'/>"
    . "<ltx:XMRef _xmkey='#xmkey'/></ltx:XMApp>)"
    . "(<ltx:XMRef _xmkey='#xmkey'/>)"
    . "<ltx:XMWrap>#left<ltx:XMArg _xmkey='#xmkey'>#2</ltx:XMArg>#right</ltx:XMWrap>"
    . "</ltx:XMDual>)"
    . "(#2)",
  properties => sub {
    my (undef, $keyvals) = @_;
    return %{ $keyvals->getKeyVals }; },
  # Reverts to \<name>{body}, with <name> taken from the 'name' key.
  reversion => sub {
    my (undef, $keyvals, $body) = @_;
    my $command = '\\' . ToString($keyvals->getValue('name'));
    return (T_CS($command), T_BEGIN, Revert($body), T_END); },
  # An XMDual (and the key linking its two branches) is only needed when there are delimiters.
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $keyvals = $whatsit->getArg(1);
    return unless $keyvals->getValue('left') || $keyvals->getValue('right');
    $whatsit->setProperties(
      needXMDual => 1,
      xmkey      => LaTeXML::Package::getXMArgID());
    return; });
# \matrix: a generalized matrix with no delimiters.
DefMacro('\matrix{}',
'\lx@gen@plain@matrix{name=matrix,datameaning=matrix}{#1}');
# TODO: This is wrong; I'm not even clear how this differs from \matrix
# (as defined here it differs from \matrix only in the name used for reversion)
DefMacro('\bordermatrix{}',
'\lx@gen@plain@matrix{name=bordermatrix,datameaning=matrix}{#1}');
# \pmatrix: a generalized matrix enclosed in stretchy parentheses.
DefMacro('\pmatrix{}',
'\lx@gen@plain@matrix{name=pmatrix,datameaning=matrix,left=\@left(,right=\@right)}{#1}');
#----------------------------------------------------------------------
# Cases: Generalized
# keys are
# name : the name of the command (for reversion)
# meaning: the (presumed) meaning of the construct
# style : \textstyle or \displaystyle
# conditionmode : mode of 2nd column, text or math
# left : TeX code for left of cases
# right : TeX code for right
# Opens a text-mode condition cell: switches to text mode before digestion and
# captures the following material as #body, which is wrapped in ltx:XMText.
# The empty alias keeps it out of the reversion.
DefConstructorI('\lx@cases@condition', undef,
"<ltx:XMText>#body</ltx:XMText>",
alias => '', beforeDigest => sub { $_[0]->beginMode('text'); }, captureBody => 1);
# Closes the condition cell: ends the text mode begun by \lx@cases@condition; produces no output.
DefConstructorI('\lx@cases@end@condition', undef, "", alias => '',
beforeDigest => sub { $_[0]->endMode('text'); });
# \lx@gen@cases@bindings{keyvals}
#   Opens a group (closed by \lx@end@gen@cases) and establishes a two-column
#   math alignment for a cases construct.  Keys used here:
#     style         : tokens prepended (hidden) to each cell; default \textstyle
#     conditionmode : when 'text', the 2nd column is bracketed by
#                     \lx@cases@condition ... \lx@cases@end@condition
#   (A previously computed, but never used, `@mode` list has been dropped.)
DefPrimitive('\lx@gen@cases@bindings RequiredKeyVals:lx@GEN', sub {
    my ($stomach, $kv) = @_;
    $stomach->bgroup;
    my $style    = $kv->getValue('style') || T_CS('\textstyle');
    my $condtext = ToString($kv->getValue('conditionmode')) eq 'text';
    alignmentBindings(LaTeXML::Core::Alignment::Template->new(
        columns => [
          # 1st column: the value, left aligned.
          { before => Invocation(T_CS('\@hidden'), $style), after => Tokens(T_CS('\hfil')) },
          # 2nd column: the condition, left aligned, optionally in text mode.
          { before => Tokens(Invocation(T_CS('\@hidden'), $style),
              ($condtext ? (T_CS('\lx@cases@condition')) : ())),
            after => Tokens(($condtext ? (T_CS('\lx@cases@end@condition')) : ()),
              T_CS('\hfil')) }]),
      'math'); });
# \lx@gen@plain@cases{keyvals}{body}: set up the bindings, construct the cases
# from the body (wrapped as an alignment), then close the group.
# The keyvals (#1) are passed both to the bindings and to the constructor.
DefMacro('\lx@gen@plain@cases{}{}',
'\lx@gen@cases@bindings{#1}'
. '\lx@gen@plain@cases@{#1}{\@start@alignment#2\@finish@alignment}'
. '\lx@end@gen@cases');
# Closes the group that \lx@gen@cases@bindings opened.
DefPrimitive('\lx@end@gen@cases', sub { $_[0]->egroup; });
# The logical structure for cases extracts the columns of the alignment
# to give alternating value,condition (an empty condition is replaced by "otherwise" !?!?!)
# \lx@gen@plain@cases@{keyvals}{alignment}
#   Outputs an ltx:XMWrap of left delimiter, the alignment and right delimiter;
#   afterwards that wrapper is replaced by an ltx:XMDual whose content branch
#   applies 'cases' to references to the contents of each cell, in order
#   (an empty cell contributes the text "otherwise"), and whose presentation
#   branch is the original wrapper.
#   All of the keyvals are made available as whatsit properties.
#   Reversion is \<name>{body}, where <name> is the 'name' key, defaulting to
#   'cases' when no name was given (the name used to be fetched but ignored).
DefConstructor('\lx@gen@plain@cases@ RequiredKeyVals:lx@GEN {}',
  '<ltx:XMWrap>#left#2#right</ltx:XMWrap>',
  properties     => sub { %{ $_[1]->getKeyVals }; },
  afterConstruct => sub {
    my ($document) = @_;
    # The XMWrap that was just constructed.
    my $point = $document->getElement->lastChild;
    # Get the sequence of alternating (case, condition).
    # Expecting ltx:XMArray/ltx:XMRow/ltx:XMCell [should have /ltx:XMArg, but could be empty!!!]
    my @cells = $document->findnodes('ltx:XMArray/ltx:XMRow/ltx:XMCell', $point);
    my @stuff = map { ($_->hasChildNodes ? createXMRefs($document, element_nodes($_))
        : ['ltx:XMText', {}, 'otherwise']) } @cells;
    $document->replaceTree(['ltx:XMDual', {},
        ['ltx:XMApp', {}, ['ltx:XMTok', { meaning => 'cases' }], @stuff],
        $point],
      $point); },
  reversion => sub {
    my ($whatsit, $kv, $body) = @_;
    my $name = ToString($kv->getValue('name')) || 'cases';
    (T_CS('\\' . $name), T_BEGIN, Revert($body), T_END); });
# Note that 2nd column in \cases is in text mode!
# \cases{body}: generalized cases with a stretchy left brace, a text-mode
# condition column and \textstyle cells.
DefMacro('\cases{}',
'\lx@gen@plain@cases{meaning=cases,left=\@left\{,conditionmode=text,style=\textstyle}{#1}');
#----------------------------------------------------------------------
# \openup reads its Dimension argument and does nothing with it.
DefPrimitive('\openup Dimension', undef);
# What should this do? (needs to work with alignments..)
# \displaylines currently just expands to its argument.
DefMacro('\displaylines{}', '#1');
# \eqalign, \eqalignno and \leqalignno: [name, column template].
# Each public macro wraps its body as an alignment and hands it to an internal
# constructor \@@<name>, which establishes baseline-attached math alignment
# bindings before digesting the body and reverts as \<name>{body}.
foreach my $alignment (['eqalign', 'rl'], ['eqalignno', 'rll'], ['leqalignno', 'rll']) {
  my ($name, $template) = @$alignment;
  DefMacro('\\' . $name . '{}',
    '\@@' . $name . '{\@start@alignment#1\@finish@alignment}');
  DefConstructor('\@@' . $name . '{}',
    '#1',
    reversion    => '\\' . $name . '{#1}', bounded => 1,
    beforeDigest => sub {
      alignmentBindings($template, 'math',
        attributes => { vattach => 'baseline' }); }); }
# Page-numbering names from plain TeX.
# \pageno is a register initialized to 0; \headline and \footline are
# registers initialized to empty token lists.
DefRegister('\pageno' => Number(0));
DefRegister('\headline' => Tokens());
DefRegister('\footline' => Tokens());
# \folio always expands to the literal "1".
DefMacroI('\folio', undef, "1"); # What else?
# \nopagenumbers is defined with no replacement (undef).
DefPrimitiveI('\nopagenumbers', undef, undef);
# \advancepageno expands to an \advance of \pageno by 1.
DefMacroI('\advancepageno', undef, '\advance\pageno1\relax');
#======================================================================
# TeX Book, Appendix B. p. 363
# Both bottom-alignment switches are defined with no replacement (undef).
DefPrimitive('\raggedbottom', undef);
DefPrimitive('\normalbottom', undef);
# if the mark is not simple, we add it to the content of the note
# otherwise, to the attribute.
# \footnote{mark}{text}: a "simple" mark is one whose reverted tokens are all
# letters, spaces or other characters; it is stored in the 'mark' property.
# Any other mark is stored in the 'prenote' property instead.
DefConstructor('\footnote{}{}',
  "<ltx:note role='foot' ?#mark(mark='#mark')()>?#prenote(#prenote )()#2</ltx:note>",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $mark = $whatsit->getArg(1);
    # Collect the tokens that disqualify the mark from being an attribute value.
    my @complex = grep {
      my $cc = $_->getCatcode;
      ($cc != CC_LETTER) && ($cc != CC_SPACE) && ($cc != CC_OTHER) } Revert($mark);
    my $property = (@complex ? 'prenote' : 'mark');
    $whatsit->setProperty($property => $mark);
    return; });
# Footnote and insertion names from plain TeX; the primitives below
# are all defined with no replacement (undef).
DefPrimitiveI('\footstrut', undef, undef);
# \footins is defined here as a register holding a zero Dimension.
DefRegister('\footins' => Dimension(0));
DefPrimitiveI('\topinsert', undef, undef);
DefPrimitiveI('\midinsert', undef, undef);
DefPrimitiveI('\pageinsert', undef, undef);
DefPrimitiveI('\endinsert', undef, undef);
# \topins ?
#======================================================================
# TeX Book, Appendix B. p. 364
# Let's hope nobody is messing with the output routine...
DefPrimitiveI('\footnoterule', undef, undef);
#======================================================================
# End of TeX Book definitions.
#======================================================================
#**********************************************************************
# Stray stuff .... where to ?
#**********************************************************************
# Mostly ignorable, although it could add an attribute to an ancestor
# to record the desired justification.
# Spacing stuff
# \@ has an empty constructor template, so it adds nothing to the document.
DefConstructor('\@', '');
# Math spacing.
# Math style.
# Also record that this explicitly sets the mathstyle (support for \over, etal)
# Defines \displaystyle, \textstyle, \scriptstyle and \scriptscriptstyle.
# Each one merges the corresponding mathstyle into the current font and
# returns an empty Box that reverts to the control sequence itself and
# carries the explicit_mathstyle property.
foreach my $mathstyle (qw(display text script scriptscript)) {
  my $csname = '\\' . $mathstyle . 'style';
  DefPrimitiveI($csname, undef, sub {
      MergeFont(mathstyle => $mathstyle);
      Box(undef, undef, undef, T_CS($csname), explicit_mathstyle => 1); }); }
#======================================================================
# Special Characters.
# Try to give them some sense in math...
# Each escaped character is a macro that dispatches on \ifmmode to a
# math-mode or a text-mode implementation.
DefMacroI('\#', undef, '\ifmmode\lx@math@hash\else\lx@text@hash\fi');
DefMacroI('\&', undef, '\ifmmode\lx@math@amp\else\lx@text@amp\fi');
DefMacroI('\%', undef, '\ifmmode\lx@math@percent\else\lx@text@percent\fi');
DefMacroI("\\\$", undef, '\ifmmode\lx@math@dollar\else\lx@text@dollar\fi');
DefMacroI('\_', undef, '\ifmmode\lx@math@underscore\else\lx@text@underscore\fi');
# Text-mode versions: produce the plain character; the alias names the
# user-level control sequence.
DefPrimitiveI('\lx@text@hash', undef, '#', alias => '\#');
DefPrimitiveI('\lx@text@amp', undef, '&', alias => '\&');
DefPrimitiveI('\lx@text@percent', undef, '%', alias => '\%');
DefPrimitiveI('\lx@text@dollar', undef, "\$", alias => "\\\$");
DefPrimitiveI('\lx@text@underscore', undef, '_', alias => '\_');
# Math-mode versions: the same characters; &, % and $ are additionally
# given a grammatical role and a meaning.
DefMathI('\lx@math@hash', undef, '#', alias => '\#');
DefMathI('\lx@math@amp', undef, '&', role => 'ADDOP', meaning => 'and', alias => '\&');
DefMathI('\lx@math@percent', undef, '%', role => 'POSTFIX', meaning => 'percent', alias => '\%');
DefMathI('\lx@math@dollar', undef, "\$", role => 'OPERATOR', meaning => 'currency-dollar',
alias => "\\\$");
DefMathI('\lx@math@underscore', undef, '_', alias => '\_');
# Discretionary times; just treat as invisible ?
# Defined as a MULOP meaning 'times', with an empty name.
DefMathI('\*', undef, "\x{2062}", role => 'MULOP', name => '', meaning => 'times'); # INVISIBLE TIMES (or MULTIPLICATION SIGN = 00D7)
# These 3 should have some `name' assigned ... but what???
# Is XMWrap the right thing to wrap with (instead of XMArg)?
# We can't really assume that the stuff inside is sensible math.
# NOTE that \mathord and \mathbin aren't really right here.
# We need a finer granularity than TeX does: an ORD could be several things,
# a BIN could be a MULOP or ADDOP.
# Each TeX math-class command wraps its argument in an ltx:XMWrap carrying a
# fixed role; all of them are bounded.
DefConstructor('\mathord{}', "<ltx:XMWrap role='ID' >#1</ltx:XMWrap>", bounded => 1);
# \mathop additionally records a scriptpos property, computed by doScriptpos.
DefConstructor('\mathop{}', "<ltx:XMWrap role='BIGOP' scriptpos='#scriptpos'>#1</ltx:XMWrap>",
bounded => 1, properties => { scriptpos => \&doScriptpos });
DefConstructor('\mathbin{}', "<ltx:XMWrap role='BINOP'>#1</ltx:XMWrap>", bounded => 1);
DefConstructor('\mathrel{}', "<ltx:XMWrap role='RELOP'>#1</ltx:XMWrap>", bounded => 1);
DefConstructor('\mathopen{}', "<ltx:XMWrap role='OPEN' >#1</ltx:XMWrap>", bounded => 1);
DefConstructor('\mathclose{}', "<ltx:XMWrap role='CLOSE'>#1</ltx:XMWrap>", bounded => 1);
DefConstructor('\mathpunct{}', "<ltx:XMWrap role='PUNCT'>#1</ltx:XMWrap>", bounded => 1);
# If an XMWrap (presumably from \mathop, \mathbin, etc)
# has multiple children, ALL are XMTok, within a restricted set of roles,
# we want to concatenate the text content into a single XMTok.
DefMathRewrite(xpath => 'descendant-or-self::ltx:XMWrap['
    # Only XMWrap's from the above class of operators
    . '(@role="OP" or @role="BIGOP" or @role="RELOP" '
    . 'or @role="ADDOP" or @role="MULOP" or @role="BINOP" '
    . 'or @role="OPEN" or @role="CLOSE")'
    . ' and count(child::*) > 1 '
    # with only XMTok as children with the roles in (roughly) the same set
    . ' and not(child::*[local-name() != "XMTok"])'
    . ' and not(ltx:XMTok['
    . '@role !="OP" and @role!="BIGOP" and @role!="RELOP" and @role!="METARELOP" '
    . 'and @role!="ADDOP" and @role!="MULOP" and @role!="BINOP" '
    . 'and @role!="OPEN" and @role!="CLOSE"'
    . '])]',
  replace => sub {
    my ($document, $wrapper) = @_;
    # Non-deep clone of the wrapper (its children are not copied), then turn
    # it into an XMTok whose text is the concatenated text of the original.
    my $merged = $wrapper->cloneNode(0);
    $merged->appendText($wrapper->textContent);
    $merged->setName('ltx:XMTok');
    $document->getNode->appendChild($merged);
  });
# \hiderel{x} expands to x alone.
DefMacro('\hiderel{}', "#1"); # Just ignore, for now...
# \to is the arrow U+2192 with role ARROW.
DefMathI('\to', undef, "\x{2192}", role => 'ARROW'); # RIGHTWARDS ARROW??? a bit more explicitly relation-like?
# TeX's ligatures handled by rewrite regexps.
# Note: applied in reverse order of definition (latest defined applied first!)
# Note also, these are only applied in text content, not in attributes!
DefPrimitive('\@@endash', sub {
    my $endash = "\x{2013}";
    Box($endash, undef, undef, T_CS('\@@endash')); });
DefPrimitive('\@@emdash', sub {
    my $emdash = "\x{2014}";
    Box($emdash, undef, undef, T_CS('\@@emdash')); });
{
  # Font predicates shared by the ligatures below.
  # Dashes combine in any font that is not typewriter ...
  my $not_typewriter = sub {
    my ($font) = @_;
    $font->getFamily ne 'typewriter'; };
  # ... quotes additionally require the OT1 encoding (assumed when none is set).
  my $not_typewriter_ot1 = sub {
    my ($font) = @_;
    ($font->getFamily ne 'typewriter') && (($font->getEncoding || 'OT1') eq 'OT1'); };

  # EN DASH (NOTE: With digits before & aft => \N{FIGURE DASH})
  DefLigature(qr{--}, "\x{2013}", fontTest => $not_typewriter);
  # EM DASH
  DefLigature(qr{---}, "\x{2014}", fontTest => $not_typewriter);
  # Ligatures for doubled single left & right quotes to convert to double quotes
  # [should ligatures be part of a font, in the first place? (it is in TeX!)
  DefLigature(qr{\x{2018}\x{2018}}, "\x{201C}", fontTest => $not_typewriter_ot1);
  DefLigature(qr{\x{2019}\x{2019}}, "\x{201D}", fontTest => $not_typewriter_ot1);
}
# \TeX is rendered as the plain string "TeX".
DefPrimitiveI('\TeX', undef, 'TeX');
DefPrimitiveI('\i', undef, "\x{0131}"); # LATIN SMALL LETTER DOTLESS I
DefPrimitiveI('\j', undef, "\x{0237}"); # LATIN SMALL LETTER DOTLESS J
# \buildrel <top> \over {base}: builds a RELOP application of a
# SUPERSCRIPTOP token to the base (#2) followed by the material read up
# to \over (#1).  The scriptpos property is "mid" followed by the current
# boxing level.
DefConstructor('\buildrel Until:\over {}',
  "<ltx:XMApp role='RELOP'>"
    . "<ltx:XMTok role='SUPERSCRIPTOP' scriptpos='#scriptpos'/>"
    . "<ltx:XMArg>#2</ltx:XMArg>"
    . "<ltx:XMArg>#1</ltx:XMArg>"
    . "</ltx:XMApp>",
  properties => {
    scriptpos => sub {
      my ($stomach) = @_;
      "mid" . $stomach->getBoxingLevel; } });
#**********************************************************************
# LaTeX Hook
#**********************************************************************
# This is used for plain TeX, but needs to be undone for LaTeX (or...)!
RelaxNGSchema("LaTeXML");
Tag('ltx:section',  autoClose => 1);
Tag('ltx:document', autoClose => 1, autoOpen => 1);
# When the document element opens, record a non-white background color
# (taken from the root node's font) as its backgroundcolor attribute.
Tag('ltx:document', afterOpen => sub {
    my ($document, $root) = @_;
    my $font = $document->getNodeFont($root);
    return unless $font;
    my $background = $font->getBackground;
    return unless $background;
    return if $background eq 'white';
    $document->setAttribute($root, backgroundcolor => $background); });
# No, \documentclass isn't really a primitive -- It's not even TeX!
# But we define a number of stubs here that will automatically load
# the LaTeX pool (or AmSTeX.pool) (which will presumably redefine them), and then
# stuff the token back to be reexecuted.
# Each entry below is a pool file followed by the control sequence names
# that trigger loading it; the AmSTeX names are
# "seemingly good candidates to trigger AmSTeX ??".
foreach my $autoload (
  ['LaTeX.pool.ltxml' => qw(documentclass
      newcommand renewcommand newenvironment renewenvironment
      NeedsTeXFormat ProvidesPackage RequirePackage ProvidesFile
      makeatletter makeatother
      typeout begin listfiles)],
  ['AmSTeX.pool.ltxml' => qw(BlackBoxes NoBlackBoxes
      TagsAsMath TagsAsText TagsOnLeft TagsOnRight CenteredTagsOnSplits TopOrBottomTagsOnSplits
      LimitsOnInts NoLimitsOnInts LimitsOnNames NoLimitsOnNames LimitsOnSums NoLimitsOnSums
      loadbold loadeufb loadeufm loadeurb loadeurm loadeusb
      loadeusm loadmathfont loadmsam loadmsbm)]) {
  my ($pool, @triggers) = @$autoload;
  foreach my $trigger (@triggers) {
    DefAutoload($trigger, $pool); } }
# Darn; we need to be even more clever, since we need to simulate an amstex command, as well.
# For example \documentstyle[...]{amsppt} must switch to AMSTeX mode, _NOT_ LaTeX mode!!!!
# Loads the AmSTeX pool for class "amsppt" and the LaTeX pool otherwise, then
# returns the tokens of the original \documentstyle invocation.
DefMacro('\documentstyle OptionalSemiverbatim SkipSpaces Semiverbatim', sub {
    my ($gullet, $options, $class) = @_;
    my $pool = 'LaTeX';
    $pool = 'AmSTeX' if ToString($class) =~ /^amsppt$/;
    LoadPool($pool);
    # Rebuild the optional [...] part only when options were actually given.
    my @optional = ();
    @optional = (T_OTHER('['), $options->unlist, T_OTHER(']')) if $options;
    return (T_CS('\\documentstyle'), @optional, T_BEGIN, $class->unlist, T_END); });
#**********************************************************************
# LaTeXML Specific.
# Support for Declarations & Presentation/Semantic Duality
#**********************************************************************
# \DUAL[keyvals]{content}{presentation} builds an ltx:XMDual holding the
# content branch (#2) and the presentation branch (#3, wrapped in XMWrap).
# The reversion depends on $LaTeXML::DUAL_BRANCH:
#   unset          => the whole \DUAL invocation;
#   'content', or the hide_content_reversion key is not set => the content;
#   'presentation' => the presentation;
#   anything else  => a warning, then the whole invocation.
DefConstructor('\DUAL OptionalKeyVals {}{}',
  "<ltx:XMDual role='&GetKeyVal(#1,role)'>#2<ltx:XMWrap>#3</ltx:XMWrap></ltx:XMDual>",
  sizer     => '#3',    # size according to presentation
  reversion => sub {
    my ($whatsit) = @_;
    my ($kv, $content, $presentation) = $whatsit->getArgs;
    my $role   = GetKeyVal($kv, 'role');
    my $branch = $LaTeXML::DUAL_BRANCH;
    return $whatsit->getDefinition->invocation($role, $content, $presentation)
      if !$branch;
    return Revert($content)
      if ($branch eq 'content') || !GetKeyVal($kv, 'hide_content_reversion');
    return Revert($presentation)
      if $branch eq 'presentation';
    Warn('misdefined', 'DUAL', $whatsit,
      "$LaTeXML::DUAL_BRANCH Unknown DUAL_BRANCH: $LaTeXML::DUAL_BRANCH");
    return $whatsit->getDefinition->invocation($role, $content, $presentation); });
# Recursively assign $meaning to the XMTok's found below $node.
# Only tokens whose role is missing or 'UNKNOWN', and which have no meaning
# yet, are changed.  The walk never descends into ltx:XMArg elements, and
# non-element nodes are ignored.
sub addMeaningRec {
  my ($document, $node, $meaning) = @_;
  return unless $node->nodeType == XML_ELEMENT_NODE;
  my $qname = $document->getModel->getNodeQName($node);
  # DONT cross through into arguments!
  return if $qname eq 'ltx:XMArg';
  if ($qname eq 'ltx:XMTok') {
    my $role = $node->getAttribute('role') || 'UNKNOWN';
    $document->setAttribute($node, meaning => $meaning)
      if ($role eq 'UNKNOWN') && !$node->getAttribute('meaning');
    return; }
  foreach my $child ($node->childNodes) {
    addMeaningRec($document, $child, $meaning); }
  return; }
# Small semantic-markup helpers.  Unless noted otherwise, each reverts to
# its (unwrapped) argument.
# \FCN{x}: wrap x with role FUNCTION.  \ROLE{r}{x}: wrap x with role r.
DefConstructor('\FCN{}', "<ltx:XMWrap role='FUNCTION'>#1</ltx:XMWrap>", reversion => '#1', alias => '');
DefConstructor('\ROLE{}{}', "<ltx:XMWrap role='#1'>#2</ltx:XMWrap>", reversion => '#2', alias => '');
# NOTE: work through this systematically!
# \@SYMBOL{x}: wrap x with role ID.  \@APPLY{x}: wrap x in an XMApp.
DefConstructor('\@SYMBOL{}', "<ltx:XMWrap role='ID'>#1</ltx:XMWrap>", reversion => '#1');
DefConstructor('\@APPLY{}', "<ltx:XMApp>#1</ltx:XMApp>", reversion => '#1');
# \@MAYBEAPPLY{f}{args}: an XMApp of f and args when args is present, else just f.
DefConstructor('\@MAYBEAPPLY{}{}',
"?#2(<ltx:XMApp>#1#2</ltx:XMApp>)(#1)",
reversion => '#1#2');
DefConstructor('\@WRAP{}', "<ltx:XMWrap>#1</ltx:XMWrap>", reversion => '#1');
# \@TOKEN{name}: an empty XMTok with the given name; reverts to nothing.
DefConstructor('\@TOKEN{}', "<ltx:XMTok name='#1'/>", reversion => '');
# \lx@kludged{x}: when the isMath property holds, wrap x in an XMWrap marked
# rule='kludge'; otherwise insert x directly.
DefConstructor('\lx@kludged{}',
"?#isMath(<ltx:XMWrap rule='kludge'>#1</ltx:XMWrap>)(#1)",
reversion => '#1');
# \@PADDED[left]{right}{content}: inserts content, then records the given
# paddings as lpadding/rpadding attributes on the last child element of the
# current node.  An absent (false) padding argument sets no attribute.
DefConstructor('\@PADDED[MuDimension]{MuDimension}{}',
  '#3',
  reversion      => '#3',
  afterConstruct => sub {
    my ($document, $whatsit) = @_;
    my $target = $document->getLastChildElement($document->getNode);
    foreach my $spec ([1 => 'lpadding'], [2 => 'rpadding']) {
      my ($argno, $attribute) = @$spec;
      my $amount = $whatsit->getArg($argno);
      $document->setAttribute($target, $attribute => $amount) if $amount; } });
# Invisible operators: U+2061 (function application), U+2062 (invisible times)
# and U+2063 (invisible separator).  All have an empty name and an empty reversion.
DefMathI('\@APPLYFUNCTION', undef, "\x{2061}", reversion => '', name => '', role => 'APPLYOP');
DefMathI('\@INVISIBLETIMES', undef, "\x{2062}", reversion => '', name => '', meaning => 'times', role => 'MULOP');
DefMathI('\@INVISIBLECOMMA', undef, "\x{2063}", reversion => '', name => '', role => 'PUNCT');
# We OUGHT to be able to do this using \llap,\rlap,\hss...
# \lx@tweaked{keyvals}{content} dispatches on \ifmmode to the math or the
# text variant.  Both variants put the key/value pairs on the wrapping
# element as attributes and copy them into the whatsit's properties.
DefMacro('\lx@tweaked{}{}', '\ifmmode\lx@math@tweaked{#1}{#2}\else\lx@text@tweaked{#1}{#2}\fi');
# Math variant: wraps in ltx:XMWrap, takes over the content's font and
# reverts to the bare content.
DefConstructor('\lx@math@tweaked RequiredKeyVals {}',
  "<ltx:XMWrap %&GetKeyVals(#1)>#2</ltx:XMWrap>",
  reversion   => '#2',
  afterDigest => sub {
    my (undef, $whatsit) = @_;
    my ($keyvals, $content) = $whatsit->getArgs;
    $whatsit->setProperties($keyvals->getPairs);
    $whatsit->setFont($content->getFont);
    return; });
# Text variant: wraps in ltx:text.
DefConstructor('\lx@text@tweaked RequiredKeyVals {}',
  "<ltx:text _noautoclose='1' %&GetKeyVals(#1)>#2</ltx:text>",
  afterDigest => sub {
    my (undef, $whatsit) = @_;
    my ($keyvals) = $whatsit->getArgs;
    $whatsit->setProperties($keyvals->getPairs); });
# \lx@nounicode{cs} dispatches on \ifmmode to the math or text variant,
# passing the argument on without braces.
DefMacro('\lx@nounicode {}', '\ifmmode\lx@math@nounicode#1\else\lx@text@nounicode#1\fi');
# \lx@framed[frame]{content}: ltx:text with a framed attribute; the frame
# defaults to 'rectangle' when the optional argument is absent.
DefConstructor('\lx@framed[]{}',
"<ltx:text framed='#frame' _noautoclose='1'>#2</ltx:text>",
properties => { frame => sub { ToString($_[1] || 'rectangle'); } });
# \lx@hflipped{content}: ltx:text carrying the class ltx_hflipped.
DefConstructor('\lx@hflipped{}',
"<ltx:text class='ltx_hflipped' _noautoclose='1'>#1</ltx:text>");
# Warn that the symbol $cs has no Unicode equivalent.
# The 'missing_unicode' mapping remembers which symbols were already
# reported, so each one triggers the warning at most once.
sub reportNoUnicode {
  my ($cs) = @_;
  $cs = ToString($cs);
  # Already reported? Then stay silent.
  return if LookupMapping('missing_unicode' => $cs);
  Warn('expected', 'unicode', $cs,
    "There's no Unicode equivalent for the symbol '$cs'");
  AssignMapping('missing_unicode' => $cs => 1);
  return; }
# Slightly contrived so that this can be used within a DefMath
# and still declare & get the semantic properties.
# Reports the missing Unicode equivalent, then yields a Box whose text is
# the control sequence's own name, marked with class ltx_nounicode.
DefPrimitive('\lx@math@nounicode DefToken', sub {
    my (undef, $token) = @_;
    reportNoUnicode($token);
    return Box(ToString($token), undef, undef, $token, class => 'ltx_nounicode'); });
# Text-mode counterpart of \lx@math@nounicode: inserts the control sequence
# name as ltx:text with class ltx_nounicode, and reports (once per symbol)
# that it has no Unicode equivalent.
# The internal attribute is spelled _noautoclose, as on the other ltx:text
# constructors in this file (it was previously written _no_autoclose).
DefConstructor('\lx@text@nounicode DefToken',
  "<ltx:text _noautoclose='1' class='ltx_nounicode'>#1</ltx:text>",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    # Whatsit arguments are numbered from 1 everywhere else in this file;
    # ask for the first (and only) argument explicitly rather than index 0.
    reportNoUnicode(ToString($whatsit->getArg(1))); });
# These are used (primarily?) within XMDual
# The XMDual represents both a content & presentation representation of some
# possibly exotic structure ("Transfix notation"),
# or just a somewhat complex presentation that corresponds (often) to a simpler
# applicative content structure.
# Invoking such a mathematical object to "arguments" requires that both the
# content & presentation branches contain those arguments.
# There will be an XMArg, with an ID, containing the actual markup, and an XMRef that refers to it.
# The XMArg will usually be in the presentation branch (so that it inherits appropriate style),
# unless the arg is "hidden" (ie. semantic, but not displayed).
# This means that we don't know which one appears first! (See Package's dualize_arglist)
#
# To get a "proper id", we'll use a temporary label-like attribute (_xmkey)
# and establish an id and idref later.

# \@XMArg{key}{content}: after digestion the whatsit is stored globally under
# 'xref:<key>', where \@XMRef can find it.
DefConstructor('\@XMArg{}{}', "<ltx:XMArg _xmkey='#1'>#2</ltx:XMArg>",
  bounded     => 1,
  reversion   => '#2',
  afterDigest => sub {
    my (undef, $whatsit) = @_;
    my $statekey = 'xref:' . ToString($whatsit->getArg(1));
    AssignValue($statekey => $whatsit, 'global'); });
# \@XMRef{key}: reverts to, and is sized as, whatever was stored under 'xref:<key>'.
DefConstructor('\@XMRef{}', "<ltx:XMRef _xmkey='#1'/>",
  reversion => sub {
    my (undef, $key) = @_;
    Revert(LookupValue('xref:' . ToString($key))); },
  sizer => sub {
    my ($whatsit) = @_;
    LookupValue('xref:' . ToString($whatsit->getArg(1)))->getSize; });
# Connect up the XMRef/XMwhatever pairs (actually can be multiple XMRef's)
# We want to set the idref of the XMRef's to point to the id of the XMArg (or other XM element),
# but usually the XMRef is created first, and we want to let the referred to element
# get its id computed by whatever means it prefers.
# so we have to work both ways (use state to record associations, to avoid expensive xpath)
Tag('ltx:*', 'afterOpen:late' => sub {
    my ($document, $node) = @_;
    my $key = $node->getAttribute('_xmkey');
    return unless $key;
    my $qname = $document->getNodeQName($node);
    if ($qname eq 'ltx:XMRef') {
      # An XMRef that already has an idref needs nothing more.
      return if $node->getAttribute('idref');
      my $known_id = LookupValue('xmkey:' . $key);
      if ($known_id) {    # The referred-to node already has an id: use it.
        $document->setAttribute($node, idref => $known_id); }
      else {              # Otherwise, remember that this XMRef still needs an idref.
        PushValue('xmkey_referrers:' . $key, $node); }
      return; }
    # Any other XM* node w/_xmkey: make sure it has an id,
    # and record it for the corresponding XMRef's.
    return unless $qname =~ /^ltx:XM/;
    GenerateID($document, $node, undef, '');    # Generate id if none already.
    my $id = $node->getAttribute('xml:id');
    AssignValue('xmkey:' . $key => $id, 'global');    # for future use by XMRef
    # but if we came after any XMRef's, set their idref's as well.
    my $pending = LookupValue('xmkey_referrers:' . $key);
    return unless $pending;
    foreach my $referrer (@$pending) {
      $document->setAttribute($referrer, idref => $id); }
    AssignValue('xmkey_referrers:' . $key, undef, 'global');
    return; });
# \@ERROR{type}{content}: wraps content in ltx:ERROR with class ltx_<type>.
DefConstructor('\@ERROR{}{}', "<ltx:ERROR class='ltx_#1'>#2</ltx:ERROR>");
#**********************************************************************
# The four wildcard commands all insert the same placeholder element.
DefConstructorI('\WildCard', undef, "<_WildCard_/>");
DefConstructorI('\WildCardA', undef, "<_WildCard_/>");
DefConstructorI('\WildCardB', undef, "<_WildCard_/>");
DefConstructorI('\WildCardC', undef, "<_WildCard_/>");
#**********************************************************************
# This constructor allows you to stick things in expansions for effect
# but their reversion disappears, so they don't appear in the tex attribute (eg).
DefConstructor('\@hidden{}', '', reversion => '');
#**********************************************************************
# After all other rewrites have acted, a little cleanup
# An XMWrap with exactly one child element is replaced by that child.
# The wrapper's attributes are transferred to the child, except that
#  * internal attributes (names starting with "_") are dropped;
#  * xml:id is handled specially so that no id clashes or dangling
#    references result (see below).
DefRewrite(xpath => 'descendant-or-self::ltx:XMWrap[count(child::*)=1]',
  replace => sub {
    my ($document, $wrap) = @_;
    if (my $node = $document->getFirstChildElement($wrap)) {
      # Copy attributes but NOT internal ones,
      # NOR xml:id, else we get clashes
      foreach my $attribute ($wrap->attributes) {
        next unless $attribute->nodeType == XML_ATTRIBUTE_NODE;
        my $attr = $attribute->nodeName;
        if ($attr =~ /^_/) { }    # internal attribute: not copied
        elsif ($attr eq 'xml:id') {
          my $id = $attribute->getValue;
          if (my $previd = $node->getAttribute('xml:id')) {    # Keep original id
                 # but swap any references to the one on the wrapper!
            foreach my $ref ($document->findnodes("//*[\@idref='$id']")) {
              $ref->setAttribute(idref => $previd); }
            # The wrapper's id is now unused: remove and unrecord it.
            $wrap->removeAttribute('xml:id');
            $document->unRecordID($id); }
          else {
            # The child has no id: move the wrapper's id onto it.
            $wrap->removeAttribute('xml:id');
            $document->unRecordID($id);
            $document->setAttribute($node, 'xml:id' => $id); } }
        else {
          $document->setAttribute($node, $attr => $attribute->getValue); } }
      # But keep $node's font from being overwritten.
      $document->setNodeFont($wrap, $document->getNodeFont($node));
      ## WHY THIS????
      $document->getNode->appendChild($node);
    } });
#======================================================================
# Insert the body (taken from the 'body' property) as a block, then mark each
# node returned by insertBlock with the alignment, or failing that the class
# (see setAlignOrClass).
sub aligningEnvironment {
  my ($align, $class, $document, %props) = @_;
  foreach my $inserted (insertBlock($document, $props{body})) {
    setAlignOrClass($document, $inserted, $align, $class); }
  return; }
# should be obsolete!!!
# Append $class to the class attribute of the element $node (separated by a
# space), or set the attribute when there is none yet.  Does nothing unless
# $node is an element and $class is true.
sub addClass {
  my ($node, $class) = @_;
  return unless $node && $class && ($node->nodeType == XML_ELEMENT_NODE);
  my $combined = ($node->hasAttribute('class')
    ? $node->getAttribute('class') . ' ' . $class
    : $class);
  $node->setAttribute(class => $combined);
  return; }
# \@ADDCLASS{class}: adds the class to the document's current element;
# the construct itself has size 0.
DefConstructor('\@ADDCLASS Semiverbatim', sub {
    my ($document, $class) = @_;
    $document->addClass($document->getElement, ToString($class)); },
  sizer => 0);
# Record an alignment on $node: use the align attribute when an alignment is
# given and the element may carry one; otherwise fall back to adding $class,
# when a class is given and the element may carry a class attribute.
sub setAlignOrClass {
  my ($document, $node, $align, $class) = @_;
  my $qname = $document->getModel->getNodeQName($node);
  if ($align && $document->canHaveAttribute($qname, 'align')) {
    $node->setAttribute(align => $align);
    return; }
  $document->addClass($node, $class)
    if $class && $document->canHaveAttribute($qname, 'class');
  return; }
#======================================================================
# A random collection of utility functions.
# [maybe need to do some reorganization?]
# Since this is used for textual tokens, typically to split author lists,
# we don't split within braces or math
# Splits $tokens at any token equal to one of @delims and returns a list of
# array references, one per item (always at least one, possibly empty).
# The delimiters themselves are dropped; brace groups and $...$ spans are
# copied through whole, even if they contain delimiters.
sub SplitTokens {
  my ($tokens, @delims) = @_;
  my @groups  = ();
  my @current = ();
  my @pending = ($tokens ? $tokens->unlist : ());
  while (my $token = shift(@pending)) {
    # A delimiter closes the current item.
    if (grep { Equals($token, $_) } @delims) {
      push(@groups, [@current]);
      @current = ();
      next; }
    push(@current, $token);
    if (Equals($token, T_BEGIN)) {
      # Copy through to the matching close brace (or the end of input).
      my $depth = 1;
      while ($depth) {
        my $inner = shift(@pending);
        last unless defined $inner;
        my $cc = $inner->getCatcode;
        $depth++ if $cc == CC_BEGIN;
        $depth-- if $cc == CC_END;
        push(@current, $inner); } }
    elsif (Equals($token, T_MATH)) {
      # Copy through to the next math shift (or the end of input).
      while (defined(my $inner = shift(@pending))) {
        push(@current, $inner);
        last if $inner->getCatcode == CC_MATH; } } }
  return (@groups, [@current]); }
# Split $tokens at each \and and return, for every piece, the token $cs
# followed by the piece enclosed in braces.
sub andSplit {
  my ($cs, $tokens) = @_;
  my @result = ();
  foreach my $piece (SplitTokens($tokens, T_CS('\and'))) {
    push(@result, $cs, T_BEGIN, @$piece, T_END); }
  return @result; }
# Return the arguments unchanged when at least one of them is defined;
# otherwise return undef.
sub orNull {
  my $defined_count = grep { defined($_) } @_;
  return $defined_count ? @_ : undef; }
#**********************************************************************
# Finally, load the eTeX and pdfTeX pools as well (unconditionally, for now).
LoadPool('eTeX'); # unless.... ?
LoadPool('pdfTeX'); # unless.... ?
#**********************************************************************
# The file ends with a true value.
1;