# /=====================================================================\ #
# | LaTeXML::MathGrammar | #
# | LaTeXML's Math Grammar for postprocessing | #
# |=====================================================================| #
# | Part of LaTeXML: | #
# | Public domain software, produced as part of work done by the | #
# | United States Government & not subject to copyright in the US. | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov> #_# | #
# | http://dlmf.nist.gov/LaTeXML/ (o o) | #
# \=========================================================ooo==U==ooo=/ #
# ================================================================================
# LaTeXML's MathGrammar.
# To compile :
# perl -MParse::RecDescent - MathGrammar LaTeXML::MathGrammar
# ================================================================================
# Startup actions: import the constructors
{ BEGIN{ use LaTeXML::MathParser qw(:constructors);
#### $::RD_TRACE=1;
}}
# Rules section
# ========================================
# Naming Conventions:
# UPPERCASE : is for terminals, ie. classes of TeX tokens.
# Initial Cap : for non-terminal rules that can possibly be invoked externally.
# Initial lowercase : internal rules.
# ========================================
# For internal rules
# moreFoos[$foo] : Looks for more Foo's w/appropriate punctuation or operators,
# whatever is appropriate, and combines it with whatever was passed in
# as pattern arg. Typically, the last clause would be simply
# | { $arg[0]; }
# to return $foo without having found any more foo's.
# In such a case, it appears to be advantageous to have the first clause be
# : /^\Z/ { $arg[0]; }
# which will return immediately if there is no additional input.
# addFoo[$bar] : Check for a following Foo and add it, as appropriate to
# the $bar.
# ========================================
# Note that Parse::RecDescent does NOT backtrack within a rule:
# If a given production succeeds, the rule succeeds, but even if the ultimate
# parse fails, the parser will NOT go back and try another production within
# that same rule!!! Of course, if a production fails, it goes on to the next,
# and if that rule fails, etc...
#
# For example ||a|-|b|| won't work (in spite of various attempts to control it)
# After seeing the initial || and attempting to parse an Expression, it gets
# a * abs( - abs(b))
# without anything to match the initial ||; and it will NOT backtrack to try
# a shorter Expression!
#
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Top Level expressions; Just about anything?
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Note in particular, that many inline formula contain `half' a formula,
# with the lead-in text effectively being the LHS. eg. function $=foo$;
# similarly you can end up with a missing RHS, $x=$ even.
# Start : entry rule. Matches Anything followed by end of input (/^\Z/)
# and returns the value produced by Anything.
Start : Anything /^\Z/ { $item[1]; }
#======================================================================
# Anything : localizes $MaxAbsDepth, initialized from
# $LaTeXML::MathParser::MAX_ABS_DEPTH (it is decremented by absExpression),
# then requires AnythingAny to consume all remaining input.
# Returns the value of AnythingAny.
Anything : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Anything : AnythingAny /^\Z/ { $item[1]; }
#======================================================================
# AnythingAny : the alternatives accepted at top level; tried in the order given.
AnythingAny :
# One or more formulae.
Formulae
# Formulae wrapped in OPEN ... CLOSE (any CLOSE; it is not matched against the OPEN).
| OPEN Formulae CLOSE { Fence($item[1],$item[2],$item[3]); }
# Formulae whose first LHS is missing (see modifierFormulae), bare or fenced.
| modifierFormulae
| OPEN modifierFormula CLOSE { Fence($item[1],$item[2],$item[3]); }
# A lone MODIFIER token.
| MODIFIER
# MODIFIEROP or METARELOP with no left operand; Absent() fills that slot.
| MODIFIEROP Expression { Apply($item[1],Absent(),$item[2]);}
| METARELOP Formula { Apply($item[1],Absent(),$item[2]); }
# Two or more operators, optionally separated by PUNCT.
# PUNCT(?) yields an array ref; when it is empty InvisibleComma() is used instead.
| AnyOp (PUNCT(?) AnyOp {[$item[1]->[0]||InvisibleComma(), $item[2]]})(s)
{ NewList($item[1],map(@$_,@{$item[2]})); }
# Floating scripts with nothing to attach to; Absent() is used as the base.
| FLOATSUPERSCRIPT POSTSUBSCRIPT { NewScript(NewScript(Absent(),$item[1]),$item[2]); }
| FLOATSUBSCRIPT POSTSUPERSCRIPT { NewScript(NewScript(Absent(),$item[1]),$item[2]); }
| FLOATSUPERSCRIPT { NewScript(Absent(),$item[1]); }
| FLOATSUBSCRIPT { NewScript(Absent(),$item[1]); }
# An operator followed by an expression; Absent() fills the first operand slot.
| AnyOp Expression { Apply($item[1],Absent(),$item[2]);}
# Subscript : a top level rule for subscripts that can accept all sorts of junk
# (Superscript is the analogous rule for superscripts).
# Matches one or more aSubscript items, optionally separated by PUNCT;
# InvisibleComma() is substituted where the PUNCT is missing, and the
# items and separators are combined with NewList.
# $MaxAbsDepth is localized and reset from $LaTeXML::MathParser::MAX_ABS_DEPTH.
Subscript : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Subscript :
aSubscript (PUNCT(?) aSubscript {[$item[1]->[0] || InvisibleComma(),$item[2]]; })(s?)
{ NewList($item[1],map(@$_,@{$item[2]})); }
# Superscript : top level rule for superscripts.
# Matches one or more aSuperscript items, optionally separated by PUNCT;
# InvisibleComma() is substituted where the PUNCT is missing, and the
# items and separators are combined with NewList.
# $MaxAbsDepth is localized and reset from $LaTeXML::MathParser::MAX_ABS_DEPTH.
Superscript : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Superscript :
aSuperscript (PUNCT(?) aSuperscript {[$item[1]->[0] || InvisibleComma(),$item[2]]; })(s?)
{ NewList($item[1],map(@$_,@{$item[2]})); }
# aSubscript : a single item within a subscript: Formulae, an operator applied
# to an Expression (Absent() fills the first operand slot), or a bare operator.
aSubscript :
Formulae
| AnyOp Expression { Apply($item[1],Absent(),$item[2]);}
| AnyOp
# aSuperscript : a single item within a superscript. Same alternatives as
# aSubscript, except that a run of SUPOP tokens (supops) is tried first.
aSuperscript :
supops
| Formulae
| AnyOp Expression { Apply($item[1],Absent(),$item[2]);}
| AnyOp
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Formulae (relations or grouping of expressions or relations)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# This maze attempts to recognize the various meaningful(?) alternations of
# Expression(s) separated by punctuation, relational operators or metarelational
# operators [Think of $a=b=c$ vs $a=b, c=d$ vs. $a=b,c,d$ .. ]
# and group them into Formulae (collections of relations), including relations
# which have punctuated collections of Expression(s) on either the LHS or RHS,
# as well as `multirelation' like a = b = c, or simply punctuated collections of
# Expression(s)
# Formulae : a Formula, handed to moreFormulae which looks for further
# formulae and combines them; the result is whatever moreFormulae returns.
Formulae : Formula moreFormulae[$item[1]]
# moreFormulae[$formula]; Got a Formula, what can follow?
#  - end of input: return $formula unchanged;
#  - one or more (endPunct Formula) pairs: combined via NewFormulae;
#  - one or more (METARELOP Formula) pairs: combined via NewFormula;
#  - otherwise: return $formula unchanged, consuming nothing.
moreFormulae :
/^\Z/ { $arg[0];} # short circuit!
| (endPunct Formula { [$item[1],$item[2]]; })(s)
{ NewFormulae($arg[0],map(@$_,@{$item[1]})); }
| metarelopFormula(s) { NewFormula($arg[0],map(@$_,@{$item[1]})); }
| { $arg[0]; }
# Punctuation that ends a formula: either a PUNCT or a PERIOD token.
endPunct : PUNCT | PERIOD
# Formula : an Expression, handed to extendFormula which checks for following
# punctuation or relational operators; the result is extendFormula's value.
Formula : Expression extendFormula[$item[1]]
# extendFormula[$expression] ; expression might be followed by punct Expression...
# or relop Expression... or arrow Expression or nothing.
# (relop also covers ARROW; see the relop rule.)
extendFormula :
/^\Z/ { $arg[0];} # short circuit!
# Punctuated collection; maybeRHS receives the flattened ($expr,(punct,$expr)+).
| punctExpr(s) maybeRHS[$arg[0],map(@$_,@{$item[1]})]
# A relation; moreRHS receives ($lhs,$relop,$rhs).
| relop Expression moreRHS[$arg[0],$item[1],$item[2]]
# A relation whose RHS is missing at end of input; Absent() stands in for it.
| relop /^\Z/ { NewFormula($arg[0],$item[1], Absent()); }
| { $arg[0]; }
# maybeRHS[$expr,(punct,$expr)*];
# Could have RELOP Expression (which means the (collected LHS) relation RHS)
# or done (just collection)
# In the relation case the whole collection (NewList(@arg)) becomes the LHS.
maybeRHS :
/^\Z/ { NewList(@arg); }
| relopExpr(s) { NewFormula(NewList(@arg),map(@$_,@{$item[1]})); }
| { NewList(@arg); }
# --- either line could be followed by (>0)
# For the latter, does a,b,c (<0) mean c<0 or all of them are <0 ????
# moreRHS[$expr,$relop,$expr]; Could have more (relop Expression)
# or (punct Expression)*
# The last production uses relopExpr(s?), which also matches zero repetitions,
# so it doubles as the fallback that builds the plain 3-part formula.
moreRHS :
/^\Z/ { NewFormula($arg[0],$arg[1],$arg[2]); } # short circuit!
# Punctuation after the RHS; maybeColRHS gets the args plus (punct,$expr).
| PUNCT Expression maybeColRHS[@arg,$item[1],$item[2]]
| relopExpr(s?) { NewFormula($arg[0],$arg[1],$arg[2],
map(@$_,@{$item[1]})); }
# --- 1st line could be preceded by (>0) IF it ends up end of formula
# --- 2nd line could be followed by (>0)
# maybeColRHS[$expr,$relop,$expr,(punct, $expr)*];
# Could be done, get punct (collection) or rel Expression (another formula)
maybeColRHS :
# End of input: everything after the relop is collected as the RHS.
/^\Z/ { NewFormula($arg[0],$arg[1],NewList(@arg[2..$#arg])); }
# A relop follows: the last expression in @arg becomes the LHS of a new
# formula (parsed by moreRHS); the punct before it ($arg[$#arg-1]) separates
# the two formulae, and the earlier expressions form the first formula's RHS.
| relop Expression moreRHS[$arg[$#arg],$item[1],$item[2]]
{ NewFormulae(NewFormula($arg[0],$arg[1],
NewList(@arg[2..$#arg-2])),$arg[$#arg-1],$item[3]); }
# More punctuation: keep accumulating (punct,$expr) pairs.
| PUNCT Expression maybeColRHS[@arg,$item[1],$item[2]]
| { NewFormula($arg[0],$arg[1],NewList(@arg[2..$#arg])); }
# --- 1st line handles it through more RHS ???
# --- 2nd line could be preceded by (>0) if it ends formula
# --- 3rd line could be followed by (>0)
# punctExpr : PUNCT followed by an Expression; returns the pair as an array ref
# [$punct,$expr] so callers can flatten repetitions with map(@$_,...).
punctExpr : PUNCT Expression { [$item[1],$item[2]]; }
# relopExpr : relop followed by an Expression; returns [$relop,$expr].
# If the relop is the last thing in the input, Absent() stands in for the
# missing expression.
relopExpr : relop Expression { [$item[1],$item[2]]; }
| relop /^\Z/ { [$item[1], Absent()]; }
# metarelopFormula : METARELOP followed by a Formula; returns [$metarelop,$formula].
# If the METARELOP is the last thing in the input, Absent() stands in for the
# missing formula.
metarelopFormula :
METARELOP Formula { [$item[1],$item[2]]; }
| METARELOP /^\Z/ { [$item[1], Absent()]; }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# `Modifier' formula, things like $<0$, that might follow another formula or text.
# Absent() is a placeholder for the missing thing... (?)
# [and also when the LHS is moved away, due to alignment rearrangement]
# modifierFormulae : a modifierFormula possibly followed by more formulae
# (handled by moreFormulae, exactly as for Formulae).
modifierFormulae : modifierFormula moreFormulae[$item[1]]
# modifierFormula : a relation with no LHS; Absent() is passed to moreRHS
# in the LHS position.
modifierFormula : relop Expression moreRHS[Absent(),$item[1],$item[2]]
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Expressions; sums of terms
# Abstractly, things combined by operators binding tighter than relations
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Expressions : an Expression followed by zero or more (PUNCT Expression) pairs.
# Each punctExpr yields a [$punct,$expr] pair; the pairs are flattened and
# passed, after the leading expression, to NewList.
Expressions : Expression punctExpr(s?)
{ my @rest = map { @{$_} } @{$item[2]};
  NewList($item[1], @rest); }
# Expression : a (possibly signed) term, followed by any further additive
# terms (moreTerms starts with an empty accumulator and the first term),
# followed by an optional trailing modifier (addExpressionModifier).
Expression : SignedTerm moreTerms[[],$item[1]] addExpressionModifier[$item[2]]
# # very tentatively allow an operator as a complete expression
# # BUT, this should only succeed if at end, or followed by punctuation!!!!!!!
# # (or CLOSE, or... ?!?!?!?)
# The ...anyOpIsolator is a positive lookahead: it must match, but consumes nothing.
| AnyOp ...anyOpIsolator { $item[1]; }
# anyOpIsolator : what may follow an operator standing alone as an Expression:
# end of input, PUNCT, or CLOSE. Only used as a lookahead by Expression.
anyOpIsolator : /^\Z/ | PUNCT | CLOSE
# moreTerms[ [($term,$addop)*], $term]; Check for more addop & term's
# $arg[0] is an array ref of the (term,addop) pairs seen so far and $arg[1] is
# the latest term. When no further AddOp follows, the accumulated sequence
# is handed to LeftRec.
moreTerms :
/^\Z/ { LeftRec(@{$arg[0]},$arg[1]); } # short circuit!
| AddOp moreTerms2[$arg[0],$arg[1],$item[1]]
| { LeftRec(@{$arg[0]},$arg[1]); }
# moreTerms2[ [($term,$addop)*], $term, $addop]; Check if addop is followed
# by another term, or if not, it presumably represents a limiting form
# like "a+" (ie a from above)
# With a following Term, ($term,$addop) is appended to the accumulator and
# parsing continues in moreTerms; otherwise the dangling addop is attached
# to the last term as a 'limit-from' application.
moreTerms2 : Term moreTerms[ [@{$arg[0]},$arg[1],$arg[2]],$item[1] ]
| { LeftRec(@{$arg[0]},Apply(New('limit-from'),$arg[1],$arg[2])); }
# addExpressionModifier[$expr] : attach a trailing modifier to $expr, if one
# follows; otherwise return $expr unchanged.
addExpressionModifier :
/^\Z/ { $arg[0];} # short circuit!
# Parenthesized relation with missing LHS, eg. "x (>0)": the fenced relation
# (with Absent() as its LHS) annotates $expr. The optional PUNCT is matched
# but not used in the result.
| PUNCT(?) OPEN relop Expression balancedClose[$item[2]]
{ Apply(New('annotated'),$arg[0],
Fence($item[2], Apply($item[3],Absent(),$item[4]),$item[5])); }
# An alternative form would have OPEN Expression relop...
# but that seems less like a "modifier" and more like a relation as argument!
### | PUNCT(?) OPEN Expression relop Expression
### moreRHS[$item[3],$item[4],$item[5]] balancedClose[$item[2]]
### { Apply(New('annotated'),$arg[0],Fence($item[2],$item[6],$item[7])); }
# Parenthesized MODIFIEROP Expression: the operator is applied to $expr and
# the expression; the delimiters and optional PUNCT do not appear in the result.
| PUNCT(?) OPEN MODIFIEROP Expression balancedClose[$item[2]]
{ Apply($item[3],$arg[0],$item[4]); } # Is the punctuation Lost here?
# A bare MODIFIER token annotates $expr.
| MODIFIER
{ Apply(New('annotated'),$arg[0],$item[1]); }
# Unparenthesized MODIFIEROP Expression.
| MODIFIEROP Expression
{ Apply($item[1],$arg[0],$item[2]); }
| { $arg[0]; }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Terms: products of factors
# Abstractly, things combined by operators binding tighter than addition
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# SignedTerm : a Term optionally preceded by an additive operator, which is
# then applied to the term as its only operand (eg. a leading minus sign).
SignedTerm : AddOp Term { Apply($item[1],$item[2]); }
| Term
# Term : a Factor, handed to moreFactors which looks for further factors;
# the result is whatever moreFactors returns.
Term : Factor moreFactors[$item[1]]
# moreFactors[$factor] : extend the product accumulated so far in $arg[0];
# returns $arg[0] unchanged when nothing more can be added.
moreFactors :
/^\Z/ { $arg[0];} # short circuit!
# Explicit multiplication operator.
| MulOp Factor moreFactors[ApplyNary($item[1],$arg[0],$item[2])]
# Given an explicit COMPOSEOP, we'll assume the preceding is
# an implicit lambda of some sort(?)
| COMPOSEOP makeComposition[$arg[0],$item[1]]
# "Evaluated at" notation. The leading action fails this production when
# $forbidEvalAt is set (as it is within absExpression); since the action is
# $item[1], the evalAtOp is $item[2].
| { ($forbidEvalAt ? undef : 1); }
evalAtOp maybeEvalAt[$arg[0],$item[2]]
# Juxtaposition: implied multiplication using InvisibleTimes().
| Factor moreFactors[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
| { $arg[0]; }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Factors: function applications, postfix on atoms, etc.
# Abstractly, things combined by operators binding tighter than multiplication
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Factor : the basic operand of a product. Productions are tried in order;
# several begin with a guard action which makes the production fail
# (by returning undef) depending on $forbidVertBar or IsNotationAllowed('QM').
# Note that in those productions the guard action itself is $item[1],
# so the tokens are numbered from $item[2].
Factor :
# These 2nd two are Iffy; hopefully the 1st rule will protect from backtrack?
OPEN ARRAY CLOSE addScripts[Fence($item[1],$item[2],$item[3])]
# perhaps only when OPEN or CLOSED is { or } ??
# should be explicitly {, and moreover the array should be only 1 or 2 columns!
# An ARRAY with only one brace is interpreted as 'cases'; Absent() fills in
# for the missing delimiter.
| LBRACE ARRAY { InterpretDelimited(New('cases'),$item[1],$item[2],Absent()); }
| ARRAY RBRACE { InterpretDelimited(New('cases'),Absent(),$item[1],$item[2]); }
# Functions and atoms, possibly with prescripts, each followed by the
# argument-handling rule appropriate to its class.
| preScripted['FUNCTION'] addArgs[$item[1]]
| preScripted['OPFUNCTION'] addOpFunArgs[$item[1]]
| preScripted['TRIGFUNCTION'] addTrigFunArgs[$item[1]]
| preScripted['ATOM_OR_ID'] maybeArgs[$item[1]]
| preScripted['UNKNOWN'] doubtArgs[$item[1]]
| NUMBER addScripts[$item[1]]
# Things starting with an open delimiter; SCRIPTOPEN is tried before the
# general OPEN so that the set special cases get a chance first.
| SCRIPTOPEN scriptFactorOpen[$item[1]]
| OPEN factorOpen[$item[1]]
# handle INTOP separately, since it recognizes d as diff
# (bigop also includes INTOP, but this production is tried first.)
| preScripted['INTOP'] addIntOpArgs[$item[1]]
| preScripted['bigop'] addOpArgs[$item[1]]
# Doubled single vertical bars; each pair is merged into one delimiter by CatSymbols.
| { ($forbidVertBar ? undef : 1); }
SINGLEVERTBAR SINGLEVERTBAR absExpression SINGLEVERTBAR SINGLEVERTBAR # || exp || ==> norm
addScripts[Fence(CatSymbols($item[2],$item[3],undef,'||',role=>'OPEN'),
$item[4],
CatSymbols($item[5],$item[6],undef,'||',role=>'CLOSE'))]
| { ($forbidVertBar ? undef : 1); }
VERTBAR absExpression VERTBAR # | exp | => absolute-value
addScripts[Fence($item[2],$item[3],$item[4])]
# Quantum-mechanics notations; SawNotation('QM') is called once the
# delimiters have actually been matched.
| { ($forbidVertBar ? undef : IsNotationAllowed('QM')); }
MIDBAR ketExpression RANGLE { SawNotation('QM'); } # | exp > ==> ket
addScripts[InterpretDelimited(New('ket'),
Annotate($item[2],role=>'OPEN'),$item[3],Annotate($item[4],role=>'CLOSE'))] # ket
# < exp | ... : a bra, or the start of <a|b> or <a|H|b>; see maybeBra.
| { IsNotationAllowed('QM'); }
LANGLE ketExpression MIDBAR maybeBra[$item[2],$item[3],$item[4]]
# < exp > : angle brackets used as a fence.
| { IsNotationAllowed('QM'); }
LANGLE absExpression RANGLE
addScripts[Fence(Annotate($item[2],role=>'OPEN'),
$item[3],
Annotate($item[4],role=>'CLOSE'))]
# An OPERATOR (with scripts), possibly nested with following operators,
# then given arguments in the same way as an OPFUNCTION.
| OPERATOR addScripts[$item[1]] nestOperators[$item[2]]
addOpFunArgs[$item[3]]
# ATOM_OR_ID : any of the ATOM, ID or ARRAY terminals (despite the name,
# ARRAY is accepted as well).
ATOM_OR_ID : ATOM | ID | ARRAY
# A restricted sort of Factor for the unparenthesized argument to a function.
# Note f g h => f*g*h, but f g h x => f(g(h(x))) Seems like what people mean...
# Should there be a special case for trigs?
# barearg : one aBarearg, extended by moreBareargs with any further ones.
barearg : aBarearg moreBareargs[$item[1]]
# aBarearg : a single item of a bare (unparenthesized) argument. These are the
# function, atom, unknown and number productions of Factor, plus a
# vertical-bar fenced Expression.
# NOTE(review): unlike Factor, the VERTBAR production here is not guarded by
# $forbidVertBar and uses Expression rather than absExpression -- confirm
# that this is intended.
aBarearg :
preScripted['FUNCTION'] addArgs[$item[1]]
| preScripted['OPFUNCTION'] addOpFunArgs[$item[1]]
| preScripted['TRIGFUNCTION'] addTrigFunArgs[$item[1]]
| preScripted['ATOM_OR_ID'] maybeArgs[$item[1]]
| preScripted['UNKNOWN'] doubtArgs[$item[1]]
| NUMBER addScripts[$item[1]]
| VERTBAR Expression VERTBAR addScripts[Fence($item[1],$item[2],$item[3])]
# moreBareargs[$argpart] : extend the bare argument accumulated in $arg[0]
# with further aBarearg items, joined by an explicit MulOp or, when
# juxtaposed, by InvisibleTimes(). Returns $arg[0] when nothing more follows.
moreBareargs :
/^\Z/ { $arg[0];} # short circuit!
| MulOp aBarearg moreBareargs[ApplyNary($item[1],$arg[0],$item[2])]
| aBarearg moreBareargs[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
| { $arg[0]; }
# A variation of barearg that does not allow a bare trig function:
# one aTrigBarearg, extended by moreTrigBareargs with any further ones.
trigBarearg : aTrigBarearg moreTrigBareargs[$item[1]]
# aTrigBarearg : same alternatives as aBarearg, but without the
# preScripted['TRIGFUNCTION'] production.
aTrigBarearg :
preScripted['FUNCTION'] addArgs[$item[1]]
| preScripted['OPFUNCTION'] addOpFunArgs[$item[1]]
| preScripted['ATOM_OR_ID'] maybeArgs[$item[1]]
| preScripted['UNKNOWN'] doubtArgs[$item[1]]
| NUMBER addScripts[$item[1]]
| VERTBAR Expression VERTBAR addScripts[Fence($item[1],$item[2],$item[3])]
# moreTrigBareargs[$argpart] : like moreBareargs, but the further items must
# be aTrigBarearg. They are joined by an explicit MulOp or, when juxtaposed,
# by InvisibleTimes(). Returns $arg[0] when nothing more follows.
moreTrigBareargs :
/^\Z/ { $arg[0];} # short circuit!
| MulOp aTrigBarearg
moreTrigBareargs[ApplyNary($item[1],$arg[0],$item[2])]
| aTrigBarearg
moreTrigBareargs[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
| { $arg[0]; }
# maybeEvalAt[$thing,$vertbar] : after an evalAtOp, require either a subscript
# (moreEvalAt then checks for a following superscript) or a superscript
# followed by a subscript. There is no fallback production, so the rule
# fails if the bar carries no subscript.
# NewEvalAt is called as NewEvalAt($thing,$vertbar,$sub,$sup).
maybeEvalAt :
POSTSUBSCRIPT moreEvalAt[$arg[0],$arg[1],$item[1]]
| POSTSUPERSCRIPT POSTSUBSCRIPT moreFactors[NewEvalAt($arg[0],$arg[1],$item[2],$item[1])]
# moreEvalAt[$thing,$vertbar,$sub] : got the subscript; pick up an optional
# superscript (undef is passed when there is none), build the result with
# NewEvalAt and continue looking for further factors.
moreEvalAt :
POSTSUPERSCRIPT moreFactors[NewEvalAt($arg[0],$arg[1],$arg[2],$item[1])]
| moreFactors[NewEvalAt($arg[0],$arg[1],$arg[2],undef)]
#======================================================================
# After < a | we might be done, or get <a|b> or <a|H|b>
# <$expr | maybeBra[$langle,$expr,$bar]
# If another ketExpression follows, continue in maybeBraket; otherwise
# the three pieces form a 'bra' and SawNotation('QM') is recorded.
maybeBra :
ketExpression maybeBraket[$arg[0],$arg[1],$arg[2],$item[1]]
| { SawNotation('QM'); }
addScripts[InterpretDelimited(New('bra'),
Annotate($arg[0],role=>'OPEN'),$arg[1],Annotate($arg[2],role=>'CLOSE'))]
# <$expr1|$expr2 maybeBraket[$langle,$expr1,$bar,$expr2]
# Either a closing > follows, giving <a|b> (an 'inner-product'), or another
# bar, expression and > follow, giving <a|H|b> (a 'quantum-operator-product').
# There is no fallback production, so the rule fails otherwise.
# Since the SawNotation action is an item too, the tokens before it keep
# their positions ($item[1]..$item[3]).
maybeBraket :
RANGLE { SawNotation('QM'); }
addScripts[InterpretDelimited(New('inner-product', undef,role=>'MIDDLE'),
Annotate($arg[0],role=>'OPEN'),$arg[1],
Annotate($arg[2],role=>'MIDDLE'),
$arg[3],Annotate($item[1],role=>'CLOSE'))]
| MIDBAR ketExpression RANGLE { SawNotation('QM'); }
addScripts[InterpretDelimited(New('quantum-operator-product',undef), # Is this a good representation?
Annotate($arg[0],role=>'OPEN'),$arg[1],
Annotate($arg[2],role=>'CLOSE'),
$arg[3],
Annotate($item[1],role=>'OPEN'),$item[2],
Annotate($item[3],role=>'CLOSE'))]
# bra's and ket's (ie <foo| & |foo>) can contain a rather wide variety of things
# from simple symbols to full (but typically short) formula, and so we
# want to use the Formulae production. However, for that to work,
# we need to keep |, < and > (which delimit the bra & ket) from being
# interpreted as usual, otherwise the parse will walk off the end, or
# fail at a level that precludes backtracking.
# ketExpression : the contents of a bra or ket: Formulae, or a single operator.
# While this rule is active, $forbidVertBar and $forbidLRAngle are set
# (localized): Factor then skips its vertical-bar productions and relop
# refuses less-than/greater-than.
ketExpression : <rulevar: local $forbidVertBar = 1>
ketExpression : <rulevar: local $forbidLRAngle = 1>
ketExpression : Formulae
| METARELOP | ARROW | AddOp | MulOp | MODIFIEROP
#======================================================================
# absExpression; need to be careful about misinterpreting the next |
# since we can't backtrack across productions.
# Disable evalAt notation ( |_{x=0} ) and explicitly control abs nesting.
# absExpression : an Expression appearing between vertical bars (or angles).
# While this rule is active $forbidEvalAt is set, and $MaxAbsDepth is
# decremented (both localized). When the depth has gone negative the
# leading action calls SawNotation('AbsFail') and returns undef, so the rule
# fails without attempting the Expression. The value is that of Expression.
absExpression : <rulevar: local $forbidEvalAt = 1>
absExpression : <rulevar: local $MaxAbsDepth = $MaxAbsDepth-1>
absExpression : { ($MaxAbsDepth >= 0 ? 1 : (SawNotation('AbsFail')&& undef)); } Expression
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Adding pre|post sub|super scripts to various things.
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# addScripts[$base] ; adds any following sub/super scripts to $base.
# Each POSTSUPERSCRIPT or POSTSUBSCRIPT is attached with NewScript, and each
# POSTFIX operator is applied to the base with Apply; the rule recurses
# until none remain and then returns the accumulated base.
addScripts :
/^\Z/ { $arg[0];} # short circuit!
| POSTSUPERSCRIPT addScripts[NewScript($arg[0],$item[1])]
| POSTSUBSCRIPT addScripts[NewScript($arg[0],$item[1])]
| POSTFIX addScripts[Apply($item[1],$arg[0])]
| { $arg[0]; }
# ================================================================================
# preScripted['RULE']; match a RULE possibly preceded by sub/super prescripts,
# possibly followed by sub/superscripts. The initial prescript can only be FLOAT
# but the following ones can be either POST (which combine) or FLOAT (which don't)
# $arg[0] is the NAME of the rule to match; it is invoked through
# <matchrule:...> and passed along to inpreScripted. Prescripts are attached
# with NewScript(...,'pre') after the base (with its postscripts) is parsed.
preScripted :
FLOATSUPERSCRIPT inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
| FLOATSUBSCRIPT inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
| <matchrule:$arg[0]> addScripts[$item[1]]
# inpreScripted['RULE'] : continuation of preScripted after the first prescript.
# $arg[0] is the NAME of the rule to match (not a prescript). Any further
# POST or FLOAT scripts are treated as prescripts and attached with
# NewScript(...,'pre'); the last production matches the named rule
# and adds its postscripts.
inpreScripted :
POSTSUPERSCRIPT inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
| POSTSUBSCRIPT inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
| FLOATSUPERSCRIPT inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
| FLOATSUBSCRIPT inpreScripted[$arg[0]] { NewScript($item[2],$item[1], 'pre');}
| <matchrule:$arg[0]> addScripts[$item[1]]
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Parenthetical: Things wrapped in OPEN .. CLOSE
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# ================================================================================
# Factors that begin with OPEN; grouped expressions and objects like sets,
# intervals, etc.
# factorOpen[$open] : Dealing with various things that start with an open.
# The OPEN itself has already been consumed by the caller and is $arg[0].
factorOpen :
AddOp balancedClose[$arg[0]] addScripts[Fence($arg[0],$item[1],$item[2])] # For (-)
# Parenthesized Operator possibly w/scripts
# The fenced operator is then applied to the Factor that follows.
| preScripted['bigop'] balancedClose[$arg[0]]
addScripts[Fence($arg[0],$item[1],$item[2])] Factor
{ Apply($item[3],$item[4]); }
# Parenthesized Operator including a pre-factor
# The pre-factor and operator are combined with InvisibleTimes() inside the fence.
| Factor preScripted['bigop'] balancedClose[$arg[0]]
addScripts[Fence($arg[0],
Apply(InvisibleTimes(),$item[1],$item[2]),$item[3])] Factor
{ Apply($item[4],$item[5]); }
# read expression too? match subcases.
| Expression factorOpenExpr[$arg[0],$item[1]]
# Empty OPEN CLOSE ?
| balancedClose[$arg[0]] addScripts[Fence($arg[0],$item[1])]
# Sequence starting with an operator ?
| AnyOp factorOpenExpr[$arg[0],$item[1]]
# factorOpenExpr[$open,$expr]; Try to recognize various things that start
# this way. Need some extra productions for sets (w/possible middle '|' )
# and vectors; all n-ary.
# There is no fallback production: the rule fails unless a closing
# delimiter is eventually found.
factorOpenExpr :
# 2nd expression; some kind of pair, interval, set, whatever [Any CLOSE, NOT balancedClose]
(PUNCT Expression { [$item[1],$item[2]]; })(s) CLOSE
addScripts[Fence($arg[0],$arg[1],map(@$_,@{$item[1]}),$item[2])]
# only 2 things and 2nd one is an op?; some kind of group???
| PUNCT AnyOp balancedClose[$arg[0]]
addScripts[InterpretDelimited(New('group'),
$arg[0],$arg[1],$item[1],$item[2],$item[3])]
# parenthesized expression.
| balancedClose[$arg[0]] addScripts[Fence($arg[0],$arg[1],$item[1])]
# ================================================================================
# Sets special cases
# A conditionalized set
# scriptFactorOpen[$open] : things starting with an open brace (SCRIPTOPEN).
# First tries a conditionalized set, { Formula suchThatOp Formulae }, which
# is interpreted as 'conditional-set'; $arg[0] is the already consumed open.
scriptFactorOpen :
Formula suchThatOp Formulae balancedClose[$arg[0]]
addScripts[InterpretDelimited(New('conditional-set'),
$arg[0], $item[1],$item[2], $item[3],$item[4])]
# Else fall through to normal factorOpen
| factorOpen[$arg[0]]
# The "such that" that can appear in sets like {a "such that" predicate(a)}
# accepts a MIDDLE token, a vertical bar, or a colon. The colon is
# recognized by matching the lexer string of a METARELOP named 'colon'.
suchThatOp : MIDDLE | VERTBAR
| /METARELOP:colon:\d+/ { Lookup($item[1]); }
# ================================================================================
# Function args, etc.
# maybeArgs[$function] ; Add arguments to an identifier, but only if made explicit,
# ie. only when an APPLYOP follows (the arguments are then mandatory; see
# requireArgs). Otherwise the identifier is returned unchanged.
maybeArgs :
/^\Z/ { $arg[0];} # short circuit!
| APPLYOP requireArgs[$arg[0]]
| { $arg[0]; }
# doubtArgs[$unknown]; Check for apparent arguments following an
# Unknown (unclassified) item. If an explicit APPLYOP follows,
# it seemingly asserts that the preceding _is_ a function,
# otherwise Warn if there seems to be an arglist.
doubtArgs :
/^\Z/ { $arg[0];} # short circuit!
| APPLYOP requireArgs[$arg[0]]
# Only attempted when the 'MaybeFunctions' notation is allowed. forbidArgs
# always fails, so this production never succeeds: it exists for
# forbidArgs' side effect, and parsing continues with the fallback below.
| { IsNotationAllowed('MaybeFunctions'); } OPEN forbidArgs[$arg[0],$item[2]]
| { $arg[0]; }
# forbidArgs[$unknown,$open]; Got a suspicious pattern: an unknown and open.
# If the following seems to be an argument list, warn.
# Both productions end their action with undef, so this rule never succeeds;
# its only effect is the call to MaybeFunction($unknown).
forbidArgs :
Argument (argPunct Argument)(s) balancedClose[$arg[1]]
{ MaybeFunction($arg[0]); undef; }
# Term really could be Argument, but that gives a "possible function" warning
# even for a(b+c) which has a good reason for the parentheses; These patterns FAIL anyway!!
| Term balancedClose[$arg[1]] { MaybeFunction($arg[0]); undef; }
# requireArgs[$function]; Add arguments following a known function, failing if they
# aren't there! Typically this follows an explicit applyop
# Either a delimited, punctuated argument list (the punctuation and delimiters
# are passed on to ApplyDelimited), or a single bare argument.
requireArgs :
OPEN Argument (argPunct Argument {[$item[1],$item[2]];})(s?)
balancedClose[$item[1]]
{ ApplyDelimited($arg[0],$item[1],$item[2],
map(@$_,@{$item[3]}),$item[4]); }
# Hmm, should only be applicable to _some_ functions ???
| barearg { Apply($arg[0],$item[1]); }
# addArgs[$function]; We've got a function; Add following arguments to a
# function, if present. Also recognizes composition type ops (something
# combining two functions into a function)
addArgs :
/^\Z/ { $arg[0];} # short circuit!
| addEasyArgs[$arg[0]]
# Accept bare arg (w/o parens) ONLY if an explicit APPLYOP
| APPLYOP barearg { Apply($arg[0],$item[2]);}
| { $arg[0]; } # Just return the function itself,then.
# addOpFunArgs[$function]; Same as addArgs but for functions classified as
# OPFUNCTION. Ie operator-like functions such as \sin, that don't
# absolutely require parens around args.
# The difference from addArgs is that the APPLYOP before a bare arg is optional.
addOpFunArgs :
/^\Z/ { $arg[0];} # short circuit!
| addEasyArgs[$arg[0]]
# Accept bare arg (w/o parens) for this class of functions.
| APPLYOP(?) barearg { Apply($arg[0],$item[2]);}
| { $arg[0]; } # Just return the function itself,then.
# addTrigFunArgs[$function]; Yet another variation of addOpFunArgs;
# It differs in that the barearg is restricted to non-trig (trigBarearg).
addTrigFunArgs :
/^\Z/ { $arg[0];} # short circuit!
| addEasyArgs[$arg[0]]
# Accept bare arg (w/o parens) for this class of functions.
| APPLYOP(?) trigBarearg { Apply($arg[0],$item[2]);}
| { $arg[0]; } # Just return the function itself,then.
# addEasyArgs[$function]; gets unambiguous compositions or parenthesized arguments
# These are the "easy" cases for addArgs and addOpFunArgs.
# There is no fallback production: the rule fails if neither case applies,
# leaving the caller to try its other alternatives.
addEasyArgs :
COMPOSEOP makeComposition[$arg[0],$item[1]]
# Delimited argument list; the optional APPLYOP is $item[1] and is not
# passed on to ApplyDelimited.
| APPLYOP(?) OPEN Argument
(argPunct Argument {[$item[1],$item[2]];})(s?)
balancedClose[$item[2]]
{ ApplyDelimited($arg[0],$item[2],$item[3],
map(@$_,@{$item[4]}),$item[5]); }
# A function (or other) argument would normally be a simple expression,
# but often relations (esp. Statistics) or arrows appear, so allow those as well.
# Argument : an Expression, possibly extended by extendArgument.
Argument : Expression extendArgument[$item[1]]
# extendArgument[$argpart] : recognize some longer form "arguments";
# things that may look like relations.
# Relations are folded in with NewFormula and METARELOPs with Apply; the rule
# recurses on the result until nothing more follows, then returns it.
extendArgument :
/^\Z/ { $arg[0]; } # short circuit
| relopExpr(s) extendArgument[NewFormula($arg[0],map(@$_,@{$item[1]}))]
| METARELOP Formula extendArgument[Apply($item[1],$arg[0],$item[2])]
| { $arg[0]; }
# makeComposition[$thing,$comp]; Given something that presumably is a function,
# and a composition operator, read another function and possibly args
# The composition Apply($comp,$thing,$other) is built first and then passed
# to the argument-adding rule matching the class of the second function.
makeComposition :
preScripted['FUNCTION'] addArgs[Apply($arg[1],$arg[0],$item[1])]
{ $item[2]; }
| preScripted['OPFUNCTION'] addOpFunArgs[Apply($arg[1],$arg[0],$item[1])]
{ $item[2]; }
| preScripted['TRIGFUNCTION']
addTrigFunArgs[Apply($arg[1],$arg[0],$item[1])] { $item[2]; }
# Given an explicit composition operator, the next thing may safely(?)
# be assumed to be a function, so treat it as such.
| Factor addArgs[Apply($arg[1],$arg[0],$item[1])] { $item[2]; }
# addOpArgs[$bigop]; Add following Term to a bigop, if present.
# The operand is a Factor extended by moreOpArgFactors (rather than a Term,
# so that evalAt notation is not absorbed into it).
addOpArgs :
/^\Z/ { $arg[0];} # short circuit!
# Is the APPLYOP getting "lost" here?
# (The optional APPLYOP is $item[1]; it is not used in the result.)
| APPLYOP(?) Factor moreOpArgFactors[$item[2]] { Apply($arg[0],$item[3]);}
| { $arg[0]; }
# moreOpArgFactors[$factor1] : Similar to moreFactors,
# but w/o evalAtOp since that most likely belongs to the operator, not
# the factors.
# (It also has no COMPOSEOP production.)
moreOpArgFactors :
/^\Z/ { $arg[0];} # short circuit!
| MulOp Factor moreOpArgFactors[ApplyNary($item[1],$arg[0],$item[2])]
| Factor moreOpArgFactors[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
| { $arg[0]; }
# addIntOpArgs[$bigop]; Add following Term to a INTOP as integrand, if present.
# The main point here is to recognize a "d" as a diff operator.
# This is insufficient, in general, because the "d" may be contained within
# a subexpression, particularly a fraction; the top-level parsing needs to be able
# to parse subexpressions within a context, and yet, needs to parse the subexpressions
# beforehand to (potentially) determine the role of the subexpression!
addIntOpArgs :
/^\Z/ { $arg[0];} # short circuit!
# Is the APPLYOP getting "lost" here?
# (The optional APPLYOP is $item[1]; it is not used in the result.)
| APPLYOP(?) IntFactor moreIntOpArgFactors[$item[2]] { Apply($arg[0],$item[3]);}
| { $arg[0]; }
# moreIntOpArgFactors[$factor1] : Similar to moreOpArgFactors,
# but recognizing d as diff (by using IntFactor in place of Factor).
moreIntOpArgFactors :
/^\Z/ { $arg[0];} # short circuit!
| MulOp IntFactor moreIntOpArgFactors[ApplyNary($item[1],$arg[0],$item[2])]
| IntFactor moreIntOpArgFactors[ApplyNary(InvisibleTimes(),$arg[0],$item[1])]
| { $arg[0]; }
# IntFactor : a factor within an integrand. A "d" immediately followed by an
# ATOM_OR_ID or UNKNOWN (with any scripts on the latter) is treated as a
# differential: the d is annotated with role DIFFOP and meaning
# differential-d and applied to the variable. Anything else is a Factor.
IntFactor :
## diffd ATOM_OR_ID { Apply(Annotate($item[1],role=>'DIFFOP',meaning=>'differential-d'),$item[2]); }
## | diffd UNKNOWN { Apply(Annotate($item[1],role=>'DIFFOP',meaning=>'differential-d'),$item[2]); }
diffd ATOM_OR_ID addScripts[$item[2]]
{ Apply(Annotate($item[1],role=>'DIFFOP',meaning=>'differential-d'),$item[3]); }
| diffd UNKNOWN addScripts[$item[2]]
{ Apply(Annotate($item[1],role=>'DIFFOP',meaning=>'differential-d'),$item[3]); }
| Factor { $item[1]; }
# diffd : a token named "d" whose role is UNKNOWN or ID, recognized directly
# from the lexer string; returns the result of Lookup on the matched string.
diffd :
/UNKNOWN:d:\d+/ { Lookup($item[1]); }
| /ID:d:\d+/ { Lookup($item[1]); }
# Punctuation separating function arguments; things marked MIDDLE could
# also separate arguments
# With great trepidation, I'm adding VERTBAR here
argPunct : PUNCT | MIDDLE | VERTBAR
# ================================================================================
# Operator args, etc.
# nestOperators[$operator*]; Nest a possible sequence of operators
# @arg holds the operators collected so far. A following OPERATOR extends the
# sequence; a following function ends it; in every terminating case the
# sequence is handed to recApply.
nestOperators :
/^\Z/ { recApply(@arg); }
| OPERATOR addScripts[$item[1]] nestOperators[@arg,$item[2]]
| FUNCTION addScripts[$item[1]] { recApply(@arg,$item[2]); }
| OPFUNCTION addScripts[$item[1]] { recApply(@arg,$item[2]); }
| TRIGFUNCTION addScripts[$item[1]] { recApply(@arg,$item[2]); }
# A delimited Expression becomes the argument of the LAST operator (via
# ApplyDelimited); that application takes the last operator's place.
| OPEN Expression balancedClose[$item[1]]
{ recApply(@arg[0..$#arg-1],
ApplyDelimited($arg[$#arg],$item[1],$item[2],$item[3])); }
| { recApply(@arg); }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# (slightly) structured operators
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# AnyOp : any kind of operator: relational (including arrows, via relop),
# meta-relational, additive, multiplicative, modifier, a (possibly
# prescripted) bigop, or an OPERATOR with scripts.
# NOTE(review): ARROW is listed separately although relop already accepts it.
AnyOp : relop | METARELOP | ARROW | AddOp | MulOp | MODIFIEROP
| preScripted['bigop']
| OPERATOR addScripts[$item[1]]
# Sub or superscripts on operators;
# we recognize the structure, not necessarily the meaning
# AddOp : an additive operator (BINOP is accepted too), with any decorations.
AddOp : BINOP addOpDecoration[$item[1]]
| ADDOP addOpDecoration[$item[1]]
# MulOp : a multiplicative operator (BINOP is accepted too), with any decorations.
MulOp : BINOP addOpDecoration[$item[1]]
| MULOP addOpDecoration[$item[1]]
# (BINOP can never really be satisfactory; it comes from something marked
# as \mathbin; we don't know any more about it, so both AddOp and MulOp
# accept it)
# addOpDecoration[$op] : Decorations for an operator;
# Same thing as addScripts, but not allowing POSTFIX
# (and the scripts are attached with DecorateOperator instead of NewScript).
addOpDecoration :
/^\Z/ { $arg[0];} # short circuit!
| POSTSUPERSCRIPT addOpDecoration[DecorateOperator($arg[0],$item[1])]
| POSTSUBSCRIPT addOpDecoration[DecorateOperator($arg[0],$item[1])]
| { $arg[0]; }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Pseudo-Terminals.
# Useful combinations or subsets of terminals.
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# A generalized relational operator or arrow
# Note we disallow < or > if we're parsing the contents of a bra or ket!
# The first production only proceeds when $forbidLRAngle is set; if the next
# token is then a less-than or greater-than RELOP, <commit> followed by
# <reject> makes the whole rule fail without trying the productions below.
relop :
{ ($forbidLRAngle ? 1 : undef); } /RELOP:(less|greater)-than:\d+/ <commit> <reject>
| RELOP addOpDecoration[$item[1]]
| ARROW addOpDecoration[$item[1]]
# Check out whether diffop should be treated as bigop or operator
# It depends on the binding
# bigop : any of the "big" operator terminals.
bigop : BIGOP | SUMOP | INTOP | LIMITOP | DIFFOP
# operator : alias for the OPERATOR terminal.
# NOTE(review): not referenced by any rule in this grammar -- confirm whether
# it is invoked externally.
operator: OPERATOR
# SUPOP is really only \prime(s) (?)
# supops : a run of one or more SUPOP tokens, merged into a single new token
# whose content is the concatenated text of the tokens and whose name is
# 'prime' followed by the number of tokens.
supops : SUPOP(s)
{ my @primes = @{$item[1]};
  New(undef,
      join('', map { $_->textContent } @primes),
      name => 'prime' . scalar(@primes)); }
# ================================================================================
# And some special cases...
# balancedClose[$open] : Match a CLOSE that `corresponds' to the OPEN.
# Succeeds, returning the CLOSE token, only when isMatchingClose($open,$close)
# is true; otherwise the action yields undef and the rule fails.
balancedClose : CLOSE
{ isMatchingClose($arg[0],$item[1]) ? $item[1] : undef; }
# The "evaluated at" operator, typically a vertical bar followed by a subscript
# equation. But it is often used in \left. \right| pairs!
# Hence a CLOSE token named | is accepted as well as a VERTBAR.
evalAtOp : VERTBAR
| /CLOSE:\|:\d+/ { Lookup($item[1]); }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Terminals / Lexer
# These correspond to the TeX tokens.
# The Lexer strings are of the form TYPE:NAME:NUMBER where
# TYPE is the grammatical role, or part of speech,
# NAME is the specific name (semantic or presentation) of the token
# NUMBER is the position of the specific token in the current token sequence.
#
# NOTE: RecDescent doesn't clearly distinguish lexing from parsing
# and so it allows us to interpret the same item as several distinct
# terminals; Presumably other parsers would not allow this.
# In a couple of cases, we have symbols that can be used in a few
# different ways:
# | as vertical bar, open or close, also as a close used for eval-at!
# : as meta-relation, as such-that
# <, > can be relop or part of brackets (eg. qm, etc)
# Perhaps these symbols should get a special role reflecting their specialness
# and then have pseudo-terminals that combine (eg. relop == RELOP | langle)
# This nibbles at the edge of the Ambiguity issue; if it turns out that
# a multi-meaning symbol gets used in a particular way, we'd want to assure
# that its role, meaning, etc, gets changed to reflect the specific usage!
#
# Upon reflection, this implies that OPEN|CLOSE are rather awkward as roles.
# \left< can be an OPEN _or_ RELOP
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Each terminal matches one lexer string of the form TYPE:NAME:NUMBER and
# returns the result of Lookup on the matched string.
# --- Atoms, identifiers, numbers and punctuation
ATOM : /ATOM:\S*:\d+/ { Lookup($item[1]); }
UNKNOWN : /UNKNOWN:\S*:\d+/ { Lookup($item[1]); }
ID : /ID:\S*:\d+/ { Lookup($item[1]); }
ARRAY : /ARRAY:\S*:\d+/ { Lookup($item[1]); }
NUMBER : /NUMBER:\S*:\d+/ { Lookup($item[1]); }
PUNCT : /PUNCT:\S*:\d+/ { Lookup($item[1]); }
PERIOD : /PERIOD:\S*:\d+/ { Lookup($item[1]); }
RELOP : /RELOP:\S*:\d+/ { Lookup($item[1]); }
# --- Multi-use symbols: these match tokens that other terminals match too.
# LANGLE/RANGLE accept the less-than/greater-than RELOPs as well as the
# langle/rangle delimiters.
LANGLE : /RELOP:less-than:\d+/ { Lookup($item[1]); }
| /OPEN:langle:\d+/ { Lookup($item[1]); }
RANGLE : /RELOP:greater-than:\d+/ { Lookup($item[1]); }
| /CLOSE:rangle:\d+/ { Lookup($item[1]); }
# MIDBAR accepts any VERTBAR token, or a MIDDLE named | or parallel-to.
MIDBAR : /VERTBAR:\S*:\d+/ { Lookup($item[1]); }
| /MIDDLE:\|:\d+/ { Lookup($item[1]); }
| /MIDDLE:parallel-to:\d+/ { Lookup($item[1]); }
LBRACE : /OPEN:\{:\d+/ { Lookup($item[1]); }
RBRACE : /CLOSE:\}:\d+/ { Lookup($item[1]); }
# --- Operators and functions
METARELOP : /METARELOP:\S*:\d+/ { Lookup($item[1]); }
MODIFIEROP : /MODIFIEROP:\S*:\d+/ { Lookup($item[1]); }
MODIFIER : /MODIFIER:\S*:\d+/ { Lookup($item[1]); }
ARROW : /ARROW:\S*:\d+/ { Lookup($item[1]); }
ADDOP : /ADDOP:\S*:\d+/ { Lookup($item[1]); }
MULOP : /MULOP:\S*:\d+/ { Lookup($item[1]); }
BINOP : /BINOP:\S*:\d+/ { Lookup($item[1]); }
POSTFIX : /POSTFIX:\S*:\d+/ { Lookup($item[1]); }
FUNCTION : /FUNCTION:\S*:\d+/ { Lookup($item[1]); }
OPFUNCTION : /OPFUNCTION:\S*:\d+/ { Lookup($item[1]); }
TRIGFUNCTION : /TRIGFUNCTION:\S*:\d+/ { Lookup($item[1]); }
APPLYOP : /APPLYOP:\S*:\d+/ { Lookup($item[1]); }
COMPOSEOP : /COMPOSEOP:\S*:\d+/ { Lookup($item[1]); }
SUPOP : /SUPOP:\S*:\d+/ { Lookup($item[1]); }
# --- Delimiters
OPEN : /OPEN:\S*:\d+/ { Lookup($item[1]); }
# SCRIPTOPEN uses the same pattern as LBRACE (an OPEN named {).
SCRIPTOPEN : /OPEN:\{:\d+/ { Lookup($item[1]); }
CLOSE : /CLOSE:\S*:\d+/ { Lookup($item[1]); }
MIDDLE : /MIDDLE:\S*:\d+/ { Lookup($item[1]); }
VERTBAR : /VERTBAR:\S*:\d+/ { Lookup($item[1]); }
# SINGLEVERTBAR is the VERTBAR token specifically named |.
SINGLEVERTBAR : /VERTBAR:\|:\d+/ { Lookup($item[1]); }
# --- Big operators (combined by the bigop rule) and OPERATOR
BIGOP : /BIGOP:\S*:\d+/ { Lookup($item[1]); }
SUMOP : /SUMOP:\S*:\d+/ { Lookup($item[1]); }
INTOP : /INTOP:\S*:\d+/ { Lookup($item[1]); }
LIMITOP : /LIMITOP:\S*:\d+/ { Lookup($item[1]); }
DIFFOP : /DIFFOP:\S*:\d+/ { Lookup($item[1]); }
OPERATOR : /OPERATOR:\S*:\d+/ { Lookup($item[1]); }
##DIFF : /DIFF:\S*:\d+/ { Lookup($item[1]); }
# --- Scripts: POST scripts attach to what precedes them, FLOAT scripts do not
# (see preScripted).
POSTSUBSCRIPT : /POSTSUBSCRIPT:\S*:\d+/ { Lookup($item[1]); }
POSTSUPERSCRIPT : /POSTSUPERSCRIPT:\S*:\d+/ { Lookup($item[1]); }
FLOATSUPERSCRIPT : /FLOATSUPERSCRIPT:\S*:\d+/ { Lookup($item[1]); }
FLOATSUBSCRIPT : /FLOATSUBSCRIPT:\S*:\d+/ { Lookup($item[1]); }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%