#include "ep.h"
#include "epmacro.h"
/*
 * Search key handed to the token comparison callback (CmpTokenN) when a
 * piece of source text is looked up in a sorted token table.
 */
struct tTokenCmp
{
    char * pStart ;     /* first character of the candidate text */
    char * pCurr ;      /* not referenced by the code visible in this part of the file */
    int    nLen ;       /* number of characters in the candidate text */
} ;
/* File-scope token table object with external linkage.
   NOTE(review): presumably the default syntax table; it is neither
   initialised nor used in the part of the file visible here - confirm. */
struct
tTokenTable DefaultTokenTable ;
/* Allocation wrapper used by the parser: the first argument is ignored and
   the request is forwarded to malloc() with the size given as second argument. */
#define parse_malloc(a,b) malloc(b)
/*
 * qsort()-style comparator, ascending.
 *
 * Both arguments point at objects whose first member is a C string pointer;
 * the two strings are compared with strcmp().
 */
static int CmpToken (const void * p1, const void * p2)
{
    const char * sLeft  = *((const char * *)p1) ;
    const char * sRight = *((const char * *)p2) ;

    return strcmp (sLeft, sRight) ;
}
/*
 * qsort()-style comparator, descending.
 *
 * Same layout assumption as CmpToken (first member is a C string pointer),
 * but the operands are swapped so larger strings sort first.
 */
static int RevCmpToken (const void * p1, const void * p2)
{
    const char * sLeft  = *((const char * *)p1) ;
    const char * sRight = *((const char * *)p2) ;

    return strcmp (sRight, sLeft) ;
}
/*
 * qsort()-style comparator on objects that start with two C string pointers.
 *
 * Primary order: first string, descending.
 * Tie break:     second string, ascending.
 */
static int CmpTokenDesc (const void * p1, const void * p2)
{
    const char * * ppLeft  = (const char * *)p1 ;
    const char * * ppRight = (const char * *)p2 ;
    int nPrimary = strcmp (ppRight[0], ppLeft[0]) ;

    if (nPrimary != 0)
        return nPrimary ;

    return strcmp (ppLeft[1], ppRight[1]) ;
}
/*
 * CheckProcInfo - hand the "procinfo" entries of a token definition to the
 * compiler initialisation.
 *
 * r               current request (error buffers errdat1/errdat2 are filled on failure)
 * pHash           hash describing one token
 * pToken          token the hash belongs to (its sText is used in the error
 *                 text; nNodeName/nNodeType are passed on)
 * ppCompilerInfo  passed through to embperl_CompileInitItem
 *
 * If pHash has no "procinfo" key nothing is done. Otherwise the value must
 * be a hash reference; its entries are visited in descending key order.
 * Each value must itself be a hash reference. "embperl" is registered with
 * index 1, "embperl#<n>" (n > 0) with index n, other keys are ignored.
 *
 * Returns ok, rcNotHashRef when a value is not a hash reference, or
 * rcOutOfMemory when the temporary sort array cannot be allocated.
 */
static int CheckProcInfo (register req * r,
                          HV * pHash,
                          struct tToken * pToken,
                          void * * ppCompilerInfo)
{
    HE * pEntry ;
    char * pKey ;
    SV * * ppSV ;
    SV * pSVValue ;
    I32 l ;
    HV * pHVProcInfo ;
    int n ;
    int i ;
    int m ;
    typedef struct tSortToken
    {
        char * pKey ;
        SV * pSVValue ;
    } tSortToken ;
    tSortToken * pSortTokenHash ;
    epTHX ;

    ppSV = hv_fetch(pHash, "procinfo", sizeof ("procinfo") - 1, 0) ;
    if (ppSV == NULL)
        return ok ;

    if (*ppSV == NULL || !SvROK (*ppSV) || SvTYPE (SvRV (*ppSV)) != SVt_PVHV)
    {
        strncpy (r -> errdat1, "BuildTokenHash", sizeof (r -> errdat1)) ;
        sprintf (r -> errdat2, "%s => procinfo", pToken -> sText) ;
        return rcNotHashRef ;
    }

    pHVProcInfo = (HV *)SvRV (*ppSV) ;
    m = 0 ;
    n = HvKEYS (pHVProcInfo) ;
    pSortTokenHash = (tSortToken *) malloc (sizeof (struct tSortToken) * n) ;
    /* malloc (0) may legitimately return NULL, so only a failed non-empty
       request is treated as out of memory */
    if (pSortTokenHash == NULL && n > 0)
        return rcOutOfMemory ;

    /* collect all entries so they can be processed in a defined order */
    hv_iterinit (pHVProcInfo) ;
    while ((pEntry = hv_iternext (pHVProcInfo)))
    {
        pKey = hv_iterkey (pEntry, &l) ;
        pSVValue = hv_iterval (pHVProcInfo, pEntry) ;
        pSortTokenHash[m].pKey = pKey ;
        pSortTokenHash[m].pSVValue = pSVValue ;
        m++ ;
    }

    /* descending by key */
    qsort (pSortTokenHash, m, sizeof (struct tSortToken), RevCmpToken) ;

    for (i = 0 ; i < m ; i++)
    {
        pKey = pSortTokenHash[i].pKey ;
        pSVValue = pSortTokenHash[i].pSVValue ;

        if (pSVValue == NULL || !SvROK (pSVValue) || SvTYPE (SvRV (pSVValue)) != SVt_PVHV)
        {
            strncpy (r -> errdat1, "BuildTokenHash", sizeof (r -> errdat1)) ;
            sprintf (r -> errdat2, "%s => procinfo", pToken -> sText) ;
            free (pSortTokenHash) ;
            return rcNotHashRef ;
        }

        if (strcmp (pKey, "embperl") == 0)
            embperl_CompileInitItem (r, (HV *)(SvRV (pSVValue)), pToken -> nNodeName, pToken -> nNodeType, 1, ppCompilerInfo) ;
        else if (strncmp (pKey, "embperl#", 8) == 0 && (n = atoi (pKey+8)) > 0)
            embperl_CompileInitItem (r, (HV *)(SvRV (pSVValue)), pToken -> nNodeName, pToken -> nNodeType, n, ppCompilerInfo) ;
    }

    free (pSortTokenHash) ;
    return ok ;
}
/*
 * BuildSubTokenTable - resolve the nested token table stored under the key
 * pAttr of a token definition hash.
 *
 * r               current request
 * nLevel          nesting depth of the caller (incremented here, used for
 *                 debug indentation and passed to BuildTokenTable)
 * pHash           hash describing the owning token
 * pKey            name of the owning token (only used in error texts)
 * pAttr           key to look up in pHash
 * pDefEnd         default end text handed to BuildTokenTable
 * ppCompilerInfo  passed through to BuildTokenTable
 * pTokenTable     out: the nested table, or NULL when pHash has no pAttr entry
 *
 * A table that was already built is recognised by a non-zero "--cptr"
 * value in the sub hash and reused; otherwise a new table is allocated,
 * filled by BuildTokenTable and remembered under "--cptr".
 *
 * Returns ok, rcNotHashRef, rcOutOfMemory, rcTokenNotFound (empty table)
 * or whatever BuildTokenTable reports.
 */
static int BuildSubTokenTable (register req * r,
                               int nLevel,
                               HV * pHash,
                               const char * pKey,
                               const char * pAttr,
                               const char * pDefEnd,
                               void * * ppCompilerInfo,
                               struct tTokenTable * * pTokenTable)
{
    SV * * ppSubRef ;
    HV * pSubHash ;
    struct tTokenTable * pSubTable ;
    int rc ;
    epTHX ;

    nLevel++ ;

    ppSubRef = hv_fetch(pHash, (char *)pAttr, strlen (pAttr), 0) ;
    if (ppSubRef == NULL)
    {
        /* attribute not present: no nested table */
        *pTokenTable = NULL ;
        return ok ;
    }

    if (*ppSubRef == NULL || !SvROK (*ppSubRef) || SvTYPE (SvRV (*ppSubRef)) != SVt_PVHV)
    {
        strncpy (r -> errdat1, "BuildTokenHash", sizeof (r -> errdat1)) ;
        sprintf (r -> errdat2, "%s => %s", pKey, pAttr) ;
        return rcNotHashRef ;
    }

    pSubHash = (HV *)SvRV (*ppSubRef) ;
    pSubTable = (struct tTokenTable *)GetHashValueInt (aTHX_ pSubHash, "--cptr", 0) ;

    if (pSubTable != NULL)
    {
        /* table was built earlier, reuse it */
        if (r -> Component.Config.bDebug & dbgBuildToken)
            lprintf (r -> pApp, "[%d]TOKEN: %*c-> %s already build; numTokens=%d\n", r -> pThread -> nPid, nLevel*2, ' ', pAttr, pSubTable->numTokens) ;
        *pTokenTable = pSubTable ;
        return ok ;
    }

    pSubTable = parse_malloc (r, sizeof (struct tTokenTable)) ;
    if (pSubTable == NULL)
        return rcOutOfMemory ;

    if (r -> Component.Config.bDebug & dbgBuildToken)
        lprintf (r -> pApp, "[%d]TOKEN: %*c-> %s\n", r -> pThread -> nPid, nLevel*2, ' ', pAttr) ;

    rc = BuildTokenTable (r, nLevel, NULL, pSubHash, pDefEnd, ppCompilerInfo, pSubTable) ;
    if (rc)
        return rc ;

    if (r -> Component.Config.bDebug & dbgBuildToken)
        lprintf (r -> pApp, "[%d]TOKEN: %*c<- %s\n", r -> pThread -> nPid, nLevel*2, ' ', pAttr) ;

    if (pSubTable -> numTokens == 0)
    {
        strncpy (r -> errdat1, "BuildTokenHash", sizeof (r -> errdat1)) ;
        sprintf (r -> errdat2, "%s => %s does not contain any tokens", pKey, pAttr) ;
        return rcTokenNotFound ;
    }

    hv_store(pSubHash, "--cptr", sizeof ("--cptr") - 1, newSViv ((IV)pSubTable), 0) ;

    *pTokenTable = pSubTable ;
    return ok ;
}
/*
 * BuildTokenTable - fill *pTokenTable from the Perl hash pTokenHash.
 *
 * r               current request (error buffers errdat1/errdat2 are filled on failure)
 * nLevel          nesting depth, used for debug indentation and passed on
 *                 to BuildSubTokenTable
 * sName           name stored in the table (may be NULL)
 * pTokenHash      hash with the token definitions; keys starting with '-'
 *                 are table options ("-rootnode", "-defnodetype",
 *                 "-lsearch", "-contains"), all other keys name a token
 *                 whose value must be a hash reference
 * pDefEnd         default for a token's "end" text
 * ppCompilerInfo  compiler info slot; when NULL the table's own
 *                 pCompilerInfo member is used
 * pTokenTable     table to initialise
 *
 * The address of the table is stored in the hash under "--cptr" before the
 * tokens are processed. Tokens are created in ascending key order, the
 * resulting array is sorted (CmpTokenDesc when "-lsearch" is set, CmpToken
 * otherwise), "starttag"/"endtag" names are resolved to entries of the
 * array, and a terminating entry with empty text is appended.
 *
 * Returns ok, rcOutOfMemory, rcNotHashRef, rcTokenNotFound or an error
 * reported by CheckProcInfo / BuildSubTokenTable.
 *
 * The temporary sort array is released on every exit path. The token array
 * itself is obtained through parse_malloc and is not released on error
 * (unchanged from the previous behaviour).
 *
 * NOTE(review): the bitmap updates index with plain char values; on
 * platforms where char is signed, bytes >= 0x80 give a negative index.
 * ParseTokens indexes the same way, so both sides have to be changed
 * together - left untouched here.
 */
int BuildTokenTable (register req * r,
                     int nLevel,
                     const char * sName,
                     HV * pTokenHash,
                     const char * pDefEnd,
                     void * * ppCompilerInfo,
                     struct tTokenTable * pTokenTable)
{
    int rc ;
    SV * pToken ;
    HE * pEntry ;
    char * pKey ;
    const char * c ;
    int numTokens ;
    struct tToken * pTable ;
    struct tToken * p ;
    I32 l ;
    STRLEN len ;
    int n ;
    int m ;
    int i ;
    typedef struct tSortToken
    {
        char * pKey ;
        SV * pToken ;
    } tSortToken ;
    tSortToken * pSortTokenHash ;
    unsigned char * pStartChars = pTokenTable -> cStartChars ;
    unsigned char * pAllChars = pTokenTable -> cAllChars ;
    epTHX ;

    tainted = 0 ;

    memset (pStartChars, 0, sizeof (pTokenTable -> cStartChars)) ;
    memset (pAllChars, 0, sizeof (pTokenTable -> cAllChars)) ;
    pTokenTable -> bLSearch = 0 ;
    pTokenTable -> nDefNodeType = ntypCDATA ;
    pTokenTable -> pContainsToken = NULL ;
    pTokenTable -> pCompilerInfo = NULL ;
    pTokenTable -> sRootNode = NULL ;
    pTokenTable -> sName = sName ;
    if (ppCompilerInfo == NULL)
        ppCompilerInfo = &pTokenTable -> pCompilerInfo ;

    /* remember the table in the hash before descending, so a nested
       reference to the same hash finds it (see BuildSubTokenTable) */
    hv_store(pTokenHash, "--cptr", sizeof ("--cptr") - 1, newSViv ((IV)pTokenTable), 0) ;

    /* count the tokens; start at one for the terminating entry */
    numTokens = 1 ;
    hv_iterinit (pTokenHash) ;
    while ((pEntry = hv_iternext (pTokenHash)))
    {
        pKey = hv_iterkey (pEntry, &l) ;
        pToken = hv_iterval (pTokenHash, pEntry) ;
        if (*pKey != '-')
            numTokens++ ;
    }

    if ((pTable = parse_malloc (r, sizeof (struct tToken) * numTokens)) == NULL)
        return rcOutOfMemory ;

    /* first pass: table options (keys starting with '-') */
    n = 0 ;
    hv_iterinit (pTokenHash) ;
    while ((pEntry = hv_iternext (pTokenHash)))
    {
        pKey = hv_iterkey (pEntry, &l) ;
        pToken = hv_iterval (pTokenHash, pEntry) ;
        if (*pKey != '-')
            continue ;

        if (strcmp (pKey, "-rootnode") == 0)
        {
            pTokenTable -> sRootNode = sstrdup (r, SvPV((SV *)pToken, len)) ;
        }
        else if (strcmp (pKey, "-defnodetype") == 0)
        {
            pTokenTable -> nDefNodeType = SvIV ((SV *)pToken) ;
        }
        else if (strcmp (pKey, "-lsearch") == 0)
        {
            pTokenTable -> bLSearch = SvIV ((SV *)pToken) ;
        }
        else if (strcmp (pKey, "-contains") == 0)
        {
            STRLEN nContainsLen ;
            char * pCh = SvPV (pToken, nContainsLen) ;

            /* every listed character counts as token character, in both cases */
            while (*pCh)
            {
                pAllChars [tolower (*pCh) >> 3] |= 1 << (tolower (*pCh) & 7) ;
                pAllChars [toupper (*pCh) >> 3] |= 1 << (toupper (*pCh) & 7) ;
                pCh++ ;
            }
        }
    }

    /* collect all entries and sort them by key so the tokens are built in
       a defined order */
    m = 0 ;
    n = HvKEYS (pTokenHash) ;
    pSortTokenHash = (tSortToken *) malloc (sizeof (struct tSortToken) * n) ;
    /* malloc (0) may legitimately return NULL */
    if (pSortTokenHash == NULL && n > 0)
        return rcOutOfMemory ;

    hv_iterinit (pTokenHash) ;
    while ((pEntry = hv_iternext (pTokenHash)))
    {
        pKey = hv_iterkey (pEntry, &l) ;
        pToken = hv_iterval (pTokenHash, pEntry) ;
        pSortTokenHash[m].pKey = pKey ;
        pSortTokenHash[m].pToken = pToken ;
        m++ ;
    }
    qsort (pSortTokenHash, m, sizeof (struct tSortToken), CmpToken) ;

    /* second pass: build one tToken per non-option key */
    n = 0 ;
    for (i = 0 ; i < m ; i++)
    {
        HV * pHash ;
        struct tTokenTable * pNewTokenTable ;
        char * sContains ;
        char * sC ;

        pKey = pSortTokenHash[i].pKey ;
        pToken = pSortTokenHash[i].pToken ;

        if (r -> Component.Config.bDebug & dbgBuildToken)
            lprintf (r -> pApp, "[%d]TOKENKey: %s\n", r -> pThread -> nPid, pKey) ;

        if (*pKey == '-')
            continue ;

        if (!SvROK (pToken) || SvTYPE (SvRV (pToken)) != SVt_PVHV)
        {
            strncpy (r -> errdat1, "BuildTokenHash", sizeof (r -> errdat1)) ;
            sprintf (r -> errdat2, "%s", pKey) ;
            free (pSortTokenHash) ;
            return rcNotHashRef ;
        }

        pHash = (HV *)SvRV (pToken) ;
        p = &pTable[n] ;
        p -> sName = pKey ;
        p -> sText = GetHashValueStrDup (aTHX_ r -> pThread -> pMainPool, pHash, "text", "") ;
        p -> nTextLen = p -> sText?strlen (p -> sText):0 ;
        p -> sEndText = GetHashValueStrDup (aTHX_ r -> pThread -> pMainPool, pHash, "end", (char *)pDefEnd) ;
        p -> sNodeName = GetHashValueStrDup (aTHX_ r -> pThread -> pMainPool, pHash, "nodename", NULL) ;
        p -> nNodeType = (tNodeType)GetHashValueInt (aTHX_ pHash, "nodetype", ntypTag) ;
        p -> bUnescape = GetHashValueInt (aTHX_ pHash, "unescape", 0) ;
        p -> bAddFlags = GetHashValueInt (aTHX_ pHash, "addflags", 0) ;
        p -> nCDataType = (tNodeType)GetHashValueInt (aTHX_ pHash, "cdatatype", pTokenTable -> nDefNodeType) ;
        p -> nForceType = (tNodeType)GetHashValueInt (aTHX_ pHash, "forcetype", 0) ;
        p -> bRemoveSpaces = GetHashValueInt (aTHX_ pHash, "removespaces", p -> nNodeType != ntypCDATA?2:0) ;
        p -> bInsideMustExist = GetHashValueInt (aTHX_ pHash, "insidemustexist", 0) ;
        p -> bMatchAll = GetHashValueInt (aTHX_ pHash, "matchall", 0) ;
        p -> bDontEat = GetHashValueInt (aTHX_ pHash, "donteat", 0) ;
        p -> bExitInside= GetHashValueInt (aTHX_ pHash, "exitinside", 0) ;
        p -> bAddFirstChild = GetHashValueInt (aTHX_ pHash, "addfirstchild", 0) ;
        /* start/end tag names are parked in the pointer members and
           replaced by real token pointers after the array is sorted */
        p -> pStartTag = (struct tToken *)GetHashValueStrDup (aTHX_ r -> pThread -> pMainPool, pHash, "starttag", NULL) ;
        p -> pEndTag = (struct tToken *)GetHashValueStrDup (aTHX_ r -> pThread -> pMainPool, pHash, "endtag", NULL) ;
        p -> sParseTimePerlCode = GetHashValueStrDup (aTHX_ r -> pThread -> pMainPool, pHash, "parsetimeperlcode", NULL) ;

        if ((sC = sContains = GetHashValueStrDup (aTHX_ r -> pThread -> pMainPool, pHash, "contains", NULL)))
        {
            unsigned char * pC ;

            if ((p -> pContains = parse_malloc (r, sizeof (tCharMap))) == NULL)
            {
                free (pSortTokenHash) ;
                return rcOutOfMemory ;
            }

            pC = p -> pContains ;
            memset (pC, 0, sizeof (tCharMap)) ;
            while (*sContains)
            {
                pC[*sContains >> 3] |= 1 << (*sContains & 7) ;
                /* the original set this bit twice; once is equivalent */
                pStartChars [*sContains >> 3] |= 1 << (*sContains & 7) ;
                sContains++ ;
            }
        }
        else
            p -> pContains = NULL ;

        if (p -> bMatchAll)
        {
            memset (pStartChars, 0xff, sizeof (tCharMap)) ;
        }
        else if ((c = p -> sText))
        {
            /* first character may start a token, all characters may be part of one */
            pStartChars [toupper (*c) >> 3] |= 1 << (toupper (*c) & 7) ;
            pStartChars [tolower (*c) >> 3] |= 1 << (tolower (*c) & 7) ;

            while (*c)
            {
                pAllChars [tolower (*c) >> 3] |= 1 << (tolower (*c) & 7) ;
                pAllChars [toupper (*c) >> 3] |= 1 << (toupper (*c) & 7) ;
                c++ ;
            }
        }

        if (r -> Component.Config.bDebug & dbgBuildToken)
            lprintf (r -> pApp, "[%d]TOKEN: %*c%s ... %s unesc=%d nodetype=%d, cdatatype=%d, nodename=%s contains='%s' addfirstchild=%d\n", r -> pThread -> nPid, nLevel*2, ' ', p -> sText, p -> sEndText, p -> bUnescape, p -> nNodeType, p -> nCDataType, p -> sNodeName?p -> sNodeName:"<null>", sC?sC:"", p -> bAddFirstChild) ;

        if (p -> sNodeName)
        {
            /* a leading '!' requests a unique index for the rest of the name */
            if (p -> sNodeName[0] != '!')
                p -> nNodeName = String2Ndx (r -> pApp, p -> sNodeName, strlen (p -> sNodeName)) ;
            else
                p -> nNodeName = String2UniqueNdx (r -> pApp, p -> sNodeName + 1, strlen (p -> sNodeName + 1)) ;
        }
        else
            p -> nNodeName = String2Ndx (r -> pApp, p -> sText, strlen (p -> sText)) ;

        if ((rc = CheckProcInfo (r, pHash, p, ppCompilerInfo)) != ok)
        {
            free (pSortTokenHash) ;
            return rc ;
        }

        if ((rc = BuildSubTokenTable (r, nLevel, pHash, pKey, "follow", p -> sEndText, ppCompilerInfo, &pNewTokenTable)))
        {
            free (pSortTokenHash) ;
            return rc ;
        }
        p -> pFollowedBy = pNewTokenTable ;

        if ((rc = BuildSubTokenTable (r, nLevel, pHash, pKey, "inside", "", ppCompilerInfo, &pNewTokenTable)))
        {
            free (pSortTokenHash) ;
            return rc ;
        }
        p -> pInside = pNewTokenTable ;

        n++ ;
    }

    free (pSortTokenHash) ;

    qsort (pTable, numTokens - 1, sizeof (struct tToken), pTokenTable -> bLSearch?CmpTokenDesc:CmpToken) ;

    /* resolve references between tokens now that the array order is final */
    for (i = 0; i < n; i++)
    {
        if (pTable[i].pContains && !pTable[i].sText[0])
            pTokenTable -> pContainsToken = &pTable[i] ;

        if (pTable[i].pEndTag)
        {
            char * s = (char *)pTable[i].pEndTag ;
            int j ;

            pTable[i].pEndTag = NULL ;
            for (j = 0; j < n; j++)
            {
                if (strcmp (pTable[j].sName, s) == 0)
                    pTable[i].pEndTag = &pTable[j] ;
            }
            if (pTable[i].pEndTag == NULL)
            {
                strncpy (r -> errdat1, "BuildTokenHash", sizeof (r -> errdat1)) ;
                sprintf (r -> errdat2, " EndTag %s for %s not found", pTable[i].sText, s) ;
                return rcTokenNotFound ;
            }
        }

        if (pTable[i].pStartTag)
        {
            char * s = (char *)pTable[i].pStartTag ;
            int j ;

            pTable[i].pStartTag = NULL ;
            for (j = 0; j < n; j++)
            {
                if (strcmp (pTable[j].sName, s) == 0)
                    pTable[i].pStartTag = &pTable[j] ;
            }
            if (pTable[i].pStartTag == NULL)
            {
                strncpy (r -> errdat1, "BuildTokenHash", sizeof (r -> errdat1)) ;
                sprintf (r -> errdat2, " StartTag %s for %s not found", pTable[i].sText, s) ;
                return rcTokenNotFound ;
            }
        }
    }

    /* terminating entry with empty text */
    p = &pTable[n] ;
    p -> sText = "" ;
    p -> nTextLen = 0 ;
    p -> sEndText = "" ;
    p -> pFollowedBy = NULL ;
    p -> pInside = NULL ;

    pTokenTable -> pTokens = pTable ;
    pTokenTable -> numTokens = numTokens - 1 ;
    return ok ;
}
static
int
ExecParseTimeCode (
register
req * r,
struct
tToken * pToken,
char
* pCurr,
int
nLen,
int
nLinenumber)
{
SV * pSV ;
int
rc ;
const
char
* sPCode = pToken -> sParseTimePerlCode ;
int
plen =
strlen
(sPCode) ;
char
* sCode ;
const
char
* p ;
int
n ;
SV * args[2] ;
epTHX ;
if
((p = strnstr (sPCode,
"%%"
, nLen)))
{
sCode = parse_malloc (r, nLen + plen + 1) ;
n = p - sPCode ;
memcpy
(sCode, sPCode, n) ;
memcpy
(sCode + n, pCurr, nLen) ;
memcpy
(sCode + n + nLen, sPCode + n + 2, plen - n - 2) ;
nLen = nLen + plen - 2 ;
sCode[nLen] =
'\0'
;
}
else
{
sCode = (
char
*)sPCode ;
nLen = plen ;
}
if
(nLen && r -> Component.Config.bDebug & dbgParse)
lprintf (r -> pApp,
"[%d]PARSE: ParseTimeCode: %*.*s\n"
, r -> pThread -> nPid, nLen, nLen, sCode) ;
pSV = newSVpvf(
"package %s ;\nmy ($_ep_req) = @_;\n#line %d \"%s\"\n%*.*s"
,
"Embperl::Parser"
, nLinenumber, r -> Component.sSourcefile, nLen, nLen, sCode) ;
newSVpvf2(pSV) ;
args[0] = r -> _perlsv ;
if
((rc = EvalDirect (r, pSV, 1, args)) != ok)
LogError (r, rc) ;
SvREFCNT_dec(pSV);
return
rc ;
}
/*
 * bsearch() comparator used by ParseTokens.
 *
 * p1 is the search key (struct tTokenCmp), p2 an element of the token
 * array (struct tToken, whose first member is read as the token text).
 * The first nLen characters are compared case-insensitively with strnicmp;
 * when they are equal the shorter text sorts first, so only a token of
 * exactly the same length compares equal.
 */
static int CmpTokenN (const void * p1, const void * p2)
{
    struct tTokenCmp * pSearch = (struct tTokenCmp *)p1 ;
    int nKeyLen = pSearch -> nLen ;
    int nTokenLen = ((struct tToken *)p2) -> nTextLen ;
    int nCmp = strnicmp (pSearch -> pStart, *((const char * *)p2), nKeyLen) ;

    if (nCmp != 0)
        return nCmp ;

    if (nKeyLen == nTokenLen)
        return 0 ;

    return nKeyLen > nTokenLen ? 1 : -1 ;
}
/*
 * ParseTokens - scan the text from *ppCurr up to pEnd, match it against
 * pTokenTable and append the resulting nodes below xParentNode in the DOM
 * tree selected by r->Component.xCurrDomTree. Calls itself for tokens that
 * have an "inside" table.
 *
 * r                 current request
 * ppCurr            in: start of the text; out: position reached (written on
 *                   the ok / rcTokenNotFound exits)
 * pEnd              end of the text
 * pTokenTable       tokens to look for (at level 0 the table is re-read from
 *                   r->Component.pTokenTable on every iteration)
 * sEndText          text that terminates this level (may be NULL or empty)
 * pParentContains   optional bitmap; a character not in it terminates this level
 * nCDataType        node type for plain text found between tokens (0: drop it)
 * nForceType        not referenced in the body
 * bUnescape         when non-zero, text before a token is passed through TransHtml
 * bInsideMustExist  when non-zero on exit, rcTokenNotFound is returned instead of ok
 * bRemoveSpaces     bit mask; bits 32/64 trim trailing blanks of the text
 *                   written when the end text is reached
 * nParentNodeName   not referenced in the body
 * xParentNode       node that receives the new children
 * level             nesting depth
 * pCDATAStart       if non-NULL, start of the text to emit (instead of *ppCurr)
 * sStopText         used as end text when sEndText is empty
 * bDontEat          bit 2: do not skip the end text on exit
 *
 * Returns ok, rcTokenNotFound, rcTagMismatch, rcTooDeepNested, 1 when
 * appending a text node fails, or an error from a nested call /
 * ExecParseTimeCode.
 */
static
int
ParseTokens (
register
req * r,
char
* * ppCurr,
char
* pEnd,
struct
tTokenTable * pTokenTable,
const
char
* sEndText,
const
unsigned
char
* pParentContains,
tNodeType nCDataType,
tNodeType nForceType,
int
bUnescape,
int
bInsideMustExist,
int
bRemoveSpaces,
tStringIndex nParentNodeName,
tNode xParentNode,
int
level,
char
* pCDATAStart,
const
char
* sStopText,
int
bDontEat)
{
unsigned
char
* pStartChars = pTokenTable -> cStartChars ;
struct
tTokenCmp c ;
int
nEndText = sEndText?
strlen
(sEndText):0 ;
char
* pCurr = *ppCurr ;
/* start of the text that has not been written to the tree yet */
char
* pCurrStart = pCDATAStart?pCDATAStart:pCurr ;
tNode xNewNode ;
int
rc = 0 ;
tDomTree * pDomTree = DomTree_self (r -> Component.xCurrDomTree) ;
/* number of tokens matched so far in this call */
int
numInside = 0 ;
/* without an end text of its own this level ends at the stop text;
   otherwise the stop text is ignored */
if
(nEndText == 0 && sStopText)
{
sEndText = sStopText ;
nEndText = sEndText?
strlen
(sEndText):0 ;
}
else
sStopText = NULL ;
while
(pCurr < pEnd)
{
struct
tToken * pToken = NULL ;
int
bFollow = 0 ;
/* at top level always work with the table currently set in the component
   (NOTE(review): presumably because parse time code may replace it - confirm) */
if
(level == 0 && pTokenTable != r -> Component.pTokenTable)
{
pTokenTable = r -> Component.pTokenTable ;
pStartChars = pTokenTable -> cStartChars ;
}
/* only try to match a token when the current character can start one */
if
(pStartChars [*pCurr >> 3] & 1 << (*pCurr & 7))
{
struct
tTokenTable * pNextTokenTab = pTokenTable ;
tStringIndex nNodeName = 0 ;
char
* pCurrTokenStart = pCurr ;
/* match a token, then keep matching in its pFollowedBy table as long as
   there is one; bFollow counts the tables tried */
do
{
struct
tToken * pTokenTab = pNextTokenTab -> pTokens ;
int
numTokens = pNextTokenTab -> numTokens ;
unsigned
char
* pAllChars = pNextTokenTab -> cAllChars ;
bFollow++ ;
if
(pNextTokenTab -> bLSearch)
{
/* linear search; note that this local r shadows the request parameter
   inside this block */
int
r = 1 ;
int
i ;
for
(i = 0, pToken = pTokenTab; i < numTokens; i++, pToken++)
{
if
(pToken -> bMatchAll && (numInside == 0 || pToken -> bMatchAll > 0))
{
r = 0 ;
break
;
}
if
(pToken -> nTextLen == 0)
continue
;
r = strnicmp (pCurr, pToken -> sText, pToken -> nTextLen) ;
if
(r == 0 ||
(*pCurr > *(pToken -> sText) && pStartChars[0] != 0xff))
break
;
}
if
(r != 0)
pToken = NULL ;
else
if
(!pToken -> bMatchAll && (pToken -> bDontEat & 1) == 0)
pCurr += pToken -> nTextLen ;
}
else
{
/* binary search: take the run of characters that are in cAllChars as
   candidate and look it up; on a miss rewind to the start of the run */
c.pStart = pCurr ;
while
(pAllChars [*pCurr >> 3] & (1 << (*pCurr & 7)))
pCurr++ ;
c.nLen = pCurr - c.pStart ;
pToken = (
struct
tToken *)
bsearch
(&c, pTokenTab, numTokens,
sizeof
(
struct
tToken), CmpTokenN) ;
if
(!pToken)
{
pCurr = c.pStart ;
}
}
if
(pToken)
{
numInside++ ;
/* skip blanks after the token: bit 2 = any whitespace, bit 8 = space, tab, CR */
if
(pToken -> bRemoveSpaces & 2)
while
(
isspace
(*pCurr))
pCurr++ ;
else
if
(pToken -> bRemoveSpaces & 8)
while
((*pCurr ==
' '
|| *pCurr ==
'\t'
|| *pCurr ==
'\r'
))
pCurr++ ;
if
(pToken -> sNodeName)
nNodeName = pToken -> nNodeName ;
}
else
{
/* nothing matched: fall back to the table's pContainsToken (may be NULL)
   and leave the loop, so the loop condition never sees a NULL pToken */
pToken = pNextTokenTab -> pContainsToken ;
if
(pToken && pToken -> sNodeName)
nNodeName = pToken -> nNodeName ;
break
;
}
}
while
((pNextTokenTab = pToken -> pFollowedBy)) ;
if
(pToken)
{
struct
tTokenTable * pInside ;
/* write the text that precedes the token as a node of type nCDataType */
if
(pCurrStart < pCurrTokenStart)
{
if
(nCDataType)
{
/* this pEnd shadows the parameter: last character of the preceding text */
const
char
* pEnd = pCurrTokenStart - 1;
/* trim trailing blanks: bit 1 = any whitespace, bit 4 = space, tab, CR,
   bit 16 = any whitespace but keep one of the trimmed characters */
if
(pToken -> bRemoveSpaces & 1)
while
(pEnd >= pCurrStart &&
isspace
(*pEnd))
pEnd-- ;
else
if
(pToken -> bRemoveSpaces & 4)
while
(pEnd >= pCurrStart && (*pEnd ==
' '
|| *pEnd ==
'\t'
|| *pEnd ==
'\r'
))
pEnd-- ;
else
if
(pToken -> bRemoveSpaces & 16)
{
while
(pEnd >= pCurrStart &&
isspace
(*pEnd))
pEnd-- ;
if
(pEnd >= pCurrStart && pEnd < pCurrTokenStart - 1)
pEnd++ ;
}
if
(bUnescape)
{
/* TransHtml returns the new length of the text */
int
newlen ;
r -> Component.bEscInUrl = bUnescape - 1 ;
newlen = TransHtml (r, pCurrStart, pEnd - pCurrStart + 1) ;
pEnd = pCurrStart + newlen - 1 ;
r -> Component.bEscInUrl = 0 ;
}
if
(pEnd - pCurrStart + 1)
if
(!(xNewNode = Node_appendChild (r -> pApp, pDomTree, xParentNode, 0, nCDataType, 0, pCurrStart, pEnd - pCurrStart + 1, level, GetLineNoOf (r, pCurrStart), NULL)))
return
1 ;
}
pCurrStart = pCurrTokenStart ;
}
if
(nNodeName == 0)
nNodeName = pToken -> nNodeName ;
/* end tag: optionally write the text up to the token's end text, go up
   one level and check that the tag being closed matches */
if
(pToken -> nNodeType == ntypEndTag && level > 0)
{
tNodeData * pStartTag ;
char
* pEndCurr =
strstr
(pCurr, pToken -> sEndText) ;
/* NOTE(review): the third test reads the parameter sEndText, not
   pToken -> sEndText like the two before it, and sEndText is not checked
   for NULL here - confirm this is intended */
if
(!pEndCurr && pToken -> sEndText[0] ==
'\n'
&& pToken -> sEndText[1] ==
'\n'
&& sEndText[2] ==
'\0'
)
{
pEndCurr =
strstr
(pCurr,
"\n\r\n"
) ;
if
(pEndCurr && pEndCurr[-1] ==
'\r'
)
pEndCurr-- ;
}
if
(pEndCurr)
{
tNode xNewAttrNode ;
if
(pEndCurr - pCurr && pToken -> nCDataType && pToken -> nCDataType != ntypCDATA)
{
char
* pEnd = pEndCurr ;
char
c;
/* trim trailing blanks: bit 32 = any whitespace, bit 64 = space, tab, CR */
if
(pToken -> bRemoveSpaces & 32)
while
(pEnd > pCurrStart &&
isspace
(*(pEnd-1)))
pEnd-- ;
else
if
(pToken -> bRemoveSpaces & 64)
while
(pEnd > pCurrStart && ((c = *(pEnd-1)) ==
' '
|| c ==
'\t'
|| c ==
'\r'
))
pEnd-- ;
if
(pToken -> bUnescape)
{
int
newlen ;
r -> Component.bEscInUrl = pToken -> bUnescape - 1 ;
newlen = TransHtml (r, pCurr, pEnd - pCurr) ;
pEnd = pCurr + newlen ;
r -> Component.bEscInUrl = 0 ;
}
if
(!(xNewAttrNode = Node_appendChild (r -> pApp, pDomTree, xParentNode, 0, pToken -> nCDataType, 0, pCurr, pEnd - pCurr, level+1, GetLineNoOf (r, pCurr), NULL)))
return
1 ;
if
(pToken -> bAddFlags)
Node_self (pDomTree, xNewAttrNode) -> bFlags |= pToken -> bAddFlags ;
}
pCurr = pEndCurr +
strlen
(pToken -> sEndText) ;
}
level-- ;
xParentNode = Node_parentNode (r -> pApp, pDomTree, xParentNode, 0) ;
pStartTag = Node_selfLastChild (r -> pApp, pDomTree, Node_self (pDomTree, xParentNode), 0) ;
/* the last child of the new parent must carry this token's node name or
   the node name of its declared start tag */
if
(pStartTag -> nText != pToken -> nNodeName &&
(pToken -> pStartTag == NULL
|| pStartTag -> nText != pToken -> pStartTag -> nNodeName))
{
strncpy
(r -> errdat2, Ndx2String (pStartTag -> nText),
sizeof
(r -> errdat2)) ;
strncpy
(r -> errdat1, Ndx2String (pToken -> nNodeName),
sizeof
(r -> errdat1)) ;
r -> Component.pCurrPos = pCurrTokenStart ;
return
rcTagMismatch ;
}
}
else
{
/* an end-start tag first closes the current level */
if
(pToken -> nNodeType == ntypEndStartTag && level > 0)
{
xParentNode = Node_parentNode (r -> pApp, pDomTree, xParentNode, 0) ;
level-- ;
}
/* create a node for the token itself unless it is plain CDATA without a node name */
if
((pToken -> nNodeType && pToken -> nNodeType != ntypCDATA) || pToken -> sNodeName)
{
tNodeType nType = pToken -> nNodeType ;
if
(nType == ntypStartEndTag)
nType = ntypStartTag ;
/* NOTE(review): on failure this returns rc, which is initialised to 0 and
   may still be 0 here, while other failed appends return 1 - confirm */
if
(!(xNewNode = Node_appendChild (r -> pApp, pDomTree, xParentNode, 0, nType, (nCDataType == ntypAttrValue && pToken -> nNodeType != ntypAttr)?(pToken -> nForceType?2:1):0, NULL, nNodeName, level, GetLineNoOf (r, pCurrTokenStart), pToken -> sText)))
{
r -> Component.pCurrPos = pCurrTokenStart ;
return
rc ;
}
if
(pToken -> bAddFlags)
Node_self (pDomTree, xNewNode) -> bFlags |= pToken -> bAddFlags ;
if
(!pToken -> pInside)
bInsideMustExist = 0 ;
if
(pToken -> bAddFirstChild)
{
/* add an empty first child of type nCDataType to the new node */
if
(!(Node_appendChild (r -> pApp, pDomTree, xNewNode, 0, nCDataType,
0,
""
, 0,
0, 0, NULL)))
{
return
rc ;
}
}
}
else
{
xNewNode = xParentNode ;
}
if
((pInside = pToken -> pInside))
{
/* the token has an "inside" table: parse its content recursively */
rc = ParseTokens (r, &pCurr, pEnd, pInside,
pToken -> sEndText,
pToken -> pContains,
(tNodeType)(pToken -> nCDataType == ntypCDATA && !pToken -> sNodeName?ntypAttrValue:pToken -> nCDataType),
0,
pToken -> bUnescape,
pToken -> bInsideMustExist + bInsideMustExist,
pToken -> bRemoveSpaces,
nNodeName,
xNewNode,
level+1,
pToken -> nNodeType == ntypCDATA?pCurrTokenStart:NULL,
sEndText && *sEndText?sEndText:NULL,
pToken -> bDontEat) ;
if
(rc == ok)
bInsideMustExist = 0 ;
else
if
(pToken -> bInsideMustExist && rc == rcTokenNotFound)
{
/* required inner token missing: drop the node created for this token and
   store the consumed text as a node of the table's default type instead */
rc = ok ;
if
(xNewNode != xParentNode)
{
Node_removeChild (r -> pApp, pDomTree, xParentNode, xNewNode, 0) ;
if
(r -> Component.Config.bDebug & dbgParse)
lprintf (r -> pApp,
"[%d]PARSE: DelNode: +%02d %*s parent=%d node=%d\n"
,
r -> pThread -> nPid, level, level * 2,
""
, xParentNode, xNewNode) ;
}
if
(!(xNewNode = Node_appendChild (r -> pApp, pDomTree, xParentNode, 0, (tNodeType)pTokenTable -> nDefNodeType, 0, pCurrStart, pCurr - pCurrStart, level, GetLineNoOf (r, pCurrStart), NULL)))
return
1 ;
}
else
if
(rc != rcTokenNotFound)
{
return
rc ;
}
if
(pToken -> nNodeType == ntypStartEndTag)
{
xParentNode = Node_parentNode (r -> pApp, pDomTree, xNewNode, 0) ;
pToken = NULL ;
bFollow = 2 ;
}
}
else
{
/* no "inside" table: the content runs either over the characters listed
   in pContains or up to the token's end text */
char
* pEndCurr ;
unsigned
char
* pContains ;
int
nSkip = 0 ;
if
((pContains = pToken -> pContains))
{
pEndCurr = pCurr ;
while
(pContains [*pEndCurr >> 3] & (1 << (*pEndCurr & 7)))
pEndCurr++ ;
nSkip = 0 ;
}
else
{
pEndCurr = NULL ;
/* an end text of two newlines is also searched for as "\n\r\n" */
if
(
strcmp
(pToken -> sEndText,
"\n\n"
) == 0)
{
if
((pEndCurr =
strstr
(pCurr,
"\n\r\n"
)))
{
/* NOTE(review): nSkip is derived from pCurr[4] / pCurr[3], i.e. relative
   to the current position rather than to pEndCurr - confirm */
if
(pEndCurr[-1] ==
'\r'
)
{
pEndCurr-- ;
nSkip = pCurr[4] ==
'\r'
?5:4 ;
}
else
nSkip = pCurr[3] ==
'\r'
?4:3 ;
}
}
if
(!pEndCurr)
{
pEndCurr =
strstr
(pCurr, pToken -> sEndText) ;
nSkip =
strlen
(pToken -> sEndText) ;
}
if
(pToken -> bDontEat & 2)
nSkip = 0 ;
/* plain CDATA without node name: the node text covers start token, content and end text */
if
(pToken -> nNodeType == ntypCDATA && pEndCurr && !pToken -> sNodeName)
{
pEndCurr += nSkip ;
nSkip = 0 ;
pCurr = pCurrTokenStart ;
}
}
if
(pEndCurr)
{
tNode xNewAttrNode ;
if
(pEndCurr - pCurr && pToken -> nCDataType)
{
int
nLine ;
char
* pEnd = pEndCurr ;
char
c;
/* trim trailing blanks: bit 32 = any whitespace, bit 64 = space, tab, CR */
if
(pToken -> bRemoveSpaces & 32)
while
(pEnd > pCurrStart &&
isspace
(*(pEnd-1)))
pEnd-- ;
else
if
(pToken -> bRemoveSpaces & 64)
while
(pEnd > pCurrStart && ((c = *(pEnd-1)) ==
' '
|| c ==
'\t'
|| c ==
'\r'
))
pEnd-- ;
if
(pToken -> bUnescape)
{
int
newlen ;
r -> Component.bEscInUrl = pToken -> bUnescape - 1 ;
newlen = TransHtml (r, pCurr, pEnd - pCurr) ;
pEnd = pCurr + newlen ;
r -> Component.bEscInUrl = 0 ;
}
if
(!(xNewAttrNode = Node_appendChild (r -> pApp, pDomTree, xNewNode, 0, pToken -> nCDataType,
0, pCurr, pEnd - pCurr, level+1,
nLine = GetLineNoOf (r, pCurr), pToken -> sText)))
return
1 ;
if
(pToken -> bAddFlags)
Node_self (pDomTree, xNewAttrNode) -> bFlags |= pToken -> bAddFlags ;
/* run the token's parse time Perl code on the content just stored */
if
(pToken -> sParseTimePerlCode)
if
((rc = ExecParseTimeCode (r, pToken, pCurr, pEnd - pCurr, nLine)) != ok)
{
r -> Component.pCurrPos = pCurrTokenStart ;
return
rc ;
}
}
if
(pToken -> nNodeType == ntypStartEndTag)
{
xParentNode = Node_parentNode (r -> pApp, pDomTree, xNewNode, 0) ;
pToken = NULL ;
}
pCurr = pEndCurr + nSkip ;
}
}
/* start tags open a new level below the node just created */
if
(pToken && (pToken -> nNodeType == ntypStartTag ||
pToken -> nNodeType == ntypEndStartTag ||
pToken -> nNodeType == ntypStartEndTag))
{
if
(level++ > 1000)
{
r -> Component.pCurrPos = pCurrTokenStart ;
return
rcTooDeepNested ;
}
xParentNode = xNewNode ;
nCDataType = pTokenTable -> nDefNodeType ;
}
}
pCurrStart = pCurr ;
}
}
/* leave this level when the current character is not in the parent's
   "contains" bitmap; pending text is written first */
if
(pParentContains && ((pParentContains [*pCurr >> 3] & 1 << (*pCurr & 7)) == 0) )
{
if
(pCurr - pCurrStart && nCDataType)
{
if
(!(xNewNode = Node_appendChild (r -> pApp, pDomTree, xParentNode, 0, nCDataType, 0,
pCurrStart, pCurr - pCurrStart, level,
GetLineNoOf (r, pCurrStart), NULL)))
return
1 ;
}
*ppCurr = pCurr ;
return
bInsideMustExist?rcTokenNotFound:ok ;
}
else
/* leave this level when there is no end text or the end text starts here;
   the extra alternatives accept "\n\r\n" and "\r\n\r\n" for an end text of
   two newlines */
if
(sEndText == NULL ||
((*pCurr == *sEndText && (
strncmp
(pCurr, sEndText, nEndText) == 0)) ||
(pCurr[0] ==
'\n'
&& pCurr[1] ==
'\r'
&& pCurr[2] ==
'\n'
&& sEndText[1] ==
'\n'
&& sEndText[2] ==
'\0'
)) ||
(pCurr[0] ==
'\r'
&& pCurr[1] ==
'\n'
&& pCurr[2] ==
'\r'
&& pCurr[3] ==
'\n'
&& sEndText[0] ==
'\n'
&& sEndText[1] ==
'\n'
&& sEndText[2] ==
'\0'
)
)
{
char
* pEnd ;
/* when collecting CDATA the end text becomes part of the node text */
if
(pCDATAStart)
pCurr += nEndText ;
pEnd = pCurr - 1 ;
if
(bRemoveSpaces & 32)
while
(pEnd >= pCurrStart &&
isspace
(*pEnd))
pEnd-- ;
else
if
(bRemoveSpaces & 64)
while
(pEnd >= pCurrStart && (*pEnd ==
' '
|| *pEnd ==
'\t'
|| *pEnd ==
'\r'
))
pEnd-- ;
/* attribute values are written even when empty */
if
((pEnd - pCurrStart + 1 != 0 || nCDataType == ntypAttrValue) && nCDataType)
if
(!(xNewNode = Node_appendChild (r -> pApp, pDomTree, xParentNode, 0, nCDataType, 0,
pCurrStart, pEnd - pCurrStart + 1, level,
GetLineNoOf (r, pCurr), NULL)))
return
1 ;
/* skip the end text unless it was already consumed above, it is the
   caller's stop text, or bDontEat bit 2 is set */
if
(!pCDATAStart && !sStopText && (bDontEat & 2) == 0)
pCurr += nEndText ;
*ppCurr = pCurr ;
return
bInsideMustExist?rcTokenNotFound:ok ;
}
else
/* nothing matched at this position: advance by one character */
if
(!pToken && bFollow < 2)
pCurr++ ;
if
(pToken && (pToken -> bExitInside))
{
*ppCurr = pCurr ;
return
ok ;
}
}
/* end of input: write whatever text is still pending */
if
(nCDataType && pCurr - pCurrStart)
if
(!(xNewNode = Node_appendChild (r -> pApp, pDomTree, xParentNode, 0, nCDataType, 0,
pCurrStart, pCurr - pCurrStart, level,
GetLineNoOf (r, pCurrStart), NULL)))
return
1 ;
*ppCurr = pCurr ;
return
bInsideMustExist?rcTokenNotFound:ok ;
}
/*
 * embperl_ParseSource - parse a source buffer into an already created DOM tree.
 *
 * r         current request; its Component buffer pointers (pBuf, pEndPos,
 *           pCurrPos, pSourcelinePos) are set to the given source
 * pSource   start of the source text
 * nLen      length of the source text
 * pDomTree  tree that receives the nodes
 *
 * Creates the document (or document fragment, when the component has a
 * predecessor) node, an optional root node named by the token table's
 * sRootNode, an attribute holding the DOM tree index, and an empty CDATA
 * node before and after the parsed content, then runs ParseTokens over the
 * whole source with the component's token table.
 *
 * Returns ok, rcOutOfMemory when a node cannot be appended, or the error
 * reported by ParseTokens.
 *
 * NOTE(review): the first node is created from "attr" with a length of 3,
 * and the saved token table is only written back on the success path;
 * both are kept exactly as they were - confirm they are intended.
 */
static int embperl_ParseSource (register req * r,
                                char * pSource,
                                size_t nLen,
                                tDomTree * pDomTree)
{
    char * pParsePos = pSource ;
    char * pSourceEnd = pSource + nLen ;
    int rc ;
    tNode xDocNode ;
    tNode xRootNode ;
    tNode xNode ;
    tTokenTable * pSavedTokenTable ;
    clock_t clkStart = clock () ;
    clock_t clkEnd ;

    r -> Component.pBuf = (char *)pParsePos ;
    r -> Component.pEndPos = (char *)pSourceEnd ;
    r -> Component.pCurrPos = r -> Component.pBuf ;
    r -> Component.pSourcelinePos = r -> Component.pCurrPos ;

    if (r -> Component.Config.bDebug & dbgParse)
        lprintf (r -> pApp, "[%d]PARSE: Start parsing %s DomTree = %d\n", r -> pThread -> nPid, r -> Component.sSourcefile, r -> Component.xCurrDomTree) ;

    pDomTree -> xFilename = String2Ndx (r -> pApp, r -> Component.sSourcefile, strlen (r -> Component.sSourcefile)) ;

    xDocNode = Node_appendChild (r -> pApp, pDomTree, 0, 0, ntypTag, 0, "attr", 3, 0, 0, NULL) ;
    if (!xDocNode)
        return rcOutOfMemory ;

    /* document node, or a fragment when this component has a predecessor */
    xDocNode = Node_appendChild (r -> pApp, pDomTree, 0, 0, (tNodeType)((r -> Component.pPrev)?ntypDocumentFraq:ntypDocument), 0,
                                 NULL, r -> Component.pPrev?xDocumentFraq:xDocument, 0, 0, NULL) ;
    if (!xDocNode)
        return rcOutOfMemory ;

    /* content goes below the document node unless the syntax names a root node */
    xRootNode = xDocNode ;
    if (r -> Component.pTokenTable -> sRootNode)
    {
        if (!(Node_appendChild (r -> pApp, pDomTree, xDocNode, 0, ntypCDATA, 0, "", 0, 0, 0, NULL)))
            return rcOutOfMemory ;

        xRootNode = Node_appendChild (r -> pApp, pDomTree, xDocNode, 0, ntypStartTag, 0,
                                      r -> Component.pTokenTable -> sRootNode,
                                      strlen (r -> Component.pTokenTable -> sRootNode),
                                      0, 0, NULL) ;
        if (!xRootNode)
            return rcOutOfMemory ;
    }

    /* attribute on the document node whose value is the DOM tree index */
    xNode = Node_appendChild (r -> pApp, pDomTree, xDocNode, 0, ntypAttr, 0, NULL, xDomTreeAttr, 0, 0, NULL) ;
    if (!xNode)
        return rcOutOfMemory ;

    xNode = Node_appendChild (r -> pApp, pDomTree, xNode, 0, ntypAttrValue, 0, (char *)&(r -> Component.xCurrDomTree),
                              sizeof (r -> Component.xCurrDomTree), 0, 0, NULL) ;
    if (!xNode)
        return rcOutOfMemory ;

    /* empty text node in front of the content */
    xNode = Node_appendChild (r -> pApp, pDomTree, xRootNode, 0, ntypCDATA, 0, "", 0, 0, 0, NULL) ;
    if (!xNode)
        return rcOutOfMemory ;

    pDomTree -> xDocument = xDocNode ;

    pSavedTokenTable = r -> Component.pTokenTable ;

    rc = ParseTokens (r, &pParsePos, pSourceEnd, r -> Component.pTokenTable,
                      "", NULL, (tNodeType)r -> Component.pTokenTable -> nDefNodeType, 0, 0, 0, 0,
                      String2Ndx (r -> pApp, "root", 4), xRootNode, 0, NULL, NULL, 0) ;
    if (rc != ok)
        return rc ;

    /* empty text node behind the content */
    xNode = Node_appendChild (r -> pApp, pDomTree, xRootNode, 0, ntypCDATA, 0, "", 0, 0, 0, NULL) ;
    if (!xNode)
        return rcOutOfMemory ;

    r -> Component.pTokenTable = pSavedTokenTable ;

#ifdef CLOCKS_PER_SEC
    if (r -> Component.Config.bDebug)
    {
        clkEnd = clock () ;
        lprintf (r -> pApp, "[%d]PERF: Parse Start Time: %d ms \n", r -> pThread -> nPid, ((clkStart - r -> startclock) * 1000 / CLOCKS_PER_SEC)) ;
        lprintf (r -> pApp, "[%d]PERF: Parse End Time: %d ms \n", r -> pThread -> nPid, ((clkEnd - r -> startclock) * 1000 / CLOCKS_PER_SEC)) ;
        lprintf (r -> pApp, "[%d]PERF: Parse Time: %d ms \n", r -> pThread -> nPid, ((clkEnd - clkStart) * 1000 / CLOCKS_PER_SEC)) ;
        DomStats (r -> pApp) ;
    }
#endif

    return ok ;
}
/*
 * embperl_Parse - create a new DOM tree and parse a source buffer into it.
 *
 * r          current request; r->Component.xCurrDomTree is set to the new tree
 * pSource    start of the source text
 * nLen       length of the source text
 * pxDomTree  out: index of the new tree, 0 when parsing failed
 *
 * Returns ok, rcOutOfMemory when no tree can be created (*pxDomTree is not
 * written in that case), or the error from embperl_ParseSource; on a parse
 * error the tree is deleted again.
 */
int embperl_Parse (register req * r,
                   char * pSource,
                   size_t nLen,
                   tIndex * pxDomTree)
{
    tDomTree * pDomTree ;
    int rc ;

    r -> Component.xCurrDomTree = DomTree_new (r -> pApp, &pDomTree) ;
    if (!r -> Component.xCurrDomTree)
        return rcOutOfMemory ;

    rc = embperl_ParseSource (r, pSource, nLen, pDomTree) ;
    if (rc == ok)
    {
        *pxDomTree = r -> Component.xCurrDomTree ;
        return ok ;
    }

    /* look the tree up again by index before discarding it */
    pDomTree = DomTree_self (r -> Component.xCurrDomTree) ;
    r -> Component.xCurrDomTree = 0 ;
    *pxDomTree = r -> Component.xCurrDomTree ;
    DomTree_delete (r -> pApp, pDomTree) ;
    return rc ;
}
/*
 * ParseFile - parse the buffer currently attached to the request's component.
 *
 * The text between r->Component.pBuf and r->Component.pEndPos is handed to
 * embperl_Parse; the index of the created DOM tree is only kept in a local.
 *
 * Returns the result of embperl_Parse.
 */
int ParseFile (register req * r)
{
    char * pBufStart = r -> Component.pBuf ;
    char * pBufEnd = r -> Component.pEndPos ;
    tIndex xNewDomTree ;

    return embperl_Parse (r, pBufStart, pBufEnd - pBufStart, &xNewDomTree) ;
}