/* This is a very specific module designed to support fast text
 * accumulation for the XAO display engine. It helps a lot with template
 * processing, especially when a template splits into thousands or even
 * millions of pieces.
 *
 * The idea is to have one long buffer that grows automatically and a
 * stack of positions in it that can be pushed/popped when the
 * application needs a new portion of text.
 *
 * Andrew Maltsev, <am@xao.com>, 2000, 2002
 */
#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include <string.h>
#include <ctype.h>
/* Workaround for older versions of perl that do not define these macros
*/
#ifndef pTHX_
#define pTHX_
#endif
#ifndef aTHX_
#define aTHX_
#endif
/************************************************************************/
/* Capacity of the pstack array of saved buffer positions */
#define MAX_STACK 200
/* Extra bytes requested on top of the needed length whenever the buffer grows */
#define CHUNK_SIZE 1000
/* The single shared text buffer; NULL until the first reallocation in addtext */
static char *buffer=NULL;
/* Number of bytes currently allocated for buffer */
static STRLEN bufsize=0;
/* Offset in buffer where the next added text will be written */
static STRLEN bufpos=0;
/* Saved bufpos values, one per push() that has not been popped yet */
static STRLEN pstack[MAX_STACK];
/* Number of positions currently saved in pstack */
static unsigned stacktop=0;
/************************************************************************/
/* Tells whether a character may appear in an object, variable or
 * argument name: letters, digits, underscore and dot are allowed.
 *
 * The value is narrowed to unsigned char before it reaches isalnum(),
 * because passing a negative value other than EOF to the <ctype.h>
 * functions is undefined. Callers hand in plain char values, which are
 * negative for bytes >= 0x80 on platforms where char is signed.
 *
 * Returns 1 when the character is allowed, 0 otherwise.
 */
static int
isalnum_dot(int c) {
    return isalnum((unsigned char)c) || c=='.' || c=='_';
}
/* Parsing template into an array suitable for Web::Page
*/
static SV*
parse_text(pTHX_ char * template, STRLEN length, short is_unicode) {
AV* parsed=newAV();
char *str=template;
char *text_ptr=template;
char *end=template+length;
if(!length) {
return newRV_noinc((SV*)parsed);
}
while(str<end) {
char var_flag;
HV* hv;
SV* sv;
/* Simple parser with basically just two states -- text and
* object, instead of tracking states we just have two separate
* loops for each one.
*
* First is text.
*/
while(1) {
if(*str=='<' && str+1<end && (str[1]=='%' || str[1]=='$')) {
if(str+3<end && str[2]==str[1] && str[3]=='>') {
/* A way to embed '<%' or '<$' -- <%%> or <$$> */
hv=newHV();
sv=newSVpvn(text_ptr,str+2-text_ptr);
if(is_unicode) SvUTF8_on(sv);
hv_store(hv,"text",4,sv,0);
av_push(parsed,newRV_noinc((SV*)hv));
str+=4;
text_ptr=str;
}
else {
if(text_ptr!=str) {
hv=newHV();
sv=newSVpvn(text_ptr,str-text_ptr);
if(is_unicode) SvUTF8_on(sv);
hv_store(hv,"text",4,sv,0);
av_push(parsed,newRV_noinc((SV*)hv));
}
break;
}
}
else if(*str=='<' && str+4<end && str[1]=='!' && str[2]=='-' && str[3]=='-' && str[4]!='/' && str[4]!='[' && str[4]!='>' && str[4]!='<') {
if(text_ptr!=str) {
hv=newHV();
sv=newSVpvn(text_ptr,str-text_ptr);
if(is_unicode) SvUTF8_on(sv);
hv_store(hv,"text",4,sv,0);
av_push(parsed,newRV_noinc((SV*)hv));
}
str+=4;
while(str+2<end && (*str!='-' || str[1]!='-' || str[2]!='>')) str++;
if(str+2>=end) {
av_clear(parsed);
return newSVpvf("Unclosed comment at position %ld (%*s)",
str-template,
(int)(end-str>10 ? 10 : end-str),str);
}
str+=3;
text_ptr=str;
}
else {
str++;
if(str>=end) {
if(text_ptr!=str) {
hv=newHV();
sv=newSVpvn(text_ptr,str-text_ptr);
if(is_unicode) SvUTF8_on(sv);
hv_store(hv,"text",4,sv,0);
av_push(parsed,newRV_noinc((SV*)hv));
str+=2;
}
break;
}
}
}
/* Bailing out if we're at the end
*/
if(str>=end)
break;
/* And now we're in the object or variable. Getting its name.
*/
var_flag=str[1] == '$' ? 1 : 0;
str+=2;
while(str<end && isspace(*str)) str++;
text_ptr=str;
while(str<end && (isalnum_dot(*str) || *str==':')) str++;
/* End object is a special case, we stop parsing if we meet it
* and do not even look what's behind it. That helps if there
* are some elements with broken syntax after the <%End%> that the
* developer intended to ignore.
*/
if(!var_flag && str-text_ptr==3 && !strncmp(text_ptr,"End",3)) {
return newRV_noinc((SV*)parsed);
}
/* Storing the name
*/
hv=newHV();
hv_store(hv,var_flag ? "varname" : "objname",7,
newSVpvn(text_ptr,str-text_ptr),0);
while(str<end && isspace(*str)) str++;
/* Flag after the name if present -- <%VAR/f%>
*/
if(*str=='/') {
text_ptr=++str;
while(str<end && isalnum(*str)) str++;
hv_store(hv,"flag",4,newSVpvn(text_ptr,1),0);
while(str<end && isspace(*str)) str++;
}
/* And finally, if that's a variable we're looking for the
* closing bracket, if that's an object -- we're scanning its
* arguments.
*/
if(var_flag) {
if(*str=='$' && str+1<end && str[1]=='>') {
str+=2;
text_ptr=str;
av_push(parsed,newRV_noinc((SV*)hv));
continue;
}
else {
av_clear(parsed);
return newSVpvf("Variable is not closed in template, pos=%ld (%*s)",
str-template,
(int)(end-str>10 ? 10 : end-str),str);
}
}
else {
HV* args=newHV();
while(1) {
char * name_end;
if(*str=='%' && str+1<end && str[1]=='>') {
str+=2;
text_ptr=str;
break;
}
/* Argument name
*/
text_ptr=str;
while(str<end && isalnum_dot(*str)) str++;
if(str==text_ptr) {
av_clear(parsed);
return newSVpvf("Wrong argument name, pos=%ld (%*s)",
str-template,
(int)(end-str>10 ? 10 : end-str),str);
}
/* Empty argument value gets replaced with 'on' text for
* compatibility
*/
name_end=str;
if(str==end || *str!='=') {
AV* tav=newAV();
HV* thv=newHV();
hv_store(thv,"text",4,newSVpvn("on",2),0);
av_push(tav,newRV_noinc((SV*)thv));
hv_store(args,text_ptr,name_end-text_ptr,
newRV_noinc((SV*)tav),0);
}
/* We get here only when there is '=' sign in the str
* position and therefore we expect an argument.
*/
else {
char * val_start;
char * val_end;
char literal;
str++;
while(str<end && isspace(*str)) str++;
if(str==end) {
av_clear(parsed);
return newSVpvf("Unclosed object in template, pos=%ld (..%*s)",
str-template,
(int)(length>10 ? 10 : length),
length>10 ? end-10 : template);
}
else if(*str=='"') {
val_start=++str;
while(str<end && *str!='"') str++;
if(str==end) {
av_clear(parsed);
return newSVpvf("Unmatched \" in the argument, pos=%ld (%*s)",
val_start-template,
(int)(end-val_start>10 ? 10 : end-val_start),val_start);
}
val_end=str++;
literal=0;
}
else if(*str=='\'') {
val_start=++str;
while(str<end && *str!='\'') str++;
if(str==end) {
av_clear(parsed);
return newSVpvf("Unmatched ' in the argument, pos=%ld (%*s)",
val_start-template,
(int)(end-val_start>10 ? 10 : end-val_start),val_start);
}
val_end=str++;
literal=1;
}
else if(*str=='{' && str+1<end && str[1]=='\'') {
unsigned count=0;
str+=2;
val_start=str;
while(str<end && (count || *str!='\'' || str+1>=end || str[1]!='}')) {
if(*str=='{' && str+1<end && str[1]=='\'') {
count++;
str+=2;
}
else if(*str=='\'' && str+1<end && str[1]=='}') {
count--;
str+=2;
}
else {
str++;
}
}
if(str==end) {
av_clear(parsed);
return newSVpvf("Unmatched {' in the argument, pos=%ld (%*s)",
val_start-template,
(int)(end-val_start>10 ? 10 : end-val_start),val_start);
}
val_end=str;
str+=2;
literal=1;
}
else if(*str=='{') {
unsigned count=0;
val_start=++str;
while(str<end && (count || *str!='}')) {
if(*str=='{') {
count++;
}
else if(*str=='}') {
count--;
}
str++;
}
if(str==end) {
av_clear(parsed);
return newSVpvf("Unmatched { in the argument, pos=%ld (%*s)",
val_start-template,
(int)(end-val_start>10 ? 10 : end-val_start),val_start);
}
val_end=str++;
literal=0;
}
else {
/* We have to count <%%> to be compatible with older
* code -- there are cases where there are no
* quotes for both simple things like '<%A b=4%>' and
* references like '<%A b=<%C/f%>%>'.
*
* There is no similar provision for <$A$> to
* discourage from using unquoted values.
*/
unsigned count=0;
val_start=str;
while(str<end && (count || !isspace(*str))) {
if(str+1<end) {
if(*str=='<' && str[1]=='%') {
count++;
str++;
}
else if(*str=='%' && str[1]=='>') {
if(!count) break;
count--;
str++;
}
}
str++;
}
val_end=str;
literal=0;
}
if(literal) {
sv=newSVpvn(val_start,val_end-val_start);
if(is_unicode) SvUTF8_on(sv);
hv_store(args,
text_ptr,name_end-text_ptr,
sv,
0);
}
else {
SV* val=parse_text(aTHX_ val_start,val_end-val_start,is_unicode);
if(SvROK(val)) {
hv_store(args,text_ptr,name_end-text_ptr,
val,0);
}
else {
av_clear(parsed);
return val;
}
}
}
while(str<end && isspace(*str)) str++;
}
hv_store(hv,"args",4,newRV_noinc((SV*)args),0);
av_push(parsed,newRV_noinc((SV*)hv));
}
}
return newRV_noinc((SV*)parsed);
}
/************************************************************************/
MODULE = XAO::PageSupport PACKAGE = XAO::PageSupport
###############################################################################
unsigned
level()
CODE:
/* Reports how many buffer positions are currently saved on the stack */
RETVAL=stacktop;
OUTPUT:
RETVAL
void
reset()
    CODE:
        /* Forget all saved positions and rewind the write offset to the
         * start of the buffer. The buffer memory itself is kept.
         */
        stacktop=0;
        pstack[0]=0;
        bufpos=0;
void
push()
    CODE:
        /* Remember the current write offset so that a later pop() can
         * return everything added after this point. When the stack is
         * full the position is not saved and a message goes to stderr.
         */
        if(stacktop+1<MAX_STACK) {
            pstack[stacktop++]=bufpos;
        }
        else {
            fprintf(stderr,"XAO::PageSupport - maximum stack deep reached!\n");
        }
SV *
pop(is_unicode)
        short is_unicode;
    CODE:
        /* Returns the text added since the matching push() and rewinds
         * the write offset to that saved position. With an empty stack
         * the whole buffer content is returned and the offset goes back
         * to zero. Nothing is touched while the buffer is unallocated.
         */
        const char *chunk="";
        STRLEN chunk_len=0;
        if(buffer) {
            STRLEN top=bufpos;
            bufpos=stacktop ? pstack[--stacktop] : 0;
            chunk=buffer+bufpos;
            chunk_len=top-bufpos;
        }
        RETVAL=newSVpvn(chunk,chunk_len);
        if(is_unicode) {
            SvUTF8_on(RETVAL);
        }
    OUTPUT:
        RETVAL
unsigned long
bookmark()
CODE:
/* Reports the current write offset in the buffer; peek() accepts such an offset */
RETVAL=bufpos;
OUTPUT:
RETVAL
SV *
peek(len, is_unicode)
        unsigned long len;
        short is_unicode;
    CODE:
        /* Returns a copy of the buffer content from offset 'len' up to
         * the current write offset without changing any state. An
         * unallocated buffer or an offset past the current position
         * yields an empty string.
         */
        if(buffer && len<=bufpos) {
            RETVAL=newSVpvn(buffer+len,bufpos-len);
        }
        else {
            RETVAL=newSVpvn("",0);
        }
        if(is_unicode) {
            SvUTF8_on(RETVAL);
        }
    OUTPUT:
        RETVAL
void
addtext(text)
        STRLEN len=0;
        char * text=SvPV(ST(0),len);
    CODE:
        /* Appends the bytes of the argument at the current write offset,
         * growing the buffer by the needed length plus CHUNK_SIZE when
         * it is too small. Empty text is ignored.
         *
         * The result of realloc() goes into a temporary and the size is
         * updated only on success. On failure the old buffer and its
         * recorded size stay valid and consistent, so a later call
         * cannot copy into a NULL buffer that bufsize claims is large
         * enough. The failure is reported on stderr and the text is
         * dropped.
         */
        if(text && len) {
            if(bufpos+len >= bufsize) {
                STRLEN newsize=bufsize+len+CHUNK_SIZE;
                char *newbuf=realloc(buffer,sizeof(*buffer)*newsize);
                if(! newbuf) {
                    fprintf(stderr,
                            "XAO::PageSupport - out of memory, length=%lu, bufsize=%lu, bufpos=%lu\n",
                            (unsigned long)len,(unsigned long)newsize,(unsigned long)bufpos);
                    XSRETURN_EMPTY;
                }
                buffer=newbuf;
                bufsize=newsize;
            }
            memcpy(buffer+bufpos,text,len);
            bufpos+=len;
        }
SV *
parse(template,is_unicode)
STRLEN length=0;
char *template=SvPV(ST(0),length);
short is_unicode;
CODE:
/* Hands the bytes of the first argument to parse_text and returns its
 * result unchanged: a reference when parsing succeeded, otherwise a
 * plain string holding the error message.
 */
RETVAL=parse_text(aTHX_ template, length, is_unicode);
OUTPUT:
RETVAL