#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "ppport.h"
#define XML_DEBUG 0
#define XML_DEVEL 0
#include "xmlfast.h"
#if XML_DEBUG
#define WHERESTR " at %s line %d.\n"
#define WHEREARG __FILE__, __LINE__
#define debug(...) do{ fprintf(stderr, __VA_ARGS__); fprintf(stderr, WHERESTR, WHEREARG); } while(0)
#else
#define debug(...)
#endif
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#ifndef ptr_t
typedef
void
* ptr_t;
#endif
#ifndef PERL_ARGS_ASSERT_SV_RECODE_TO_UTF8
#define PERL_ARGS_ASSERT_SV_RECODE_TO_UTF8
#endif
typedef
struct
{
char
*name;
unsigned
int
len;
char
*fullname;
unsigned
int
fulllen;
} xml_node;
#define UTF8_BYTES 1
#define UTF8_UPGRADE 2
#define UTF8_DECODE 3
#define UTF8_EDECODE 4
#define EMIT_WARNS 0x0001
#define TAG_MATCH 0x0002
#define MODE_ORDER 0x1000
#define MODE_TRIM 0x2000
#define MODE_ARRAYS 0x4000
typedef
struct
{
unsigned
int
flags;
unsigned
int
bytes;
unsigned
int
utf8;
SV * attr;
SV * text;
SV * join;
SV * cdata;
SV * comm;
HV * array;
char
*encoding;
SV *encode;
int
depth;
unsigned
int
chainsize;
xml_node * chain;
HV ** hchain;
HV * hcurrent;
SV * pi;
SV * attrname;
SV * textval;
SV * error;
parser_state * state;
} parsestate;
typedef
struct
{
unsigned
int
flags;
unsigned
int
bytes;
unsigned
int
utf8;
char
* attr; STRLEN attl;
char
* text;
char
* join;
char
* cdata;
char
* comm;
HV * array;
char
*encoding;
SV *encode;
unsigned
int
chainsize;
xml_node * chain;
HV ** hchain;
HV * hcurrent;
SV * pi;
SV * attrname;
SV * textval;
SV * error;
I32 depth;
I32 ix;
SV * result;
SV * nested;
} compstate;
#define hv_store_a( hv, key, sv ) \
STMT_START { \
SV **exists; \
char
*kv = SvPV_nolen(key); \
int
kl = SvCUR(key); \
if
( exists = hv_fetch(hv, kv, kl, 0) ) { \
if
( SvROK(*exists) && SvTYPE( SvRV(*exists) ) == SVt_PVAV) { \
AV *av = (AV *) SvRV( *exists ); \
av_push( av, sv ); \
} \
else
{ \
AV *av = newAV(); \
if
(SvROK(*exists)) { \
SvREFCNT_inc(*exists); \
av_push( av, *exists ); \
}
else
{ \
SV *old = newSV(0); \
sv_copypv(old, *exists); \
av_push( av, old ); \
} \
av_push( av, sv ); \
(
void
) hv_store( hv, kv, kl, newRV_noinc( (SV *) av ), 0 ); \
} \
}
else
{ \
(
void
) hv_store(hv, kv, kl, sv, 0); \
} \
} STMT_END
#define hv_store_aa( hv, key, sv ) \
STMT_START { \
SV **exists; \
char
*kv = SvPV_nolen(key); \
int
kl = SvCUR(key); \
if
( ( exists = hv_fetch(hv, kv, kl, 0) ) && SvROK(*exists) && (SvTYPE( SvRV(*exists) ) == SVt_PVAV) ) { \
AV *av = (AV *) SvRV( *exists ); \
av_push( av, sv ); \
} \
else
{ \
AV *av = newAV(); \
av_push( av, sv ); \
(
void
) hv_store( hv, kv, kl, newRV_noinc( (SV *) av ), 0 ); \
} \
} STMT_END
#define xml_sv_decode(ctx, sv) \
STMT_START { \
if
(!ctx->bytes && !SvUTF8(sv)) {\
if
(ctx->utf8 == UTF8_UPGRADE) { \
SvUTF8_on(sv); \
} \
else
if
(ctx->utf8 == UTF8_DECODE) { \
sv_utf8_decode(sv); \
} \
else
if
(ctx->encode) { \
(
void
) sv_recode_to_utf8(sv, ctx->encode); \
} \
}\
} STMT_END
void
on_bytes_charset_part(
void
* pctx,
char
* data, unsigned
int
length);
void
on_bytes_charset(
void
* pctx,
char
* data, unsigned
int
length);
void
on_tag_close(
void
* pctx,
char
* data, unsigned
int
length);
static
inline
void
my_warn(parsestate *ctx,
char
* format, ...) {
if
(!(ctx->flags & EMIT_WARNS))
return
;
va_list
va;
va_start
(va,format);
SV *text = sv_2mortal(newSVpvn(
""
,0));
sv_vcatpvf(text, format, &va);
va_end
(va);
warn(
"%s"
,SvPV_nolen(text));
}
static
inline
void
DESTROY (parsestate *ctx) {
if
(ctx->encode) { SvREFCNT_dec(ctx->encode); ctx->encode = 0; }
if
(ctx->textval) { SvREFCNT_dec(ctx->textval); ctx->textval = 0; }
if
(ctx->pi) { SvREFCNT_dec(ctx->pi); ctx->pi = 0; }
if
(ctx->attrname) { SvREFCNT_dec(ctx->attrname); ctx->attrname = 0; }
if
(ctx->array) { SvREFCNT_dec(ctx->array); ctx->array = 0; }
if
(ctx->depth > -1) {
int
currdepth = ctx->depth;
while
(ctx->depth > -1) {
on_tag_close(ctx,ctx->chain->name,ctx->chain->len);
if
(currdepth == ctx->depth) {
my_warn(ctx,
"Recursion during autoclose tags. depth=%d\n"
,ctx->depth);
break
;
}
}
}
if
(ctx->hchain) { Safefree(ctx->hchain); ctx->hchain = 0; }
if
(ctx->chain) { Safefree(ctx->chain); ctx->chain = 0; }
}
static
inline
void
my_croak(parsestate *ctx,
char
* format, ...) {
DESTROY(ctx);
va_list
va;
va_start
(va,format);
SV *text = sv_2mortal(newSVpvn(
""
,0));
sv_vcatpvf(text, format, &va);
va_end
(va);
croak(
"%s"
,SvPV_nolen(text));
}
SV * find_encoding(
char
* encoding) {
dSP;
int
count;
ENTER;
SAVETMPS;
PUSHMARK(SP);
XPUSHs(sv_2mortal(newSVpv(encoding, 0)));
PUTBACK;
count = call_pv(
"Encode::find_encoding"
,G_SCALAR);
SPAGAIN;
if
(SvTRUE(ERRSV)) {
warn(
"Shit happens: %s\n"
, SvPV_nolen(ERRSV));
(
void
) POPs;
}
if
(count != 1)
croak(
"find_encoding fault: bad number of returned values: %d"
,count);
SV *encode = POPs;
SvREFCNT_inc(encode);
PUTBACK;
FREETMPS;
LEAVE;
return
encode;
}
SV * get_constant(
char
* name) {
dSP;
int
count;
ENTER;
SAVETMPS;
PUSHMARK(SP);
PUTBACK;
count = call_pv(name,G_SCALAR);
SPAGAIN;
if
(count != 1)
croak(
"Bad number of returned values: %d"
,count);
SV *value = POPs;
sv_dump(value);
SvREFCNT_inc(value);
PUTBACK;
FREETMPS;
LEAVE;
return
value;
}
SV * sv_recode_from_utf8(pTHX_ SV *sv, SV *encoding) {
dVAR;
PERL_ARGS_ASSERT_SV_RECODE_TO_UTF8;
if
(SvPOK(sv) && SvUTF8(sv) && SvROK(encoding)) {
SV *bytes;
dSP;
ENTER;
SAVETMPS;
save_re_context();
PUSHMARK(sp);
EXTEND(SP, 3);
XPUSHs(encoding);
XPUSHs(sv);
XPUSHs(sv_2mortal(newSViv(4)));
PUTBACK;
call_method(
"encode"
, G_SCALAR);
SPAGAIN;
bytes = POPs;
SvREFCNT_inc(bytes);
PUTBACK;
FREETMPS;
LEAVE;
return
bytes;
}
return
SvPOKp(sv) ? sv : NULL;
}
void
on_comment(
void
* pctx,
char
* data,unsigned
int
length) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_comment"
);
#endif
parsestate *ctx = pctx;
SV *sv = newSVpvn(data, length);
hv_store_a(ctx->hcurrent, ctx->comm, sv );
}
void
on_pi_attr(parsestate *ctx) {
if
(
(SvCUR(ctx->attrname) == 8) &&
(
memcmp
(SvPV_nolen(ctx->attrname),
"encoding"
, 8) == 0 )
) {
ctx->encoding = (
char
*) SvPV_nolen(ctx->textval);
if
( ( SvCUR(ctx->textval) == 5 ) && ( strncasecmp( ctx->encoding,
"utf-8"
,5 ) == 0 ) ) {
if
(ctx->bytes) ctx->utf8 = UTF8_BYTES;
}
else
{
ctx->encode = find_encoding(ctx->encoding);
ctx->utf8 = 0;
if
(ctx->text) {
ctx->state->cb.bytes = on_bytes_charset;
ctx->state->cb.bytespart = on_bytes_charset_part;
}
}
}
else
{
}
sv_2mortal(ctx->attrname);
sv_2mortal(ctx->textval);
ctx->attrname = ctx->textval = 0;
}
static
inline
SV * mkchr(UV chr) {
char
*end, utf[UTF8_MAXBYTES + 1];
SV *tmp;
end = uvchr_to_utf8(utf, chr);
*end =
'\0'
;
tmp = newSVpvn(utf, end-utf);
SvUTF8_on(tmp);
return
tmp;
}
void
on_uchar(
void
* pctx,
wchar_t
chr) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_text_part"
);
#endif
parsestate *ctx = pctx;
dTHX;
if
( !( ctx->attrname || ctx->text ) )
return
;
if
(!ctx->utf8 && ctx->bytes && !UTF8_IS_INVARIANT(chr) ) {
if
(!ctx->encode)
my_croak(ctx,
"Can't decode entities in non-utf8, bytes mode"
);
SV *tmp = mkchr(chr);
SV *bytes = sv_recode_from_utf8(aTHX_ tmp, ctx->encode);
if
(SvCUR(bytes) == 0) {
my_warn(ctx,
"Can't recode U+%04d entity into %s in bytes mode"
, chr, ctx->encoding);
if
(ctx->textval) {
sv_catpvn(ctx->textval,
"?"
,1);
}
else
{
ctx->textval = newSVpvn(
"?"
,1);
}
sv_2mortal(tmp);
sv_2mortal(bytes);
return
;
}
if
(ctx->textval) {
sv_catsv(ctx->textval,bytes);
sv_2mortal(bytes);
}
else
{
ctx->textval = bytes;
}
return
;
}
else
{
char
*start, *end;
STRLEN len = 0;
if
(ctx->textval) {
len = SvCUR(ctx->textval);
}
else
{
ctx->textval = newSVpvn(
""
,0);
}
sv_grow(ctx->textval, len + UTF8_MAXBYTES + 1 );
start = end = SvEND(ctx->textval);
end = uvchr_to_utf8(start, chr);*end =
'\0'
;
SvCUR_set(ctx->textval,len + end - start);
}
}
void
on_bytes_charset_part(
void
* pctx,
char
* data, unsigned
int
length) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_bytes_charset_part"
);
#endif
parsestate *ctx = pctx;
if
( !( ctx->attrname || ctx->text ) )
return
;
if
(!length)
return
;
SV *tmp = newSVpvn(data, length);
xml_sv_decode(ctx,tmp);
if
(ctx->textval) {
sv_catsv(ctx->textval, tmp);
sv_2mortal(tmp);
}
else
{
ctx->textval = tmp;
}
}
void
on_bytes_charset(
void
* pctx,
char
* data, unsigned
int
length) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_bytes"
);
#endif
parsestate *ctx = pctx;
if
( !( ctx->attrname || ctx->text ) )
return
;
SV *tmp = newSVpvn(data, length);
xml_sv_decode(ctx,tmp);
if
(ctx->textval) {
sv_catsv(ctx->textval, tmp);
sv_2mortal(tmp);
}
else
{
ctx->textval = tmp;
}
if
(ctx->attrname) {
if
(ctx->pi) {
on_pi_attr(ctx);
}
else
{
hv_store_a(ctx->hcurrent, ctx->attrname, ctx->textval);
}
sv_2mortal(ctx->attrname);
ctx->attrname = 0;
ctx->textval = 0;
}
else
{
hv_store_a(ctx->hcurrent, ctx->text, ctx->textval);
}
ctx->textval = 0;
}
void
on_bytes_part(
void
* pctx,
char
* data, unsigned
int
length) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_bytes_part"
);
#endif
parsestate *ctx = pctx;
if
( !( ctx->attrname || ctx->text ) )
return
;
if
(ctx->textval) {
if
(length > 0) { sv_catpvn(ctx->textval, data, length); }
}
else
{
ctx->textval = newSVpvn(data, length);
}
}
void
on_bytes(
void
* pctx,
char
* data, unsigned
int
length) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_bytes"
);
#endif
parsestate *ctx = pctx;
if
( !( ctx->attrname || ctx->text ) )
return
;
if
(ctx->textval) {
if
(length > 0) { sv_catpvn(ctx->textval, data, length); }
}
else
{
ctx->textval = newSVpvn(data, length);
}
xml_sv_decode(ctx,ctx->textval);
if
(ctx->attrname) {
if
(ctx->pi) {
on_pi_attr(ctx);
}
else
{
hv_store_a(ctx->hcurrent, ctx->attrname, ctx->textval);
}
sv_2mortal(ctx->attrname);
ctx->attrname = 0;
ctx->textval = 0;
}
else
{
hv_store_a(ctx->hcurrent, ctx->text, ctx->textval);
}
ctx->textval = 0;
}
void
on_cdata(
void
* pctx,
char
* data,unsigned
int
length) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_cdata"
);
#endif
parsestate *ctx = pctx;
SV *sv = newSVpvn(data, length);
xml_sv_decode(ctx,sv);
hv_store_a(ctx->hcurrent, ctx->cdata, sv );
}
void
on_pi_open(
void
* pctx,
char
* data, unsigned
int
length) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_pi_open"
);
#endif
parsestate *ctx = pctx;
ctx->pi = newSVpvn(data,length);
}
void
on_pi_close(
void
* pctx,
char
* data, unsigned
int
length) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_pi_close"
);
#endif
parsestate *ctx = pctx;
sv_2mortal(ctx->pi);
ctx->pi = 0;
}
void
on_tag_open(
void
* pctx,
char
* data, unsigned
int
length) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_tag_open"
);
#endif
parsestate *ctx = pctx;
if
(ctx->textval) {
xml_sv_decode(ctx,ctx->textval);
hv_store_a(ctx->hcurrent, ctx->text, ctx->textval);
ctx->textval = 0;
}
HV * hv = newHV();
ctx->depth++;
if
(ctx->depth >= ctx->chainsize) {
warn(
"XML depth too high. Consider increasing `_max_depth' to at more than %d to avoid reallocations"
,ctx->chainsize);
ctx->chainsize *= 2;
Renew( ctx->hchain, ctx->chainsize, HV* );
Renew( ctx->chain, ctx->chainsize, xml_node);
}
ctx->chain[ctx->depth].len = length;
ctx->chain[ctx->depth].name = data;
if
(ctx->flags & TAG_MATCH) {
if
(ctx->depth == 0) {
ctx->chain[ctx->depth].fulllen = length + 1;
Newx(ctx->chain[ctx->depth].fullname, ctx->chain[ctx->depth].fulllen + 1,
char
);
ctx->chain[ctx->depth].fullname[0] =
'/'
;
memcpy
(ctx->chain[ctx->depth].fullname+1,data,length);
ctx->chain[ctx->depth].fullname[length+1] = 0;
}
else
{
ctx->chain[ctx->depth].fulllen = ctx->chain[ctx->depth - 1].fulllen + length + 1;
Newx(ctx->chain[ctx->depth].fullname, ctx->chain[ctx->depth].fulllen + 1,
char
);
memcpy
(
ctx->chain[ctx->depth].fullname,
ctx->chain[ctx->depth - 1].fullname,
ctx->chain[ctx->depth - 1].fulllen
);
ctx->chain[ctx->depth].fullname[ ctx->chain[ctx->depth - 1].fulllen ] =
'/'
;
memcpy
(
ctx->chain[ctx->depth].fullname + ctx->chain[ctx->depth - 1].fulllen + 1,
data,
length
);
ctx->chain[ctx->depth].fullname[ ctx->chain[ctx->depth].fulllen ] = 0;
}
}
ctx->hchain[ ctx->depth ] = ctx->hcurrent;
ctx->hcurrent = hv;
}
void
on_tag_close(
void
* pctx,
char
* data, unsigned
int
length) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_tag_close"
);
#endif
parsestate *ctx = pctx;
SV *tag = sv_2mortal(newSVpvn(data,length));
SV **text;
I32 keys = HvKEYS(ctx->hcurrent);
SV *svtext = 0;
if
(ctx->textval) {
xml_sv_decode(ctx,ctx->textval);
hv_store_a(ctx->hcurrent, ctx->text, ctx->textval);
ctx->textval = 0;
}
if
(ctx->depth < 0) {
my_warn(ctx,
"Ignore unbalanced tag: closed upper than root"
);
return
;
}
if
( (ctx->chain[ctx->depth].len != length) || (
memcmp
( ctx->chain[ctx->depth].name, data, length) != 0) ) {
int
close, depth = ctx->depth;
my_warn(ctx,
"Unbalanced close tag <%s> depth=%d\n"
, SvPV_nolen(tag),depth);
int
checkdepth = depth + 10;
while
(depth > -1 && checkdepth-- > 0) {
if
( (ctx->chain[depth].len == length) && (
memcmp
( ctx->chain[depth].name, data, length) == 0)) {
for
(close = ctx->depth; close >= depth; close--) {
my_warn(ctx,
"Force tag close <%.*s> at depth %u"
, ctx->chain[close].len, ctx->chain[close].name, close);
on_tag_close(pctx, ctx->chain[close].name, (STRLEN)ctx->chain[close].len);
}
depth = -1;
break
;
}
depth--;
}
if
(depth != -1) {
my_warn(ctx,
"Found no open tag for %s. Ignored"
, SvPV_nolen(tag));
}
return
;
}
if
(ctx->text) {
text = hv_fetch(ctx->hcurrent, SvPV_nolen(ctx->text), SvCUR(ctx->text), 0);
if
(text && SvOK(*text)) {
if
(SvROK(*text) && SvTYPE( SvRV(*text) ) == SVt_PVAV) {
AV *av = (AV *) SvRV( *text );
SV **val;
I32 len = 0, avlen = av_len(av) + 1;
if
(ctx->join) {
svtext = newSVpvn(
""
,0);
if
(SvCUR(ctx->join)) {
for
( len = 0; len < avlen; len++ ) {
if
( ( val = av_fetch(av,len,0) ) && SvOK(*val) ) {
if
(len > 0) { sv_catsv(svtext,ctx->join); }
sv_catsv(svtext,*val);
}
}
}
else
{
for
( len = 0; len < avlen; len++ ) {
if
( ( val = av_fetch(av,len,0) ) && SvOK(*val) ) {
sv_catsv(svtext,*val);
}
}
}
SvREFCNT_inc(svtext);
(
void
) hv_store(ctx->hcurrent, SvPV_nolen(ctx->text), SvCUR(ctx->text), svtext, 0);
}
else
{
svtext = newRV( (SV *) av );
}
}
else
{
svtext = *text;
SvREFCNT_inc(svtext);
}
}
}
if
(ctx->depth > -1) {
HV *hv = ctx->hcurrent;
ctx->hcurrent = ctx->hchain[ ctx->depth ];
ctx->hchain[ ctx->depth ] = (HV *)NULL;
ctx->depth--;
if
(keys == 0) {
SvREFCNT_dec(hv);
SV *sv = newSVpvn(
""
,0);
if
(ctx->flags & MODE_ARRAYS) {
hv_store_aa(ctx->hcurrent, tag, sv);
}
else
if
(ctx->array && (hv_exists(ctx->array, data,length ) )) {
hv_store_aa(ctx->hcurrent, tag, sv);
}
else
{
hv_store_a(ctx->hcurrent, tag, sv);
}
}
else
if
(keys == 1 && svtext) {
SvREFCNT_inc(svtext);
SvREFCNT_dec(hv);
if
(ctx->flags & MODE_ARRAYS) {
hv_store_aa(ctx->hcurrent, tag, svtext);
}
else
if
(ctx->array && (hv_exists(ctx->array, data,length ) )) {
hv_store_aa(ctx->hcurrent, tag, svtext);
}
else
{
hv_store_a(ctx->hcurrent, tag, svtext);
}
}
else
{
SV *sv = newRV_noinc( (SV *) hv );
if
(ctx->flags & MODE_ARRAYS) {
hv_store_aa(ctx->hcurrent, tag, sv);
}
else
if
(ctx->array && ( hv_exists(ctx->array, data,length ) )) {
hv_store_aa(ctx->hcurrent, tag, sv);
}
else
{
hv_store_a(ctx->hcurrent, tag, sv);
}
}
if
(svtext) SvREFCNT_dec(svtext);
}
else
{
my_croak(ctx,
"Bad depth: %d for tag close %s\n"
,ctx->depth,SvPV_nolen(tag));
}
}
void
on_attr_name(
void
* pctx,
char
* data,unsigned
int
length) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_attr_name"
);
#endif
parsestate *ctx = pctx;
if
(ctx->textval) {
my_croak(ctx,
"Have textval=%s, while called attrname\n"
,SvPV_nolen(ctx->textval));
}
if
(ctx->attrname) {
my_croak(ctx,
"Called attrname '%-.*s'=, while have attrname='%-.*s'\n"
,length,data,SvCUR(ctx->attrname),SvPVX(ctx->attrname));
}
if
(ctx->pi) {
ctx->attrname = newSVpvn(data,length);
}
else
{
if
( ctx->attr ) {
ctx->attrname = newSV(length + SvCUR(ctx->attr));
sv_copypv(ctx->attrname, ctx->attr);
sv_catpvn(ctx->attrname, data, length);
}
else
{
ctx->attrname = newSVpvn(data, length);
}
}
}
void
on_warn(
void
* pctx,
char
* format, ...) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_warn"
);
#endif
va_list
va;
va_start
(va,format);
SV *text = sv_2mortal(newSVpvn(
""
,0));
sv_vcatpvf(text, format, &va);
warn(
"%s"
,SvPV_nolen(text));
va_end
(va);
}
void
on_die(
void
* pctx,
char
* format, ...) {
#if XML_DEVEL
if
(!pctx) croak(
"Context not passed to on_die"
);
#endif
parsestate *ctx = pctx;
va_list
va;
va_start
(va,format);
ctx->error = sv_2mortal(newSVpvn(
""
,0));
sv_vcatpvf(ctx->error, format, &va);
va_end
(va);
}
#ifndef HePV_nolen
#define HePV_nolen(he) ((HeKLEN(he) == HEf_SVKEY) ? \
SvPV_nolen(HeKEY_sv(he)) : \
(HeKEY(he)))
#endif
#define mystrcmp(a,b) ( a && b ? strcmp(a,b) : 2 )
void
h2xout(
char
*data, compstate *p ) {
printf
(
"%*s"
, p->depth * 4,
""
);
printf
(
"%s\n"
, data );
}
void
h2xoute(
char
*data, compstate *p ) {
printf
(
"%*s"
, p->depth * 4,
""
);
printf
(
"e(%s)\n"
, data);
}
void
h2xp( compstate *p,
char
*f, ... ) {
va_list
va_args;
va_start
(va_args,f);
sv_vcatpvf( p->result, f, &va_args );
va_end
(va_args);
}
void
h2xpe( compstate *p,
char
*s ) {
char
*b = s;
while
(1) {
switch
(*s) {
warn(
"%c"
, *s);
case
0:
if
(b < s) sv_catpvf( p->result,
"%-.*s"
, (
int
)(s - b), b );
return
;
case
'<'
:
if
(b < s) sv_catpvf( p->result,
"%-.*s"
, (
int
)(s - b), b );
sv_catpvf( p->result,
"%s"
,
"<"
);
b = s+1;
break
;
case
'>'
:
if
(b < s) sv_catpvf( p->result,
"%-.*s"
, (
int
)(s - b), b );
sv_catpvf( p->result,
"%s"
,
">"
);
b = s+1;
break
;
case
'"'
:
if
(b < s) sv_catpvf( p->result,
"%-.*s"
, (
int
)(s - b), b );
sv_catpvf( p->result,
"%s"
,
"""
);
b = s+1;
break
;
case
'\''
:
if
(b < s) sv_catpvf( p->result,
"%-.*s"
, (
int
)(s - b), b );
sv_catpvf( p->result,
"%s"
,
"'"
);
b = s+1;
break
;
case
'&'
:
if
(b < s) sv_catpvf( p->result,
"%-.*s"
, (
int
)(s - b), b );
sv_catpvf( p->result,
"%s"
,
"&"
);
b = s+1;
break
;
default
:
break
;
}
s++;
}
}
void
kv2x (
char
*key, SV *val, compstate *p );
void
kv2x (
char
*key, SV *val, compstate *p ) {
char
closed;
HE* ent;
char
*nkey;
STRLEN i, nlen;
AV *av;
SV **avv;
debug(
"key=%s, val=%s"
,key, SvPV_nolen(val));
if
( mystrcmp( key, p->text ) == 0 ) {
h2xpe(p, SvPV_nolen( val ));
}
else
if
( mystrcmp( key, p->cdata ) == 0 ) {
h2xp(p,
"<![CDATA["
);
h2xp(p, SvPV_nolen( val ));
h2xp(p,
"]]>"
);
}
else
if
( p->comm && mystrcmp( key, p->comm) == 0 ) {
debug(
"comm: %s"
, SvPV_nolen( val ));
h2xp(p,
"<!-- "
);
h2xpe(p, SvPV_nolen( val ));
h2xp(p,
" -->"
);
}
else
{
if
(SvROK( val )) {
switch
( SvTYPE( SvRV( val ) ) ) {
case
SVt_PVHV:
debug(
"%s -> hash inside"
, key);
(
void
) hv_iterinit( (HV *) SvRV( val ) );
h2xp(p,
"<%s"
, key);
closed = 0;
while
((ent = hv_iternext( (HV *) SvRV( val ) ))) {
nkey = HePV(ent, nlen);
if
(
strncmp
( nkey, p->attr, p->attl ) == 0 ) {
nkey += p->attl;
h2xp(p,
" %s=\""
,nkey);
h2xpe(p, SvPV_nolen(HeVAL(ent)));
h2xp(p,
"\""
);
continue
;
}
}
(
void
) hv_iterinit( (HV *) SvRV( val ) );
while
((ent = hv_iternext( (HV *) SvRV( val ) ))) {
nkey = HePV(ent, nlen);
if
(
strncmp
( nkey, p->attr, p->attl ) == 0 ) {
continue
;
}
debug(
"Nested %s->%s"
, nkey, SvPV_nolen(HeVAL(ent)));
if
(!closed) {
closed = 1;
h2xp(p,
">"
);
}
p->depth++;
kv2x( nkey, HeVAL(ent), p );
p->depth--;
}
if
(!closed) {
h2xp(p,
"/>"
);
}
else
{
h2xp(p,
"</%s>"
,key);
}
break
;
case
SVt_PVAV:
debug(
"array inside (Multinode)"
);
av = (AV *) SvRV( val );
nlen = av_len(av) + 1;
for
( i = 0; i < nlen; i++ ) {
if
( ( avv = av_fetch(av,i,0) ) && SvOK(*avv) ) {
kv2x( key, *avv, p );
}
}
break
;
default
:
warn(
"Bad reference found: %s"
, SvPV_nolen( SvRV(val) ));
break
;
}
}
else
{
if
(SvOK(val) && SvCUR(val)) {
h2xp(p,
"<%s>"
, key);
h2xpe(p, SvPV_nolen(val));
h2xp(p,
"</%s>"
, key);
}
else
{
h2xp(p,
"<%s/>"
,key);
}
}
}
}
void
h2x ( SV *x, compstate *p );
void
h2x ( SV *x, compstate *p ) {
HE* ent;
SV *val;
char
*key;
STRLEN klen;
if
(SvROK (x)) {
switch
( SvTYPE( SvRV(x) ) ) {
case
SVt_PVHV:
(
void
) hv_iterinit( (HV *) SvRV( x ) );
while
((ent = hv_iternext( (HV *) SvRV( x ) ))) {
key = HePV(ent, klen);
val = HeVAL(ent);
if
(
strncmp
( key, p->attr,
strlen
(p->attr) ) == 0 ) {
debug(
"Attribute %s at this level is prohibited"
, key);
continue
;
}
kv2x( key, val, p );
}
break
;
default
:
warn(
"skip %s"
, SvPV_nolen( SvRV(x) ));
}
}
else
{
warn(
"skip nonref"
);
}
}
MODULE = XML::Fast PACKAGE = XML::Fast
void
_test()
CODE:
dTHX;
SV * cons = get_constant(
"Encode::FB_QUIET"
);
SV * test = newSViv(4);
sv_dump(test);
printf
(
"Got constant %s\n"
, SvPV_nolen(cons));
UV chr = 0xAB;
char
*end, utf[UTF8_MAXBYTES + 1];
SV *encode = find_encoding(
"windows-1251"
);
end = uvchr_to_utf8(utf, chr);
*end =
'\0'
;
SV *tmp = sv_2mortal(newSVpvn(utf, end-utf));
SvUTF8_on(tmp);
SV *bytes = sv_recode_from_utf8(aTHX_ tmp, encode);
sv_dump(bytes);
printf
(
"Created char %s / %s / bytes = %s\n"
, utf, SvPV_nolen(tmp), SvPV_nolen(bytes));
croak(
"Force exit"
);
void
_xml2hash(xml_sv,conf)
SV *xml_sv;
HV *conf;
PROTOTYPE: $$
PPCODE:
SvGETMAGIC(xml_sv);
char
*xml = SvPVbyte_nolen(xml_sv);
SV * RV;
parser_state state;
memset
(&state,0,
sizeof
(state));
parsestate ctx;
memset
(&ctx,0,
sizeof
(parsestate));
state.ctx = &ctx;
ctx.state = &state;
SV **key;
if
((key = hv_fetch(conf,
"order"
, 5, 0)) && SvTRUE(*key)) {
ctx.flags |= MODE_ORDER;
}
if
((key = hv_fetch(conf,
"trim"
, 4, 0)) && SvTRUE(*key)) {
ctx.flags |= MODE_TRIM;
}
if
((key = hv_fetch(conf,
"bytes"
, 5, 0)) && SvTRUE(*key)) {
ctx.bytes = 1;
}
else
{
if
((key = hv_fetch(conf,
"utf8decode"
, 10, 0)) && SvTRUE(*key)) {
ctx.utf8 = UTF8_DECODE;
}
else
{
ctx.utf8 = UTF8_UPGRADE;
}
}
if
((key = hv_fetch(conf,
"attr"
, 4, 0))) {
if
(SvOK(*key) && SvCUR(*key) > 0 ) {
ctx.attr = *key;
}
}
if
((key = hv_fetch(conf,
"text"
, 4, 0)) && SvOK(*key)) {
ctx.text = *key;
}
if
((key = hv_fetch(conf,
"join"
, 4, 0)) && SvPOK(*key)) {
ctx.join = *key;
}
if
((key = hv_fetch(conf,
"cdata"
, 5, 0)) && SvPOK(*key)) {
ctx.cdata = *key;
}
if
((key = hv_fetch(conf,
"comm"
, 4, 0)) && SvPOK(*key)) {
ctx.comm = *key;
}
if
((key = hv_fetch(conf,
"array"
, 5, 0)) && SvOK(*key)) {
if
(SvROK(*key) && SvTYPE( SvRV(*key) ) == SVt_PVAV) {
AV *av = (AV *) SvRV( *key );
ctx.array = newHV();
I32 len = 0, avlen = av_len(av) + 1;
SV **val;
for
( len = 0; len < avlen; len++ ) {
if
( ( val = av_fetch(av,len,0) ) && SvOK(*val) ) {
if
(SvPOK(*val)) {
(
void
) hv_store( ctx.array, SvPV_nolen(*val), SvCUR(*val), newSV(0), 0 );
}
else
{
my_croak(&ctx,
"Bad enrty in array entry: %s"
,SvPV_nolen(*val));
}
}
}
}
else
if
(!SvROK(*key)) {
if
(SvTRUE(*key)) {
ctx.flags |= MODE_ARRAYS;
}
}
else
{
my_croak(&ctx,
"Bad entry in array: %s"
,SvPV_nolen(*key));
}
}
if
((key = hv_fetch(conf,
"_max_depth"
, 10, 0)) && SvOK(*key)) {
ctx.chainsize = SvIV(*key);
if
(ctx.chainsize < 1) {
my_croak(&ctx,
"_max_depth contains bad value (%d)"
,ctx.chainsize);
}
}
else
{
ctx.chainsize = 256;
}
if
(!ctx.bytes) {
ctx.encoding =
"utf8"
;
}
if
(ctx.flags & MODE_ORDER) {
my_croak(&ctx,
"Ordered mode not implemented yet\n"
);
}
else
{
ctx.hcurrent = newHV();
Newx(ctx.chain, ctx.chainsize, xml_node);
Newx(ctx.hchain, ctx.chainsize, HV*);
ctx.depth = -1;
RV = sv_2mortal(newRV_noinc( (SV *) ctx.hcurrent ));
state.cb.piopen = on_pi_open;
state.cb.piclose = on_pi_close;
state.cb.tagopen = on_tag_open;
state.cb.tagclose = on_tag_close;
state.cb.attrname = on_attr_name;
if
((key = hv_fetch(conf,
"nowarn"
, 6, 0)) && SvTRUE(*key)) {
}
else
{
state.cb.warn = on_warn;
ctx.flags |= EMIT_WARNS;
}
state.cb.die = on_die;
if
(ctx.comm)
state.cb.comment = on_comment;
if
(ctx.cdata)
state.cb.cdata = on_cdata;
else
if
(ctx.text)
state.cb.cdata = on_bytes;
state.cb.bytes = on_bytes;
state.cb.bytespart = on_bytes_part;
state.cb.uchar = on_uchar;
if
(!(ctx.flags & MODE_TRIM))
state.save_wsp = 1;
}
parse(xml,&state);
if
(ctx.depth > -1 && !ctx.error) {
ctx.error = sv_2mortal(newSVpv(
"Unbalanced tags"
,0));
}
DESTROY(&ctx);
if
(ctx.error) {
croak(
"%s"
, SvPV_nolen(ctx.error));
}
ST(0) = RV;
XSRETURN(1);
void
_hash2xml(hash,conf)
SV *hash;
HV *conf;
PROTOTYPE: $$
PPCODE:
compstate ctx;
memset
(&ctx,0,
sizeof
(parsestate));
SV **key;
if
((key = hv_fetch(conf,
"order"
, 5, 0)) && SvTRUE(*key)) {
ctx.flags |= MODE_ORDER;
}
if
((key = hv_fetch(conf,
"trim"
, 4, 0)) && SvTRUE(*key)) {
ctx.flags |= MODE_TRIM;
}
if
((key = hv_fetch(conf,
"attr"
, 4, 0)) && SvPOK(*key)) {
ctx.attr = SvPV_nolen(*key);
}
else
{
ctx.attr =
"-"
;
}
if
((key = hv_fetch(conf,
"text"
, 4, 0)) && SvPOK(*key)) {
ctx.text = SvPV_nolen(*key);
}
else
{
ctx.text =
"#text"
;
}
if
((key = hv_fetch(conf,
"cdata"
, 5, 0)) && SvPOK(*key)) {
ctx.cdata = SvPV_nolen(*key);
}
else
{
ctx.cdata = 0;
}
if
((key = hv_fetch(conf,
"comm"
, 4, 0)) && SvPOK(*key)) {
ctx.comm = SvPV_nolen(*key);
}
else
{
ctx.comm = 0;
}
ctx.depth = 0;
ctx.ix = 0;
ctx.result = sv_2mortal(newSVpv(
""
,0));
ctx.attl =
strlen
(ctx.attr);
SvGROW(ctx.result,1024);
h2x( hash, &ctx);
if
(ctx.error) {
croak(
"%s"
, SvPV_nolen(ctx.error));
}
ST(0) = ctx.result;
XSRETURN(1);