#define PERL_NO_GET_CONTEXT // we'll define thread context if necessary (faster)
#include "EXTERN.h" // globals/constant import locations
#include "perl.h" // Perl symbols, structures and constants definition
#include "XSUB.h" // xsubpp functions and macros
#define NEED_utf8_to_uvchr_buf
#include "ppport.h"
#include <string.h>
#include <stdlib.h>
#define isEOL(c) ((c >= 0xa) && (c <= 0xd ) || (c == 0x85))
#define isEOL_UTF8(c) (isEOL(c) || c == 0x2028 || c == 0x2029)
char* _minify_ascii(pTHX_ char* src, STRLEN len, STRLEN* packed) {
char* dest;
Newx(dest, len + 1, char);
if (!dest) /* malloc failed */
return dest;
/* initialize to end-of-string in case string contains only spaces */
*dest = 0;
char* end = src + len;
char* ptr = dest;
char* leading = ptr; /* start of leading whitespace, or NULL if none */
char* trailing = NULL; /* start of trailing whitespace, or NULL if none */
if (len == 0) {
*packed = len;
return dest;
}
while (len > 0) {
char c = *src;
src ++;
len --;
if (leading && !isSPACE(c))
leading = NULL;
if (!leading) {
if (isEOL(c)) {
if (trailing) ptr = trailing;
if ( c == '\r' ) c = '\n'; /* Normalise EOL */
leading = ptr;
}
else if (isSPACE(c)) {
if (!trailing) trailing = ptr;
}
else {
trailing = NULL;
}
*ptr++ = c;
}
}
if (trailing) {
ptr = trailing;
char c = *ptr;
if (isEOL(c)) { ptr++; }
}
*packed = ptr - dest;
return dest;
}
STATIC U8* _minify_utf8(pTHX_ U8* src, STRLEN len, STRLEN* packed) {
U8* dest;
Newx(dest, len + 1, U8);
if (!dest) /* malloc failed */
return dest;
/* initialize to end-of-string in case string contains only spaces */
*dest = 0;
U8* end = src + len;
U8* ptr = dest;
U8* leading = ptr; /* start of leading whitespace, or NULL if none */
U8* trailing = NULL; /* start of trailing whitespace, or NULL if none */
if (len == 0) {
*packed = len;
return dest;
}
while (len > 0) {
UV c = *src;
if (UTF8_IS_INVARIANT(c)) {
src ++;
len --;
}
else {
STRLEN skip;
c = utf8_to_uvchr_buf(src, end, &skip);
if (c == 0) {
c = *src;
}
if ((int) skip > 0) {
src += skip;
len -= skip;
}
else {
src ++;
len --;
}
if (len < 0) {
croak("UTF-8 character overflow");
src = end;
len = 0;
trailing = NULL;
}
}
if (leading && !isSPACE(c))
leading = NULL;
if (!leading) {
if (isEOL_UTF8(c)) {
if (trailing) ptr = trailing;
if ( c == '\r' ) c = '\n'; /* Normalise EOL */
leading = ptr;
}
else if (isSPACE(c)) {
if (!trailing) trailing = ptr;
}
else {
trailing = NULL;
}
if (UTF8_IS_INVARIANT(c))
*ptr++ = c;
else
ptr = uvchr_to_utf8( ptr, c);
}
}
if (trailing) {
ptr = trailing;
UV c = *ptr;
STRLEN skip = UTF8SKIP(ptr);
if (!UTF8_IS_INVARIANT(c)) {
c = utf8_to_uvchr_buf(ptr, ptr + skip, &skip);
if (c == 0) {
c = *ptr;
}
}
if (isEOL_UTF8(c)) {
if ((int) skip <= 0) {
skip = 1;
}
ptr += skip;
}
}
*packed = ptr - dest;
return dest;
}
MODULE = Text::Minify::XS PACKAGE = Text::Minify::XS
PROTOTYPES: ENABLE
SV*
minify(inStr)
SV* inStr
INIT:
char* outStr = NULL;
RETVAL = &PL_sv_undef;
CODE:
char* src;
STRLEN len;
STRLEN packed = 0;
U32 is_utf8;
if (SvOK(inStr)) {
src = SvPVX(inStr);
len = SvCUR(inStr);
outStr = _minify_utf8(aTHX_ src, len, &packed);
if (outStr != NULL) {
SV* result = newSVpvn(outStr, packed);
is_utf8 = SvUTF8(inStr);
if (is_utf8)
SvUTF8_on(result);
RETVAL = result;
Safefree(outStr);
}
else {
croak("_minify_utf8 returned NULL");
}
}
OUTPUT:
RETVAL
SV*
minify_ascii(inStr)
SV* inStr
INIT:
char* outStr = NULL;
RETVAL = &PL_sv_undef;
CODE:
char* src;
STRLEN len;
STRLEN packed = 0;
if (SvOK(inStr)) {
src = SvPVX(inStr);
len = SvCUR(inStr);
outStr = _minify_ascii(aTHX_ src, len, &packed);
if (outStr != NULL) {
SV* result = newSVpvn(outStr, packed);
RETVAL = result;
Safefree(outStr);
}
else {
croak("_minify_ascii returned NULL");
}
}
OUTPUT:
RETVAL