XS.xs - metacpan.org

#include <EXTERN.h>
#include <perl.h>
#include <XSUB.h>

#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include <ctype.h>

/* Canonical replacement text for the block comment that OPENS a "Mac/IE
 * comment hack": a comment whose terminator is preceded by a backslash. */
const char* start_ie_hack = "/*\\*/";
/* Canonical replacement text for the block comment that CLOSES the hack. */
const char* end_ie_hack   = "/**/";

/* ****************************************************************************
 * CHARACTER CLASS METHODS
 * ****************************************************************************
 */
/* True for horizontal whitespace: a blank or a tab. */
bool charIsSpace(char ch) {
    return (ch == ' ') || (ch == '\t');
}
/* True for line-ending style whitespace: newline, carriage return, form feed. */
bool charIsEndspace(char ch) {
    switch (ch) {
        case '\n':
        case '\r':
        case '\f':
            return 1;
        default:
            return 0;
    }
}
/* True for any whitespace: horizontal (space/tab) or line-ending. */
bool charIsWhitespace(char ch) {
    if (charIsSpace(ch))
        return 1;
    return charIsEndspace(ch);
}
/* True for the ASCII decimal digits '0'..'9'. */
bool charIsNumeric(char ch) {
    return ('0' <= ch) && (ch <= '9');
}
/* True for characters that may appear in an "identifier" token: ASCII
 * letters, digits, and the punctuation '_', '.', '#', '@' and '%'.
 */
bool charIsIdentifier(char ch) {
    switch (ch) {
        case '_':
        case '.':
        case '#':
        case '@':
        case '%':
            return 1;
        default:
            break;
    }
    return ((ch >= 'a') && (ch <= 'z'))
        || ((ch >= 'A') && (ch <= 'Z'))
        || ((ch >= '0') && (ch <= '9'));
}
/* True for characters around which whitespace can be removed on BOTH
 * sides: '{', '}', ';', ',', '~' and '>'.
 */
bool charIsInfix(char ch) {
    switch (ch) {
        case '{':
        case '}':
        case ';':
        case ',':
        case '~':
        case '>':
            return 1;
        default:
            return 0;
    }
}
/* True for characters AFTER which whitespace can be removed: every infix
 * character, plus '(' and ':'.
 */
bool charIsPrefix(char ch) {
    switch (ch) {
        case '(':   /* requires leading WS when used in @media */
        case ':':   /* requires leading WS when used in pseudo-selector */
            return 1;
        default:
            return charIsInfix(ch);
    }
}
/* True for characters BEFORE which whitespace can be removed: every infix
 * character, plus ')'.
 */
bool charIsPostfix(char ch) {
    /* ')' requires trailing WS for MSIE, so it is postfix-only */
    return (ch == ')') ? 1 : charIsInfix(ch);
}

/* ****************************************************************************
 * TYPE DEFINITIONS
 * ****************************************************************************
 */
/* The kind of token a Node holds. */
typedef enum {
    NODE_EMPTY,         /* freshly allocated node, nothing extracted into it yet */
    NODE_WHITESPACE,    /* run of whitespace characters */
    NODE_BLOCKCOMMENT,  /* a block comment, delimiters included */
    NODE_IDENTIFIER,    /* run of identifier characters */
    NODE_LITERAL,       /* quoted string literal, quotes included */
    NODE_SIGIL          /* any other single character */
} NodeType;

/* A single token, linked into a doubly linked list of tokens. */
struct _Node;
typedef struct _Node Node;
struct _Node {
    /* linked list pointers */
    Node*       prev;
    Node*       next;
    /* node internals */
    const char* contents;   /* start of the token text; the node does not own it,
                             * and it is delimited by "length", not by a NUL */
    size_t      length;     /* number of bytes of "contents" in this token */
    NodeType    type;       /* what kind of token this is */
    bool        can_prune;  /* when false, the pruning pass must keep this node */
};

/* Number of Nodes held by each NodeSet. */
#define NODE_SET_SIZE 50000

/* A fixed-size pool of Nodes.  Nodes are handed out sequentially from
 * "nodes"; sets are chained together through "next" as they fill up.
 */
struct _NodeSet;
typedef struct _NodeSet NodeSet;
struct _NodeSet {
    /* link to next NodeSet */
    NodeSet*    next;
    /* Nodes in this Set */
    Node        nodes[NODE_SET_SIZE];
    /* index of the next unused entry in "nodes" */
    size_t      next_node;
};

/* State for one minification run: the Node pools, the token list, and the
 * read position within the CSS being tokenized.
 */
typedef struct {
    /* singly linked list of NodeSets */
    NodeSet*    head_set;
    NodeSet*    tail_set;   /* the set new Nodes are currently taken from */
    /* doubly linked list of Nodes */
    Node*       head;
    Node*       tail;
    /* doc internals */
    const char* buffer;     /* the CSS text being tokenized */
    size_t      length;     /* length of "buffer", in bytes */
    size_t      offset;     /* position in "buffer" of the next token to extract */
} CssDoc;

/* ****************************************************************************
 * NODE CHECKING MACROS/FUNCTIONS
 * ****************************************************************************
 */

/* Returns true when the node's text is exactly the given string, compared
 * case INSENSITIVELY.
 */
bool nodeEquals(Node* node, const char* string) {
    /* equal length is a precondition for equality */
    if (strlen(string) == node->length)
        return (strncasecmp(node->contents, string, node->length) == 0);
    return 0;
}

/* Checks to see if the node contains the given string, case INSENSITIVELY.
 *
 * The search looks for either case of the needle's first character and then
 * compares the full needle at each candidate position.  Candidates whose
 * match would extend past the end of the node are rejected.
 *
 * Returns 1 if the needle was found within the node, 0 otherwise (an empty
 * needle is never found).
 */
bool nodeContains(Node* node, const char* string) {
    const char* haystack = node->contents;
    const char* endofhay = haystack + node->length;
    size_t len = strlen(string);
    /* accept-set for strpbrk(): both cases of the needle's first character.
     * It MUST be NUL-terminated, as strpbrk() takes a C string; the casts
     * keep the <ctype.h> calls defined for negative "char" values.
     */
    char ul_start[3] = {
        (char)tolower((unsigned char)*string),
        (char)toupper((unsigned char)*string),
        '\0'
    };

    /* if node is shorter we know we're not going to have a match */
    if (len > node->length)
        return 0;

    /* find the needle in the haystack */
    while (haystack && *haystack) {
        /* find first char of needle */
        haystack = strpbrk( haystack, ul_start );
        /* didn't find it? Oh well. */
        if (haystack == NULL)
            return 0;
        /* found it, but will the end be past the end of our node? */
        if ((haystack+len) > endofhay)
            return 0;
        /* see if it matches */
        if (strncasecmp(haystack, string, len) == 0)
            return 1;
        /* nope, move onto next character in the haystack */
        haystack ++;
    }

    /* no match */
    return 0;
}

/* Returns true when the node's text starts with the given string, compared
 * case INSENSITIVELY.
 */
bool nodeBeginsWith(Node* node, const char* string) {
    const size_t prefix_len = strlen(string);

    /* a prefix longer than the node itself can never match */
    if (node->length < prefix_len)
        return 0;

    return (strncasecmp(node->contents, string, prefix_len) == 0);
}

/* Returns true when the node's text ends with the given string, compared
 * case INSENSITIVELY.
 */
bool nodeEndsWith(Node* node, const char* string) {
    const size_t suffix_len = strlen(string);
    const char* tail;

    /* a suffix longer than the node itself can never match */
    if (node->length < suffix_len)
        return 0;

    /* compare against the last "suffix_len" bytes of the node */
    tail = node->contents + (node->length - suffix_len);
    return (strncasecmp(tail, string, suffix_len) == 0);
}

/* Macros to help see what kind of node we've got.
 *
 * Every macro parameter is parenthesized in the expansion so that any
 * pointer-valued expression can be passed safely.  Note that "node" may be
 * evaluated more than once; don't pass expressions with side effects.
 */
#define nodeIsWHITESPACE(node)          ((node)->type == NODE_WHITESPACE)
#define nodeIsBLOCKCOMMENT(node)        ((node)->type == NODE_BLOCKCOMMENT)
#define nodeIsIDENTIFIER(node)          ((node)->type == NODE_IDENTIFIER)
#define nodeIsLITERAL(node)             ((node)->type == NODE_LITERAL)
#define nodeIsSIGIL(node)               ((node)->type == NODE_SIGIL)

/* node has no type, or has no text */
#define nodeIsEMPTY(node)               (((node)->type == NODE_EMPTY) || ((node)->length == 0) || ((node)->contents == NULL))
/* block comment whose terminator is escaped with a backslash */
#define nodeIsMACIECOMMENTHACK(node)    (nodeIsBLOCKCOMMENT(node) && nodeEndsWith((node),"\\*/"))
/* sigil after/before which whitespace can be removed */
#define nodeIsPREFIXSIGIL(node)         (nodeIsSIGIL(node) && charIsPrefix((node)->contents[0]))
#define nodeIsPOSTFIXSIGIL(node)        (nodeIsSIGIL(node) && charIsPostfix((node)->contents[0]))
/* node is exactly the single character "ch"; the length is checked first so
 * that "contents" is never read for a node that has no text */
#define nodeIsCHAR(node,ch)             (((node)->length == 1) && ((node)->contents[0] == (ch)))

/* Checks if this node is the start of "!important" (with optional
 * intervening whitespace between the "!" and the "important").
 *
 * Returns 1 when "node" is a single "!" character and the first
 * non-whitespace node after it is the identifier "important" (compared case
 * insensitively); returns 0 otherwise, including when "node" is NULL.
 */
bool nodeStartsBANGIMPORTANT(Node* node) {
    Node* next;

    if (!node) return 0;

    /* Doesn't start with a "!", nope */
    if (!nodeIsCHAR(node,'!')) return 0;

    /* Skip any following whitespace.  We have to advance from "next" (not
     * from "node"), otherwise the loop never moves past the first
     * whitespace node and spins forever.
     */
    next = node->next;
    while (next && nodeIsWHITESPACE(next)) {
        next = next->next;
    }
    if (!next) return 0;

    /* Next node _better be_ "important" */
    if (!nodeIsIDENTIFIER(next)) return 0;
    if (nodeEquals(next, "important")) return 1;
    return 0;
}

/* ****************************************************************************
 * NODE MANIPULATION FUNCTIONS
 * ****************************************************************************
 */
/* Hands out the next unused Node from the document's Node pool, chaining a
 * new NodeSet onto the pool first if the current one is exhausted.  The
 * returned Node is reset to an empty, unlinked, prunable state.
 */
Node* CssAllocNode(CssDoc* doc) {
    NodeSet* pool = doc->tail_set;
    Node* fresh;

    /* current pool exhausted?  chain on a new (zeroed) one */
    if (pool->next_node >= NODE_SET_SIZE) {
        NodeSet* grown;
        Newz(0, grown, 1, NodeSet);
        pool->next = grown;
        doc->tail_set = grown;
        pool = grown;
    }

    /* take the next slot out of the pool */
    fresh = &pool->nodes[pool->next_node];
    pool->next_node ++;

    /* reset the slot to a pristine state */
    fresh->type = NODE_EMPTY;
    fresh->contents = NULL;
    fresh->length = 0;
    fresh->can_prune = 1;
    fresh->prev = NULL;
    fresh->next = NULL;
    return fresh;
}

/* Points the node at "len" bytes of text starting at "string".  The text is
 * referenced, not copied.
 */
void CssSetNodeContents(Node* node, const char* string, size_t len) {
    node->length   = len;
    node->contents = string;
}

/* Unlinks the node from its list by joining its neighbours to each other.
 * The node's own prev/next pointers are left as they were.
 */
void CssDiscardNode(Node* node) {
    Node* before = node->prev;
    Node* after  = node->next;

    if (before)
        before->next = after;
    if (after)
        after->prev = before;
}

/* Inserts "node" into the list immediately after "element". */
void CssAppendNode(Node* element, Node* node) {
    Node* follower = element->next;

    /* whatever used to follow "element" now follows "node" */
    if (follower)
        follower->prev = node;
    node->next = follower;
    /* ... and "node" sits directly behind "element" */
    node->prev = element;
    element->next = node;
}

/* ****************************************************************************
 * TOKENIZING FUNCTIONS
 * ****************************************************************************
 */

/* Extracts a quoted string literal starting at the document's current
 * offset.  The character at that offset is taken as the quote delimiter;
 * the node receives everything up to and including the matching closing
 * delimiter.  A backslash causes the character after it to be skipped.
 * Croaks if the end of the document is reached first.
 */
void _CssExtractLiteral(CssDoc* doc, Node* node) {
    const char* text   = doc->buffer;
    const size_t begin = doc->offset;
    const char quote   = text[begin];
    size_t pos;

    /* scan from just after the opening quote */
    for (pos = begin + 1; pos < doc->length; pos++) {
        const char ch = text[pos];
        if (ch == '\\') {
            /* escaped character; step over it (the loop steps once more) */
            pos++;
            continue;
        }
        if (ch == quote) {
            /* closing quote; the literal includes both quotes */
            CssSetNodeContents(node, text + begin, pos - begin + 1);
            node->type = NODE_LITERAL;
            return;
        }
    }
    croak( "unterminated quoted string literal" );
}

/* Extracts a block comment starting at the document's current offset (which
 * the caller has already checked begins with the comment opener).  The node
 * receives the whole comment, opening and closing delimiters included.
 * Croaks if no closing delimiter is found before the end of the document.
 */
void _CssExtractBlockComment(CssDoc* doc, Node* node) {
    const char* buf = doc->buffer;
    /* start searching after the two-character comment opener */
    size_t offset   = doc->offset + 2;

    /* Search for the two-character closer.  The loop bound guarantees that
     * both "buf[offset]" and "buf[offset+1]" lie inside the document, so we
     * never depend on there being a NUL byte just past its end.
     */
    while ((offset + 1) < doc->length) {
        if ((buf[offset] == '*') && (buf[offset+1] == '/')) {
            const char* start = buf + doc->offset;
            size_t length     = offset - doc->offset + 2;
            CssSetNodeContents(node, start, length);
            node->type = NODE_BLOCKCOMMENT;
            return;
        }
        /* move onto next character */
        offset ++;
    }

    croak( "unterminated block comment" );
}

/* Extracts the run of consecutive whitespace characters that starts at the
 * document's current offset.
 */
void _CssExtractWhitespace(CssDoc* doc, Node* node) {
    const size_t begin = doc->offset;
    size_t end;

    /* advance "end" to the first non-whitespace character (or end of doc) */
    for (end = begin; end < doc->length; end++) {
        if (!charIsWhitespace(doc->buffer[end]))
            break;
    }

    node->type = NODE_WHITESPACE;
    CssSetNodeContents(node, doc->buffer + begin, end - begin);
}

/* Extracts the run of consecutive identifier characters that starts at the
 * document's current offset.
 */
void _CssExtractIdentifier(CssDoc* doc, Node* node) {
    const size_t begin = doc->offset;
    size_t end;

    /* advance "end" to the first non-identifier character (or end of doc) */
    for (end = begin; end < doc->length; end++) {
        if (!charIsIdentifier(doc->buffer[end]))
            break;
    }

    node->type = NODE_IDENTIFIER;
    CssSetNodeContents(node, doc->buffer + begin, end - begin);
}

/* Extracts exactly one character, at the document's current offset, as a
 * sigil node.
 */
void _CssExtractSigil(CssDoc* doc, Node* node) {
    node->type = NODE_SIGIL;
    CssSetNodeContents(node, &doc->buffer[doc->offset], 1);
}

/* Tokenizes the document's buffer, from its current offset onwards, into a
 * linked list of Nodes, and returns the head of that list (NULL if no
 * tokens were produced).  Tokenizing stops at the end of the buffer or at
 * the first NUL byte.  The "string" parameter is not used; the text is read
 * from "doc->buffer".
 */
Node* CssTokenizeString(CssDoc* doc, const char* string) {
    while ((doc->offset < doc->length) && (doc->buffer[doc->offset])) {
        const char* here = doc->buffer + doc->offset;
        const char first = here[0];
        Node* token      = CssAllocNode(doc);

        /* the very first token is both head and tail of the list */
        if (!doc->head)
            doc->head = token;
        if (!doc->tail)
            doc->tail = token;

        /* pick an extractor based on how the token starts */
        if ((first == '/') && (here[1] == '*'))
            _CssExtractBlockComment(doc, token);
        else if ((first == '"') || (first == '\''))
            _CssExtractLiteral(doc, token);
        else if (charIsWhitespace(first))
            _CssExtractWhitespace(doc, token);
        else if (charIsIdentifier(first))
            _CssExtractIdentifier(doc, token);
        else
            _CssExtractSigil(doc, token);

        /* step over the text we just consumed */
        doc->offset += token->length;

        /* link the token onto the end of the list */
        if (token != doc->tail)
            CssAppendNode(doc->tail, token);
        doc->tail = token;
    }

    return doc->head;
}

/* ****************************************************************************
 * MINIFICATION FUNCTIONS
 * ****************************************************************************
 */

/* Skips over a leading "zero value" in the provided string and returns a
 * pointer to the first character after it.
 *
 * Leading zeros are always skipped.  If they are followed by a decimal point
 * whose fractional part is all zeros, the decimal point and those zeros are
 * skipped as well.  If the fractional part contains a non-zero digit, the
 * returned pointer is left at the decimal point.  When there is nothing to
 * skip, the original pointer is returned.
 */
const char* CssSkipZeroValue(const char* str) {
    const char* integer_end;
    const char* fraction;

    /* step over the zeros of the integer part */
    while (*str == '0')
        str ++;
    integer_end = str;

    /* no fractional part?  then that's all there is to skip */
    if (*integer_end != '.')
        return integer_end;

    /* step over the decimal point and the zeros that follow it */
    fraction = integer_end + 1;
    while (*fraction == '0')
        fraction ++;

    /* a digit here means the fraction is significant; keep the "." */
    return charIsNumeric(*fraction) ? integer_end : fraction;
}

/* Checks to see if the string BEGINS WITH a known CSS unit.
 *
 * This is a prefix match: "str" is not required to end where the unit ends.
 * Each unit is compared over its full length, so that e.g. "vmin" is only
 * recognized when all four of its characters are present.
 *
 * Returns 1 if "str" starts with a known unit, 0 otherwise.
 */
bool CssIsKnownUnit(const char* str) {
    static const char* const known_units[] = {
        "em", "ex", "ch", "rem",
        "vw", "vh", "vmin", "vmax",
        "cm", "mm", "in", "px", "pt", "pc",
        "%"
    };
    size_t idx;

    for (idx = 0; idx < (sizeof known_units / sizeof known_units[0]); idx++) {
        const char* unit = known_units[idx];
        if (0 == strncmp(str, unit, strlen(unit))) { return 1; }
    }

    /* Nope */
    return 0;
}

/* Collapses all of the nodes to their shortest possible representation.
 *
 * Walks the list from "curr" to its end, rewriting nodes in place (no nodes
 * are added or removed here):
 *   - whitespace runs are shortened to their first character
 *   - the comments that open/close a Mac/IE comment hack are replaced with
 *     canonical text and marked as not prunable
 *   - identifiers that start with a zero value have their redundant zeros
 *     (and, outside of parentheses, a known unit) stripped
 *
 * "inFunction" is a single flag that is set by "(" and cleared by ")"; it
 * does not count nesting depth.
 */
void CssCollapseNodes(Node* curr) {
    bool inMacIeCommentHack = 0;
    bool inFunction = 0;
    while (curr) {
        Node* next = curr->next;
        switch (curr->type) {
            case NODE_WHITESPACE:
                /* collapse to a single whitespace character (the first one
                 * of the run is the one that's kept) */
                curr->length = 1;
                break;
            case NODE_BLOCKCOMMENT:
                if (!inMacIeCommentHack && nodeIsMACIECOMMENTHACK(curr)) {
                    /* START of mac/ie hack */
                    CssSetNodeContents(curr, start_ie_hack, strlen(start_ie_hack));
                    curr->can_prune = 0;
                    inMacIeCommentHack = 1;
                }
                else if (inMacIeCommentHack && !nodeIsMACIECOMMENTHACK(curr)) {
                    /* END of mac/ie hack; the first ordinary comment after
                     * the start of the hack is what closes it */
                    CssSetNodeContents(curr, end_ie_hack, strlen(end_ie_hack));
                    curr->can_prune = 0;
                    inMacIeCommentHack = 0;
                }
                break;
            case NODE_IDENTIFIER:
            {
                /* if the node doesn't begin with a "zero", nothing to collapse */
                const char* ptr = curr->contents;
                if ( (*ptr != '0') && (*ptr != '.' )) {
                    /* not "0" and not "point-something" */
                    break;
                }
                if ( (*ptr == '.') && (*(ptr+1) != '0') ) {
                    /* "point-something", but not "point-zero" */
                    break;
                }

                /* skip all leading zeros */
                ptr = CssSkipZeroValue(curr->contents);

                /* if we didn't skip anything, no Zeros to collapse */
                if (ptr == curr->contents) {
                    break;
                }

                /* did we skip the entire thing, and thus the Node is "all zeros"? */
                size_t skipped = ptr - curr->contents;
                if (skipped == curr->length) {
                    /* nothing but zeros, so truncate to "0" */
                    CssSetNodeContents(curr, "0", 1);
                    break;
                }

                /* was it a zero percentage? */
                if (*ptr == '%') {
                    /* a zero percentage; truncate to "0%" */
                    CssSetNodeContents(curr, "0%", 2);
                    break;
                }

                /* if all we're left with is a known CSS unit, and we're NOT in
                 * a function (where we have to preserve units), just truncate
                 * to "0"
                 */
                if (!inFunction && CssIsKnownUnit(ptr)) {
                    /* not in a function, and is a zero unit; truncate to "0" */
                    CssSetNodeContents(curr, "0", 1);
                    break;
                }

                /* otherwise, just skip leading zeros, and preserve any unit */
                /* ... do we need to back up one char to find a significant zero?
                 * (when the remainder starts with "." no leading zero is kept) */
                if (*ptr != '.') { ptr --; }
                /* ... if that's not the start of the buffer ... */
                if (ptr != curr->contents) {
                    /* set the buffer to "0 + units", blowing away the earlier bits;
                     * the node now points at a shorter tail of its original text */
                    size_t len = curr->length - (ptr - curr->contents);
                    CssSetNodeContents(curr, ptr, len);
                }
                break;
            }
            case NODE_SIGIL:
                /* track whether we're between a "(" and the next ")" */
                if (nodeIsCHAR(curr,'(')) { inFunction = 1; }
                if (nodeIsCHAR(curr,')')) { inFunction = 0; }
                break;
            default:
                break;
        }
        curr = next;
    }
}

/* checks to see whether we can prune the given node from the list.
 *
 * THIS is the function that controls the bulk of the minification process.
 */
/* Pruning verdicts: which node (if any), relative to the node that was
 * examined, should be removed from the list.
 */
enum {
    PRUNE_NO,           /* remove nothing */
    PRUNE_PREVIOUS,     /* remove the node before the examined one */
    PRUNE_CURRENT,      /* remove the examined node itself */
    PRUNE_NEXT          /* remove the node after the examined one */
};
/* Decides what, if anything, can be pruned around the given node.  Returns
 * one of the PRUNE_* verdicts; nodes flagged as not prunable always yield
 * PRUNE_NO.
 */
int CssCanPrune(Node* node) {
    Node* before = node->prev;
    Node* after  = node->next;

    /* nodes that have been pinned are never touched */
    if (!node->can_prune)
        return PRUNE_NO;

    /* empty nodes carry nothing; drop them */
    if (node->type == NODE_EMPTY)
        return PRUNE_CURRENT;

    if (node->type == NODE_WHITESPACE) {
        /* whitespace on either side of a comment block goes */
        if (after && nodeIsBLOCKCOMMENT(after))
            return PRUNE_CURRENT;
        if (before && nodeIsBLOCKCOMMENT(before))
            return PRUNE_CURRENT;
        /* whitespace before "!important" goes */
        if (after && nodeStartsBANGIMPORTANT(after))
            return PRUNE_CURRENT;
        /* leading and trailing whitespace goes */
        if (!before)
            return PRUNE_CURRENT;
        if (!after)
            return PRUNE_CURRENT;
        /* everything else stays */
        return PRUNE_NO;
    }

    if (node->type == NODE_BLOCKCOMMENT) {
        /* comments go, unless they mention "copyright" */
        return nodeContains(node,"copyright") ? PRUNE_NO : PRUNE_CURRENT;
    }

    if (node->type == NODE_SIGIL) {
        /* whitespace following a "prefix" sigil goes */
        if (nodeIsPREFIXSIGIL(node) && after && nodeIsWHITESPACE(after))
            return PRUNE_NEXT;
        /* whitespace preceding a "postfix" sigil goes */
        if (nodeIsPOSTFIXSIGIL(node) && before && nodeIsWHITESPACE(before))
            return PRUNE_PREVIOUS;
        /* a ";" directly before a "}" goes */
        if (nodeIsCHAR(node,';') && after && nodeIsSIGIL(after) && nodeIsCHAR(after,'}'))
            return PRUNE_CURRENT;
        return PRUNE_NO;
    }

    /* identifiers, literals, and anything else are always kept */
    return PRUNE_NO;
}

/* Walks the list starting at "head", removing every node the pruning rules
 * allow, and returns the head of the resulting list (which may differ from
 * the one passed in, or be NULL if everything was pruned).
 */
Node* CssPruneNodes(Node *head) {
    Node* cursor = head;

    while (cursor) {
        /* ask what can be pruned, then snapshot the neighbours */
        const int verdict = CssCanPrune(cursor);
        Node* before = cursor->prev;
        Node* after  = cursor->next;

        if (verdict == PRUNE_PREVIOUS) {
            /* drop the node behind us and re-examine the current one */
            CssDiscardNode(before);
            if (before == head)
                head = cursor;
        }
        else if (verdict == PRUNE_CURRENT) {
            /* drop this node, then step back if we can (else forward) */
            Node* resume = before ? before : after;
            CssDiscardNode(cursor);
            if (cursor == head)
                head = resume;
            cursor = resume;
        }
        else if (verdict == PRUNE_NEXT) {
            /* drop the node ahead of us and re-examine the current one */
            CssDiscardNode(after);
        }
        else {
            /* nothing to prune here; move along */
            cursor = after;
        }
    }

    return head;
}

/* ****************************************************************************
 * Minifies the given CSS, returning a newly allocated string back to the
 * caller (YOU'RE responsible for freeing its memory, with Safefree()).
 *
 * Returns NULL when there is nothing to return: either the input produced
 * no tokens, or every token was pruned away.
 *
 * The NodeSets used while minifying are released on every return path.
 * NOTE(review): if tokenizing croaks (unterminated literal/comment) control
 * does not come back here, and the NodeSets are not released.
 * ****************************************************************************
 */
char* CssMinify(const char* string) {
    char* results = NULL;
    CssDoc doc;
    Node* head;

    /* initialize our CSS document object */
    doc.head = NULL;
    doc.tail = NULL;
    doc.buffer = string;
    doc.length = strlen(string);
    doc.offset = 0;
    Newz(0, doc.head_set, 1, NodeSet);
    doc.tail_set = doc.head_set;

    /* PASS 1: tokenize CSS into a list of nodes */
    head = CssTokenizeString(&doc, string);
    if (head) {
        /* PASS 2: collapse nodes */
        CssCollapseNodes(head);
        /* PASS 3: prune nodes */
        head = CssPruneNodes(head);
    }
    /* PASS 4: re-assemble CSS into single string (if anything is left) */
    if (head) {
        Node* curr;
        char* ptr;
        /* allocate the result buffer to the same size as the original CSS; in
         * a worst case scenario that's how much memory we'll need for it.
         */
        Newz(0, results, (doc.length+1), char);
        ptr = results;
        /* copy node contents into result buffer */
        curr = head;
        while (curr) {
            memcpy(ptr, curr->contents, curr->length);
            ptr += curr->length;
            curr = curr->next;
        }
        *ptr = 0;
    }
    /* free memory used by the NodeSets; this has to happen whether or not we
     * produced any output, otherwise the "nothing to return" cases leak them
     */
    {
        NodeSet* curr = doc.head_set;
        while (curr) {
            NodeSet* next = curr->next;
            Safefree(curr);
            curr = next;
        }
    }
    /* return resulting minified CSS (or NULL) back to caller */
    return results;
}



MODULE = CSS::Minifier::XS              PACKAGE = CSS::Minifier::XS

PROTOTYPES: disable

SV*
minify(string)
    SV* string
    INIT:
        char* buffer = NULL;
        RETVAL = &PL_sv_undef;
    CODE:
        /* minify the CSS.  SvPV_nolen() stringifies the argument if it isn't
         * already a string; reading the string slot directly with SvPVX() is
         * only valid when the SV is known to contain a string.
         */
        buffer = CssMinify( SvPV_nolen(string) );
        /* hand back the minified CSS (if we had any); newSVpv() copies the
         * buffer, so we release our copy afterwards
         */
        if (buffer != NULL) {
            RETVAL = newSVpv(buffer, 0);
            Safefree( buffer );
        }
    OUTPUT:
        RETVAL
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)