#include "Regexp.h"
#include <limits.h>
#include <panda/from_chars.h>

namespace panda { namespace uri { namespace router {

static inline void add_non_printable (string& ret, char c) {
    ret += "\\x";
    ret += "  ";
    auto p = ret.buf() + ret.length() - 2;
    auto res = panda::to_chars(p, p+2, (unsigned char)c, 16);
    assert(!res.ec);
    if (res.ptr == p + 1) {
        p[1] = p[0];
        p[0] = '0';
    }
}

static inline void literal_stringify (string& ret, char c) {
    if (isprint(c)) {
        switch (c) {
            case '.':
            case '[':
            case ']':
            case '(':
            case ')':
            case '?':
            case '*':
            case '+':
            case '{':
            case '}':
            case '\\':
                ret += "\\";
            default : ret += c; break;
        }
    }
    else add_non_printable(ret, c);
}

static inline string literal_stringify (const string& literal) {
    string ret(literal.length());
    for (auto c : literal) {
        switch (c) {
            case '.': ret += "\\."; break;
            default : literal_stringify(ret, c); break;
        }
    }
    return ret;
}

static inline void symclass_stringify (string& ret, char c) {
    if (isprint(c)) {
        switch (c) {
            case ']':
            case '[':
            case '\\':
            case '-':
                ret += "\\";
            default : ret += c; break;
        }
    }
    else add_non_printable(ret, c);
}

static inline string symclass_stringify (const string& chars) {
    string ret(chars.length());
    for (auto c : chars) symclass_stringify(ret, c);
    return ret;
}

static void print_symclass (const Regexp::Symclass& s) {
    printf("CHARS(%s), RANGES(", symclass_stringify(s.chars).c_str());
    for (size_t i = 0; i < s.ranges.size(); ++i) {
        printf("%d-%d", s.ranges[i].from, s.ranges[i].to);
        if (i < s.ranges.size() - 1) printf(",");
    }
    printf(")%s", s.inverse ? " INVERSE" : "");
}

static void print_token (const Regexp::Token& t, string pre) {
    printf("%s", pre.c_str());
    switch (t.type) {
        case Regexp::Token::Type::Literal:
            printf("LITERAL: %s\n", literal_stringify(t.literal).c_str());
            break;
        case Regexp::Token::Type::Symclass:
            printf("SYMCLASS: ");
            print_symclass(t.symclass);
            printf("\n");
            break;
        case Regexp::Token::Type::Group:
            printf("GROUP\n");
            t.regexp->print(pre + "  ");
            break;
        case Regexp::Token::Type::Capture:
            printf("CAPGROUP\n");
            t.regexp->print(pre + "  ");
            break;
    }
}

static void print_quant (const Regexp::Quant& q, string pre) {
    printf("%sQUANT: {%d,%d}\n", pre.c_str(), q.min, q.max);
}

static void print_element (const Regexp::Element& e, string pre) {
    printf("%sELEMENT: \n", pre.c_str());
    print_token(e.token, pre + "  ");
    print_quant(e.quant, pre + "  ");
}

static void print_expression (const Regexp::Expression& expr, string pre) {
    printf("%sEXPR: \n", pre.c_str());
    for (auto& element : expr.elements) {
        print_element(element, pre + "  ");
    }
}

void Regexp::print (string pre) const {
    printf("%sRE: \n", pre.c_str());
    for (size_t i = 0; i < expressions.size(); ++i) {
        print_expression(expressions[i], pre + "  ");
        if (i < expressions.size() - 1) printf("%sOR\n", pre.c_str());
    }
}

static inline bool cmp_range (const Regexp::Symclass& sc, std::initializer_list<Regexp::Symclass::Range> list) {
    if (sc.ranges.size() != list.size()) return false;
    size_t i = 0;
    for (auto r : list) {
        auto& r2 = sc.ranges[i++];
        if (r2.from != r.from || r2.to != r.to) return false;
    }
    return true;
}

string Regexp::to_string() const {
    string ret;
    for (size_t i = 0; i < expressions.size(); ++i) {
        auto& expr = expressions[i];
        for (auto& element : expr.elements) {
            auto& t = element.token;
            auto& q = element.quant;
            switch (t.type) {
                case Regexp::Token::Type::Literal:
                    ret += literal_stringify(t.literal);
                    break;
                case Regexp::Token::Type::Symclass: {
                    auto& sc = t.symclass;
                    if (!sc.inverse) {
                        if (!sc.chars) {
                            if (cmp_range(sc, {{CHAR_MIN, CHAR_MAX}}))              { ret += '.'; break; }
                            if (cmp_range(sc, {{'0', '9'}}))                        { ret += "\\d"; break; }
                            if (cmp_range(sc, {{CHAR_MIN,'0'-1},{'9'+1,CHAR_MAX}})) { ret += "\\D"; break; }
                            if (cmp_range(sc, {{CHAR_MIN, '0'-1},{'9'+1,'A'-1},{'Z'+1,'_'-1},{'_'+1,'a'-1},{'z'+1,CHAR_MAX}})) { ret += "\\W"; break; }
                            if (cmp_range(sc, {{CHAR_MIN,'\t'-1},{'\n'+1,'\r'-1},{'\r'+1,' '-1},{' '+1,'\v'-1},{'\v'+1,CHAR_MAX}})) { ret += "\\S"; break; }
                        }
                        if (!sc.ranges.size()) {
                            if (sc.chars == "\n")        { ret += "\\n"; break; }
                            if (sc.chars == "\r")        { ret += "\\r"; break; }
                            if (sc.chars == "\t\v")      { ret += "\\t"; break; }
                            if (sc.chars == " \n\r\t\v") { ret += "\\s"; break; }
                        }
                        if (sc.chars == "_" && cmp_range(sc, {{'a', 'z'}, {'A', 'Z'}, {'0', '9'}})) { ret += "\\w"; break; }
                    }
                    ret += '[';
                    if (sc.inverse) ret += '^';
                    ret += symclass_stringify(sc.chars);
                    for (auto& r : sc.ranges) {
                        symclass_stringify(ret, r.from);
                        ret += '-';
                        symclass_stringify(ret, r.to);
                    }
                    ret += ']';

                    break;
                }
                case Regexp::Token::Type::Group:
                    ret += "(?:";
                    ret += t.regexp->to_string();
                    ret += ')';
                    break;
                case Regexp::Token::Type::Capture:
                    ret += '(';
                    ret += t.regexp->to_string();
                    ret += ')';
                    break;
            }
            if (!q.is_default()) {
                if      (q.min == 0 && q.max ==  1) ret += '?';
                else if (q.min == 0 && q.max == -1) ret += '*';
                else if (q.min == 1 && q.max == -1) ret += '+';
                else {
                    ret += '{';
                    if (q.min != 0) ret += panda::to_string(q.min);
                    ret += ',';
                    if (q.max != -1) ret += panda::to_string(q.max);
                    ret += '}';
                }
            }
        }
        if (i < expressions.size() - 1) ret += '|';
    }
    return ret;
}

}}}