#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include "ppport.h"

#include <stdio.h>
#include <stdlib.h>
#include "salcpis.h"

/* coupling functions to sais-lite-lcp-master
 *
 * TODO:
 * support error reporting through the $! variable
 * support accessor functions in the arrays
 * */

#ifndef min
#define min(a,b) ((a) < (b) ? (a) : (b))
#endif
//#define DEBUG_SET_NOOVERLAPS
//#define DEBUG_FILECLIP
//#define DEBUG_SHADOWS


unsigned int *SA = 0;
unsigned int *ISA = 0;
unsigned int *LCP= 0;
//unsigned int *RANKS= 0;
//unsigned int *LAST_RANK= 0;
//unsigned int *RANKP= 0;
size_t n= 0;

/*AV *
get_ranks()
PREINIT:
  const unsigned int *lcp = LCP+1;
INIT:
  unsigned int *RANKS;
  AV * arr; 
  unsigned c;
CODE:
  // alloc 
  arr = newAV();
  RANKS = 0;//(unsigned int *)malloc((size_t) n * sizeof(int));
  if (!RANKS) {
    goto MYOUTPUT;
  }
  // copy
  //memcpy();
  // sort
  //qsort();
  // copy
  for (c=0; c < n; ++c)
  {
    av_store(arr, c, newSVuv(RANKS[c]));
  }
  // free
  free(RANKS);
MYOUTPUT:
  RETVAL = arr;
OUTPUT:
  RETVAL*/

/*
unsigned int
get_next_ranked_index()
PREINIT:
INIT:
CODE:
  if (RANKP < LAST_RANK) {
    RETVAL = *RANKP++;
  } else {
    RETVAL = ~0U;
  }
OUTPUT:
  RETVAL



void
reset_rank_iterator()
PREINIT:
INIT:
CODE:
  RANKP = RANKS;
*/


MODULE = Code::DRY        PACKAGE = Code::DRY
PROTOTYPES: ENABLE

int
build_suffixarray_and_lcp(in)
    SV * in
INIT:
    size_t size;
    unsigned int *sa;
    unsigned int *isa;
    unsigned int i;
    unsigned char * T = NULL;
CODE:
  if (!SA) {
    free(SA);
  }
  if (!LCP) {
    free(LCP);
  }
  if (!ISA) {
    free(ISA);
  }

  size = sv_len(in);
  T = SvPV_nolen(in);
  if (T == NULL) {
    RETVAL = -1;
    goto MYOUTPUT;
  }

  /* TODO
 *   steal this input string from perl
 *   and set the original scalar to undef
 * */

  SA  = (unsigned int *)malloc((size_t)(size+1) * sizeof(int)); // +1 for computing LCP
  LCP = (unsigned int *)malloc((size_t) size    * sizeof(int));
  ISA = (unsigned int *)malloc((size_t)(size  ) * sizeof(int));
  if((SA == NULL) || (LCP == NULL) || (ISA == NULL)) {
    RETVAL = -2;
    if (SA) {
      free(SA);
      SA = 0;
    }
    if (LCP) {
      free(LCP);
      LCP = 0;
    }
    if (ISA) {
      free(ISA);
      ISA = 0;
    }
    goto MYOUTPUT;
  }

  n = size;

  if (sais(T, (int *)SA, (int *)LCP, (int)n) != 0) {
    free(SA);
    free(LCP);
    free(ISA);
    SA = LCP = ISA = 0;
    n = 0;

    RETVAL = -3;
    goto MYOUTPUT;
  }
  // generate the inverse suffix array
  sa = SA;
  isa = ISA;
  for (i = 0; i < n; ++i) {
    isa[*sa++] = i;
  }
  if (n > 0) {
    LCP[0] = 0;
  }
  RETVAL = 0;
MYOUTPUT:
OUTPUT:
  RETVAL


void 
reduce_lcp_to_nonoverlapping_lengths()
PREINIT:
        unsigned int *lcp = LCP+n-1;
  const unsigned int *sa  = SA +n-1;
INIT:
        unsigned c;
CODE:
  if (n < 2) {
    return;
  }
  for (c = n-1; c; --c) {
    if (*lcp > abs(*(sa-1) - *sa)) {
#ifdef DEBUG_SET_NOOVERLAPS
fprintf(stderr, "at %u: offset (%u) + lcp(%u) -1 >= prevSA(%u) -> set lcp from %u to %u\n", c, *sa, *lcp, *(sa -1), *lcp, abs(*(sa-1) - *sa));
#endif
      *lcp = abs(*(sa-1) - *sa);
    }
    --sa;
    --lcp;
  }


void
clip_lcp_to_fileboundaries(boundaries)
    AV * boundaries;
PREINIT:
        unsigned int *lcp = LCP+1;
  const unsigned int *sa  = SA +1;
        unsigned lastb;
        unsigned file_limit;
        unsigned lastFilelimit;
INIT:
CODE:
  {
    const unsigned maxbound = av_len(boundaries);
    if (maxbound < 1) {
#ifdef DEBUG_FILECLIP
fprintf(stderr, "no file limits (%u), abort\n", maxbound);
#endif
      return;
    }
    if (n < 2) {
#ifdef DEBUG_FILECLIP
fprintf(stderr, "size < 2 (%u), abort\n", n);
#endif
      return;
    }

    /* return if offsets are not sorted or contain 'holes' empty slots */
    if (!av_exists(boundaries, maxbound)) {
#ifdef DEBUG_FILECLIP
fprintf(stderr, "last file boundary at (%u) is empty slot, abort\n", maxbound);
#endif
      return;
    }
    lastb = SvIV(*av_fetch(boundaries, maxbound, 0));

    {
        int c;
        for (c = maxbound-1; c >= 0; --c) {
            unsigned thisb;
            if (!av_exists(boundaries, c)) {
#ifdef DEBUG_FILECLIP
fprintf(stderr, "file boundary at (%u) is empty slot, abort\n", c);
#endif
                return;
            }
            thisb = SvIV(*av_fetch(boundaries, c, 0));
	     
            if (thisb >= lastb) {
#ifdef DEBUG_FILECLIP
fprintf(stderr, "file boundary at (%u) is not greater (%u) than previous one: (%u), abort\n", c, thisb, lastb);
#endif
                return;
            }
            lastb = thisb;
        }
    }
    /* now make sure that (*sa + *lcp - 1) does not extend past their file boundary */
    lastFilelimit = ~0;
    {
        int c;
        for (c = 1; c < n; ++c) {

            const unsigned int offset = *sa;
            unsigned left  = 0;
            unsigned right = maxbound;
            unsigned minlcp;

            if (0 == right) {
                file_limit = 0;
            } else {

                unsigned test = (left + right) / 2;

                file_limit = 0;
                while (left < right) {
                    unsigned this_fileend;
                    unsigned previous_fileend;
                    if (((test > 0 && (previous_fileend = SvIV(*av_fetch(boundaries, test-1, 0))) < offset) || test == 0)
                         && offset <= (this_fileend = SvIV(*av_fetch(boundaries, test, 0)))) {
                        file_limit = this_fileend;
                        break;
                    }

                    if (test > 0 && previous_fileend >= offset) {
                        right = test;
                        test  = (left + right    ) / 2;
                    } else {
                        left  = test;
                        test  = (left + right + 1) / 2;
                    }
                }
            }

            /* if previous entry is shorter than current lcp -> adjust */
            /* if current entry is shorter than current lcp -> adjust */
            minlcp = min(*lcp, 1+ min(lastFilelimit - *(sa -1), file_limit - *(sa)));
            if (*lcp > minlcp) {
#ifdef DEBUG_FILECLIP
fprintf(stderr, "at %u: offset (%u) + lcp(%u) -1 >= file limit(%u)  or  offset(%u) + lcp(%u) - 1 >= file limit(%u)  -> set lcp from %u to %u\n", 
        c, *(sa - 1), *lcp, lastFilelimit, *sa, *lcp, file_limit, *lcp, minlcp);
#endif
                *lcp = minlcp;
            }
            lastFilelimit = file_limit;
            ++sa;
            ++lcp;
        }
    }
  }



void
set_lcp_to_zero_for_shadowed_substrings()
INIT:
   unsigned int *isa;
   unsigned int c;
   unsigned int entry;
   unsigned int lcp;
   unsigned int offset;
   unsigned int offset2;
   unsigned int offsetprev2;
   unsigned int last_lcp;
CODE:
   isa = ISA;
   last_lcp = ~0;
   for (c = 0; c < n; ++c, last_lcp = lcp) {
      entry  = *isa++;
      lcp    = LCP[entry];
      if (entry > 0 && c > 0 && last_lcp >= lcp) {
         offset  = SA[entry  ];
         offset2 = SA[entry-1];
         offsetprev2 = ISA[c-1];
         if (0 == offsetprev2) {
#ifdef DEBUG_SHADOWS
fprintf(stderr, "at %u: first suffix, cannot go back", c);
#endif
            continue;
         }
         offsetprev2 = SA[offsetprev2-1] + 1;
         if (offsetprev2 == offset2) {
#ifdef DEBUG_SHADOWS
fprintf(stderr, "  at %u: offset (%u) set lcp from %u to 0\n", ISA[offset], offset, LCP[ISA[offset]]);
#endif
            LCP[ISA[offset]] = 0;
         }
      }
   }

AV *
get_sa()
INIT:
  AV * arr; 
  unsigned c;
CODE:
  arr = newAV();
  for (c=0; c < n; ++c)
  {
    av_store(arr, c, newSVuv(SA[c]));
  } 
  RETVAL = arr;
OUTPUT:
  RETVAL



AV *
get_lcp()
INIT:
  AV * arr; 
  unsigned c;
CODE:
  arr = newAV();
  for (c=0; c < n; ++c)
  {
    av_store(arr, c, newSVuv(LCP[c]));
  } 
  RETVAL = arr;
OUTPUT:
  RETVAL

unsigned int
get_offset_at(index)
   unsigned index;
INIT:
CODE:
  if (!n || index >= n) {
    RETVAL = ~0U;
  } else {
    RETVAL = SA[index];
  }
OUTPUT:
  RETVAL

unsigned int
get_isa_at(index)
   unsigned index;
INIT:
CODE:
  if (!n || index >= n) {
    RETVAL = ~0U;
  } else {
    RETVAL = ISA[index];
  }
OUTPUT:
  RETVAL

unsigned int
get_len_at(index)
   unsigned int index;
INIT:
CODE:
  if (!n || index >= n) {
    RETVAL = ~0U;
  } else {
    RETVAL = LCP[index];
  }
OUTPUT:
  RETVAL

int 
get_size()
CODE:
  RETVAL = n;
OUTPUT:
  RETVAL

void
__free_all()
CODE:
  if (!SA) {
    free(SA);
  }
  if (!LCP) {
    free(LCP);
  }
  if (!ISA) {
    free(ISA);
  }
  SA = LCP = ISA = 0;
  n = 0;