#define PERL_NO_GET_CONTEXT
#define NO_XSLOCKS
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include "ppport.h"

#include <assert.h>

#define NUM_LATTICES 4

/*
 * This program must deal with integers that are too big to be
 * represented by 32 bits.
 *
 * They are represented by AM_BIG_INT, which is typedef'd to
 *
 * unsigned long a[8]
 *
 * where each a[i] < 2*16.  Such an array represents the integer
 *
 * a[0] + a[1] * 2^16 + ... + a[7] * 2^(7*16).
 *
 * We only use 16 bits of the unsigned long instead of 32, so that
 * when we add or multiply two large integers, we have room for overflow.
 * After any addition or multiplication, the result is carried so that
 * each element of the array is again < 2*16.
 *
 * Someday I may rewrite this in assembler.
 *
 */
typedef unsigned short AM_SHORT;
typedef unsigned long AM_LONG;
typedef AM_LONG AM_BIG_INT[8];

#define high_bits(x) x >> 16
#define low_bits(x) x & 0xffff

#define carry(var, ind) \
  var[ind + 1] += high_bits(var[ind]); \
  var[ind] = low_bits(var[ind])

/* carry macros for math using AM_BIG_INT */
#define carry_pointer(p) \
  *(p + 1) += high_bits(*(p)); \
  *(p) = low_bits(*(p))

#define carry_replace(var, ind) \
  var[ind + 1] = high_bits(var[ind]); \
  var[ind] = low_bits(var[ind])

#define hash_pointer_from_stack(ind) \
  (HV *) SvRV(ST(ind))

#define array_pointer_from_stack(ind) \
  AvARRAY((AV *)SvRV(ST(ind)))

#define unsigned_int_from_stack(ind) \
  SvUVX(ST(ind))

/* AM_SUPRAs form a linked list; using for(iter_supra(x, supra)) loops over the list members using the temp variable x */
#define iter_supras(loop_var, supra_ptr) \
  loop_var = supra_ptr + supra_ptr->next; loop_var != supra_ptr; loop_var = supra_ptr + loop_var->next

#define sublist_top(supra) \
  supra->data + supra->data[0] + 1

/*
 * structure for the supracontexts
 */

typedef struct AM_supra {
  /* list of subcontexts in this supracontext
   *
   * data[0] is the number of subcontexts in
   * the array;
   *
   * data[1] is always 0 (useful for finding
   * intersections; see below)
   *
   * data[i] is not an actually subcontext
   * label; instead, all the subcontext labels
   * are kept in an array called subcontext
   * (bad choice of name?)  created in
   * function _fillandcount().  Thus, the
   * actual subcontexts in the supracontext
   * are subcontext[data[2]], ...
   *
   * data[i] < data[i+1] if i > 1 and
   * i < data[0].
   *
   * Using an array of increasing positive
   * integers makes it easy to take
   * intersections (see lattice.pod).
   */
  AM_SHORT *data;

  /* number of supracontexts that contain
   * precisely these subcontexts;
   *
   * According to the AM algorithm, we're
   * supposed to look at all the homogeneous
   * supracontexts to compute the analogical
   * set.  Instead of traversing the
   * supracontextual lattice to find them, we
   * can instead traverse the list of AM_SUPRA
   * with count > 0 and use the value of count
   * to do our computing.
   *
   * Since we're actually traversing four
   * small lattices and taking intersections,
   * we'll be multiplying the four values of
   * count to get what we want.
   *
   */
  AM_SHORT count;

  /*
   * used to implement two linked lists
   *
   * One linked list contains all the nonempty
   * supracontexts (i.e., data[0] is not 0).
   * This linked list is in fact circular.
   *
   * One linked list contains all the unused
   * memory that can be used for new
   * supracontexts.
   */
  AM_SHORT next;

  /*
   * used during the filling of the
   * supracontextual lattice (see below)
   */
  unsigned char touched;
} AM_SUPRA;

/*
 * There is quite a bit of data that must pass between AM.pm and
 * AM.xs.  Instead of repeatedly passing it back and forth on
 * the argument stack, AM.pm sends references to the variables
 * holding this shared data, by calling _xs_initialize() (defined later
 * on).  These pointers are then stored in the following structure,
 * which is put into the magic part of $self (since $self is an HV,
 * it is perforce an SvPVMG as well).
 *
 * Note that for arrays, we store a pointer to the array data itself,
 * not the AV*.  That means that in AM.pm, we have to be careful
 * how we make assignments to array variables; a reassignment such as
 *
 * @sum = (pack "L!8", 0, 0, 0, 0, 0, 0, 0, 0) x @sum;
 *
 * breaks everything because the pointer stored here then won't point
 * to the actual data anymore.  That's why the appropriate line in
 * AM.pm is
 *
 * foreach (@sum) {
 *   $_ = pack "L!8", 0, 0, 0, 0, 0, 0, 0, 0;
 * }
 *
 * Most of the identifiers in the struct have the same names as the
 * variables created in AM.pm and are documented there.  Those
 * that don't are documented below.
 *
 * This trick of storing pointers like this is borrowed from the
 * source code of Perl/Tk.  Thanks, Nick!
 *
 */

typedef struct AM_guts {

  /*
   * Let i be an integer from 0 to 3; this represents which of the
   * four sublattices we are considering.
   *
   * Let lattice = lattice_list[i] and supralist = supra_list[i]; then lattice and
   * supralist taken together tell us which subcontexts are in a
   * particular supracontext.  If s is the label of a supracontext,
   * then it contains the subcontexts listed in
   * supralist[lattice[s]].data[].
   *
   */

  AM_SHORT *lattice_list[NUM_LATTICES];
  AM_SUPRA *supra_list[NUM_LATTICES];

  /* array ref containing number of active features in
   * each lattice (currently we us four lattices)
   */
  SV **lattice_sizes;
  /* array ref containing class labels for whole data set;
   * array index is data item index in data set.
   */
  SV **classes;
  /* TODO: ??? */
  SV **itemcontextchain;
  /* TODO: ??? */
  HV *itemcontextchainhead;
  /* Maps subcontext binary labels to class indices (or 0 if subcontext is heterogeneous) */
  HV *context_to_class;
  /* Maps subcontext binary labels to the number of training items
   * contained in that subcontext
   */
  HV *context_size;
  /* Maps binary context labels to the number of pointers to each,
   * or to the number of pointers to each class label if heterogenous.
   * The key "grand_total" maps to the total number of pointers.
   */
  HV *pointers;
  /* Maps binary context labels to the size of the gang effect of
   * that subcontext. A gang effect is the number of pointers in
   * the given subcontext multiplied by the number of training items
   * contained in the context.
   */
  HV *raw_gang;
  /* number of pointers to each class label;
   * keys are class indices and values are numbers
   * of pointers (AM_BIG_INT).
   */
  SV **sum;
  /*
   * contains the total number of possible class labels;
   * used for computing gang effects.
   */
  IV num_classes;
} AM_GUTS;

/*
 * A function and a vtable necessary for the use of Perl magic
 * TODO: explain the necessity
 */

static int AMguts_mgFree(pTHX_ SV *sv, MAGIC *magic) {
  int i;
  AM_GUTS *guts = (AM_GUTS *) SvPVX(magic->mg_obj);
  for (i = 0; i < NUM_LATTICES; ++i) {
    Safefree(guts->lattice_list[i]);
    Safefree(guts->supra_list[i][0].data);
    Safefree(guts->supra_list[i]);
  }
  return 0;
}

MGVTBL AMguts_vtab = {
  NULL,
  NULL,
  NULL,
  NULL,
  AMguts_mgFree
};

/*
 * arrays used in the change-of-base portion of normalize(SV *s)
 * they are initialized in BOOT
 */

AM_LONG tens[16]; /* 10, 10*2, 10*4, ... */
AM_LONG ones[16]; /*  1,  1*2,  1*4, ... */

/*
 * function: normalize(SV *s)
 *
 * s is an SvPV whose PV* is an unsigned long array representing a very
 * large integer
 *
 * this function modifies s so that its NV is the floating point
 * representation of the very large integer value, while its PV* is
 * the decimal representation of the very large integer value in ASCII
 * (cool, a double-valued scalar)
 *
 * computing the NV is straightforward
 *
 * computing the PV is done using the old change-of-base algorithm:
 * repeatedly divide by 10, and use the remainders to construct the
 * ASCII digits from least to most significant
 *
 */
const unsigned int ASCII_0 = 0x30;
const unsigned int DIVIDE_SPACE = 10;
const int OUTSPACE_SIZE = 55;
void normalize(pTHX_ SV *s) {
  AM_LONG *p = (AM_LONG *)SvPVX(s);

  AM_LONG dspace[DIVIDE_SPACE];
  AM_LONG qspace[DIVIDE_SPACE];
  AM_LONG *dividend, *quotient, *dptr, *qptr;

  STRLEN length = SvCUR(s) / sizeof(AM_LONG);
  /* length indexes into dspace and qspace */
  assert(length <= DIVIDE_SPACE);

  /*
   * outptr iterates outspace from end to beginning, and an ASCII digit is inserted at each location.
   * No need to 0-terminate, since we track the final string length in outlength and pass it to sv_setpvn.
   */
  char outspace[OUTSPACE_SIZE];
  char *outptr;
  outptr = outspace + (OUTSPACE_SIZE - 1);
  unsigned int outlength = 0;

  /* TODO: is this required to be a certain number of bits? */
  long double nn = 0;

  /* nn will be assigned to the NV */
  for (int j = 8; j; --j) {
    /*   2^16    * nn +           p[j-1] */
    nn = 65536.0 * nn + (double) *(p + j - 1);
  }

  dividend = &dspace[0];
  quotient = &qspace[0];
  Copy(p, dividend, length, AM_LONG);

  while (1) {
    while (length && (*(dividend + length - 1) == 0)) {
      --length;
    }
    if (length == 0) {
      sv_setpvn(s, outptr, outlength);
      break;
    }
    dptr = dividend + length - 1;
    qptr = quotient + length - 1;
    AM_LONG carry = 0;
    while (dptr >= dividend) {
      unsigned int i;
      *dptr += carry << 16;
      *qptr = 0;
      for (i = 16; i; ) {
        --i;
        if (tens[i] <= *dptr) {
          *dptr -= tens[i];
          *qptr += ones[i];
        }
      }
      carry = *dptr;
      --dptr;
      --qptr;
    }
    --outptr;
    *outptr = (char)(ASCII_0 + *dividend) & 0x00ff;
    ++outlength;
    AM_LONG *temp = dividend;
    dividend = quotient;
    quotient = temp;
  }

  SvNVX(s) = nn;
  SvNOK_on(s);
}

 /* Given 2 lists of training item indices sorted in descending order,
  * fill a third list with the intersection of items in these lists.
  * This is a simple intersection, and no check for heterogeneity is
  * performed.
  * Return the next empty (available) index address in the third list.
  * If the two lists have no intersection, then the return value is
  * just the same as the third input.
  */
unsigned short *intersect_supras(
    AM_SHORT *intersection_list_top, AM_SHORT *subcontext_list_top, AM_SHORT *k){
  while (1) {
    while (*intersection_list_top > *subcontext_list_top) {
      --intersection_list_top;
    }
    if (*intersection_list_top == 0) {
      break;
    }
    if (*intersection_list_top < *subcontext_list_top) {
      AM_SHORT *temp = intersection_list_top;
      intersection_list_top = subcontext_list_top;
      subcontext_list_top = temp;
      continue;
    }
    *k = *intersection_list_top;
    --intersection_list_top;
    --subcontext_list_top;
    --k;
  }
  return k;
}
 /* The first three inputs are the same as for intersect_supra above,
  * and the fourth paramater should be a list containing the class
  * index for all of the training items. In addition to combining
  * the first two lists into the third via intersection, the final
  * list is checked for heterogeneity and the non-deterministic
  * heterogeneous supracontexts are removed.
  * The return value is the number of items contained in the resulting
  * list.
  */
AM_SHORT intersect_supras_final(
    AM_SHORT *intersection_list_top, AM_SHORT *subcontext_list_top,
    AM_SHORT *intersect, AM_SHORT *subcontext_class){
  AM_SHORT class = 0;
  AM_SHORT length = 0;
  while (1) {
    while (*intersection_list_top > *subcontext_list_top) {
      --intersection_list_top;
    }
    if (*intersection_list_top == 0) {
      break;
    }
    if (*intersection_list_top < *subcontext_list_top) {
      AM_SHORT *temp = intersection_list_top;
      intersection_list_top = subcontext_list_top;
      subcontext_list_top = temp;
      continue;
    }
    *intersect = *intersection_list_top;
    ++intersect;
    ++length;

     /* is it heterogeneous? */
    if (class == 0) {
      /* is it not deterministic? */
      if (length > 1) {
        length = 0;
        break;
      } else {
        class = subcontext_class[*intersection_list_top];
      }
    } else {
      /* Do the classes not match? */
      if (class != subcontext_class[*intersection_list_top]) {
        length = 0;
        break;
      }
    }
    --intersection_list_top;
    --subcontext_list_top;
  }
  return length;
}

/* clear out the supracontexts */
void clear_supras(AM_SUPRA **supra_list, int supras_length)
{
  AM_SUPRA *p;
  for (int i = 0; i < supras_length; i++)
  {
    for (iter_supras(p, supra_list[i]))
    {
      Safefree(p->data);
    }
  }
}

MODULE = Algorithm::AM PACKAGE = Algorithm::AM

PROTOTYPES: DISABLE

BOOT:
  {
    AM_LONG ten = 10;
    AM_LONG one = 1;
    AM_LONG *tensptr = &tens[0];
    AM_LONG *onesptr = &ones[0];
    unsigned int i;
    for (i = 16; i; i--) {
      *tensptr = ten;
      *onesptr = one;
      ++tensptr;
      ++onesptr;
      ten <<= 1;
      one <<= 1;
    }
  }

 /*
  * This function is called by from AM.pm right after creating
  * a blessed reference to Algorithm::AM. It stores the necessary
  * pointers in the AM_GUTS structure and attaches it to the magic
  * part of the reference.
  *
  */

void
_xs_initialize(...)
 PPCODE:
  /* NOT A POINTER THIS TIME! (let memory allocate automatically) */
  AM_GUTS guts;
  /* 9 arguments are passed to the _xs_initialize method: */
  /* $self, the AM object */
  HV *self = hash_pointer_from_stack(0);
  /* For explanations on these, see the comments on AM_guts */
  SV **lattice_sizes = array_pointer_from_stack(1);
  guts.classes = array_pointer_from_stack(2);
  guts.itemcontextchain = array_pointer_from_stack(3);
  guts.itemcontextchainhead = hash_pointer_from_stack(4);
  guts.context_to_class = hash_pointer_from_stack(5);
  guts.context_size = hash_pointer_from_stack(6);
  guts.pointers = hash_pointer_from_stack(7);
  guts.raw_gang = hash_pointer_from_stack(8);
  guts.sum = array_pointer_from_stack(9);
  /* Length of guts.sum */
  guts.num_classes = av_len((AV *) SvRV(ST(9)));

  /*
   * Since the sublattices are small, we just take a chunk of memory
   * here that will be large enough for our purposes and do the actual
   * memory allocation within the code; this reduces the overhead of
   * repeated system calls.
   *
   */

  for (int i = 0; i < NUM_LATTICES; ++i) {
    UV v = SvUVX(lattice_sizes[i]);
    Newxz(guts.lattice_list[i], 1 << v, AM_SHORT);
    Newxz(guts.supra_list[i], 1 << (v + 1), AM_SUPRA); /* CHANGED */ /* TODO: what changed? */
    Newxz(guts.supra_list[i][0].data, 2, AM_SHORT);
  }

  /* Perl magic invoked here */

  SV *svguts = newSVpv((char *)&guts, sizeof(AM_GUTS));
  sv_magic((SV *) self, svguts, PERL_MAGIC_ext, NULL, 0);
  SvRMAGICAL_off((SV *) self);
  MAGIC *magic = mg_find((SV *)self, PERL_MAGIC_ext);
  magic->mg_virtual = &AMguts_vtab;
  mg_magical((SV *) self);

void
_fillandcount(...)
 PPCODE:
  /* Input args are the AM object ($self), number of features in each
   * lattice, and a flag to indicate whether to count occurrences
   * (true) or pointers (false), also known as linear/quadratic.
   */
  HV *self = hash_pointer_from_stack(0);
  SV **lattice_sizes_input = array_pointer_from_stack(1);
  UV linear_flag = unsigned_int_from_stack(2);
  MAGIC *magic = mg_find((SV *)self, PERL_MAGIC_ext);
  AM_GUTS *guts = (AM_GUTS *)SvPVX(magic->mg_obj);

  /*
   * We initialize the memory for the sublattices, including setting up the
   * linked lists.
   */

  AM_SHORT **lattice_list = guts->lattice_list;
  AM_SUPRA **supra_list = guts->supra_list;
  /* this helps us manage the free list in supra_list[i] */
  AM_SHORT nptr[NUM_LATTICES];
  AM_SHORT lattice_sizes[NUM_LATTICES];
  for (int sublattice_index = 0; sublattice_index < NUM_LATTICES; ++sublattice_index) {
    /* Extract numeric values for the specified lattice_sizes */
    lattice_sizes[sublattice_index] = (AM_SHORT) SvUVX(lattice_sizes_input[sublattice_index]);
    /* TODO: explain the lines below */
    Zero(lattice_list[sublattice_index], 1 << lattice_sizes[sublattice_index], AM_SHORT);
    supra_list[sublattice_index][0].next = 0;
    nptr[sublattice_index] = 1;
    for (int i = 1; i < 1 << (lattice_sizes[sublattice_index] + 1); ++i) {/* CHANGED (TODO: changed what?) */
      supra_list[sublattice_index][i].next = (AM_SHORT) i + 1;
    }
  }

  /*
   * Instead of adding subcontext labels directly to the supracontexts,
   * we store all of these labels in an array called subcontext.  We
   * then store the array indices of the subcontext labels in the
   * supracontexts.  That means the list of subcontexts in the
   * supracontexts is an increasing sequence of positive integers, handy
   * for taking intersections (see lattice.pod).
   *
   * The index into the array is called subcontextnumber.
   *
   * The array of matching classes is called subcontext_class.
   *
   */

  HV *context_to_class = guts->context_to_class;
  AM_SHORT subcontextnumber = (AM_SHORT)HvUSEDKEYS(context_to_class);
  AM_SHORT *subcontext;
  Newxz(subcontext, NUM_LATTICES *(subcontextnumber + 1), AM_SHORT);
  subcontext += NUM_LATTICES * subcontextnumber;
  AM_SHORT *subcontext_class;
  Newxz(subcontext_class, subcontextnumber + 1, AM_SHORT);
  subcontext_class += subcontextnumber;

  AM_SHORT *intersectlist, *intersectlist2, *intersectlist3;
  AM_SHORT *ilist2top, *ilist3top;
  Newxz(intersectlist, subcontextnumber + 1, AM_SHORT);
  Newxz(intersectlist2, subcontextnumber + 1, AM_SHORT);
  ilist2top = intersectlist2 + subcontextnumber;
  Newxz(intersectlist3, subcontextnumber + 1, AM_SHORT);
  ilist3top = intersectlist3 + subcontextnumber;

  hv_iterinit(context_to_class);
  HE *context_to_class_entry;
  while ((context_to_class_entry = hv_iternext(context_to_class))) {
    AM_SHORT *contextptr = (AM_SHORT *) HeKEY(context_to_class_entry);
    AM_SHORT class = (AM_SHORT) SvUVX(HeVAL(context_to_class_entry));
    for (int sublattice_index = 0; sublattice_index < NUM_LATTICES; ++sublattice_index, ++contextptr) {
      AM_SHORT active = lattice_sizes[sublattice_index];
      AM_SHORT *lattice = lattice_list[sublattice_index];
      AM_SUPRA *supralist = supra_list[sublattice_index];
      AM_SHORT nextsupra = nptr[sublattice_index];
      AM_SHORT context = *contextptr;

      /* We want to add subcontextnumber to the appropriate
       * supracontexts in the four smaller lattices.
       *
       * Suppose we want to add subcontextnumber to the supracontext
       * labeled by d.  supralist[lattice[d]] is an AM_SUPRA which
       * reflects the current state of the supracontext.  Suppose this
       * state is
       *
       * data:    2 0 x y (i.e., currently contains two subcontexts)
       * count:   5
       * next:    7
       * touched: 0
       *
       * Then we pluck an unused AM_SUPRA off of the free list;
       * suppose that it's located at supralist[9] (the variable
       * nextsupra tells us where).  Then supralist[lattice[d]] will
       * change to
       *
       * data:    2 0 x y
       * count:   4 (decrease by 1)
       * next:    9
       * touched: 1
       *
       * and supralist[9] will become
       *
       * data:    3 0 subcontextnumber x y (now has three subcontexts)
       * count:   1
       * next:    7
       * touched: 0
       *
       * (note: the entries in data[] are added in decreasing order)
       *
       *
       * If, on the other hand, if supralist[lattice[d]] looks like
       *
       * data:    2 0 x y
       * count:   8
       * next:    11
       * touched: 1
       *
       * that means that supralist[11] must look something like
       *
       * data:    3 0 subcontextnumber x y
       * count:   4
       * next:    2
       * touched: 0
       *
       * There already exists a supracontext with subcontextnumber
       * added in!  So we change supralist[lattice[d]] to
       *
       * data:    2 0 x y
       * count:   7 (decrease by 1)
       * next:    11
       * touched: 1
       *
       * change supralist[11] to
       *
       * data:    3 0 subcontextnumber x y
       * count:   5 (increase by 1)
       * next:    2
       * touched: 0
       *
       * and set lattice[d] = 11.
       */

      subcontext[sublattice_index] = context;
      AM_SHORT gaps[16];
      if (context == 0) {
        AM_SUPRA *p;
        for (iter_supras(p, supralist)) {
          AM_SHORT *data;
          Newxz(data, p->data[0] + 3, AM_SHORT);
          Copy(p->data + 2, data + 3, p->data[0], AM_SHORT);
          data[2] = subcontextnumber;
          data[0] = p->data[0] + 1;
          Safefree(p->data);
          p->data = data;
        }
        if (lattice[context] == 0) {

          /* in this case, the subcontext will be
           * added to all supracontexts, so there's
           * no need to hassle with a Gray code and
           * move pointers
           */

          AM_SHORT count = 0;
          AM_SHORT ci = nptr[sublattice_index];
          nptr[sublattice_index] = supralist[ci].next;
          AM_SUPRA *c = supralist + ci;
          c->next = supralist->next;
          supralist->next = ci;
          Newxz(c->data, 3, AM_SHORT);
          c->data[2] = subcontextnumber;
          c->data[0] = 1;
          for (int i = 0; i < (1 << active); ++i) {
            if (lattice[i] == 0) {
              lattice[i] = ci;
              ++count;
            }
          }
          c->count = count;
        }
        continue;
      }

      /* set up traversal using Gray code */
      AM_SHORT d = context;
      AM_SHORT numgaps = 0;
      for (int i = 1 << (active - 1); i; i >>= 1) {
        if (!(i & context)) {
          gaps[numgaps++] = i;
        }
      }
      AM_SHORT t = 1 << numgaps;

      AM_SHORT pi = lattice[context];
      AM_SUPRA *p = supralist + pi;
      if (pi) {
        --(p->count);
      }
      AM_SHORT ci = nextsupra;
      nextsupra = supralist[ci].next;
      p->touched = 1;
      AM_SUPRA *c = supralist + ci;
      c->touched = 0;
      c->next = p->next;
      p->next = ci;
      c->count = 1;
      Newxz(c->data, p->data[0] + 3, AM_SHORT);
      Copy(p->data + 2, c->data + 3, p->data[0], AM_SHORT);
      c->data[2] = subcontextnumber;
      c->data[0] = p->data[0] + 1;
      lattice[context] = ci;

      /* traverse */
      while (--t) {
        AM_SHORT tt;
        int i;
        /* find the rightmost 1 in t; from HAKMEM, I believe */
        for (i = 0, tt = ~t & (t - 1); tt; tt >>= 1, ++i) {
          ;
        }
        d ^= gaps[i];

        p = supralist + (pi = lattice[d]);
        if (pi) {
          --(p->count);
        }
        switch (p->touched) {
          case 1:
            ++supralist[lattice[d] = p->next].count;
            break;
          case 0:
            ci = nextsupra;
            nextsupra = supralist[ci].next;
            p->touched = 1;
            c = supralist + ci;
            c->touched = 0;
            c->next = p->next;
            p->next = ci;
            c->count = 1;
            Newxz(c->data, p->data[0] + 3, AM_SHORT);
            Copy(p->data + 2, c->data + 3, p->data[0], AM_SHORT);
            c->data[2] = subcontextnumber;
            c->data[0] = p->data[0] + 1;
            lattice[d] = ci;
        }
      }

      /* Here we return all AM_SUPRA with count 0 back to the free
       * list and set touched = 0 for all remaining.
       */

      p = supralist;
      p->touched = 0;
      int i;
      do {
        if (supralist[i = p->next].count == 0) {
          Safefree(supralist[i].data);
          p->next = supralist[i].next;
          supralist[i].next = nextsupra;
          nextsupra = (AM_SHORT) i;
        } else {
          p = supralist + p->next;
          p->touched = 0;
        }
      } while (p->next);
      nptr[sublattice_index] = nextsupra;
    } /*end for(sublattice_index = 0...*/
    subcontext -= NUM_LATTICES;
    *subcontext_class = class;
    --subcontext_class;
    --subcontextnumber;
  } /*end while (context_to_class_entry = hv_iternext(...*/

  HV *context_size = guts->context_size;
  HV *pointers = guts->pointers;

  /*
   * The code is in three parts:
   *
   * 1. We successively take one nonempty supracontext from each of the
   *    four small lattices and take their intersection to find a
   *    supracontext of the big lattice.  If at any point we get the
   *    empty set, we move on.
   *
   * 2. We determine if the supracontext so found is heterogeneous; if
   *    so, we skip it.
   *
   * 3. Otherwise, we count the pointers or occurrences.
   *
   */
  {
    /* find intersections */
    AM_SUPRA * p0;
    for (iter_supras(p0, supra_list[0])) {
      AM_SUPRA *p1;
      for (iter_supras(p1, supra_list[1])) {
        /* Find intersection between p0 and p1 */
        AM_SHORT *k = intersect_supras(
          sublist_top(p0),
          sublist_top(p1),
          ilist2top
        );
        /* If k has not been increased then intersection was empty */
        if (k == ilist2top) {
          continue;
        }
        *k = 0;

        AM_SUPRA *p2;
        for (iter_supras(p2, supra_list[2])) {

          /*Find intersection between previous intersection and p2*/
          k = intersect_supras(
            ilist2top,
            sublist_top(p2),
            ilist3top
          );
          /* If k has not been increased then intersection was empty */
          if (k == ilist3top) {
            continue;
          }
          *k = 0;

          AM_SUPRA *p3;
          for (iter_supras(p3, supra_list[3])) {

            /* Find intersection between previous intersection and p3;
             * check for disqualified supras this time.
             */
            AM_SHORT length = intersect_supras_final(
              ilist3top,
              sublist_top(p3),
              intersectlist,
              subcontext_class
            );

            /* count occurrences */
            if (length) {
              AM_BIG_INT count = {0, 0, 0, 0, 0, 0, 0, 0};

              count[0]  = p0->count;

              count[0] *= p1->count;
              carry(count, 0);

              count[0] *= p2->count;
              count[1] *= p2->count;
              carry(count, 0);
              carry(count, 1);

              count[0] *= p3->count;
              count[1] *= p3->count;
              count[2] *= p3->count;
              carry(count, 0);
              carry(count, 1);
              carry(count, 2);
              if(!linear_flag){
                /* If scoring is pointers (quadratic) instead of linear*/
                AM_LONG pointercount = 0;
                for (int i = 0; i < length; ++i) {
                  pointercount += (AM_LONG) SvUV(*hv_fetch(context_size,
                      (char *) (subcontext + (NUM_LATTICES * intersectlist[i])), 8, 0));
                }
                if (pointercount & 0xffff0000) {
                  AM_SHORT pchi = (AM_SHORT) (high_bits(pointercount));
                  AM_SHORT pclo = (AM_SHORT) (low_bits(pointercount));
                  AM_LONG hiprod[6];
                  hiprod[1] = pchi * count[0];
                  hiprod[2] = pchi * count[1];
                  hiprod[3] = pchi * count[2];
                  hiprod[4] = pchi * count[3];
                  count[0] *= pclo;
                  count[1] *= pclo;
                  count[2] *= pclo;
                  count[3] *= pclo;
                  carry(count, 0);
                  carry(count, 1);
                  carry(count, 2);
                  carry(count, 3);

                  count[1] += hiprod[1];
                  count[2] += hiprod[2];
                  count[3] += hiprod[3];
                  count[4] += hiprod[4];
                  carry(count, 1);
                  carry(count, 2);
                  carry(count, 3);
                  carry(count, 4);
                  } else {
                    count[0] *= pointercount;
                    count[1] *= pointercount;
                    count[2] *= pointercount;
                    count[3] *= pointercount;
                    carry(count, 0);
                    carry(count, 1);
                    carry(count, 2);
                    carry(count, 3);
                }
              }
              for (int i = 0; i < length; ++i) {
                SV *final_pointers_sv = *hv_fetch(pointers,
                    (char *) (subcontext + (NUM_LATTICES * intersectlist[i])), 8, 1);
                if (!SvPOK(final_pointers_sv)) {
                  SvUPGRADE(final_pointers_sv, SVt_PVNV);
                  SvGROW(final_pointers_sv, 8 * sizeof(AM_LONG) + 1);
                  Zero(SvPVX(final_pointers_sv), 8, AM_LONG);
                  SvCUR_set(final_pointers_sv, 8 * sizeof(AM_LONG));
                  SvPOK_on(final_pointers_sv);
                }
                AM_LONG *final_pointers = (AM_LONG *) SvPVX(final_pointers_sv);
                for (int j = 0; j < 7; ++j) {
                  *(final_pointers + j) += count[j];
                  carry_pointer(final_pointers + j);
                }
              } /* end for (i = 0;... */
            } /* end if (length) */
          } /* end for (iter_supras(p3... */
        } /* end  for (iter_supras(p2... */
      } /* end  for (iter_supras(p1... */
    } /* end  for (iter_supras(p0... */

    clear_supras(supra_list, 4);

    /*
     * compute analogical set and raw gang effects
     *
     * Technically, we don't compute the analogical set; instead, we
     * compute how many pointers/occurrences there are for each of the
     * data items in a particular subcontext, and associate that number
     * with the subcontext label, not directly with the data item.  We can
     * do this because if two data items are in the same subcontext, they
     * will have the same number of pointers/occurrences.
     *
     * If the user wants the detailed analogical set, it will be created
     * in Result.pm.
     *
     */

    HV *raw_gang = guts->raw_gang;
    SV **classes = guts->classes;
    SV **itemcontextchain = guts->itemcontextchain;
    HV *itemcontextchainhead = guts->itemcontextchainhead;
    SV **sum = guts->sum;
    IV num_classes = guts->num_classes;
    AM_BIG_INT grand_total = {0, 0, 0, 0, 0, 0, 0, 0};
    hv_iterinit(pointers);
    HE * pointers_entry;
    while ((pointers_entry = hv_iternext(pointers))) {
      AM_BIG_INT p;
      Copy(SvPVX(HeVAL(pointers_entry)), p, 8, AM_LONG);

      SV *num_examplars = *hv_fetch(context_size, HeKEY(pointers_entry), NUM_LATTICES * sizeof(AM_SHORT), 0);
      AM_LONG count = (AM_LONG)SvUVX(num_examplars);
      AM_SHORT counthi = (AM_SHORT)(high_bits(count));
      AM_SHORT countlo = (AM_SHORT)(low_bits(count));

      /* initialize 0 because it won't be overwritten */
      /*
       * TODO: multiply through p[7] into gangcount[7]
       * and warn if there's potential overflow
       */
      AM_BIG_INT gangcount;
      gangcount[0] = 0;
      for (int i = 0; i < 7; ++i) {
        gangcount[i] += countlo * p[i];
        carry_replace(gangcount, i);
      }
      gangcount[7] += countlo * p[7];

      /* TODO: why is element 0 not considered here? */
      if (counthi) {
        for (int i = 0; i < 6; ++i) {
          gangcount[i + 1] += counthi * p[i];
          carry(gangcount, i + 1);
        }
      }
      for (int i = 0; i < 7; ++i) {
        grand_total[i] += gangcount[i];
        carry(grand_total, i);
      }
      grand_total[7] += gangcount[7];

      normalize(aTHX_ HeVAL(pointers_entry));

      SV* gang_pointers = *hv_fetch(raw_gang, HeKEY(pointers_entry), NUM_LATTICES * sizeof(AM_SHORT), 1);
      SvUPGRADE(gang_pointers, SVt_PVNV);
      sv_setpvn(gang_pointers, (char *) gangcount, 8 * sizeof(AM_LONG));
      normalize(aTHX_ gang_pointers);

      SV* this_class_sv = *hv_fetch(context_to_class, HeKEY(pointers_entry), NUM_LATTICES * sizeof(AM_SHORT), 0);
      AM_SHORT this_class = (AM_SHORT) SvUVX(this_class_sv);
      if (this_class) {
        SV_CHECK_THINKFIRST(sum[this_class]);
        AM_LONG *s = (AM_LONG *) SvPVX(sum[this_class]);
        for (int i = 0; i < 7; ++i) {
          *(s + i) += gangcount[i];
          carry_pointer(s + i);
        }
      } else {
      SV *exemplar = *hv_fetch(itemcontextchainhead, HeKEY(pointers_entry), NUM_LATTICES * sizeof(AM_SHORT), 0);
        while (SvIOK(exemplar)) {
          IV datanum = SvIVX(exemplar);
          IV ocnum = SvIVX(classes[datanum]);
          SV_CHECK_THINKFIRST(sum[ocnum]);
          AM_LONG *s = (AM_LONG *) SvPVX(sum[ocnum]);
          for (int i = 0; i < 7; ++i) {
            *(s + i) += p[i];
            carry_pointer(s + i);
            exemplar = itemcontextchain[datanum];
          }
        }
      }
    }
    for (int i = 1; i <= num_classes; ++i) {
      normalize(aTHX_ sum[i]);
    }

    SV *grand_total_entry = *hv_fetch(pointers, "grand_total", 11, 1);
    SvUPGRADE(grand_total_entry, SVt_PVNV);
    sv_setpvn(grand_total_entry, (char *) grand_total, 8 * sizeof(AM_LONG));
    normalize(aTHX_ grand_total_entry);

    Safefree(subcontext);
    Safefree(subcontext_class);
    Safefree(intersectlist);
    Safefree(intersectlist2);
    Safefree(intersectlist3);
  }