##-*- Mode: CPerl -*-

##======================================================================
## Header Administrivia
##======================================================================
use PDL::VectorValued::Dev;
use PDL::CCS::Config;
use PDL;
use Config;
use strict;

our $VERSION = '0.12.021';
pp_setversion($VERSION);

#$::PP_VERBOSE=1; ##-- PDL::PP debugging

my %ccsConfig = %PDL::CCS::Config::ccsConfig;

my @INT_TYPES     = map {PDL->can($_) ? PDL->can($_)->() : qw()} qw(ushort long indx); #longlong
my @INT_TYPE_CHRS = map {$_->ppsym} @INT_TYPES;
my $INT_TYPE_CHRS = join('',@INT_TYPE_CHRS);

my @VAL_TYPES     = map {PDL->can($_) ? PDL->can($_)->() : qw()} qw(float double);
my @VAL_TYPE_CHRS = map {$_->ppsym} @VAL_TYPES;
my $VAL_TYPE_CHRS = join('',@VAL_TYPE_CHRS);

my $CDEBUG = 0;
my $PPDEBUG = $CDEBUG;
pp_addhdr(($CDEBUG ? '' : "/*")."#define DIACOLLO_DEBUG 1".($CDEBUG ? '': "*/")."\n");

##------------------------------------------------------
## pm additions
pp_addpm({At=>'Top'},<<'EOPM');

use strict;

=pod

=head1 NAME

DiaColloDB::PDL::Utils - low-level PDL utilities for DiaColloDB

=head1 SYNOPSIS

 use PDL;
 use DiaColloDB::PDL::Utils;

 ##---------------------------------------------------------------------
 ## ... stuff happens

=cut

EOPM
## /pm additions
##------------------------------------------------------

##------------------------------------------------------
## Exports: None
#pp_export_nothing();

##------------------------------------------------------
## Includes / defines
pp_addhdr("\n$ccsConfig{INDX_TYPEDEF}\n");
pp_addhdr(<<'EOH');

#include <stdio.h>  /*-- for debugging and IO --*/
#include <string.h> /*-- for memcpy() --*/
#include <stdlib.h> /*-- for qsort(), realloc() --*/

#ifdef DIACOLLO_DEBUG
# define DCDEBUG(x) x
#else
# define DCDEBUG(x)
#endif

EOH


##======================================================================
## C Utilities
##======================================================================
# (none)

##======================================================================
## local subs: utils

## undef = vmsg(@msg)
##  + prints deubbing message if $PPDEBUG is set
sub vmsg {
  return if (!$PPDEBUG);
  print STDERR "$0 \[PPDEBUG]: ", @_, "\n";
}

## %typeinfo = typeinfo($pdl_type_or_name)
sub typeinfo {
  my $type = shift;
  my $thash = (ref($type)
	       ? $PDL::Types::typehash{ $type->symbol }
	       : (grep {$_->{ioname} eq $type} values %PDL::Types::typehash)[0]);
  die("$0: couldn't find type hash for '$type'") if (!$thash);
  #my $name = $hash->{ioname};
  #my $ctype = $vhash->{ctype};
  #my $stype = $vhash->{ppforcetype};
  return $thash;
}

## $str = define_local(%defs)
sub define_local {
  my %defs = @_;
  return "\n".join('', map {defined($defs{$_}) ? "#define $_ $defs{$_}\n" : "#undef $_\n"} sort keys %defs)."\n";
}
## $str = undef_local(%defs)
sub undef_local {
  my %defs = @_;
  return "\n".join('', map {s/\(.*$//; "#undef $_\n"} sort keys %defs)."\n";
}


##======================================================================
## PDL::PP Wrappers
##======================================================================

##--------------------------------------------------------------
## tupleIds : binary search with on-the-fly indexing: TODO
if (0) {
pp_def('diacollo_tupleids',
       Pars => join("\n    ",
		    '',
		    "items(A,N);",    ##-- item-tuples (A:#/attributes, N:#/items)
		    "isort(N,A);",    ##-- sort-indices for items() along dimension N for attribute A
		    "apos();",        ##-- selected attribute (A) index
		    "avals(NV);",     ##-- target values for attribute (A)
		    "[o] ivals(NV);", ##-- all item-indices $ii:N with items($apos,$ii) \in $avals(), -1 for none
		    "[o] nivals();",  ##-- number of returned ivals() -- HARD: we don't know this in advance!
		   ),
      );
}

##--------------------------------------------------------------
## tym creation

##------------------------------------------------------
## tym construction: record sorting
sub tym_qsort_def {
  my $itype = shift;
  my $iinfo = typeinfo($itype);
  my %defs  = (
	       #DIACOLLO_INAME => "\"$itype\"",
	       DIACOLLO_ITYPE => $iinfo->{ctype},
	      );

  my $code = define_local(%defs).qq{

static inline
int diacollo_compare_${itype}(const void *a, const void *b)
{
  const DIACOLLO_ITYPE *aval = (const DIACOLLO_ITYPE *)a;
  const DIACOLLO_ITYPE *bval = (const DIACOLLO_ITYPE *)b;
  return (*aval > *bval) - (*aval < *bval);
}

static inline
void diacollo_qsort2_${itype}(DIACOLLO_ITYPE *vals, size_t count) {
  qsort(vals, count, 2*sizeof(DIACOLLO_ITYPE), diacollo_compare_${itype});
}

}.undef_local(%defs);

  pp_addhdr($code);
}
foreach my $itype (@INT_TYPES) {
  tym_qsort_def($itype);
}

##------------------------------------------------------
## tym_create_${ITYPE}
##  + creates tym matrix file(s)
sub tym_create_def {
  my $itype = shift;
  my $iinfo = typeinfo($itype);
  my %defs  = (
	       DIACOLLO_INAME => "\"$itype\"",
	       DIACOLLO_ITYPE => $iinfo->{ctype},
	       DIACOLLO_QSORT2 => "diacollo_qsort2_${itype}",
	       DIACOLLO_YBUFLEN => "1024",
	      );

  ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## tym_create : code
  my $code = define_local(%defs).q{

  DIACOLLO_ITYPE ti,di,ci,cy, nzi0,nzi1, nout=0, ty_prev,ty,tnzi;
  $GENERIC() tyf;
  size_t tballoc = DIACOLLO_YBUFLEN, tbn,tbi;
  DIACOLLO_ITYPE *tbuf = (DIACOLLO_ITYPE*)malloc(2*tballoc*sizeof(DIACOLLO_ITYPE));
  FILE *ixfh, *nzfh;

  DCDEBUG(fprintf(stderr,"tym_create_%s()~%s: Nnz=%d, ND=%d, NC=%d, ixfile=\"%s\", nzfile=\"%s\"\n", DIACOLLO_INAME, "$GENERIC()", $SIZE(Nnz), $SIZE(ND), $SIZE(NC), $COMP(ixfile), $COMP(nzfile)); fflush(stderr);)

  //-- open output file(s)
  if (!(ixfh=fopen($COMP(ixfile), "wb")))
    croak("tym_create_%s(): open failed for ix-file '%s'", DIACOLLO_INAME, $COMP(ixfile));
  if (!(nzfh=fopen($COMP(nzfile), "wb")))
    croak("tym_create_%s(): open failed for nz-file '%s'", DIACOLLO_INAME, $COMP(nzfile));

  //-- ye olde loope
  for (nzi0=0; nzi0 < $SIZE(Nnz); nzi0=nzi1) {
    ti = $tdmix(Ndims=>0,Nnz=>nzi0);

    tbn = 0;
    for (nzi1=nzi0; nzi1 < $SIZE(Nnz) && $tdmix(Ndims=>0,Nnz=>nzi1)==ti; ++nzi1) {
      di = $tdmix(Ndims=>1,Nnz=>nzi1);
      ci = $d2c(ND=>di);
      cy = $c2date(NC=>ci);

      //-- maybe re-alloc term-local buffer
      if (tbn==tballoc) {
         DCDEBUG(fprintf(stderr, "tym_create_%s [ti=%d,nzi1=%d,tbn=%d]: realloc %zd -> %zd\n", DIACOLLO_INAME, ti, nzi1, tbn, tballoc, 2*tballoc); fflush(stderr);)
         tballoc *= 2;
         tbuf     = (DIACOLLO_ITYPE*)realloc(tbuf, 2*tballoc*sizeof(DIACOLLO_ITYPE));
      }

      //-- append to term-local buffer
      tbuf[2*tbn]   = cy;
      tbuf[2*tbn+1] = nzi1;
      ++tbn;
    }

    //-- sort term-buffer & sum over each year
    DIACOLLO_QSORT2(tbuf, tbn);
    ty_prev = (DIACOLLO_ITYPE)-1;
    tyf     = 0;
    for (tbi=0; tbi < tbn; ++tbi) {
      ty   = tbuf[2*tbi];
      tnzi = tbuf[2*tbi+1];
      if (ty != ty_prev) {
         if (tyf != 0) {
           //-- dump current record
           fwrite(&ti,      1, sizeof(DIACOLLO_ITYPE), ixfh);
	   fwrite(&ty_prev, 1, sizeof(DIACOLLO_ITYPE), ixfh);
	   fwrite(&tyf,     1, sizeof($GENERIC()),     nzfh);
	   ++nout;
         }
         ty_prev = ty;
         tyf     = 0;
      }
      tyf += $tdmnz(Nnz1=>tnzi);
    }
    if (tyf != 0) {
      //-- dump final record for term
      fwrite(&ti,      1, sizeof(DIACOLLO_ITYPE), ixfh);
      fwrite(&ty_prev, 1, sizeof(DIACOLLO_ITYPE), ixfh);
      fwrite(&tyf,     1, sizeof($GENERIC()),     nzfh);
      ++nout;
    }
  }

  //-- dump final "missing" value to nzfh
  tyf = 0;
  fwrite(&tyf,     1, sizeof($GENERIC()),     nzfh);

  //-- save output nnz
  $nnzOut() = nout;

  //-- close output file(s)
  if (fclose(ixfh)!=0)
    croak("tym_create_%s(): close failed for ix-file '%s'", DIACOLLO_INAME, $COMP(ixfile));
  if (fclose(nzfh)!=0)
    croak("tym_create_%s(): open failed for nz-file '%s'", DIACOLLO_INAME, $COMP(nzfile));

  //-- cleanup
  if (tbuf) free(tbuf);

}.undef_local(%defs);

  ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## tym_create : ppdef
  my $INDX = $iinfo->{ppforcetype};
  vmsg("defining diacollo_tym_create_${itype}");
  vvpp_def("diacollo_tym_create_${itype}",
	   Pars => join("\n    ",
			'',
			"$INDX tdmix(Ndims,Nnz);",	##-- tdm: logical (T,D) with ptr(1) : $vs->{tdm}->_whichND
			"      tdmnz(Nnz1);",           ##-- tdm: nonzeroes+missing : $vs->{tdm}->_vals
			##
			"$INDX  d2c(ND);",             	##-- d2c(): doc->cat map : $vs->{d2c}
			"ushort c2date(NC);",           ##-- c2date(): cat->date map : $vs->{c2date}
			##
			"$INDX \[o] nnzOut();",		##-- number of output nonzeroes
			''
		       ),
	   OtherPars => join("\n    ",
			     'char *ixfile;',		##-- output filename
			     'char *nzfile;',		##-- output filename
			     '',
			    ),
	   GenericTypes => \@VAL_TYPE_CHRS,
	   Code => $code,
	   Doc => q{
Guts for term-year matrix construction from term-document matrix,
used by DiaColloDB::Relation::TDF::create().
},
	);
}
if (1) {
  foreach my $itype (@INT_TYPES) {
    tym_create_def($itype);
  }
}



##--------------------------------------------------------------
## cof_${TC}_${ITYPE} : coo+ptr(1)-matrix vs. ptr-vector, output=HASH-ref
##  + ${TC} is one of 't':terms, 'c':cats
##  + ${ITYPE} is a PDL type for integer indices
##  + supports groupby independent TC-attributes only (no term+doc dependence)
##  + from lda/ftdm-kbest.perl inline test _cof3d_pzh()

## undef = cof_x_def($tcinfix, $itype)
##  + $tcinfix is one of 't':terms, 'c':cats
##  + $itype is a PDL integer-type or string name
sub cof_x_def {
  my ($tcinfix,$itype) = @_;
  my $iinfo = typeinfo($itype);
  my %defs  = (
	       DIACOLLO_TCINFIX => "\"$tcinfix\"",
	       DIACOLLO_INAME => "\"$itype\"",
	       DIACOLLO_ITYPE => $iinfo->{ctype},
	       (map {(("DIACOLLO_COF_".uc($_)) => ($tcinfix eq $_ ? 1 : undef))} qw(t c)),
	      );

  ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## cof_X : code
  my $code = define_local(%defs).q{

  DIACOLLO_ITYPE qd,qd1,qc,qval, adnzlo,adnzhi,adnzi,anzi, at,ga,gh;
  PDL_Ushort sliceby,qdate;
  I32 keylen1 = sizeof(DIACOLLO_ITYPE);
  I32 keylen  = ($SIZE(NG)+1)*sizeof(DIACOLLO_ITYPE);
  DIACOLLO_ITYPE *keybuf = (DIACOLLO_ITYPE*)malloc(keylen);
  HV *f1hv  = INT2PTR(HV*, $COMP(f1Hash));
  HV *f12hv = INT2PTR(HV*, $COMP(f12Hash));
  HV *exthv = INT2PTR(HV*, $COMP(extendHash));
  SV **svpp1, **svpp12, **svppd;
  UV f1val0,f1val, f12val0,f12val;
#if defined(DIACOLLO_COF_T)
  //-- temporary hash for doc-local group-counts
  HV   *dochv = newHV();
  char *gkey;
  I32   gkeylen;
  SV   *gvalsv;
#endif /* DIACOLLO_COF_T */

  DCDEBUG(fprintf(stderr,"cof_%s_%s()~%s: NnzA=%d, NnzQ=%d, NA=%d, ND=%d, NC=%d, NG=%d, NHaving=%d, slice=%d, mindate=%d, maxdate=%d, keylen=%d\n", DIACOLLO_TCINFIX, DIACOLLO_INAME, "$GENERIC()", $SIZE(NnzA), $SIZE(NnzQ), $SIZE(NA), $SIZE(ND), $SIZE(NC), $SIZE(NG), $SIZE(NHaving), $dateslice(), $mindate(), $maxdate(), keylen); fflush(stderr);)

  hv_clear(f12hv);

  threadloop %{
    sliceby           = $dateslice();
    keybuf[$SIZE(NG)] = sliceby;

    /*-- guts: compute co-frequencies cofreq(w,v|d) = min{f(w,d),f(v,d)} in f12hv --*/
    DCDEBUG(fprintf(stderr,"cof_x(): loop\n"); fflush(stderr);)
    loop (NnzQ) %{
      qval = $qvals();
      qd   = $qdocs();
      qd1  = qd+1;

      //-- setup output date component, checking target range
      qc    = $d2c(ND=>qd);
      qdate = $c2date(NC=>qc);
      if (qdate < $mindate() || qdate > $maxdate()) continue;
      if (sliceby != 0) {
        keybuf[$SIZE(NG)] = sliceby * (qdate/sliceby);
      }

      //-- track f1 frequency (by slice)
      svpp1 = hv_fetch(f1hv, (char*)(keybuf+$SIZE(NG)), keylen1, TRUE);
      if (SvOK(*svpp1)) {
        f1val0 = SvUV(*svpp1);
	sv_setuv(*svpp1, f1val0+(UV)qval);
      } else {
        sv_setuv(*svpp1, (UV)qval);
      }

#if defined(DIACOLLO_COF_C)
      //DCDEBUG(fprintf(stderr,"DIACOLLO_COF_C: qd=%d ; qc=%d\n", qd, qc));

      //-- COF_C: check HAVING restrictions (groupby-cats mode)
      if ($SIZE(NHaving) > 0) {
        $LB('qc', '$ghaving(NHaving=>$_)', 0,'$SIZE(NHaving)', 'gh');
        if (gh >= $SIZE(NHaving) || $ghaving(NHaving=>gh) != qc) continue;
      }

      //-- COF_C: setup output output hash key (groupby-cats mode)
      loop (NG) %{
        ga = $groupby();
	keybuf[NG] = $xattrs(NA=>ga,NX=>qc);
      %}

      //-- COF_C: check EXTEND restrictions (if specified)
      if (exthv && !hv_exists(exthv, (char*)keybuf, keylen)) {
        continue;
      }

      //-- COF_C: propagate query-supplied f12 count (qval) to f12Hash (groupby-cats mode)
      svpp12 = hv_fetch(f12hv, (char*)keybuf, keylen, TRUE);
      if (SvOK(*svpp12)) {
        f12val = SvUV(*svpp12);
      } else {
        f12val = 0;
      }
      sv_setuv(*svpp12, f12val+(UV)qval);

#elif defined(DIACOLLO_COF_T)

      //-- COF_T: collect co-occurence frequencies by document
      adnzlo = $aptr1(NDplus1=>qd);
      adnzhi = $aptr1(NDplus1=>qd1);
      for (adnzi=adnzlo; adnzi < adnzhi; ++adnzi) {
        anzi   = $apix1(NnzA=>adnzi);
 	at     = $awhich(Ndims=>0,NnzA=>anzi);

        //-- COF_T: check HAVING restrictions (groupby-terms mode)
        if ($SIZE(NHaving) > 0) {
	  $LB('at', '$ghaving(NHaving=>$_)', 0,'$SIZE(NHaving)', 'gh');
          if (gh >= $SIZE(NHaving) || $ghaving(NHaving=>gh) != at) continue;
        }

	//-- COF_T: setup output output hash key (groupby-terms mode)
	loop (NG) %{
	  ga = $groupby();
	  keybuf[NG] = $xattrs(NA=>ga,NX=>at);
	%}

        //-- COF_T: check EXTEND restrictions (if specified)
        if (exthv && !hv_exists(exthv, (char*)keybuf, keylen)) {
          continue;
        }

        //-- COF_T: update doc-local f12 value in temporary dochv (avoid over-counting joint frequencies for nontrivial groupby aggregation)
        f12val = (UV)($avals(NnzA1=>anzi));
        svppd  = hv_fetch(dochv, (char*)keybuf, keylen, TRUE);
        if (SvOK(*svppd)) {
	  f12val += SvUV(*svppd);
        }
        sv_setuv(*svppd, f12val);

        //DCDEBUG(fprintf(stderr, "cof_x(): iter[qd=%d,anzi=%d,at=%d]: qval=%d, aval=%d, dochv.f12=%u\n", qd, anzi, at, qval, $avals(NnzA1=>anzi), SvUV(*svppd)); fflush(stderr));
      }

      //-- COF_T: propagate doc-local f12 values to f12Hash
      hv_iterinit(dochv);
      while ( (gvalsv = hv_iternextsv(dochv, &gkey, &gkeylen)) ) {
        f12val = SvUV(gvalsv);
        if (f12val > qval) f12val = qval;

        svpp12 = hv_fetch(f12hv, gkey, gkeylen, TRUE);
        if (SvOK(*svpp12))
          f12val += SvUV(*svpp12);
        sv_setuv(*svpp12, f12val);
      }
      hv_clear(dochv);

#endif /* DIACOLLO_COF_C, DIACOLLO_COF_T */
    %}
  %}

  //-- cleanup
#ifdef DIACOLLO_COF_T
  SvREFCNT_dec(dochv);
#endif /* DIACOLLO_COF_T */

  if (keybuf) free(keybuf);

}.undef_local(%defs);

  ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## cof_x : ppdef
  my $INDX = $iinfo->{ppforcetype};
  vmsg("defining diacollo_cof_${tcinfix}_${itype}");
  vvpp_def("diacollo_cof_${tcinfix}_${itype}",
	   Pars => join("\n    ",
			'',
			"$INDX awhich(Ndims,NnzA);",	##-- a() ~ tdm: logical (T,D) with ptr(1) : $vs->{tdm}->_whichND
			"$INDX aptr1(NDplus1);",	##-- a(): ptr(1) : $vs->{ptr1}
			"$INDX apix1(NnzA);",		##-- a(): sort-indices over NNzA for ptr(1) : $vs->{pix1}
			"      avals(NnzA1);",		##-- a(): nonzeroes : $vs->{tdm}->_vals
			##
			"$INDX xattrs(NA,NX);",		##-- attrs() ~ (term|cat) attributes
			"$INDX d2c(ND);",               ##-- d2c(): doc->cat map : $vs->{d2c}
			##
			"ushort c2date(NC);",           ##-- c2date(): cat->date map : $vs->{c2date}
			"ushort dateslice();",          ##-- slice value
			"ushort mindate();",            ##-- minimum date (inlusive)
			"ushort maxdate();",		##-- maximum date (inclusive)
			##
			"$INDX qdocs(NnzQ);",		##-- query vector: nz doc-indices (item1)
			"      qvals(NnzQ);",		##-- query vector: nz doc-frequencies (item1)
			##
			"$INDX groupby(NG);",		##-- attribute indices over NA for groupby()
			"$INDX ghaving(NHaving);",	##-- admissible item2 (term|cat)-ids, or empty for all
			''
		       ),
	   OtherPars => join("\n    ",
                             'IV f1Hash;',		##-- output hash, ( pack($pack_ix, $slice) => $f1, ... )
                             'IV f12Hash;',		##-- output hash, ( pack($pack_ix, $tvals($groupby(),($ti2)), $slice) => $f12, ... )
                             'IV extendHash;'		##-- extend selections: ( $slice => { $groupkey => undef, ... } )
			    ),
	   GenericTypes => \@VAL_TYPE_CHRS,
	   Code => $code,
	   Doc => q{
Guts for term-document matrix co-frequency acquisition.
Collects co-frequency counts in the HASH-ref $f12Hash and indpendent item1 counts by slice in $f1Hash,
suitable for use with groupby-aggregation over only TERM (cof_t_ITYPE) or CATEGORY (cof_c_ITYPE) attributes.
ITYPE is the index-type used by the underlying tdm model, which see for details.
},
	);
}
foreach my $tcinfix (qw(t c)) {
  foreach my $itype (@INT_TYPES) {
    cof_x_def($tcinfix,$itype);
  }
}

##--------------------------------------------------------------
## cof_tc_${ITYPE} : coo+ptr(1)-matrix vs. ptr-vector, output=HASH-ref
##  + ${ITYPE} is a piddle type for integer indices
##  + supports both term- and doc-attributes in groupby groups

## undef = cof_tc_def($itype)
sub cof_tc_def {
  my $itype = shift;
  my $iinfo = typeinfo($itype);
  my %defs  = (
	       DIACOLLO_INAME => "\"$itype\"",
	       DIACOLLO_ITYPE => $iinfo->{ctype},
	      );

  ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## cof_tc : code
  my $code = define_local(%defs).q{

  DIACOLLO_ITYPE qd,qd1,qc,qval, adnzlo,adnzhi,adnzi,anzi, at,ga,gh;
  PDL_Ushort sliceby,qdate;
  I32 keylen1 = sizeof(DIACOLLO_ITYPE);
  I32 keylen  = ($SIZE(NG)+1)*sizeof(DIACOLLO_ITYPE);
  DIACOLLO_ITYPE *keybuf = (DIACOLLO_ITYPE*)malloc(keylen);
  HV *f1hv  = INT2PTR(HV*, $COMP(f1Hash));
  HV *f12hv = INT2PTR(HV*, $COMP(f12Hash));
  HV *exthv = INT2PTR(HV*, $COMP(extendHash));
  SV **svpp;
  UV fval0,fval;

  DCDEBUG(fprintf(stderr,"cof_tc_%s()~%s: NnzA=%d, NnzQ=%d, NA=%d, NM=%d, ND=%d, NC=%d, NG=%d, NHavingT=%d, NHavingC=%d, slice=%d, mindate=%d, maxdate=%d, keylen=%d\n", DIACOLLO_INAME, "$GENERIC()", $SIZE(NnzA), $SIZE(NnzQ), $SIZE(NA), $SIZE(NM), $SIZE(ND), $SIZE(NC), $SIZE(NG), $SIZE(NHavingT), $SIZE(NHavingC), $dateslice(), $mindate(), $maxdate(), keylen); fflush(stderr);)

  hv_clear(f12hv);

  threadloop %{
    sliceby           = $dateslice();
    keybuf[$SIZE(NG)] = sliceby;

    /*-- guts: compute co-frequencies cofreq(w,v|d) = min{f(w,d),f(v,d)} in f12hv --*/
    //DCDEBUG(fprintf(stderr,"cof_tc(): loop\n"); fflush(stderr);)
    loop (NnzQ) %{
      qval = $qvals();
      qd   = $qdocs();
      qd1  = qd+1;

      //-- setup output date component, checking target range
      qc    = $d2c(ND=>qd);
      qdate = $c2date(NC=>qc);
      if (qdate < $mindate() || qdate > $maxdate()) continue;
      if (sliceby != 0) {
        keybuf[$SIZE(NG)] = sliceby * (qdate/sliceby);
      }

      //-- track f1 frequency (by slice)
      svpp = hv_fetch(f1hv, (char*)(keybuf+$SIZE(NG)), keylen1, TRUE);
      if (SvOK(*svpp)) {
        fval0 = SvUV(*svpp);
	sv_setuv(*svpp, fval0+(UV)qval);
      } else {
        sv_setuv(*svpp, (UV)qval);
      }

      //-- check HAVING restrictions (groupby-cats mode)
      if ($SIZE(NHavingC) > 0) {
        $LB('qc', '$ghavingc(NHavingC=>$_)', 0,'$SIZE(NHavingC)', 'gh');
        if (gh >= $SIZE(NHavingC) || $ghavingc(NHavingC=>gh) != qc) continue;
      }

      //-- collect co-occurence frequencies by document
      adnzlo = $aptr1(NDplus1=>qd);
      adnzhi = $aptr1(NDplus1=>qd1);
      for (adnzi=adnzlo; adnzi < adnzhi; ++adnzi) {
        anzi   = $apix1(NnzA=>adnzi);
 	at     = $awhich(Ndims=>0,NnzA=>anzi);

        //-- check HAVING restrictions (groupby-terms mode)
        if ($SIZE(NHavingT) > 0) {
	  $LB('at', '$ghavingt(NHavingT=>$_)', 0,'$SIZE(NHavingT)', 'gh');
          if (gh >= $SIZE(NHavingT) || $ghavingt(NHavingT=>gh) != at) continue;
        }

	//-- setup output output hash key (groupby-(doc+terms) mode)
	loop (NG) %{
	  ga = $groupby();
	  keybuf[NG] = $groupas()==0 ? $tattrs(NA=>ga,NT=>at) : $mattrs(NM=>ga,NC=>qc);
	%}

        //-- check EXTEND restrictions (if specified)
        if (exthv && !hv_exists(exthv, (char*)keybuf, keylen)) {
          continue;
        }

        //-- set or update f12 hash value
        fval = (UV)(qval < $avals(NnzA1=>anzi) ? qval : $avals(NnzA1=>anzi));
	svpp = hv_fetch(f12hv, (char*)keybuf, keylen, TRUE);
        if (SvOK(*svpp)) {
	  fval0 = SvUV(*svpp);
	  sv_setuv(*svpp, fval0+fval);
        } else {
          sv_setuv(*svpp, fval);
        }
      }
    %}
  %}
  if (keybuf) free(keybuf);
}.undef_local(%defs);

  ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## cof_tc : ppdef
  my $INDX = $iinfo->{ppforcetype};
  vmsg("defining diacollo_cof_tc_${itype}");
  vvpp_def("diacollo_cof_tc_${itype}",
	   Pars => join("\n    ",
			'',
			"$INDX awhich(Ndims,NnzA);",	##-- a() ~ tdm: logical (T,D) with ptr(1) : $vs->{tdm}->_whichND
			"$INDX aptr1(NDplus1);",	##-- a(): ptr(1) : $vs->{ptr1}
			"$INDX apix1(NnzA);",		##-- a(): sort-indices over NNzA for ptr(1) : $vs->{pix1}
			"      avals(NnzA1);",		##-- a(): nonzeroes : $vs->{tdm}->_vals
			##
			"$INDX tattrs(NA,NT);",		##-- tattrs() ~ term attributes
			"$INDX mattrs(NM,NC);",		##-- mattrs() ~ meta (cat-)attributes
			"$INDX d2c(ND);",		##-- d2c(): doc->cat map : $vs->{d2c}
			##
			"ushort c2date(NC);",           ##-- c2date(): cat->date map : $vs->{c2date}
			"ushort dateslice();",          ##-- slice value
			"ushort mindate();",            ##-- minimum date (inlusive)
			"ushort maxdate();",		##-- maximum date (inclusive)
			##
			"$INDX qdocs(NnzQ);",		##-- query vector: nz doc-indices (item1)
			"      qvals(NnzQ);",		##-- query vector: nz doc-frequencies (item1)
			##
			"byte  groupas(NG);",           ##-- group-as 0:term, 1:meta
			"$INDX groupby(NG);",		##-- attribute indices over NA or NM for groupby()
			##
			"$INDX ghavingt(NHavingT);",	##-- admissible item2 term-ids, or empty for all
			"$INDX ghavingc(NHavingC);",	##-- admissible item2 cat-ids, or empty for all
		      ''
		       ),
	   OtherPars => join("\n    ",
			     'IV f1Hash;',		##-- output hash, ( pack($pack_ix, $slice) => $f1, ... )
			     'IV f12Hash;',		##-- output hash, ( pack($pack_ix, $tvals($groupby(),($ti2)), $slice) => $f12, ... )
                             'IV extendHash;'		##-- extend selections: ( $slice => { $groupkey => undef, ... } )
			    ),
	   GenericTypes => \@VAL_TYPE_CHRS,
	   Code => $code,
	   Doc => q{
Guts for term-document matrix co-frequency acquisition.
Collects co-frequency counts in the HASH-ref $f12Hash and indpendent item1 counts by slice in $f1Hash,
suitable for use with groupby-aggregation over both TERM and CATEGORY attributes.
},
	);
}
foreach my $itype (@INT_TYPES) {
  cof_tc_def($itype);
}


##--------------------------------------------------------------
## tym_gf_t_${ITYPE} : item2 freqs, tym=coo, output=HASH-ref (groupby terms only)
##  + ${ITYPE} is a piddle type for integer indices
##  + from lda/ftdm-kbest.perl inline test _f2_scan()

sub tym_gf_t_def {
  my $itype = shift;
  my $iinfo = typeinfo($itype);
  my %defs  = (
	       DIACOLLO_INAME => "\"$itype\"",
	       DIACOLLO_ITYPE => $iinfo->{ctype},
	      );

  ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## tym_gf_t : code
  my $code = define_local(%defs).q{

  DIACOLLO_ITYPE t2,t2nxt, anzi_min=0,anzi_max=$SIZE(NnzA),anzi, ga;
  PDL_Ushort sliceby,adate;
  I32 keylen = ($SIZE(NG)+1)*sizeof(DIACOLLO_ITYPE);
  DIACOLLO_ITYPE *keybuf = (DIACOLLO_ITYPE*)malloc(keylen);
  HV *f12hv = $COMP(f12Hash) ? INT2PTR(HV*, $COMP(f12Hash)) : NULL;
  HV *f2hv  = INT2PTR(HV*, $COMP(f2Hash));
  SV **svpp;
  UV f2val0, f2val;

  DCDEBUG(fprintf(stderr,"tym_gf_t_%s()~%s: NnzA=%d, NT2=%d, NA=%d, NG=%d, slice=%d, mindate=%d, maxdate=%d, keylen=%d, f12hv=%p, f2hv=%p\n", DIACOLLO_INAME, "$GENERIC()", $SIZE(NnzA), $SIZE(NT2), $SIZE(NA), $SIZE(NG), $dateslice(), $mindate(), $maxdate(), keylen, f12hv, f2hv); fflush(stderr);)

  hv_clear(f2hv);

  sliceby           = $dateslice();
  keybuf[$SIZE(NG)] = sliceby;

  /*-- guts: compute independent item2 frequencies in f2hv --*/
  //DCDEBUG(fprintf(stderr,"tym_gf_%s(): loop(NT2)\n", DIACOLLO_VTYPE); fflush(stderr);)
  loop (NT2) %{
    t2    = $terms2();
    t2nxt = t2+1;

    //-- setup output hash key
    loop (NG) %{
      ga = $groupby();
      keybuf[NG] = $tattrs(NA=>ga,NT=>t2);
    %}

    $LB('t2', '$awhich(Ndims=>0,NnzA=>$_)', 'anzi_min','anzi_max', 'anzi');
    for ( ; anzi < $SIZE(NnzA) && $awhich(Ndims=>0,NnzA=>anzi) == t2; ++anzi) {

      //-- setup output date component, checking date-range
      adate = $awhich(Ndims=>1,NnzA=>anzi);
      if (adate < $mindate() || adate > $maxdate()) continue;
      if (sliceby != 0) {
        keybuf[$SIZE(NG)] = sliceby * (adate/sliceby);
      }

      //-- ignore possible keys not in f12hv
      if (f12hv && !hv_exists(f12hv, (char*)keybuf, keylen))
        continue;

      //-- set or update output hash value
      f2val = $avals(NnzA1=>anzi);
      svpp  = hv_fetch(f2hv, (char*)keybuf, keylen, TRUE);
      if (SvOK(*svpp)) {
        f2val0 = SvUV(*svpp);
	sv_setuv(*svpp, f2val0+f2val);
      }
      else {
        sv_setuv(*svpp, f2val);
      }
    }
    anzi_min = anzi;
  %}
  if (keybuf) free(keybuf);

}.undef_local(%defs);

  ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## tym_gf_t : ppdef
  my $INDX = $iinfo->{ppforcetype};
  vmsg("defining diacollo_tym_t_${itype}");
  vvpp_def("diacollo_tym_gf_t_${itype}",
	   Pars => join("\n    ",
			'',
			"$INDX awhich(Ndims,NnzA);",	##-- a() ~ tym: logical (T,Y) with ptr(0) : $vs->{tym}->_whichND
			"      avals(NnzA1);",		##-- a(): nonzeroes : $vs->{tym}->_vals
			##
			"$INDX tattrs(NA,NT);",		##-- tattrs() ~ term attributes
			##
			"ushort dateslice();",		##-- slice value
			"ushort mindate();",		##-- minimum date (inlusive)
			"ushort maxdate();",		##-- maximum date (inclusive)
			##
			"$INDX terms2(NT2);",		##-- item2 keys: term IDs (sorted)
			"$INDX groupby(NG);",		##-- attribute indices over NA for groupby()
			''
		       ),
	   OtherPars => 'IV f12Hash; IV f2Hash;',	##-- in,out hashes, ( pack($pack_ix, $tvals($groupby(),($ti2)), $slice) => $f12, ... )
	   GenericTypes => \@VAL_TYPE_CHRS,
	   Code => $code,
	   Doc => q{
Guts for independent group-frequency acquisition from a sparse term-year matrix.
Collects independent group-frequency counts in the HASH-ref f2Hash for any group corresponding to a term-id in $terms2()
whose packed group-key exists in $f12Hash.
Suitable for use with groupby-aggregation over TERM attributes only.
},
	);
}
foreach my $itype (@INT_TYPES) {
  tym_gf_t_def($itype);
}

##--------------------------------------------------------------
## gf_c_${ITYPE} : item2 freqs, cf=dense, output=HASH-ref (groupby cats only)
##  + ${ITYPE} is a piddle type for integer indices

sub gf_c_def {
  my $itype = shift;
  my $iinfo = typeinfo($itype);
  my %defs  = (
	       DIACOLLO_INAME => "\"$itype\"",
	       DIACOLLO_ITYPE => $iinfo->{ctype},
	      );

  ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## gf_c : code
  my $code = define_local(%defs).q{

  DIACOLLO_ITYPE c2, ga;
  PDL_Ushort sliceby,cdate;
  I32 keylen = ($SIZE(NG)+1)*sizeof(DIACOLLO_ITYPE);
  DIACOLLO_ITYPE *keybuf = (DIACOLLO_ITYPE*)malloc(keylen);
  HV *f12hv = $COMP(f12Hash) ? INT2PTR(HV*, $COMP(f12Hash)) : NULL;
  HV *f2hv  = INT2PTR(HV*, $COMP(f2Hash));
  SV **svpp;
  UV f2val0, f2val;

  DCDEBUG(fprintf(stderr,"gf_c_%s()~%s: NC=%d, NC2=%d, NA=%d, NG=%d, slice=%d, keylen=%d, f12hv=%p, f2hv=%p\n", DIACOLLO_INAME, "$GENERIC()", $SIZE(NC), $SIZE(NC2), $SIZE(NA), $SIZE(NG), $dateslice(), keylen, f12hv, f2hv); fflush(stderr);)

  hv_clear(f2hv);

  sliceby           = $dateslice();
  keybuf[$SIZE(NG)] = sliceby;

  /*-- guts: compute independent item2 frequencies in f2hv --*/
  //DCDEBUG(fprintf(stderr,"gf_c_%s(): loop(NC2)\n", DIACOLLO_VTYPE); fflush(stderr);)
  loop (NC2) %{
    c2 = $cats2();

    //-- setup output hash key
    loop (NG) %{
      ga = $groupby();
      keybuf[NG] = $cattrs(NA=>ga,NC=>c2);
    %}

    //-- setup output date component
    cdate = $c2date(NC=>c2);
    if (sliceby != 0) {
      keybuf[$SIZE(NG)] = sliceby * (cdate/sliceby);
    }

    //-- ignore possible keys not in f12hv
    if (f12hv && !hv_exists(f12hv, (char*)keybuf, keylen))
      continue;

    //-- set or update output hash value
    f2val = $cf(NC=>c2);
    svpp  = hv_fetch(f2hv, (char*)keybuf, keylen, TRUE);
    if (SvOK(*svpp)) {
      f2val0 = SvUV(*svpp);
      sv_setuv(*svpp, f2val0+f2val);
    } else {
      sv_setuv(*svpp, f2val);
    }
  %}
  if (keybuf) free(keybuf);
}.undef_local(%defs);

  ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## gf_c : ppdef
  my $INDX = $iinfo->{ppforcetype};
  vmsg("defining diacollo_gf_c_${itype}");
  vvpp_def("diacollo_gf_c_${itype}",
	   Pars => join("\n    ",
			'',
			"cf(NC);",			##-- cf() ~ total cat frequencies
			##
			"$INDX cattrs(NA,NC);",		##-- tattrs() ~ cat attributes
			"ushort c2date(NC);",           ##-- c2date(): cat->date map : $vs->{c2date}
			"ushort dateslice();",          ##-- slice value
			##
			"$INDX cats2(NC2);",		##-- item2 keys: cats-IDs (sorted)
			"$INDX groupby(NG);",		##-- meta-attribute indices over NA for groupby()
			''
		       ),
	   OtherPars => 'IV f12Hash; IV f2Hash;',	##-- in,out hashes, ( pack($pack_ix, $tvals($groupby(),($ti2)), $slice) => $f12, ... )
	   GenericTypes => \@VAL_TYPE_CHRS,
	   Code => $code,
	   Doc => q{
Guts for independent group-frequency acquisition from a dense category-frequency vector $cf().
Collects independent group-frequency counts in the HASH-ref $f2Hash for any group corresponding to a cat-id $cats2()
whose packed group-key exists in $f12Hash.
Suitable for use with groupby-aggregation over CATEGORY attributes only.
},
	);
}
foreach my $itype (@INT_TYPES) {
  gf_c_def($itype);
}

##--------------------------------------------------------------
## gf_tc_${ITYPE} : item2 freqs, tdm=coo, output=HASH-ref (groupby terms+cats), slow O(Nnz) linear tdm-scan
##  + ${ITYPE} is a piddle type for integer indices

sub gf_tc_def {
  my $itype = shift;
  my $iinfo = typeinfo($itype);
  my %defs  = (
	       DIACOLLO_INAME => "\"$itype\"",
	       DIACOLLO_ITYPE => $iinfo->{ctype},
	      );

  ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## gf_tc : code
  my $code = define_local(%defs).q{

  DIACOLLO_ITYPE at,ad,ac, ga;
  PDL_Ushort sliceby,adate;
  I32 keylen = ($SIZE(NG)+1)*sizeof(DIACOLLO_ITYPE);
  DIACOLLO_ITYPE *keybuf = (DIACOLLO_ITYPE*)malloc(keylen);
  HV *f12hv = $COMP(f12Hash) ? INT2PTR(HV*, $COMP(f12Hash)) : NULL;
  HV *f2hv  = INT2PTR(HV*, $COMP(f2Hash));
  SV **svpp;
  UV f2val0, f2val;

  DCDEBUG(fprintf(stderr,"gf_tc_%s()~%s: NnzA=%d, NA=%d, NM=%d, ND=%d, NC=%d, NG=%d, slice=%d, mindate=%d, maxdate=%d, keylen=%d\n", DIACOLLO_INAME, "$GENERIC()", $SIZE(NnzA), $SIZE(NA), $SIZE(NM), $SIZE(ND), $SIZE(NC), $SIZE(NG), $dateslice(), $mindate(), $maxdate(), keylen); fflush(stderr);)

  hv_clear(f2hv);

  sliceby           = $dateslice();
  keybuf[$SIZE(NG)] = sliceby;

  /*-- guts: compute independent item2 frequencies in f2hv --*/
  loop (NnzA) %{
    at = $awhich(Ndims=>0);
    ad = $awhich(Ndims=>1);
    ac = $d2c(ND=>ad);
    adate = $c2date(NC=>ac);

    //-- check date restrictions
    if (adate < $mindate() || adate > $maxdate()) continue;

    //-- setup output hash key
    loop (NG) %{
      ga = $groupby();
      keybuf[NG] = $groupas()==0 ? $tattrs(NA=>ga,NT=>at) : $mattrs(NM=>ga,NC=>ac);
    %}
    //-- setup output date component
    if (sliceby != 0) {
      keybuf[$SIZE(NG)] = sliceby * (adate/sliceby);
    }

    //-- ignore possible keys not in f12hv
    if (f12hv && !hv_exists(f12hv, (char*)keybuf, keylen))
      continue;

    //-- set or update output hash value
    f2val = $avals(NnzA1=>NnzA);
    svpp  = hv_fetch(f2hv, (char*)keybuf, keylen, TRUE);
    if (SvOK(*svpp)) {
      f2val0 = SvUV(*svpp);
      sv_setuv(*svpp, f2val0+f2val);
    } else {
      sv_setuv(*svpp, f2val);
    }
  %}
  if (keybuf) free(keybuf);
}.undef_local(%defs);

  ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## gf_tc : ppdef
  my $INDX = $iinfo->{ppforcetype};
  vmsg("defining diacollo_gf_tc_${itype}");
  vvpp_def("diacollo_gf_tc_${itype}",
	   Pars => join("\n    ",
			'',
			"$INDX awhich(Ndims,NnzA);",	##-- a() ~ tdm: logical (T,D) $vs->{tdm}->_whichND
			"      avals(NnzA1);",		##-- a(): nonzeroes : $vs->{tdm}->_vals
			##
			"$INDX tattrs(NA,NT);",		##-- tattrs() ~ term attributes
			"$INDX mattrs(NM,NC);",		##-- mattrs() ~ meta (cat-)attributes
			"$INDX d2c(ND);",               ##-- d2c(): doc->cat map : $vs->{d2c}
			##
			"ushort c2date(NC);",           ##-- c2date(): cat->date map : $vs->{c2date}
			"ushort dateslice();",          ##-- slice value
			"ushort mindate();",            ##-- minimum date (inlusive)
			"ushort maxdate();",		##-- maximum date (inclusive)
			##
			"byte  groupas(NG);",           ##-- group-as 0:term, 1:meta
			"$INDX groupby(NG);",		##-- attribute indices over NA or NM for groupby()
			''
		       ),
	   OtherPars => 'IV f12Hash; IV f2Hash;',	##-- in,out hashes, ( pack($pack_ix, $tvals($groupby(),($ti2)), $slice) => $f12, ... )
	   GenericTypes => \@VAL_TYPE_CHRS,
	   Code => $code,
	   Doc => q{
Guts for independent group-frequency acquisition from a sparse term-document matrix.
Collects independent group-frequency counts in the HASH-ref $f2Hash for any group whose packed group-key exists in $f12Hash.
Suitable for use with groupby-aggregation over both TERM and CATEGORY attributes,
but uses a slow linear O(NnzA) scan over the tdm matrix, so avoid it if you can.
},

	);
}
foreach my $itype (@INT_TYPES) {
  gf_tc_def($itype);
}

##======================================================================
## Footer Administrivia
##======================================================================

##------------------------------------------------------
## pm additions: footer
pp_addpm(<<'EOPM');

##---------------------------------------------------------------------
=pod

=head1 ACKNOWLEDGEMENTS

Perl by Larry Wall.

PDL by Karl Glazebrook, Tuomas J. Lukka, Christian Soeller, and others.

=cut

##----------------------------------------------------------------------
=pod

=head1 KNOWN BUGS

Probably many.

=cut


##---------------------------------------------------------------------
=pod

=head1 AUTHOR

Bryan Jurish E<lt>moocow@cpan.orgE<gt>

=head2 Copyright Policy

Copyright (C) 2016, Bryan Jurish. All rights reserved.

This package is free software, and entirely without warranty.
You may redistribute it and/or modify it under the same terms
as Perl itself.

=head1 SEE ALSO

perl(1), PDL(3perl)

=cut

EOPM


# Always make sure that you finish your PP declarations with
# pp_done
pp_done();
##----------------------------------------------------------------------