##-*- Mode: CPerl -*-

##======================================================================
## Header Administrivia
##======================================================================

use PDL::VectorValued::Dev;
my $VERSION = '1.24.1'; ##-- update with perl-reversion from Perl::Version module
pp_setversion($VERSION);

##-- for integer-type keys
require "../Config.pm";
my $INT_TYPES = join('',@{$PDL::CCS::Config::ccsConfig{INT_TYPE_CHRS}});

##-- PDL::PP debugging
#$::PP_VERBOSE = 1;

##------------------------------------------------------
## pm headers
pp_addpm({At=>'Top'},<<'EOPM');

#use PDL::CCS::Version;
use strict;

=pod

=head1 NAME

PDL::CCS::MatrixOps - Low-level matrix operations for compressed storage sparse PDLs

=head1 SYNOPSIS

 use PDL;
 use PDL::CCS::MatrixOps;

 ##---------------------------------------------------------------------
 ## ... stuff happens

=cut

EOPM
## /pm additions
##------------------------------------------------------

##------------------------------------------------------
## Exports: None
#pp_export_nothing();

##------------------------------------------------------
## Includes / defines
pp_addhdr(<<'EOH');
#include <math.h> /*-- for NAN --*/
#include "../Utils/ccsutils.h"
EOH

##------------------------------------------------------
## index datatype
require "../Config.pm";
our $INDX = $PDL::CCS::Config::ccsConfig{INDX_SIG};
pp_addpm( $PDL::CCS::Config::ccsConfig{INDX_FUNCDEF} );
pp_addhdr( $PDL::CCS::Config::ccsConfig{INDX_TYPEDEF} );


##======================================================================
## C Utilities
##======================================================================
# (none)

##======================================================================
## PDL::PP Wrappers
##======================================================================


##======================================================================
## Operations: matmult2d
##======================================================================

# TODO: support BAD values in ccs_matmult2d_sdd (especially missing==BAD).
#  + Problematic because we use $zc() as an initializer, which for missing==BAD
#    winds up setting the entire result to BAD.
#  + missing==BAD support might need a temporary to count the number
#    of (non-)missing "N" values per "O", and only add in $zc() if required (in which
#    case we wouldn't want/need to pass in $zc() at all)
#  + probably doable with an 'indx [t]nnzc(N)' temporary

##--------------------------------------------------------------
pp_def
  ('ccs_matmult2d_sdd',
   Pars => ("\n    "
            .join("\n    ",
                  "$INDX ixa(Two=2,NnzA); nza(NnzA); missinga();",  ## a(M,N) (M~i, N~x): formerly here as a(N,M)
                  'b(O,M);',                                      ## b(O,M) (O~z, M~i)
                  'zc(O);',                                       ## zc(O)
                  '[o]c(O,N)',                                    ## c(O,N) (O~z, N~x)
                  '')),
   HandleBad => 1,

   OtherPars => "PDL_Indx sizeN;",
   RedoDimsCode => q{
     /*--  we're getting SIZE(N)==1 if c() is passed in as null here too --*/
     if ( CCS_PDL_IS_NULL($PDL(c)) )
       $SIZE(N) = $COMP(sizeN);
   },

   Code => q{
    broadcastloop %{
      //-- initialize: set output to zc()
      loop (O) %{
        $GENERIC(zc) zc_o = $zc();
        loop (N) %{ $c() = zc_o; %}
      %}
      //
      //-- main loop
      loop (NnzA) %{
        CCS_Indx mi = $ixa(Two=>0);
        CCS_Indx ni = $ixa(Two=>1);
        loop (O) %{
          //--# c(o,n) = sum for m=1 to M [a(m,n) * b(o,m)]
          $c(N=>ni) += $b(M=>mi) * ($nza() - $missinga());
        %}
      %}
    %}
    if ($PDLSTATEISBAD(nza)
        || $PDLSTATEISBAD(missinga)
        || $PDLSTATEISBAD(b)
        || $PDLSTATEISBAD(zc)) {
      $PDLSTATESETBAD(c);
    } else {
      $PDLSTATESETGOOD(c);
    }
  },

  Doc =>
(q{
Two-dimensional matrix multiplication of a sparse index-encoded PDL
$a() with a dense pdl $b(), with output to a dense pdl $c().

The sparse input PDL $a() should be passed here with 0th
dimension "M" and 1st dimension "N", just as for the
built-in PDL::Primitive::matmult().

"Missing" values in $a() are treated as $missinga(), which shouldn't
be BAD or infinite, but otherwise ought to be handled correctly.
The input pdl $zc() is used to pass the cached contribution of
a $missinga()-row ("M") to an output column ("O"), i.e.

 $zc = ((zeroes($M,1)+$missinga) x $b)->flat;

$SIZE(Two) must be 2.
}),
 ); ##--/ccs_matmult2d_sdd


##--------------------------------------------------------------
pp_def
  ('ccs_matmult2d_zdd',
   Pars => ("\n    "
            .join("\n    ",
                  "$INDX ixa(Two=2,NnzA); nza(NnzA);", ## a(M,N) (M~i, N~x)
                  'b(O,M);',                          ## b(O,M) (O~z, M~i)
                  '[o]c(O,N)',                        ## c(O,N) (O~z, N~x)
                  '')),

   OtherPars => "PDL_Indx sizeN;",
   RedoDimsCode => q{
     /*--  we're getting SIZE(N)==1 if c() is passed in as null here too --*/
     if ( CCS_PDL_IS_NULL($PDL(c)) )
       $SIZE(N) = $COMP(sizeN);
   },

   HandleBad => 1,
   Code => q{
     broadcastloop %{
       //-- initialize output to zero
       loop (N) %{
         loop (O) %{
           $c()=0;
         %}
       %}
       //
       //-- main loop over CCS-encoded a()
       loop (NnzA) %{
         CCS_Indx Mi = $ixa(Two=>0);
         CCS_Indx Ni = $ixa(Two=>1);
         loop (O) %{
           PDL_IF_BAD( if ($ISBAD(nza()) || $ISBAD(b(M=>Mi)) || $ISBAD(c(N=>Ni))) { $SETBAD(c(N=>Ni)); continue; }, )
           $c(N=>Ni) += $nza() * $b(M=>Mi);
         %}
       %}
     %}
     if ( $PDLSTATEISBAD(nza) || $PDLSTATEISBAD(b) ) {
       $PDLSTATESETBAD(c);
     } else {
       $PDLSTATESETGOOD(c);
     }
   },

  Doc => q{
Two-dimensional matrix multiplication of a sparse index-encoded PDL
$a() with a dense pdl $b(), with output to a dense pdl $c().

The sparse input PDL $a() should be passed here with 0th
dimension "M" and 1st dimension "N", just as for the
built-in PDL::Primitive::matmult().

"Missing" values in $a() are treated as zero.
$SIZE(Two) must be 2.
},
 ); ##--/ccs_matmult2d_zdd


##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## ccs_vnorm: pp_def
pp_def
  ('ccs_vnorm',
   Pars => ("\n    "
            .join("\n    ",
                  "$INDX acols(NnzA); avals(NnzA);", ##-- logical (M,N)~(T,D) with acols~Mi
                  "float+ [o]vnorm(M);",             ##-- (M)~(T)
                  ''
                )),
   OtherPars => "PDL_Indx sizeM=>M;",
   HandleBad => 1,
   Code => q{
      broadcastloop %{
        CCS_Indx am;
        $GENERIC(avals) av;

        /*-- initialize --*/
        loop (M) %{ $vnorm() = 0; %}

        /*-- guts: compute vnorm[mi] = \sum_{ni=1}^N a[mi,ni]**2 --*/
        loop (NnzA) %{
          PDL_IF_BAD(if ($ISBAD(avals())) continue;,)
          am = $acols();
          av = $avals();
          $vnorm(M=>am) += av * av;
        %}

        /*-- finalize: set vnorm[*] = sqrt(vnorm[*]) --*/
        loop (M) %{ $vnorm() = sqrt($vnorm()); %}
      %}
      $PDLSTATESETGOOD(vnorm);
   },

   Doc=> q{
Computes the Euclidean lengths of each column-vector $a(i,*) of a sparse index-encoded pdl $a()
of logical dimensions (M,N), with output to a dense piddle $vnorm().
"Missing" values in $a() are treated as zero,
and $acols() specifies the (unsorted) indices along the logical dimension M of the corresponding non-missing
values in $avals().
This is basically the same thing as:

 $vnorm = ($a**2)->xchg(0,1)->sumover->sqrt;

... but should be must faster to compute for sparse index-encoded piddles.
},

   BadDoc => q{ccs_vnorm() always clears the bad-status flag on $vnorm().},
  ); ##-- /ccs_vnorm


##--------------------------------------------------------------
## ccs_vcos_zdd : ccs-matrix vs. dense-vector, output=dense, anorm=optional

##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## ccs_vcos_zdd: pmcode
pp_add_exported('', "ccs_vcos_zdd");
pp_addpm <<'EOPM';

=pod

=head2 ccs_vcos_zdd

=for sig

  Signature: (
    indx ixa(2,NnzA); nza(NnzA);
    b(N);
    float+ [o]vcos(M);
    float+ [t]anorm(M);
    PDL_Indx sizeM=>M;
  )


Computes the vector cosine similarity of a dense row-vector $b(N) with respect to each column $a(i,*)
of a sparse index-encoded PDL $a() of logical dimensions (M,N), with output to a dense piddle
$vcos(M).
"Missing" values in $a() are treated as zero,
and magnitudes for $a() are passed in the optional parameter $anorm(), which will be implicitly
computed using L<ccs_vnorm|/ccs_vnorm> if the $anorm() parameter is omitted or empty.
This is basically the same thing as:

 $anorm //= ($a**2)->xchg(0,1)->sumover->sqrt;
 $vcos    = ($a * $b->slice("*1,"))->xchg(0,1)->sumover / ($anorm * ($b**2)->sumover->sqrt);

... but should be must faster to compute.

Output values in $vcos() are cosine similarities in the range [-1,1],
except for zero-magnitude vectors which will result in NaN values in $vcos().
If you need non-negative distances, follow this up with a:

 $vcos->minus(1,$vcos,1)
 $vcos->inplace->setnantobad->inplace->setbadtoval(0); ##-- minimum distance for NaN values

to get distances values in the range [0,2].  You can use PDL threading to batch-compute distances for
multiple $b() vectors simultaneously:

  $bx   = random($N, $NB);                   ##-- get $NB random vectors of size $N
  $vcos = ccs_vcos_zdd($ixa,$nza, $bx, $M);  ##-- $vcos is now ($M,$NB)


=for bad

ccs_vcos_zdd() always clears the bad status flag on the output piddle $vcos.

=cut

sub ccs_vcos_zdd {
  my ($ixa,$nza,$b) = @_;
  barf("Usage: ccs_vcos_zdd(ixa, nza, b, vcos?, anorm?, M?)") if (grep {!defined($_)} ($ixa,$nza,$b));

  my ($anorm,$vcos,$M);
  foreach (@_[3..$#_]) {
    if    (!defined($M) && !UNIVERSAL::isa($_,"PDL")) { $M=$_; }
    elsif (!defined($vcos))  { $vcos = $_; }  ##-- compat: pass $vcos() in first
    elsif (!defined($anorm)) { $anorm = $_; }
  }

  ##-- get M
  $M = $vcos->dim(0)  if (!defined($M) && defined($vcos) && !$vcos->isempty);
  $M = $anorm->dim(0) if (!defined($M) && defined($anorm) && !$anorm->isempty);
  $M = $ixa->slice("(0),")->max+1 if (!defined($M));

  ##-- compat: implicitly compute anorm() if required
  $anorm = $ixa->slice("(0),")->ccs_vnorm($nza, $M) if (!defined($anorm) || $anorm->isempty);

  ##-- guts
  $ixa->_ccs_vcos_zdd($nza,$b, $anorm, ($vcos//=PDL->null));
  return $vcos;
}

*PDL::ccs_vcos_zdd = \&ccs_vcos_zdd;

EOPM

##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## ccs_vcos_zdd
pp_def
  ('_ccs_vcos_zdd',
   Pars => ("\n    "
            .join("\n    ",
                  "$INDX ixa(Two=2,NnzA); nza(NnzA);",   ##-- logical (M,N)
                  "b(N);",                             ##-- logical (1,N)
                  "float+ anorm(M);",                  ##-- dense (required)
                  "float+ [o]vcos(M);",
                 )),
   HandleBad => 1,
   Code => q{
     CCS_Indx an,am, bm;
     $GENERIC(anorm) bnorm;
     $GENERIC(nza)   av;

     broadcastloop %{
       /*-- cache bnorm as \sum_{i=1}^N b[i]**2 --*/
       bnorm = 0;
       loop (N) %{
         PDL_IF_BAD(if ($ISBAD(b())) continue;,)
         bnorm += $b() * $b();
       %}
       bnorm = sqrt(bnorm);
       if (bnorm == 0) {
         /*-- pathological case: return all NaN --*/
         loop(M) %{ $vcos() = NAN; %}
       }
       else {
         /*-- guts: initialize --*/
         loop (M) %{ $vcos() = 0; %}

         /*-- guts: compute \sum_{i=1}^N (a[i]*b[i]) in vcos() --*/
         loop (NnzA) %{
           am = $ixa(Two=>0);
           an = $ixa(Two=>1);
           PDL_IF_BAD(if ($ISBAD(nza()) || $ISBAD(b(N=>an))) continue;,)
           $vcos(M=>am) += $nza() * $b(N=>an);
         %}

         /*-- guts: factor out vector magnitudes (Euclidean norms ||a||*||b||), cached in anorm(), bnorm --*/
         loop (M) %{
           if ($anorm() != 0) {
             $vcos() /= ($anorm() * bnorm);
           } else {
             /*-- bogus anorm(), return NaN --*/
             $vcos() = NAN;
           }
         %}
       }
     %}
     $PDLSTATESETGOOD(vcos);
   },

   Doc=> q{Guts for L<ccs_vcos_zdd()|/ccs_vcos_zdd>, with slightly different calling conventions.},
   BadDoc=> q{Always clears the bad status flag on the output piddle $vcos.},
  ); ##-- /_ccs_vcos_zdd


##--------------------------------------------------------------
## ccs_vcos_pzd : ptr(1)-matrix vs. dense-vector, output=dense, anorm=optional

##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## ccs_vcos_pzd
pp_def(
  'ccs_vcos_pzd',
  Pars => ("\n    "
           .join("\n    ",
                 "$INDX aptr(Nplus1); $INDX acols(NnzA); avals(NnzA);", ##-- logical (M,N)~(T,D) with ptr(1)
                 "$INDX brows(NnzB);                     bvals(NnzB);", ##-- logical (1,N)~(1,D)
                 "anorm(M);",          ##-- (M)~(T)
                 "float+ [o]vcos(M);", ##-- (M)~(T)
               )),
  HandleBad => 1,
  Code => q{
    CCS_Indx bn,bn1, alo,ahi, am,anzi;
    $GENERIC(anorm) bnorm;

    broadcastloop %{
      /*-- guts: initialize --*/
      bnorm = 0;
      loop (M) %{ $vcos() = 0; %}

      /*-- guts: compute \sum_{i=1}^N (a[i]*b[i]) in vcos(), caching bnorm as \sum_{i=1}^N b[i]**2 --*/
      loop (NnzB) %{
        bn  = $brows();
        bn1 = bn + 1;
        alo = $aptr(Nplus1=>bn);
        ahi = $aptr(Nplus1=>bn1);

        PDL_IF_BAD(if ($ISBAD(bvals())) continue;,)
        bnorm += $bvals() * $bvals();

        for (anzi=alo; anzi < ahi; ++anzi) {
          am = $acols(NnzA=>anzi);
          PDL_IF_BAD(if ($ISBAD(avals(NnzA=>anzi))) continue;,)
          $vcos(M=>am)  += $avals(NnzA=>anzi) * $bvals();
        }
      %}

      /*-- guts: finalize: factor out vector magnitudes (Euclidean norms ||a||*||b||), cached in anorm(), bnorm --*/
      bnorm = sqrt(bnorm);
      if (bnorm == 0) {
        /*-- bogus bnorm, return all NaN --*/
        loop (M) %{ $vcos() = NAN; %}
      } else {
        loop (M) %{
          if ($anorm() != 0 PDL_IF_BAD(&& $ISGOOD(anorm()),)) {
            $vcos() /= ($anorm() * bnorm);
          } else {
            /*-- bogus anorm(), return NaN --*/
            $vcos() = NAN;
          }
        %}
      }
    %}
    $PDLSTATESETGOOD(vcos);
  },

  BadDoc=> q{ccs_vcos_pzd() always clears the bad status flag on the output piddle $vcos.},
  Doc => q{
Computes the vector cosine similarity of a sparse index-encoded row-vector $b() of logical dimension (N)
with respect to each column $a(i,*) a sparse Harwell-Boeing row-encoded PDL $a() of logical dimensions (M,N),
with output to a dense piddle $vcos(M).
"Missing" values in $a() are treated as zero,
and magnitudes for $a() are passed in the obligatory parameter $anorm().
Usually much faster than L<ccs_vcos_zdd()|/ccs_vcos_zdd> if a CRS pointer over logical dimension (N) is available
for $a().
},
); ##-- /_ccs_vcos_pzd


##======================================================================
## Footer Administrivia
##======================================================================

##------------------------------------------------------
## pm additions: footer
pp_addpm(<<'EOPM');

##---------------------------------------------------------------------
=pod

=head1 ACKNOWLEDGEMENTS

Perl by Larry Wall.

PDL by Karl Glazebrook, Tuomas J. Lukka, Christian Soeller, and others.

=cut

##----------------------------------------------------------------------
=pod

=head1 KNOWN BUGS

We should really implement matrix multiplication in terms of
inner product, and have a good sparse-matrix only implementation
of the former.

=cut


##---------------------------------------------------------------------
=pod

=head1 AUTHOR

Bryan Jurish E<lt>moocow@cpan.orgE<gt>

=head2 Copyright Policy

All other parts Copyright (C) 2009-2024, Bryan Jurish. All rights reserved.

This package is free software, and entirely without warranty.
You may redistribute it and/or modify it under the same terms
as Perl itself.

=head1 SEE ALSO

perl(1), PDL(3perl)

=cut

EOPM


# Always make sure that you finish your PP declarations with
# pp_done
pp_done();
##----------------------------------------------------------------------