lib/PDL/Ufunc.pd - metacpan.org

use strict;
use warnings;
use PDL::Types qw(types ppdefs_complex ppdefs_all);

my $T = [map $_->ppsym, grep $_->integer, types];
my $A = [ppdefs_all];

pp_addpm({At=>'Top'},<<'EOD');
use strict;
use warnings;

=encoding utf8

=head1 NAME

PDL::Ufunc - primitive ufunc operations for pdl

=head1 DESCRIPTION

This module provides some primitive and useful functions defined
using PDL::PP based on functionality of what are sometimes called
I<ufuncs> (for example NumPY and Mathematica talk about these).
It collects all the functions generally used to C<reduce> or
C<accumulate> along a dimension. These all do their job across the
first dimension but by using the slicing functions you can do it
on any dimension.

The L<PDL::Reduce> module provides an alternative interface
to many of the functions in this module.

=head1 SYNOPSIS

 use PDL::Ufunc;

=cut

use PDL::Slices;
use Carp;
EOD

# helper functions
sub projectdocs {
    my $name = shift;
    my $op = shift;
    my $extras = shift;
    <<EOD;

=for ref

Project via $name to N-1 dimensions

This function reduces the dimensionality of an ndarray
by one by taking the $name along the 1st dimension.

By using L<xchg|PDL::Slices/xchg> etc. it is possible to use
I<any> dimension.

$extras
EOD

} # sub: projectdocs()

sub cumuprojectdocs {
    my $name = shift;
    my $op = shift;
    my $extras = shift;
    <<EOD;

=for ref

Cumulative $name

This function calculates the cumulative $name
along the 1st dimension.

By using L<xchg|PDL::Slices/xchg> etc. it is possible to use
I<any> dimension.

The sum is started so that the first element in the cumulative $name
is the first element of the parameter.

$extras
EOD

} # sub: cumuprojectdocs()

my %over =
    (
     sumover  => { name => 'sum',     op => '+=', init => 0, },
     prodover => { name => 'product', op => '*=', init => 1, leftzero => 'tmp == 0' },
     );

foreach my $func ( sort keys %over ) {
    my ($name, $op, $init, $leftzero) = @{$over{$func}}{qw(name op init leftzero)};
    pp_def(
	   $_->[0].$func,
	   HandleBad => 1,
	   Pars => "a(n); $_->[1] [o]b();",
	   $_->[2] ? (GenericTypes=>$_->[2]) : (),
           Code => <<EOF,
\$GENERIC(b) tmp = $init;
PDL_IF_BAD(int flag = 0;,)
loop(n) %{
  PDL_IF_BAD(if ( \$ISBAD(a()) ) continue; flag = 1;,)
  tmp $op \$a();
  @{[$leftzero ? "if ($leftzero) break;" : '']}
%}
PDL_IF_BAD(if ( !flag ) \$SETBAD(b()); else,)
\$b() = tmp;
EOF
	   Doc => projectdocs( $name, $_->[0].$func, $_->[3] ),
	   ) for (
      ['', 'int+', $A, ''],
      ['d', 'double', undef,
"Unlike L</$func>, the calculations are performed in double precision."
      ],
    );

    my $cfunc = "cumu${func}";
    pp_def(
	   $_->[0].$cfunc,
	   HandleBad => 1,
	   Pars => "a(n); $_->[1] [o]b(n);",
	   $_->[2] ? (GenericTypes=>$_->[2]) : (),
           Code => <<EOF,
\$GENERIC(b) tmp = $init;
loop(n) %{
  PDL_IF_BAD(if ( \$ISBAD(a()) ) { \$SETBAD(b()); continue; },)
  tmp $op \$a();
  \$b() = tmp;
%}
EOF
	   Doc => cumuprojectdocs( $name, $_->[0].$cfunc, $_->[3] ),
	   ) for (
      ['', 'int+', $A, ''],
      ['d', 'double', undef,
"Unlike L</$cfunc>, the calculations are performed in double precision."
      ],
    );
} # foreach: my $func

%over = (
  firstnonzeroover => { txt => 'first non-zero value', alltypes => 1,
    op => 'tmp = $a();', leftzero => 'tmp' },
  zcover => { def=>'char tmp', txt => '== 0', init => 1, alltypes => 1,
    op => 'tmp &= ($a() == 0);', leftzero => '!tmp' },
  andover => { def=>'char tmp', txt => 'logical and', init => 1, alltypes => 1,
    op => 'tmp &=  ($a() != 0);', leftzero => '!tmp' },
  orover => { def=>'char tmp', txt => 'logical or', alltypes => 1,
    op => 'tmp |= ($a() != 0);', leftzero => 'tmp' },
  xorover => { def=>'char tmp', txt => 'logical xor', alltypes => 1,
    op => 'tmp ^= ($a() != 0);', leftzero => 0 },
  bandover => { txt => 'bitwise and', init => '~0',
    op => 'tmp &= $a();', leftzero => '!tmp' },
  borover => { txt => 'bitwise or',
    op => 'tmp |= $a() ;', leftzero => '!~tmp' },
  bxorover  => { txt => 'bitwise xor',
    op => 'tmp ^= $a() ;', leftzero => 0 },
);
foreach my $func ( sort keys %over ) {
  my ($def,$txt,$init,$op,$leftzero) = @{$over{$func}}{qw(def txt init op leftzero)};
  $def ||= '$GENERIC() tmp';
  $init //= '0';
  pp_def(
    $func,
    HandleBad => 1,
    GenericTypes => $over{$func}{alltypes} ? [ppdefs_all] : $T,
    Pars => 'a(n); [o] b()',
    Code => pp_line_numbers(__LINE__, <<EOF),
$def = $init;
PDL_IF_BAD(int flag = 0;,)
loop(n) %{
  PDL_IF_BAD(if ( \$ISBAD(a()) ) continue; flag = 1;,)
  $op
  if ( $leftzero ) break;
%}
PDL_IF_BAD(if (!flag) { \$SETBAD(b()); \$PDLSTATESETBAD(b); } else,)
\$b() = tmp;
EOF
    Doc => projectdocs( $txt, $func,''),
    BadDoc =>
'If C<a()> contains only bad data (and its bad flag is set),
C<b()> is set bad. Otherwise C<b()> will have its bad flag cleared,
as it will not contain any bad values.',
  );
} # foreach: $func

pp_def('numdiff',
  Pars  => 'x(t); [o]dx(t)',
  Inplace   => 1,
  GenericTypes => [ppdefs_all],
  Code => <<'EOF',
/* do it in reverse so inplace works */
loop (t=::-1) %{
  $dx() = t ? $x() - $x(t=>t-1) : $x();
%}
EOF
  Doc => <<'EOF',
=for ref

Numerical differencing. DX(t) = X(t) - X(t-1), DX(0) = X(0).
Combined with C<slice('-1:0')>, can be used for backward differencing.

Unlike L</diff2>, output vector is same length.
Originally by Maggie J. Xiong.

Compare to L</cumusumover>, which acts as the converse of this.
See also L</diff2>, L</diffcentred>, L</partial>, L<PDL::Primitive/pchip_chim>.
EOF
);

pp_def('diffcentred',
  HandleBad => 1,
  Pars => 'a(n); [o]o(n)',
  GenericTypes => [ppdefs_all],
  Code => <<'EOF',
loop(n) %{
  PDL_Indx left = n-1, right = n+1;
  if (left < 0) left = $SIZE(n)-1;
  if (right >= $SIZE(n)) right = 0;
  PDL_IF_BAD(if ($ISBAD($a(n=>left)) || $ISBAD($a(n=>right))) {
    $SETBAD(o()); continue;
  },)
  $o() = ($a(n=>right) - $a(n=>left))/2;
%}
EOF
  Doc => <<'EOF',
=for ref

Calculates centred differences along a vector's 0th dimension.
Always periodic on boundaries; currently to change this, you must
pad your data, and/or trim afterwards. This is so that when using
with L</partial>, the size of data stays the same and therefore
compatible if differentiated along different dimensions, e.g.
calculating "curl".

By using L<PDL::Slices/xchg> etc. it is possible to use I<any> dimension.

See also L</diff2>, L</partial>, L</numdiff>, L<PDL::Primitive/pchip_chim>.
EOF
  BadDoc => <<'EOF',
A bad value at C<n> means the affected output values at C<n-2>,C<n>
(if in boounds) are set bad.
EOF
);

pp_add_exported('partial');
pp_addpm(<<'EOF');
=head2 partial

=for ref

Take a numerical partial derivative along a given dimension, either
forward, backward, or centred.

See also L</numdiff>, L</diffcentred>,
L<PDL::Primitive/pchip_chim>, L<PDL::Primitive/pchip_chsp>,
and L<PDL::Slices/mv>, which are currently used to implement this.

Can be used to implement divergence and curl calculations (adapted
from Luis Mochán's work at
https://sourceforge.net/p/pdl/mailman/message/58843767/):

  use v5.36;
  use PDL;
  sub curl ($f) {
    my ($f0, $f1, $f2) = $f->using(0..2);
    my $o = {dir=>'c'};
    pdl(
      $f2->partial(1,$o) - $f1->partial(2,$o),
      $f0->partial(2,$o) - $f2->partial(0,$o),
      $f1->partial(0,$o) - $f0->partial(1,$o),
    )->mv(-1,0);
  }
  sub div ($f) {
    my ($f0, $f1, $f2) = $f->using(0..2);
    my $o = {dir=>'c'};
    $f0->partial(0,$o) + $f1->partial(1,$o) + $f2->partial(2,$o);
  }
  sub trim3d ($f) { $f->slice(':,1:-2,1:-2,1:-2') } # adjust if change "dir"
  my $z=zeroes(5,5,5);
  my $v=pdl(-$z->yvals, $z->xvals, $z->zvals)->mv(-1,0);
  say trim3d(curl($v));
  say div($v);

=for usage

  $pdl->partial(2);           # along dim 2, centred
  $pdl->partial(2, {d=>'c'}); # along dim 2, centred
  $pdl->partial(2, {d=>'f'}); # along dim 2, forward
  $pdl->partial(2, {d=>'b'}); # along dim 2, backward
  $pdl->partial(2, {d=>'p'}); # along dim 2, piecewise cubic Hermite
  $pdl->partial(2, {d=>'s'}); # along dim 2, cubic spline

=cut

my %dirtype2func = (
  f => \&numdiff,
  b => sub { $_[0]->slice('-1:0')->numdiff },
  c => \&diffcentred,
  p => sub {(PDL::Primitive::pchip_chim($_[0]->xvals, $_[0]))[0]},
  s => sub {(PDL::Primitive::pchip_chsp([0,0], [0,0], $_[0]->xvals, $_[0]))[0]},
);
*partial = \&PDL::partial;
sub PDL::partial {
  my ($f, $dim, $opts) = @_;
  $opts ||= {};
  my $difftype = $opts->{dir} || $opts->{d} || 'c';
  my $func = $dirtype2func{$difftype} || barf "partial: unknown 'dir' option '$difftype', only know (@{[sort keys %dirtype2func]})";
  $f = $f->mv($dim, 0) if $dim;
  my $ret = $f->$func;
  $dim ? $ret->mv(0, $dim) : $ret;
}
EOF

pp_def('diff2',
  HandleBad => 1,
  Pars => 'a(n); [o]o(nminus1=CALC($SIZE(n) - 1))',
  GenericTypes => [ppdefs_all],
  Code => <<'EOF',
$GENERIC() lastval = PDL_IF_BAD($ISBAD($a(n=>0)) ? 0 :,) $a(n=>0);
loop(n=1) %{
  PDL_IF_BAD(if ($ISBAD($a()))
    $SETBAD(o(nminus1=>n-1));
  else,)
    $o(nminus1=>n-1) = $a() - lastval;
  PDL_IF_BAD(if ($ISGOOD($a())),) lastval = $a();
%}
EOF
  Doc => <<'EOF',
=for ref

Numerically forward differentiates a vector along 0th dimension.

By using L<PDL::Slices/xchg> etc. it is possible to use I<any> dimension.
Unlike L</numdiff>, output vector is one shorter.
Combined with C<slice('-1:0')>, can be used for backward differencing.

See also L</numdiff>, L</diffcentred>, L</partial>, L<PDL::Primitive/pchip_chim>.

=for example

  print pdl(q[3 4 2 3 2 3 1])->diff2;
  # [1 -2 1 -1 1 -2]
EOF
  BadDoc => <<'EOF',
On bad value, output value is set bad. On next good value, output value
is difference between that and last good value.
EOF
);

# this would need a lot of work to support bad values
# plus it gives me a chance to check out HandleBad => 0 ;)
#
pp_def('intover',
       HandleBad => 0,
       Pars => 'a(n); float+ [o]b();',
       Code =>
       '/* Integration formulae from Press et al 2nd Ed S 4.1 */
       switch ($SIZE(n)) {
      case 1:
          broadcastloop %{
          $b() = 0.; /* not a(n=>0); as interval has zero width */
          %}
          break;
        case 2:
          broadcastloop %{
          $b() = 0.5*($a(n=>0)+$a(n=>1));
          %}
          break;
        case 3:
          broadcastloop %{
          $b() = ($a(n=>0)+4*$a(n=>1)+$a(n=>2))/3.;
          %}
          break;
      case 4:
          broadcastloop %{
          $b() = ($a(n=>0)+$a(n=>3)+3.*($a(n=>1)+$a(n=>2)))*0.375;
          %}
          break;
      case 5:
          broadcastloop %{
          $b() = (14.*($a(n=>0)+$a(n=>4))
                   +64.*($a(n=>1)+$a(n=>3))
                   +24.*$a(n=>2))/45.;
          %}
          break;
      default:
          broadcastloop %{
        $GENERIC(b) tmp = 0;
        loop (n=3:-3) %{ tmp += $a(); %}
        loop (n=-3:-2) %{ tmp += (23./24.)*($a(n=>2)+$a()); %}
        loop (n=-2:-1) %{ tmp += (7./6.)  *($a(n=>1)+$a()); %}
        loop (n=-1:)   %{ tmp += (3./8.)  *($a(n=>0)+$a()); %}
        $b() = tmp;
          %}
      }
      ',
       Doc => projectdocs('integral','intover', <<'EOF'),
Notes:

C<intover> uses a point spacing of one (i.e., delta-h==1). You will
need to scale the result to correct for the true point delta.

For C<n E<gt> 3>, these are all C<O(h^4)> (like Simpson's rule), but are
integrals between the end points assuming the pdl gives values just at
these centres: for such `functions', sumover is correct to C<O(h)>, but
is the natural (and correct) choice for binned data, of course.
EOF
); # intover

sub synonym {
    my ($name, $synonym) = @_;
    pp_add_exported('', $synonym);
    pp_addpm(
"=head2 $synonym\n\n=for ref\n\nSynonym for L</$name>.\n\n=cut\n
*PDL::$synonym = *$synonym = \\&PDL::$name;"
    );
}

sub make_average {
  my ($prefix, $outpar_type, $extra) = @_;
  pp_def("${prefix}average",
	HandleBad => 1,
	Pars => "a(n); $outpar_type [o]b();",
        Code => <<'EOF',
$GENERIC(b) tmp = 0;
PDL_Indx cnt = 0;
loop(n) %{
  PDL_IF_BAD(if ( $ISBAD(a()) ) continue;,)
  cnt++;
  tmp += $a();
%}
if ( !cnt ) {
  PDL_IF_BAD($SETBAD(b()), $b() = PDL_IF_GENTYPE_INTEGER(0,NAN));
} else
  $b() = tmp / cnt;
EOF
	Doc => projectdocs( 'average', "${prefix}average", $extra||'' ),
	);
  synonym(map "$prefix$_", qw(average avgover));
}

make_average('', 'int+');
make_average('c', 'cdouble',
"Unlike L<average|/average>, the calculation is performed in complex double
precision."
);
make_average('d', 'double',
"Unlike L<average|/average>, the calculation is performed in double
precision."
);

for my $which (
	       [qw(minimum < minover)],
	       [qw(maximum > maxover)],
	       ) {
    my ($name, $op, $synonym) = @$which;

    pp_def($name,
	    HandleBad => 1,
	    Pars => 'a(n); [o]c();',
	    Code =>
'$GENERIC() cur = 0;
int flag = 0;
loop(n) %{
  PDL_IF_BAD(if ($ISBAD(a())) continue;,)
  if ( flag && !($a() '.$op.' cur) && !PDL_ISNAN_$PPSYM()(cur) ) continue;
  cur = $a(); flag = 1;
%}
if ( flag ) { $c() = cur; }
else        { $SETBAD(c()); $PDLSTATESETBAD(c); }',
	    Doc => projectdocs($name,$name,''),
	    BadDoc =>
'Output is set bad if no elements of the input are non-bad,
otherwise the bad flag is cleared for the output ndarray.

Note that C<NaNs> are considered to be valid values and will "win" over non-C<NaN>;
see L<isfinite|PDL::Math/isfinite> and L<badmask|PDL::Bad/badmask>
for ways of masking NaNs.
',
	    );
    synonym($name, $synonym);

    pp_def("${name}_ind",
	    HandleBad => 1,
	    Pars => 'a(n); indx [o] c();',
	    Code =>
'$GENERIC() cur = 0;
PDL_Indx curind = -1;
loop(n) %{
  PDL_IF_BAD(if ($ISBAD(a())) continue;,)
  if (curind != -1 && !($a() '.$op.' cur) && !PDL_ISNAN_$PPSYM()(cur)) continue;
  cur = $a(); curind = n;
%}
if ( curind != -1 ) { $c() = curind; }
else                { $SETBAD(c()); $PDLSTATESETBAD(c); }',
	    Doc => "Like $name but returns the first matching index rather than the value",
	    BadDoc =>
'Output is set bad if no elements of the input are non-bad,
otherwise the bad flag is cleared for the output ndarray.

Note that C<NaNs> are considered to be valid values and will "win" over non-C<NaN>;
see L<isfinite|PDL::Math/isfinite> and L<badmask|PDL::Bad/badmask>
for ways of masking NaNs.
',
	    );
    synonym("${name}_ind", "${synonym}_ind");

    pp_def("${name}_n_ind",
	    HandleBad => 1,
	    Pars => 'a(n); indx [o]c(m);',
	    OtherPars => 'PDL_Indx m_size => m;',
	    PMCode => PDL::PP::pp_line_numbers(__LINE__, <<EOF),
sub PDL::${name}_n_ind {
  my (\$a, \$c, \$m_size) = \@_;
  \$m_size //= ref(\$c) ? \$c->dim(0) : \$c; # back-compat with pre-2.077
  my \$set_out = 1;
  \$set_out = 0, \$c = null if !ref \$c;
  \$c = \$c->indx if !\$c->isnull;
  PDL::_${name}_n_ind_int(\$a, \$c, \$m_size);
  \$set_out ? \$_[1] = \$c : \$c;
}
EOF
	    RedoDimsCode => 'if($SIZE(m) > $SIZE(n)) $CROAK("m_size > n_size");',
	    Code =>
'$GENERIC() cur = 0; PDL_Indx curind; register PDL_Indx ns = $SIZE(n);
$PDLSTATESETGOOD(c);
loop(m) %{
  curind = ns;
  loop(n) %{
    PDL_Indx outer_m = m; int flag=0;
    loop (m=:outer_m) %{
      if ($c() == n) {flag=1; break;}
    %}
    if (!flag &&
      PDL_IF_BAD($ISGOOD(a()) &&,)
      ((curind == ns) || $a() '.$op.' cur || PDL_ISNAN_$PPSYM()(cur)))
        {cur = $a(); curind = n;}
  %}
  if (curind != ns) { $c() = curind; }
  else              { $SETBAD(c()); $PDLSTATESETBAD(c); }
%}',
	    Doc => <<EOF,
=for ref

Returns the index of first C<m_size> $name elements. As of 2.077, you can
specify how many by either passing in an ndarray of the given size
(DEPRECATED - will be converted to indx if needed and the input arg will
be set to that), or just the size, or a null and the size.

=for usage

  ${name}_n_ind(\$pdl, \$out = zeroes(5)); # DEPRECATED
  \$out = ${name}_n_ind(\$pdl, 5);
  ${name}_n_ind(\$pdl, \$out = null, 5);

EOF
	    BadDoc =>
'Output bad flag is cleared for the output ndarray if sufficient non-bad elements found,
else remaining slots in C<$c()> are set bad.

Note that C<NaNs> are considered to be valid values and will "win" over non-C<NaN>;
see L<isfinite|PDL::Math/isfinite> and L<badmask|PDL::Bad/badmask>
for ways of masking NaNs.
',
	    );
    synonym("${name}_n_ind", "${synonym}_n_ind");
} # foreach: $which

pp_def('minmaximum',
	HandleBad => 1,
	Pars => 'a(n); [o]cmin(); [o] cmax(); indx [o]cmin_ind(); indx [o]cmax_ind();',
	Code => <<'EOF',
$GENERIC() curmin = 0, curmax = 0; /* Handle null ndarray --CED */
PDL_Indx curmin_ind = 0, curmax_ind = 0; int flag = 0;
loop(n) %{
  PDL_IF_BAD(if ($ISBAD(a())) continue;,)
  if (PDL_ISNAN_$PPSYM()($a())) continue;
  if (!flag) {
    curmin = curmax = $a();
    curmin_ind = curmax_ind = n;
    flag = 1;
  } else {
    if ($a() < curmin) { curmin = $a(); curmin_ind = n; }
    if ($a() > curmax) { curmax = $a(); curmax_ind = n; }
  }
%}
if ( !flag ) { /* Handle null ndarray */
  $SETBAD(cmin()); $SETBAD(cmin_ind());
  $SETBAD(cmax()); $SETBAD(cmax_ind());
  $PDLSTATESETBAD(cmin); $PDLSTATESETBAD(cmin_ind);
  $PDLSTATESETBAD(cmax); $PDLSTATESETBAD(cmax_ind);
} else {
  $cmin() = curmin; $cmin_ind() = curmin_ind;
  $cmax() = curmax; $cmax_ind() = curmax_ind;
}
EOF
	Doc => '
=for ref

Find minimum and maximum and their indices for a given ndarray;

=for example

 pdl> $x=pdl [[-2,3,4],[1,0,3]]
 pdl> ($min, $max, $min_ind, $max_ind)=minmaximum($x)
 pdl> p $min, $max, $min_ind, $max_ind
 [-2 0] [4 3] [0 1] [2 2]

See also L</minmax>, which clumps the ndarray together.

=cut
',
	BadDoc =>
'If C<a()> contains only bad data, then the output ndarrays will
be set bad, along with their bad flag.
Otherwise they will have their bad flags cleared,
since they will not contain any bad values.',
	); # pp_def minmaximum
synonym(qw(minmaximum minmaxover));

# Generate small ops functions to do entire array
# How to handle a return value of BAD - ie what
# datatype to use?
for my $op ( ['avg','average','average'],
	     ['sum','sumover','sum'],
	     ['prod','prodover','product'],

	     ['davg','daverage','average (in double precision)'],
	     ['dsum','dsumover','sum (in double precision)'],
	     ['dprod','dprodover','product (in double precision)'],

	     ['zcheck','zcover','check for zero'],
	     ['and','andover','logical and'],
	     ['band','bandover','bitwise and'],
	     ['or','orover','logical or'],
	     ['bor','borover','bitwise or'],
	     ['xorall','xorover','logical xor'],
	     ['bxor','bxorover','bitwise xor'],
	     ['min','minimum','minimum'],
	     ['max','maximum','maximum'],
	     ['median', 'medover', 'median'],
	     ['mode', 'modeover', 'mode'],
	     ['oddmedian','oddmedover','oddmedian']) {
   my ($name, $func, $text) = @$op;
   pp_add_exported('', $name);
   pp_addpm(<<"EOD");
=head2 $name

=for ref

Return the $text of all elements in an ndarray.

See the documentation for L</$func> for more information.

=for usage

 \$x = $name(\$data);

=for bad

This routine handles bad values.

=cut

*$name = \\&PDL::$name;
sub PDL::$name { \$_[0]->flat->${func} }
EOD

} # for $op

pp_add_exported('','any all');
pp_addpm(<<'EOPM');

=head2 any

=for ref

Return true if any element in ndarray set

Useful in conditional expressions:

=for example

 if (any $x>15) { print "some values are greater than 15\n" }

=for bad

See L</or> for comments on what happens when all elements
in the check are bad.

=cut

*any = \&or;
*PDL::any = \&PDL::or;

=head2 all

=for ref

Return true if all elements in ndarray set

Useful in conditional expressions:

=for example

 if (all $x>15) { print "all values are greater than 15\n" }

=for bad

See L</and> for comments on what happens when all elements
in the check are bad.

=cut

*all = \&and;
*PDL::all = \&PDL::and;

=head2 minmax

=for ref

Returns a list with minimum and maximum values of an ndarray.

=for usage

 ($mn, $mx) = minmax($pdl);

This routine does I<not> broadcast over the dimensions of C<$pdl>;
it returns the minimum and maximum values of the whole ndarray.
See L</minmaximum> if this is not what is required.
The two values are returned as Perl scalars,
and therefore ignore whether the values are bad.

=for example

 pdl> $x = pdl [1,-2,3,5,0]
 pdl> ($min, $max) = minmax($x);
 pdl> p "$min $max\n";
 -2 5

=cut

*minmax = \&PDL::minmax;
sub PDL::minmax { map $_->sclr, ($_[0]->flat->minmaximum)[0,1] }

EOPM

pp_add_exported('', 'minmax');

my $chdr_qsort = 'PDL_TYPELIST_REAL(PDL_QSORT)';
my $chdr_qsorti = PDL::PP::pp_line_numbers(__LINE__, <<'EOF');
#define X(symbol, ctype, ppsym, ...) \
  static inline void qsort_ind_ ## ppsym(ctype* xx,  PDL_Indx* ix, PDL_Indx a, PDL_Indx b) { \
     PDL_Indx i,j; \
     PDL_Indx t; \
     ctype median; \
     i = a; j = b; \
     median = xx[ix[(i+j) / 2]]; \
     do { \
      while (xx[ix[i]] < median) \
           i++; \
        while (median < xx[ix[j]]) \
           j--; \
        if (i <= j) { \
           t = ix[i]; ix[i] = ix[j]; ix[j] = t; \
           i++; j--; \
        } \
     } while (i <= j); \
     if (a < j) \
        qsort_ind_ ## ppsym(xx,ix,a,j); \
     if (i < b) \
        qsort_ind_ ## ppsym(xx,ix,i,b); \
  }
PDL_TYPELIST_REAL(X)
#undef X
EOF
my $chdr_qsortvec_def = PDL::PP::pp_line_numbers(__LINE__, <<'EOF');
#define X(symbol, ctype, ppsym, ...) \
    /******* \
     * qsortvec helper routines \
     *   --CED 21-Aug-2003 \
     */ \
    /* Compare a vector in lexicographic order, return equivalent of "<=>". \
     */ \
  static inline signed int pdl_cmpvec_ ## ppsym(ctype *a, ctype *b, PDL_Indx n) { \
    PDL_Indx i; \
    for(i=0; i<n; a++,b++,i++) { \
     if( *a < *b ) return -1; \
     if( *a > *b ) return 1; \
    } \
    return 0; \
 }
PDL_TYPELIST_REAL(X)
#undef X
#define PDL_QSORTVEC(ppsym, RECURSE, INDEXTERM, swapcode) \
    PDL_Indx i,j, median_ind; \
    i = a;  \
    j = b; \
    median_ind = (i+j)/2; \
    do { \
      while( pdl_cmpvec_ ## ppsym( &(xx[n*INDEXTERM(i)]), &(xx[n*INDEXTERM(median_ind)]), n )  <  0 )  \
            i++; \
      while( pdl_cmpvec_ ## ppsym( &(xx[n*INDEXTERM(j)]), &(xx[n*INDEXTERM(median_ind)]), n )  >  0 )  \
            j--; \
      if(i<=j) { \
            PDL_Indx k; \
            swapcode \
            if (median_ind==i) \
              median_ind=j; \
            else if (median_ind==j) \
              median_ind=i; \
            i++;  \
            j--; \
      } \
    } while (i <= j); \
    if (a < j) \
      RECURSE( ppsym, a, j ); \
    if (i < b) \
      RECURSE( ppsym, i, b );
EOF
my $chdr_qsortvec = PDL::PP::pp_line_numbers(__LINE__, <<'EOF');
#define PDL_QSORTVEC_INDEXTERM(indexterm) indexterm
#define PDL_QSORTVEC_RECURSE(ppsym, ...) pdl_qsortvec_ ## ppsym(xx, n, __VA_ARGS__)
#define X(symbol, ctype, ppsym, ...) \
  static inline void pdl_qsortvec_ ## ppsym(ctype *xx, PDL_Indx n, PDL_Indx a, PDL_Indx b) { \
    PDL_QSORTVEC(ppsym, PDL_QSORTVEC_RECURSE, PDL_QSORTVEC_INDEXTERM, \
            ctype *aa = &xx[n*i]; \
            ctype *bb = &xx[n*j]; \
            for( k=0; k<n; aa++,bb++,k++ ) { \
              ctype z = *aa; \
              *aa = *bb; \
              *bb = z; \
            } \
    ) \
  }
PDL_TYPELIST_REAL(X)
#undef X
#undef PDL_QSORTVEC_INDEXTERM
#undef PDL_QSORTVEC_RECURSE
EOF
my $chdr_qsortvec_ind = PDL::PP::pp_line_numbers(__LINE__, <<'EOF');
#define PDL_QSORTVEC_INDEXTERM(indexterm) ix[indexterm]
#define PDL_QSORTVEC_RECURSE(ppsym, ...) pdl_qsortvec_ind_ ## ppsym(xx, ix, n, __VA_ARGS__)
#define X(symbol, ctype, ppsym, ...) \
  static inline void pdl_qsortvec_ind_ ## ppsym(ctype *xx, PDL_Indx *ix, PDL_Indx n, PDL_Indx a, PDL_Indx b) { \
    PDL_QSORTVEC(ppsym, PDL_QSORTVEC_RECURSE, PDL_QSORTVEC_INDEXTERM, \
            k = ix[i]; \
            ix[i] = ix[j]; \
            ix[j] = k; \
    ) \
  }
PDL_TYPELIST_REAL(X)
#undef X
#undef PDL_QSORTVEC_INDEXTERM
#undef PDL_QSORTVEC_RECURSE
EOF

# when copying the data over to the temporary array,
# ignore the bad values and then only send the number
# of good elements to the sort routines
# should use broadcastloop ?
my $copy_to_temp = pp_line_numbers(__LINE__, '
        if ($PDL(a)->nvals == 0)
          $CROAK("cannot process empty ndarray");
        PDL_Indx nn = PDL_IF_BAD(0,$SIZE(n)-1);
	loop(n) %{
           PDL_IF_BAD(if ( $ISGOOD(a()) ) { $tmp(n=>nn) = $a(); nn++; },
	   $tmp() = $a();)
        %}
        PDL_IF_BAD(if ( nn == 0 ) {
           $SETBAD(b());
        } else {
           nn -= 1;,{)
qsort_$PPSYM() ($P(tmp), 0, nn);');

my $find_median_average = '
           PDL_Indx nn1 = nn/2, nn2 = nn1+1;
           if (nn%2==0) {
	      $b() = $tmp(n => nn1);
           }
           else {
	      $b() = 0.5*( $tmp(n => nn1) + $tmp(n => nn2)  );
           }';
pp_def('medover',
       HandleBad => 1,
       Pars => 'a(n); [o]b(); [t]tmp(n);',
       Doc => projectdocs('median','medover',''),
       CHeader => 'PDL_TYPELIST_REAL(PDL_QSORT)',
       Code => $copy_to_temp . $find_median_average . '}',
       ); # pp_def: medover

my $find_median_lower = '
        PDL_Indx nn1 = nn/2;
	$b() = $tmp(n => nn1);';
pp_def('oddmedover',
       HandleBad => 1,
       Pars => 'a(n); [o]b(); [t]tmp(n);',
       Doc => projectdocs('oddmedian','oddmedover','

The median is sometimes not a good choice as if the array has
an even number of elements it lies half-way between the two
middle values - thus it does not always correspond to a data
value. The lower-odd median is just the lower of these two values
and so it ALWAYS sits on an actual data value which is useful in
some circumstances.
	'),
       CHeader => 'PDL_TYPELIST_REAL(PDL_QSORT)',
       Code => $copy_to_temp . $find_median_lower . '}',
); # pp_def: oddmedover

pp_def('modeover',
       HandleBad=>undef,
       Pars => 'data(n); [o]out(); [t]sorted(n);',
       GenericTypes=>$T,
       Doc=>projectdocs('mode','modeover','
The mode is the single element most frequently found in a
discrete data set.

It I<only> makes sense for integer data types, since
floating-point types are demoted to integer before the
mode is calculated.

C<modeover> treats BAD the same as any other value:  if
BAD is the most common element, the returned value is also BAD.
'),
	 CHeader => 'PDL_TYPELIST_INTEGER(PDL_QSORT)',
	 Code => <<'EOCODE',
	       PDL_Indx i = 0;
	       PDL_Indx most = 0;
	       $GENERIC() curmode = 0;
               $GENERIC() curval = 0;

               /* Copy input to buffer for sorting, and sort it */
               loop(n) %{ $sorted() = $data(); %}
               qsort_$PPSYM()($P(sorted),0,$SIZE(n)-1);

               /* Walk through the sorted data and find the most common elemen */
               loop(n) %{
                   if( n==0 || curval != $sorted() ) {
	             curval = $sorted();
	             i=0;
	           } else {
	             i++;
	             if(i>most){
	                most=i;
                        curmode = curval;
	             }
	          }
               %}
               $out() = curmode;
EOCODE
    );


my $find_pct_interpolate = '
           double np, pp1, pp2;
           np = nn * $p();
           PDL_Indx nn1 = PDLMIN(nn,PDLMAX(0,np));
           PDL_Indx nn2 = PDLMIN(nn,PDLMAX(0,np+1));
	   if (nn == 0) {
	      pp1 = 0;
	      pp2 = 0;
	   } else {
	      pp1 = (double)nn1/(double)(nn);
	      pp2 = (double)nn2/(double)(nn);
	   }
           if ( np <= 0.0 ) {
              $b() = $tmp(n => 0);
           } else if ( np >= nn ) {
              $b() = $tmp(n => nn);
           } else if ($tmp(n => nn2) == $tmp(n => nn1)) {
              $b() = $tmp(n => nn1);
           } else if ($p() == pp1) {
              $b() = $tmp(n => nn1);
           } else if ($p() == pp2) {
              $b() = $tmp(n => nn2);
           } else {
              $b() = (np - nn1)*($tmp(n => nn2) - $tmp(n => nn1)) + $tmp(n => nn1);
           }
';
pp_def('pctover',
        HandleBad => 1,
        Pars => 'a(n); p(); [o]b(); [t]tmp(n);',
        Doc => projectdocs('specified percentile', 'pctover',
'The specified
percentile must be between 0.0 and 1.0.  When the specified percentile
falls between data points, the result is interpolated.  Values outside
the allowed range are clipped to 0.0 or 1.0 respectively.  The algorithm
implemented here is based on the interpolation variant described at
L<http://en.wikipedia.org/wiki/Percentile> as used by Microsoft Excel
and recommended by NIST.
'),
       CHeader => 'PDL_TYPELIST_REAL(PDL_QSORT)',
       Code => $copy_to_temp . $find_pct_interpolate . '}',
);

pp_def('oddpctover',
        HandleBad => 1,
        Pars => 'a(n); p(); [o]b(); [t]tmp(n);',
        Doc => projectdocs('specified percentile', 'oddpctover',
'The specified
percentile must be between 0.0 and 1.0.  When the specified percentile
falls between two values, the nearest data value is the result.
The algorithm implemented is from the textbook version described
first at L<http://en.wikipedia.org/wiki/Percentile>.
'),
       CHeader => 'PDL_TYPELIST_REAL(PDL_QSORT)',
       Code =>
	   $copy_to_temp . '
           PDL_Indx np = PDLMAX(0,PDLMIN(nn,(nn+1)*$p()));
	   $b() = $tmp(n => np);
	   }',
);

for (
['','result is interpolated'],
['odd','nearest data value is the result'],
) {
pp_add_exported('', $_->[0].'pct');
pp_addpm(<<EOD);
=head2 $_->[0]pct

=for ref

Return the specified percentile of all elements in an ndarray. The
specified percentile (p) must be between 0.0 and 1.0.  When the
specified percentile falls between data points, the $_->[1].

=for usage

 \$x = $_->[0]pct(\$data, \$pct);

=cut

*$_->[0]pct = \\&PDL::$_->[0]pct;
sub PDL::$_->[0]pct {
	my(\$x, \$p) = \@_;
	\$x->flat->$_->[0]pctover(\$p, my \$tmp=PDL->nullcreate(\$x));
	\$tmp;
}
EOD
}

sub qsort_returnempty { 'if ($PDL(a)->nvals == 0) return PDL_err;' }

pp_def('qsort',
    HandleBad => 1,
    Inplace => 1,
    Pars => '!complex a(n); !complex [o]b(n);',
    CHeader => 'PDL_TYPELIST_REAL(PDL_QSORT)',
    Code => PDL::PP::pp_line_numbers(__LINE__, '
register PDL_Indx nn = PDL_IF_BAD(0,$SIZE(n));
char is_inplace = ($P(a) == $P(b));
PDL_IF_BAD(register PDL_Indx nb = $SIZE(n) - 1;,)
').qsort_returnempty().'
PDL_IF_BAD(
loop(n) %{
  if ($ISGOOD(a())) {
    if (!is_inplace) $b(n=>nn) = $a();
    nn++;
    continue;
  }
  while (is_inplace && $ISBAD(b(n=>nb)) && nb > n) nb--;
  if (nb > n && is_inplace) { $a() = $b(n=>nb); nn++; }
  if (nb > n || !is_inplace) { $SETBAD(b(n=>nb)); nb--; }
  if (nb < n) break;
%}
,
if (!is_inplace) loop(n) %{ $b() = $a(); %}
)
if ( nn == 0 ) continue; nn -= 1;
qsort_$PPSYM() ($P(b), 0, nn);',
    Doc => 'Quicksort a vector into ascending order.',
    BadDoc =>
'
Bad values are moved to the end of the array:

 pdl> p $y
 [42 47 98 BAD 22 96 74 41 79 76 96 BAD 32 76 25 59 BAD 96 32 BAD]
 pdl> p qsort($y)
 [22 25 32 32 41 42 47 59 74 76 76 79 96 96 96 98 BAD BAD BAD BAD]
',
);

pp_def('qsorti',
       HandleBad => 1,
       Pars => '!complex a(n); indx [o]indx(n);',
       CHeader => $chdr_qsorti,
       Code => PDL::PP::pp_line_numbers(__LINE__, '
register PDL_Indx nn = PDL_IF_BAD(0,$SIZE(n)-1), nb = $SIZE(n) - 1; (void)nb;
').qsort_returnempty().'
loop(n) %{
  PDL_IF_BAD(if ($ISBAD(a())) { $indx(n=>nb) = n; nb--; }
  else                        { $indx(n=>nn) = n; nn++; } /* play safe since nn used more than once */
  ,$indx() = n;)
%}
PDL_IF_BAD(if ( nn == 0 ) continue; nn -= 1;,)
qsort_ind_$PPSYM() ($P(a), $P(indx), 0, nn);',
       BadDoc =>
'Bad elements are moved to the end of the array:

 pdl> p $y
 [42 47 98 BAD 22 96 74 41 79 76 96 BAD 32 76 25 59 BAD 96 32 BAD]
 pdl> p $y->index( qsorti($y) )
 [22 25 32 32 41 42 47 59 74 76 76 79 96 96 96 98 BAD BAD BAD BAD]
',
       Doc => '
=for ref

Quicksort a vector and return index of elements in ascending order.

=for example

 $ix = qsorti $x;
 print $x->index($ix); # Sorted list
'
);

# move all bad values to the end of the array
#
pp_def('qsortvec',
    HandleBad => 1,
    Inplace => 1,
    Pars => '!complex a(n,m); !complex [o]b(n,m);',
    CHeader => $chdr_qsortvec_def . $chdr_qsortvec,
    Code => '
register PDL_Indx nn = PDL_IF_BAD(0,$SIZE(m)-1);
char is_inplace = ($P(a) == $P(b));
'.qsort_returnempty().'
PDL_IF_BAD(register PDL_Indx nb = $SIZE(m) - 1; loop(m) %{
  char allgood_a = 1;
  loop(n) %{ if ( $ISBAD(a()) ) { allgood_a = 0; break; } %}
  PDL_Indx copy_dest = allgood_a ? nn++ : nb--;
  if (is_inplace) {
    if (allgood_a) continue; /* nothing to do */
    char anybad_b = 0;
    do {
      anybad_b = 0;
      loop(n) %{ if ($ISBAD(b(m=>copy_dest))) { anybad_b = 1; break; } %}
      if (anybad_b) copy_dest = nb--;
    } while (anybad_b);
    if (m != copy_dest)
      loop(n) %{
        /* as in-place we know same badval source and dest */
        $GENERIC() tmp = $b(m=>copy_dest);
        $b(m=>copy_dest) = $a();
        $a() = tmp;
      %}
    if (m >= nb-1) { nn = nb+1; break; } /* run out of "good" vectors */
  } else {
    loop(n) %{
      if ($ISBAD(a())) $SETBAD(b(m=>copy_dest));
      else             $b(m=>copy_dest) = $a();
    %}
  }
%}
if ( nn == 0 ) continue; nn -= 1;,
if (!is_inplace) { loop(n,m) %{ $b() = $a(); %} }
)
pdl_qsortvec_$PPSYM() ($P(b), $SIZE(n), 0, nn);',
    Doc => '
=for ref

Sort a list of vectors lexicographically.

The 0th dimension of the source ndarray is dimension in the vector;
the 1st dimension is list order.  Higher dimensions are broadcasted over.

=for example

 print qsortvec pdl([[1,2],[0,500],[2,3],[4,2],[3,4],[3,5]]);
 [
  [  0 500]
  [  1   2]
  [  2   3]
  [  3   4]
  [  3   5]
  [  4   2]
 ]

=cut
',
    BadDoc => '
Vectors with bad components are moved to the end of the array:

  pdl> p $p = pdl("[0 0] [-100 0] [BAD 0] [100 0]")->qsortvec

  [
   [-100    0]
   [   0    0]
   [ 100    0]
   [ BAD    0]
  ]
',
);

pp_def('qsortveci',
    HandleBad => 1,
    Pars => '!complex a(n,m); indx [o]indx(m);',
    CHeader => $chdr_qsortvec_def . $chdr_qsortvec_ind,
    Code =>
'register PDL_Indx nn = PDL_IF_BAD(0,$SIZE(m)-1);
PDL_IF_BAD(register PDL_Indx nb = $SIZE(m) - 1;,)
'.qsort_returnempty().'
loop(m) %{
  PDL_IF_BAD(
  char allgood_a = 1;
  loop(n) %{ if ( $ISBAD(a()) ) { allgood_a = 0; break; } %}
  PDL_Indx copy_dest = allgood_a ? nn++ : nb--;
  $indx(m=>copy_dest) = m;
  ,
  $indx()=m;
  )
%}
PDL_IF_BAD(if ( nn == 0 ) continue; nn -= 1;,)
pdl_qsortvec_ind_$PPSYM() ($P(a), $P(indx), $SIZE(n), 0, nn);',
    Doc => '
=for ref

Sort a list of vectors lexicographically, returning the indices of the
sorted vectors rather than the sorted list itself.

As with C<qsortvec>, the input PDL should be an NxM array containing M
separate N-dimensional vectors.  The return value is an integer M-PDL
containing the M-indices of original array rows, in sorted order.

As with C<qsortvec>, the zeroth element of the vectors runs slowest in the
sorted list.

Additional dimensions are broadcasted over: each plane is sorted separately,
so qsortveci may be thought of as a collapse operator of sorts (groan).

=cut
',
    BadDoc => '
Vectors with bad components are moved to the end of the array as
for L</qsortvec>.
',
);

pp_def('magnover',
  HandleBad => 1,
  Pars => "a(n); float+ [o]b();",
  GenericTypes => $A,
  Code => <<'EOF',
long double sum=0;
PDL_IF_BAD(int flag = 0;,)
loop(n) %{
  PDL_IF_BAD(if ($ISBAD(a())) continue; flag = 1;,)
  sum += PDL_IF_GENTYPE_REAL(
    $a()*$a(),
    creall($a())*creall($a()) + cimagl($a())*cimagl($a())
  );
%}
PDL_IF_BAD(if ( !flag ) { $SETBAD(b()); continue; },)
$b() = sum == 0 ? 0 : sqrtl(sum);
EOF
  Doc => projectdocs( 'Euclidean (aka Pythagorean) distance', 'magnover', <<'EOF' ),
Minimum C<float> precision output.
See also L<PDL::Primitive/inner>, L<PDL::Primitive/norm>.
EOF
);

pp_addpm({At=>'Bot'},<<'EOD');

=head1 AUTHOR

Copyright (C) Tuomas J. Lukka 1997 (lukka@husc.harvard.edu).
Contributions by Christian Soeller (c.soeller@auckland.ac.nz)
and Karl Glazebrook (kgb@aaoepp.aao.gov.au).  All rights
reserved. There is no warranty. You are allowed to redistribute this
software / documentation under certain conditions. For details, see
the file COPYING in the PDL distribution. If this file is separated
from the PDL distribution, the copyright notice should be included in
the file.

=cut
EOD

pp_done();
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	Go to GitHub issues (only if GitHub is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)
Keyboard Shortcuts