NAME
PDL::Stats::Distr -- parameter estimations and probability density functions for distributions.
DESCRIPTION
Parameter estimate is maximum likelihood estimate when there is closed form estimate, otherwise it is method of moments estimate.
SYNOPSIS
use PDL::LiteF;
use PDL::Stats::Distr;
# do a frequency (probability) plot with fitted normal curve
my $data = grandom(100)->abs;
my ($xvals, $hist) = $data->hist;
# turn frequency into probability
$hist /= $data->nelem;
# get maximum likelihood estimates of normal curve parameters
my ($m, $v) = $data->mle_gaussian();
# fitted normal curve probabilities
my $p = $xvals->pdf_gaussian($m, $v);
use PDL::Graphics::PGPLOT::Window;
my $win = pgwin( Dev=>"/xs" );
$win->bin( $hist );
$win->hold;
$win->line( $p, {COLOR=>2} );
$win->close;
Or, play with different distributions with plot_distr :)
$data->plot_distr( 'gaussian', 'lognormal' );
my ($a, $b) = $data->mme_beta();
beta distribution. pdf: f(x; a,b) = 1/B(a,b) x^(a-1) (1-x)^(b-1)
probability density function for beta distribution. x defined on [0,1].
my ($n, $p) = $data->mme_binomial;
binomial distribution. pmf: f(k; n,p) = (n k) p^k (1-p)^(n-k) for k = 0,1,2..n ', );
pp_def('pmf_binomial', Pars => 'ushort x(); ushort n(); p(); float+ [o]out()', GenericTypes => $F, HandleBad => 1, Code => ' PDL_IF_BAD(if ($ISBAD($x()) || $ISBAD($n()) || $ISBAD($p())) { $SETBAD( $out() ); continue; },) $GENERIC(out) bc = gsl_sf_choose($n(), $x()); $out() = bc * pow($p(), $x()) * pow(1-$p(), $n() - $x()); ', Doc => ' =for ref
probability mass function for binomial distribution. ',
);
pp_def('mle_exp', Pars => 'a(n); float+ [o]l()', GenericTypes => $F, HandleBad => 1, Code => ' $GENERIC(l) sa = 0; PDL_Indx N = PDL_IF_BAD(0,$SIZE(n)); loop (n) %{ PDL_IF_BAD(if ($ISBAD($a())) continue;,) sa += $a(); PDL_IF_BAD(N++;,) %} PDL_IF_BAD(if (sa <= 0) { $SETBAD(l()); continue; },) $l() = N / sa; ', Doc => ' =for usage
my $lamda = $data->mle_exp;
exponential distribution. mle same as method of moments estimate. ', );
pp_def('pdf_exp', Pars => 'x(); l(); float+ [o]p()', GenericTypes => $F, HandleBad => 1, Code => ' PDL_IF_BAD(if ( $ISBAD($x()) || $ISBAD($l()) ) { $SETBAD( $p() ); continue; },) $p() = $l() * exp( -1 * $l() * $x() ); ', Doc => ' =for ref
probability density function for exponential distribution. ', );
pp_def('mme_gamma', Pars => 'a(n); float+ [o]shape(); float+ [o]scale()', GenericTypes => $F, HandleBad => 1, Code => ' $GENERIC(shape) sa = 0, a2 = 0; PDL_Indx N = PDL_IF_BAD(0,$SIZE(n)); loop (n) %{ PDL_IF_BAD(if ($ISBAD($a())) continue;,) sa += $a(); a2 += pow($a(), 2); PDL_IF_BAD(N++;,) %} PDL_IF_BAD(if (N < 1) { $SETBAD(shape()); $SETBAD(scale()); continue; },) $GENERIC(shape) m = sa / N; $GENERIC(shape) v = a2 / N - pow(m, 2); $shape() = pow(m, 2) / v; $scale() = v / m; ', Doc => ' =for usage
my ($shape, $scale) = $data->mme_gamma();
two-parameter gamma distribution ', );
pp_def('pdf_gamma', Pars => 'x(); a(); t(); float+ [o]p()', GenericTypes => $F, HandleBad => 1, Code => ' PDL_IF_BAD(if ($ISBAD($x()) || $ISBAD($a()) || $ISBAD($t())) { $SETBAD( $p() ); continue; },) double g = gsl_sf_gamma( $a() ); $p() = pow($x(), $a()-1) * exp(-1*$x() / $t()) / (pow($t(), $a()) * g); ', Doc => ' =for ref
probability density function for two-parameter gamma distribution. ', );
pp_def('mle_gaussian', Pars => 'a(n); float+ [o]m(); float+ [o]v()', GenericTypes => $F, HandleBad => 1, Code => ' $GENERIC(m) sa = 0, a2 = 0; PDL_Indx N = PDL_IF_BAD(0,$SIZE(n)); loop (n) %{ PDL_IF_BAD(if ($ISBAD($a())) continue;,) sa += $a(); a2 += pow($a(), 2); PDL_IF_BAD(N++;,) %} if (N < 1) { $SETBAD(m()); $SETBAD(v()); continue; } $m() = sa / N; $v() = a2 / N - pow($m(),2); ', Doc => ' =for usage
my ($m, $v) = $data->mle_gaussian();
gaussian aka normal distribution. same results as $data->average and $data->var. mle same as method of moments estimate. ', );
pp_def('pdf_gaussian', Pars => 'x(); m(); v(); float+ [o]p()', GenericTypes => $F, HandleBad => 1, Code => ' PDL_IF_BAD(if ($ISBAD($x()) || $ISBAD($m()) || $ISBAD($v())) { $SETBAD( $p() ); continue; },) $p() = 1 / sqrt($v() * 2 * M_PI) * exp( -1 * pow($x() - $m(), 2) / (2*$v()) ); ', Doc => ' =for ref
probability density function for gaussian distribution. ', );
pp_def('mle_geo', Pars => 'a(n); float+ [o]p();', GenericTypes => $F, HandleBad => 1, Code => ' $GENERIC(p) sa = 0; PDL_Indx N = PDL_IF_BAD(0,$SIZE(n)); loop (n) %{ PDL_IF_BAD(if ($ISBAD($a())) continue;,) sa += $a(); PDL_IF_BAD(N++;,) %} if (N < 1) { $SETBAD(p()); continue; } $p() = 1 / (1 + sa/N); ', Doc => ' =for ref
geometric distribution. mle same as method of moments estimate. ', );
pp_def('pmf_geo', Pars => 'ushort x(); p(); float+ [o]out()', GenericTypes => $F, HandleBad => 1, Code => ' PDL_IF_BAD(if ($ISBAD($x()) || $ISBAD($p())) { $SETBAD( $out() ); continue; },) $out() = pow(1-$p(), $x()) * $p(); ', Doc => ' =for ref
probability mass function for geometric distribution. x >= 0. ', );
pp_def('mle_geosh', Pars => 'a(n); float+ [o]p();', GenericTypes => $F, HandleBad => 1, Code => ' $GENERIC(p) sa = 0; PDL_Indx N = PDL_IF_BAD(0,$SIZE(n)); loop (n) %{ PDL_IF_BAD(if ($ISBAD($a())) continue;,) sa += $a(); PDL_IF_BAD(N++;,) %} if (sa <= 0) { $SETBAD(p()); continue; } $p() = N / sa; ', Doc => ' =for ref
shifted geometric distribution. mle same as method of moments estimate. ', );
pp_def('pmf_geosh', Pars => 'ushort x(); p(); float+ [o]out()', GenericTypes => $F, HandleBad => 1, Code => ' PDL_IF_BAD(if ($ISBAD($x()) || $ISBAD($p())) { $SETBAD( $out() ); continue; },) if ( $x() < 1 ) { $CROAK( "x >= 1 please" ); } $out() = pow(1-$p(), $x()-1) * $p(); ', Doc => ' =for ref
probability mass function for shifted geometric distribution. x >= 1. ', );
pp_def('mle_lognormal', Pars => 'a(n); float+ [o]m(); float+ [o]v()', GenericTypes => $F, HandleBad => 1, Code => ' $GENERIC(m) sa = 0, a2 = 0; PDL_Indx N = PDL_IF_BAD(0,$SIZE(n)); loop (n) %{ PDL_IF_BAD(if ($ISBAD($a())) continue;,) sa += log($a()); PDL_IF_BAD(N++;,) %} if (N < 1) { $SETBAD(m()); $SETBAD(v()); continue; } $m() = sa / N; loop (n) %{ PDL_IF_BAD(if ($ISBAD($a())) continue;,) a2 += pow(log($a()) - $m(), 2); %} $v() = a2 / N; ', Doc => ' =for usage
my ($m, $v) = $data->mle_lognormal();
lognormal distribution. maximum likelihood estimation. ', );
pp_def('mme_lognormal', Pars => 'a(n); float+ [o]m(); float+ [o]v()', GenericTypes => $F, HandleBad => 1, Code => ' $GENERIC(m) sa = 0, a2 = 0; PDL_Indx N = PDL_IF_BAD(0,$SIZE(n)); loop (n) %{ PDL_IF_BAD(if ($ISBAD($a())) continue;,) sa += $a(); a2 += pow($a(), 2); PDL_IF_BAD(N++;,) %} if (N < 1) { $SETBAD(m()); $SETBAD(v()); continue; } $m() = 2 * log(sa / N) - 1/2 * log( a2 / N ); $v() = log( a2 / N ) - 2 * log( sa / N ); ', Doc => ' =for usage
my ($m, $v) = $data->mme_lognormal();
lognormal distribution. method of moments estimation. ', );
pp_def('pdf_lognormal', Pars => 'x(); m(); v(); float+ [o]p()', GenericTypes => $F, HandleBad => 1, Code => ' PDL_IF_BAD(if ( $ISBAD($x()) || $ISBAD($m()) || $ISBAD($v()) ) { $SETBAD( $p() ); continue; },) if (!($x() > 0 && $v() > 0)) $CROAK( "x and v > 0 please" ); $p() = 1 / ($x() * sqrt($v() * 2 * M_PI)) * exp( -1 * pow(log($x()) - $m(), 2) / (2*$v()) ); ', Doc => ' =for ref
probability density function for lognormal distribution. x > 0. v > 0. ', );
pp_def('mme_nbd', Pars => 'a(n); float+ [o]r(); float+ [o]p()', GenericTypes => $F, HandleBad => 1, Code => ' $GENERIC(p) sa = 0, a2 = 0; PDL_Indx N = PDL_IF_BAD(0,$SIZE(n)); loop (n) %{ PDL_IF_BAD(if ($ISBAD($a())) continue;,) sa += $a(); a2 += pow($a(), 2); PDL_IF_BAD(N++;,) %} PDL_IF_BAD(if (N < 1) { $SETBAD(r()); $SETBAD(p()); continue; },) $GENERIC(p) m = sa / N; $GENERIC(p) v = a2 / N - pow(m, 2); $r() = pow(m, 2) / (v - m); $p() = m / v; ', Doc => ' =for usage
my ($r, $p) = $data->mme_nbd();
negative binomial distribution. pmf: f(x; r,p) = (x+r-1 r-1) p^r (1-p)^x for x=0,1,2... ', );
pp_def('pmf_nbd', Pars => 'ushort x(); r(); p(); float+ [o]out()', GenericTypes => $F, HandleBad => 1, Code => ' PDL_IF_BAD(if ( $ISBAD($x()) || $ISBAD($r()) || $ISBAD($p()) ) { $SETBAD( $out() ); continue; },) $GENERIC(out) nbc = gsl_sf_gamma($x()+$r()) / (gsl_sf_fact($x()) * gsl_sf_gamma($r())); $out() = nbc * pow($p(),$r()) * pow(1-$p(), $x()); ', Doc => ' =for ref
probability mass function for negative binomial distribution. ', );
pp_def('mme_pareto', Pars => 'a(n); float+ [o]k(); float+ [o]xm()', GenericTypes => $F, HandleBad => 1, Code => ' $GENERIC(xm) sa = 0, min = $a(n=>0); PDL_Indx N = PDL_IF_BAD(0,$SIZE(n)); loop (n) %{ PDL_IF_BAD(if ($ISBAD($a())) continue;,) sa += $a(); if (min > $a()) min = $a(); PDL_IF_BAD(N++;,) %} if (min <= 0) $CROAK("min <= 0!"); $k() = (sa - min) / ( N*( sa/N - min ) ); $xm() = (N * $k() - 1) * min / ( N * $k() ); ', Doc => ' =for usage
my ($k, $xm) = $data->mme_pareto();
pareto distribution. pdf: f(x; k,xm) = k xm^k / x^(k+1) for x >= xm > 0. ', );
pp_def('pdf_pareto', Pars => 'x(); k(); xm(); float+ [o]p()', GenericTypes => $F, HandleBad => 1, Code => ' PDL_IF_BAD(if ( $ISBAD($x()) || $ISBAD($k()) || $ISBAD($xm()) ) { $SETBAD( $p() ); continue; },) if (!($xm() > 0 && $x() >= $xm() )) $CROAK("x >= xm > 0 please"); $p() = $k() * pow($xm(),$k()) / pow($x(), $k()+1); ', Doc => ' =for ref
probability density function for pareto distribution. x >= xm > 0. ', );
pp_def('mle_poisson', Pars => 'a(n); float+ [o]l();', GenericTypes => $F, HandleBad => 1, Code => ' $GENERIC(l) sa = 0; PDL_Indx N = PDL_IF_BAD(0,$SIZE(n)); loop (n) %{ PDL_IF_BAD(if ($ISBAD($a())) continue;,) sa += $a(); PDL_IF_BAD(N++;,) %} if (N < 1) { $SETBAD(l()); continue; } $l() = sa / N; ', Doc => ' =for usage
my $lamda = $data->mle_poisson();
poisson distribution. pmf: f(x;l) = e^(-l) * l^x / x! ', );
pp_def('pmf_poisson', Pars => 'x(); l(); float+ [o]p()', GenericTypes => $F, HandleBad => 1, Code => q{ PDL_IF_BAD(if ($ISBAD($x()) || $ISBAD($l())) { $SETBAD($p()); continue; },) if ($x() < 0) { $p() = 0; } else if ($x() < GSL_SF_FACT_NMAX / 2) { /* Exact formula */ $p() = exp( -1 * $l()) * pow($l(),$x()) / gsl_sf_fact( (unsigned int) $x() ); } else { /* Use Stirling's approximation. See * http://en.wikipedia.org/wiki/Stirling%27s_approximation */ double log_p = $x() - $l() + $x() * log($l() / $x()) - 0.5 * log(2*M_PI * $x()) - 1. / 12. / $x() + 1 / 360. / $x()/$x()/$x() - 1. / 1260. / $x()/$x()/$x()/$x()/$x(); $p() = exp(log_p); } }, Doc => q{ =for ref
Probability mass function for poisson distribution. Uses Stirling's formula for x > 85. }, );
pp_def('pmf_poisson_stirling', Pars => 'x(); l(); [o]p()', GenericTypes => $F, HandleBad => 1, Code => q{ PDL_IF_BAD(if ($ISBAD($x()) || $ISBAD($l())) { $SETBAD($p()); continue; },) if ($x() < 0) { $p() = 0; } else if ($x() == 0) { $p() = exp(-$l()); } else { /* Use Stirling's approximation. See * http://en.wikipedia.org/wiki/Stirling%27s_approximation */ double log_p = $x() - $l() + $x() * log($l() / $x()) - 0.5 * log(2*M_PI * $x()) - 1. / 12. / $x() + 1 / 360. / $x()/$x()/$x() - 1. / 1260. / $x()/$x()/$x()/$x()/$x(); $p() = exp(log_p); } }, Doc => q{ =for ref
Probability mass function for poisson distribution. Uses Stirling's formula for all values of the input. See http://en.wikipedia.org/wiki/Stirling's_approximation for more info. }, );
pp_def('pmf_poisson_factorial', Pars => 'ushort x(); l(); float+ [o]p()', GenericTypes => $F, HandleBad => 1, Code => q{ PDL_IF_BAD(if ($ISBAD($x()) || $ISBAD($l())) { $SETBAD($p()); continue; },) if ($x() < GSL_SF_FACT_NMAX) { $p() = exp( -1 * $l()) * pow($l(),$x()) / gsl_sf_fact( $x() ); } else { /* bail out */ $p() = 0; } }, PMCode => pp_line_numbers(__LINE__, <<'EOF'), sub PDL::pmf_poisson_factorial { my ($x, $l) = @_; my $pdlx = PDL->topdl($x); croak "Does not support input greater than 170. Please use pmf_poisson or pmf_poisson_stirling instead." if any($pdlx >= 170); PDL::_pmf_poisson_factorial_int($pdlx, $l, my $p = PDL->null); $p; } EOF Doc => <<'EOD', =for ref
Probability mass function for poisson distribution. Input is limited to x < 170 to avoid gsl_sf_fact() overflow. EOD );
pp_addpm {At=>'Bot'}, pp_line_numbers(__LINE__, <<'EOD');
plot_distr
Plots data distribution. When given specific distribution(s) to fit, returns % ref to sum log likelihood and parameter values under fitted distribution(s). See FUNCTIONS above for available distributions.
Default options (case insensitive):
MAXBN => 20,
# see PDL::Graphics::PGPLOT::Window for next options
WIN => undef, # pgwin object. not closed here if passed
# allows comparing multiple distr in same plot
# set env before passing WIN
DEV => '/xs' , # open and close dev for plotting if no WIN
# defaults to '/png' in Windows
COLOR => 1, # color for data distr
Usage:
# yes it threads :)
my $data = grandom( 500, 3 )->abs;
# ll on plot is sum across 3 data curves
my ($ll, $pars)
= $data->plot_distr( 'gaussian', 'lognormal', {DEV=>'/png'} );
# pars are from normalized data (ie data / bin_size)
print "$_\t@{$pars->{$_}}\n" for (sort keys %$pars);
print "$_\t$ll->{$_}\n" for (sort keys %$ll);
DEPENDENCIES
GSL - GNU Scientific Library
SEE ALSO
PDL::Graphics::PGPLOT
PDL::GSL::CDF
AUTHOR
Copyright (C) 2009 Maggie J. Xiong <maggiexyz users.sourceforge.net>, David Mertens
All rights reserved. There is no warranty. You are allowed to redistribute this software / documentation as described in the file COPYING in the PDL distribution.