use strict;
use warnings;
my $F = [qw(F D)];
pp_addpm({At=>'Top'}, <<'EOD');
=encoding utf8
=head1 NAME
PDL::Stats::TS -- basic time series functions
=head1 DESCRIPTION
The terms FUNCTIONS and METHODS are arbitrarily used to refer to
methods that are threadable and methods that are NOT threadable,
respectively. Plots require L<PDL::Graphics::Simple>.
***EXPERIMENTAL!*** In particular, bad value support is spotty and may be shaky. USE WITH DISCRETION!
=head1 SYNOPSIS
use PDL::LiteF;
use PDL::Stats::TS;
my $r = $data->acf(5);
=cut
use strict;
use warnings;
use Carp;
use PDL::LiteF;
use PDL::Stats::Basic;
use PDL::Stats::Kmeans;
EOD
# Extra C preamble handed to pp_addhdr: math.h (fabs() is used by the error
# measures below) and four standard-normal z constants. The same four numbers
# are what plot_acf uses for SIG levels .10, .05, .01 and .001; none of the
# Code sections visible in this file reference the Z* macros directly.
pp_addhdr('
#include <math.h>
#define Z10 1.64485362695147
#define Z05 1.95996398454005
#define Z01 2.5758293035489
#define Z001 3.29052673149193
'
);
# acf: autocorrelation of x(t) for lags 0 .. h. The Perl wrapper sizes the
# output dim as h+1 and supplies the default lag.
pp_def('acf',
  Pars => 'x(t); [o]r(h)',
  OtherPars => 'IV lag=>h',
  GenericTypes => $F,
  Code => '
    $GENERIC(x) s, s2, m, cov0, covh;
    s=0; s2=0; m=0; cov0=0; covh=0;
    PDL_Indx T, i;
    T = $SIZE(t);
    /* single pass for the sum and the sum of squares */
    loop(t) %{
      s += $x();
      s2 += $x()*$x();
    %}
    m = s/T;
    /* lag-0 sum of squared deviations; it is 0 for a constant series, */
    /* in which case every r beyond lag 0 is not finite */
    cov0 = s2 - T * m * m;
    loop (h) %{
      if (h) {
        covh = 0;
        for (i=0; i<T-h; i++) {
          covh += ($x(t=>i) - m) * ($x(t=>i+h) - m);
        }
        $r() = covh / cov0;
      }
      else {
        $r() = 1;
      }
    %}
  ',
  PMCode => pp_line_numbers(__LINE__, <<'EOF'),
sub PDL::acf {
  my ($self, $h) = @_;
  # Fall back to the maximum lag only when no lag was passed at all, so that
  # an explicit lag of 0 returns just the lag-0 value instead of every lag.
  $h = $self->dim(0) - 1 if !defined $h;
  PDL::_acf_int($self, my $r = PDL->null, $h+1);
  $r;
}
EOF
  Doc => <<'EOD',
=for ref
Autocorrelation function for up to lag h. If h is not specified it's set to t-1 by default.
acf does not process bad values.
=for example
usage:
pdl> $a = sequence 10
# lags 0 .. 5
pdl> p $a->acf(5)
[1 0.7 0.41212121 0.14848485 -0.078787879 -0.25757576]
EOD
);
# acvf: sums of lagged cross-products of deviations from the mean for
# lags 0 .. h (not divided by the series length). The Perl wrapper sizes the
# output dim as h+1 and supplies the default lag.
pp_def('acvf',
  Pars => 'x(t); [o]v(h)',
  OtherPars => 'IV lag=>h;',
  GenericTypes => $F,
  Code => '
    $GENERIC(x) s, s2, m, covh;
    s=0; s2=0; m=0; covh=0;
    /* PDL_Indx rather than long: matches acf and the type of the h index */
    PDL_Indx T, i;
    T = $SIZE(t);
    loop(t) %{
      s += $x();
      s2 += $x()*$x();
    %}
    m = s/T;
    loop (h) %{
      if (h) {
        covh = 0;
        for (i=0; i<T-h; i++) {
          covh += ($x(t=>i) - m) * ($x(t=>i+h) - m);
        }
        $v() = covh;
      }
      else {
        /* lag 0 comes straight from the accumulated sums */
        $v() = s2 - T * m * m;
      }
    %}
  ',
  PMCode => pp_line_numbers(__LINE__, <<'EOF'),
sub PDL::acvf {
  my ($self, $h) = @_;
  # Fall back to the maximum lag only when no lag was passed at all. An
  # explicit 0 must yield the single lag-0 value, which the documented
  # "acvf(5) / acvf(0)" usage relies on.
  $h = $self->dim(0) - 1 if !defined $h;
  PDL::_acvf_int($self, my $v = PDL->null, $h+1);
  $v;
}
EOF
  Doc => <<'EOD',
=for ref
Autocovariance function for up to lag h. If h is not specified it's set to t-1 by default.
acvf does not process bad values.
=for example
usage:
pdl> $a = sequence 10
# lags 0 .. 5
pdl> p $a->acvf(5)
[82.5 57.75 34 12.25 -6.5 -21.25]
# autocorrelation
pdl> p $a->acvf(5) / $a->acvf(0)
[1 0.7 0.41212121 0.14848485 -0.078787879 -0.25757576]
EOD
);
# dseason: centred moving-average filter of period d.
# Odd d uses a plain mean over 2q+1 points (q = (d-1)/2); even d uses 2q+1
# points (q = d/2) with half weight on the two end points. Window positions
# outside the range of good data are clamped to the first / last good index.
# With bad values present, the divisor is the total weight of the good terms
# actually summed.
pp_def('dseason',
  Pars => 'x(t); indx d(); [o]xd(t)',
  GenericTypes => $F,
  HandleBad => 1,
  Code => '
    PDL_Indx i, max = PDL_IF_BAD(,$SIZE(t))-1, min = PDL_IF_BAD(-1,0);
    PDL_Indx q = ($d() % 2)? ($d() - 1) / 2 : $d() / 2;
    /*find good min and max ind*/
    loop (t) %{
      PDL_IF_BAD(if ($ISBAD($x())) continue;,)
      if (min < 0) min = t;
      max = t;
    %}
    if ($d() % 2) {
      /* odd period: unweighted mean of the good values in the window */
      loop(t) %{
        PDL_IF_BAD(if (t < min || t > max) { $SETBAD(xd()); continue; },)
        $GENERIC(x) sum = 0; PDL_IF_BAD(PDL_Indx dd = 0;,)
        for (i=-q; i<=q; i++) {
          PDL_Indx ti = (t+i < min)? min
                      : (t+i > max)? max
                      : t+i
                      ;
          PDL_IF_BAD(if ($ISBAD($x(t=>ti))) continue; dd++;,)
          sum += $x(t=>ti);
        }
        PDL_IF_BAD(if (!dd) { $SETBAD(xd()); continue; },)
        $xd() = sum / PDL_IF_BAD(dd,$d());
      %}
    } else {
      /* even period: both end points of the window carry weight one half */
      loop(t) %{
        PDL_IF_BAD(if (t < min || t > max) { $SETBAD(xd()); continue; },)
        /* wsum accumulates the weights of the good terms; it has to be a */
        /* floating type because the end points contribute one half each  */
        $GENERIC(x) sum = 0; PDL_IF_BAD(double wsum = 0;,)
        for (i=-q; i<=q; i++) {
          PDL_Indx ti = (t+i < min)? min
                      : (t+i > max)? max
                      : t+i
                      ;
          double w = (i == q || i == -q)? .5 : 1;
          PDL_IF_BAD(if ($ISBAD($x(t=>ti))) continue; wsum += w;,)
          sum += w * $x(t=>ti);
        }
        PDL_IF_BAD(if (wsum <= 0) { $SETBAD(xd()); continue; },)
        $xd() = sum / PDL_IF_BAD(wsum,$d());
      %}
    }
  ',
  Doc => 'Deseasonalize data using moving average filter the size of period d.',
);
# fill_ma: replace each bad x(t) by the mean of the good values found in the
# window t-q .. t+q. Window positions before the start / after the end of the
# series read the first / last element. Good values are copied unchanged.
pp_def('fill_ma',
  Pars => 'x(t); indx q(); [o]xf(t)',
  GenericTypes => $F,
  HandleBad => 1,
  Code => '
    $GENERIC(x) sum, xx;
    PDL_Indx i, n, max = $SIZE(t) - 1;
    loop(t) %{
      PDL_IF_BAD(if ($ISBAD(x())) {
        n=0; sum=0;
        for (i=-$q(); i<=$q(); i++) {
          xx = (t+i < 0)? $x(t=>0)
             : (t+i > max)? $x(t=>max)
             : $x(t=>t+i)
             ;
          if ($ISGOODVAR(xx,x)) {
            sum += xx;
            n ++;
          }
        }
        if (n) {
          $xf() = sum / n;
        }
        else {
          $SETBAD(xf());
        }
        continue;
      },)
      /* good value: pass through */
      $xf() = $x();
    %}
  ',
  PMCode => pp_line_numbers(__LINE__, <<'EOF'),
sub PDL::fill_ma {
  my ($x, $q) = @_;
  PDL::_fill_ma_int($x, $q, my $x_filled = PDL->null);
  # re-derive the bad flag from the data: it stays set only if some window
  # contained no good value at all
  $x_filled->check_badflag;
#  carp "ma window too small, still has bad value"
#    if $x_filled->badflag;
  return $x_filled;
}
EOF
  Doc => <<'EOD',
=for ref
Fill missing value with moving average. Each bad x(t) is replaced by the mean of the good values among x(t-q .. t+q), i.e. xf(t) = sum(good x(t-q .. t+q)) / (number of good values in the window). Window positions beyond either end of the series are read from the first / last element. Good values are copied unchanged.
=for bad
fill_ma does handle bad values. Output pdl bad flag is cleared unless the specified window size q is too small and there are still bad values.
EOD
);
# filter_exp: exponential smoothing along t with smoothing weight a.
pp_def('filter_exp',
  Pars => 'x(t); a(); [o]xf(t)',
  GenericTypes => $F,
  Code => '
    /* weight kept from the previous smoothed value */
    $GENERIC(x) keep = 1 - $a();
    $GENERIC(x) level;
    loop(t) %{
      /* the first point seeds the smoother; later points blend with it */
      if (t == 0)
        level = $x();
      else
        level = $a() * $x() + keep * level;
      $xf() = level;
    %}
  ',
  Doc => 'Filter, exponential smoothing. xf(t) = a * x(t) + (1-a) * xf(t-1)',
);
# filter_ma: centred moving average over 2q+1 points; window positions that
# fall off either end of the series reuse the first / last element.
pp_def('filter_ma',
  Pars => 'x(t); indx q(); [o]xf(t)',
  GenericTypes => $F,
  Code => '
    PDL_Indx last = $SIZE(t) - 1;
    PDL_Indx width = 2 * $q() + 1;
    PDL_Indx k;
    loop(t) %{
      $GENERIC(x) acc = 0;
      for (k = -$q(); k <= $q(); k++) {
        /* clamp the window position into the series */
        PDL_Indx pos = t + k;
        if (pos < 0)
          pos = 0;
        else if (pos > last)
          pos = last;
        acc += $x(t=>pos);
      }
      $xf() = acc / width;
    %}
  ',
  Doc => 'Filter, moving average. xf(t) = sum(x(t-q .. t+q)) / (2q + 1)',
);
# mae: mean of |a - b| over n; pairs with a bad value on either side are
# skipped, and the result is bad when no pair is left.
pp_def('mae',
  Pars => 'a(n); b(n); [o]c()',
  GenericTypes => $F,
  HandleBad => 1,
  Code => '
    $GENERIC(c) total = 0;
    /* number of pairs used: counted when bad values are possible */
    PDL_Indx count = PDL_IF_BAD(0,$SIZE(n));
    loop(n) %{
      PDL_IF_BAD(if ($ISBAD($a()) || $ISBAD(b())) continue; count++;,)
      total += fabs( $a() - $b() );
    %}
    if (count < 1) {
      $SETBAD(c());
    }
    else {
      $c() = total / count;
    }
  ',
  Doc => 'Mean absolute error. MAE = 1/n * sum( abs(y - y_pred) )',
);
# mape: mean of |(a - b) / a| over n; pairs with a bad value on either side
# are skipped, and the result is bad when no pair is left.
pp_def('mape',
  Pars => 'a(n); b(n); [o]c()',
  GenericTypes => $F,
  HandleBad => 1,
  Code => '
    $GENERIC(c) total = 0;
    /* number of pairs used: counted when bad values are possible */
    PDL_Indx count = PDL_IF_BAD(0,$SIZE(n));
    loop(n) %{
      PDL_IF_BAD(if ($ISBAD($a()) || $ISBAD(b())) continue; count++;,)
      total += fabs( ($a() - $b()) / $a() );
    %}
    if (count < 1) {
      $SETBAD(c());
    }
    else {
      $c() = total / count;
    }
  ',
  Doc => 'Mean absolute percent error. MAPE = 1/n * sum(abs((y - y_pred) / y))',
);
# wmape: sum of |a - b| divided by sum of |a| over n; pairs with a bad value
# on either side are skipped, and the result is bad when the divisor is zero.
pp_def('wmape',
  Pars => 'a(n); b(n); [o]c()',
  GenericTypes => $F,
  HandleBad => 1,
  Code => '
    $GENERIC(c) abs_err = 0, abs_obs = 0;
    loop(n) %{
      PDL_IF_BAD(if ($ISBAD($a()) || $ISBAD(b())) continue;,)
      abs_err += fabs( $a() - $b() );
      abs_obs += fabs( $a() );
    %}
    if (abs_obs == 0) {
      $SETBAD(c());
    }
    else {
      $c() = abs_err / abs_obs;
    }
  ',
  Doc => 'Weighted mean absolute percent error. avg(abs(error)) / avg(abs(data)). Much more robust compared to mape with division by zero error (cf. Schütz, W., & Kolassa, 2006).',
);
# portmanteau: Q = t * (t+2) * sum over lags h >= 1 of r(h)^2 / (t - h),
# where r holds autocorrelations starting at lag 0 and t is the series length.
pp_def('portmanteau',
  Pars => 'r(h); longlong t(); [o]Q()',
  GenericTypes => $F,
  Code => '
    $GENERIC(r) acc = 0;
    loop(h) %{
      /* lag 0 does not enter the statistic */
      if (!h) continue;
      acc += $r()*$r() / ($t() - h);
    %}
    $Q() = $t() * ($t()+2) * acc;
  ',
  Doc => '
=for ref
Portmanteau significance test (Ljung-Box) for autocorrelations.
=for example
Usage:
pdl> $a = sequence 10
# acf for lags 0-5
# lag 0 excluded from portmanteau
pdl> p $chisq = $a->acf(5)->portmanteau( $a->nelem )
11.1753902662994
# get p-value from chisq distr
pdl> use PDL::GSL::CDF
pdl> p 1 - gsl_cdf_chisq_P( $chisq, 5 )
0.0480112934306748
',
);
# pred_ar: autoregressive extension of a series.
#
# Low-level PP code: x(p) holds the ord = SIZE(p) starting values and b(p) the
# coefficients. For t < ord the starting values are copied to pred(t) and into
# a scratch buffer xp. For every later t the code forms
#   xt = sum over p of xp[p] * b(ord-p-1)
# while shifting xp left by one slot and storing xt in the last slot, so the
# order of the two statements inside the inner loop matters.
#
# Perl wrapper (PDL::pred_ar): takes ($x, $b, $t, $opt); option keys are
# upper-cased and CONST defaults to 1. With CONST set, the recursion is run on
# $b without its last element, the last element is then added to the extended
# part, and that part is appended to $x. Without CONST all of $b is used as
# coefficients.
# NOTE(review): the constant is added once after the recursion has finished
# rather than being fed back into each step -- confirm this is intended.
pp_def('pred_ar',
Pars => 'x(p); b(p); [o]pred(t)',
# the requested output length t is passed as a plain integer
OtherPars => 'IV end=>t;',
GenericTypes => $F,
Code => '
PDL_Indx ord = $SIZE(p);
$GENERIC(x) xt, xp[ord];
loop (t) %{
if (t < ord) {
xp[t] = $x(p=>t);
$pred() = xp[t];
}
else {
xt = 0;
loop(p) %{
xt += xp[p] * $b(p=>ord-p-1);
xp[p] = (p < ord - 1)? xp[p+1] : xt;
%}
$pred() = xt;
}
%}
',
# wrapper: slices the tail of $x needed to seed the recursion and works out
# how many values the low-level routine has to produce
PMCode => pp_line_numbers(__LINE__, <<'EOF'),
sub PDL::pred_ar {
my ($x, $b, $t, $opt) = @_;
my %opt = ( CONST => 1 );
if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
$b = PDL->topdl($b); # allows passing simple number
my $ext;
if ($opt{CONST}) {
my $t_ = $t - ( $x->dim(0) - $b->dim(0) + 1 );
PDL::_pred_ar_int($x->slice([-$b->dim(0)+1,-1]), $b->slice('0:-2'), $ext = PDL->null, $t_);
$ext->slice([$b->dim(0)-1,-1]) += $b->slice(-1);
return $x->append( $ext->slice([$b->dim(0)-1,-1]) );
} else {
my $t_ = $t - ( $x->dim(0) - $b->dim(0) );
PDL::_pred_ar_int($x->slice([-$b->dim(0),-1]), $b, $ext = PDL->null, $t_);
return $x->append($ext->slice([$b->dim(0),-1]));
}
}
EOF
Doc => <<'EOD',
=for ref
Calculates predicted values up to period t (extend current series up to period t) for autoregressive series, with or without constant. If there is constant, it is the last element in b, as would be returned by ols or ols_t.
pred_ar does not process bad values.
=for options
CONST => 1,
=for example
Usage:
pdl> $x = sequence 2
# last element is constant
pdl> $b = pdl(.8, -.2, .3)
pdl> p $x->pred_ar($b, 7)
[0 1 1.1 0.74 0.492 0.3656 0.31408]
# no constant
pdl> p $x->pred_ar($b(0:1), 7, {const=>0})
[0 1 0.8 0.44 0.192 0.0656 0.01408]
EOD
);
pp_addpm pp_line_numbers(__LINE__, <<'EOD');
=head2 season_m
Given length of season, returns seasonal mean and variance for each period
(returns seasonal mean only in scalar context).
=for options
Default options (case insensitive):
START_POSITION => 0, # series starts at this position in season
MISSING => -999, # internal mark for missing points in season
PLOT => 0, # boolean
# see PDL::Graphics::Simple for next options
WIN => undef, # pass pgswin object for more plotting control
COLOR => 1,
=for usage
my ($m, $ms) = $data->season_m( 24, { START_POSITION=>2 } );
=cut
*season_m = \&PDL::season_m;
# season_m($d, \%opt): per-position statistics over seasons of length $d.
# The series is padded out to whole seasons with the MISSING marker (turned
# into bad values), every time point is assigned to its position within the
# season, and centroid() is run on that grouping. Returns both centroid()
# outputs in list context and only the first (the means) in scalar context.
# Croaks if no season length is given.
sub PDL::season_m {
  my ($self, $d, $opt) = @_;
  # fail early with a clear message instead of dying on a division by undef
  !defined($d) and croak "please set season period length";
  my %opt = (
    START_POSITION => 0,     # series starts at this position in season
    MISSING        => -999,  # internal mark for missing points in season
    PLOT           => 0,
    WIN            => undef, # pass pgswin object for more plotting control
    COLOR          => 1,
  );
  if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
  # number of (possibly partial) seasons needed to hold the series
  my $n_season = ($self->dim(0) + $opt{START_POSITION}) / $d;
  $n_season = pdl($n_season)->ceil->sum->sclr;
  my @dims = $self->dims;
  $dims[0] = $n_season * $d;
  # padded copy: MISSING everywhere, the series dropped in at START_POSITION
  my $data = zeroes( @dims ) + $opt{MISSING};
  $data->slice([$opt{START_POSITION},$opt{START_POSITION} + $self->dim(0)-1]) .= $self;
  $data->badflag(1);
  $data->inplace->setvaltobad( $opt{MISSING} );
  # position-in-season label for every padded time point, as a cluster mask
  my $s = sequence $d;
  $s = $s->dummy(1, $n_season)->flat;
  $s = $s->iv_cluster();
  my ($m, $ms) = $data->centroid( $s );
  if ($opt{PLOT}) {
    require PDL::Graphics::Simple;
    my $w = $opt{WIN} || PDL::Graphics::Simple::pgswin();
    my $seq = sequence($d);
    my $errb_length = sqrt( $ms / $s->sumover )->squeeze;
    my $col = $opt{COLOR};
    my @plots = map +(with=>'lines', ke=>"Data $col", style=>$col++, $seq, $_), $m->dog;
    # error bars only for a single series with some non-zero spread
    push @plots, with=>'errorbars', ke=>'Error', style=>$opt{COLOR}, $seq, $m->squeeze, $errb_length
      if $m->squeeze->ndims < 2 && ($errb_length > 0)->any;
    $w->plot(@plots, { xlabel=>'period', ylabel=>'mean' });
  }
  return wantarray? ($m, $ms) : $m;
}
=head2 plot_dseason
=for ref
Plots deseasonalized data and original data points. Opens and closes
default window for plotting unless a C<WIN> object is passed in
options. Returns deseasonalized data.
=for options
Default options (case insensitive):
WIN => undef,
COLOR => 1, # data point color
=cut
*plot_dseason = \&PDL::plot_dseason;
# plot_dseason($d, \%opt): deseasonalise with dseason($d), draw the result as
# lines and the original observations as points, and return the
# deseasonalised data. Croaks if no season length is given.
sub PDL::plot_dseason {
  require PDL::Graphics::Simple;
  my ($self, $d, $opt) = @_;
  !defined($d) and croak "please set season period length";
  $self = $self->squeeze;
  my %opt = (
      WIN   => undef,
      COLOR => 1,       # data point color
  );
  if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
  my $dsea = $self->dseason($d);
  my $w = $opt{WIN} || PDL::Graphics::Simple::pgswin();
  my $seq = sequence($self->dim(0));
  # Legend keys follow what is drawn: the filtered series are the lines, the
  # raw observations are the points (the two labels used to be swapped).
  my $col = $opt{COLOR};
  my @plots = map +(with=>'lines', ke=>"De-seasonalised $col", style=>$col++, $seq, $_), $dsea->dog;
  $col = $opt{COLOR};
  push @plots, map +(with=>'points', ke=>"Data $col", style=>$col++, $seq, $_), $self->dog;
  $w->plot(@plots, { xlabel=>'T', ylabel=>'DV' });
  return $dsea;
}
=head1 METHODS
=head2 plot_acf
=for ref
Plots and returns autocorrelations for a time series.
=for options
Default options (case insensitive):
SIG => 0.05, # can specify .10, .05, .01, or .001
WIN => undef,
=for usage
Usage:
pdl> $a = sequence 10
pdl> p $r = $a->plot_acf(5)
[1 0.7 0.41212121 0.14848485 -0.078787879 -0.25757576]
=cut
*plot_acf = \&PDL::plot_acf;
# plot_acf($h, \%opt): compute acf($h), draw one vertical spike per lag
# together with +/- z / sqrt(n) reference lines for the chosen SIG level, and
# return the autocorrelations. A trailing hash ref is taken as options; an
# unrecognised SIG falls back to 0.05 with a warning.
sub PDL::plot_acf {
  require PDL::Graphics::Simple;
  my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef;
  my ($self, $h) = @_;
  # acf() accepts a missing lag, but $h is also needed below for the x range
  # and the per-lag spikes, so resolve the same default (maximum lag) here.
  $h = $self->dim(0) - 1 unless defined $h;
  my $r = $self->acf($h);
  my %opt = (
      SIG => 0.05,
      WIN => undef,
  );
  if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
  my $y_sig = ($opt{SIG} == 0.10)?   1.64485362695147
            : ($opt{SIG} == 0.05)?   1.95996398454005
            : ($opt{SIG} == 0.01)?   2.5758293035489
            : ($opt{SIG} == 0.001)?  3.29052673149193
            :                        0
            ;
  unless ($y_sig) {
    carp "SIG outside of recognized value. default to 0.05";
    $y_sig = 1.95996398454005;
  }
  my $w = $opt{WIN} || PDL::Graphics::Simple::pgswin();
  my $seq = pdl(-1,$h+1);
  my $y_seq = ones(2) * $y_sig / sqrt($self->dim(0)) * -1;
  $w->plot(
    with=>'lines', $seq, zeroes(2), # x axis
    with=>'lines', style=>2, $seq, $y_seq,
    with=>'lines', style=>2, $seq, -$y_seq,
    (map +(with=>'lines', ones(2)*$_, pdl(0, $r->slice("($_)"))), 0..$h), { xlabel=>'lag', ylabel=>'acf', }
  );
  $r;
}
=head1 REFERENCES
Brockwell, P.J., & Davis, R.A. (2002). Introduction to Time Series and Forecasting (2nd ed.). New York, NY: Springer.
Schütz, W., & Kolassa, S. (2006). Foresight: advantages of the MAD/Mean ratio over the MAPE. Retrieved Jan 28, 2010, from http://www.saf-ag.com/226+M5965d28cd19.html
=head1 AUTHOR
Copyright (C) 2009 Maggie J. Xiong <maggiexyz users.sourceforge.net>
All rights reserved. There is no warranty. You are allowed to redistribute this software / documentation as described in the file COPYING in the PDL distribution.
=cut
EOD
pp_done();