PointEstimation.pm - metacpan.org


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
—
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
              package Statistics::PointEstimation;
use strict;
use Carp;
use vars qw($VERSION @ISA $AUTOLOAD);
use Statistics::Distributions qw(chisqrdistr tdistr fdistr udistr uprob chisqrprob tprob fprob);
use Statistics::Descriptive;
use POSIX;
@ISA= qw (Statistics::Descriptive::Full);
$VERSION = '1.1';
# Table of the per-object fields that describe a confidence interval.
# Every entry starts out undefined. Within this file the table is copied into
# each new object by new(), iterated by print_confidence_interval(), and used
# by AUTOLOAD as the list of field names it may expose as accessors.
my %confidence_interval = map { $_ => undef } qw(
        significance
        alpha
        df
        standard_error
        t_value
        t_statistic
        t_prob
        delta
        upper_clm
        lower_clm
        valid
);
         
# Constructor. May be invoked on the class name or on an existing instance.
# The object itself is built by the parent class constructor; a private copy
# of the confidence-interval field table is then stored under the
# 'confidence' key and the object is (re)blessed into the requested class.
sub new {
        my ($invocant) = @_;
        my $class = ref($invocant) || $invocant;

        my $self = $class->SUPER::new();
        # Shallow copy, so the instance never shares the file-level table.
        $self->{confidence} = { %confidence_interval };

        return bless $self, $class;
}
# Recomputes every confidence-interval field of the object from its current
# count(), mean() and standard_deviation().
#
# Fields written: significance (set to 95 when not yet defined), df, alpha,
# standard_error, t_value, delta, upper_clm, lower_clm, t_statistic, t_prob
# and valid.
#
# Croaks when count() is 1 or less. Returns 1 on success.
sub compute_confidence_interval {
        my ($self) = @_;

        my $n = $self->count();
        croak "sample size must be >1 to compute the confidence interval \n"
                if $n <= 1;

        $self->{significance} = 95 unless defined $self->{significance};

        my $df = $self->{df} = $n - 1;
        # Half of the non-covered percentage, converted to a fraction.
        my $alpha = $self->{alpha} = ((100 - $self->{significance}) / 2) / 100;

        my $std_err = $self->{standard_error}
                = $self->standard_deviation() / sqrt($n);
        my $t_crit = $self->{t_value} = abs(tdistr($df, $alpha));

        # The interval is mean +/- delta.
        my $half_width = $self->{delta} = $t_crit * $std_err;
        my $mean = $self->mean();
        $self->{upper_clm} = $mean + $half_width;
        $self->{lower_clm} = $mean - $half_width;

        # A zero standard error would divide by zero; report T = 0 instead.
        my $t_stat = $self->{t_statistic} = $std_err ? ($mean / $std_err) : 0;

        # Stored as t_prob; output_confidence_interval() labels it Prob >|T|.
        my $p_at_neg = tprob($df, -1 * $t_stat);
        my $p_at_pos = tprob($df, $t_stat);
        $self->{t_prob} = 1 - abs($p_at_neg - $p_at_pos);

        $self->{valid} = 1;
        return 1;
}
# Adds observations to the sample and refreshes the confidence interval.
#
# Arguments: either a list of values or a single array reference.
#
# The data is passed to the parent class's add_data(). The significance level
# stored on the object is saved before that call and written back afterwards
# (presumably because the parent call may drop non-core keys -- confirm
# against the Statistics::Descriptive version in use). The interval is
# recomputed only when the object's count is defined and greater than one.
#
# Always returns 1.
sub add_data {
        my $self = shift;

        my $aref;
        if (ref($_[0]) eq 'ARRAY') {
                $aref = $_[0];
        }
        else {
                $aref = \@_;
        }

        # Declared unconditionally: perlsyn documents "my $x = ... if COND" as
        # undefined behaviour, and an undefined level round-trips harmlessly.
        my $significance = $self->{significance};

        $self->SUPER::add_data($aref);
        $self->{significance} = $significance;

        if (defined($self->{count}) && $self->{count} > 1) {
                $self->compute_confidence_interval();
        }
        return 1;
}
# Sets the significance (confidence) level, usually 90, 95 or 99.
#
# The level is stored only when it lies strictly between 0 and 100; any other
# value is silently ignored and the previous level is kept. When the object's
# count is defined and greater than one, the confidence interval is recomputed
# straight away. Always returns 1.
sub set_significance {
        my ($self, $level) = @_;

        if ($level > 0 && $level < 100) {
                $self->{significance} = $level;
        }

        my $n = $self->{count};
        if (defined($n) && $n > 1) {
                $self->compute_confidence_interval();
        }

        return 1;
}
# Dumps the mean, the variance and every confidence-interval field of the
# object to the currently selected output handle, one "name:value" pair per
# line. The fields come out in the hash order of the file-level field table,
# so their order is not fixed. Always returns 1.
sub print_confidence_interval {
        my ($self) = @_;

        print "mean:", $self->mean(), "\n";
        print "variance:", $self->variance(), "\n";

        for my $field (keys %confidence_interval) {
                print "$field:", $self->{$field}, " \n";
        }

        return 1;
}
# Prints a human-readable summary of the sample and its confidence interval
# to the currently selected output handle.
#
# Argument: an optional title, interpolated into the first line.
# Croaks unless the object's 'valid' field equals 1, i.e. unless a confidence
# interval has been computed. Returns the result of the final print.
sub output_confidence_interval {
        my ($self, $title) = @_;

        croak "sample size must be >1 to compute the confidence interval\n"
                unless $self->{valid} == 1;

        print "Summary  from the observed values of the sample $title:\n";

        print "\tsample size= ", $self->count(),
                " , degree of freedom=", $self->df(), "\n";

        print "\tmean=", $self->mean(),
                " , variance=", $self->variance(), "\n";

        print "\tstandard deviation=", $self->standard_deviation(),
                " , standard error=", $self->standard_error(), "\n";

        print "\t the estimate of the mean is ", $self->mean(),
                " +/- ", $self->delta(), "\n\t",
                " or (", $self->lower_clm(), " to ", $self->upper_clm,
                " ) with ", $self->significance, " % of confidence\n";

        print "\t t-statistic=T=", $self->t_statistic(),
                " , Prob >|T|=", $self->t_prob(), "\n";
}
# Fallback read-only accessor for methods that are not defined explicitly.
#
# The requested method name is served from the object's hash when it is listed
# either in the object's '_permitted' table or in the file-level
# confidence-interval field table; any other name croaks. Every call also
# stores a reference to that field table on the object as '_confidence'.
# DESTROY is ignored. Croaks when invoked on something that is not a reference.
sub AUTOLOAD {
        my ($self) = @_;

        my $type = ref($self);
        croak "$self is not an object" unless $type;

        # Strip the package qualifier to obtain the bare method name.
        (my $name = $AUTOLOAD) =~ s/.*://;

        $self->{_confidence} = \%confidence_interval;
        return if $name eq "DESTROY";

        my $known = exists($self->{_permitted}->{$name})
                || exists($self->{_confidence}->{$name});
        croak "Can't access `$name' field in class $type" unless $known;

        return $self->{$name};
}
1;
package Statistics::PointEstimation::Sufficient;
use strict;
use Carp;
use vars qw($VERSION $AUTOLOAD @ISA);
use POSIX;
@ISA=qw (Statistics::PointEstimation);
$VERSION='1.1';
# Table of every field an object of this class carries: the sufficient
# statistics (count, mean, variance, standard_deviation) followed by the
# confidence-interval fields. Every entry starts out undefined. Within this
# package the table seeds each new object in new() and is used by AUTOLOAD as
# the list of field names it may expose as accessors.
my %fields = map { $_ => undef } qw(
        count
        mean
        variance
        standard_deviation
        significance
        alpha
        df
        standard_error
        t_value
        t_statistic
        t_prob
        delta
        upper_clm
        lower_clm
        valid
);
# Constructor. May be invoked on the class name or on an existing instance.
# Unlike the parent class it does not call the parent constructor: the object
# is a plain hash seeded with a copy of the field table, every value undefined.
sub new {
        my ($invocant) = @_;
        my $class = ref($invocant) || $invocant;

        return bless { %fields }, $class;
}
# Raw observations are not accepted by this class, so the inherited add_data()
# is overridden to croak unconditionally.
sub add_data {
        croak(
                "the add_data() method is not supported in Statistics::PointEstimation::Sufficient\n"
        );
}
# Loads the sufficient statistics of a sample and refreshes the confidence
# interval.
#
# Arguments: the sample size, the sample mean and the sample variance.
#
# Stores count, mean and variance on the object and derives
# standard_deviation as the square root of the variance. When the sample size
# is greater than one, the confidence interval is recomputed. Otherwise the
# 'valid' flag is cleared, so that an interval left over from an earlier load
# is not reported for data it does not describe.
#
# Croaks on a negative variance, before the object is modified.
# Returns nothing.
sub load_data {
        my ($self, $count, $mean, $variance) = @_;

        # Validate first so that a bad call cannot leave the object half-updated.
        croak "the sample variance must not be negative"
                if defined($variance) && $variance < 0;

        $self->{count}              = $count;
        $self->{mean}               = $mean;
        $self->{variance}           = $variance;
        $self->{standard_deviation} = sqrt($variance);

        if (defined($count) && $count > 1) {
                $self->compute_confidence_interval();
        }
        else {
                # No interval can be computed for this sample; drop the stale flag.
                $self->{valid} = undef;
        }

        return;
}
# Fallback read-only accessor for methods that are not defined explicitly.
#
# The requested method name is served from the object's hash when it is listed
# in this package's field table; any other name croaks. Every call also stores
# a reference to that table on the object as '_confidence'. DESTROY is
# ignored. Croaks when invoked on something that is not a reference.
sub AUTOLOAD {
        my ($self) = @_;

        my $type = ref($self);
        croak "$self is not an object" unless $type;

        $self->{_confidence} = \%fields;

        # Strip the package qualifier to obtain the bare method name.
        (my $name = $AUTOLOAD) =~ s/.*://;
        return if $name eq "DESTROY";

        croak "Can't access `$name' field in class $type"
                unless exists($self->{_confidence}->{$name});

        return $self->{$name};
}
1;
  
__END__

=head1 NAME

Statistics::PointEstimation - Perl module for computing confidence intervals in parameter estimation with Student's T distribution

Statistics::PointEstimation::Sufficient - Perl module for computing the confidence intervals using sufficient statistics

=head1 SYNOPSIS

  # example for Statistics::PointEstimation
  use Statistics::PointEstimation;
  my @r=();
  for($i=1;$i<=32;$i++) #generate a uniformly distributed sample with mean=5   
  {
          $rand=rand(10);
          push @r,$rand;
  }
  my $stat = new Statistics::PointEstimation;
  $stat->set_significance(95); #set the significance(confidence) level to 95%
  $stat->add_data(@r);
  $stat->output_confidence_interval(); #output summary
  $stat->print_confidence_interval();  #output the data hash related to confidence interval estimation
  #the following is the same as $stat->output_confidence_interval();
  print "Summary  from the observed values of the sample:\n";
  print "\tsample size= ", $stat->count()," , degree of freedom=", $stat->df(), "\n";
  print "\tmean=", $stat->mean()," , variance=", $stat->variance(),"\n";
  print "\tstandard deviation=", $stat->standard_deviation()," , standard error=", $stat->standard_error(),"\n";
  print "\t the estimate of the mean is ", $stat->mean()," +/- ",$stat->delta(),"\n\t",
  " or (",$stat->lower_clm()," to ",$stat->upper_clm," ) with ",$stat->significance," % of confidence\n";
  print "\t t-statistic=T=",$stat->t_statistic()," , Prob >|T|=",$stat->t_prob(),"\n";
  #example for Statistics::PointEstimation::Sufficient
  use strict;
  use Statistics::PointEstimation;
  my ($count,$mean,$variance)=(30,3.996,1.235); 
  my $stat = new Statistics::PointEstimation::Sufficient;
  $stat->set_significance(99);
  $stat->load_data($count,$mean,$variance);
  $stat->output_confidence_interval();
  $stat->set_significance(95);
  $stat->output_confidence_interval();

=head1 DESCRIPTION

=head2 Statistics::PointEstimation

  This module is a subclass of Statistics::Descriptive::Full. It uses the Student's t-distribution for point
  estimation of the mean, assuming the data are normally distributed or the sample size is sufficiently large.
  It overrides the add_data() method of Statistics::Descriptive to compute the confidence interval at the
  specified significance (confidence) level (the default is 95%). It also computes the t-statistic T and
  Prob>|T|, which can be used for hypothesis testing such as paired t-tests.

=head2 Statistics::PointEstimation::Sufficient

  This module is a subclass of Statistics::PointEstimation. Instead of taking the raw data points as input,
  it computes the confidence interval from the sufficient statistics and the sample size supplied.
  To use this module, pass the sample size, the sample mean, and the sample variance to the load_data()
  method. The output is exactly the same as that of the Statistics::PointEstimation module.
  
=head1 AUTHOR

Yun-Fang Juan , Yahoo! Inc.  (yunfang@yahoo-inc.com)

=head1 SEE ALSO

Statistics::Descriptive Statistics::Distributions

=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)