#/**
# * © 2016 and later: Unicode, Inc. and others.
# * License & terms of use: http://www.unicode.org/copyright.html
# *******************************************************************************
# * Copyright (C) 2002-2004, International Business Machines Corporation and    *
# * others. All Rights Reserved.                                                *
# *******************************************************************************
# */
package Dataset;
use Statistics::Descriptive;
use Statistics::Distributions;
use Carp;
use strict;
use warnings;

# A Dataset summarizes a list of numeric samples as mean +/- error, with
# an optional scaling factor that is applied when the values are read
# back through getMean() and getError().
#
# Internal fields (all stored unscaled):
#   _data   array ref holding a private copy of the samples
#   _scale  multiplier applied by getMean()/getError(); starts at 1.0
#   _mean   arithmetic mean of the samples (0.0 when there are none)
#   _error  half-width of the 99% interval (0.0 for fewer than 2 samples)

# Create a new Dataset with the given data.
#
# Usage:   my $ds = Dataset->new(@samples);
# Params:  zero or more numeric samples.
# Returns: the new blessed object.
#
# With no samples the mean and error are both 0.0; with exactly one
# sample the mean is that sample and the error stays 0.0.
sub new {
    # Unpacking into a lexical array copies the values.  Storing \@_
    # instead would keep aliases to the caller's variables, so a later
    # change on the caller's side would silently alter this Dataset.
    my ($class, @samples) = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = scalar @samples;
    return $self if $n < 1;

    my $stats = Statistics::Descriptive::Full->new();
    $stats->add_data(@samples);
    $self->{_mean} = $stats->mean();

    if ($n >= 2) {
        # Use a t distribution rather than Gaussian because (a) we
        # assume an underlying normal dist, (b) we do not know the
        # standard deviation -- we estimate it from the data, and (c)
        # we MAY have a small sample size (also works for large n).
        #
        # n-1 degrees of freedom; 0.005 in the upper tail corresponds to
        # a two-sided 99% interval.
        #
        # NOTE(review): the half-width is t * standard deviation, not
        # t * standard deviation / sqrt(n) as a confidence interval for
        # the mean would use.  Left unchanged because callers' reported
        # numbers depend on it -- confirm whether this is intended.
        my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
        $self->{_error} = $t * $stats->standard_deviation();
    }

    return $self;
}

# Set a scaling factor for all data; 1.0 means no scaling.
# Scale must be > 0.
#
# Params:  $scale - the new scaling factor.
# Returns: the scaling factor that was stored.
#
# NOTE(review): the "> 0" requirement is documented but not enforced
# here; enforcement is left to the caller.
sub setScale {
    my ($self, $scale) = @_;
    $self->{_scale} = $scale;
    return $self->{_scale};
}

# Multiply the scaling factor by a value.
#
# Params:  $a - the multiplier applied to the current scaling factor.
# Returns: the updated scaling factor.
sub scaleBy {
    my ($self, $factor) = @_;
    $self->{_scale} *= $factor;
    return $self->{_scale};
}

# Return the mean, multiplied by the current scaling factor.
sub getMean {
    my ($self) = @_;
    return $self->{_mean} * $self->{_scale};
}

# Return a 99% error based on the t distribution, multiplied by the
# current scaling factor.  The dataset is described as
# getMean() +/- getError().
sub getError {
    my ($self) = @_;
    return $self->{_error} * $self->{_scale};
}

# Divide two Datasets and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
#
# Usage:   my $ratio = $numerator->divide($denominator);
# Params:  $rhs - the Dataset to divide by.
# Returns: a new Dataset of the same class as the invocant whose
#          interval runs from the smallest ratio, (mean - error) of the
#          invocant over (mean + error) of $rhs, to the largest ratio,
#          (mean + error) of the invocant over (mean - error) of $rhs.
#          Its mean is the midpoint of those two ratios and its scale is
#          the quotient of the two scales.
# Dies:    croaks if either end of $rhs's interval is exactly 0, since
#          the corresponding ratio is undefined.
#
# NOTE(review): when $rhs's error exceeds its mean the lower divisor is
# negative and the "largest" ratio is no longer the largest; that case
# is passed through unchanged, as before.
sub divide {
    my ($self, $rhs) = @_;

    # The computation works on the unscaled mean and error; the scales
    # are combined separately below.
    my $divisor_high = $rhs->{_mean} + $rhs->{_error};
    my $divisor_low  = $rhs->{_mean} - $rhs->{_error};

    croak "Dataset::divide: division by zero "
        . "(divisor mean +/- error reaches 0)"
        if $divisor_high == 0 || $divisor_low == 0;

    my $minratio = ($self->{_mean} - $self->{_error}) / $divisor_high;
    my $maxratio = ($self->{_mean} + $self->{_error}) / $divisor_low;

    # Build the result in the invocant's class so that subclasses get
    # an object of their own type back.
    my $result = (ref($self) || __PACKAGE__)->new();
    $result->{_mean}  = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}

1;