1 use std::{mem, ops}; 2 3 use crate::stats::float::Float; 4 use crate::stats::tuple::{Tuple, TupledDistributionsBuilder}; 5 use crate::stats::univariate::Percentiles; 6 use crate::stats::univariate::Resamples; 7 #[cfg(feature = "rayon")] 8 use rayon::prelude::*; 9 10 /// A collection of data points drawn from a population 11 /// 12 /// Invariants: 13 /// 14 /// - The sample contains at least 2 data points 15 /// - The sample contains no `NaN`s 16 #[repr(transparent)] 17 pub struct Sample<A>([A]); 18 19 // TODO(rust-lang/rfcs#735) move this `impl` into a private percentiles module 20 impl<A> Sample<A> 21 where 22 A: Float, 23 { 24 /// Creates a new sample from an existing slice 25 /// 26 /// # Panics 27 /// 28 /// Panics if `slice` contains any `NaN` or if `slice` has less than two elements 29 #[cfg_attr(feature = "cargo-clippy", allow(clippy::new_ret_no_self))] new(slice: &[A]) -> &Sample<A>30 pub fn new(slice: &[A]) -> &Sample<A> { 31 assert!(slice.len() > 1 && slice.iter().all(|x| !x.is_nan())); 32 33 unsafe { mem::transmute(slice) } 34 } 35 36 /// Returns the biggest element in the sample 37 /// 38 /// - Time: `O(length)` max(&self) -> A39 pub fn max(&self) -> A { 40 let mut elems = self.iter(); 41 42 match elems.next() { 43 Some(&head) => elems.fold(head, |a, &b| a.max(b)), 44 // NB `unreachable!` because `Sample` is guaranteed to have at least one data point 45 None => unreachable!(), 46 } 47 } 48 49 /// Returns the arithmetic average of the sample 50 /// 51 /// - Time: `O(length)` mean(&self) -> A52 pub fn mean(&self) -> A { 53 let n = self.len(); 54 55 self.sum() / A::cast(n) 56 } 57 58 /// Returns the median absolute deviation 59 /// 60 /// The `median` can be optionally passed along to speed up (2X) the computation 61 /// 62 /// - Time: `O(length)` 63 /// - Memory: `O(length)` median_abs_dev(&self, median: Option<A>) -> A where usize: cast::From<A, Output = Result<usize, cast::Error>>,64 pub fn median_abs_dev(&self, median: Option<A>) -> A 65 where 66 usize: cast::From<A, Output = Result<usize, cast::Error>>, 67 { 68 let median = median.unwrap_or_else(|| self.percentiles().median()); 69 70 // NB Although this operation can be SIMD accelerated, the gain is negligible because the 71 // bottle neck is the sorting operation which is part of the computation of the median 72 let abs_devs = self.iter().map(|&x| (x - median).abs()).collect::<Vec<_>>(); 73 74 let abs_devs: &Self = Self::new(&abs_devs); 75 76 abs_devs.percentiles().median() * A::cast(1.4826) 77 } 78 79 /// Returns the median absolute deviation as a percentage of the median 80 /// 81 /// - Time: `O(length)` 82 /// - Memory: `O(length)` median_abs_dev_pct(&self) -> A where usize: cast::From<A, Output = Result<usize, cast::Error>>,83 pub fn median_abs_dev_pct(&self) -> A 84 where 85 usize: cast::From<A, Output = Result<usize, cast::Error>>, 86 { 87 let _100 = A::cast(100); 88 let median = self.percentiles().median(); 89 let mad = self.median_abs_dev(Some(median)); 90 91 (mad / median) * _100 92 } 93 94 /// Returns the smallest element in the sample 95 /// 96 /// - Time: `O(length)` min(&self) -> A97 pub fn min(&self) -> A { 98 let mut elems = self.iter(); 99 100 match elems.next() { 101 Some(&elem) => elems.fold(elem, |a, &b| a.min(b)), 102 // NB `unreachable!` because `Sample` is guaranteed to have at least one data point 103 None => unreachable!(), 104 } 105 } 106 107 /// Returns a "view" into the percentiles of the sample 108 /// 109 /// This "view" makes consecutive computations of percentiles much faster (`O(1)`) 110 /// 111 /// - Time: `O(N log N) where N = length` 112 /// - Memory: `O(length)` percentiles(&self) -> Percentiles<A> where usize: cast::From<A, Output = Result<usize, cast::Error>>,113 pub fn percentiles(&self) -> Percentiles<A> 114 where 115 usize: cast::From<A, Output = Result<usize, cast::Error>>, 116 { 117 use std::cmp::Ordering; 118 119 // NB This function assumes that there are no `NaN`s in the sample 120 fn cmp<T>(a: &T, b: &T) -> Ordering 121 where 122 T: PartialOrd, 123 { 124 match a.partial_cmp(b) { 125 Some(o) => o, 126 // Arbitrary way to handle NaNs that should never happen 127 None => Ordering::Equal, 128 } 129 } 130 131 let mut v = self.to_vec().into_boxed_slice(); 132 #[cfg(feature = "rayon")] 133 v.par_sort_unstable_by(cmp); 134 #[cfg(not(feature = "rayon"))] 135 v.sort_unstable_by(cmp); 136 137 // NB :-1: to intra-crate privacy rules 138 unsafe { mem::transmute(v) } 139 } 140 141 /// Returns the standard deviation of the sample 142 /// 143 /// The `mean` can be optionally passed along to speed up (2X) the computation 144 /// 145 /// - Time: `O(length)` std_dev(&self, mean: Option<A>) -> A146 pub fn std_dev(&self, mean: Option<A>) -> A { 147 self.var(mean).sqrt() 148 } 149 150 /// Returns the standard deviation as a percentage of the mean 151 /// 152 /// - Time: `O(length)` std_dev_pct(&self) -> A153 pub fn std_dev_pct(&self) -> A { 154 let _100 = A::cast(100); 155 let mean = self.mean(); 156 let std_dev = self.std_dev(Some(mean)); 157 158 (std_dev / mean) * _100 159 } 160 161 /// Returns the sum of all the elements of the sample 162 /// 163 /// - Time: `O(length)` sum(&self) -> A164 pub fn sum(&self) -> A { 165 crate::stats::sum(self) 166 } 167 168 /// Returns the t score between these two samples 169 /// 170 /// - Time: `O(length)` t(&self, other: &Sample<A>) -> A171 pub fn t(&self, other: &Sample<A>) -> A { 172 let (x_bar, y_bar) = (self.mean(), other.mean()); 173 let (s2_x, s2_y) = (self.var(Some(x_bar)), other.var(Some(y_bar))); 174 let n_x = A::cast(self.len()); 175 let n_y = A::cast(other.len()); 176 let num = x_bar - y_bar; 177 let den = (s2_x / n_x + s2_y / n_y).sqrt(); 178 179 num / den 180 } 181 182 /// Returns the variance of the sample 183 /// 184 /// The `mean` can be optionally passed along to speed up (2X) the computation 185 /// 186 /// - Time: `O(length)` var(&self, mean: Option<A>) -> A187 pub fn var(&self, mean: Option<A>) -> A { 188 use std::ops::Add; 189 190 let mean = mean.unwrap_or_else(|| self.mean()); 191 let slice = self; 192 193 let sum = slice 194 .iter() 195 .map(|&x| (x - mean).powi(2)) 196 .fold(A::cast(0), Add::add); 197 198 sum / A::cast(slice.len() - 1) 199 } 200 201 // TODO Remove the `T` parameter in favor of `S::Output` 202 /// Returns the bootstrap distributions of the parameters estimated by the 1-sample statistic 203 /// 204 /// - Multi-threaded 205 /// - Time: `O(nresamples)` 206 /// - Memory: `O(nresamples)` bootstrap<T, S>(&self, nresamples: usize, statistic: S) -> T::Distributions where S: Fn(&Sample<A>) -> T + Sync, T: Tuple + Send, T::Distributions: Send, T::Builder: Send,207 pub fn bootstrap<T, S>(&self, nresamples: usize, statistic: S) -> T::Distributions 208 where 209 S: Fn(&Sample<A>) -> T + Sync, 210 T: Tuple + Send, 211 T::Distributions: Send, 212 T::Builder: Send, 213 { 214 #[cfg(feature = "rayon")] 215 { 216 (0..nresamples) 217 .into_par_iter() 218 .map_init( 219 || Resamples::new(self), 220 |resamples, _| statistic(resamples.next()), 221 ) 222 .fold( 223 || T::Builder::new(0), 224 |mut sub_distributions, sample| { 225 sub_distributions.push(sample); 226 sub_distributions 227 }, 228 ) 229 .reduce( 230 || T::Builder::new(0), 231 |mut a, mut b| { 232 a.extend(&mut b); 233 a 234 }, 235 ) 236 .complete() 237 } 238 #[cfg(not(feature = "rayon"))] 239 { 240 let mut resamples = Resamples::new(self); 241 (0..nresamples) 242 .map(|_| statistic(resamples.next())) 243 .fold(T::Builder::new(0), |mut sub_distributions, sample| { 244 sub_distributions.push(sample); 245 sub_distributions 246 }) 247 .complete() 248 } 249 } 250 251 #[cfg(test)] iqr(&self) -> A where usize: cast::From<A, Output = Result<usize, cast::Error>>,252 pub fn iqr(&self) -> A 253 where 254 usize: cast::From<A, Output = Result<usize, cast::Error>>, 255 { 256 self.percentiles().iqr() 257 } 258 259 #[cfg(test)] median(&self) -> A where usize: cast::From<A, Output = Result<usize, cast::Error>>,260 pub fn median(&self) -> A 261 where 262 usize: cast::From<A, Output = Result<usize, cast::Error>>, 263 { 264 self.percentiles().median() 265 } 266 } 267 268 impl<A> ops::Deref for Sample<A> { 269 type Target = [A]; 270 deref(&self) -> &[A]271 fn deref(&self) -> &[A] { 272 &self.0 273 } 274 } 275