1 use crate::runtime::metrics::batch::duration_as_u64;
2 use std::cmp;
3 use std::error::Error;
4 use std::fmt::{Display, Formatter};
5 use std::time::Duration;
6 
7 const DEFAULT_MIN_VALUE: Duration = Duration::from_nanos(100);
8 const DEFAULT_MAX_VALUE: Duration = Duration::from_secs(60);
9 
10 /// Default precision is 2^-2 = 25% max error
11 const DEFAULT_PRECISION: u32 = 2;
12 
13 /// Log Histogram
14 ///
15 /// This implements an [H2 Histogram](https://iop.systems/blog/h2-histogram/), a histogram similar
16 /// to HdrHistogram, but with better performance. It guarantees an error bound of `2^-p`.
17 ///
18 /// Unlike a traditional H2 histogram this has two small changes:
19 /// 1. The 0th bucket runs for `0..min_value`. This allows truncating a large number of buckets that
20 ///    would cover extremely short timescales that customers usually don't care about.
21 /// 2. The final bucket runs all the way to `u64::MAX` — traditional H2 histograms would truncate
22 ///    or reject these values.
23 ///
24 /// For information about the default configuration, see [`LogHistogramBuilder`].
25 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
26 pub struct LogHistogram {
27     /// Number of buckets in the histogram
28     pub(crate) num_buckets: usize,
29 
30     /// Precision of histogram. Error is bounded to 2^-p.
31     pub(crate) p: u32,
32 
33     /// All buckets `idx < bucket_offset` are grouped into bucket 0.
34     ///
35     /// This increases the smallest measurable value of the histogram.
36     pub(crate) bucket_offset: usize,
37 }
38 
39 impl Default for LogHistogram {
default() -> Self40     fn default() -> Self {
41         LogHistogramBuilder::default().build()
42     }
43 }
44 
45 impl LogHistogram {
46     /// Create a Histogram configuration directly from values for `n` and `p`.
47     ///
48     /// # Panics
49     /// - If `bucket_offset` is greater than the specified number of buckets, `(n - p + 1) * 2^p`
from_n_p(n: u32, p: u32, bucket_offset: usize) -> Self50     fn from_n_p(n: u32, p: u32, bucket_offset: usize) -> Self {
51         assert!(n >= p, "{n} (n) must be at least as large as {p} (p)");
52         let num_buckets = ((n - p + 1) * 1 << p) as usize - bucket_offset;
53         Self {
54             num_buckets,
55             p,
56             bucket_offset,
57         }
58     }
59 
60     /// Creates a builder for [`LogHistogram`]
builder() -> LogHistogramBuilder61     pub fn builder() -> LogHistogramBuilder {
62         LogHistogramBuilder::default()
63     }
64 
65     /// The maximum value that can be stored before truncation in this histogram
max_value(&self) -> u6466     pub fn max_value(&self) -> u64 {
67         let n = (self.num_buckets / (1 << self.p)) - 1 + self.p as usize;
68         (1_u64 << n) - 1
69     }
70 
value_to_bucket(&self, value: u64) -> usize71     pub(crate) fn value_to_bucket(&self, value: u64) -> usize {
72         let index = bucket_index(value, self.p);
73         let offset_bucket = if index < self.bucket_offset as u64 {
74             0
75         } else {
76             index - self.bucket_offset as u64
77         };
78         let max = self.num_buckets - 1;
79         offset_bucket.min(max as u64) as usize
80     }
81 
bucket_range(&self, bucket: usize) -> std::ops::Range<u64>82     pub(crate) fn bucket_range(&self, bucket: usize) -> std::ops::Range<u64> {
83         let LogHistogram {
84             p,
85             bucket_offset,
86             num_buckets,
87         } = self;
88         let input_bucket = bucket;
89         let bucket = bucket + bucket_offset;
90         let range_start_0th_bucket = match input_bucket {
91             0 => Some(0_u64),
92             _ => None,
93         };
94         let range_end_last_bucket = match input_bucket {
95             n if n == num_buckets - 1 => Some(u64::MAX),
96             _ => None,
97         };
98         if bucket < 1 << p {
99             // The first set of buckets are all size 1
100             let bucket = bucket as u64;
101             range_start_0th_bucket.unwrap_or(bucket)..range_end_last_bucket.unwrap_or(bucket + 1)
102         } else {
103             // Determine which range of buckets we're in, then determine which bucket in the range it is
104             let bucket = bucket as u64;
105             let p = *p as u64;
106             let w = (bucket >> p) - 1;
107             let base_bucket = (w + 1) * (1_u64 << p);
108             let offset = bucket - base_bucket;
109             let s = 1_u64 << (w + p);
110             let start = s + (offset << w);
111             let end = s + ((offset + 1) << w);
112 
113             range_start_0th_bucket.unwrap_or(start)..range_end_last_bucket.unwrap_or(end)
114         }
115     }
116 }
117 
118 /// Configuration for a [`LogHistogram`]
119 ///
120 /// The log-scaled histogram implements an H2 histogram where the first bucket covers
121 /// the range from 0 to [`LogHistogramBuilder::min_value`] and the final bucket covers
122 /// [`LogHistogramBuilder::max_value`] to infinity. The precision is bounded to the specified
123 /// [`LogHistogramBuilder::max_error`]. Specifically, the precision is the next smallest value
124 /// of `2^-p` such that it is smaller than the requested max error. You can also select `p` directly
125 /// with [`LogHistogramBuilder::precision_exact`].
126 ///
127 /// Depending on the selected parameters, the number of buckets required is variable. To ensure
128 /// that the histogram size is acceptable, callers may call [`LogHistogramBuilder::max_buckets`].
129 /// If the resulting histogram would require more buckets, then the method will return an error.
130 ///
131 /// ## Default values
132 /// The default configuration provides the following settings:
133 /// 1. `min_value`: 100ns
134 /// 2. `max_value`: 68 seconds. The final bucket covers all values >68 seconds
135 /// 3. `precision`: max error of 25%
136 ///
137 /// This uses 237 64-bit buckets.
138 #[derive(Default, Debug, Copy, Clone)]
139 pub struct LogHistogramBuilder {
140     max_value: Option<Duration>,
141     min_value: Option<Duration>,
142     precision: Option<u32>,
143 }
144 
145 impl From<LogHistogramBuilder> for LogHistogram {
from(value: LogHistogramBuilder) -> Self146     fn from(value: LogHistogramBuilder) -> Self {
147         value.build()
148     }
149 }
150 
151 impl LogHistogramBuilder {
152     /// Set the precision for this histogram
153     ///
154     /// This function determines the smallest value of `p` that would satisfy the requested precision
155     /// such that `2^-p` is less than `precision`. To set `p` directly, use
156     /// [`LogHistogramBuilder::precision_exact`].
157     ///
158     /// The default value is 25% (2^-2)
159     ///
160     /// The highest supported precision is `0.0977%` `(2^-10)`. Provided values
161     /// less than this will be truncated.
162     ///
163     /// # Panics
164     /// - `precision` < 0
165     /// - `precision` > 1
max_error(mut self, max_error: f64) -> Self166     pub fn max_error(mut self, max_error: f64) -> Self {
167         if max_error < 0.0 {
168             panic!("precision must be >= 0");
169         };
170         if max_error > 1.0 {
171             panic!("precision must be > 1");
172         };
173         let mut p = 2;
174         while 2_f64.powf(-1.0 * p as f64) > max_error && p <= 10 {
175             p += 1;
176         }
177         self.precision = Some(p);
178         self
179     }
180 
181     /// Sets the precision of this histogram directly.
182     ///
183     /// # Panics
184     /// - `p` < 2
185     /// - `p` > 10
precision_exact(mut self, p: u32) -> Self186     pub fn precision_exact(mut self, p: u32) -> Self {
187         if p < 2 {
188             panic!("precision must be >= 2");
189         };
190         if p > 10 {
191             panic!("precision must be <= 10");
192         };
193         self.precision = Some(p);
194         self
195     }
196 
197     /// Sets the minimum duration that can be accurately stored by this histogram.
198     ///
199     /// This sets the resolution. The first bucket will be no larger than
200     /// the provided duration. Setting this value will reduce the number of required buckets,
201     /// sometimes quite significantly.
min_value(mut self, duration: Duration) -> Self202     pub fn min_value(mut self, duration: Duration) -> Self {
203         self.min_value = Some(duration);
204         self
205     }
206 
207     /// Sets the maximum value that can by this histogram without truncation
208     ///
209     /// Values greater than this fall in the final bucket that stretches to `u64::MAX`.
210     ///
211     /// # Panics
212     /// The provided value is 0
max_value(mut self, duration: Duration) -> Self213     pub fn max_value(mut self, duration: Duration) -> Self {
214         if duration.is_zero() {
215             panic!("max value must be greater than 0");
216         }
217         self.max_value = Some(duration);
218         self
219     }
220 
221     /// Builds the log histogram, enforcing the max buckets requirement
max_buckets( &mut self, max_buckets: usize, ) -> Result<LogHistogram, InvalidHistogramConfiguration>222     pub fn max_buckets(
223         &mut self,
224         max_buckets: usize,
225     ) -> Result<LogHistogram, InvalidHistogramConfiguration> {
226         let histogram = self.build();
227         if histogram.num_buckets > max_buckets {
228             return Err(InvalidHistogramConfiguration::TooManyBuckets {
229                 required_bucket_count: histogram.num_buckets,
230             });
231         }
232         Ok(histogram)
233     }
234 
235     /// Builds the log histogram
build(&self) -> LogHistogram236     pub fn build(&self) -> LogHistogram {
237         let max_value = duration_as_u64(self.max_value.unwrap_or(DEFAULT_MAX_VALUE));
238         let max_value = max_value.next_power_of_two();
239         let min_value = duration_as_u64(self.min_value.unwrap_or(DEFAULT_MIN_VALUE));
240         let p = self.precision.unwrap_or(DEFAULT_PRECISION);
241         // determine the bucket offset by finding the bucket for the minimum value. We need to lower
242         // this by one to ensure we are at least as granular as requested.
243         let bucket_offset = cmp::max(bucket_index(min_value, p), 1) - 1;
244         // n must be at least as large as p
245         let n = max_value.ilog2().max(p);
246         LogHistogram::from_n_p(n, p, bucket_offset as usize)
247     }
248 }
249 
250 /// Error constructing a histogram
251 #[derive(Debug)]
252 pub enum InvalidHistogramConfiguration {
253     /// This histogram required more than the specified number of buckets
254     TooManyBuckets {
255         /// The number of buckets that would have been required
256         required_bucket_count: usize,
257     },
258 }
259 
260 impl Display for InvalidHistogramConfiguration {
fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result261     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
262         match self {
263             InvalidHistogramConfiguration::TooManyBuckets { required_bucket_count } =>
264                 write!(f, "The configuration for this histogram would have required {required_bucket_count} buckets")
265         }
266     }
267 }
268 
269 impl Error for InvalidHistogramConfiguration {}
270 
271 /// Compute the index for a given value + p combination
272 ///
273 /// This function does NOT enforce that the value is within the number of expected buckets.
bucket_index(value: u64, p: u32) -> u64274 fn bucket_index(value: u64, p: u32) -> u64 {
275     // Algorithm described here: https://iop.systems/blog/h2-histogram/
276     // find the highest non-zero digit
277     if value == 0 {
278         return 0;
279     }
280     let h = 63 - value.leading_zeros();
281     if h <= p {
282         value
283     } else {
284         let w = h - p;
285         ((w + 1) * (1_u32 << p)) as u64 + ((value - (1_u64 << h)) >> w)
286     }
287 }
288 
289 #[cfg(test)]
290 mod test {
291     use super::InvalidHistogramConfiguration;
292     use crate::runtime::metrics::histogram::h2_histogram::LogHistogram;
293     use crate::runtime::metrics::histogram::HistogramType;
294 
295     #[cfg(not(target_family = "wasm"))]
296     mod proptests {
297         use super::*;
298         use proptest::prelude::*;
valid_log_histogram_strategy() -> impl Strategy<Value = LogHistogram>299         fn valid_log_histogram_strategy() -> impl Strategy<Value = LogHistogram> {
300             (2..=50u32, 2..=16u32, 0..100usize).prop_map(|(n, p, bucket_offset)| {
301                 let p = p.min(n);
302                 let base = LogHistogram::from_n_p(n, p, 0);
303                 LogHistogram::from_n_p(n, p, bucket_offset.min(base.num_buckets - 1))
304             })
305         }
306 
307         // test against a wide assortment of different histogram configurations to ensure invariants hold
308         proptest! {
309             #[test]
310             fn proptest_log_histogram_invariants(histogram in valid_log_histogram_strategy()) {
311                 // 1. Assert that the first bucket always starts at 0
312                 let first_range = histogram.bucket_range(0);
313                 prop_assert_eq!(first_range.start, 0, "First bucket doesn't start at 0");
314 
315                 // Check that bucket ranges are disjoint and contiguous
316                 let mut prev_end = 0;
317                 let mut prev_size = 0;
318                 for bucket in 0..histogram.num_buckets {
319                     let range = histogram.bucket_range(bucket);
320                     prop_assert_eq!(range.start, prev_end, "Bucket ranges are not contiguous");
321                     prop_assert!(range.start < range.end, "Bucket range is empty or reversed");
322 
323                     let size = range.end - range.start;
324 
325                     // 2. Assert that the sizes of the buckets are always powers of 2
326                     if bucket > 0 && bucket < histogram.num_buckets - 1 {
327                         prop_assert!(size.is_power_of_two(), "Bucket size is not a power of 2");
328                     }
329 
330                     if bucket > 1 {
331                         // Assert that the sizes of the buckets are monotonically increasing
332                         // (after the first bucket, which may be smaller than the 0 bucket)
333                         prop_assert!(size >= prev_size, "Bucket sizes are not monotonically increasing: This size {size} (previous: {prev_size}). Bucket: {bucket}");
334                     }
335 
336 
337                     // 4. Assert that the size of the buckets is always within the error bound of 2^-p
338                     if bucket > 0 && bucket < histogram.num_buckets - 1 {
339                         let p = histogram.p as f64;
340                         let error_bound = 2.0_f64.powf(-p);
341                         // the most it could be wrong is by the length of the range / 2
342                         let relative_error = ((size as f64 - 1.0) / 2.0) / range.start as f64;
343                         prop_assert!(
344                             relative_error <= error_bound,
345                             "Bucket size error exceeds bound: {:?} > {:?} ({range:?})",
346                             relative_error,
347                             error_bound
348                         );
349                     }
350 
351                     prev_end = range.end;
352                     prev_size = size;
353                 }
354                 prop_assert_eq!(prev_end, u64::MAX, "Last bucket should end at u64::MAX");
355 
356                 // Check bijection between value_to_bucket and bucket_range
357                 for bucket in 0..histogram.num_buckets {
358                     let range = histogram.bucket_range(bucket);
359                     for value in [range.start, range.end - 1] {
360                         prop_assert_eq!(
361                             histogram.value_to_bucket(value),
362                             bucket,
363                             "value_to_bucket is not consistent with bucket_range"
364                         );
365                     }
366                 }
367             }
368         }
369     }
370 
371     #[test]
bucket_ranges_are_correct()372     fn bucket_ranges_are_correct() {
373         let p = 2;
374         let config = HistogramType::H2(LogHistogram {
375             num_buckets: 1024,
376             p,
377             bucket_offset: 0,
378         });
379 
380         // check precise buckets. There are 2^(p+1) precise buckets
381         for i in 0..2_usize.pow(p + 1) {
382             assert_eq!(
383                 config.value_to_bucket(i as u64),
384                 i,
385                 "{} should be in bucket {}",
386                 i,
387                 i
388             );
389         }
390 
391         let mut value = 2_usize.pow(p + 1);
392         let current_bucket = value;
393         while value < current_bucket * 2 {
394             assert_eq!(
395                 config.value_to_bucket(value as u64),
396                 current_bucket + ((value - current_bucket) / 2),
397                 "bucket for {value}"
398             );
399             value += 1;
400         }
401     }
402 
403     // test buckets against known values
404     #[test]
bucket_computation_spot_check()405     fn bucket_computation_spot_check() {
406         let p = 9;
407         let config = HistogramType::H2(LogHistogram {
408             num_buckets: 4096,
409             p,
410             bucket_offset: 0,
411         });
412         struct T {
413             v: u64,
414             bucket: usize,
415         }
416         let tests = [
417             T { v: 1, bucket: 1 },
418             T {
419                 v: 1023,
420                 bucket: 1023,
421             },
422             T {
423                 v: 1024,
424                 bucket: 1024,
425             },
426             T {
427                 v: 2048,
428                 bucket: 1536,
429             },
430             T {
431                 v: 2052,
432                 bucket: 1537,
433             },
434         ];
435         for test in tests {
436             assert_eq!(config.value_to_bucket(test.v), test.bucket);
437         }
438     }
439 
440     #[test]
last_bucket_goes_to_infinity()441     fn last_bucket_goes_to_infinity() {
442         let conf = HistogramType::H2(LogHistogram::from_n_p(16, 3, 10));
443         assert_eq!(conf.bucket_range(conf.num_buckets() - 1).end, u64::MAX);
444     }
445 
446     #[test]
bucket_offset()447     fn bucket_offset() {
448         // skip the first 10 buckets
449         let conf = HistogramType::H2(LogHistogram::from_n_p(16, 3, 10));
450         for i in 0..10 {
451             assert_eq!(conf.value_to_bucket(i), 0);
452         }
453         // There are 16 1-element buckets. We skipped 10 of them. The first 2 element bucket starts
454         // at 16
455         assert_eq!(conf.value_to_bucket(10), 0);
456         assert_eq!(conf.value_to_bucket(16), 6);
457         assert_eq!(conf.value_to_bucket(17), 6);
458         assert_eq!(conf.bucket_range(6), 16..18);
459     }
460 
461     #[test]
max_buckets_enforcement()462     fn max_buckets_enforcement() {
463         let error = LogHistogram::builder()
464             .max_error(0.001)
465             .max_buckets(5)
466             .expect_err("this produces way more than 5 buckets");
467         let num_buckets = match error {
468             InvalidHistogramConfiguration::TooManyBuckets {
469                 required_bucket_count,
470             } => required_bucket_count,
471         };
472         assert_eq!(num_buckets, 27549);
473     }
474 
475     #[test]
default_configuration_size()476     fn default_configuration_size() {
477         let conf = LogHistogram::builder().build();
478         assert_eq!(conf.num_buckets, 119);
479     }
480 }
481