1 use crate::runtime::metrics::batch::duration_as_u64;
2 use std::cmp;
3 use std::error::Error;
4 use std::fmt::{Display, Formatter};
5 use std::time::Duration;
6
7 const DEFAULT_MIN_VALUE: Duration = Duration::from_nanos(100);
8 const DEFAULT_MAX_VALUE: Duration = Duration::from_secs(60);
9
10 /// Default precision is 2^-2 = 25% max error
11 const DEFAULT_PRECISION: u32 = 2;
12
13 /// Log Histogram
14 ///
15 /// This implements an [H2 Histogram](https://iop.systems/blog/h2-histogram/), a histogram similar
16 /// to HdrHistogram, but with better performance. It guarantees an error bound of `2^-p`.
17 ///
18 /// Unlike a traditional H2 histogram this has two small changes:
19 /// 1. The 0th bucket runs for `0..min_value`. This allows truncating a large number of buckets that
20 /// would cover extremely short timescales that customers usually don't care about.
21 /// 2. The final bucket runs all the way to `u64::MAX` — traditional H2 histograms would truncate
22 /// or reject these values.
23 ///
24 /// For information about the default configuration, see [`LogHistogramBuilder`].
25 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
26 pub struct LogHistogram {
27 /// Number of buckets in the histogram
28 pub(crate) num_buckets: usize,
29
30 /// Precision of histogram. Error is bounded to 2^-p.
31 pub(crate) p: u32,
32
33 /// All buckets `idx < bucket_offset` are grouped into bucket 0.
34 ///
35 /// This increases the smallest measurable value of the histogram.
36 pub(crate) bucket_offset: usize,
37 }
38
39 impl Default for LogHistogram {
default() -> Self40 fn default() -> Self {
41 LogHistogramBuilder::default().build()
42 }
43 }
44
45 impl LogHistogram {
46 /// Create a Histogram configuration directly from values for `n` and `p`.
47 ///
48 /// # Panics
49 /// - If `bucket_offset` is greater than the specified number of buckets, `(n - p + 1) * 2^p`
from_n_p(n: u32, p: u32, bucket_offset: usize) -> Self50 fn from_n_p(n: u32, p: u32, bucket_offset: usize) -> Self {
51 assert!(n >= p, "{n} (n) must be at least as large as {p} (p)");
52 let num_buckets = ((n - p + 1) * 1 << p) as usize - bucket_offset;
53 Self {
54 num_buckets,
55 p,
56 bucket_offset,
57 }
58 }
59
60 /// Creates a builder for [`LogHistogram`]
builder() -> LogHistogramBuilder61 pub fn builder() -> LogHistogramBuilder {
62 LogHistogramBuilder::default()
63 }
64
65 /// The maximum value that can be stored before truncation in this histogram
max_value(&self) -> u6466 pub fn max_value(&self) -> u64 {
67 let n = (self.num_buckets / (1 << self.p)) - 1 + self.p as usize;
68 (1_u64 << n) - 1
69 }
70
value_to_bucket(&self, value: u64) -> usize71 pub(crate) fn value_to_bucket(&self, value: u64) -> usize {
72 let index = bucket_index(value, self.p);
73 let offset_bucket = if index < self.bucket_offset as u64 {
74 0
75 } else {
76 index - self.bucket_offset as u64
77 };
78 let max = self.num_buckets - 1;
79 offset_bucket.min(max as u64) as usize
80 }
81
bucket_range(&self, bucket: usize) -> std::ops::Range<u64>82 pub(crate) fn bucket_range(&self, bucket: usize) -> std::ops::Range<u64> {
83 let LogHistogram {
84 p,
85 bucket_offset,
86 num_buckets,
87 } = self;
88 let input_bucket = bucket;
89 let bucket = bucket + bucket_offset;
90 let range_start_0th_bucket = match input_bucket {
91 0 => Some(0_u64),
92 _ => None,
93 };
94 let range_end_last_bucket = match input_bucket {
95 n if n == num_buckets - 1 => Some(u64::MAX),
96 _ => None,
97 };
98 if bucket < 1 << p {
99 // The first set of buckets are all size 1
100 let bucket = bucket as u64;
101 range_start_0th_bucket.unwrap_or(bucket)..range_end_last_bucket.unwrap_or(bucket + 1)
102 } else {
103 // Determine which range of buckets we're in, then determine which bucket in the range it is
104 let bucket = bucket as u64;
105 let p = *p as u64;
106 let w = (bucket >> p) - 1;
107 let base_bucket = (w + 1) * (1_u64 << p);
108 let offset = bucket - base_bucket;
109 let s = 1_u64 << (w + p);
110 let start = s + (offset << w);
111 let end = s + ((offset + 1) << w);
112
113 range_start_0th_bucket.unwrap_or(start)..range_end_last_bucket.unwrap_or(end)
114 }
115 }
116 }
117
118 /// Configuration for a [`LogHistogram`]
119 ///
120 /// The log-scaled histogram implements an H2 histogram where the first bucket covers
121 /// the range from 0 to [`LogHistogramBuilder::min_value`] and the final bucket covers
122 /// [`LogHistogramBuilder::max_value`] to infinity. The precision is bounded to the specified
123 /// [`LogHistogramBuilder::max_error`]. Specifically, the precision is the next smallest value
124 /// of `2^-p` such that it is smaller than the requested max error. You can also select `p` directly
125 /// with [`LogHistogramBuilder::precision_exact`].
126 ///
127 /// Depending on the selected parameters, the number of buckets required is variable. To ensure
128 /// that the histogram size is acceptable, callers may call [`LogHistogramBuilder::max_buckets`].
129 /// If the resulting histogram would require more buckets, then the method will return an error.
130 ///
131 /// ## Default values
132 /// The default configuration provides the following settings:
133 /// 1. `min_value`: 100ns
134 /// 2. `max_value`: 68 seconds. The final bucket covers all values >68 seconds
135 /// 3. `precision`: max error of 25%
136 ///
137 /// This uses 237 64-bit buckets.
138 #[derive(Default, Debug, Copy, Clone)]
139 pub struct LogHistogramBuilder {
140 max_value: Option<Duration>,
141 min_value: Option<Duration>,
142 precision: Option<u32>,
143 }
144
145 impl From<LogHistogramBuilder> for LogHistogram {
from(value: LogHistogramBuilder) -> Self146 fn from(value: LogHistogramBuilder) -> Self {
147 value.build()
148 }
149 }
150
151 impl LogHistogramBuilder {
152 /// Set the precision for this histogram
153 ///
154 /// This function determines the smallest value of `p` that would satisfy the requested precision
155 /// such that `2^-p` is less than `precision`. To set `p` directly, use
156 /// [`LogHistogramBuilder::precision_exact`].
157 ///
158 /// The default value is 25% (2^-2)
159 ///
160 /// The highest supported precision is `0.0977%` `(2^-10)`. Provided values
161 /// less than this will be truncated.
162 ///
163 /// # Panics
164 /// - `precision` < 0
165 /// - `precision` > 1
max_error(mut self, max_error: f64) -> Self166 pub fn max_error(mut self, max_error: f64) -> Self {
167 if max_error < 0.0 {
168 panic!("precision must be >= 0");
169 };
170 if max_error > 1.0 {
171 panic!("precision must be > 1");
172 };
173 let mut p = 2;
174 while 2_f64.powf(-1.0 * p as f64) > max_error && p <= 10 {
175 p += 1;
176 }
177 self.precision = Some(p);
178 self
179 }
180
181 /// Sets the precision of this histogram directly.
182 ///
183 /// # Panics
184 /// - `p` < 2
185 /// - `p` > 10
precision_exact(mut self, p: u32) -> Self186 pub fn precision_exact(mut self, p: u32) -> Self {
187 if p < 2 {
188 panic!("precision must be >= 2");
189 };
190 if p > 10 {
191 panic!("precision must be <= 10");
192 };
193 self.precision = Some(p);
194 self
195 }
196
197 /// Sets the minimum duration that can be accurately stored by this histogram.
198 ///
199 /// This sets the resolution. The first bucket will be no larger than
200 /// the provided duration. Setting this value will reduce the number of required buckets,
201 /// sometimes quite significantly.
min_value(mut self, duration: Duration) -> Self202 pub fn min_value(mut self, duration: Duration) -> Self {
203 self.min_value = Some(duration);
204 self
205 }
206
207 /// Sets the maximum value that can by this histogram without truncation
208 ///
209 /// Values greater than this fall in the final bucket that stretches to `u64::MAX`.
210 ///
211 /// # Panics
212 /// The provided value is 0
max_value(mut self, duration: Duration) -> Self213 pub fn max_value(mut self, duration: Duration) -> Self {
214 if duration.is_zero() {
215 panic!("max value must be greater than 0");
216 }
217 self.max_value = Some(duration);
218 self
219 }
220
221 /// Builds the log histogram, enforcing the max buckets requirement
max_buckets( &mut self, max_buckets: usize, ) -> Result<LogHistogram, InvalidHistogramConfiguration>222 pub fn max_buckets(
223 &mut self,
224 max_buckets: usize,
225 ) -> Result<LogHistogram, InvalidHistogramConfiguration> {
226 let histogram = self.build();
227 if histogram.num_buckets > max_buckets {
228 return Err(InvalidHistogramConfiguration::TooManyBuckets {
229 required_bucket_count: histogram.num_buckets,
230 });
231 }
232 Ok(histogram)
233 }
234
235 /// Builds the log histogram
build(&self) -> LogHistogram236 pub fn build(&self) -> LogHistogram {
237 let max_value = duration_as_u64(self.max_value.unwrap_or(DEFAULT_MAX_VALUE));
238 let max_value = max_value.next_power_of_two();
239 let min_value = duration_as_u64(self.min_value.unwrap_or(DEFAULT_MIN_VALUE));
240 let p = self.precision.unwrap_or(DEFAULT_PRECISION);
241 // determine the bucket offset by finding the bucket for the minimum value. We need to lower
242 // this by one to ensure we are at least as granular as requested.
243 let bucket_offset = cmp::max(bucket_index(min_value, p), 1) - 1;
244 // n must be at least as large as p
245 let n = max_value.ilog2().max(p);
246 LogHistogram::from_n_p(n, p, bucket_offset as usize)
247 }
248 }
249
250 /// Error constructing a histogram
251 #[derive(Debug)]
252 pub enum InvalidHistogramConfiguration {
253 /// This histogram required more than the specified number of buckets
254 TooManyBuckets {
255 /// The number of buckets that would have been required
256 required_bucket_count: usize,
257 },
258 }
259
260 impl Display for InvalidHistogramConfiguration {
fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result261 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
262 match self {
263 InvalidHistogramConfiguration::TooManyBuckets { required_bucket_count } =>
264 write!(f, "The configuration for this histogram would have required {required_bucket_count} buckets")
265 }
266 }
267 }
268
269 impl Error for InvalidHistogramConfiguration {}
270
271 /// Compute the index for a given value + p combination
272 ///
273 /// This function does NOT enforce that the value is within the number of expected buckets.
bucket_index(value: u64, p: u32) -> u64274 fn bucket_index(value: u64, p: u32) -> u64 {
275 // Algorithm described here: https://iop.systems/blog/h2-histogram/
276 // find the highest non-zero digit
277 if value == 0 {
278 return 0;
279 }
280 let h = 63 - value.leading_zeros();
281 if h <= p {
282 value
283 } else {
284 let w = h - p;
285 ((w + 1) * (1_u32 << p)) as u64 + ((value - (1_u64 << h)) >> w)
286 }
287 }
288
289 #[cfg(test)]
290 mod test {
291 use super::InvalidHistogramConfiguration;
292 use crate::runtime::metrics::histogram::h2_histogram::LogHistogram;
293 use crate::runtime::metrics::histogram::HistogramType;
294
295 #[cfg(not(target_family = "wasm"))]
296 mod proptests {
297 use super::*;
298 use proptest::prelude::*;
valid_log_histogram_strategy() -> impl Strategy<Value = LogHistogram>299 fn valid_log_histogram_strategy() -> impl Strategy<Value = LogHistogram> {
300 (2..=50u32, 2..=16u32, 0..100usize).prop_map(|(n, p, bucket_offset)| {
301 let p = p.min(n);
302 let base = LogHistogram::from_n_p(n, p, 0);
303 LogHistogram::from_n_p(n, p, bucket_offset.min(base.num_buckets - 1))
304 })
305 }
306
307 // test against a wide assortment of different histogram configurations to ensure invariants hold
308 proptest! {
309 #[test]
310 fn proptest_log_histogram_invariants(histogram in valid_log_histogram_strategy()) {
311 // 1. Assert that the first bucket always starts at 0
312 let first_range = histogram.bucket_range(0);
313 prop_assert_eq!(first_range.start, 0, "First bucket doesn't start at 0");
314
315 // Check that bucket ranges are disjoint and contiguous
316 let mut prev_end = 0;
317 let mut prev_size = 0;
318 for bucket in 0..histogram.num_buckets {
319 let range = histogram.bucket_range(bucket);
320 prop_assert_eq!(range.start, prev_end, "Bucket ranges are not contiguous");
321 prop_assert!(range.start < range.end, "Bucket range is empty or reversed");
322
323 let size = range.end - range.start;
324
325 // 2. Assert that the sizes of the buckets are always powers of 2
326 if bucket > 0 && bucket < histogram.num_buckets - 1 {
327 prop_assert!(size.is_power_of_two(), "Bucket size is not a power of 2");
328 }
329
330 if bucket > 1 {
331 // Assert that the sizes of the buckets are monotonically increasing
332 // (after the first bucket, which may be smaller than the 0 bucket)
333 prop_assert!(size >= prev_size, "Bucket sizes are not monotonically increasing: This size {size} (previous: {prev_size}). Bucket: {bucket}");
334 }
335
336
337 // 4. Assert that the size of the buckets is always within the error bound of 2^-p
338 if bucket > 0 && bucket < histogram.num_buckets - 1 {
339 let p = histogram.p as f64;
340 let error_bound = 2.0_f64.powf(-p);
341 // the most it could be wrong is by the length of the range / 2
342 let relative_error = ((size as f64 - 1.0) / 2.0) / range.start as f64;
343 prop_assert!(
344 relative_error <= error_bound,
345 "Bucket size error exceeds bound: {:?} > {:?} ({range:?})",
346 relative_error,
347 error_bound
348 );
349 }
350
351 prev_end = range.end;
352 prev_size = size;
353 }
354 prop_assert_eq!(prev_end, u64::MAX, "Last bucket should end at u64::MAX");
355
356 // Check bijection between value_to_bucket and bucket_range
357 for bucket in 0..histogram.num_buckets {
358 let range = histogram.bucket_range(bucket);
359 for value in [range.start, range.end - 1] {
360 prop_assert_eq!(
361 histogram.value_to_bucket(value),
362 bucket,
363 "value_to_bucket is not consistent with bucket_range"
364 );
365 }
366 }
367 }
368 }
369 }
370
371 #[test]
bucket_ranges_are_correct()372 fn bucket_ranges_are_correct() {
373 let p = 2;
374 let config = HistogramType::H2(LogHistogram {
375 num_buckets: 1024,
376 p,
377 bucket_offset: 0,
378 });
379
380 // check precise buckets. There are 2^(p+1) precise buckets
381 for i in 0..2_usize.pow(p + 1) {
382 assert_eq!(
383 config.value_to_bucket(i as u64),
384 i,
385 "{} should be in bucket {}",
386 i,
387 i
388 );
389 }
390
391 let mut value = 2_usize.pow(p + 1);
392 let current_bucket = value;
393 while value < current_bucket * 2 {
394 assert_eq!(
395 config.value_to_bucket(value as u64),
396 current_bucket + ((value - current_bucket) / 2),
397 "bucket for {value}"
398 );
399 value += 1;
400 }
401 }
402
403 // test buckets against known values
404 #[test]
bucket_computation_spot_check()405 fn bucket_computation_spot_check() {
406 let p = 9;
407 let config = HistogramType::H2(LogHistogram {
408 num_buckets: 4096,
409 p,
410 bucket_offset: 0,
411 });
412 struct T {
413 v: u64,
414 bucket: usize,
415 }
416 let tests = [
417 T { v: 1, bucket: 1 },
418 T {
419 v: 1023,
420 bucket: 1023,
421 },
422 T {
423 v: 1024,
424 bucket: 1024,
425 },
426 T {
427 v: 2048,
428 bucket: 1536,
429 },
430 T {
431 v: 2052,
432 bucket: 1537,
433 },
434 ];
435 for test in tests {
436 assert_eq!(config.value_to_bucket(test.v), test.bucket);
437 }
438 }
439
440 #[test]
last_bucket_goes_to_infinity()441 fn last_bucket_goes_to_infinity() {
442 let conf = HistogramType::H2(LogHistogram::from_n_p(16, 3, 10));
443 assert_eq!(conf.bucket_range(conf.num_buckets() - 1).end, u64::MAX);
444 }
445
446 #[test]
bucket_offset()447 fn bucket_offset() {
448 // skip the first 10 buckets
449 let conf = HistogramType::H2(LogHistogram::from_n_p(16, 3, 10));
450 for i in 0..10 {
451 assert_eq!(conf.value_to_bucket(i), 0);
452 }
453 // There are 16 1-element buckets. We skipped 10 of them. The first 2 element bucket starts
454 // at 16
455 assert_eq!(conf.value_to_bucket(10), 0);
456 assert_eq!(conf.value_to_bucket(16), 6);
457 assert_eq!(conf.value_to_bucket(17), 6);
458 assert_eq!(conf.bucket_range(6), 16..18);
459 }
460
461 #[test]
max_buckets_enforcement()462 fn max_buckets_enforcement() {
463 let error = LogHistogram::builder()
464 .max_error(0.001)
465 .max_buckets(5)
466 .expect_err("this produces way more than 5 buckets");
467 let num_buckets = match error {
468 InvalidHistogramConfiguration::TooManyBuckets {
469 required_bucket_count,
470 } => required_bucket_count,
471 };
472 assert_eq!(num_buckets, 27549);
473 }
474
475 #[test]
default_configuration_size()476 fn default_configuration_size() {
477 let conf = LogHistogram::builder().build();
478 assert_eq!(conf.num_buckets, 119);
479 }
480 }
481