xref: /aosp_15_r20/external/pigweed/pw_varint/rust/pw_varint.rs (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1 // Copyright 2023 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
15 //! # pw_varint
16 //!
17 //! `pw_varint` provides support for encoding and decoding variable length
18 //! integers.  Small values require less memory than they do with fixed
19 //! encoding.  Signed integers are first zig-zag encoded to allow small
20 //! negative numbers to realize the memory savings.  For more information see
21 //! [Pigweed's pw_varint documentation](https://pigweed.dev/pw_varint).
22 //!
23 //! The encoding format used is compatible with
24 //! [Protocol Buffers](https://developers.google.com/protocol-buffers/docs/encoding#varints).
25 //!
26 //! Encoding and decoding is provided through the [VarintEncode] and
27 //! [VarintDecode] traits.
28 //!
29 //! # Example
30 //!
31 //! ```
32 //! use pw_varint::{VarintEncode, VarintDecode};
33 //!
34 //! let mut buffer = [0u8; 64];
35 //!
36 //! let encoded_len = (-1i64).varint_encode(&mut buffer).unwrap();
37 //!
38 //! let (decoded_len, val) = i64::varint_decode(&buffer ).unwrap();
39 //! ```
40 
41 #![cfg_attr(not(feature = "std"), no_std)]
42 
43 use core::num::Wrapping;
44 
45 use pw_status::{Error, Result};
46 
47 /// A trait for objects than can be decoded from a varint.
48 ///
49 /// `pw_varint` provides implementations for [i16], [u16], [i32], [u32],
50 /// [i64], and [u64].
51 pub trait VarintDecode: Sized {
52     /// Decode a type from a varint encoded series of bytes.
53     ///
54     /// Signed values will be implicitly zig-zag decoded.
varint_decode(data: &[u8]) -> Result<(usize, Self)>55     fn varint_decode(data: &[u8]) -> Result<(usize, Self)>;
56 }
57 
58 /// A trait for objects than can be encoded into a varint.
59 ///
60 /// `pw_varint` provides implementations for [i16], [u16], [i32], [u32],
61 /// [i64], and [u64].
62 pub trait VarintEncode: Sized {
63     /// Encode a type into a varint encoded series of bytes.
64     ///
65     /// Signed values will be implicitly zig-zag encoded.
varint_encode(self, data: &mut [u8]) -> Result<usize>66     fn varint_encode(self, data: &mut [u8]) -> Result<usize>;
67 }
68 
69 macro_rules! unsigned_varint_impl {
70     ($t:ty) => {
71         impl VarintDecode for $t {
72             fn varint_decode(data: &[u8]) -> Result<(usize, Self)> {
73                 let (data, val) = decode_u64(data)?;
74                 Ok((data, val as Self))
75             }
76         }
77 
78         impl VarintEncode for $t {
79             fn varint_encode(self, data: &mut [u8]) -> Result<usize> {
80                 encode_u64(data, self as u64)
81             }
82         }
83     };
84 }
85 
86 macro_rules! signed_varint_impl {
87     ($t:ty) => {
88         impl VarintDecode for $t {
89             fn varint_decode(data: &[u8]) -> Result<(usize, Self)> {
90                 let (data, val) = decode_u64(data)?;
91                 Ok((data, zig_zag_decode(val) as Self))
92             }
93         }
94 
95         impl VarintEncode for $t {
96             fn varint_encode(self, data: &mut [u8]) -> Result<usize> {
97                 encode_u64(data, zig_zag_encode(self as i64))
98             }
99         }
100     };
101 }
102 
103 unsigned_varint_impl!(u8);
104 unsigned_varint_impl!(u16);
105 unsigned_varint_impl!(u32);
106 unsigned_varint_impl!(u64);
107 
108 signed_varint_impl!(i8);
109 signed_varint_impl!(i16);
110 signed_varint_impl!(i32);
111 signed_varint_impl!(i64);
112 
decode_u64(data: &[u8]) -> Result<(usize, u64)>113 fn decode_u64(data: &[u8]) -> Result<(usize, u64)> {
114     let mut value: u64 = 0;
115     for (i, d) in data.iter().enumerate() {
116         value |= (*d as u64 & 0x7f) << (i * 7);
117 
118         if (*d & 0x80) == 0 {
119             return Ok((i + 1, value));
120         }
121     }
122     Err(Error::OutOfRange)
123 }
124 
encode_u64(data: &mut [u8], value: u64) -> Result<usize>125 fn encode_u64(data: &mut [u8], value: u64) -> Result<usize> {
126     let mut value = value;
127     for (i, d) in data.iter_mut().enumerate() {
128         let mut byte: u8 = (value & 0x7f) as u8;
129         value >>= 7;
130         if value > 0 {
131             byte |= 0x80;
132         }
133         *d = byte;
134         if value == 0 {
135             return Ok(i + 1);
136         }
137     }
138     Err(Error::OutOfRange)
139 }
140 
141 // ZigZag encodes a signed integer. This maps small negative numbers to small,
142 // unsigned positive numbers, which improves their density for LEB128 encoding.
143 //
144 // ZigZag encoding works by moving the sign bit from the most-significant bit to
145 // the least-significant bit. For the signed k-bit integer n, the formula is
146 //
147 //   (n << 1) ^ (n >> (k - 1))
148 //
149 // See the following for a description of ZigZag encoding:
150 //   https://protobuf.dev/programming-guides/encoding/#signed-ints
zig_zag_encode(value: i64) -> u64151 fn zig_zag_encode(value: i64) -> u64 {
152     ((value as u64) << 1) ^ ((value >> (i64::BITS - 1)) as u64)
153 }
154 
zig_zag_decode(value: u64) -> i64155 fn zig_zag_decode(value: u64) -> i64 {
156     let value = Wrapping(value);
157     ((value >> 1) ^ (!(value & Wrapping(1)) + Wrapping(1))).0 as i64
158 }
159 
160 #[cfg(test)]
161 mod test {
162     use super::*;
163 
success_cases_u8<T>() -> Vec<(Vec<u8>, T)> where T: From<u8>,164     fn success_cases_u8<T>() -> Vec<(Vec<u8>, T)>
165     where
166         T: From<u8>,
167     {
168         vec![
169             // From varint_test.cc EncodeSizeUnsigned32_SmallSingleByte.
170             (vec![0x00], 0x00.into()),
171             (vec![0x01], 0x01.into()),
172             (vec![0x02], 0x02.into()),
173             // From varint_test.cc EncodeSizeUnsigned32_LargeSingleByte.
174             (vec![0x3f], 0x3f.into()),
175             (vec![0x40], 0x40.into()),
176             (vec![0x7e], 0x7e.into()),
177             (vec![0x7f], 0x7f.into()),
178             // From varint_test.cc EncodeSizeUnsigned32_MultiByte.
179             (vec![0x80, 0x01], 128.into()),
180             (vec![0x81, 0x01], 129.into()),
181             // From https://protobuf.dev/programming-guides/encoding/.
182             (vec![0x96, 0x01], 150.into()),
183         ]
184     }
185 
success_cases_i8<T>() -> Vec<(Vec<u8>, T)> where T: From<i8>,186     fn success_cases_i8<T>() -> Vec<(Vec<u8>, T)>
187     where
188         T: From<i8>,
189     {
190         vec![
191             // From varint_test.cc EncodeSizeSigned32_SmallSingleByte.
192             (vec![0x00], 0i8.into()),
193             (vec![0x01], (-1i8).into()),
194             (vec![0x02], 1i8.into()),
195             (vec![0x03], (-2i8).into()),
196             (vec![0x04], 2i8.into()),
197             // From varint_test.cc EncodeSizeSigned32_LargeSingleByte.
198             (vec![125], (-63i8).into()),
199             (vec![126], (63i8).into()),
200             (vec![127], (-64i8).into()),
201             // From varint_test.cc EncodeSizeSigned32_MultiByte.
202             (vec![0x80, 0x1], 64i8.into()),
203             (vec![0x81, 0x1], (-65i8).into()),
204             (vec![0x82, 0x1], 65i8.into()),
205         ]
206     }
207 
success_cases_u32<T>() -> Vec<(Vec<u8>, T)> where T: From<u32>,208     fn success_cases_u32<T>() -> Vec<(Vec<u8>, T)>
209     where
210         T: From<u32>,
211     {
212         vec![
213             // From varint_test.cc EncodeSizeUnsigned32_MultiByte.
214             (vec![0xfe, 0xff, 0xff, 0xff, 0x0f], 0xffff_fffe.into()),
215             (vec![0xff, 0xff, 0xff, 0xff, 0x0f], 0xffff_ffff.into()),
216         ]
217     }
218 
success_cases_i32<T>() -> Vec<(Vec<u8>, T)> where T: From<i32>,219     fn success_cases_i32<T>() -> Vec<(Vec<u8>, T)>
220     where
221         T: From<i32>,
222     {
223         vec![
224             // From varint_test.cc EncodeSizeSigned32_MultiByte.
225             (vec![0xff, 0xff, 0xff, 0xff, 0x0f], i32::MIN.into()),
226             (vec![0xfe, 0xff, 0xff, 0xff, 0x0f], i32::MAX.into()),
227         ]
228     }
229 
230     #[test]
decode_test_u16()231     fn decode_test_u16() {
232         for case in success_cases_u8::<u16>() {
233             assert_eq!(u16::varint_decode(&case.0), Ok((case.0.len(), case.1)));
234         }
235 
236         assert_eq!(u16::varint_decode(&[0x96]), Err(Error::OutOfRange));
237     }
238 
239     #[test]
decode_test_i16()240     fn decode_test_i16() {
241         for case in success_cases_i8::<i16>() {
242             assert_eq!(i16::varint_decode(&case.0), Ok((case.0.len(), case.1)));
243         }
244 
245         assert_eq!(i16::varint_decode(&[0x96]), Err(Error::OutOfRange));
246     }
247 
248     #[test]
decode_test_u32()249     fn decode_test_u32() {
250         for case in success_cases_u8::<u32>()
251             .into_iter()
252             .chain(success_cases_u32::<u32>())
253         {
254             assert_eq!(u32::varint_decode(&case.0), Ok((case.0.len(), case.1)));
255         }
256 
257         assert_eq!(u32::varint_decode(&[0x96]), Err(Error::OutOfRange));
258     }
259 
260     #[test]
decode_test_i32()261     fn decode_test_i32() {
262         for case in success_cases_i8::<i32>()
263             .into_iter()
264             .chain(success_cases_i32::<i32>())
265         {
266             assert_eq!(i32::varint_decode(&case.0), Ok((case.0.len(), case.1)));
267         }
268 
269         assert_eq!(i32::varint_decode(&[0x96]), Err(Error::OutOfRange));
270     }
271 
272     #[test]
decode_test_u64()273     fn decode_test_u64() {
274         for case in success_cases_u8::<u64>()
275             .into_iter()
276             .chain(success_cases_u32::<u64>())
277         {
278             assert_eq!(u64::varint_decode(&case.0), Ok((case.0.len(), case.1)));
279         }
280 
281         assert_eq!(u64::varint_decode(&[0x96]), Err(Error::OutOfRange));
282     }
283 
284     #[test]
decode_test_i64()285     fn decode_test_i64() {
286         for case in success_cases_i8::<i64>()
287             .into_iter()
288             .chain(success_cases_i32::<i64>())
289         {
290             assert_eq!(i64::varint_decode(&case.0), Ok((case.0.len(), case.1)));
291         }
292 
293         assert_eq!(i64::varint_decode(&[0x96]), Err(Error::OutOfRange));
294     }
295 
296     #[test]
encode_test_u16()297     fn encode_test_u16() {
298         for case in success_cases_u8::<u16>() {
299             let mut buffer = [0u8; 64];
300             let len = case.1.varint_encode(&mut buffer).unwrap();
301             assert_eq!(len, case.0.len());
302             assert_eq!(&buffer[0..len], case.0);
303         }
304     }
305 
306     #[test]
encode_test_i16()307     fn encode_test_i16() {
308         for case in success_cases_i8::<i16>() {
309             let mut buffer = [0u8; 64];
310             let len = case.1.varint_encode(&mut buffer).unwrap();
311             assert_eq!(len, case.0.len());
312             assert_eq!(&buffer[0..len], case.0);
313         }
314     }
315 
316     #[test]
encode_test_u32()317     fn encode_test_u32() {
318         for case in success_cases_u8::<u32>()
319             .into_iter()
320             .chain(success_cases_u32::<u32>())
321         {
322             let mut buffer = [0u8; 64];
323             let len = case.1.varint_encode(&mut buffer).unwrap();
324             assert_eq!(len, case.0.len());
325             assert_eq!(&buffer[0..len], case.0);
326         }
327     }
328 
329     #[test]
encode_test_i32()330     fn encode_test_i32() {
331         for case in success_cases_i8::<i32>()
332             .into_iter()
333             .chain(success_cases_i32::<i32>())
334         {
335             let mut buffer = [0u8; 64];
336             let len = case.1.varint_encode(&mut buffer).unwrap();
337             assert_eq!(len, len);
338             assert_eq!(&buffer[0..len], case.0);
339         }
340     }
341 
342     #[test]
encode_test_u64()343     fn encode_test_u64() {
344         for case in success_cases_u8::<u64>()
345             .into_iter()
346             .chain(success_cases_u32::<u64>())
347         {
348             let mut buffer = [0u8; 64];
349             let len = case.1.varint_encode(&mut buffer).unwrap();
350             assert_eq!(len, case.0.len());
351             assert_eq!(&buffer[0..len], case.0);
352         }
353     }
354 
355     #[test]
encode_test_i64()356     fn encode_test_i64() {
357         for case in success_cases_i8::<i64>()
358             .into_iter()
359             .chain(success_cases_i32::<i64>())
360         {
361             let mut buffer = [0u8; 64];
362             let len = case.1.varint_encode(&mut buffer).unwrap();
363             assert_eq!(len, case.0.len());
364             assert_eq!(&buffer[0..len], case.0);
365         }
366     }
367 }
368