// xref: /aosp_15_r20/external/pigweed/pw_tokenizer/rust/pw_tokenizer/internal.rs (revision 61c4878ac05f98d0ceed94b57d316916de578985)
// Copyright 2023 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
14 
use core::cmp::min;

use pw_status::{Error, Result};
use pw_stream::{Cursor, Write};
use pw_varint::VarintEncode;

use crate::MessageWriter;
22 
/// A single argument marshalled for the tokenization engine.
///
/// Each variant selects how `tokenize_engine` serializes the value into the
/// tokenized message.
pub enum Argument<'a> {
    /// A borrowed string, serialized with `encode_string`.
    String(&'a str),
    /// A 32-bit signed integer, serialized as a varint.
    Varint(i32),
    /// A 64-bit signed integer, serialized as a varint.
    Varint64(i64),
    /// A single byte, written to the message unmodified.
    Char(u8),
}
31 
32 impl<'a> From<&'a str> for Argument<'a> {
from(val: &'a str) -> Self33     fn from(val: &'a str) -> Self {
34         Self::String(val)
35     }
36 }
37 
38 impl<'a> From<i32> for Argument<'a> {
from(val: i32) -> Self39     fn from(val: i32) -> Self {
40         Self::Varint(val)
41     }
42 }
43 
44 impl<'a> From<u32> for Argument<'a> {
from(val: u32) -> Self45     fn from(val: u32) -> Self {
46         Self::Varint64(val as i64)
47     }
48 }
49 
50 // Wraps a `Cursor` so that `tokenize_to_buffer` and `tokenize_to_writer` can
51 // share implementations.  It is not meant to be used outside of
52 // `tokenize_to_buffer`.
53 struct CursorMessageWriter<'a> {
54     cursor: Cursor<&'a mut [u8]>,
55 }
56 
57 impl MessageWriter for CursorMessageWriter<'_> {
new() -> Self58     fn new() -> Self {
59         // Ensure `tokenize_to_buffer` never calls `new()`.
60         unimplemented!();
61     }
62 
write(&mut self, data: &[u8]) -> Result<()>63     fn write(&mut self, data: &[u8]) -> Result<()> {
64         self.cursor.write_all(data)
65     }
66 
remaining(&self) -> usize67     fn remaining(&self) -> usize {
68         self.cursor.remaining()
69     }
70 
finalize(self) -> Result<()>71     fn finalize(self) -> Result<()> {
72         // Ensure `tokenize_to_buffer` never calls `finalize()`.
73         unimplemented!();
74     }
75 }
76 
77 // Encode a string in Tokenizer format: length byte + data with the high bit of
78 // the length byte used to signal that the string was truncated.
encode_string<W: MessageWriter>(writer: &mut W, value: &str) -> Result<()>79 pub fn encode_string<W: MessageWriter>(writer: &mut W, value: &str) -> Result<()> {
80     const MAX_STRING_LENGTH: usize = 0x7f;
81 
82     let string_bytes = value.as_bytes();
83 
84     // Limit the encoding to the lesser of 127 or the available space in the buffer.
85     let max_len = min(MAX_STRING_LENGTH, writer.remaining() - 1);
86     let overflow = max_len < string_bytes.len();
87     let len = min(max_len, string_bytes.len());
88 
89     // First byte of an encoded string is it's length.
90     let mut header = len as u8;
91 
92     // The high bit of the first byte is used to indicate if the string was
93     // truncated.
94     if overflow {
95         header |= 0x80;
96     }
97     writer.write(&[header])?;
98 
99     writer.write(&string_bytes[..len])
100 }
101 
102 // Write out a tokenized message to an already created `MessageWriter`.
tokenize_engine<W: crate::MessageWriter>( writer: &mut W, token: u32, args: &[Argument<'_>], ) -> Result<()>103 fn tokenize_engine<W: crate::MessageWriter>(
104     writer: &mut W,
105     token: u32,
106     args: &[Argument<'_>],
107 ) -> Result<()> {
108     writer.write(&token.to_le_bytes()[..])?;
109     for arg in args {
110         match arg {
111             Argument::String(s) => encode_string(writer, s)?,
112             Argument::Varint(i) => {
113                 let mut encode_buffer = [0u8; 10];
114                 let len = i.varint_encode(&mut encode_buffer)?;
115                 writer.write(&encode_buffer[..len])?;
116             }
117             Argument::Varint64(i) => {
118                 let mut encode_buffer = [0u8; 10];
119                 let len = i.varint_encode(&mut encode_buffer)?;
120                 writer.write(&encode_buffer[..len])?;
121             }
122             Argument::Char(c) => writer.write(&[*c])?,
123         }
124     }
125 
126     Ok(())
127 }
128 
129 #[inline(never)]
tokenize_to_buffer(buffer: &mut [u8], token: u32, args: &[Argument<'_>]) -> Result<usize>130 pub fn tokenize_to_buffer(buffer: &mut [u8], token: u32, args: &[Argument<'_>]) -> Result<usize> {
131     let mut writer = CursorMessageWriter {
132         cursor: Cursor::new(buffer),
133     };
134     tokenize_engine(&mut writer, token, args)?;
135     Ok(writer.cursor.position())
136 }
137 
138 #[inline(never)]
tokenize_to_buffer_no_args(buffer: &mut [u8], token: u32) -> Result<usize>139 pub fn tokenize_to_buffer_no_args(buffer: &mut [u8], token: u32) -> Result<usize> {
140     let token_bytes = &token.to_le_bytes()[..];
141     let token_len = token_bytes.len();
142     if buffer.len() < token_len {
143         return Err(Error::OutOfRange);
144     }
145     buffer[..token_len].copy_from_slice(token_bytes);
146 
147     Ok(token_len)
148 }
149 
150 #[inline(never)]
tokenize_to_writer<W: crate::MessageWriter>( token: u32, args: &[Argument<'_>], ) -> Result<()>151 pub fn tokenize_to_writer<W: crate::MessageWriter>(
152     token: u32,
153     args: &[Argument<'_>],
154 ) -> Result<()> {
155     let mut writer = W::new();
156     tokenize_engine(&mut writer, token, args)?;
157     writer.finalize()
158 }
159 
160 #[inline(never)]
tokenize_to_writer_no_args<W: crate::MessageWriter>(token: u32) -> Result<()>161 pub fn tokenize_to_writer_no_args<W: crate::MessageWriter>(token: u32) -> Result<()> {
162     let mut writer = W::new();
163     writer.write(&token.to_le_bytes()[..])?;
164     writer.finalize()
165 }
166 
#[cfg(test)]
mod test {
    use pw_stream::Seek;

    use super::*;

    /// Encodes `value` into a `BUFFER_LEN`-byte buffer and asserts that the
    /// bytes written are exactly `expected`.
    fn do_string_encode_test<const BUFFER_LEN: usize>(value: &str, expected: &[u8]) {
        let mut storage = [0u8; BUFFER_LEN];
        let mut writer = CursorMessageWriter {
            cursor: Cursor::new(&mut storage),
        };
        encode_string(&mut writer, value).unwrap();

        // Capture the write position before consuming the cursor.
        let written = writer.cursor.stream_position().unwrap() as usize;
        let contents = writer.cursor.into_inner();

        assert_eq!(written, expected.len());
        assert_eq!(&contents[..written], expected);
    }

    #[test]
    fn test_string_encode() {
        // Fits entirely, loses one byte to the header, and has room for the
        // header only.
        do_string_encode_test::<64>("test", b"\x04test");
        do_string_encode_test::<4>("test", b"\x83tes");
        do_string_encode_test::<1>("test", b"\x80");

        // Truncates when the string does not fit.
        do_string_encode_test::<64>(
            "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttest",
            b"\xbftesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttes",
        );

        // Truncates when string is over 127 bytes.
        do_string_encode_test::<1024>(
            "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest",
            b"\xfftesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttes",
        );
    }
}
206