1 // Copyright 2013-2014 The Rust Project Developers.
2 // Copyright 2018 The Uuid Project Developers.
3 //
4 // See the COPYRIGHT file at the top-level directory of this distribution.
5 //
6 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
7 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
8 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
9 // option. This file may not be copied, modified, or distributed
10 // except according to those terms.
11 
12 //! [`Uuid`] parsing constructs and utilities.
13 //!
14 //! [`Uuid`]: ../struct.Uuid.html
15 
16 use crate::{
17     error::*,
18     std::{convert::TryFrom, str},
19     Uuid,
20 };
21 
22 impl str::FromStr for Uuid {
23     type Err = Error;
24 
from_str(uuid_str: &str) -> Result<Self, Self::Err>25     fn from_str(uuid_str: &str) -> Result<Self, Self::Err> {
26         Uuid::parse_str(uuid_str)
27     }
28 }
29 
30 impl TryFrom<&'_ str> for Uuid {
31     type Error = Error;
32 
try_from(uuid_str: &'_ str) -> Result<Self, Self::Error>33     fn try_from(uuid_str: &'_ str) -> Result<Self, Self::Error> {
34         Uuid::parse_str(uuid_str)
35     }
36 }
37 
38 impl Uuid {
39     /// Parses a `Uuid` from a string of hexadecimal digits with optional
40     /// hyphens.
41     ///
42     /// Any of the formats generated by this module (simple, hyphenated, urn,
43     /// Microsoft GUID) are supported by this parsing function.
44     ///
45     /// Prefer [`try_parse`] unless you need detailed user-facing diagnostics.
46     /// This method will be eventually deprecated in favor of `try_parse`.
47     ///
48     /// # Examples
49     ///
50     /// Parse a hyphenated UUID:
51     ///
52     /// ```
53     /// # use uuid::{Uuid, Version, Variant};
54     /// # fn main() -> Result<(), uuid::Error> {
55     /// let uuid = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?;
56     ///
57     /// assert_eq!(Some(Version::Random), uuid.get_version());
58     /// assert_eq!(Variant::RFC4122, uuid.get_variant());
59     /// # Ok(())
60     /// # }
61     /// ```
62     ///
63     /// [`try_parse`]: #method.try_parse
parse_str(input: &str) -> Result<Uuid, Error>64     pub fn parse_str(input: &str) -> Result<Uuid, Error> {
65         try_parse(input.as_bytes())
66             .map(Uuid::from_bytes)
67             .map_err(InvalidUuid::into_err)
68     }
69 
70     /// Parses a `Uuid` from a string of hexadecimal digits with optional
71     /// hyphens.
72     ///
73     /// This function is similar to [`parse_str`], in fact `parse_str` shares
74     /// the same underlying parser. The difference is that if `try_parse`
75     /// fails, it won't generate very useful error messages. The `parse_str`
76     /// function will eventually be deprecated in favor or `try_parse`.
77     ///
78     /// To parse a UUID from a byte stream instead of a UTF8 string, see
79     /// [`try_parse_ascii`].
80     ///
81     /// # Examples
82     ///
83     /// Parse a hyphenated UUID:
84     ///
85     /// ```
86     /// # use uuid::{Uuid, Version, Variant};
87     /// # fn main() -> Result<(), uuid::Error> {
88     /// let uuid = Uuid::try_parse("550e8400-e29b-41d4-a716-446655440000")?;
89     ///
90     /// assert_eq!(Some(Version::Random), uuid.get_version());
91     /// assert_eq!(Variant::RFC4122, uuid.get_variant());
92     /// # Ok(())
93     /// # }
94     /// ```
95     ///
96     /// [`parse_str`]: #method.parse_str
97     /// [`try_parse_ascii`]: #method.try_parse_ascii
try_parse(input: &str) -> Result<Uuid, Error>98     pub const fn try_parse(input: &str) -> Result<Uuid, Error> {
99         Self::try_parse_ascii(input.as_bytes())
100     }
101 
102     /// Parses a `Uuid` from a string of hexadecimal digits with optional
103     /// hyphens.
104     ///
105     /// The input is expected to be a string of ASCII characters. This method
106     /// can be more convenient than [`try_parse`] if the UUID is being
107     /// parsed from a byte stream instead of from a UTF8 string.
108     ///
109     /// # Examples
110     ///
111     /// Parse a hyphenated UUID:
112     ///
113     /// ```
114     /// # use uuid::{Uuid, Version, Variant};
115     /// # fn main() -> Result<(), uuid::Error> {
116     /// let uuid = Uuid::try_parse_ascii(b"550e8400-e29b-41d4-a716-446655440000")?;
117     ///
118     /// assert_eq!(Some(Version::Random), uuid.get_version());
119     /// assert_eq!(Variant::RFC4122, uuid.get_variant());
120     /// # Ok(())
121     /// # }
122     /// ```
123     ///
124     /// [`try_parse`]: #method.try_parse
try_parse_ascii(input: &[u8]) -> Result<Uuid, Error>125     pub const fn try_parse_ascii(input: &[u8]) -> Result<Uuid, Error> {
126         match try_parse(input) {
127             Ok(bytes) => Ok(Uuid::from_bytes(bytes)),
128             // If parsing fails then we don't know exactly what went wrong
129             // In this case, we just return a generic error
130             Err(_) => Err(Error(ErrorKind::Other)),
131         }
132     }
133 }
134 
try_parse(input: &[u8]) -> Result<[u8; 16], InvalidUuid>135 const fn try_parse(input: &[u8]) -> Result<[u8; 16], InvalidUuid> {
136     let result = match (input.len(), input) {
137         // Inputs of 32 bytes must be a non-hyphenated UUID
138         (32, s) => parse_simple(s),
139         // Hyphenated UUIDs may be wrapped in various ways:
140         // - `{UUID}` for braced UUIDs
141         // - `urn:uuid:UUID` for URNs
142         // - `UUID` for a regular hyphenated UUID
143         (36, s)
144         | (38, [b'{', s @ .., b'}'])
145         | (45, [b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..]) => {
146             parse_hyphenated(s)
147         }
148         // Any other shaped input is immediately invalid
149         _ => Err(()),
150     };
151 
152     match result {
153         Ok(b) => Ok(b),
154         Err(()) => Err(InvalidUuid(input)),
155     }
156 }
157 
158 #[inline]
parse_simple(s: &[u8]) -> Result<[u8; 16], ()>159 const fn parse_simple(s: &[u8]) -> Result<[u8; 16], ()> {
160     // This length check here removes all other bounds
161     // checks in this function
162     if s.len() != 32 {
163         return Err(());
164     }
165 
166     let mut buf: [u8; 16] = [0; 16];
167     let mut i = 0;
168 
169     while i < 16 {
170         // Convert a two-char hex value (like `A8`)
171         // into a byte (like `10101000`)
172         let h1 = HEX_TABLE[s[i * 2] as usize];
173         let h2 = HEX_TABLE[s[i * 2 + 1] as usize];
174 
175         // We use `0xff` as a sentinel value to indicate
176         // an invalid hex character sequence (like the letter `G`)
177         if h1 | h2 == 0xff {
178             return Err(());
179         }
180 
181         // The upper nibble needs to be shifted into position
182         // to produce the final byte value
183         buf[i] = SHL4_TABLE[h1 as usize] | h2;
184         i += 1;
185     }
186 
187     Ok(buf)
188 }
189 
190 #[inline]
parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()>191 const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
192     // This length check here removes all other bounds
193     // checks in this function
194     if s.len() != 36 {
195         return Err(());
196     }
197 
198     // We look at two hex-encoded values (4 chars) at a time because
199     // that's the size of the smallest group in a hyphenated UUID.
200     // The indexes we're interested in are:
201     //
202     // uuid     : 936da01f-9abd-4d9d-80c7-02af85c822a8
203     //            |   |   ||   ||   ||   ||   |   |
204     // hyphens  : |   |   8|  13|  18|  23|   |   |
205     // positions: 0   4    9   14   19   24  28  32
206 
207     // First, ensure the hyphens appear in the right places
208     match [s[8], s[13], s[18], s[23]] {
209         [b'-', b'-', b'-', b'-'] => {}
210         _ => return Err(()),
211     }
212 
213     let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32];
214     let mut buf: [u8; 16] = [0; 16];
215     let mut j = 0;
216 
217     while j < 8 {
218         let i = positions[j];
219 
220         // The decoding here is the same as the simple case
221         // We're just dealing with two values instead of one
222         let h1 = HEX_TABLE[s[i as usize] as usize];
223         let h2 = HEX_TABLE[s[(i + 1) as usize] as usize];
224         let h3 = HEX_TABLE[s[(i + 2) as usize] as usize];
225         let h4 = HEX_TABLE[s[(i + 3) as usize] as usize];
226 
227         if h1 | h2 | h3 | h4 == 0xff {
228             return Err(());
229         }
230 
231         buf[j * 2] = SHL4_TABLE[h1 as usize] | h2;
232         buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4;
233         j += 1;
234     }
235 
236     Ok(buf)
237 }
238 
/// Lookup table from an ASCII byte to the value of the hex digit it
/// represents.
///
/// `0`-`9`, `a`-`f` and `A`-`F` map to `0..=15`; every other byte maps to
/// the sentinel `0xff`.
const HEX_TABLE: &[u8; 256] = &{
    // Start with every entry marked invalid, then fill in the digits
    let mut table = [0xff_u8; 256];

    let mut decimal: u8 = 0;
    while decimal < 10 {
        table[(b'0' + decimal) as usize] = decimal;
        decimal += 1;
    }

    // Upper- and lower-case letters share the same values
    let mut letter: u8 = 0;
    while letter < 6 {
        table[(b'a' + letter) as usize] = 10 + letter;
        table[(b'A' + letter) as usize] = 10 + letter;
        letter += 1;
    }

    table
};
258 
/// Lookup table from a byte to that byte shifted left by four bits, with
/// the bits shifted past the top discarded.
///
/// The parsers use it to move a decoded upper nibble into position.
const SHL4_TABLE: &[u8; 256] = &{
    let mut table = [0_u8; 256];
    let mut index: usize = 0;

    while index < 256 {
        // `index` is below 256 here, so the cast is lossless
        table[index] = (index as u8).wrapping_shl(4);
        index += 1;
    }

    table
};
273 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{std::string::ToString, tests::new};

    /// Asserts that `Uuid::parse_str(input)` fails with exactly `kind`.
    fn assert_invalid(input: &str, kind: ErrorKind) {
        assert_eq!(Uuid::parse_str(input), Err(Error(kind)));
    }

    #[test]
    fn test_parse_uuid_v4_valid() {
        // The same UUID in each supported format must parse to the same value
        let hyphenated = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        let simple = Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c8").unwrap();
        let urn = Uuid::parse_str("urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        let guid = Uuid::parse_str("{67e55044-10b1-426f-9247-bb680e5fe0c8}").unwrap();

        assert_eq!(hyphenated, simple);
        assert_eq!(hyphenated, urn);
        assert_eq!(hyphenated, guid);

        let accepted = [
            "00000000000000000000000000000000",
            "67e55044-10b1-426f-9247-bb680e5fe0c8",
            "F9168C5E-CEB2-4faa-B6BF-329BF39FA1E4",
            "67e5504410b1426f9247bb680e5fe0c8",
            "01020304-1112-2122-3132-414243444546",
            "urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8",
            "{6d93bade-bd9f-4e13-8914-9474e1e3567b}",
        ];
        for text in accepted.iter() {
            assert!(Uuid::parse_str(text).is_ok());
        }

        // Nil
        let nil = Uuid::nil();
        assert_eq!(
            Uuid::parse_str("00000000000000000000000000000000").unwrap(),
            nil
        );
        assert_eq!(
            Uuid::parse_str("00000000-0000-0000-0000-000000000000").unwrap(),
            nil
        );
    }

    #[test]
    fn test_parse_uuid_v4_invalid() {
        // Invalid
        assert_invalid("", ErrorKind::SimpleLength { len: 0 });

        assert_invalid(
            "!",
            ErrorKind::Char {
                character: '!',
                index: 1,
            },
        );

        assert_invalid(
            "F9168C5E-CEB2-4faa-B6BF-329BF39FA1E45",
            ErrorKind::GroupLength {
                group: 4,
                len: 13,
                index: 25,
            },
        );

        assert_invalid(
            "F9168C5E-CEB2-4faa-BBF-329BF39FA1E4",
            ErrorKind::GroupLength {
                group: 3,
                len: 3,
                index: 20,
            },
        );

        assert_invalid(
            "F9168C5E-CEB2-4faa-BGBF-329BF39FA1E4",
            ErrorKind::Char {
                character: 'G',
                index: 21,
            },
        );

        assert_invalid(
            "F9168C5E-CEB2F4faaFB6BFF329BF39FA1E4",
            ErrorKind::GroupCount { count: 2 },
        );

        assert_invalid(
            "F9168C5E-CEB2-4faaFB6BFF329BF39FA1E4",
            ErrorKind::GroupCount { count: 3 },
        );

        assert_invalid(
            "F9168C5E-CEB2-4faa-B6BFF329BF39FA1E4",
            ErrorKind::GroupCount { count: 4 },
        );

        assert_invalid("F9168C5E-CEB2-4faa", ErrorKind::GroupCount { count: 3 });

        assert_invalid(
            "F9168C5E-CEB2-4faaXB6BFF329BF39FA1E4",
            ErrorKind::Char {
                character: 'X',
                index: 19,
            },
        );

        assert_invalid(
            "{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41",
            ErrorKind::Char {
                character: '{',
                index: 1,
            },
        );

        assert_invalid(
            "{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41}",
            ErrorKind::GroupCount { count: 3 },
        );

        assert_invalid(
            "F9168C5E-CEB-24fa-eB6BFF32-BF39FA1E4",
            ErrorKind::GroupLength {
                group: 1,
                len: 3,
                index: 10,
            },
        );

        // // (group, found, expecting)
        // //
        assert_invalid(
            "01020304-1112-2122-3132-41424344",
            ErrorKind::GroupLength {
                group: 4,
                len: 8,
                index: 25,
            },
        );

        assert_invalid(
            "67e5504410b1426f9247bb680e5fe0c",
            ErrorKind::SimpleLength { len: 31 },
        );

        assert_invalid(
            "67e5504410b1426f9247bb680e5fe0c88",
            ErrorKind::SimpleLength { len: 33 },
        );

        assert_invalid(
            "67e5504410b1426f9247bb680e5fe0cg8",
            ErrorKind::Char {
                character: 'g',
                index: 32,
            },
        );

        assert_invalid(
            "67e5504410b1426%9247bb680e5fe0c8",
            ErrorKind::Char {
                character: '%',
                index: 16,
            },
        );

        assert_invalid(
            "231231212212423424324323477343246663",
            ErrorKind::SimpleLength { len: 36 },
        );

        assert_invalid(
            "{00000000000000000000000000000000}",
            ErrorKind::GroupCount { count: 1 },
        );

        assert_invalid(
            "67e5504410b1426f9247bb680e5fe0c",
            ErrorKind::SimpleLength { len: 31 },
        );

        assert_invalid(
            "67e550X410b1426f9247bb680e5fe0cd",
            ErrorKind::Char {
                character: 'X',
                index: 7,
            },
        );

        assert_invalid(
            "67e550-4105b1426f9247bb680e5fe0c",
            ErrorKind::GroupCount { count: 2 },
        );

        assert_invalid(
            "F9168C5E-CEB2-4faa-B6BF1-02BF39FA1E4",
            ErrorKind::GroupLength {
                group: 3,
                len: 5,
                index: 20,
            },
        );

        assert_invalid(
            "\u{bcf3c}",
            ErrorKind::Char {
                character: '\u{bcf3c}',
                index: 1,
            },
        );
    }

    #[test]
    fn test_roundtrip_default() {
        let original = new();
        let text = original.to_string();
        assert_eq!(original, Uuid::parse_str(&text).unwrap());
    }

    #[test]
    fn test_roundtrip_hyphenated() {
        let original = new();
        let text = original.hyphenated().to_string();
        assert_eq!(original, Uuid::parse_str(&text).unwrap());
    }

    #[test]
    fn test_roundtrip_simple() {
        let original = new();
        let text = original.simple().to_string();
        assert_eq!(original, Uuid::parse_str(&text).unwrap());
    }

    #[test]
    fn test_roundtrip_urn() {
        let original = new();
        let text = original.urn().to_string();
        assert_eq!(original, Uuid::parse_str(&text).unwrap());
    }

    #[test]
    fn test_roundtrip_braced() {
        let original = new();
        let text = original.braced().to_string();
        assert_eq!(original, Uuid::parse_str(&text).unwrap());
    }

    #[test]
    fn test_try_parse_ascii_non_utf8() {
        // A NUL byte in place of a hex digit must be rejected
        let outcome = Uuid::try_parse_ascii(b"67e55044-10b1-426f-9247-bb680e5\0e0c8");
        assert!(outcome.is_err());
    }
}
530