1 use std::convert::TryFrom;
2 use std::fmt;
3 use std::hash::{Hash, Hasher};
4 use std::str::FromStr;
5 
6 use bytes::Bytes;
7 
8 use super::{ErrorKind, InvalidUri};
9 use crate::byte_str::ByteStr;
10 
/// Represents the scheme component of a URI
///
/// The well-known `http` and `https` schemes are available as the
/// [`Scheme::HTTP`] and [`Scheme::HTTPS`] constants; any other scheme is
/// stored as text. Comparison with a `str` ignores ASCII case, as do equality
/// and hashing between two non-standard schemes.
#[derive(Clone)]
pub struct Scheme {
    // Internal representation. May hold `Scheme2::None`, a placeholder for
    // "no scheme" on which `as_str`, `Debug`, `Display` and `==` panic.
    pub(super) inner: Scheme2,
}
16 
/// Internal representation of a scheme, generic over the payload carried by a
/// non-standard scheme.
///
/// Within this file `T` is `Box<ByteStr>` (the default) when stored inside a
/// `Scheme`, `usize` in the result of `Scheme2::parse`, and `()` in the result
/// of `Scheme2::parse_exact`.
#[derive(Clone, Debug)]
pub(super) enum Scheme2<T = Box<ByteStr>> {
    /// No scheme is present.
    None,
    /// One of the built-in protocols (`http` or `https`).
    Standard(Protocol),
    /// Any other scheme, described by a payload of type `T`.
    Other(T),
}
23 
/// The schemes that are represented without storing their text.
#[derive(Copy, Clone, Debug)]
pub(super) enum Protocol {
    /// The `http` scheme.
    Http,
    /// The `https` scheme.
    Https,
}
29 
30 impl Scheme {
31     /// HTTP protocol scheme
32     pub const HTTP: Scheme = Scheme {
33         inner: Scheme2::Standard(Protocol::Http),
34     };
35 
36     /// HTTP protocol over TLS.
37     pub const HTTPS: Scheme = Scheme {
38         inner: Scheme2::Standard(Protocol::Https),
39     };
40 
empty() -> Self41     pub(super) fn empty() -> Self {
42         Scheme {
43             inner: Scheme2::None,
44         }
45     }
46 
47     /// Return a str representation of the scheme
48     ///
49     /// # Examples
50     ///
51     /// ```
52     /// # use http::uri::*;
53     /// let scheme: Scheme = "http".parse().unwrap();
54     /// assert_eq!(scheme.as_str(), "http");
55     /// ```
56     #[inline]
as_str(&self) -> &str57     pub fn as_str(&self) -> &str {
58         use self::Protocol::*;
59         use self::Scheme2::*;
60 
61         match self.inner {
62             Standard(Http) => "http",
63             Standard(Https) => "https",
64             Other(ref v) => &v[..],
65             None => unreachable!(),
66         }
67     }
68 }
69 
70 impl<'a> TryFrom<&'a [u8]> for Scheme {
71     type Error = InvalidUri;
72     #[inline]
try_from(s: &'a [u8]) -> Result<Self, Self::Error>73     fn try_from(s: &'a [u8]) -> Result<Self, Self::Error> {
74         use self::Scheme2::*;
75 
76         match Scheme2::parse_exact(s)? {
77             None => Err(ErrorKind::InvalidScheme.into()),
78             Standard(p) => Ok(Standard(p).into()),
79             Other(_) => {
80                 let bytes = Bytes::copy_from_slice(s);
81 
82                 // Safety: postcondition on parse_exact() means that s and
83                 // hence bytes are valid UTF-8.
84                 let string = unsafe { ByteStr::from_utf8_unchecked(bytes) };
85 
86                 Ok(Other(Box::new(string)).into())
87             }
88         }
89     }
90 }
91 
92 impl<'a> TryFrom<&'a str> for Scheme {
93     type Error = InvalidUri;
94     #[inline]
try_from(s: &'a str) -> Result<Self, Self::Error>95     fn try_from(s: &'a str) -> Result<Self, Self::Error> {
96         TryFrom::try_from(s.as_bytes())
97     }
98 }
99 
100 impl FromStr for Scheme {
101     type Err = InvalidUri;
102 
from_str(s: &str) -> Result<Self, Self::Err>103     fn from_str(s: &str) -> Result<Self, Self::Err> {
104         TryFrom::try_from(s)
105     }
106 }
107 
108 impl fmt::Debug for Scheme {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result109     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
110         fmt::Debug::fmt(self.as_str(), f)
111     }
112 }
113 
114 impl fmt::Display for Scheme {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result115     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
116         f.write_str(self.as_str())
117     }
118 }
119 
120 impl AsRef<str> for Scheme {
121     #[inline]
as_ref(&self) -> &str122     fn as_ref(&self) -> &str {
123         self.as_str()
124     }
125 }
126 
127 impl PartialEq for Scheme {
eq(&self, other: &Scheme) -> bool128     fn eq(&self, other: &Scheme) -> bool {
129         use self::Protocol::*;
130         use self::Scheme2::*;
131 
132         match (&self.inner, &other.inner) {
133             (&Standard(Http), &Standard(Http)) => true,
134             (&Standard(Https), &Standard(Https)) => true,
135             (&Other(ref a), &Other(ref b)) => a.eq_ignore_ascii_case(b),
136             (&None, _) | (_, &None) => unreachable!(),
137             _ => false,
138         }
139     }
140 }
141 
142 impl Eq for Scheme {}
143 
144 /// Case-insensitive equality
145 ///
146 /// # Examples
147 ///
148 /// ```
149 /// # use http::uri::Scheme;
150 /// let scheme: Scheme = "HTTP".parse().unwrap();
151 /// assert_eq!(scheme, *"http");
152 /// ```
153 impl PartialEq<str> for Scheme {
eq(&self, other: &str) -> bool154     fn eq(&self, other: &str) -> bool {
155         self.as_str().eq_ignore_ascii_case(other)
156     }
157 }
158 
159 /// Case-insensitive equality
160 impl PartialEq<Scheme> for str {
eq(&self, other: &Scheme) -> bool161     fn eq(&self, other: &Scheme) -> bool {
162         other == self
163     }
164 }
165 
166 /// Case-insensitive hashing
167 impl Hash for Scheme {
hash<H>(&self, state: &mut H) where H: Hasher,168     fn hash<H>(&self, state: &mut H)
169     where
170         H: Hasher,
171     {
172         match self.inner {
173             Scheme2::None => (),
174             Scheme2::Standard(Protocol::Http) => state.write_u8(1),
175             Scheme2::Standard(Protocol::Https) => state.write_u8(2),
176             Scheme2::Other(ref other) => {
177                 other.len().hash(state);
178                 for &b in other.as_bytes() {
179                     state.write_u8(b.to_ascii_lowercase());
180                 }
181             }
182         }
183     }
184 }
185 
186 impl<T> Scheme2<T> {
is_none(&self) -> bool187     pub(super) fn is_none(&self) -> bool {
188         match *self {
189             Scheme2::None => true,
190             _ => false,
191         }
192     }
193 }
194 
// Require the scheme to not be too long in order to enable further
// optimizations later.
const MAX_SCHEME_LEN: usize = 64;

// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
//
// SCHEME_CHARS is a table of valid characters in the scheme part of a URI.  An
// entry in the table is 0 for invalid characters. For valid characters the
// entry is itself (i.e.  the entry for 43 is b'+' because b'+' == 43u8).
//
// The entry for b':' (58) is also itself even though ':' is not a scheme
// character: the parsers match on it explicitly to recognise the end of the
// scheme (or to reject it where no delimiter is expected).
//
// b'~' (126) is deliberately 0: it is not part of the grammar above.
//
// An important characteristic of this table is that all entries above 127 are
// invalid. This makes all of the valid entries a valid single-byte UTF-8 code
// point. This means that a slice of such valid entries is valid UTF-8.
const SCHEME_CHARS: [u8; 256] = [
    //  0      1      2      3      4      5      6      7      8      9
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //   x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  1x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  2x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  3x
        0,     0,     0,  b'+',     0,  b'-',  b'.',     0,  b'0',  b'1', //  4x
     b'2',  b'3',  b'4',  b'5',  b'6',  b'7',  b'8',  b'9',  b':',     0, //  5x
        0,     0,     0,     0,     0,  b'A',  b'B',  b'C',  b'D',  b'E', //  6x
     b'F',  b'G',  b'H',  b'I',  b'J',  b'K',  b'L',  b'M',  b'N',  b'O', //  7x
     b'P',  b'Q',  b'R',  b'S',  b'T',  b'U',  b'V',  b'W',  b'X',  b'Y', //  8x
     b'Z',     0,     0,     0,     0,     0,     0,  b'a',  b'b',  b'c', //  9x
     b'd',  b'e',  b'f',  b'g',  b'h',  b'i',  b'j',  b'k',  b'l',  b'm', // 10x
     b'n',  b'o',  b'p',  b'q',  b'r',  b's',  b't',  b'u',  b'v',  b'w', // 11x
     b'x',  b'y',  b'z',     0,     0,     0,     0,     0,     0,     0, // 12x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 13x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 14x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 15x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 16x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 17x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 18x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 19x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 20x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 21x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 22x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 23x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 24x
        0,     0,     0,     0,     0,     0                              // 25x
];
236 
237 impl Scheme2<usize> {
238     // Postcondition: On all Ok() returns, s is valid UTF-8
parse_exact(s: &[u8]) -> Result<Scheme2<()>, InvalidUri>239     fn parse_exact(s: &[u8]) -> Result<Scheme2<()>, InvalidUri> {
240         match s {
241             b"http" => Ok(Protocol::Http.into()),
242             b"https" => Ok(Protocol::Https.into()),
243             _ => {
244                 if s.len() > MAX_SCHEME_LEN {
245                     return Err(ErrorKind::SchemeTooLong.into());
246                 }
247 
248                 // check that each byte in s is a SCHEME_CHARS which implies
249                 // that it is a valid single byte UTF-8 code point.
250                 for &b in s {
251                     match SCHEME_CHARS[b as usize] {
252                         b':' => {
253                             // Don't want :// here
254                             return Err(ErrorKind::InvalidScheme.into());
255                         }
256                         0 => {
257                             return Err(ErrorKind::InvalidScheme.into());
258                         }
259                         _ => {}
260                     }
261                 }
262 
263                 Ok(Scheme2::Other(()))
264             }
265         }
266     }
267 
parse(s: &[u8]) -> Result<Scheme2<usize>, InvalidUri>268     pub(super) fn parse(s: &[u8]) -> Result<Scheme2<usize>, InvalidUri> {
269         if s.len() >= 7 {
270             // Check for HTTP
271             if s[..7].eq_ignore_ascii_case(b"http://") {
272                 // Prefix will be striped
273                 return Ok(Protocol::Http.into());
274             }
275         }
276 
277         if s.len() >= 8 {
278             // Check for HTTPs
279             if s[..8].eq_ignore_ascii_case(b"https://") {
280                 return Ok(Protocol::Https.into());
281             }
282         }
283 
284         if s.len() > 3 {
285             for i in 0..s.len() {
286                 let b = s[i];
287 
288                 match SCHEME_CHARS[b as usize] {
289                     b':' => {
290                         // Not enough data remaining
291                         if s.len() < i + 3 {
292                             break;
293                         }
294 
295                         // Not a scheme
296                         if &s[i + 1..i + 3] != b"//" {
297                             break;
298                         }
299 
300                         if i > MAX_SCHEME_LEN {
301                             return Err(ErrorKind::SchemeTooLong.into());
302                         }
303 
304                         // Return scheme
305                         return Ok(Scheme2::Other(i));
306                     }
307                     // Invald scheme character, abort
308                     0 => break,
309                     _ => {}
310                 }
311             }
312         }
313 
314         Ok(Scheme2::None)
315     }
316 }
317 
318 impl Protocol {
len(&self) -> usize319     pub(super) fn len(&self) -> usize {
320         match *self {
321             Protocol::Http => 4,
322             Protocol::Https => 5,
323         }
324     }
325 }
326 
327 impl<T> From<Protocol> for Scheme2<T> {
from(src: Protocol) -> Self328     fn from(src: Protocol) -> Self {
329         Scheme2::Standard(src)
330     }
331 }
332 
333 #[doc(hidden)]
334 impl From<Scheme2> for Scheme {
from(src: Scheme2) -> Self335     fn from(src: Scheme2) -> Self {
336         Scheme { inner: src }
337     }
338 }
339 
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn scheme_eq_to_str() {
        assert_eq!(&scheme("http"), "http");
        assert_eq!(&scheme("https"), "https");
        assert_eq!(&scheme("ftp"), "ftp");
        assert_eq!(&scheme("my+funky+scheme"), "my+funky+scheme");
    }

    #[test]
    fn invalid_scheme_is_error() {
        Scheme::try_from("my_funky_scheme").expect_err("Unexpectly valid Scheme");

        // Invalid UTF-8
        Scheme::try_from([0xC0].as_ref()).expect_err("Unexpectly valid Scheme");
    }

    /// Parses `s` into a `Scheme`, panicking with the offending input and the
    /// parse error when it is rejected.
    fn scheme(s: &str) -> Scheme {
        // Build the panic message only on failure; the text matches what
        // `expect` would have produced ("<message>: <error:?>").
        s.parse::<Scheme>()
            .unwrap_or_else(|err| panic!("Invalid scheme: {}: {:?}", s, err))
    }
}
364