1 use std::convert::TryFrom; 2 use std::fmt; 3 use std::hash::{Hash, Hasher}; 4 use std::str::FromStr; 5 6 use bytes::Bytes; 7 8 use super::{ErrorKind, InvalidUri}; 9 use crate::byte_str::ByteStr; 10 11 /// Represents the scheme component of a URI 12 #[derive(Clone)] 13 pub struct Scheme { 14 pub(super) inner: Scheme2, 15 } 16 17 #[derive(Clone, Debug)] 18 pub(super) enum Scheme2<T = Box<ByteStr>> { 19 None, 20 Standard(Protocol), 21 Other(T), 22 } 23 24 #[derive(Copy, Clone, Debug)] 25 pub(super) enum Protocol { 26 Http, 27 Https, 28 } 29 30 impl Scheme { 31 /// HTTP protocol scheme 32 pub const HTTP: Scheme = Scheme { 33 inner: Scheme2::Standard(Protocol::Http), 34 }; 35 36 /// HTTP protocol over TLS. 37 pub const HTTPS: Scheme = Scheme { 38 inner: Scheme2::Standard(Protocol::Https), 39 }; 40 empty() -> Self41 pub(super) fn empty() -> Self { 42 Scheme { 43 inner: Scheme2::None, 44 } 45 } 46 47 /// Return a str representation of the scheme 48 /// 49 /// # Examples 50 /// 51 /// ``` 52 /// # use http::uri::*; 53 /// let scheme: Scheme = "http".parse().unwrap(); 54 /// assert_eq!(scheme.as_str(), "http"); 55 /// ``` 56 #[inline] as_str(&self) -> &str57 pub fn as_str(&self) -> &str { 58 use self::Protocol::*; 59 use self::Scheme2::*; 60 61 match self.inner { 62 Standard(Http) => "http", 63 Standard(Https) => "https", 64 Other(ref v) => &v[..], 65 None => unreachable!(), 66 } 67 } 68 } 69 70 impl<'a> TryFrom<&'a [u8]> for Scheme { 71 type Error = InvalidUri; 72 #[inline] try_from(s: &'a [u8]) -> Result<Self, Self::Error>73 fn try_from(s: &'a [u8]) -> Result<Self, Self::Error> { 74 use self::Scheme2::*; 75 76 match Scheme2::parse_exact(s)? { 77 None => Err(ErrorKind::InvalidScheme.into()), 78 Standard(p) => Ok(Standard(p).into()), 79 Other(_) => { 80 let bytes = Bytes::copy_from_slice(s); 81 82 // Safety: postcondition on parse_exact() means that s and 83 // hence bytes are valid UTF-8. 84 let string = unsafe { ByteStr::from_utf8_unchecked(bytes) }; 85 86 Ok(Other(Box::new(string)).into()) 87 } 88 } 89 } 90 } 91 92 impl<'a> TryFrom<&'a str> for Scheme { 93 type Error = InvalidUri; 94 #[inline] try_from(s: &'a str) -> Result<Self, Self::Error>95 fn try_from(s: &'a str) -> Result<Self, Self::Error> { 96 TryFrom::try_from(s.as_bytes()) 97 } 98 } 99 100 impl FromStr for Scheme { 101 type Err = InvalidUri; 102 from_str(s: &str) -> Result<Self, Self::Err>103 fn from_str(s: &str) -> Result<Self, Self::Err> { 104 TryFrom::try_from(s) 105 } 106 } 107 108 impl fmt::Debug for Scheme { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result109 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 110 fmt::Debug::fmt(self.as_str(), f) 111 } 112 } 113 114 impl fmt::Display for Scheme { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result115 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 116 f.write_str(self.as_str()) 117 } 118 } 119 120 impl AsRef<str> for Scheme { 121 #[inline] as_ref(&self) -> &str122 fn as_ref(&self) -> &str { 123 self.as_str() 124 } 125 } 126 127 impl PartialEq for Scheme { eq(&self, other: &Scheme) -> bool128 fn eq(&self, other: &Scheme) -> bool { 129 use self::Protocol::*; 130 use self::Scheme2::*; 131 132 match (&self.inner, &other.inner) { 133 (&Standard(Http), &Standard(Http)) => true, 134 (&Standard(Https), &Standard(Https)) => true, 135 (&Other(ref a), &Other(ref b)) => a.eq_ignore_ascii_case(b), 136 (&None, _) | (_, &None) => unreachable!(), 137 _ => false, 138 } 139 } 140 } 141 142 impl Eq for Scheme {} 143 144 /// Case-insensitive equality 145 /// 146 /// # Examples 147 /// 148 /// ``` 149 /// # use http::uri::Scheme; 150 /// let scheme: Scheme = "HTTP".parse().unwrap(); 151 /// assert_eq!(scheme, *"http"); 152 /// ``` 153 impl PartialEq<str> for Scheme { eq(&self, other: &str) -> bool154 fn eq(&self, other: &str) -> bool { 155 self.as_str().eq_ignore_ascii_case(other) 156 } 157 } 158 159 /// Case-insensitive equality 160 impl PartialEq<Scheme> for str { eq(&self, other: &Scheme) -> bool161 fn eq(&self, other: &Scheme) -> bool { 162 other == self 163 } 164 } 165 166 /// Case-insensitive hashing 167 impl Hash for Scheme { hash<H>(&self, state: &mut H) where H: Hasher,168 fn hash<H>(&self, state: &mut H) 169 where 170 H: Hasher, 171 { 172 match self.inner { 173 Scheme2::None => (), 174 Scheme2::Standard(Protocol::Http) => state.write_u8(1), 175 Scheme2::Standard(Protocol::Https) => state.write_u8(2), 176 Scheme2::Other(ref other) => { 177 other.len().hash(state); 178 for &b in other.as_bytes() { 179 state.write_u8(b.to_ascii_lowercase()); 180 } 181 } 182 } 183 } 184 } 185 186 impl<T> Scheme2<T> { is_none(&self) -> bool187 pub(super) fn is_none(&self) -> bool { 188 match *self { 189 Scheme2::None => true, 190 _ => false, 191 } 192 } 193 } 194 195 // Require the scheme to not be too long in order to enable further 196 // optimizations later. 197 const MAX_SCHEME_LEN: usize = 64; 198 199 // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 200 // 201 // SCHEME_CHARS is a table of valid characters in the scheme part of a URI. An 202 // entry in the table is 0 for invalid characters. For valid characters the 203 // entry is itself (i.e. the entry for 43 is b'+' because b'+' == 43u8). An 204 // important characteristic of this table is that all entries above 127 are 205 // invalid. This makes all of the valid entries a valid single-byte UTF-8 code 206 // point. This means that a slice of such valid entries is valid UTF-8. 207 const SCHEME_CHARS: [u8; 256] = [ 208 // 0 1 2 3 4 5 6 7 8 9 209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x 210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x 211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x 212 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x 213 0, 0, 0, b'+', 0, b'-', b'.', 0, b'0', b'1', // 4x 214 b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b':', 0, // 5x 215 0, 0, 0, 0, 0, b'A', b'B', b'C', b'D', b'E', // 6x 216 b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', // 7x 217 b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', // 8x 218 b'Z', 0, 0, 0, 0, 0, 0, b'a', b'b', b'c', // 9x 219 b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', // 10x 220 b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', // 11x 221 b'x', b'y', b'z', 0, 0, 0, b'~', 0, 0, 0, // 12x 222 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 13x 223 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 14x 224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 15x 225 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16x 226 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 17x 227 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 18x 228 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 19x 229 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20x 230 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 21x 231 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 22x 232 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 23x 233 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 24x 234 0, 0, 0, 0, 0, 0 // 25x 235 ]; 236 237 impl Scheme2<usize> { 238 // Postcondition: On all Ok() returns, s is valid UTF-8 parse_exact(s: &[u8]) -> Result<Scheme2<()>, InvalidUri>239 fn parse_exact(s: &[u8]) -> Result<Scheme2<()>, InvalidUri> { 240 match s { 241 b"http" => Ok(Protocol::Http.into()), 242 b"https" => Ok(Protocol::Https.into()), 243 _ => { 244 if s.len() > MAX_SCHEME_LEN { 245 return Err(ErrorKind::SchemeTooLong.into()); 246 } 247 248 // check that each byte in s is a SCHEME_CHARS which implies 249 // that it is a valid single byte UTF-8 code point. 250 for &b in s { 251 match SCHEME_CHARS[b as usize] { 252 b':' => { 253 // Don't want :// here 254 return Err(ErrorKind::InvalidScheme.into()); 255 } 256 0 => { 257 return Err(ErrorKind::InvalidScheme.into()); 258 } 259 _ => {} 260 } 261 } 262 263 Ok(Scheme2::Other(())) 264 } 265 } 266 } 267 parse(s: &[u8]) -> Result<Scheme2<usize>, InvalidUri>268 pub(super) fn parse(s: &[u8]) -> Result<Scheme2<usize>, InvalidUri> { 269 if s.len() >= 7 { 270 // Check for HTTP 271 if s[..7].eq_ignore_ascii_case(b"http://") { 272 // Prefix will be striped 273 return Ok(Protocol::Http.into()); 274 } 275 } 276 277 if s.len() >= 8 { 278 // Check for HTTPs 279 if s[..8].eq_ignore_ascii_case(b"https://") { 280 return Ok(Protocol::Https.into()); 281 } 282 } 283 284 if s.len() > 3 { 285 for i in 0..s.len() { 286 let b = s[i]; 287 288 match SCHEME_CHARS[b as usize] { 289 b':' => { 290 // Not enough data remaining 291 if s.len() < i + 3 { 292 break; 293 } 294 295 // Not a scheme 296 if &s[i + 1..i + 3] != b"//" { 297 break; 298 } 299 300 if i > MAX_SCHEME_LEN { 301 return Err(ErrorKind::SchemeTooLong.into()); 302 } 303 304 // Return scheme 305 return Ok(Scheme2::Other(i)); 306 } 307 // Invald scheme character, abort 308 0 => break, 309 _ => {} 310 } 311 } 312 } 313 314 Ok(Scheme2::None) 315 } 316 } 317 318 impl Protocol { len(&self) -> usize319 pub(super) fn len(&self) -> usize { 320 match *self { 321 Protocol::Http => 4, 322 Protocol::Https => 5, 323 } 324 } 325 } 326 327 impl<T> From<Protocol> for Scheme2<T> { from(src: Protocol) -> Self328 fn from(src: Protocol) -> Self { 329 Scheme2::Standard(src) 330 } 331 } 332 333 #[doc(hidden)] 334 impl From<Scheme2> for Scheme { from(src: Scheme2) -> Self335 fn from(src: Scheme2) -> Self { 336 Scheme { inner: src } 337 } 338 } 339 340 #[cfg(test)] 341 mod test { 342 use super::*; 343 344 #[test] scheme_eq_to_str()345 fn scheme_eq_to_str() { 346 assert_eq!(&scheme("http"), "http"); 347 assert_eq!(&scheme("https"), "https"); 348 assert_eq!(&scheme("ftp"), "ftp"); 349 assert_eq!(&scheme("my+funky+scheme"), "my+funky+scheme"); 350 } 351 352 #[test] invalid_scheme_is_error()353 fn invalid_scheme_is_error() { 354 Scheme::try_from("my_funky_scheme").expect_err("Unexpectly valid Scheme"); 355 356 // Invalid UTF-8 357 Scheme::try_from([0xC0].as_ref()).expect_err("Unexpectly valid Scheme"); 358 } 359 scheme(s: &str) -> Scheme360 fn scheme(s: &str) -> Scheme { 361 s.parse().expect(&format!("Invalid scheme: {}", s)) 362 } 363 } 364