1 //! Bindings to [libFuzzer](http://llvm.org/docs/LibFuzzer.html): a runtime for
2 //! coverage-guided fuzzing.
3 //!
4 //! See [the `cargo-fuzz`
5 //! guide](https://rust-fuzz.github.io/book/cargo-fuzz.html) for a usage
6 //! tutorial.
7 //!
8 //! The main export of this crate is [the `fuzz_target!`
9 //! macro](./macro.fuzz_target.html), which allows you to define targets for
10 //! libFuzzer to exercise.
11 
12 #![deny(missing_docs, missing_debug_implementations)]
13 
14 pub use arbitrary;
15 use once_cell::sync::OnceCell;
16 
/// Indicates whether the input should be kept in the corpus or rejected. This
/// should be returned by your fuzz target. If your fuzz target does not return
/// a value (i.e., returns `()`), then the input will be kept in the corpus.
///
/// The type is a plain two-state flag, so it is `Copy` and comparable, which
/// lets fuzz targets and their tests store and compare verdicts directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Corpus {
    /// Keep the input in the corpus.
    Keep,

    /// Reject the input and do not keep it in the corpus.
    Reject,
}
28 
29 impl From<()> for Corpus {
from(_: ()) -> Self30     fn from(_: ()) -> Self {
31         Self::Keep
32     }
33 }
34 
35 impl Corpus {
36     #[doc(hidden)]
37     /// Convert this Corpus result into the [integer codes used by
38     /// `libFuzzer`](https://llvm.org/docs/LibFuzzer.html#rejecting-unwanted-inputs).
39     /// This is -1 for reject, 0 for keep.
to_libfuzzer_code(self) -> i3240     pub fn to_libfuzzer_code(self) -> i32 {
41         match self {
42             Corpus::Keep => 0,
43             Corpus::Reject => -1,
44         }
45     }
46 }
47 
extern "C" {
    // The per-input entry point. The `fuzz_target!` macro in this file
    // generates a `#[no_mangle]` function with this exact name, and
    // `test_input_wrap` calls it for every input.
    //
    // We do not actually cross the FFI boundary here (both sides are Rust), so
    // passing a `&[u8]` slice is fine even though it is not a C-compatible
    // type.
    #[allow(improper_ctypes)]
    fn rust_fuzzer_test_input(input: &[u8]) -> i32;

    // The mutator exposed by the fuzzing runtime. `fuzzer_mutate` is the safe
    // wrapper around it: `data` points at the buffer, `size` is the current
    // length of the input and `max_size` the limit the result must respect.
    fn LLVMFuzzerMutate(data: *mut u8, size: usize, max_size: usize) -> usize;
}
55 
56 #[doc(hidden)]
57 #[export_name = "LLVMFuzzerTestOneInput"]
test_input_wrap(data: *const u8, size: usize) -> i3258 pub fn test_input_wrap(data: *const u8, size: usize) -> i32 {
59     let test_input = ::std::panic::catch_unwind(|| unsafe {
60         let data_slice = ::std::slice::from_raw_parts(data, size);
61         rust_fuzzer_test_input(data_slice)
62     });
63 
64     match test_input {
65         Ok(i) => i,
66         Err(_) => {
67             // hopefully the custom panic hook will be called before and abort the
68             // process before the stack frames are unwinded.
69             ::std::process::abort();
70         }
71     }
72 }
73 
/// Value of the `RUST_LIBFUZZER_DEBUG_PATH` environment variable, if it was
/// set when `initialize` ran.
///
/// `initialize` fills this cell and the code generated by `fuzz_target!`
/// reads it through `$crate::RUST_LIBFUZZER_DEBUG_PATH`, which is why it has
/// to be `pub` even though it is hidden from the documentation.
#[doc(hidden)]
pub static RUST_LIBFUZZER_DEBUG_PATH: OnceCell<String> = OnceCell::new();
76 
77 #[doc(hidden)]
78 #[export_name = "LLVMFuzzerInitialize"]
initialize(_argc: *const isize, _argv: *const *const *const u8) -> isize79 pub fn initialize(_argc: *const isize, _argv: *const *const *const u8) -> isize {
80     // Registers a panic hook that aborts the process before unwinding.
81     // It is useful to abort before unwinding so that the fuzzer will then be
82     // able to analyse the process stack frames to tell different bugs appart.
83     //
84     // HACK / FIXME: it would be better to use `-C panic=abort` but it's currently
85     // impossible to build code using compiler plugins with this flag.
86     // We will be able to remove this code when
87     // https://github.com/rust-lang/cargo/issues/5423 is fixed.
88     let default_hook = ::std::panic::take_hook();
89     ::std::panic::set_hook(Box::new(move |panic_info| {
90         default_hook(panic_info);
91         ::std::process::abort();
92     }));
93 
94     // Initialize the `RUST_LIBFUZZER_DEBUG_PATH` cell with the path so it can be
95     // reused with little overhead.
96     if let Ok(path) = std::env::var("RUST_LIBFUZZER_DEBUG_PATH") {
97         RUST_LIBFUZZER_DEBUG_PATH
98             .set(path)
99             .expect("Since this is initialize it is only called once so can never fail");
100     }
101     0
102 }
103 
104 /// Define a fuzz target.
105 ///
106 /// ## Example
107 ///
108 /// This example takes a `&[u8]` slice and attempts to parse it. The parsing
109 /// might fail and return an `Err`, but it shouldn't ever panic or segfault.
110 ///
111 /// ```no_run
112 /// #![no_main]
113 ///
114 /// use libfuzzer_sys::fuzz_target;
115 ///
116 /// // Note: `|input|` is short for `|input: &[u8]|`.
117 /// fuzz_target!(|input| {
118 ///     let _result: Result<_, _> = my_crate::parse(input);
119 /// });
120 /// # mod my_crate { pub fn parse(_: &[u8]) -> Result<(), ()> { unimplemented!() } }
121 /// ```
122 ///
123 /// ## Rejecting Inputs
124 ///
125 /// It may be desirable to reject some inputs, i.e. to not add them to the
126 /// corpus.
127 ///
128 /// For example, when fuzzing an API consisting of parsing and other logic,
129 /// one may want to allow only those inputs into the corpus that parse
130 /// successfully. To indicate whether an input should be kept in or rejected
131 /// from the corpus, return either [Corpus::Keep] or [Corpus::Reject] from your
132 /// fuzz target. The default behavior (e.g. if `()` is returned) is to keep the
133 /// input in the corpus.
134 ///
135 /// For example:
136 ///
137 /// ```no_run
138 /// #![no_main]
139 ///
140 /// use libfuzzer_sys::{Corpus, fuzz_target};
141 ///
142 /// fuzz_target!(|input: String| -> Corpus {
143 ///     let parts: Vec<&str> = input.splitn(2, '=').collect();
144 ///     if parts.len() != 2 {
145 ///         return Corpus::Reject;
146 ///     }
147 ///
148 ///     let key = parts[0];
149 ///     let value = parts[1];
150 ///     let _result: Result<_, _> = my_crate::parse(key, value);
151 ///     Corpus::Keep
152 /// });
153 /// # mod my_crate { pub fn parse(_key: &str, _value: &str) -> Result<(), ()> { unimplemented!() } }
154 /// ```
155 ///
156 /// ## Arbitrary Input Types
157 ///
158 /// The input is a `&[u8]` slice by default, but you can take arbitrary input
159 /// types, as long as the type implements [the `arbitrary` crate's `Arbitrary`
160 /// trait](https://docs.rs/arbitrary/*/arbitrary/trait.Arbitrary.html) (which is
161 /// also re-exported as `libfuzzer_sys::arbitrary::Arbitrary` for convenience).
162 ///
163 /// For example, if you wanted to take an arbitrary RGB color, you could do the
164 /// following:
165 ///
166 /// ```no_run
167 /// #![no_main]
168 /// # mod foo {
169 ///
170 /// use libfuzzer_sys::{arbitrary::{Arbitrary, Error, Unstructured}, fuzz_target};
171 ///
172 /// #[derive(Debug)]
173 /// pub struct Rgb {
174 ///     r: u8,
175 ///     g: u8,
176 ///     b: u8,
177 /// }
178 ///
179 /// impl<'a> Arbitrary<'a> for Rgb {
180 ///     fn arbitrary(raw: &mut Unstructured<'a>) -> Result<Self, Error> {
181 ///         let mut buf = [0; 3];
182 ///         raw.fill_buffer(&mut buf)?;
183 ///         let r = buf[0];
184 ///         let g = buf[1];
185 ///         let b = buf[2];
186 ///         Ok(Rgb { r, g, b })
187 ///     }
188 /// }
189 ///
190 /// // Write a fuzz target that works with RGB colors instead of raw bytes.
191 /// fuzz_target!(|color: Rgb| {
192 ///     my_crate::convert_color(color);
193 /// });
194 /// # mod my_crate {
195 /// #     use super::Rgb;
196 /// #     pub fn convert_color(_: Rgb) {}
197 /// # }
198 /// # }
199 /// ```
200 ///
201 /// You can also enable the `arbitrary` crate's custom derive via this crate's
202 /// `"arbitrary-derive"` cargo feature.
#[macro_export]
macro_rules! fuzz_target {
    // Raw bytes without a type annotation: `fuzz_target!(|bytes| { ... })`.
    (|$bytes:ident| $body:expr) => {
        const _: () = {
            /// Auto-generated function
            #[no_mangle]
            pub extern "C" fn rust_fuzzer_test_input(bytes: &[u8]) -> i32 {
                // When `RUST_LIBFUZZER_DEBUG_PATH` is set, write the debug
                // formatting of the input to that file. This is only intended for
                // `cargo fuzz`'s use!

                // `RUST_LIBFUZZER_DEBUG_PATH` is set in initialization.
                if let Some(path) = $crate::RUST_LIBFUZZER_DEBUG_PATH.get() {
                    use std::io::Write;
                    let mut file = std::fs::File::create(path)
                        .expect("failed to create `RUST_LIBFUZZER_DEBUG_PATH` file");
                    writeln!(&mut file, "{:?}", bytes)
                        .expect("failed to write to `RUST_LIBFUZZER_DEBUG_PATH` file");
                    return 0;
                }

                __libfuzzer_sys_run(bytes);
                0
            }

            // Split out the actual fuzzer into a separate function which is
            // tagged as never being inlined. This ensures that if the fuzzer
            // panics there's at least one stack frame which is named uniquely
            // according to this specific fuzzer that this is embedded within.
            //
            // Systems like oss-fuzz try to deduplicate crashes and without this
            // panics in separate fuzzers can accidentally appear the same
            // because each fuzzer will have a function called
            // `rust_fuzzer_test_input`. By using a normal Rust function here
            // it's named something like `the_fuzzer_name::_::__libfuzzer_sys_run` which should
            // ideally help prevent oss-fuzz from deduplicating fuzz bugs across
            // distinct targets accidentally.
            #[inline(never)]
            fn __libfuzzer_sys_run($bytes: &[u8]) {
                $body
            }
        };
    };

    // Raw bytes with an explicit `&[u8]` annotation: same as the arm above.
    (|$data:ident: &[u8]| $body:expr) => {
        $crate::fuzz_target!(|$data| $body);
    };

    // Typed input without a return type: the target implicitly returns `()`,
    // which converts to `Corpus::Keep`.
    (|$data:ident: $dty:ty| $body:expr) => {
        $crate::fuzz_target!(|$data: $dty| -> () { $body });
    };

    // Typed input with an explicit return type. The return type must convert
    // into `Corpus` via `From`.
    (|$data:ident: $dty:ty| -> $rty:ty $body:block) => {
        const _: () = {
            /// Auto-generated function
            #[no_mangle]
            pub extern "C" fn rust_fuzzer_test_input(bytes: &[u8]) -> i32 {
                use $crate::arbitrary::{Arbitrary, Unstructured};

                // Early exit if we don't have enough bytes for the `Arbitrary`
                // implementation. This helps the fuzzer avoid exploring all the
                // different not-enough-input-bytes paths inside the `Arbitrary`
                // implementation. Additionally, it exits faster, letting the fuzzer
                // get to longer inputs that actually lead to interesting executions
                // quicker.
                if bytes.len() < <$dty as Arbitrary>::size_hint(0).0 {
                    return -1;
                }

                // `arbitrary_take_rest` consumes the `Unstructured` by value,
                // so the binding does not need to be mutable.
                let u = Unstructured::new(bytes);
                let data = <$dty as Arbitrary>::arbitrary_take_rest(u);

                // When `RUST_LIBFUZZER_DEBUG_PATH` is set, write the debug
                // formatting of the input to that file. This is only intended for
                // `cargo fuzz`'s use!

                // `RUST_LIBFUZZER_DEBUG_PATH` is set in initialization.
                if let Some(path) = $crate::RUST_LIBFUZZER_DEBUG_PATH.get() {
                    use std::io::Write;
                    let mut file = std::fs::File::create(path)
                        .expect("failed to create `RUST_LIBFUZZER_DEBUG_PATH` file");
                    (match data {
                        Ok(data) => writeln!(&mut file, "{:#?}", data),
                        Err(err) => writeln!(&mut file, "Arbitrary Error: {}", err),
                    })
                    .expect("failed to write to `RUST_LIBFUZZER_DEBUG_PATH` file");
                    return -1;
                }

                // Inputs that cannot be decoded into `$dty` are rejected.
                let data = match data {
                    Ok(d) => d,
                    Err(_) => return -1,
                };

                // Refer to `Corpus` through `$crate` (like every other path in
                // this macro) so the expansion keeps working when the crate is
                // renamed or re-exported by the user.
                let result = $crate::Corpus::from(__libfuzzer_sys_run(data));
                result.to_libfuzzer_code()
            }

            // See above for why this is split to a separate function.
            #[inline(never)]
            fn __libfuzzer_sys_run($data: $dty) -> $rty {
                $body
            }
        };
    };
}
309 
310 /// Define a custom mutator.
311 ///
312 /// This is optional, and libFuzzer will use its own, default mutation strategy
313 /// if this is not provided.
314 ///
315 /// You might consider using a custom mutator when your fuzz target is very
316 /// particular about the shape of its input:
317 ///
318 /// * You want to fuzz "deeper" than just the parser.
319 /// * The input contains checksums that have to match the hash of some subset of
320 ///   the data or else the whole thing is invalid, and therefore mutating any of
321 ///   that subset means you need to recompute the checksums.
322 /// * Small random changes to the input buffer make it invalid.
323 ///
324 /// That is, a custom mutator is useful in similar situations where [a `T:
325 /// Arbitrary` input type](macro.fuzz_target.html#arbitrary-input-types) is
326 /// useful. Note that the two approaches are not mutually exclusive; you can use
327 /// whichever is easier for your problem domain or both!
328 ///
329 /// ## Implementation Contract
330 ///
331 /// The original, unmodified input is given in `data[..size]`.
332 ///
333 /// You must modify the data in place and return the new size.
334 ///
335 /// The new size should not be greater than `max_size`. If this is not the case,
336 /// then the `data` will be truncated to fit within `max_size`. Note that
337 /// `max_size < size` is possible when shrinking test cases.
338 ///
339 /// You must produce the same mutation given the same `seed`. Generally, when
340 /// choosing what kind of mutation to make or where to mutate, you should start
341 /// by creating a random number generator (RNG) that is seeded with the given
342 /// `seed` and then consult the RNG whenever making a decision:
343 ///
344 /// ```no_run
345 /// #![no_main]
346 ///
347 /// use rand::{rngs::StdRng, Rng, SeedableRng};
348 ///
349 /// libfuzzer_sys::fuzz_mutator!(|data: &mut [u8], size: usize, max_size: usize, seed: u32| {
350 ///     let mut rng = StdRng::seed_from_u64(seed as u64);
351 ///
352 /// #   let first_mutation = |_, _, _, _| todo!();
353 /// #   let second_mutation = |_, _, _, _| todo!();
354 /// #   let third_mutation = |_, _, _, _| todo!();
355 /// #   let fourth_mutation = |_, _, _, _| todo!();
356 ///     // Choose which of our four supported kinds of mutations we want to make.
357 ///     match rng.gen_range(0..4) {
358 ///         0 => first_mutation(rng, data, size, max_size),
359 ///         1 => second_mutation(rng, data, size, max_size),
360 ///         2 => third_mutation(rng, data, size, max_size),
361 ///         3 => fourth_mutation(rng, data, size, max_size),
362 ///         _ => unreachable!()
363 ///     }
364 /// });
365 /// ```
366 ///
367 /// ## Example: Compression
368 ///
369 /// Consider a simple fuzz target that takes compressed data as input,
370 /// decompresses it, and then asserts that the decompressed data doesn't begin
371 /// with "boom". It is difficult for `libFuzzer` (or any other fuzzer) to crash
372 /// this fuzz target because nearly all mutations it makes will invalidate the
373 /// compression format. Therefore, we use a custom mutator that decompresses the
374 /// raw input, mutates the decompressed data, and then recompresses it. This
375 /// allows `libFuzzer` to quickly discover crashing inputs.
376 ///
377 /// ```no_run
378 /// #![no_main]
379 ///
380 /// use flate2::{read::GzDecoder, write::GzEncoder, Compression};
381 /// use libfuzzer_sys::{fuzz_mutator, fuzz_target};
382 /// use std::io::{Read, Write};
383 ///
384 /// fuzz_target!(|data: &[u8]| {
385 ///     // Decompress the input data and crash if it starts with "boom".
386 ///     if let Some(data) = decompress(data) {
387 ///         if data.starts_with(b"boom") {
388 ///             panic!();
389 ///         }
390 ///     }
391 /// });
392 ///
393 /// fuzz_mutator!(
394 ///     |data: &mut [u8], size: usize, max_size: usize, _seed: u32| {
395 ///         // Decompress the input data. If that fails, use a dummy value.
396 ///         let mut decompressed = decompress(&data[..size]).unwrap_or_else(|| b"hi".to_vec());
397 ///
398 ///         // Mutate the decompressed data with `libFuzzer`'s default mutator. Make
399 ///         // the `decompressed` vec's extra capacity available for insertion
400 ///         // mutations via `resize`.
401 ///         let len = decompressed.len();
402 ///         let cap = decompressed.capacity();
403 ///         decompressed.resize(cap, 0);
404 ///         let new_decompressed_size = libfuzzer_sys::fuzzer_mutate(&mut decompressed, len, cap);
405 ///
406 ///         // Recompress the mutated data.
407 ///         let compressed = compress(&decompressed[..new_decompressed_size]);
408 ///
409 ///         // Copy the recompressed mutated data into `data` and return the new size.
410 ///         let new_size = std::cmp::min(max_size, compressed.len());
411 ///         data[..new_size].copy_from_slice(&compressed[..new_size]);
412 ///         new_size
413 ///     }
414 /// );
415 ///
416 /// fn decompress(compressed_data: &[u8]) -> Option<Vec<u8>> {
417 ///     let mut decoder = GzDecoder::new(compressed_data);
418 ///     let mut decompressed = Vec::new();
419 ///     if decoder.read_to_end(&mut decompressed).is_ok() {
420 ///         Some(decompressed)
421 ///     } else {
422 ///         None
423 ///     }
424 /// }
425 ///
426 /// fn compress(data: &[u8]) -> Vec<u8> {
427 ///     let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
428 ///     encoder
429 ///         .write_all(data)
430 ///         .expect("writing into a vec is infallible");
431 ///     encoder.finish().expect("writing into a vec is infallible")
432 /// }
433 /// ```
434 ///
435 /// This example is inspired by [a similar example from the official `libFuzzer`
436 /// docs](https://github.com/google/fuzzing/blob/master/docs/structure-aware-fuzzing.md#example-compression).
437 ///
438 /// ## More Example Ideas
439 ///
/// * A PNG custom mutator that decodes a PNG, mutates the image, and then
///   re-encodes the mutated image as a new PNG.
442 ///
443 /// * A [`serde`](https://serde.rs/) custom mutator that deserializes your
444 ///   structure, mutates it, and then reserializes it.
445 ///
446 /// * A Wasm binary custom mutator that inserts, replaces, and removes a
447 ///   bytecode instruction in a function's body.
448 ///
449 /// * An HTTP request custom mutator that inserts, replaces, and removes a
450 ///   header from an HTTP request.
#[macro_export]
macro_rules! fuzz_mutator {
    (
        |
        $data:ident : &mut [u8] ,
        $size:ident : usize ,
        $max_size:ident : usize ,
        $seed:ident : u32 $(,)*
        |
        $body:block
    ) => {
        /// Auto-generated function.
        #[export_name = "LLVMFuzzerCustomMutator"]
        pub fn rust_fuzzer_custom_mutator(
            $data: *mut u8,
            $size: usize,
            $max_size: usize,
            $seed: std::os::raw::c_uint,
        ) -> usize {
            // The buffer behind `data` is as large as the bigger of `size` and
            // `max_size`: when growing a test case `max_size` wins, when
            // shrinking one `size` does.
            let capacity = usize::max($max_size, $size);
            let $data: &mut [u8] = unsafe { std::slice::from_raw_parts_mut($data, capacity) };

            // C's `unsigned int` is usually, but not always, 32 bits wide. The
            // cast never fails; if it drops bits it does so deterministically,
            // which is all the seed needs.
            let $seed = $seed as u32;

            // Run the user's mutator, then clamp whatever size it reports so
            // the result never exceeds `max_size`.
            let requested = { $body };
            usize::min(requested, $max_size)
        }
    };
}
487 
488 /// The default `libFuzzer` mutator.
489 ///
490 /// You generally don't have to use this at all unless you're defining a
491 /// custom mutator with [the `fuzz_mutator!` macro][crate::fuzz_mutator].
492 ///
493 /// Mutates `data[..size]` in place such that the mutated data is no larger than
494 /// `max_size` and returns the new size of the mutated data.
495 ///
496 /// To only allow shrinking mutations, make `max_size < size`.
497 ///
498 /// To additionally allow mutations that grow the size of the data, make
499 /// `max_size > size`.
500 ///
501 /// Both `size` and `max_size` must be less than or equal to `data.len()`.
502 ///
503 /// # Example
504 ///
505 /// ```no_run
506 /// // Create some data in a buffer.
507 /// let mut data = vec![0; 128];
508 /// data[..b"hello".len()].copy_from_slice(b"hello");
509 ///
510 /// // Ask `libFuzzer` to mutate the data. By setting `max_size` to our buffer's
511 /// // full length, we are allowing `libFuzzer` to perform mutations that grow
512 /// // the size of the data, such as insertions.
513 /// let size = b"hello".len();
514 /// let max_size = data.len();
515 /// let new_size = libfuzzer_sys::fuzzer_mutate(&mut data, size, max_size);
516 ///
517 /// // Get the mutated data out of the buffer.
518 /// let mutated_data = &data[..new_size];
519 /// ```
fuzzer_mutate(data: &mut [u8], size: usize, max_size: usize) -> usize520 pub fn fuzzer_mutate(data: &mut [u8], size: usize, max_size: usize) -> usize {
521     assert!(size <= data.len());
522     assert!(max_size <= data.len());
523     let new_size = unsafe { LLVMFuzzerMutate(data.as_mut_ptr(), size, max_size) };
524     assert!(new_size <= data.len());
525     new_size
526 }
527