arbitrary/
lib.rs

1// Copyright © 2019 The Rust Fuzz Project Developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! The `Arbitrary` trait crate.
10//!
11//! This trait provides an [`Arbitrary`] trait to
12//! produce well-typed, structured values, from raw, byte buffers. It is
13//! generally intended to be used with fuzzers like AFL or libFuzzer. See the
14//! [`Arbitrary`] trait's documentation for details on
15//! automatically deriving, implementing, and/or using the trait.
16
17#![deny(bad_style)]
18#![deny(missing_docs)]
19#![deny(future_incompatible)]
20#![deny(nonstandard_style)]
21#![deny(rust_2018_compatibility)]
22#![deny(rust_2018_idioms)]
23#![deny(unused)]
24
25mod error;
26mod foreign;
27pub mod size_hint;
28pub mod unstructured;
29
30#[cfg(test)]
31mod tests;
32
33pub use error::*;
34
35#[cfg(feature = "derive_arbitrary")]
36pub use derive_arbitrary::*;
37
38#[doc(inline)]
39pub use unstructured::Unstructured;
40
41/// Error indicating that the maximum recursion depth has been reached while calculating [`Arbitrary::size_hint`]()
42#[derive(Debug, Clone)]
43#[non_exhaustive]
44pub struct MaxRecursionReached {}
45
46impl core::fmt::Display for MaxRecursionReached {
47    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
48        f.write_str("Maximum recursion depth has been reached")
49    }
50}
51
52impl std::error::Error for MaxRecursionReached {}
53
54/// Generate arbitrary structured values from raw, unstructured data.
55///
56/// The `Arbitrary` trait allows you to generate valid structured values, like
57/// `HashMap`s, or ASTs, or `MyTomlConfig`, or any other data structure from
58/// raw, unstructured bytes provided by a fuzzer.
59///
60/// # Deriving `Arbitrary`
61///
62/// Automatically deriving the `Arbitrary` trait is the recommended way to
63/// implement `Arbitrary` for your types.
64///
65/// Using the custom derive requires that you enable the `"derive"` cargo
66/// feature in your `Cargo.toml`:
67///
68/// ```toml
69/// [dependencies]
70/// arbitrary = { version = "1", features = ["derive"] }
71/// ```
72///
73/// Then, you add the `#[derive(Arbitrary)]` annotation to your `struct` or
74/// `enum` type definition:
75///
76/// ```
77/// # #[cfg(feature = "derive")] mod foo {
78/// use arbitrary::Arbitrary;
79/// use std::collections::HashSet;
80///
81/// #[derive(Arbitrary)]
82/// pub struct AddressBook {
83///     friends: HashSet<Friend>,
84/// }
85///
86/// #[derive(Arbitrary, Hash, Eq, PartialEq)]
87/// pub enum Friend {
88///     Buddy { name: String },
89///     Pal { age: usize },
90/// }
91/// # }
92/// ```
93///
94/// Every member of the `struct` or `enum` must also implement `Arbitrary`.
95///
96/// It is also possible to change the default bounds added by the derive:
97///
98/// ```
99/// # #[cfg(feature = "derive")] mod foo {
100/// use arbitrary::Arbitrary;
101///
102/// trait Trait {
103///     type Assoc: for<'a> Arbitrary<'a>;
104/// }
105///
106/// #[derive(Arbitrary)]
107/// // The bounds are used verbatim, so any existing trait bounds will need to be repeated.
108/// #[arbitrary(bound = "T: Trait")]
109/// struct Point<T: Trait> {
110///     x: T::Assoc,
111/// }
112/// # }
113/// ```
114///
115/// # Implementing `Arbitrary` By Hand
116///
117/// Implementing `Arbitrary` mostly involves nested calls to other `Arbitrary`
118/// arbitrary implementations for each of your `struct` or `enum`'s members. But
119/// sometimes you need some amount of raw data, or you need to generate a
120/// variably-sized collection type, or something of that sort. The
121/// [`Unstructured`] type helps you with these tasks.
122///
123/// ```
124/// # #[cfg(feature = "derive")] mod foo {
125/// # pub struct MyCollection<T> { _t: std::marker::PhantomData<T> }
126/// # impl<T> MyCollection<T> {
127/// #     pub fn new() -> Self { MyCollection { _t: std::marker::PhantomData } }
128/// #     pub fn insert(&mut self, element: T) {}
129/// # }
130/// use arbitrary::{Arbitrary, Result, Unstructured};
131///
132/// impl<'a, T> Arbitrary<'a> for MyCollection<T>
133/// where
134///     T: Arbitrary<'a>,
135/// {
136///     fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
137///         // Get an iterator of arbitrary `T`s.
138///         let iter = u.arbitrary_iter::<T>()?;
139///
140///         // And then create a collection!
141///         let mut my_collection = MyCollection::new();
142///         for elem_result in iter {
143///             let elem = elem_result?;
144///             my_collection.insert(elem);
145///         }
146///
147///         Ok(my_collection)
148///     }
149/// }
150/// # }
151/// ```
152///
153/// # A Note On Output Distributions
154///
155/// There is no requirement for a particular distribution of the values. For
156/// example, it is not required that every value appears with the same
157/// probability. That being said, the main use for `Arbitrary` is for fuzzing,
158/// so in many cases a uniform distribution will make the most sense in order to
159/// provide the best coverage of the domain. In other cases this is not
160/// desirable or even possible, for example when sampling from a uniform
161/// distribution is computationally expensive or in the case of collections that
162/// may grow indefinitely.
163pub trait Arbitrary<'a>: Sized {
164    /// Generate an arbitrary value of `Self` from the given unstructured data.
165    ///
166    /// Calling `Arbitrary::arbitrary` requires that you have some raw data,
167    /// perhaps given to you by a fuzzer like AFL or libFuzzer. You wrap this
168    /// raw data in an `Unstructured`, and then you can call `<MyType as
169    /// Arbitrary>::arbitrary` to construct an arbitrary instance of `MyType`
170    /// from that unstructured data.
171    ///
172    /// Implementations may return an error if there is not enough data to
173    /// construct a full instance of `Self`, or they may fill out the rest of
174    /// `Self` with dummy values. Using dummy values when the underlying data is
175    /// exhausted can help avoid accidentally "defeating" some of the fuzzer's
176    /// mutations to the underlying byte stream that might otherwise lead to
177    /// interesting runtime behavior or new code coverage if only we had just a
178    /// few more bytes. However, it also requires that implementations for
179    /// recursive types (e.g. `struct Foo(Option<Box<Foo>>)`) avoid infinite
180    /// recursion when the underlying data is exhausted.
181    ///
182    /// ```
183    /// # #[cfg(feature = "derive")] fn foo() {
184    /// use arbitrary::{Arbitrary, Unstructured};
185    ///
186    /// #[derive(Arbitrary)]
187    /// pub struct MyType {
188    ///     // ...
189    /// }
190    ///
191    /// // Get the raw data from the fuzzer or wherever else.
192    /// # let get_raw_data_from_fuzzer = || &[];
193    /// let raw_data: &[u8] = get_raw_data_from_fuzzer();
194    ///
195    /// // Wrap that raw data in an `Unstructured`.
196    /// let mut unstructured = Unstructured::new(raw_data);
197    ///
198    /// // Generate an arbitrary instance of `MyType` and do stuff with it.
199    /// if let Ok(value) = MyType::arbitrary(&mut unstructured) {
200    /// #   let do_stuff = |_| {};
201    ///     do_stuff(value);
202    /// }
203    /// # }
204    /// ```
205    ///
206    /// See also the documentation for [`Unstructured`].
207    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self>;
208
209    /// Generate an arbitrary value of `Self` from the entirety of the given
210    /// unstructured data.
211    ///
212    /// This is similar to Arbitrary::arbitrary, however it assumes that it is
213    /// the last consumer of the given data, and is thus able to consume it all
214    /// if it needs.  See also the documentation for
215    /// [`Unstructured`].
216    fn arbitrary_take_rest(mut u: Unstructured<'a>) -> Result<Self> {
217        Self::arbitrary(&mut u)
218    }
219
220    /// Get a size hint for how many bytes out of an `Unstructured` this type
221    /// needs to construct itself.
222    ///
223    /// This is useful for determining how many elements we should insert when
224    /// creating an arbitrary collection.
225    ///
226    /// The return value is similar to [`Iterator::size_hint`]: it returns a
227    /// tuple where the first element is a lower bound on the number of bytes
228    /// required, and the second element is an optional upper bound.
229    ///
230    /// The default implementation return `(0, None)` which is correct for any
231    /// type, but not ultimately that useful. Using `#[derive(Arbitrary)]` will
232    /// create a better implementation. If you are writing an `Arbitrary`
233    /// implementation by hand, and your type can be part of a dynamically sized
234    /// collection (such as `Vec`), you are strongly encouraged to override this
235    /// default with a better implementation, and also override
236    /// [`try_size_hint`].
237    ///
238    /// ## How to implement this
239    ///
240    /// If the size hint calculation is a trivial constant and does not recurse
241    /// into any other `size_hint` call, you should implement it in `size_hint`:
242    ///
243    /// ```
244    /// use arbitrary::{size_hint, Arbitrary, Result, Unstructured};
245    ///
246    /// struct SomeStruct(u8);
247    ///
248    /// impl<'a> Arbitrary<'a> for SomeStruct {
249    ///     fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
250    ///         let buf = &mut [0];
251    ///         u.fill_buffer(buf)?;
252    ///         Ok(SomeStruct(buf[0]))
253    ///     }
254    ///
255    ///     #[inline]
256    ///     fn size_hint(depth: usize) -> (usize, Option<usize>) {
257    ///         let _ = depth;
258    ///         (1, Some(1))
259    ///     }
260    /// }
261    /// ```
262    ///
263    /// Otherwise, it should instead be implemented in [`try_size_hint`],
264    /// and the `size_hint` implementation should forward to it:
265    ///
266    /// ```
267    /// use arbitrary::{size_hint, Arbitrary, MaxRecursionReached, Result, Unstructured};
268    ///
269    /// struct SomeStruct<A, B> {
270    ///     a: A,
271    ///     b: B,
272    /// }
273    ///
274    /// impl<'a, A: Arbitrary<'a>, B: Arbitrary<'a>> Arbitrary<'a> for SomeStruct<A, B> {
275    ///     fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
276    ///         // ...
277    /// #       todo!()
278    ///     }
279    ///
280    ///     fn size_hint(depth: usize) -> (usize, Option<usize>) {
281    ///         // Return the value of try_size_hint
282    ///         //
283    ///         // If the recursion fails, return the default, always valid `(0, None)`
284    ///         Self::try_size_hint(depth).unwrap_or_default()
285    ///     }
286    ///
287    ///     fn try_size_hint(depth: usize) -> Result<(usize, Option<usize>), MaxRecursionReached> {
288    ///         // Protect against potential infinite recursion with
289    ///         // `try_recursion_guard`.
290    ///         size_hint::try_recursion_guard(depth, |depth| {
291    ///             // If we aren't too deep, then `recursion_guard` calls
292    ///             // this closure, which implements the natural size hint.
293    ///             // Don't forget to use the new `depth` in all nested
294    ///             // `try_size_hint` calls! We recommend shadowing the
295    ///             // parameter, like what is done here, so that you can't
296    ///             // accidentally use the wrong depth.
297    ///             Ok(size_hint::and(
298    ///                 <A as Arbitrary>::try_size_hint(depth)?,
299    ///                 <B as Arbitrary>::try_size_hint(depth)?,
300    ///             ))
301    ///         })
302    ///     }
303    /// }
304    /// ```
305    ///
306    /// ## Invariant
307    ///
308    /// It must be possible to construct every possible output using only inputs
309    /// of lengths bounded by these parameters. This applies to both
310    /// [`Arbitrary::arbitrary`] and [`Arbitrary::arbitrary_take_rest`].
311    ///
312    /// This is trivially true for `(0, None)`. To restrict this further, it
313    /// must be proven that all inputs that are now excluded produced redundant
314    /// outputs which are still possible to produce using the reduced input
315    /// space.
316    ///
317    /// [iterator-size-hint]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.size_hint
318    /// [`try_size_hint`]: Arbitrary::try_size_hint
319    #[inline]
320    fn size_hint(depth: usize) -> (usize, Option<usize>) {
321        let _ = depth;
322        (0, None)
323    }
324
325    /// Get a size hint for how many bytes out of an `Unstructured` this type
326    /// needs to construct itself.
327    ///
328    /// Unlike [`size_hint`], this function keeps the information that the
329    /// recursion limit was reached. This is required to "short circuit" the
330    /// calculation and avoid exponential blowup with recursive structures.
331    ///
332    /// If you are implementing [`size_hint`] for a struct that could be
333    /// recursive, you should implement `try_size_hint` and call the
334    /// `try_size_hint` when recursing
335    ///
336    ///
337    /// The return value is similar to [`core::iter::Iterator::size_hint`]: it
338    /// returns a tuple where the first element is a lower bound on the number
339    /// of bytes required, and the second element is an optional upper bound.
340    ///
341    /// The default implementation returns the value of [`size_hint`] which is
342    /// correct for any type, but might lead to exponential blowup when dealing
343    /// with recursive types.
344    ///
345    /// ## Invariant
346    ///
347    /// It must be possible to construct every possible output using only inputs
348    /// of lengths bounded by these parameters. This applies to both
349    /// [`Arbitrary::arbitrary`] and [`Arbitrary::arbitrary_take_rest`].
350    ///
351    /// This is trivially true for `(0, None)`. To restrict this further, it
352    /// must be proven that all inputs that are now excluded produced redundant
353    /// outputs which are still possible to produce using the reduced input
354    /// space.
355    ///
356    /// ## When to implement `try_size_hint`
357    ///
358    /// If you 100% know that the type you are implementing `Arbitrary` for is
359    /// not a recursive type, or your implementation is not transitively calling
360    /// any other `size_hint` methods, you may implement [`size_hint`], and the
361    /// default `try_size_hint` implementation will use it.
362    ///
363    /// Note that if you are implementing `Arbitrary` for a generic type, you
364    /// cannot guarantee the lack of type recursion!
365    ///
366    /// Otherwise, when there is possible type recursion, you should implement
367    /// `try_size_hint` instead.
368    ///
369    /// ## The `depth` parameter
370    ///
371    /// When implementing `try_size_hint`, you need to use
372    /// [`arbitrary::size_hint::try_recursion_guard(depth)`][crate::size_hint::try_recursion_guard]
373    /// to prevent potential infinite recursion when calculating size hints for
374    /// potentially recursive types:
375    ///
376    /// ```
377    /// use arbitrary::{size_hint, Arbitrary, MaxRecursionReached, Unstructured};
378    ///
379    /// // This can potentially be a recursive type if `L` or `R` contain
380    /// // something like `Box<Option<MyEither<L, R>>>`!
381    /// enum MyEither<L, R> {
382    ///     Left(L),
383    ///     Right(R),
384    /// }
385    ///
386    /// impl<'a, L, R> Arbitrary<'a> for MyEither<L, R>
387    /// where
388    ///     L: Arbitrary<'a>,
389    ///     R: Arbitrary<'a>,
390    /// {
391    ///     fn arbitrary(u: &mut Unstructured) -> arbitrary::Result<Self> {
392    ///         // ...
393    /// #       unimplemented!()
394    ///     }
395    ///
396    ///     fn size_hint(depth: usize) -> (usize, Option<usize>) {
397    ///         // Return the value of `try_size_hint`
398    ///         //
399    ///         // If the recursion fails, return the default `(0, None)` range,
400    ///         // which is always valid.
401    ///         Self::try_size_hint(depth).unwrap_or_default()
402    ///     }
403    ///
404    ///     fn try_size_hint(depth: usize) -> Result<(usize, Option<usize>), MaxRecursionReached> {
405    ///         // Protect against potential infinite recursion with
406    ///         // `try_recursion_guard`.
407    ///         size_hint::try_recursion_guard(depth, |depth| {
408    ///             // If we aren't too deep, then `recursion_guard` calls
409    ///             // this closure, which implements the natural size hint.
410    ///             // Don't forget to use the new `depth` in all nested
411    ///             // `try_size_hint` calls! We recommend shadowing the
412    ///             // parameter, like what is done here, so that you can't
413    ///             // accidentally use the wrong depth.
414    ///             Ok(size_hint::or(
415    ///                 <L as Arbitrary>::try_size_hint(depth)?,
416    ///                 <R as Arbitrary>::try_size_hint(depth)?,
417    ///             ))
418    ///         })
419    ///     }
420    /// }
421    /// ```
422    #[inline]
423    fn try_size_hint(depth: usize) -> Result<(usize, Option<usize>), MaxRecursionReached> {
424        Ok(Self::size_hint(depth))
425    }
426}
427
428/// Multiple conflicting arbitrary attributes are used on the same field:
429/// ```compile_fail
430/// #[derive(::arbitrary::Arbitrary)]
431/// struct Point {
432///     #[arbitrary(value = 2)]
433///     #[arbitrary(value = 2)]
434///     x: i32,
435/// }
436/// ```
437///
438/// An unknown attribute:
439/// ```compile_fail
440/// #[derive(::arbitrary::Arbitrary)]
441/// struct Point {
442///     #[arbitrary(unknown_attr)]
443///     x: i32,
444/// }
445/// ```
446///
447/// An unknown attribute with a value:
448/// ```compile_fail
449/// #[derive(::arbitrary::Arbitrary)]
450/// struct Point {
451///     #[arbitrary(unknown_attr = 13)]
452///     x: i32,
453/// }
454/// ```
455///
456/// `value` without RHS:
457/// ```compile_fail
458/// #[derive(::arbitrary::Arbitrary)]
459/// struct Point {
460///     #[arbitrary(value)]
461///     x: i32,
462/// }
463/// ```
464///
465/// `with` without RHS:
466/// ```compile_fail
467/// #[derive(::arbitrary::Arbitrary)]
468/// struct Point {
469///     #[arbitrary(with)]
470///     x: i32,
471/// }
472/// ```
473///
474/// Multiple conflicting bounds at the container-level:
475/// ```compile_fail
476/// #[derive(::arbitrary::Arbitrary)]
477/// #[arbitrary(bound = "T: Default")]
478/// #[arbitrary(bound = "T: Default")]
479/// struct Point<T: Default> {
480///     #[arbitrary(default)]
481///     x: T,
482/// }
483/// ```
484///
485/// Multiple conflicting bounds in a single bound attribute:
486/// ```compile_fail
487/// #[derive(::arbitrary::Arbitrary)]
488/// #[arbitrary(bound = "T: Default, T: Default")]
489/// struct Point<T: Default> {
490///     #[arbitrary(default)]
491///     x: T,
492/// }
493/// ```
494///
495/// Multiple conflicting bounds in multiple bound attributes:
496/// ```compile_fail
497/// #[derive(::arbitrary::Arbitrary)]
498/// #[arbitrary(bound = "T: Default", bound = "T: Default")]
499/// struct Point<T: Default> {
500///     #[arbitrary(default)]
501///     x: T,
502/// }
503/// ```
504///
505/// Too many bounds supplied:
506/// ```compile_fail
507/// #[derive(::arbitrary::Arbitrary)]
508/// #[arbitrary(bound = "T: Default")]
509/// struct Point {
510///     x: i32,
511/// }
512/// ```
513///
514/// Too many bounds supplied across multiple attributes:
515/// ```compile_fail
516/// #[derive(::arbitrary::Arbitrary)]
517/// #[arbitrary(bound = "T: Default")]
518/// #[arbitrary(bound = "U: Default")]
519/// struct Point<T: Default> {
520///     #[arbitrary(default)]
521///     x: T,
522/// }
523/// ```
524///
525/// Attempt to use the derive attribute on an enum variant:
526/// ```compile_fail
527/// #[derive(::arbitrary::Arbitrary)]
528/// enum Enum<T: Default> {
529///     #[arbitrary(default)]
530///     Variant(T),
531/// }
532/// ```
533#[cfg(all(doctest, feature = "derive"))]
534pub struct CompileFailTests;
Morty Proxy This is a proxified and sanitized view of the page, visit original site.