arbitrary/lib.rs
1// Copyright © 2019 The Rust Fuzz Project Developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! The `Arbitrary` trait crate.
10//!
11//! This trait provides an [`Arbitrary`] trait to
12//! produce well-typed, structured values, from raw, byte buffers. It is
13//! generally intended to be used with fuzzers like AFL or libFuzzer. See the
14//! [`Arbitrary`] trait's documentation for details on
15//! automatically deriving, implementing, and/or using the trait.
16
17#![deny(bad_style)]
18#![deny(missing_docs)]
19#![deny(future_incompatible)]
20#![deny(nonstandard_style)]
21#![deny(rust_2018_compatibility)]
22#![deny(rust_2018_idioms)]
23#![deny(unused)]
24
25mod error;
26mod foreign;
27pub mod size_hint;
28pub mod unstructured;
29
30#[cfg(test)]
31mod tests;
32
33pub use error::*;
34
35#[cfg(feature = "derive_arbitrary")]
36pub use derive_arbitrary::*;
37
38#[doc(inline)]
39pub use unstructured::Unstructured;
40
41/// Error indicating that the maximum recursion depth has been reached while calculating [`Arbitrary::size_hint`]()
42#[derive(Debug, Clone)]
43#[non_exhaustive]
44pub struct MaxRecursionReached {}
45
46impl core::fmt::Display for MaxRecursionReached {
47 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
48 f.write_str("Maximum recursion depth has been reached")
49 }
50}
51
52impl std::error::Error for MaxRecursionReached {}
53
54/// Generate arbitrary structured values from raw, unstructured data.
55///
56/// The `Arbitrary` trait allows you to generate valid structured values, like
57/// `HashMap`s, or ASTs, or `MyTomlConfig`, or any other data structure from
58/// raw, unstructured bytes provided by a fuzzer.
59///
60/// # Deriving `Arbitrary`
61///
62/// Automatically deriving the `Arbitrary` trait is the recommended way to
63/// implement `Arbitrary` for your types.
64///
65/// Using the custom derive requires that you enable the `"derive"` cargo
66/// feature in your `Cargo.toml`:
67///
68/// ```toml
69/// [dependencies]
70/// arbitrary = { version = "1", features = ["derive"] }
71/// ```
72///
73/// Then, you add the `#[derive(Arbitrary)]` annotation to your `struct` or
74/// `enum` type definition:
75///
76/// ```
77/// # #[cfg(feature = "derive")] mod foo {
78/// use arbitrary::Arbitrary;
79/// use std::collections::HashSet;
80///
81/// #[derive(Arbitrary)]
82/// pub struct AddressBook {
83/// friends: HashSet<Friend>,
84/// }
85///
86/// #[derive(Arbitrary, Hash, Eq, PartialEq)]
87/// pub enum Friend {
88/// Buddy { name: String },
89/// Pal { age: usize },
90/// }
91/// # }
92/// ```
93///
94/// Every member of the `struct` or `enum` must also implement `Arbitrary`.
95///
96/// It is also possible to change the default bounds added by the derive:
97///
98/// ```
99/// # #[cfg(feature = "derive")] mod foo {
100/// use arbitrary::Arbitrary;
101///
102/// trait Trait {
103/// type Assoc: for<'a> Arbitrary<'a>;
104/// }
105///
106/// #[derive(Arbitrary)]
107/// // The bounds are used verbatim, so any existing trait bounds will need to be repeated.
108/// #[arbitrary(bound = "T: Trait")]
109/// struct Point<T: Trait> {
110/// x: T::Assoc,
111/// }
112/// # }
113/// ```
114///
115/// # Implementing `Arbitrary` By Hand
116///
117/// Implementing `Arbitrary` mostly involves nested calls to other `Arbitrary`
118/// arbitrary implementations for each of your `struct` or `enum`'s members. But
119/// sometimes you need some amount of raw data, or you need to generate a
120/// variably-sized collection type, or something of that sort. The
121/// [`Unstructured`] type helps you with these tasks.
122///
123/// ```
124/// # #[cfg(feature = "derive")] mod foo {
125/// # pub struct MyCollection<T> { _t: std::marker::PhantomData<T> }
126/// # impl<T> MyCollection<T> {
127/// # pub fn new() -> Self { MyCollection { _t: std::marker::PhantomData } }
128/// # pub fn insert(&mut self, element: T) {}
129/// # }
130/// use arbitrary::{Arbitrary, Result, Unstructured};
131///
132/// impl<'a, T> Arbitrary<'a> for MyCollection<T>
133/// where
134/// T: Arbitrary<'a>,
135/// {
136/// fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
137/// // Get an iterator of arbitrary `T`s.
138/// let iter = u.arbitrary_iter::<T>()?;
139///
140/// // And then create a collection!
141/// let mut my_collection = MyCollection::new();
142/// for elem_result in iter {
143/// let elem = elem_result?;
144/// my_collection.insert(elem);
145/// }
146///
147/// Ok(my_collection)
148/// }
149/// }
150/// # }
151/// ```
152///
153/// # A Note On Output Distributions
154///
155/// There is no requirement for a particular distribution of the values. For
156/// example, it is not required that every value appears with the same
157/// probability. That being said, the main use for `Arbitrary` is for fuzzing,
158/// so in many cases a uniform distribution will make the most sense in order to
159/// provide the best coverage of the domain. In other cases this is not
160/// desirable or even possible, for example when sampling from a uniform
161/// distribution is computationally expensive or in the case of collections that
162/// may grow indefinitely.
163pub trait Arbitrary<'a>: Sized {
164 /// Generate an arbitrary value of `Self` from the given unstructured data.
165 ///
166 /// Calling `Arbitrary::arbitrary` requires that you have some raw data,
167 /// perhaps given to you by a fuzzer like AFL or libFuzzer. You wrap this
168 /// raw data in an `Unstructured`, and then you can call `<MyType as
169 /// Arbitrary>::arbitrary` to construct an arbitrary instance of `MyType`
170 /// from that unstructured data.
171 ///
172 /// Implementations may return an error if there is not enough data to
173 /// construct a full instance of `Self`, or they may fill out the rest of
174 /// `Self` with dummy values. Using dummy values when the underlying data is
175 /// exhausted can help avoid accidentally "defeating" some of the fuzzer's
176 /// mutations to the underlying byte stream that might otherwise lead to
177 /// interesting runtime behavior or new code coverage if only we had just a
178 /// few more bytes. However, it also requires that implementations for
179 /// recursive types (e.g. `struct Foo(Option<Box<Foo>>)`) avoid infinite
180 /// recursion when the underlying data is exhausted.
181 ///
182 /// ```
183 /// # #[cfg(feature = "derive")] fn foo() {
184 /// use arbitrary::{Arbitrary, Unstructured};
185 ///
186 /// #[derive(Arbitrary)]
187 /// pub struct MyType {
188 /// // ...
189 /// }
190 ///
191 /// // Get the raw data from the fuzzer or wherever else.
192 /// # let get_raw_data_from_fuzzer = || &[];
193 /// let raw_data: &[u8] = get_raw_data_from_fuzzer();
194 ///
195 /// // Wrap that raw data in an `Unstructured`.
196 /// let mut unstructured = Unstructured::new(raw_data);
197 ///
198 /// // Generate an arbitrary instance of `MyType` and do stuff with it.
199 /// if let Ok(value) = MyType::arbitrary(&mut unstructured) {
200 /// # let do_stuff = |_| {};
201 /// do_stuff(value);
202 /// }
203 /// # }
204 /// ```
205 ///
206 /// See also the documentation for [`Unstructured`].
207 fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self>;
208
209 /// Generate an arbitrary value of `Self` from the entirety of the given
210 /// unstructured data.
211 ///
212 /// This is similar to Arbitrary::arbitrary, however it assumes that it is
213 /// the last consumer of the given data, and is thus able to consume it all
214 /// if it needs. See also the documentation for
215 /// [`Unstructured`].
216 fn arbitrary_take_rest(mut u: Unstructured<'a>) -> Result<Self> {
217 Self::arbitrary(&mut u)
218 }
219
220 /// Get a size hint for how many bytes out of an `Unstructured` this type
221 /// needs to construct itself.
222 ///
223 /// This is useful for determining how many elements we should insert when
224 /// creating an arbitrary collection.
225 ///
226 /// The return value is similar to [`Iterator::size_hint`]: it returns a
227 /// tuple where the first element is a lower bound on the number of bytes
228 /// required, and the second element is an optional upper bound.
229 ///
230 /// The default implementation return `(0, None)` which is correct for any
231 /// type, but not ultimately that useful. Using `#[derive(Arbitrary)]` will
232 /// create a better implementation. If you are writing an `Arbitrary`
233 /// implementation by hand, and your type can be part of a dynamically sized
234 /// collection (such as `Vec`), you are strongly encouraged to override this
235 /// default with a better implementation, and also override
236 /// [`try_size_hint`].
237 ///
238 /// ## How to implement this
239 ///
240 /// If the size hint calculation is a trivial constant and does not recurse
241 /// into any other `size_hint` call, you should implement it in `size_hint`:
242 ///
243 /// ```
244 /// use arbitrary::{size_hint, Arbitrary, Result, Unstructured};
245 ///
246 /// struct SomeStruct(u8);
247 ///
248 /// impl<'a> Arbitrary<'a> for SomeStruct {
249 /// fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
250 /// let buf = &mut [0];
251 /// u.fill_buffer(buf)?;
252 /// Ok(SomeStruct(buf[0]))
253 /// }
254 ///
255 /// #[inline]
256 /// fn size_hint(depth: usize) -> (usize, Option<usize>) {
257 /// let _ = depth;
258 /// (1, Some(1))
259 /// }
260 /// }
261 /// ```
262 ///
263 /// Otherwise, it should instead be implemented in [`try_size_hint`],
264 /// and the `size_hint` implementation should forward to it:
265 ///
266 /// ```
267 /// use arbitrary::{size_hint, Arbitrary, MaxRecursionReached, Result, Unstructured};
268 ///
269 /// struct SomeStruct<A, B> {
270 /// a: A,
271 /// b: B,
272 /// }
273 ///
274 /// impl<'a, A: Arbitrary<'a>, B: Arbitrary<'a>> Arbitrary<'a> for SomeStruct<A, B> {
275 /// fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
276 /// // ...
277 /// # todo!()
278 /// }
279 ///
280 /// fn size_hint(depth: usize) -> (usize, Option<usize>) {
281 /// // Return the value of try_size_hint
282 /// //
283 /// // If the recursion fails, return the default, always valid `(0, None)`
284 /// Self::try_size_hint(depth).unwrap_or_default()
285 /// }
286 ///
287 /// fn try_size_hint(depth: usize) -> Result<(usize, Option<usize>), MaxRecursionReached> {
288 /// // Protect against potential infinite recursion with
289 /// // `try_recursion_guard`.
290 /// size_hint::try_recursion_guard(depth, |depth| {
291 /// // If we aren't too deep, then `recursion_guard` calls
292 /// // this closure, which implements the natural size hint.
293 /// // Don't forget to use the new `depth` in all nested
294 /// // `try_size_hint` calls! We recommend shadowing the
295 /// // parameter, like what is done here, so that you can't
296 /// // accidentally use the wrong depth.
297 /// Ok(size_hint::and(
298 /// <A as Arbitrary>::try_size_hint(depth)?,
299 /// <B as Arbitrary>::try_size_hint(depth)?,
300 /// ))
301 /// })
302 /// }
303 /// }
304 /// ```
305 ///
306 /// ## Invariant
307 ///
308 /// It must be possible to construct every possible output using only inputs
309 /// of lengths bounded by these parameters. This applies to both
310 /// [`Arbitrary::arbitrary`] and [`Arbitrary::arbitrary_take_rest`].
311 ///
312 /// This is trivially true for `(0, None)`. To restrict this further, it
313 /// must be proven that all inputs that are now excluded produced redundant
314 /// outputs which are still possible to produce using the reduced input
315 /// space.
316 ///
317 /// [iterator-size-hint]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.size_hint
318 /// [`try_size_hint`]: Arbitrary::try_size_hint
319 #[inline]
320 fn size_hint(depth: usize) -> (usize, Option<usize>) {
321 let _ = depth;
322 (0, None)
323 }
324
325 /// Get a size hint for how many bytes out of an `Unstructured` this type
326 /// needs to construct itself.
327 ///
328 /// Unlike [`size_hint`], this function keeps the information that the
329 /// recursion limit was reached. This is required to "short circuit" the
330 /// calculation and avoid exponential blowup with recursive structures.
331 ///
332 /// If you are implementing [`size_hint`] for a struct that could be
333 /// recursive, you should implement `try_size_hint` and call the
334 /// `try_size_hint` when recursing
335 ///
336 ///
337 /// The return value is similar to [`core::iter::Iterator::size_hint`]: it
338 /// returns a tuple where the first element is a lower bound on the number
339 /// of bytes required, and the second element is an optional upper bound.
340 ///
341 /// The default implementation returns the value of [`size_hint`] which is
342 /// correct for any type, but might lead to exponential blowup when dealing
343 /// with recursive types.
344 ///
345 /// ## Invariant
346 ///
347 /// It must be possible to construct every possible output using only inputs
348 /// of lengths bounded by these parameters. This applies to both
349 /// [`Arbitrary::arbitrary`] and [`Arbitrary::arbitrary_take_rest`].
350 ///
351 /// This is trivially true for `(0, None)`. To restrict this further, it
352 /// must be proven that all inputs that are now excluded produced redundant
353 /// outputs which are still possible to produce using the reduced input
354 /// space.
355 ///
356 /// ## When to implement `try_size_hint`
357 ///
358 /// If you 100% know that the type you are implementing `Arbitrary` for is
359 /// not a recursive type, or your implementation is not transitively calling
360 /// any other `size_hint` methods, you may implement [`size_hint`], and the
361 /// default `try_size_hint` implementation will use it.
362 ///
363 /// Note that if you are implementing `Arbitrary` for a generic type, you
364 /// cannot guarantee the lack of type recursion!
365 ///
366 /// Otherwise, when there is possible type recursion, you should implement
367 /// `try_size_hint` instead.
368 ///
369 /// ## The `depth` parameter
370 ///
371 /// When implementing `try_size_hint`, you need to use
372 /// [`arbitrary::size_hint::try_recursion_guard(depth)`][crate::size_hint::try_recursion_guard]
373 /// to prevent potential infinite recursion when calculating size hints for
374 /// potentially recursive types:
375 ///
376 /// ```
377 /// use arbitrary::{size_hint, Arbitrary, MaxRecursionReached, Unstructured};
378 ///
379 /// // This can potentially be a recursive type if `L` or `R` contain
380 /// // something like `Box<Option<MyEither<L, R>>>`!
381 /// enum MyEither<L, R> {
382 /// Left(L),
383 /// Right(R),
384 /// }
385 ///
386 /// impl<'a, L, R> Arbitrary<'a> for MyEither<L, R>
387 /// where
388 /// L: Arbitrary<'a>,
389 /// R: Arbitrary<'a>,
390 /// {
391 /// fn arbitrary(u: &mut Unstructured) -> arbitrary::Result<Self> {
392 /// // ...
393 /// # unimplemented!()
394 /// }
395 ///
396 /// fn size_hint(depth: usize) -> (usize, Option<usize>) {
397 /// // Return the value of `try_size_hint`
398 /// //
399 /// // If the recursion fails, return the default `(0, None)` range,
400 /// // which is always valid.
401 /// Self::try_size_hint(depth).unwrap_or_default()
402 /// }
403 ///
404 /// fn try_size_hint(depth: usize) -> Result<(usize, Option<usize>), MaxRecursionReached> {
405 /// // Protect against potential infinite recursion with
406 /// // `try_recursion_guard`.
407 /// size_hint::try_recursion_guard(depth, |depth| {
408 /// // If we aren't too deep, then `recursion_guard` calls
409 /// // this closure, which implements the natural size hint.
410 /// // Don't forget to use the new `depth` in all nested
411 /// // `try_size_hint` calls! We recommend shadowing the
412 /// // parameter, like what is done here, so that you can't
413 /// // accidentally use the wrong depth.
414 /// Ok(size_hint::or(
415 /// <L as Arbitrary>::try_size_hint(depth)?,
416 /// <R as Arbitrary>::try_size_hint(depth)?,
417 /// ))
418 /// })
419 /// }
420 /// }
421 /// ```
422 #[inline]
423 fn try_size_hint(depth: usize) -> Result<(usize, Option<usize>), MaxRecursionReached> {
424 Ok(Self::size_hint(depth))
425 }
426}
427
428/// Multiple conflicting arbitrary attributes are used on the same field:
429/// ```compile_fail
430/// #[derive(::arbitrary::Arbitrary)]
431/// struct Point {
432/// #[arbitrary(value = 2)]
433/// #[arbitrary(value = 2)]
434/// x: i32,
435/// }
436/// ```
437///
438/// An unknown attribute:
439/// ```compile_fail
440/// #[derive(::arbitrary::Arbitrary)]
441/// struct Point {
442/// #[arbitrary(unknown_attr)]
443/// x: i32,
444/// }
445/// ```
446///
447/// An unknown attribute with a value:
448/// ```compile_fail
449/// #[derive(::arbitrary::Arbitrary)]
450/// struct Point {
451/// #[arbitrary(unknown_attr = 13)]
452/// x: i32,
453/// }
454/// ```
455///
456/// `value` without RHS:
457/// ```compile_fail
458/// #[derive(::arbitrary::Arbitrary)]
459/// struct Point {
460/// #[arbitrary(value)]
461/// x: i32,
462/// }
463/// ```
464///
465/// `with` without RHS:
466/// ```compile_fail
467/// #[derive(::arbitrary::Arbitrary)]
468/// struct Point {
469/// #[arbitrary(with)]
470/// x: i32,
471/// }
472/// ```
473///
474/// Multiple conflicting bounds at the container-level:
475/// ```compile_fail
476/// #[derive(::arbitrary::Arbitrary)]
477/// #[arbitrary(bound = "T: Default")]
478/// #[arbitrary(bound = "T: Default")]
479/// struct Point<T: Default> {
480/// #[arbitrary(default)]
481/// x: T,
482/// }
483/// ```
484///
485/// Multiple conflicting bounds in a single bound attribute:
486/// ```compile_fail
487/// #[derive(::arbitrary::Arbitrary)]
488/// #[arbitrary(bound = "T: Default, T: Default")]
489/// struct Point<T: Default> {
490/// #[arbitrary(default)]
491/// x: T,
492/// }
493/// ```
494///
495/// Multiple conflicting bounds in multiple bound attributes:
496/// ```compile_fail
497/// #[derive(::arbitrary::Arbitrary)]
498/// #[arbitrary(bound = "T: Default", bound = "T: Default")]
499/// struct Point<T: Default> {
500/// #[arbitrary(default)]
501/// x: T,
502/// }
503/// ```
504///
505/// Too many bounds supplied:
506/// ```compile_fail
507/// #[derive(::arbitrary::Arbitrary)]
508/// #[arbitrary(bound = "T: Default")]
509/// struct Point {
510/// x: i32,
511/// }
512/// ```
513///
514/// Too many bounds supplied across multiple attributes:
515/// ```compile_fail
516/// #[derive(::arbitrary::Arbitrary)]
517/// #[arbitrary(bound = "T: Default")]
518/// #[arbitrary(bound = "U: Default")]
519/// struct Point<T: Default> {
520/// #[arbitrary(default)]
521/// x: T,
522/// }
523/// ```
524///
525/// Attempt to use the derive attribute on an enum variant:
526/// ```compile_fail
527/// #[derive(::arbitrary::Arbitrary)]
528/// enum Enum<T: Default> {
529/// #[arbitrary(default)]
530/// Variant(T),
531/// }
532/// ```
533#[cfg(all(doctest, feature = "derive"))]
534pub struct CompileFailTests;