foldhash/
fast.rs

1//! The foldhash implementation optimized for speed.
2
3use core::hash::{BuildHasher, Hasher};
4
5use crate::seed::{gen_per_hasher_seed, GlobalSeed, SharedSeed};
6use crate::{folded_multiply, hash_bytes_long, hash_bytes_medium, rotate_right, ARBITRARY3};
7
8/// A [`Hasher`] instance implementing foldhash, optimized for speed.
9///
10/// While you can create one directly with [`FoldHasher::with_seed`], you
11/// most likely want to use [`RandomState`], [`SeedableRandomState`] or
12/// [`FixedState`] to create [`FoldHasher`]s.
13#[derive(Clone)]
14pub struct FoldHasher {
15    accumulator: u64,
16    sponge: u128,
17    sponge_len: u8,
18    fold_seed: u64,
19    expand_seed: u64,
20    expand_seed2: u64,
21    expand_seed3: u64,
22}
23
24impl FoldHasher {
25    /// Initializes this [`FoldHasher`] with the given per-hasher seed and
26    /// [`SharedSeed`].
27    #[inline]
28    pub fn with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher {
29        FoldHasher {
30            accumulator: per_hasher_seed,
31            sponge: 0,
32            sponge_len: 0,
33            fold_seed: shared_seed.seeds[0],
34            expand_seed: shared_seed.seeds[1],
35            expand_seed2: shared_seed.seeds[2],
36            expand_seed3: shared_seed.seeds[3],
37        }
38    }
39
40    #[inline(always)]
41    fn write_num<T: Into<u128>>(&mut self, x: T) {
42        let bits: usize = 8 * core::mem::size_of::<T>();
43        if self.sponge_len as usize + bits > 128 {
44            let lo = self.sponge as u64;
45            let hi = (self.sponge >> 64) as u64;
46            self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed);
47            self.sponge = x.into();
48            self.sponge_len = bits as u8;
49        } else {
50            self.sponge |= x.into() << self.sponge_len;
51            self.sponge_len += bits as u8;
52        }
53    }
54}
55
56impl Hasher for FoldHasher {
57    #[inline(always)]
58    fn write(&mut self, bytes: &[u8]) {
59        // We perform overlapping reads in the byte hash which could lead to
60        // trivial length-extension attacks. These should be defeated by
61        // adding a length-dependent rotation on our unpredictable seed
62        // which costs only a single cycle (or none if executed with
63        // instruction-level parallelism).
64        let len = bytes.len();
65        let base_seed = rotate_right(self.accumulator, len as u32);
66        if len <= 16 {
67            let mut s0 = base_seed;
68            let mut s1 = self.expand_seed;
69            // XOR the input into s0, s1, then multiply and fold.
70            if len >= 8 {
71                s0 ^= u64::from_ne_bytes(bytes[0..8].try_into().unwrap());
72                s1 ^= u64::from_ne_bytes(bytes[len - 8..].try_into().unwrap());
73            } else if len >= 4 {
74                s0 ^= u32::from_ne_bytes(bytes[0..4].try_into().unwrap()) as u64;
75                s1 ^= u32::from_ne_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
76            } else if len > 0 {
77                let lo = bytes[0];
78                let mid = bytes[len / 2];
79                let hi = bytes[len - 1];
80                s0 ^= lo as u64;
81                s1 ^= ((hi as u64) << 8) | mid as u64;
82            }
83            self.accumulator = folded_multiply(s0, s1);
84        } else if len < 256 {
85            self.accumulator = hash_bytes_medium(
86                bytes,
87                base_seed,
88                base_seed.wrapping_add(self.expand_seed),
89                self.fold_seed,
90            );
91        } else {
92            self.accumulator = hash_bytes_long(
93                bytes,
94                base_seed,
95                base_seed.wrapping_add(self.expand_seed),
96                base_seed.wrapping_add(self.expand_seed2),
97                base_seed.wrapping_add(self.expand_seed3),
98                self.fold_seed,
99            );
100        }
101    }
102
103    #[inline(always)]
104    fn write_u8(&mut self, i: u8) {
105        self.write_num(i);
106    }
107
108    #[inline(always)]
109    fn write_u16(&mut self, i: u16) {
110        self.write_num(i);
111    }
112
113    #[inline(always)]
114    fn write_u32(&mut self, i: u32) {
115        self.write_num(i);
116    }
117
118    #[inline(always)]
119    fn write_u64(&mut self, i: u64) {
120        self.write_num(i);
121    }
122
123    #[inline(always)]
124    fn write_u128(&mut self, i: u128) {
125        let lo = i as u64;
126        let hi = (i >> 64) as u64;
127        self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed);
128    }
129
130    #[inline(always)]
131    fn write_usize(&mut self, i: usize) {
132        // u128 doesn't implement From<usize>.
133        #[cfg(target_pointer_width = "32")]
134        self.write_num(i as u32);
135        #[cfg(target_pointer_width = "64")]
136        self.write_num(i as u64);
137    }
138
139    #[inline(always)]
140    fn finish(&self) -> u64 {
141        if self.sponge_len > 0 {
142            let lo = self.sponge as u64;
143            let hi = (self.sponge >> 64) as u64;
144            folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed)
145        } else {
146            self.accumulator
147        }
148    }
149}
150
151/// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly initialized.
152#[derive(Copy, Clone, Debug)]
153pub struct RandomState {
154    per_hasher_seed: u64,
155    global_seed: GlobalSeed,
156}
157
158impl Default for RandomState {
159    #[inline(always)]
160    fn default() -> Self {
161        Self {
162            per_hasher_seed: gen_per_hasher_seed(),
163            global_seed: GlobalSeed::new(),
164        }
165    }
166}
167
168impl BuildHasher for RandomState {
169    type Hasher = FoldHasher;
170
171    #[inline(always)]
172    fn build_hasher(&self) -> FoldHasher {
173        FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get())
174    }
175}
176
177/// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly
178/// initialized by default, but can also be initialized with a specific seed.
179///
180/// This can be useful for e.g. testing, but the downside is that this type
181/// has a size of 16 bytes rather than the 8 bytes [`RandomState`] is.
182#[derive(Copy, Clone, Debug)]
183pub struct SeedableRandomState {
184    per_hasher_seed: u64,
185    shared_seed: &'static SharedSeed,
186}
187
188impl Default for SeedableRandomState {
189    #[inline(always)]
190    fn default() -> Self {
191        Self::random()
192    }
193}
194
195impl SeedableRandomState {
196    /// Generates a random [`SeedableRandomState`], similar to [`RandomState`].
197    #[inline(always)]
198    pub fn random() -> Self {
199        Self {
200            per_hasher_seed: gen_per_hasher_seed(),
201            shared_seed: SharedSeed::global_random(),
202        }
203    }
204
205    /// Generates a fixed [`SeedableRandomState`], similar to [`FixedState`].
206    #[inline(always)]
207    pub fn fixed() -> Self {
208        Self {
209            per_hasher_seed: ARBITRARY3,
210            shared_seed: SharedSeed::global_fixed(),
211        }
212    }
213
214    /// Generates a [`SeedableRandomState`] with the given per-hasher seed
215    /// and [`SharedSeed`].
216    #[inline(always)]
217    pub fn with_seed(per_hasher_seed: u64, shared_seed: &'static SharedSeed) -> Self {
218        // XOR with ARBITRARY3 such that with_seed(0) matches default.
219        Self {
220            per_hasher_seed: per_hasher_seed ^ ARBITRARY3,
221            shared_seed,
222        }
223    }
224}
225
226impl BuildHasher for SeedableRandomState {
227    type Hasher = FoldHasher;
228
229    #[inline(always)]
230    fn build_hasher(&self) -> FoldHasher {
231        FoldHasher::with_seed(self.per_hasher_seed, self.shared_seed)
232    }
233}
234
235/// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that always has the same fixed seed.
236///
237/// Not recommended unless you absolutely need determinism.
238#[derive(Copy, Clone, Debug)]
239pub struct FixedState {
240    per_hasher_seed: u64,
241}
242
243impl FixedState {
244    /// Creates a [`FixedState`] with the given per-hasher-seed.
245    #[inline(always)]
246    pub const fn with_seed(per_hasher_seed: u64) -> Self {
247        // XOR with ARBITRARY3 such that with_seed(0) matches default.
248        Self {
249            per_hasher_seed: per_hasher_seed ^ ARBITRARY3,
250        }
251    }
252}
253
254impl Default for FixedState {
255    #[inline(always)]
256    fn default() -> Self {
257        Self {
258            per_hasher_seed: ARBITRARY3,
259        }
260    }
261}
262
263impl BuildHasher for FixedState {
264    type Hasher = FoldHasher;
265
266    #[inline(always)]
267    fn build_hasher(&self) -> FoldHasher {
268        FoldHasher::with_seed(self.per_hasher_seed, SharedSeed::global_fixed())
269    }
270}