foldhash/seed.rs
1// These constants may end up unused depending on platform support.
2#[allow(unused)]
3use crate::{ARBITRARY1, ARBITRARY9};
4
5use super::{folded_multiply, ARBITRARY2, ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7};
6
7/// Used for FixedState, and RandomState if atomics for dynamic init are unavailable.
8const FIXED_GLOBAL_SEED: SharedSeed = SharedSeed {
9 seeds: [ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7],
10};
11
12pub(crate) fn gen_per_hasher_seed() -> u64 {
13 // We initialize the per-hasher seed with the stack pointer to ensure
14 // different threads have different seeds, with as side benefit that
15 // stack address randomization gives us further non-determinism.
16 let mut per_hasher_seed = 0;
17 let stack_ptr = core::ptr::addr_of!(per_hasher_seed) as u64;
18 per_hasher_seed = stack_ptr;
19
20 // If we have the standard library available we use a thread-local
21 // state to ensure RandomStates are different with high probability,
22 // even if the call stack is the same.
23 #[cfg(feature = "std")]
24 {
25 use std::cell::Cell;
26 thread_local! {
27 static PER_HASHER_NONDETERMINISM: Cell<u64> = const { Cell::new(0) };
28 }
29
30 PER_HASHER_NONDETERMINISM.with(|cell| {
31 let nondeterminism = cell.get();
32 per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
33 cell.set(per_hasher_seed);
34 })
35 };
36
37 // If we don't have the standard library we instead use a global
38 // atomic instead of a thread-local state.
39 //
40 // PER_HASHER_NONDETERMINISM is loaded and updated in a racy manner,
41 // but this doesn't matter in practice - it is impossible that two
42 // different threads have the same stack location, so they'll almost
43 // surely generate different seeds, and provide a different possible
44 // update for PER_HASHER_NONDETERMINISM. If we would use a proper
45 // fetch_add atomic update then there is a larger chance of
46 // problematic contention.
47 //
48 // We use usize instead of 64-bit atomics for best platform support.
49 #[cfg(not(feature = "std"))]
50 {
51 use core::sync::atomic::{AtomicUsize, Ordering};
52 static PER_HASHER_NONDETERMINISM: AtomicUsize = AtomicUsize::new(0);
53
54 let nondeterminism = PER_HASHER_NONDETERMINISM.load(Ordering::Relaxed) as u64;
55 per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
56 PER_HASHER_NONDETERMINISM.store(per_hasher_seed as usize, Ordering::Relaxed);
57 }
58
59 // One extra mixing step to ensure good random bits.
60 folded_multiply(per_hasher_seed, ARBITRARY2)
61}
62
63/// A random seed intended to be shared by many different foldhash instances.
64///
65/// This seed is consumed by [`FoldHasher::with_seed`](crate::fast::FoldHasher::with_seed),
66/// and [`SeedableRandomState::with_seed`](crate::fast::SeedableRandomState::with_seed).
67#[derive(Clone, Debug)]
68pub struct SharedSeed {
69 pub(crate) seeds: [u64; 4],
70}
71
72impl SharedSeed {
73 /// Returns the globally shared randomly initialized [`SharedSeed`] as used
74 /// by [`RandomState`](crate::fast::RandomState).
75 #[inline(always)]
76 pub fn global_random() -> &'static SharedSeed {
77 global::GlobalSeed::new().get()
78 }
79
80 /// Returns the globally shared fixed [`SharedSeed`] as used
81 /// by [`FixedState`](crate::fast::FixedState).
82 #[inline(always)]
83 pub const fn global_fixed() -> &'static SharedSeed {
84 &FIXED_GLOBAL_SEED
85 }
86
87 /// Generates a new [`SharedSeed`] from a single 64-bit seed.
88 ///
89 /// Note that this is somewhat expensive so it is suggested to re-use the
90 /// [`SharedSeed`] as much as possible, using the per-hasher seed to
91 /// differentiate between hash instances.
92 pub const fn from_u64(seed: u64) -> Self {
93 macro_rules! mix {
94 ($x: expr) => {
95 folded_multiply($x, ARBITRARY9)
96 };
97 }
98
99 let seed_a = mix!(mix!(mix!(seed)));
100 let seed_b = mix!(mix!(mix!(seed_a)));
101 let seed_c = mix!(mix!(mix!(seed_b)));
102 let seed_d = mix!(mix!(mix!(seed_c)));
103
104 // Zeroes form a weak-point for the multiply-mix, and zeroes tend to be
105 // a common input. So we want our global seeds that are XOR'ed with the
106 // input to always be non-zero. To also ensure there is always a good spread
107 // of bits, we give up 3 bits of entropy and simply force some bits on.
108 const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1;
109 Self {
110 seeds: [
111 seed_a | FORCED_ONES,
112 seed_b | FORCED_ONES,
113 seed_c | FORCED_ONES,
114 seed_d | FORCED_ONES,
115 ],
116 }
117 }
118}
119
120#[cfg(target_has_atomic = "8")]
121mod global {
122 use super::*;
123 use core::cell::UnsafeCell;
124 use core::sync::atomic::{AtomicU8, Ordering};
125
126 fn generate_global_seed() -> SharedSeed {
127 let mix = |seed: u64, x: u64| folded_multiply(seed ^ x, ARBITRARY9);
128
129 // Use address space layout randomization as our main randomness source.
130 // This isn't great, but we don't advertise HashDoS resistance in the first
131 // place. This is a whole lot better than nothing, at near zero cost with
132 // no dependencies.
133 let mut seed = 0;
134 let stack_ptr = &seed as *const _;
135 let func_ptr = generate_global_seed;
136 let static_ptr = &GLOBAL_SEED_STORAGE as *const _;
137 seed = mix(seed, stack_ptr as usize as u64);
138 seed = mix(seed, func_ptr as usize as u64);
139 seed = mix(seed, static_ptr as usize as u64);
140
141 // If we have the standard library available, augment entropy with the
142 // current time and an address from the allocator.
143 #[cfg(feature = "std")]
144 {
145 #[cfg(not(any(
146 miri,
147 all(target_family = "wasm", target_os = "unknown"),
148 target_os = "zkvm"
149 )))]
150 if let Ok(duration) = std::time::UNIX_EPOCH.elapsed() {
151 seed = mix(seed, duration.subsec_nanos() as u64);
152 seed = mix(seed, duration.as_secs());
153 }
154
155 let box_ptr = &*Box::new(0u8) as *const _;
156 seed = mix(seed, box_ptr as usize as u64);
157 }
158
159 SharedSeed::from_u64(seed)
160 }
161
162 // Now all the below code purely exists to cache the above seed as
163 // efficiently as possible. Even if we weren't a no_std crate and had access to
164 // OnceLock, we don't want to check whether the global is set each time we
165 // hash an object, so we hand-roll a global storage where type safety allows us
166 // to assume the storage is initialized after construction.
167 struct GlobalSeedStorage {
168 state: AtomicU8,
169 seed: UnsafeCell<SharedSeed>,
170 }
171
172 const UNINIT: u8 = 0;
173 const LOCKED: u8 = 1;
174 const INIT: u8 = 2;
175
176 // SAFETY: we only mutate the UnsafeCells when state is in the thread-exclusive
177 // LOCKED state, and only read the UnsafeCells when state is in the
178 // once-achieved-eternally-preserved state INIT.
179 unsafe impl Sync for GlobalSeedStorage {}
180
181 static GLOBAL_SEED_STORAGE: GlobalSeedStorage = GlobalSeedStorage {
182 state: AtomicU8::new(UNINIT),
183 seed: UnsafeCell::new(SharedSeed { seeds: [0; 4] }),
184 };
185
186 /// An object representing an initialized global seed.
187 ///
188 /// Does not actually store the seed inside itself, it is a zero-sized type.
189 /// This prevents inflating the RandomState size and in turn HashMap's size.
190 #[derive(Copy, Clone, Debug)]
191 pub struct GlobalSeed {
192 // So we can't accidentally type GlobalSeed { } within this crate.
193 _no_accidental_unsafe_init: (),
194 }
195
196 impl GlobalSeed {
197 #[inline(always)]
198 pub fn new() -> Self {
199 if GLOBAL_SEED_STORAGE.state.load(Ordering::Acquire) != INIT {
200 Self::init_slow()
201 }
202 Self {
203 _no_accidental_unsafe_init: (),
204 }
205 }
206
207 #[cold]
208 #[inline(never)]
209 fn init_slow() {
210 // Generate seed outside of critical section.
211 let seed = generate_global_seed();
212
213 loop {
214 match GLOBAL_SEED_STORAGE.state.compare_exchange_weak(
215 UNINIT,
216 LOCKED,
217 Ordering::Acquire,
218 Ordering::Acquire,
219 ) {
220 Ok(_) => unsafe {
221 // SAFETY: we just acquired an exclusive lock.
222 *GLOBAL_SEED_STORAGE.seed.get() = seed;
223 GLOBAL_SEED_STORAGE.state.store(INIT, Ordering::Release);
224 return;
225 },
226
227 Err(INIT) => return,
228
229 // Yes, it's a spin loop. We need to support no_std (so no easy
230 // access to proper locks), this is a one-time-per-program
231 // initialization, and the critical section is only a few
232 // store instructions, so it'll be fine.
233 _ => core::hint::spin_loop(),
234 }
235 }
236 }
237
238 #[inline(always)]
239 pub fn get(self) -> &'static SharedSeed {
240 // SAFETY: our constructor ensured we are in the INIT state and thus
241 // this raw read does not race with any write.
242 unsafe { &*GLOBAL_SEED_STORAGE.seed.get() }
243 }
244 }
245}
246
247#[cfg(not(target_has_atomic = "8"))]
248mod global {
249 use super::*;
250
251 #[derive(Copy, Clone, Debug)]
252 pub struct GlobalSeed {}
253
254 impl GlobalSeed {
255 #[inline(always)]
256 pub fn new() -> Self {
257 Self {}
258 }
259
260 #[inline(always)]
261 pub fn get(self) -> &'static SharedSeed {
262 &super::FIXED_GLOBAL_SEED
263 }
264 }
265}
266
267pub(crate) use global::GlobalSeed;