/// Per-thread byte budget for thread-local variables.
///
/// This needs to be big enough to store all thread-local variables for a
/// single thread. We fail at runtime if this limit is exceeded.
pub const BYTES_PER_THREAD: usize = 1024;