shadow_shim/
lib.rs

#![cfg_attr(not(test), no_std)]
// https://github.com/rust-lang/rfcs/blob/master/text/2585-unsafe-block-in-unsafe-fn.md
#![deny(unsafe_op_in_unsafe_fn)]

use core::cell::{Cell, RefCell};
use core::ffi::CStr;
use core::mem::MaybeUninit;

use crate::tls::ShimTlsVar;

use linux_api::prctl::ArchPrctlOp;
use linux_api::signal::{SigProcMaskAction, rt_sigprocmask};
use num_enum::{IntoPrimitive, TryFromPrimitive};
use shadow_shim_helper_rs::ipc::IPCData;
use shadow_shim_helper_rs::shim_event::{ShimEventStartReq, ShimEventToShadow, ShimEventToShim};
use shadow_shim_helper_rs::shim_shmem::{HostShmem, ManagerShmem, ProcessShmem, ThreadShmem};
use shadow_shim_helper_rs::simulation_time::SimulationTime;
use shadow_shim_helper_rs::syscall_types::ForeignPtr;
use shadow_shmem::allocator::{ShMemBlockAlias, ShMemBlockSerialized, shdeserialize};
use tls::ThreadLocalStorage;
use vasi_sync::lazy_lock::LazyLock;
use vasi_sync::scmutex::SelfContainedMutex;

/// cbindgen:ignore
mod bindings {
    #![allow(unused)]
    #![allow(non_upper_case_globals)]
    #![allow(non_camel_case_types)]
    #![allow(non_snake_case)]
    // https://github.com/rust-lang/rust/issues/66220
    #![allow(improper_ctypes)]
    include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
}

pub mod clone;
pub mod mmap_box;
pub mod preempt;
pub mod reinit_auxvec_random;
pub mod shimlogger;
pub mod syscall;
pub mod tls;

pub use shimlogger::export as shimlogger_export;

pub mod signals;

pub fn simtime() -> Option<SimulationTime> {
    SimulationTime::from_c_simtime(unsafe { bindings::shim_sys_get_simtime_nanos() })
}

/// Values of this enum describe whether something occurred within the context
/// of the shadow (shim) code, or the application/plugin code.
///
/// Methods of this enum interact with a private thread-local that tracks the
/// current `ExecutionContext` for that thread.
// See `CURRENT_EXECUTION_CONTEXT`.
#[derive(Debug, Copy, Clone, Eq, PartialEq, IntoPrimitive, TryFromPrimitive)]
#[repr(u8)]
pub enum ExecutionContext {
    // Redirect through constants below so that we can access in const contexts.
    Shadow = EXECUTION_CONTEXT_SHADOW_CONST,
    Application = EXECUTION_CONTEXT_APPLICATION_CONST,
}

pub const EXECUTION_CONTEXT_SHADOW_CONST: u8 = 0;
pub const EXECUTION_CONTEXT_APPLICATION_CONST: u8 = 1;

/// The `ExecutionContext` of the current thread. Should only be manipulated via
/// methods of `ExecutionContext`.
static CURRENT_EXECUTION_CONTEXT: ShimTlsVar<Cell<ExecutionContext>> =
    ShimTlsVar::new(&SHIM_TLS, || Cell::new(ExecutionContext::Application));

impl ExecutionContext {
    /// Returns the current context for the current thread.
    pub fn current() -> ExecutionContext {
        CURRENT_EXECUTION_CONTEXT.get().get()
    }

    /// Enter this context for the current thread, and return a restorer that
    /// will restore the previous context when dropped.
    pub fn enter(&self) -> ExecutionContextRestorer {
        ExecutionContextRestorer {
            prev: self.enter_without_restorer(),
        }
    }

    /// Enter this context for the current thread, *without* creating a
    /// restorer. Returns the previous context.
    pub fn enter_without_restorer(&self) -> ExecutionContext {
        let current_execution_ctx = CURRENT_EXECUTION_CONTEXT.get();
        let peeked_prev = current_execution_ctx.get();

        // Potentially enable/disable preemption, being careful that the current
        // context is set to shadow when calling other internal functions that
        // require it.
        let replaced_prev = match (peeked_prev, *self) {
            (ExecutionContext::Shadow, ExecutionContext::Application) => {
                // Call preempt::enable before changing context from shadow, so
                // that it can access shim state.
                // SAFETY: We only ever switch threads from the shadow execution
                // context, and we disable preemption when entering the shadow
                // execution context, so preemption should be disabled for all
                // other threads in this process.
                unsafe { preempt::enable() };
                current_execution_ctx.replace(*self)
            }
            (ExecutionContext::Application, ExecutionContext::Shadow) => {
                // Change context to shadow before calling preempt::disable, so
                // that it can access shim state.
                let c = current_execution_ctx.replace(*self);
                preempt::disable();
                c
            }
            (ExecutionContext::Application, ExecutionContext::Application) => {
                // No need to actually replace.
                ExecutionContext::Application
            }
            (ExecutionContext::Shadow, ExecutionContext::Shadow) => {
                // No need to actually replace.
                ExecutionContext::Shadow
            }
        };
        // It *shouldn't* be possible for the execution context to have changed
        // out from under us in between the initial peek and the actual
        // replacement.
        assert_eq!(peeked_prev, replaced_prev);
        peeked_prev
    }
}

/// Restores an execution context when dropped.
#[must_use]
#[derive(Debug)]
pub struct ExecutionContextRestorer {
    prev: ExecutionContext,
}

impl ExecutionContextRestorer {
    /// Returns the context that this object will restore.
    pub fn ctx(&self) -> ExecutionContext {
        self.prev
    }
}

impl Drop for ExecutionContextRestorer {
    fn drop(&mut self) {
        ExecutionContext::enter_without_restorer(&self.prev);
    }
}
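
// Illustrative usage sketch (not part of the shim API): code that needs to
// run Shadow-internal logic typically enters the Shadow context via a scoped
// restorer, relying on `Drop` to switch back:
//
//     fn some_shim_entry_point() {
//         // Entering the Shadow context also disables preemption
//         // (see `enter_without_restorer`).
//         let restorer = ExecutionContext::Shadow.enter();
//         debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
//         // ... Shadow-internal work ...
//         // Dropping `restorer` re-enters the previous context (and
//         // re-enables preemption if that context was Application).
//         drop(restorer);
//     }
//
// `some_shim_entry_point` is a hypothetical name; real entry points that do
// exactly this are in the `export` module below (e.g. `shim_api_syscall`).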

// We use a page for a stack guard, and up to another page to page-align the
// stack guard. We assume 4k pages here but detect at runtime if this is too small.
const SHIM_SIGNAL_STACK_GUARD_OVERHEAD: usize = 4096 * 2;

mod tls_thread_signal_stack {
    use super::*;

    static THREAD_SIGNAL_STACK: ShimTlsVar<Cell<*mut core::ffi::c_void>> =
        ShimTlsVar::new(&SHIM_TLS, || Cell::new(core::ptr::null_mut()));

    // Shouldn't need to make this very large, but needs to be big enough to run the
    // managed process's signal handlers as well - possibly recursively.
    //
    // Stack space that's *never* used shouldn't ever become resident, but an
    // occasional deep stack could force the pages to be resident ever after.  To
    // mitigate that, we could consider `madvise(MADV_DONTNEED)` after running
    // signal handlers, to let the OS reclaim the (now-popped) signal handler stack
    // frames.
    const SHIM_SIGNAL_STACK_MIN_USABLE_SIZE: usize = 1024 * 100;
    const SHIM_SIGNAL_STACK_SIZE: usize =
        SHIM_SIGNAL_STACK_GUARD_OVERHEAD + SHIM_SIGNAL_STACK_MIN_USABLE_SIZE;

    /// Allocates and installs a signal stack. This is to ensure that our
    /// signal handlers have enough stack space; otherwise we can run out in managed
    /// processes that use small stacks.
    ///
    /// This should be called once per thread before any signal handlers run.
    /// Panics if already called on the current thread.
    pub fn init() {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        if THREAD_SIGNAL_STACK.get().get().is_null() {
            // Allocate
            let new_stack = unsafe {
                rustix::mm::mmap_anonymous(
                    core::ptr::null_mut(),
                    SHIM_SIGNAL_STACK_SIZE,
                    rustix::mm::ProtFlags::READ | rustix::mm::ProtFlags::WRITE,
                    rustix::mm::MapFlags::PRIVATE,
                )
            }
            .unwrap();

            // Save to thread-local, so that we can deallocate on thread exit
            assert!(
                THREAD_SIGNAL_STACK.get().replace(new_stack).is_null(),
                "Allocated signal stack twice for current thread"
            );

            // Set up guard page
            unsafe { rustix::mm::mprotect(new_stack, 4096, rustix::mm::MprotectFlags::empty()) }
                .unwrap();
        } else {
            // We get here after forking.
            //
            // We still have the signal stack allocated in the new process.
            // We still need to install it though, below.
        }

        // Install via `sigaltstack`. The kernel will switch to this stack when
        // invoking one of our signal handlers.
        let stack_descriptor = linux_api::signal::stack_t {
            ss_sp: THREAD_SIGNAL_STACK.get().get(),
            ss_size: SHIM_SIGNAL_STACK_SIZE.try_into().unwrap(),
            // Clear the alternate stack settings on entry to signal handler, and
            // restore it on exit.  Otherwise a signal handler invoked while another
            // is running on the same thread would clobber the first handler's stack.
            // Instead we want the second handler to push a new frame on the alt
            // stack that's already installed.
            ss_flags: linux_api::signal::SigAltStackFlags::SS_AUTODISARM.bits(),
        };
        unsafe {
            linux_api::signal::sigaltstack(Some(&stack_descriptor), None).unwrap();
        }
    }

    /// # Safety
    ///
    /// After calling this function, the current thread must ensure the following
    /// sequence can't happen before exiting:
    ///
    /// * Another thread runs.
    /// * That thread also frees its stack.
    /// * This thread runs again on the signal stack (e.g. by handling a new signal).
    ///
    /// Generally in the shim we rely on Shadow's scheduling model to ensure
    /// this, since we know Shadow won't permit another thread to run
    /// preemptively before the current thread has a chance to finish exiting.
    pub unsafe fn free() {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        // A signal stack waiting to be freed.
        //
        // We can't free the current thread's signal stack, since we may be running on it.
        // Instead we save the pointer to this global, and free the pointer that was already
        // there, if any.
        //
        // We smuggle the pointer through as a `usize`, since pointers aren't `Sync`.
        static FREE_SIGNAL_STACK: SelfContainedMutex<usize> = SelfContainedMutex::const_new(0);

        let mut free_signal_stack = FREE_SIGNAL_STACK.lock();
        let this_thread_stack = THREAD_SIGNAL_STACK.get().replace(core::ptr::null_mut());
        let stack_to_free_now =
            core::mem::replace(&mut *free_signal_stack, this_thread_stack as usize);
        if stack_to_free_now != 0 {
            unsafe {
                rustix::mm::munmap(
                    stack_to_free_now as *mut core::ffi::c_void,
                    SHIM_SIGNAL_STACK_SIZE,
                )
            }
            .unwrap();
        }
    }
}
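
// Illustrative lifecycle sketch (assuming Shadow's serial scheduling model
// described in `free`'s safety docs):
//
//     // At thread start, before any signal handler can run:
//     tls_thread_signal_stack::init();
//     // ... signal handlers now run on the alternate stack; the lowest
//     // page of the mapping acts as a guard page (no access permissions) ...
//     // At thread exit:
//     unsafe { tls_thread_signal_stack::free() };
//
// Note that `free` doesn't unmap the current thread's stack immediately (we
// may still be running on it); it parks the pointer in `FREE_SIGNAL_STACK`
// and unmaps the stack parked by the previously-exiting thread.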

/// Per-thread IPC channel between the shim, running in a managed process, and
/// the shadow process.
mod tls_ipc {
    use super::*;
    static IPC_DATA_BLOCK: ShimTlsVar<RefCell<Option<ShMemBlockAlias<IPCData>>>> =
        ShimTlsVar::new(&SHIM_TLS, || RefCell::new(None));

    // Panics if this thread's IPC hasn't been initialized yet.
    pub fn with<O>(f: impl FnOnce(&IPCData) -> O) -> O {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        let ipc = IPC_DATA_BLOCK.get();
        let ipc = ipc.borrow();
        ipc.as_ref().map(|block| f(block)).unwrap()
    }

    /// The previous value, if any, is dropped.
    ///
    /// # Safety
    ///
    /// `blk` must contain a serialized block referencing a `ShMemBlock` of type `IPCData`.
    /// The `ShMemBlock` must outlive the current thread.
    pub unsafe fn set(blk: &ShMemBlockSerialized) {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        let blk: ShMemBlockAlias<IPCData> = unsafe { shdeserialize(blk) };
        IPC_DATA_BLOCK.get().replace(Some(blk));
    }
}
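
// Illustrative sketch of the `with`/`set` access pattern used by this module
// (and by `tls_thread_shmem` and `tls_process_shmem` below). The block is
// installed once during thread setup, then borrowed for the duration of each
// closure:
//
//     // During thread setup (see `_shim_parent_init_ipc` and
//     // `wait_for_start_event`); `serialized_blk` is a placeholder for a
//     // block received from Shadow:
//     unsafe { tls_ipc::set(&serialized_blk) };
//     // Later, to exchange events with Shadow:
//     let event = tls_ipc::with(|ipc| ipc.from_shadow().receive().unwrap());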

mod tls_thread_shmem {
    use super::*;

    static SHMEM: ShimTlsVar<RefCell<Option<ShMemBlockAlias<ThreadShmem>>>> =
        ShimTlsVar::new(&SHIM_TLS, || RefCell::new(None));

    /// Panics if `set` hasn't been called yet.
    pub fn with<O>(f: impl FnOnce(&ThreadShmem) -> O) -> O {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        f(SHMEM.get().borrow().as_ref().unwrap())
    }

    /// The previous value, if any, is dropped.
    ///
    /// # Safety
    ///
    /// `blk` must contain a serialized block referencing a `ShMemBlock` of
    /// type `ThreadShmem`.  The `ShMemBlock` must outlive the current thread.
    pub unsafe fn set(blk: &ShMemBlockSerialized) {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        // SAFETY: Caller guarantees correct type.
        let blk = unsafe { shdeserialize(blk) };
        SHMEM.get().borrow_mut().replace(blk);
    }
}

mod global_manager_shmem {
    use super::*;

    // This is set explicitly, so needs a Mutex.
    static INITIALIZER: SelfContainedMutex<Option<ShMemBlockSerialized>> =
        SelfContainedMutex::const_new(None);

    // The actual block is in a `LazyLock`, which is much faster to access.
    // It uses `INITIALIZER` to do its one-time init.
    static SHMEM: LazyLock<ShMemBlockAlias<ManagerShmem>> = LazyLock::const_new(|| {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        let serialized = INITIALIZER.lock().take().unwrap();
        unsafe { shdeserialize(&serialized) }
    });

    /// # Safety
    ///
    /// `blk` must contain a serialized block referencing a `ShMemBlock` of type `ManagerShmem`.
    /// The `ShMemBlock` must outlive this process.
    pub unsafe fn set(blk: &ShMemBlockSerialized) {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        assert!(!SHMEM.initd());
        assert!(INITIALIZER.lock().replace(*blk).is_none());
        // Ensure that `try_get` returns `Some` (without it having to take the
        // `INITIALIZER` lock to check), and that we fail early if `SHMEM` can't
        // actually be initialized.
        SHMEM.force();
    }

    /// Panics if `set` hasn't been called yet.
    pub fn get() -> impl core::ops::Deref<Target = ShMemBlockAlias<'static, ManagerShmem>> + 'static
    {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        SHMEM.force()
    }

    pub fn try_get()
    -> Option<impl core::ops::Deref<Target = ShMemBlockAlias<'static, ManagerShmem>> + 'static>
    {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        if !SHMEM.initd() {
            // No need to do the more-expensive `INITIALIZER` check; `set`
            // forces `SHMEM` to initialize.
            None
        } else {
            Some(get())
        }
    }
}
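
// Illustrative sketch of the two-stage init pattern used here and in
// `global_host_shmem` below: the writer goes through the mutex-guarded
// `INITIALIZER` exactly once, while readers afterwards pay only the cheap
// `LazyLock::initd` check:
//
//     // Once, at startup (`serialized_blk` is a placeholder for the block
//     // received from Shadow):
//     unsafe { global_manager_shmem::set(&serialized_blk) };
//     // Later, on any thread, without taking `INITIALIZER`'s lock:
//     if let Some(shmem) = global_manager_shmem::try_get() {
//         // `shmem` derefs to the `ShMemBlockAlias<ManagerShmem>`.
//         let _block = &*shmem;
//     }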

mod global_host_shmem {
    use super::*;

    // This is set explicitly, so needs a Mutex.
    static INITIALIZER: SelfContainedMutex<Option<ShMemBlockSerialized>> =
        SelfContainedMutex::const_new(None);

    // The actual block is in a `LazyLock`, which is much faster to access.
    // It uses `INITIALIZER` to do its one-time init.
    static SHMEM: LazyLock<ShMemBlockAlias<HostShmem>> = LazyLock::const_new(|| {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        let serialized = INITIALIZER.lock().take().unwrap();
        unsafe { shdeserialize(&serialized) }
    });

    /// # Safety
    ///
    /// `blk` must contain a serialized block referencing a `ShMemBlock` of type `HostShmem`.
    /// The `ShMemBlock` must outlive this process.
    pub unsafe fn set(blk: &ShMemBlockSerialized) {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        assert!(!SHMEM.initd());
        assert!(INITIALIZER.lock().replace(*blk).is_none());
        // Ensure that `try_get` returns `Some` (without it having to take the
        // `INITIALIZER` lock to check), and that we fail early if `SHMEM` can't
        // actually be initialized.
        SHMEM.force();
    }

    /// Panics if `set` hasn't been called yet.
    pub fn get() -> impl core::ops::Deref<Target = ShMemBlockAlias<'static, HostShmem>> + 'static {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        SHMEM.force()
    }

    pub fn try_get()
    -> Option<impl core::ops::Deref<Target = ShMemBlockAlias<'static, HostShmem>> + 'static> {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        if !SHMEM.initd() {
            // No need to do the more-expensive `INITIALIZER` check; `set`
            // forces `SHMEM` to initialize.
            None
        } else {
            Some(get())
        }
    }
}

mod tls_process_shmem {
    use super::*;

    static SHMEM: ShimTlsVar<RefCell<Option<ShMemBlockAlias<ProcessShmem>>>> =
        ShimTlsVar::new(&SHIM_TLS, || RefCell::new(None));

    /// Panics if `set` hasn't been called yet.
    pub fn with<O>(f: impl FnOnce(&ProcessShmem) -> O) -> O {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        f(SHMEM.get().borrow().as_ref().unwrap())
    }

    /// The previous value, if any, is dropped.
    ///
    /// # Safety
    ///
    /// `blk` must contain a serialized block referencing a `ShMemBlock` of
    /// type `ProcessShmem`.  The `ShMemBlock` must outlive the current thread.
    pub unsafe fn set(blk: &ShMemBlockSerialized) {
        debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
        // SAFETY: Caller guarantees correct type.
        let blk = unsafe { shdeserialize(blk) };
        SHMEM.get().borrow_mut().replace(blk);
    }
}

// Force cargo to link against crates that aren't (yet) referenced from Rust
// code (but are referenced from this crate's C code).
// https://github.com/rust-lang/cargo/issues/9391
extern crate asm_util;
extern crate log_c2rust;
extern crate logger;
extern crate shadow_shim_helper_rs;
extern crate shadow_shmem;

/// Global instance of thread local storage for use in the shim.
///
/// SAFETY: We ensure that every thread unregisters itself before exiting,
/// via [`release_and_exit_current_thread`].
static SHIM_TLS: ThreadLocalStorage = unsafe { ThreadLocalStorage::new(tls::Mode::Native) };

/// Release this thread's shim thread local storage and exit the thread.
///
/// Should be called by every thread that accesses thread local storage.
///
/// Panics if there are still any live references to this thread's [`ShimTlsVar`]s.
///
/// # Safety
///
/// In the case that this function somehow panics, caller must not
/// access thread local storage again from the current thread, e.g.
/// using `std::panic::catch_unwind` or a custom panic hook.
pub unsafe fn release_and_exit_current_thread(exit_status: i32) -> ! {
    debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
    // Block all signals, to ensure a signal handler can't run and attempt to
    // access thread local storage.
    rt_sigprocmask(
        SigProcMaskAction::SIG_BLOCK,
        &linux_api::signal::sigset_t::FULL,
        None,
    )
    .unwrap();

    // SAFETY: No code can access thread local storage in between deregistration
    // and exit, unless `unregister_current_thread` itself panics.
    unsafe { SHIM_TLS.unregister_current_thread() }

    linux_api::exit::exit_raw(exit_status).unwrap();
    unreachable!()
}
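
// Illustrative exit-path sketch: a shim thread that is done tears down its
// signal stack and then exits through `release_and_exit_current_thread`
// rather than calling `exit` directly, so that its TLS is released first
// (the `export` module below exposes both steps to C):
//
//     unsafe { tls_thread_signal_stack::free() };
//     unsafe { release_and_exit_current_thread(0) }; // never returns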

/// Emulate the cpuid instruction. Takes the current values of rax-rdx,
/// which are mutated to the updated values.
fn emulate_cpuid(
    rax: &mut core::ffi::c_longlong,
    rbx: &mut core::ffi::c_longlong,
    rcx: &mut core::ffi::c_longlong,
    rdx: &mut core::ffi::c_longlong,
) {
    // Intentionally allow dropping the high 32 bits here. The `cpuid`
    // instruction is specified as only paying attention to the low 32 bits
    // (e.g. eax not rax).
    let leaf = *rax as u32;
    let sub_leaf = *rcx as u32;

    // Re-enable cpuid; execute a native cpuid; re-disable cpuid.
    //
    // It might be nice to cache results here to avoid the extra syscalls, but:
    // * cpuid usage is already relatively expensive, so *shouldn't* be in the hot path in the managed program.
    // * there are a lot of subtleties to trying to cache.
    //
    // Caching subtleties:
    //
    // * We need to know whether the leaf being queried (eax) supports sub-leaves,
    //   and hence whether to key by the sub-leaf (ecx) in addition to the leaf.
    // * Some values depend on which core the instruction is executed on.
    //   (OTOH this is potentially more reason to cache or otherwise try to
    //   normalize those values, for determinism).
    // * "If a value entered for CPUID.EAX is higher than the maximum input
    //   value for basic or extended function for that processor then the data
    //   for the highest basic information leaf is returned."
    //   <https://www.felixcloutier.com/x86/cpuid>
    // * "If a value entered for CPUID.EAX is less than or equal to the
    //   maximum input value and the leaf is not supported on that processor
    //   then 0 is returned in all the registers."
    //   <https://www.felixcloutier.com/x86/cpuid>
    unsafe { linux_api::prctl::arch_prctl(ArchPrctlOp::ARCH_SET_CPUID, 1) }
        .unwrap_or_else(|e| panic!("Couldn't re-enable cpuid: {e:?}"));
    // SAFETY: We don't support running in environments that don't have cpuid.
    let mut res = unsafe { asm_util::cpuid::cpuid(leaf, Some(sub_leaf)) };
    unsafe { linux_api::prctl::arch_prctl(ArchPrctlOp::ARCH_SET_CPUID, 0) }
        .unwrap_or_else(|e| panic!("Couldn't re-disable cpuid: {e:?}"));

    // Potentially mess with the results.
    match (leaf, sub_leaf) {
        (asm_util::cpuid::RDRAND_LEAF, _) => {
            const { assert!(asm_util::cpuid::RDRAND_SUB_LEAF.is_none()) };
            // Always say we don't support rdrand (it would break shadow's determinism).
            res.ecx &= !asm_util::cpuid::RDRAND_FLAG;
        }
        (asm_util::cpuid::RDSEED_LEAF, sl) if sl == asm_util::cpuid::RDSEED_SUB_LEAF.unwrap() => {
            // Always say we don't support rdseed (it would break shadow's determinism).
            res.ebx &= !asm_util::cpuid::RDSEED_FLAG;
        }
        _ => (),
    }
    *rax = res.eax.into();
    *rbx = res.ebx.into();
    *rcx = res.ecx.into();
    *rdx = res.edx.into();
}
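
// Illustrative sketch of the observable effect: a managed program probing
// for rdrand support via `cpuid` sees the feature bit cleared, even on
// hardware that supports it, since `emulate_cpuid` masks it out above:
//
//     let res = unsafe { asm_util::cpuid::cpuid(asm_util::cpuid::RDRAND_LEAF, None) };
//     assert_eq!(res.ecx & asm_util::cpuid::RDRAND_FLAG, 0);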

/// Perform once-per-thread initialization for the shim.
///
/// Unlike `init_process` this must only be called once - we do so explicitly
/// when creating a new managed thread.
///
/// Uses C ABI so that we can call from `asm`.
extern "C" fn init_thread() {
    debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
    unsafe { bindings::_shim_child_thread_init_preload() };
    log::trace!("Finished shim thread init");
}

/// Ensure once-per-process init for the shim is done.
///
/// Safe and cheap to call repeatedly; e.g. from API entry points.
fn init_process() {
    debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
    static STARTED_INIT: LazyLock<()> = LazyLock::const_new(|| ());
    if STARTED_INIT.initd() {
        // Avoid recursion in initialization.
        //
        // TODO: This shouldn't be necessary once we've gotten rid of all
        // calls to libc from the shim's initialization.
        return;
    }
    STARTED_INIT.force();

    unsafe { bindings::_shim_parent_init_preload() };

    log::trace!("Finished shim global init");
}

/// Wait for "start" event from Shadow, using it to set things up for the
/// current thread, and if `is_first_thread` is true then also for the current
/// process.
fn wait_for_start_event(is_first_thread: bool) {
    debug_assert_eq!(ExecutionContext::current(), ExecutionContext::Shadow);
    log::trace!("waiting for start event");

    let mut working_dir = [0u8; linux_api::limits::PATH_MAX];
    let working_dir_ptr;
    let working_dir_len;
    if is_first_thread {
        working_dir_ptr = ForeignPtr::from_raw_ptr(working_dir.as_mut_ptr());
        working_dir_len = working_dir.len();
    } else {
        working_dir_ptr = ForeignPtr::null();
        working_dir_len = 0;
    }

    let mut thread_blk_serialized = MaybeUninit::<ShMemBlockSerialized>::uninit();
    let mut process_blk_serialized = MaybeUninit::<ShMemBlockSerialized>::uninit();
    let start_req = ShimEventToShadow::StartReq(ShimEventStartReq {
        thread_shmem_block_to_init: ForeignPtr::from_raw_ptr(thread_blk_serialized.as_mut_ptr()),
        process_shmem_block_to_init: ForeignPtr::from_raw_ptr(process_blk_serialized.as_mut_ptr()),
        initial_working_dir_to_init: working_dir_ptr,
        initial_working_dir_to_init_len: working_dir_len,
    });
    let res = tls_ipc::with(|ipc| {
        ipc.to_shadow().send(start_req);
        ipc.from_shadow().receive().unwrap()
    });
    let ShimEventToShim::StartRes(res) = res else {
        panic!("Unexpected response: {res:?}");
    };
    if is_first_thread {
        // SAFETY: We're ensuring serial execution in this process, and no other
        // Rust code in this library should have tried accessing the auxiliary
        // vector yet, so no references should exist.
        //
        // WARNING: It's possible that the dynamic linker/loader or constructors
        // in other dynamically linked libraries *have* run, and that rewriting
        // this value here will violate safety assumptions in those objects.
        // Fortunately we haven't observed this in practice.
        unsafe { reinit_auxvec_random::reinit_auxvec_random(&res.auxvec_random) };
    }

    // SAFETY: shadow should have initialized
    let thread_blk_serialized = unsafe { thread_blk_serialized.assume_init() };
    // SAFETY: blk should be of the correct type and outlive this thread.
    unsafe { tls_thread_shmem::set(&thread_blk_serialized) };

    // SAFETY: shadow should have initialized
    let process_blk_serialized = unsafe { process_blk_serialized.assume_init() };
    // SAFETY: blk should be of the correct type and outlive this process.
    unsafe { tls_process_shmem::set(&process_blk_serialized) };

    // TODO: Instead use posix_spawn_file_actions_addchdir_np in the shadow process,
    // which was added in glibc 2.29. Currently this is blocked on debian-10, which
    // uses glibc 2.28.
    if is_first_thread {
        let working_dir = CStr::from_bytes_until_nul(&working_dir).unwrap();
        rustix::process::chdir(working_dir).unwrap();
    }
}
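
// Illustrative sketch of the start-event handshake performed above, from the
// shim's point of view:
//
//     shim  -> shadow: StartReq { pointers into this process's memory for
//                                 the thread shmem block, the process shmem
//                                 block, and (first thread only) the
//                                 working dir buffer }
//     shadow:          writes the serialized blocks and working dir through
//                      those pointers
//     shadow -> shim:  StartRes { auxvec_random, .. }
//     shim:            `assume_init`s the out-params, installs the shmem
//                      blocks, and (first thread only) `chdir`s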

// Rust's linking of a `cdylib` only considers Rust `pub extern "C-unwind"` entry
// points, and the symbols those recursively use, to be used. i.e. any function
// called from outside of the shim needs to be exported from the Rust code. We
// wrap some C implementations here.
pub mod export {
    use core::ops::Deref;

    use super::*;

    /// # Safety
    ///
    /// The syscall itself must be safe to make.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn shim_api_syscall(
        n: core::ffi::c_long,
        arg1: u64,
        arg2: u64,
        arg3: u64,
        arg4: u64,
        arg5: u64,
        arg6: u64,
    ) -> i64 {
        let _prev = ExecutionContext::Shadow.enter();
        unsafe {
            bindings::shimc_api_syscall(_prev.ctx().into(), n, arg1, arg2, arg3, arg4, arg5, arg6)
        }
    }

    /// # Safety
    ///
    /// Pointers must be dereferenceable.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn shim_api_getaddrinfo(
        node: *const core::ffi::c_char,
        service: *const core::ffi::c_char,
        hints: *const libc::addrinfo,
        res: *mut *mut libc::addrinfo,
    ) -> i32 {
        let _prev = ExecutionContext::Shadow.enter();
        unsafe { bindings::shimc_api_getaddrinfo(node, service, hints, res) }
    }

    /// # Safety
    ///
    /// * Pointers must be dereferenceable.
    /// * `res` is invalidated afterwards.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn shim_api_freeaddrinfo(res: *mut libc::addrinfo) {
        let _prev = ExecutionContext::Shadow.enter();
        unsafe { bindings::shimc_api_freeaddrinfo(res) }
    }

    /// # Safety
    ///
    /// Pointers must be dereferenceable
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn shim_api_getifaddrs(ifap: *mut *mut libc::ifaddrs) -> i32 {
        // We *don't* enter ExecutionContext::Shadow here, because this
        // implementation is pure "userspace"; it doesn't directly access shadow
        // internal functionality, but *does* use libc in a way that we want the
        // underlying syscalls to be interposed.
        unsafe { bindings::shimc_api_getifaddrs(ifap) }
    }

    /// # Safety
    ///
    /// * Pointers must be dereferenceable.
    /// * `ifa` is invalidated afterwards.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn shim_api_freeifaddrs(ifa: *mut libc::ifaddrs) {
        // We *don't* enter ExecutionContext::Shadow here, because this
        // implementation is pure "userspace"; it doesn't directly access shadow
        // internal functionality, but *does* use libc in a way that we want the
        // underlying syscalls to be interposed.
        unsafe { bindings::shimc_api_freeifaddrs(ifa) }
    }

    /// Sets the current `ExecutionContext` for this thread (which determines
    /// whether syscalls are interposed or passed through natively), and
    /// returns the previous context.
    ///
    /// Typical usage is to set the desired context at the beginning of an
    /// operation, and restore the old value afterwards.
    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn shim_swapExecutionContext(new: ExecutionContext) -> ExecutionContext {
        new.enter_without_restorer()
    }

    /// Returns the current `ExecutionContext` for this thread.
    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn shim_getExecutionContext() -> ExecutionContext {
        ExecutionContext::current()
    }

    /// Allocates and installs a signal stack.
    ///
    /// This is to ensure that our signal handlers have enough stack space;
    /// otherwise we can run out in managed processes that use small stacks.
    ///
    /// This should be called once per thread before any signal handlers run.
    /// Panics if already called on the current thread.
    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn _shim_init_signal_stack() {
        tls_thread_signal_stack::init();
    }

    /// # Safety
    ///
    /// After calling this function, the current thread must ensure the
    /// following sequence can't happen before it exits:
    ///
    /// * Another thread runs.
    /// * That thread also frees its stack.
    /// * This thread runs again on the signal stack (e.g. by handling a new signal).
    ///
    /// Generally in the shim we rely on Shadow's scheduling model to ensure
    /// this, since we know Shadow won't permit another thread to run
    /// preemptively before the current thread has a chance to finish exiting.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn shim_freeSignalStack() {
        unsafe { tls_thread_signal_stack::free() };
    }

    /// # Safety
    ///
    /// stdin must contain a serialized block of
    /// type `IPCData`, which outlives the current thread.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn _shim_parent_init_ipc() {
        let mut bytes = [0; core::mem::size_of::<ShMemBlockSerialized>()];
        let bytes_read = rustix::io::read(
            unsafe { rustix::fd::BorrowedFd::borrow_raw(libc::STDIN_FILENO) },
            &mut bytes,
        )
        .unwrap();
        // Implement looping? We should get it all in one read, though.
        assert_eq!(bytes_read, bytes.len());
        let ipc_blk = shadow_pod::from_array(&bytes);
        // SAFETY: caller is responsible for `set`'s preconditions.
        unsafe { tls_ipc::set(&ipc_blk) };
    }
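
    // Illustrative sketch of the writer side of this handshake, which lives
    // in the Shadow process (hypothetical pseudocode; `serialize`, `write`,
    // and `raw_bytes_of` are placeholders, not this crate's API): when
    // spawning the managed process, Shadow arranges for the raw bytes of a
    // serialized IPC block to appear on the child's stdin:
    //
    //     let serialized: ShMemBlockSerialized = serialize(&ipc_block);
    //     write(child_stdin, raw_bytes_of(&serialized));
    //
    // The shim then reads back exactly `size_of::<ShMemBlockSerialized>()`
    // bytes here, and later closes stdin via `_shim_parent_close_stdin`.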

    /// This thread's IPC channel. Panics if it hasn't been initialized yet.
    ///
    /// # Safety
    ///
    /// The returned pointer must not outlive the current thread.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn shim_thisThreadEventIPC() -> *const IPCData {
        tls_ipc::with(core::ptr::from_ref)
    }

    /// This thread's shared memory. Panics if it hasn't been initialized yet.
    ///
    /// # Safety
    ///
    /// The returned pointer must not outlive the current thread.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn shim_threadSharedMem()
    -> *const shadow_shim_helper_rs::shim_shmem::export::ShimShmemThread {
        tls_thread_shmem::with(core::ptr::from_ref)
    }

    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn _shim_load() {
        init_process();
    }

    /// Should be used to exit every thread in the shim.
    ///
    /// # Safety
    ///
    /// In the case that this function somehow panics, caller must not
    /// access thread local storage again from the current thread, e.g.
    /// using `std::panic::catch_unwind` or a custom panic hook.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn shim_release_and_exit_current_thread(status: i32) {
        unsafe { release_and_exit_current_thread(status) }
    }

    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn shim_managerSharedMem()
    -> *const shadow_shim_helper_rs::shim_shmem::export::ShimShmemManager {
        let rv = global_manager_shmem::try_get();
        rv.map(|x| {
            let rv: &shadow_shim_helper_rs::shim_shmem::export::ShimShmemManager = x.deref();
            // We know this pointer will be live for the lifetime of the
            // process, and that we never construct a mutable reference to the
            // underlying data.
            core::ptr::from_ref(rv)
        })
        .unwrap_or(core::ptr::null())
    }

    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn shim_hostSharedMem()
    -> *const shadow_shim_helper_rs::shim_shmem::export::ShimShmemHost {
        let rv = global_host_shmem::try_get();
        rv.map(|x| {
            let rv: &shadow_shim_helper_rs::shim_shmem::export::ShimShmemHost = x.deref();
            // We know this pointer will be live for the lifetime of the
            // process, and that we never construct a mutable reference to the
            // underlying data.
            core::ptr::from_ref(rv)
        })
        .unwrap_or(core::ptr::null())
    }

    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn shim_processSharedMem()
    -> *const shadow_shim_helper_rs::shim_shmem::export::ShimShmemProcess {
        tls_process_shmem::with(|process| {
            // We know this pointer will be live for the lifetime of the
            // process, and that we never construct a mutable reference to the
            // underlying data.
            core::ptr::from_ref(process)
        })
    }

    /// Wait for start event from shadow, from a newly spawned thread.
    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn _shim_preload_only_child_ipc_wait_for_start_event() {
        wait_for_start_event(false);
    }

    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn _shim_ipc_wait_for_start_event() {
        wait_for_start_event(true);
    }

    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn _shim_parent_init_manager_shm() {
        unsafe { global_manager_shmem::set(&global_host_shmem::get().manager_shmem) }
    }

    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn _shim_parent_init_host_shm() {
        tls_process_shmem::with(|process| unsafe { global_host_shmem::set(&process.host_shmem) });
    }

    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn _shim_parent_close_stdin() {
        unsafe { rustix::io::close(libc::STDIN_FILENO) };
    }

    /// Emulate the cpuid instruction. Takes the current values of rax-rdx,
    /// which are mutated to the updated values.
    ///
    /// # Safety
    ///
    /// Parameters must be safely dereferenceable and writable.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn _shim_emulate_cpuid(
        rax: *mut core::ffi::c_longlong,
        rbx: *mut core::ffi::c_longlong,
        rcx: *mut core::ffi::c_longlong,
        rdx: *mut core::ffi::c_longlong,
    ) {
        emulate_cpuid(
            unsafe { rax.as_mut() }.unwrap(),
            unsafe { rbx.as_mut() }.unwrap(),
            unsafe { rcx.as_mut() }.unwrap(),
            unsafe { rdx.as_mut() }.unwrap(),
        );
    }
}