Skip to main content

shadow_rs/host/
process.rs

1//! An emulated Linux process.
2
3use std::cell::{Cell, Ref, RefCell, RefMut};
4use std::collections::BTreeMap;
5use std::ffi::{CStr, CString, c_char, c_void};
6use std::fmt::Write;
7use std::num::TryFromIntError;
8use std::ops::{Deref, DerefMut};
9use std::os::fd::AsRawFd;
10use std::path::{Path, PathBuf};
11use std::sync::Arc;
12use std::sync::atomic::Ordering;
13#[cfg(feature = "perf_timers")]
14use std::time::Duration;
15
16use linux_api::errno::Errno;
17use linux_api::fcntl::OFlag;
18use linux_api::posix_types::Pid;
19use linux_api::sched::{CloneFlags, SuidDump};
20use linux_api::signal::{
21    LinuxDefaultAction, SigActionFlags, Signal, SignalFromI32Error, defaultaction, siginfo_t,
22    sigset_t,
23};
24use log::{debug, trace, warn};
25use rustix::process::{WaitOptions, WaitStatus};
26use shadow_shim_helper_rs::HostId;
27use shadow_shim_helper_rs::explicit_drop::{ExplicitDrop, ExplicitDropper};
28use shadow_shim_helper_rs::rootedcell::Root;
29use shadow_shim_helper_rs::rootedcell::rc::RootedRc;
30use shadow_shim_helper_rs::rootedcell::refcell::RootedRefCell;
31use shadow_shim_helper_rs::shim_shmem::ProcessShmem;
32use shadow_shim_helper_rs::simulation_time::SimulationTime;
33use shadow_shim_helper_rs::syscall_types::{ForeignPtr, ManagedPhysicalMemoryAddr};
34use shadow_shmem::allocator::ShMemBlock;
35
36use super::descriptor::descriptor_table::{DescriptorHandle, DescriptorTable};
37use super::descriptor::listener::StateEventSource;
38use super::descriptor::{FileSignals, FileState};
39use super::host::Host;
40use super::memory_manager::{MemoryManager, ProcessMemoryRef, ProcessMemoryRefMut};
41use super::syscall::formatter::StraceFmtMode;
42use super::syscall::types::ForeignArrayPtr;
43use super::thread::{Thread, ThreadId};
44use super::timer::Timer;
45use crate::core::configuration::{ProcessFinalState, RunningVal};
46use crate::core::work::task::TaskRef;
47use crate::core::worker::Worker;
48use crate::cshadow;
49use crate::host::context::ProcessContext;
50use crate::host::descriptor::Descriptor;
51use crate::host::managed_thread::ManagedThread;
52use crate::host::syscall::formatter::FmtOptions;
53use crate::utility::callback_queue::CallbackQueue;
54#[cfg(feature = "perf_timers")]
55use crate::utility::perf_timer::PerfTimer;
56use crate::utility::{self, debug_assert_cloexec};
57
/// Virtual pid of a shadow process.
///
/// The inner value is a `u32`, but the `TryFrom` constructors below ensure it
/// always fits in a `libc::pid_t`, so the `From<ProcessId> for libc::pid_t`
/// conversion is infallible.
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone, Ord, PartialOrd)]
pub struct ProcessId(u32);
61
62impl ProcessId {
63    // The first Process to run after boot is the "init" process, and has pid=1.
64    // In Shadow simulations, this roughly corresponds to Shadow itself. e.g.
65    // processes spawned by Shadow itself have a parent pid of 1.
66    pub const INIT: Self = ProcessId(1);
67
68    /// Returns what the `ProcessId` would be of a `Process` whose thread
69    /// group leader has id `thread_group_leader_tid`.
70    pub fn from_thread_group_leader_tid(thread_group_leader_tid: ThreadId) -> Self {
71        ProcessId::try_from(libc::pid_t::from(thread_group_leader_tid)).unwrap()
72    }
73}
74
75impl std::fmt::Display for ProcessId {
76    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
77        write!(f, "{}", self.0)
78    }
79}
80
81impl TryFrom<u32> for ProcessId {
82    type Error = TryFromIntError;
83
84    fn try_from(val: u32) -> Result<Self, Self::Error> {
85        // we don't actually want the value as a `pid_t`, we just want to make sure it can be
86        // converted successfully
87        let _ = libc::pid_t::try_from(val)?;
88        Ok(ProcessId(val))
89    }
90}
91
92impl TryFrom<libc::pid_t> for ProcessId {
93    type Error = TryFromIntError;
94
95    fn try_from(value: libc::pid_t) -> Result<Self, Self::Error> {
96        Ok(ProcessId(value.try_into()?))
97    }
98}
99
100impl From<ProcessId> for u32 {
101    fn from(val: ProcessId) -> Self {
102        val.0
103    }
104}
105
106impl From<ProcessId> for libc::pid_t {
107    fn from(val: ProcessId) -> Self {
108        val.0.try_into().unwrap()
109    }
110}
111
112impl From<ThreadId> for ProcessId {
113    fn from(value: ThreadId) -> Self {
114        ProcessId::try_from(libc::pid_t::from(value)).unwrap()
115    }
116}
117
/// How an emulated process ended.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ExitStatus {
    /// The process exited "normally" with the given exit code (e.g. via `exit`).
    Normal(i32),
    /// The process was killed by the given signal.
    Signaled(Signal),
    /// The process was killed by Shadow rather than exiting "naturally" as part
    /// of the simulation. Currently this only happens when the process is still
    /// running when the simulation stop_time is reached.
    ///
    /// A signal delivered via `shutdown_signal` does not result in this status;
    /// e.g. if the process is killed directly by the signal the ExitStatus will
    /// be `Signaled`; if the process handles the signal and exits by calling
    /// `exit`, the status will be `Normal`.
    StoppedByShadow,
}
132
/// Strace-style syscall logging state, shared between a process and its forks.
#[derive(Debug)]
struct StraceLogging {
    // The log file; wrapped in a `RootedRefCell` since it's shared across
    // processes on the same host.
    file: RootedRefCell<std::fs::File>,
    // Formatting options for log entries.
    options: FmtOptions,
}
138
/// Parts of the process that are present in all states.
struct Common {
    // The virtual pid of this process.
    id: ProcessId,
    // The host this process belongs to.
    host_id: HostId,

    // Parent pid (aka `ppid`), as returned e.g. by `getppid`.  This can change
    // at runtime if the original parent exits and is reaped.
    parent_pid: Cell<ProcessId>,

    // Process group id (aka `pgid`), as returned e.g. by `getpgid`.
    group_id: Cell<ProcessId>,

    // Session id, as returned e.g. by `getsid`.
    session_id: Cell<ProcessId>,

    // Signal to send to parent on death (typically SIGCHLD); `None` means the
    // parent is not notified.
    exit_signal: Option<Signal>,

    // Signal to send to this process when its parent dies, as configured via
    // `prctl(PR_SET_PDEATHSIG)`.
    parent_death_signal: Cell<Option<Signal>>,

    // unique id of the program that this process should run
    name: CString,

    // the name of the executable as provided in shadow's config, for logging purposes
    plugin_name: CString,

    // absolute path to the process's working directory.
    // This must remain in sync with the actual working dir of the native process.
    // See https://github.com/shadow/shadow/issues/2960
    working_dir: CString,

    // (emulated) Process-wide resource limits. We don't enforce these, but track
    // what they are so that we can return the expected value for e.g. `getrlimit`.
    rlimits: [linux_api::resource::rlimit64; linux_api::resource::RLIM_NLIMITS as usize],
}
176
impl Common {
    /// The virtual pid of this process.
    fn id(&self) -> ProcessId {
        self.id
    }

    /// Maps a virtual (foreign) pointer to a simulation-wide "physical"
    /// address by packing this process's pid into the pointer's top 16 bits.
    fn physical_address(&self, vptr: ForeignPtr<()>) -> ManagedPhysicalMemoryAddr {
        // We currently don't keep a true system-wide virtual <-> physical address
        // mapping. Instead we simply assume that no shadow processes map the same
        // underlying physical memory, and that therefore (pid, virtual address)
        // uniquely defines a physical address.
        //
        // If we ever want to support futexes in memory shared between processes,
        // we'll need to change this.  The most foolproof way to do so is probably
        // to change ManagedPhysicalMemoryAddr to be a bigger struct that identifies where
        // the mapped region came from (e.g. what file), and the offset into that
        // region. Such "fat" physical pointers might make memory management a
        // little more cumbersome though, e.g. when using them as keys in the futex
        // table.
        //
        // Alternatively we could hash the region+offset to a 64-bit value, but
        // then we'd need to deal with potential collisions. On average we'd expect
        // a collision after 2**32 physical addresses; i.e. they *probably*
        // wouldn't happen in practice for realistic simulations.

        // Linux uses the bottom 48-bits for user-space virtual addresses, giving
        // us 16 bits for the pid.
        const PADDR_BITS: i32 = 64;
        const VADDR_BITS: i32 = 48;
        const PID_BITS: i32 = 16;
        assert_eq!(PADDR_BITS, PID_BITS + VADDR_BITS);

        // Pack the pid into the high 16 bits, and verify it round-trips
        // (i.e. that the pid actually fits in PID_BITS).
        let high_part: u64 = u64::from(u32::from(self.id())) << VADDR_BITS;
        assert_eq!(
            ProcessId::try_from((high_part >> VADDR_BITS) as u32),
            Ok(self.id())
        );

        // The virtual address must fit entirely in the low 48 bits.
        let low_part = u64::from(vptr);
        assert_eq!(low_part >> VADDR_BITS, 0);

        ManagedPhysicalMemoryAddr::from(high_part | low_part)
    }

    /// The process's name as UTF-8. Panics if the name is not valid UTF-8.
    fn name(&self) -> &str {
        self.name.to_str().unwrap()
    }

    /// The tid of this process's thread-group leader.
    pub fn thread_group_leader_id(&self) -> ThreadId {
        // tid of the thread group leader is equal to the pid.
        ThreadId::from(self.id())
    }
}
229
/// A process that is currently runnable.
pub struct RunnableProcess {
    // State shared across all process lifecycle states.
    common: Common,

    // Expected end state, if any. We'll report an error if this is present and
    // doesn't match the actual exit status.
    //
    // This will be None e.g. for processes created via `fork` instead of
    // spawned directly from Shadow's config file. In those cases it's the
    // parent's responsibility to reap and interpret the exit status.
    expected_final_state: Option<ProcessFinalState>,

    // Shared memory allocation for shared state with shim.
    shim_shared_mem_block: ShMemBlock<'static, ProcessShmem>,

    // Shared with forked Processes
    strace_logging: Option<Arc<StraceLogging>>,

    // The shim's log file. This gets dup'd into the ManagedProcess
    // where the shim can write to it directly. We persist it to handle the case
    // where we need to recreate a ManagedProcess and have it continue writing
    // to the same file.
    //
    // Shared with forked Processes
    shimlog_file: Arc<std::fs::File>,

    // "dumpable" state, as manipulated via the prctl operations PR_SET_DUMPABLE
    // and PR_GET_DUMPABLE.
    dumpable: Cell<SuidDump>,

    // The pid of the underlying native (OS-level) process.
    native_pid: Pid,

    // timer that tracks the amount of CPU time we spend on plugin execution and processing
    #[cfg(feature = "perf_timers")]
    cpu_delay_timer: RefCell<PerfTimer>,
    // Cumulative elapsed time recorded by `cpu_delay_timer`, used to compute
    // per-stop deltas.
    #[cfg(feature = "perf_timers")]
    total_run_time: Cell<Duration>,

    // The `ITIMER_REAL` interval timer (see `setitimer(2)`).
    itimer_real: RefCell<Timer>,

    // The `RootedRc` lets us hold a reference to a thread without holding a
    // reference to the thread list. e.g. this lets us implement the `clone`
    // syscall, which adds a thread to the list while we have a reference to the
    // parent thread.
    threads: RefCell<BTreeMap<ThreadId, RootedRc<RootedRefCell<Thread>>>>,

    // References to `Self::memory_manager` cached on behalf of C code using legacy
    // C memory access APIs.
    // TODO: Remove these when we've migrated Shadow off of the APIs that need
    // them (probably by migrating all the calling code to Rust).
    //
    // SAFETY: Must be before memory_manager for drop order.
    unsafe_borrow_mut: RefCell<Option<UnsafeBorrowMut>>,
    unsafe_borrows: RefCell<Vec<UnsafeBorrow>>,

    // `clone(2)` documents that if `CLONE_THREAD` is set, then `CLONE_VM` must
    // also be set. Hence all threads in a process always share the same virtual
    // address space, and hence we have a `MemoryManager` at the `Process` level
    // rather than the `Thread` level.
    // SAFETY: Must come after `unsafe_borrows` and `unsafe_borrow_mut`.
    // Boxed to avoid invalidating those if Self is moved.
    memory_manager: Box<RefCell<MemoryManager>>,

    // Listeners for child-events.
    // e.g. these listeners are notified when a child of this process exits.
    child_process_event_listeners: RefCell<StateEventSource>,
}
297
impl RunnableProcess {
    /// Spawn a `ManagedThread` corresponding to the given `exec` syscall
    /// parameters.  Intended for use by the `exec` syscall handlers. Whether it
    /// succeeds or fails, does *not* mutate `self`, though `self`'s strace and
    /// shim log files will be passed into the new `ManagedThread`.
    ///
    /// In case the native `exec` syscall fails, the corresponding error is returned.
    pub fn spawn_mthread_for_exec(
        &self,
        host: &Host,
        plugin_path: &CStr,
        argv: Vec<CString>,
        envv: Vec<CString>,
    ) -> Result<ManagedThread, Errno> {
        ManagedThread::spawn(
            plugin_path,
            argv,
            envv,
            // The new thread shares `self`'s strace log file, if any.
            self.strace_logging
                .as_ref()
                .map(|s| s.file.borrow(host.root()))
                .as_deref(),
            &self.shimlog_file,
            host.preload_paths(),
        )
    }

    /// Call after a thread has exited. Removes the thread and does corresponding cleanup and notifications.
    ///
    /// NOTE(review): the emptiness check below suggests the exiting thread is
    /// expected to already have been removed from `self.threads` by the caller
    /// — confirm at call sites.
    fn reap_thread(&self, host: &Host, threadrc: RootedRc<RootedRefCell<Thread>>) {
        // Ensure the thread's RootedRc is recursively dropped (which requires
        // host access) when we're done, regardless of the path taken below.
        let threadrc = ExplicitDropper::new(threadrc, |t| {
            t.explicit_drop_recursive(host.root(), host);
        });
        let thread = threadrc.borrow(host.root());

        assert!(!thread.is_running());

        // If the `clear_child_tid` attribute on the thread is set, and there are
        // any other threads left alive in the process, perform a futex wake on
        // that address. This mechanism is typically used in `pthread_join` etc.
        // See `set_tid_address(2)`.
        let clear_child_tid_pvp = thread.get_tid_address();
        if !clear_child_tid_pvp.is_null() && !self.threads.borrow().is_empty() {
            // Per `set_tid_address(2)`, write 0 to the address before waking.
            self.memory_manager
                .borrow_mut()
                .write(clear_child_tid_pvp, &0)
                .unwrap();

            // Wake the corresponding futex.
            let futexes = host.futextable_borrow();
            let addr = self
                .common
                .physical_address(clear_child_tid_pvp.cast::<()>());

            if let Some(futex) = futexes.get(addr) {
                futex.wake(1);
            }
        }
    }

    /// This cleans up memory references left over from legacy C code; usually
    /// a syscall handler.
    ///
    /// Writes the leftover mutable ref to memory (if any), and frees
    /// all memory refs.
    pub fn free_unsafe_borrows_flush(&self) -> Result<(), Errno> {
        // Immutable borrows can simply be dropped; nothing to write back.
        self.unsafe_borrows.borrow_mut().clear();

        // The mutable borrow (at most one) must be flushed back to the
        // managed process's memory.
        let unsafe_borrow_mut = self.unsafe_borrow_mut.borrow_mut().take();
        if let Some(borrow) = unsafe_borrow_mut {
            borrow.flush()
        } else {
            Ok(())
        }
    }

    /// This cleans up memory references left over from legacy C code; usually
    /// a syscall handler.
    ///
    /// Frees all memory refs without writing back to memory.
    pub fn free_unsafe_borrows_noflush(&self) {
        self.unsafe_borrows.borrow_mut().clear();

        let unsafe_borrow_mut = self.unsafe_borrow_mut.borrow_mut().take();
        if let Some(borrow) = unsafe_borrow_mut {
            borrow.noflush();
        }
    }

    /// Dynamically borrow this process's `MemoryManager`.
    /// Panics if the memory manager is already mutably borrowed.
    #[track_caller]
    pub fn memory_borrow(&self) -> impl Deref<Target = MemoryManager> + '_ {
        self.memory_manager.borrow()
    }

    /// Dynamically borrow this process's `MemoryManager`, mutably.
    /// Panics if the memory manager is already borrowed.
    #[track_caller]
    pub fn memory_borrow_mut(&self) -> impl DerefMut<Target = MemoryManager> + '_ {
        self.memory_manager.borrow_mut()
    }

    /// The strace formatting options, or `None` if strace logging is disabled.
    pub fn strace_logging_options(&self) -> Option<FmtOptions> {
        self.strace_logging.as_ref().map(|x| x.options)
    }

    /// If strace logging is disabled, this function will do nothing and return `None`.
    pub fn with_strace_file<T>(&self, f: impl FnOnce(&mut std::fs::File) -> T) -> Option<T> {
        // TODO: get Host from caller. Would need to update syscall-logger.
        Worker::with_active_host(|host| {
            let strace_logging = self.strace_logging.as_ref()?;
            let mut file = strace_logging.file.borrow_mut(host.root());
            Some(f(&mut file))
        })
        .unwrap()
    }

    /// The pid of the underlying native (OS-level) process.
    pub fn native_pid(&self) -> Pid {
        self.native_pid
    }

    /// Returns the first thread in the table (by tid order), if any.
    /// All threads in the table are expected to be running.
    #[track_caller]
    fn first_live_thread(&self, root: &Root) -> Option<Ref<'_, RootedRc<RootedRefCell<Thread>>>> {
        Ref::filter_map(self.threads.borrow(), |threads| {
            threads.values().next().inspect(|thread| {
                // There shouldn't be any non-running threads in the table.
                assert!(thread.borrow(root).is_running());
            })
        })
        .ok()
    }

    /// Returns a dynamically borrowed reference to the first live thread.
    /// This is meant primarily for the MemoryManager.
    #[track_caller]
    pub fn first_live_thread_borrow(
        &self,
        root: &Root,
    ) -> Option<impl Deref<Target = RootedRc<RootedRefCell<Thread>>> + '_> {
        self.first_live_thread(root)
    }

    /// Look up a thread of this process by its tid.
    #[track_caller]
    fn thread(&self, virtual_tid: ThreadId) -> Option<Ref<'_, RootedRc<RootedRefCell<Thread>>>> {
        Ref::filter_map(self.threads.borrow(), |threads| threads.get(&virtual_tid)).ok()
    }

    /// Returns a dynamically borrowed reference to the thread with tid
    /// `virtual_tid`, if it exists in this process.
    #[track_caller]
    pub fn thread_borrow(
        &self,
        virtual_tid: ThreadId,
    ) -> Option<impl Deref<Target = RootedRc<RootedRefCell<Thread>>> + '_> {
        self.thread(virtual_tid)
    }

    // Disposes of `self`, returning the internal `Common` for reuse.
    // Used internally when changing states.
    fn into_common(self) -> Common {
        // There shouldn't be any outstanding unsafe borrows when changing
        // states, since that would indicate C code might still have a pointer
        // to memory.
        assert!(self.unsafe_borrow_mut.take().is_none());
        assert!(self.unsafe_borrows.take().is_empty());

        self.common
    }

    /// Starts the CPU delay timer.
    /// Panics if the timer is already running.
    #[cfg(feature = "perf_timers")]
    pub fn start_cpu_delay_timer(&self) {
        self.cpu_delay_timer.borrow_mut().start()
    }

    /// Stop the timer and return the most recent (not cumulative) duration.
    /// Panics if the timer was not already running.
    #[cfg(feature = "perf_timers")]
    pub fn stop_cpu_delay_timer(&self, host: &Host) -> Duration {
        let mut timer = self.cpu_delay_timer.borrow_mut();
        timer.stop();
        // The timer reports cumulative elapsed time; diff against the previous
        // total to get the duration of just this run.
        let total_elapsed = timer.elapsed();
        let prev_total = self.total_run_time.replace(total_elapsed);
        let delta = total_elapsed - prev_total;

        // Account the CPU time against the host's modeled CPU.
        host.cpu_borrow_mut().add_delay(delta);

        delta
    }

    /// Interrupt a syscall-blocked thread so that the pending `signal` can be
    /// delivered. Wakes at most one thread: the first one (in tid order) that
    /// doesn't have the signal blocked.
    fn interrupt_with_signal(&self, host: &Host, signal: Signal) {
        let threads = self.threads.borrow();
        for thread in threads.values() {
            let thread = thread.borrow(host.root());
            // Scope for the shared-memory lock and borrows.
            {
                let thread_shmem = thread.shmem();
                let host_lock = host.shim_shmem_lock_borrow().unwrap();
                let thread_shmem_protected = thread_shmem.protected.borrow(&host_lock.root);
                let blocked_signals = thread_shmem_protected.blocked_signals;
                if blocked_signals.has(signal) {
                    // This thread has the signal blocked; try the next one.
                    continue;
                }
            }
            let Some(mut cond) = thread.syscall_condition_mut() else {
                // Defensively handle this gracefully, but it probably shouldn't happen.
                // The only thread in the process not blocked on a syscall should be
                // the current-running thread (if any), but the caller should have
                // delivered the signal synchronously instead of using this function
                // in that case.
                warn!("thread {:?} has no syscall_condition. How?", thread.id());
                continue;
            };
            cond.wakeup_for_signal(host, signal);
            break;
        }
    }

    /// Send the signal described in `siginfo` to `process`. `current_thread`
    /// should be set if there is one (e.g. if this is being called from a syscall
    /// handler), and `None` otherwise (e.g. when called from a timer expiration event).
    ///
    /// An event will be scheduled to deliver the signal unless `current_thread`
    /// is set, and belongs to the process `self`, and doesn't have the signal
    /// blocked.  In that case the signal will be processed synchronously when
    /// returning from the current syscall.
    pub fn signal(&self, host: &Host, current_thread: Option<&Thread>, siginfo_t: &siginfo_t) {
        let signal = match siginfo_t.signal() {
            Ok(s) => s,
            // Signal 0 is a no-op "existence check" and is never delivered.
            Err(SignalFromI32Error(0)) => return,
            Err(SignalFromI32Error(n)) => panic!("Bad signo {n}"),
        };

        // Scope for `process_shmem_protected`
        {
            let host_shmem = host.shim_shmem_lock_borrow().unwrap();
            let mut process_shmem_protected = self
                .shim_shared_mem_block
                .protected
                .borrow_mut(&host_shmem.root);
            // SAFETY: We don't try to call any of the function pointers.
            let action = unsafe { process_shmem_protected.signal_action(signal) };
            match unsafe { action.handler() } {
                linux_api::signal::SignalHandler::Handler(_) => (),
                linux_api::signal::SignalHandler::Action(_) => (),
                // Explicitly ignored: drop the signal.
                linux_api::signal::SignalHandler::SigIgn => return,
                linux_api::signal::SignalHandler::SigDfl => {
                    // Default disposition is "ignore": drop the signal.
                    if defaultaction(signal) == LinuxDefaultAction::IGN {
                        return;
                    }
                }
            }

            if process_shmem_protected.pending_signals.has(signal) {
                // Signal is already pending. From signal(7): In the case where a
                // standard signal is already pending, the siginfo_t structure (see
                // sigaction(2)) associated with that signal is not overwritten on
                // arrival of subsequent instances of the same signal.
                return;
            }
            process_shmem_protected.pending_signals.add(signal);
            process_shmem_protected.set_pending_standard_siginfo(signal, siginfo_t);
        }

        if let Some(thread) = current_thread
            && thread.process_id() == self.common.id()
        {
            let host_shmem = host.shim_shmem_lock_borrow().unwrap();
            let threadmem = thread.shmem();
            let threadprotmem = threadmem.protected.borrow(&host_shmem.root);
            if !threadprotmem.blocked_signals.has(signal) {
                // Target process is this process, and current thread hasn't blocked
                // the signal.  It will be delivered to this thread when it resumes.
                return;
            }
        }

        // Otherwise wake a blocked thread to take delivery asynchronously.
        self.interrupt_with_signal(host, signal);
    }

    /// Adds a new thread to the process and schedules it to run.
    /// Intended for use by `clone`.
    pub fn add_thread(&self, host: &Host, thread: RootedRc<RootedRefCell<Thread>>) {
        let pid = self.common.id();
        let tid = thread.borrow(host.root()).id();
        self.threads.borrow_mut().insert(tid, thread);

        // Schedule thread to start. We're giving the caller's reference to thread
        // to the TaskRef here, which is why we don't increment its ref count to
        // create the TaskRef, but do decrement it on cleanup.
        let task = TaskRef::new(move |host| {
            host.resume(pid, tid);
        });
        host.schedule_task_with_delay(task, SimulationTime::ZERO);
    }

    /// Create a new `Process`, forked from `self`, with the thread `new_thread_group_leader`.
    pub fn new_forked_process(
        &self,
        host: &Host,
        flags: CloneFlags,
        exit_signal: Option<Signal>,
        new_thread_group_leader: RootedRc<RootedRefCell<Thread>>,
    ) -> RootedRc<RootedRefCell<Process>> {
        let new_tgl_tid;
        let native_pid;
        // Scope for the borrow of the new thread group leader.
        {
            let new_tgl = new_thread_group_leader.borrow(host.root());
            new_tgl_tid = new_tgl.id();
            native_pid = new_tgl.native_pid();
        }
        // The child's pid is its thread group leader's tid.
        let pid = ProcessId::from_thread_group_leader_tid(new_tgl_tid);
        assert_eq!(
            pid,
            new_thread_group_leader.borrow(host.root()).process_id()
        );
        let plugin_name = self.common.plugin_name.clone();
        let name = make_name(host, plugin_name.to_str().unwrap(), pid);

        // With CLONE_PARENT the child becomes a *sibling*: it gets our
        // parent's pid rather than ours. See `clone(2)`.
        let parent_pid = if flags.contains(CloneFlags::CLONE_PARENT) {
            self.common.parent_pid.get()
        } else {
            self.common.id
        };

        // Process group is always inherited from the parent process.
        let process_group_id = self.common.group_id.get();

        // Session is always inherited from the parent process.
        let session_id = self.common.session_id.get();

        let common = Common {
            id: pid,
            host_id: host.id(),
            name,
            plugin_name,
            working_dir: self.common.working_dir.clone(),
            parent_pid: Cell::new(parent_pid),
            group_id: Cell::new(process_group_id),
            session_id: Cell::new(session_id),
            exit_signal,
            parent_death_signal: Cell::new(None),
            rlimits: self.common.rlimits,
        };

        // The child will log to the same strace log file. Entries contain thread IDs,
        // though it might be tricky to map those back to processes.
        let strace_logging = self.strace_logging.as_ref().cloned();

        // `fork(2)`:
        //  > The child does not inherit timers from its parent
        //  > (setitimer(2), alarm(2), timer_create(2)).
        let itimer_real = RefCell::new(Timer::new(move |host| itimer_real_expiration(host, pid)));

        // The child process starts with only its thread group leader.
        let threads = RefCell::new(BTreeMap::from([(new_tgl_tid, new_thread_group_leader)]));

        // Fresh shim shared-memory for the child, wired to the shared strace
        // file descriptor (if strace logging is enabled).
        let shim_shared_mem = ProcessShmem::new(
            &host.shim_shmem_lock_borrow().unwrap().root,
            host.shim_shmem().serialize(),
            host.id(),
            strace_logging
                .as_ref()
                .map(|x| x.file.borrow(host.root()).as_raw_fd()),
        );
        let shim_shared_mem_block = shadow_shmem::allocator::shmalloc(shim_shared_mem);

        let runnable_process = RunnableProcess {
            common,
            // Forked processes have no configured final state; the parent is
            // responsible for reaping and interpreting the exit status.
            expected_final_state: None,
            shim_shared_mem_block,
            strace_logging,
            dumpable: self.dumpable.clone(),
            native_pid,
            #[cfg(feature = "perf_timers")]
            cpu_delay_timer: RefCell::new(PerfTimer::new_stopped()),
            #[cfg(feature = "perf_timers")]
            total_run_time: Cell::new(Duration::ZERO),
            itimer_real,
            threads,
            unsafe_borrow_mut: RefCell::new(None),
            unsafe_borrows: RefCell::new(Vec::new()),
            memory_manager: Box::new(RefCell::new(unsafe { MemoryManager::new(native_pid) })),
            child_process_event_listeners: Default::default(),
            shimlog_file: self.shimlog_file.clone(),
        };
        let child_process = Process {
            state: RefCell::new(Some(ProcessState::Runnable(runnable_process))),
        };
        RootedRc::new(host.root(), RootedRefCell::new(host.root(), child_process))
    }

    /// Shared memory for this process.
    pub fn shmem(&self) -> impl Deref<Target = ShMemBlock<'static, ProcessShmem>> + '_ {
        &self.shim_shared_mem_block
    }
}
688
689impl ExplicitDrop for RunnableProcess {
690    type ExplicitDropParam = Host;
691    type ExplicitDropResult = ();
692
693    fn explicit_drop(mut self, host: &Self::ExplicitDropParam) -> Self::ExplicitDropResult {
694        let threads = std::mem::take(self.threads.get_mut());
695        for thread in threads.into_values() {
696            thread.explicit_drop_recursive(host.root(), host);
697        }
698    }
699}
700
/// A process that has exited.
pub struct ZombieProcess {
    // State shared across all process lifecycle states.
    common: Common,

    // How the process exited.
    exit_status: ExitStatus,
}
707
708impl ZombieProcess {
    /// How the process exited.
    pub fn exit_status(&self) -> ExitStatus {
        self.exit_status
    }
712
    /// Process that can reap this zombie process, if any.
    ///
    /// Returns `None` if the parent is "init" (Shadow itself), or if the
    /// parent has explicitly ignored the exit signal.
    pub fn reaper<'host>(
        &self,
        host: &'host Host,
    ) -> Option<impl Deref<Target = RootedRc<RootedRefCell<Process>>> + 'host> {
        let parent_pid = self.common.parent_pid.get();
        // Processes parented to "init" (i.e. Shadow) are never reaped by a
        // simulated process.
        if parent_pid == ProcessId::INIT {
            return None;
        }
        let parentrc = host.process_borrow(parent_pid)?;

        // If the parent has *explicitly* ignored the exit signal, then it
        // doesn't reap.
        //
        // `waitpid(2)`:
        // > POSIX.1-2001 specifies that if the disposition of SIGCHLD is set to SIG_IGN or the SA_NOCLDWAIT flag is set for SIGCHLD  (see
        // > sigaction(2)),  then  children  that  terminate  do not become zombies and a call to wait() or waitpid() will block until all
        // > children have terminated, and then fail with errno set to ECHILD.  (The original POSIX standard left the behavior of  setting
        // > SIGCHLD to SIG_IGN unspecified.  Note that even though the default disposition of SIGCHLD is "ignore", explicitly setting the
        // > disposition to SIG_IGN results in different treatment of zombie process children.)
        //
        // TODO: validate that this applies to whatever signal is configured as the exit
        // signal, even if it's not SIGCHLD.
        if let Some(exit_signal) = self.common.exit_signal {
            let parent = parentrc.borrow(host.root());
            let parent_shmem = parent.shmem();
            let host_shmem_lock = host.shim_shmem_lock_borrow().unwrap();
            let parent_shmem_protected = parent_shmem.protected.borrow(&host_shmem_lock.root);
            // SAFETY: We don't dereference function pointers.
            let action = unsafe { parent_shmem_protected.signal_action(exit_signal) };
            if action.is_ignore() {
                return None;
            }
        }

        Some(parentrc)
    }
750
    /// Sends the configured exit signal (if any) to the parent process and
    /// notifies the parent's child-event listeners.
    fn notify_parent_of_exit(&self, host: &Host) {
        let Some(exit_signal) = self.common.exit_signal else {
            trace!("Not notifying parent of exit: no signal specified");
            return;
        };
        let parent_pid = self.common.parent_pid.get();
        if parent_pid == ProcessId::INIT {
            trace!("Not notifying parent of exit: parent is 'init'");
            return;
        }
        let Some(parent_rc) = host.process_borrow(parent_pid) else {
            trace!("Not notifying parent of exit: parent {parent_pid:?} not found");
            return;
        };
        let parent = parent_rc.borrow(host.root());
        let siginfo = self.exit_siginfo(exit_signal);

        let Some(parent_runnable) = parent.as_runnable() else {
            trace!("Not notifying parent of exit: {parent_pid:?} not running");
            // A dead parent should have been reparented away before this point.
            debug_panic!("Non-running parent process shouldn't be possible.");
            #[allow(unreachable_code)]
            {
                return;
            }
        };
        parent_runnable.signal(host, None, &siginfo);
        // Notify any listeners (e.g. waitpid-style waiters) of the child event.
        CallbackQueue::queue_and_run_with_legacy(|q| {
            let mut parent_child_listeners =
                parent_runnable.child_process_event_listeners.borrow_mut();
            parent_child_listeners.notify_listeners(
                FileState::CHILD_EVENT,
                FileState::CHILD_EVENT,
                FileSignals::empty(),
                q,
            );
        });
    }
788
    /// Construct a siginfo containing information about how the process exited.
    /// Used internally to send a signal to the parent process, and by the
    /// `waitid` syscall handler.
    ///
    /// `exit_signal` is the signal to set in the `siginfo_t`.
    ///
    /// Panics if the process was stopped by Shadow itself rather than having
    /// exited or been killed by a signal.
    pub fn exit_siginfo(&self, exit_signal: Signal) -> siginfo_t {
        match self.exit_status {
            ExitStatus::Normal(exit_code) => siginfo_t::new_for_sigchld_exited(
                exit_signal,
                self.common.id.into(),
                0,
                exit_code,
                0,
                0,
            ),
            ExitStatus::Signaled(fatal_signal) => {
                // This ought to be `siginfo_t::new_for_sigchld_dumped` if
                // the child dumped core, but that depends on various other
                // system variables outside of our control. We always report
                // that no core was dumped, for determinism.
                siginfo_t::new_for_sigchld_killed(
                    exit_signal,
                    self.common.id.into(),
                    0,
                    fatal_signal,
                    0,
                    0,
                )
            }

            ExitStatus::StoppedByShadow => unreachable!(),
        }
    }
822}
823
/// Inner implementation of a simulated process.
// We could box the variants, but it's unclear whether it's really worth the extra code and extra
// allocations. Most of the values of this type will be in the larger `Runnable` variant rather than
// the smaller `Zombie` variant anyways.
#[allow(clippy::large_enum_variant)]
enum ProcessState {
    /// The process is live: it has threads, memory, and a backing native process.
    Runnable(RunnableProcess),
    /// The process has exited; only the metadata still needed by the
    /// simulation (e.g. its exit status) is retained.
    Zombie(ZombieProcess),
}
833
834impl ProcessState {
835    fn common(&self) -> &Common {
836        match self {
837            ProcessState::Runnable(r) => &r.common,
838            ProcessState::Zombie(z) => &z.common,
839        }
840    }
841
842    fn common_mut(&mut self) -> &mut Common {
843        match self {
844            ProcessState::Runnable(r) => &mut r.common,
845            ProcessState::Zombie(z) => &mut z.common,
846        }
847    }
848
849    fn as_runnable(&self) -> Option<&RunnableProcess> {
850        match self {
851            ProcessState::Runnable(r) => Some(r),
852            ProcessState::Zombie(_) => None,
853        }
854    }
855
856    fn as_runnable_mut(&mut self) -> Option<&mut RunnableProcess> {
857        match self {
858            ProcessState::Runnable(r) => Some(r),
859            ProcessState::Zombie(_) => None,
860        }
861    }
862
863    fn as_zombie(&self) -> Option<&ZombieProcess> {
864        match self {
865            ProcessState::Runnable(_) => None,
866            ProcessState::Zombie(z) => Some(z),
867        }
868    }
869}
870
871impl ExplicitDrop for ProcessState {
872    type ExplicitDropParam = Host;
873    type ExplicitDropResult = ();
874
875    fn explicit_drop(self, host: &Self::ExplicitDropParam) -> Self::ExplicitDropResult {
876        match self {
877            ProcessState::Runnable(r) => r.explicit_drop(host),
878            ProcessState::Zombie(_) => (),
879        }
880    }
881}
882
/// A simulated process.
pub struct Process {
    // Most of the implementation should be in [`ProcessState`].
    // This wrapper allows us to change the state.
    //
    // `None` only transiently, while a state transition is in progress;
    // accessors `unwrap()` the inner value.
    state: RefCell<Option<ProcessState>>,
}
889
/// Callback run when a process's `itimer_real` timer expires: delivers a
/// `SIGALRM` to the process, carrying the timer's expiration count.
///
/// Does nothing if the process no longer exists or is no longer running.
fn itimer_real_expiration(host: &Host, pid: ProcessId) {
    let Some(process) = host.process_borrow(pid) else {
        debug!("Process {pid:?} no longer exists");
        return;
    };
    let process = process.borrow(host.root());
    let Some(runnable) = process.as_runnable() else {
        debug!("Process {:?} no longer running", &*process.name());
        return;
    };
    let timer = runnable.itimer_real.borrow();
    // The siginfo_t structure only has an i32. Presumably we want to just truncate in
    // case of overflow.
    let expiration_count = timer.expiration_count() as i32;
    let siginfo_t = siginfo_t::new_for_timer(Signal::SIGALRM, 0, expiration_count);
    process.signal(host, None, &siginfo_t);
}
907
908impl Process {
    /// Borrows the state-independent `Common` data.
    ///
    /// Panics if a state transition is in progress (`state` is `None`).
    fn common(&self) -> Ref<'_, Common> {
        Ref::map(self.state.borrow(), |state| {
            state.as_ref().unwrap().common()
        })
    }

    /// Mutably borrows the state-independent `Common` data.
    ///
    /// Panics if a state transition is in progress (`state` is `None`).
    fn common_mut(&self) -> RefMut<'_, Common> {
        RefMut::map(self.state.borrow_mut(), |state| {
            state.as_mut().unwrap().common_mut()
        })
    }

    /// Borrows the inner [`RunnableProcess`], or returns `None` if the process
    /// has exited.
    fn as_runnable(&self) -> Option<Ref<'_, RunnableProcess>> {
        Ref::filter_map(self.state.borrow(), |state| {
            state.as_ref().unwrap().as_runnable()
        })
        .ok()
    }

    /// Mutably borrows the inner [`RunnableProcess`], or returns `None` if the
    /// process has exited.
    fn as_runnable_mut(&self) -> Option<RefMut<'_, RunnableProcess>> {
        RefMut::filter_map(self.state.borrow_mut(), |state| {
            state.as_mut().unwrap().as_runnable_mut()
        })
        .ok()
    }

    /// Borrows a reference to the internal [`RunnableProcess`] if `self` is runnable.
    pub fn borrow_as_runnable(&self) -> Option<impl Deref<Target = RunnableProcess> + '_> {
        self.as_runnable()
    }

    /// Borrows the inner [`ZombieProcess`], or returns `None` if the process is
    /// still running.
    fn as_zombie(&self) -> Option<Ref<'_, ZombieProcess>> {
        Ref::filter_map(self.state.borrow(), |state| {
            state.as_ref().unwrap().as_zombie()
        })
        .ok()
    }

    /// Borrows a reference to the internal [`ZombieProcess`] if `self` is a zombie.
    pub fn borrow_as_zombie(&self) -> Option<impl Deref<Target = ZombieProcess> + '_> {
        self.as_zombie()
    }
951
    /// Spawn a new process. The process will be runnable via [`Self::resume`]
    /// once it has been added to the `Host`'s process list.
    ///
    /// This creates the backing native process (via [`ManagedThread::spawn`]),
    /// sets up its descriptor table (stdin from `/dev/null`, stdout/stderr to
    /// per-process files), shared memory, strace/shim log files, and initial
    /// rlimits, then wraps everything in a `Runnable` [`Process`].
    pub fn spawn(
        host: &Host,
        plugin_name: CString,
        plugin_path: &CStr,
        argv: Vec<CString>,
        envv: Vec<CString>,
        pause_for_debugging: bool,
        strace_logging_options: Option<FmtOptions>,
        expected_final_state: ProcessFinalState,
    ) -> Result<RootedRc<RootedRefCell<Process>>, Errno> {
        debug!("starting process '{plugin_name:?}'");

        // The process id is derived from its main thread's id.
        let main_thread_id = host.get_new_thread_id();
        let process_id = ProcessId::from(main_thread_id);

        let desc_table = RootedRc::new(
            host.root(),
            RootedRefCell::new(host.root(), DescriptorTable::new()),
        );
        let itimer_real = RefCell::new(Timer::new(move |host| {
            itimer_real_expiration(host, process_id)
        }));

        let name = make_name(host, plugin_name.to_str().unwrap(), process_id);

        // Base path for the process's output files (strace, stdout, stderr,
        // shimlog); each gets a distinct extension below.
        let mut file_basename = PathBuf::new();
        file_basename.push(host.data_dir_path());
        file_basename.push(format!(
            "{exe_name}.{id}",
            exe_name = plugin_name.to_str().unwrap(),
            id = u32::from(process_id)
        ));

        let strace_logging = strace_logging_options.map(|options| {
            let file =
                std::fs::File::create(Self::static_output_file_name(&file_basename, "strace"))
                    .unwrap();
            debug_assert_cloexec(&file);
            Arc::new(StraceLogging {
                file: RootedRefCell::new(host.root(), file),
                options,
            })
        });

        // Shared-memory block used to communicate with the shim in the managed
        // process (includes the strace fd, if strace logging is enabled).
        let shim_shared_mem = ProcessShmem::new(
            &host.shim_shmem_lock_borrow().unwrap().root,
            host.shim_shmem().serialize(),
            host.id(),
            strace_logging
                .as_ref()
                .map(|x| x.file.borrow(host.root()).as_raw_fd()),
        );
        let shim_shared_mem_block = shadow_shmem::allocator::shmalloc(shim_shared_mem);

        let working_dir = utility::pathbuf_to_nul_term_cstring(
            std::fs::canonicalize(host.data_dir_path()).unwrap(),
        );

        // Register the standard I/O descriptors: stdin reads from /dev/null,
        // stdout and stderr are redirected to per-process output files.
        {
            let mut descriptor_table = desc_table.borrow_mut(host.root());
            Self::open_stdio_file_helper(
                &mut descriptor_table,
                libc::STDIN_FILENO.try_into().unwrap(),
                "/dev/null".into(),
                OFlag::O_RDONLY,
            );

            let name = Self::static_output_file_name(&file_basename, "stdout");
            Self::open_stdio_file_helper(
                &mut descriptor_table,
                libc::STDOUT_FILENO.try_into().unwrap(),
                name,
                OFlag::O_WRONLY,
            );

            let name = Self::static_output_file_name(&file_basename, "stderr");
            Self::open_stdio_file_helper(
                &mut descriptor_table,
                libc::STDERR_FILENO.try_into().unwrap(),
                name,
                OFlag::O_WRONLY,
            );
        }

        let shimlog_file = Arc::new(
            std::fs::File::create(Self::static_output_file_name(&file_basename, "shimlog"))
                .unwrap(),
        );
        debug_assert_cloexec(&shimlog_file);

        // Spawn the native process that will execute the plugin.
        let mthread = ManagedThread::spawn(
            plugin_path,
            argv,
            envv,
            strace_logging
                .as_ref()
                .map(|s| s.file.borrow(host.root()))
                .as_deref(),
            &shimlog_file,
            host.preload_paths(),
        )?;
        let native_pid = mthread.native_pid();
        let main_thread =
            Thread::wrap_mthread(host, mthread, desc_table, process_id, main_thread_id).unwrap();

        debug!("process '{plugin_name:?}' started");

        if pause_for_debugging {
            // will block until logger output has been flushed
            // there is a race condition where other threads may log between the
            // `eprintln` and `raise` below, but it should be rare
            log::logger().flush();

            // Use a single `eprintln` to ensure we hold the lock for the whole message.
            // Defensively pre-construct a single string so that `eprintln` is
            // more likely to use a single `write` call, to minimize the chance
            // of more lines being written to stdout in the meantime, and in
            // case of C code writing to `STDERR` directly without taking Rust's
            // lock.
            let msg = format!(
                "\
              \n** Pausing with SIGTSTP to enable debugger attachment to managed process\
              \n** '{plugin_name:?}' (pid {native_pid:?}).\
              \n** If running Shadow under Bash, resume Shadow by pressing Ctrl-Z to background\
              \n** this task, and then typing \"fg\".\
              \n** If running GDB, resume Shadow by typing \"signal SIGCONT\"."
            );
            eprintln!("{msg}");

            rustix::process::kill_process(rustix::process::getpid(), rustix::process::Signal::Tstp)
                .unwrap();
        }

        // Initialize emulated rlimits to their native values.
        // TODO: For determinism, we may want to use fixed limits for some or all of these.
        // Some applications like Tor will change behavior depending on these limits.
        // Ultimately they'd need to be compatible with the native limits though.
        let mut rlimits: [linux_api::resource::rlimit64; _] =
            [shadow_pod::zeroed(); linux_api::resource::RLIM_NLIMITS as usize];
        for r in 0..linux_api::resource::RLIM_NLIMITS {
            let r = linux_api::resource::Resource::try_from(r).unwrap();
            // SAFETY: target process isn't our own, and we're only retrieving
            // limits, not changing them.
            unsafe {
                linux_api::resource::prlimit64(
                    native_pid,
                    r,
                    None,
                    Some(&mut rlimits[usize::try_from(u32::from(r)).unwrap()]),
                )
            }
            .unwrap();
        }

        let memory_manager = unsafe { MemoryManager::new(native_pid) };
        let threads = RefCell::new(BTreeMap::from([(
            main_thread_id,
            RootedRc::new(host.root(), RootedRefCell::new(host.root(), main_thread)),
        )]));

        // parent/group/session ids start out as INIT; callers adjust them via
        // the corresponding setters after spawning.
        let common = Common {
            id: process_id,
            host_id: host.id(),
            working_dir,
            name,
            plugin_name,
            parent_pid: Cell::new(ProcessId::INIT),
            group_id: Cell::new(ProcessId::INIT),
            session_id: Cell::new(ProcessId::INIT),
            // Exit signal is moot; since parent is INIT there will never
            // be a valid target for it.
            exit_signal: None,
            parent_death_signal: Cell::new(None),
            rlimits,
        };
        Ok(RootedRc::new(
            host.root(),
            RootedRefCell::new(
                host.root(),
                Self {
                    state: RefCell::new(Some(ProcessState::Runnable(RunnableProcess {
                        common,
                        expected_final_state: Some(expected_final_state),
                        shim_shared_mem_block,
                        memory_manager: Box::new(RefCell::new(memory_manager)),
                        itimer_real,
                        strace_logging,
                        dumpable: Cell::new(SuidDump::SUID_DUMP_USER),
                        native_pid,
                        unsafe_borrow_mut: RefCell::new(None),
                        unsafe_borrows: RefCell::new(Vec::new()),
                        threads,
                        #[cfg(feature = "perf_timers")]
                        cpu_delay_timer: RefCell::new(PerfTimer::new_stopped()),
                        #[cfg(feature = "perf_timers")]
                        total_run_time: Cell::new(Duration::ZERO),
                        child_process_event_listeners: Default::default(),
                        shimlog_file,
                    }))),
                },
            ),
        ))
    }
1157
    /// The process's virtual pid within the simulation.
    pub fn id(&self) -> ProcessId {
        self.common().id
    }

    /// The virtual pid of this process's parent.
    pub fn parent_id(&self) -> ProcessId {
        self.common().parent_pid.get()
    }

    /// Sets the virtual pid of this process's parent.
    pub fn set_parent_id(&self, pid: ProcessId) {
        self.common().parent_pid.set(pid)
    }

    /// The process's process-group id.
    pub fn group_id(&self) -> ProcessId {
        self.common().group_id.get()
    }

    /// Sets the process's process-group id.
    pub fn set_group_id(&self, id: ProcessId) {
        self.common().group_id.set(id)
    }

    /// The process's session id.
    pub fn session_id(&self) -> ProcessId {
        self.common().session_id.get()
    }

    /// Sets the process's session id.
    pub fn set_session_id(&self, id: ProcessId) {
        self.common().session_id.set(id)
    }

    /// The id of the host this process runs on.
    pub fn host_id(&self) -> HostId {
        self.common().host_id
    }
1189
    /// Get process's "dumpable" state, as manipulated by the prctl operations `PR_SET_DUMPABLE` and
    /// `PR_GET_DUMPABLE`.
    ///
    /// Panics if the process is no longer running.
    pub fn dumpable(&self) -> SuidDump {
        self.as_runnable().unwrap().dumpable.get()
    }

    /// Set process's "dumpable" state, as manipulated by the prctl operations `PR_SET_DUMPABLE` and
    /// `PR_GET_DUMPABLE`.
    ///
    /// Panics if `val` is neither `SUID_DUMP_DISABLE` nor `SUID_DUMP_USER`, or
    /// if the process is no longer running.
    pub fn set_dumpable(&self, val: SuidDump) {
        assert!(val == SuidDump::SUID_DUMP_DISABLE || val == SuidDump::SUID_DUMP_USER);
        self.as_runnable().unwrap().dumpable.set(val)
    }
1202
    /// Deprecated wrapper for `RunnableProcess::start_cpu_delay_timer`
    ///
    /// Panics if the process is no longer running.
    #[cfg(feature = "perf_timers")]
    pub fn start_cpu_delay_timer(&self) {
        self.as_runnable().unwrap().start_cpu_delay_timer()
    }

    /// Deprecated wrapper for `RunnableProcess::stop_cpu_delay_timer`
    ///
    /// Panics if the process is no longer running.
    #[cfg(feature = "perf_timers")]
    pub fn stop_cpu_delay_timer(&self, host: &Host) -> Duration {
        self.as_runnable().unwrap().stop_cpu_delay_timer(host)
    }

    /// The thread id of the process's thread-group leader.
    pub fn thread_group_leader_id(&self) -> ThreadId {
        self.common().thread_group_leader_id()
    }
1218
    /// Resume execution of `tid` (if it exists).
    /// Should only be called from `Host::resume`.
    ///
    /// Runs the thread until it blocks, exits, or exits the whole process, and
    /// performs the corresponding cleanup (removing the thread, and
    /// transitioning the process to a zombie if its last thread exited).
    pub fn resume(&self, host: &Host, tid: ThreadId) {
        trace!("Continuing thread {} in process {}", tid, self.id());

        let threadrc = {
            let Some(runnable) = self.as_runnable() else {
                debug!("Process {} is no longer running", &*self.name());
                return;
            };
            let threads = runnable.threads.borrow();
            let Some(thread) = threads.get(&tid) else {
                debug!("Thread {tid} no longer exists");
                return;
            };
            // Clone the thread reference, so that we don't hold a dynamically
            // borrowed reference to the thread list while running the thread.
            thread.clone(host.root())
        };
        // Ensure the cloned reference is explicitly dropped (recursively) even
        // on early exit from the code below.
        let threadrc = ExplicitDropper::new(threadrc, |t| {
            t.explicit_drop_recursive(host.root(), host);
        });
        let thread = threadrc.borrow(host.root());

        Worker::set_active_thread(&threadrc);

        #[cfg(feature = "perf_timers")]
        self.start_cpu_delay_timer();

        Process::set_shared_time(host);

        // Discard any unapplied latency.
        // We currently only want this mechanism to force a yield if the thread itself
        // never yields; we don't want unapplied latency to accumulate and force a yield
        // under normal circumstances.
        host.shim_shmem_lock_borrow_mut()
            .unwrap()
            .unapplied_cpu_latency = SimulationTime::ZERO;

        let ctx = ProcessContext::new(host, self);
        let res = thread.resume(&ctx);

        #[cfg(feature = "perf_timers")]
        {
            let delay = self.stop_cpu_delay_timer(host);
            debug!("process '{}' ran for {:?}", &*self.name(), delay);
        }
        #[cfg(not(feature = "perf_timers"))]
        debug!("process '{}' done continuing", &*self.name());

        match res {
            crate::host::thread::ResumeResult::Blocked => {
                debug!(
                    "thread {tid} in process '{}' still running, but blocked",
                    &*self.name()
                );
            }
            crate::host::thread::ResumeResult::ExitedThread(return_code) => {
                debug!(
                    "thread {tid} in process '{}' exited with code {return_code}",
                    &*self.name(),
                );
                // Scope the dynamic borrow of the thread list so that it's
                // released before `reap_thread`/`handle_process_exit` below.
                let (threadrc, last_thread) = {
                    let runnable = self.as_runnable().unwrap();
                    let mut threads = runnable.threads.borrow_mut();
                    let threadrc = threads.remove(&tid).unwrap();
                    (threadrc, threads.is_empty())
                };
                self.as_runnable().unwrap().reap_thread(host, threadrc);
                if last_thread {
                    self.handle_process_exit(host, false);
                }
            }
            crate::host::thread::ResumeResult::ExitedProcess => {
                debug!(
                    "Process {} exited while running thread {tid}",
                    &*self.name(),
                );
                self.handle_process_exit(host, false);
            }
        };

        Worker::clear_active_thread();
    }
1303
    /// Terminate the Process.
    ///
    /// Should only be called from [`Host::free_all_applications`].
    ///
    /// Kills the backing native process with `SIGKILL`, then transitions this
    /// process to a zombie. No-op if the process has already stopped.
    pub fn stop(&self, host: &Host) {
        // Scope for `runnable`
        {
            let Some(runnable) = self.as_runnable() else {
                debug!("process {} has already stopped", &*self.name());
                return;
            };
            debug!("terminating process {}", &*self.name());

            #[cfg(feature = "perf_timers")]
            runnable.start_cpu_delay_timer();

            // Kill the native process backing this emulated process. A failure
            // here is only logged; `handle_process_exit` below still reaps it.
            if let Err(err) = rustix::process::kill_process(
                runnable.native_pid().into(),
                rustix::process::Signal::Kill,
            ) {
                warn!("kill: {err:?}");
            }

            #[cfg(feature = "perf_timers")]
            {
                let delay = runnable.stop_cpu_delay_timer(host);
                debug!("process '{}' stopped in {:?}", &*self.name(), delay);
            }
            #[cfg(not(feature = "perf_timers"))]
            debug!("process '{}' stopped", &*self.name());
        }

        // Mutates `self.state`, so we need to have dropped `runnable`.
        self.handle_process_exit(host, true);
    }
1338
    /// See `RunnableProcess::signal`.
    ///
    /// No-op if the `self` is a `ZombieProcess`.
    ///
    /// Panics if a state transition is in progress (`state` is `None`).
    pub fn signal(&self, host: &Host, current_thread: Option<&Thread>, siginfo_t: &siginfo_t) {
        // Using full-match here to force update if we add more states later.
        match self.state.borrow().as_ref().unwrap() {
            ProcessState::Runnable(r) => r.signal(host, current_thread, siginfo_t),
            ProcessState::Zombie(_) => {
                // Sending a signal to a zombie process is a no-op.
                debug!("Process {} no longer running", &*self.name());
            }
        }
    }
1352
    /// Opens `path` (creating and truncating it, mode 0644) and registers the
    /// resulting file in `descriptor_table` under `fd`. Used to set up the
    /// process's standard I/O descriptors.
    ///
    /// Panics if the file can't be opened or if `fd` is already registered.
    fn open_stdio_file_helper(
        descriptor_table: &mut DescriptorTable,
        fd: DescriptorHandle,
        path: PathBuf,
        access_mode: OFlag,
    ) {
        let stdfile = unsafe { cshadow::regularfile_new() };
        let cwd = rustix::process::getcwd(Vec::new()).unwrap();
        let path = utility::pathbuf_to_nul_term_cstring(path);
        // "Convert" to libc int, assuming here that the kernel's `OFlag` values
        // are compatible with libc's values.
        // XXX: We're assuming here that the kernel and libc flags are ABI
        // compatible, which isn't guaranteed, but is mostly true in practice.
        // TODO: We probably ought to change `regularfile_open` and friends to
        // use a direct syscall instead of libc's wrappers, and explicitly take
        // the kernel version of flags, mode, etc.
        let access_mode = access_mode.bits();
        let errorcode = unsafe {
            cshadow::regularfile_open(
                stdfile,
                path.as_ptr(),
                access_mode | libc::O_CREAT | libc::O_TRUNC,
                libc::S_IRUSR | libc::S_IWUSR | libc::S_IRGRP | libc::S_IROTH,
                cwd.as_ptr(),
            )
        };
        if errorcode != 0 {
            panic!(
                "Opening {}: {:?}",
                path.to_str().unwrap(),
                linux_api::errno::Errno::try_from(-errorcode).unwrap()
            );
        }
        let desc = unsafe {
            Descriptor::from_legacy_file(
                stdfile as *mut cshadow::LegacyFile,
                linux_api::fcntl::OFlag::empty(),
            )
        };
        let prev = descriptor_table.register_descriptor_with_fd(desc, fd);
        assert!(prev.is_none());
        trace!(
            "Successfully opened fd {} at {}",
            fd,
            path.to_str().unwrap()
        );
    }
1400
1401    // Needed during early init, before `Self` is created.
1402    fn static_output_file_name(file_basename: &Path, extension: &str) -> PathBuf {
1403        let mut path = file_basename.to_owned().into_os_string();
1404        path.push(".");
1405        path.push(extension);
1406        path.into()
1407    }
1408
    /// The process's name within the simulation (used e.g. in log messages).
    pub fn name(&self) -> impl Deref<Target = str> + '_ {
        Ref::map(self.common(), |c| c.name.to_str().unwrap())
    }

    /// The name of the plugin (executable) this process was spawned from.
    pub fn plugin_name(&self) -> impl Deref<Target = str> + '_ {
        Ref::map(self.common(), |c| c.plugin_name.to_str().unwrap())
    }
1416
    /// Deprecated wrapper for `RunnableProcess::memory_borrow_mut`
    ///
    /// Panics if the process is no longer running.
    #[track_caller]
    pub fn memory_borrow_mut(&self) -> impl DerefMut<Target = MemoryManager> + '_ {
        std_util::nested_ref::NestedRefMut::map(self.as_runnable().unwrap(), |runnable| {
            runnable.memory_manager.borrow_mut()
        })
    }

    /// Deprecated wrapper for `RunnableProcess::memory_borrow`
    ///
    /// Panics if the process is no longer running.
    #[track_caller]
    pub fn memory_borrow(&self) -> impl Deref<Target = MemoryManager> + '_ {
        std_util::nested_ref::NestedRef::map(self.as_runnable().unwrap(), |runnable| {
            runnable.memory_manager.borrow()
        })
    }

    /// Deprecated wrapper for `RunnableProcess::strace_logging_options`
    ///
    /// Panics if the process is no longer running.
    pub fn strace_logging_options(&self) -> Option<FmtOptions> {
        self.as_runnable().unwrap().strace_logging_options()
    }

    /// Deprecated wrapper for `RunnableProcess::with_strace_file`
    ///
    /// Panics if the process is no longer running.
    pub fn with_strace_file<T>(&self, f: impl FnOnce(&mut std::fs::File) -> T) -> Option<T> {
        self.as_runnable().unwrap().with_strace_file(f)
    }
1442
    /// Deprecated wrapper for `RunnableProcess::native_pid`
    ///
    /// Panics if the process is no longer running.
    pub fn native_pid(&self) -> Pid {
        self.as_runnable().unwrap().native_pid()
    }

    /// Deprecated wrapper for `RunnableProcess::realtime_timer_borrow`
    ///
    /// Panics if the process is no longer running.
    #[track_caller]
    pub fn realtime_timer_borrow(&self) -> impl Deref<Target = Timer> + '_ {
        std_util::nested_ref::NestedRef::map(self.as_runnable().unwrap(), |runnable| {
            runnable.itimer_real.borrow()
        })
    }

    /// Deprecated wrapper for `RunnableProcess::realtime_timer_borrow_mut`
    ///
    /// Panics if the process is no longer running.
    #[track_caller]
    pub fn realtime_timer_borrow_mut(&self) -> impl DerefMut<Target = Timer> + '_ {
        std_util::nested_ref::NestedRefMut::map(self.as_runnable().unwrap(), |runnable| {
            runnable.itimer_real.borrow_mut()
        })
    }
1463
    /// Deprecated wrapper for `RunnableProcess::first_live_thread_borrow`
    ///
    /// Returns `None` if the process is no longer running.
    #[track_caller]
    pub fn first_live_thread_borrow(
        &self,
        root: &Root,
    ) -> Option<impl Deref<Target = RootedRc<RootedRefCell<Thread>>> + '_> {
        std_util::nested_ref::NestedRef::filter_map(self.as_runnable()?, |runnable| {
            runnable.first_live_thread(root)
        })
    }

    /// Deprecated wrapper for `RunnableProcess::thread_borrow`
    ///
    /// Returns `None` if the process is no longer running, or if it has no
    /// thread with id `virtual_tid`.
    pub fn thread_borrow(
        &self,
        virtual_tid: ThreadId,
    ) -> Option<impl Deref<Target = RootedRc<RootedRefCell<Thread>>> + '_> {
        std_util::nested_ref::NestedRef::filter_map(self.as_runnable()?, |runnable| {
            runnable.thread(virtual_tid)
        })
    }

    /// Deprecated wrapper for [`RunnableProcess::free_unsafe_borrows_flush`].
    ///
    /// Panics if the process is no longer running.
    pub fn free_unsafe_borrows_flush(&self) -> Result<(), Errno> {
        self.as_runnable().unwrap().free_unsafe_borrows_flush()
    }

    /// Deprecated wrapper for [`RunnableProcess::free_unsafe_borrows_noflush`].
    ///
    /// Panics if the process is no longer running.
    pub fn free_unsafe_borrows_noflush(&self) {
        self.as_runnable().unwrap().free_unsafe_borrows_noflush()
    }
1494
    /// Translate `vptr`, a pointer in the process's virtual address space, to
    /// its managed "physical" memory address.
    pub fn physical_address(&self, vptr: ForeignPtr<()>) -> ManagedPhysicalMemoryAddr {
        self.common().physical_address(vptr)
    }

    /// Whether the process is still running (i.e. not yet a zombie).
    pub fn is_running(&self) -> bool {
        self.as_runnable().is_some()
    }
1502
1503    /// Transitions `self` from a `RunnableProcess` to a `ZombieProcess`.
1504    fn handle_process_exit(&self, host: &Host, killed_by_shadow: bool) {
1505        debug!(
1506            "process '{}' has completed or is otherwise no longer running",
1507            &*self.name()
1508        );
1509
1510        // Take and dispose of all of the threads.
1511        // TODO: consider doing this while the `self.state` mutable reference is held
1512        // as with the other cleanup below. Right now this breaks some C code that expects
1513        // to be able to lookup the thread's process name.
1514        {
1515            let runnable = self.as_runnable().unwrap();
1516            let threads = std::mem::take(&mut *runnable.threads.borrow_mut());
1517            for (_tid, threadrc) in threads.into_iter() {
1518                threadrc.borrow(host.root()).handle_process_exit();
1519                runnable.reap_thread(host, threadrc);
1520            }
1521        }
1522
1523        // Intentionally hold the borrow on self.state to ensure the state
1524        // transition is "atomic".
1525        let mut opt_state = self.state.borrow_mut();
1526
1527        let state = opt_state.take().unwrap();
1528        let ProcessState::Runnable(runnable) = state else {
1529            unreachable!("Tried to handle process exit of non-running process");
1530        };
1531
1532        #[cfg(feature = "perf_timers")]
1533        debug!(
1534            "total runtime for process '{}' was {:?}",
1535            runnable.common.name(),
1536            runnable.total_run_time.get()
1537        );
1538
1539        let wait_res: Option<WaitStatus> =
1540            rustix::process::waitpid(Some(runnable.native_pid().into()), WaitOptions::empty())
1541                .unwrap_or_else(|e| {
1542                    panic!("Error waiting for {:?}: {:?}", runnable.native_pid(), e)
1543                });
1544        let wait_status = wait_res.unwrap();
1545        let exit_status = if killed_by_shadow {
1546            if wait_status.terminating_signal()
1547                != Some(Signal::SIGKILL.as_i32().try_into().unwrap())
1548            {
1549                warn!("Unexpected waitstatus after killed by shadow: {wait_status:?}");
1550            }
1551            ExitStatus::StoppedByShadow
1552        } else if let Some(code) = wait_status.exit_status() {
1553            ExitStatus::Normal(code.try_into().unwrap())
1554        } else if let Some(signal) = wait_status.terminating_signal() {
1555            ExitStatus::Signaled(Signal::try_from(i32::try_from(signal).unwrap()).unwrap())
1556        } else {
1557            panic!(
1558                "Unexpected status: {wait_status:?} for pid {:?}",
1559                runnable.native_pid()
1560            );
1561        };
1562
1563        let (main_result_string, log_level) = {
1564            let mut s = format!(
1565                "process '{name}' exited with status {exit_status:?}",
1566                name = runnable.common.name()
1567            );
1568            if let Some(expected_final_state) = runnable.expected_final_state {
1569                let actual_final_state = match exit_status {
1570                    ExitStatus::Normal(i) => ProcessFinalState::Exited { exited: i },
1571                    ExitStatus::Signaled(s) => ProcessFinalState::Signaled {
1572                        // This conversion will fail on realtime signals, but that
1573                        // should currently be impossible since we don't support
1574                        // sending realtime signals.
1575                        signaled: s.try_into().unwrap(),
1576                    },
1577                    ExitStatus::StoppedByShadow => ProcessFinalState::Running(RunningVal::Running),
1578                };
1579                if expected_final_state == actual_final_state {
1580                    (s, log::Level::Debug)
1581                } else {
1582                    Worker::increment_plugin_error_count();
1583                    write!(s, "; expected end state was {expected_final_state} but was {actual_final_state}").unwrap();
1584                    (s, log::Level::Error)
1585                }
1586            } else {
1587                (s, log::Level::Debug)
1588            }
1589        };
1590        log::log!(log_level, "{main_result_string}");
1591
1592        let zombie = ZombieProcess {
1593            common: runnable.into_common(),
1594            exit_status,
1595        };
1596        zombie.notify_parent_of_exit(host);
1597
1598        *opt_state = Some(ProcessState::Zombie(zombie));
1599    }
1600
1601    /// Deprecated wrapper for `RunnableProcess::add_thread`
1602    pub fn add_thread(&self, host: &Host, thread: RootedRc<RootedRefCell<Thread>>) {
1603        self.as_runnable().unwrap().add_thread(host, thread)
1604    }
1605
    /// Emulate the `prlimit64` syscall for this process, operating on potentially both
    /// native and emulated resource limits.
    ///
    /// If `old_rlim` is `Some`, the current *emulated* limit for `resource` is
    /// written into it. If `new_rlim` is `Some`, it is validated, stored as the
    /// new emulated limit, and (if the process is still runnable) also applied
    /// to the native process — clamped so that native limits are never lowered
    /// below their initial native values.
    ///
    /// # Errors
    ///
    /// * `EINVAL` if `new_rlim.rlim_cur > new_rlim.rlim_max`.
    /// * `EPERM` if `new_rlim` attempts to raise `rlim_max` above the current
    ///   emulated `rlim_max`.
    pub fn prlimit64(
        &self,
        resource: linux_api::resource::Resource,
        new_rlim: Option<&linux_api::resource::rlimit64>,
        old_rlim: Option<&mut linux_api::resource::rlimit64>,
    ) -> Result<(), linux_api::errno::Errno> {
        // The resource number indexes directly into the emulated rlimits table.
        let idx = usize::try_from(u32::from(resource)).unwrap();
        let cur = self.common().rlimits[idx];
        if let Some(old_rlim) = old_rlim {
            *old_rlim = cur;
        }
        let Some(new_rlim) = new_rlim else {
            // Nothing else to do.
            return Ok(());
        };
        if new_rlim.rlim_cur > new_rlim.rlim_max {
            return Err(linux_api::errno::Errno::EINVAL);
        }
        // For now don't allow increasing rlim_max. We'd only be able to actually do this
        // natively if shadow is running with CAP_SYS_RESOURCE. We could pretend to do it
        // without changing the native limit, but that might just lead to confusion if and
        // when the native limit is exceeded without exceeding the emulated limit.
        if new_rlim.rlim_max > cur.rlim_max {
            return Err(linux_api::errno::Errno::EPERM);
        }

        // Update our emulated limits to what was requested.
        self.common_mut().rlimits[idx] = *new_rlim;

        let native_pid = if let Some(runnable) = self.as_runnable() {
            runnable.native_pid()
        } else {
            // The process is a zombie. No need to update the native limits, and
            // we can't since we already reaped the native process when
            // converting to the zombie state.
            return Ok(());
        };

        // Get the current native limit.
        // Theoretically we could cache this, but doesn't seem worth the
        // complexity and fragility.
        let mut native_rlim = shadow_pod::zeroed();
        // SAFETY: we're only getting, not setting.
        unsafe {
            linux_api::resource::prlimit64(native_pid, resource, None, Some(&mut native_rlim))
        }
        .unwrap();

        // From here on, `new_rlim` is a local copy that is clamped for the
        // *native* process only; the emulated limits stored above keep the
        // caller's requested values.
        let mut new_rlim = *new_rlim;
        if new_rlim.rlim_cur < native_rlim.rlim_cur {
            // We don't permit lowering limits past their initial values,
            // since the shadow shim may use resources beyond what the managed process
            // itself needs (see <https://github.com/shadow/shadow/issues/3681>).
            log::warn!(
                "Only pretending to lower native {resource:?} rlim_cur from {} to {}",
                native_rlim.rlim_cur,
                new_rlim.rlim_cur
            );
            new_rlim.rlim_cur = native_rlim.rlim_cur;
        }
        if new_rlim.rlim_max < native_rlim.rlim_cur {
            // We can allow lowering the native max, since we currently never try to increase
            // the limit beyond its initial value. But the kernel won't let us lower beyond rlim_cur.
            log::warn!(
                "Only pretending to lower native {resource:?} rlim_max from {} to {}",
                native_rlim.rlim_max,
                new_rlim.rlim_max
            );
            new_rlim.rlim_max = native_rlim.rlim_cur;
        }

        // Update the native limits. This should always succeed with the validations we already did above.
        // SAFETY: Not our process, and the checks we did above should ensure we don't lower
        // the limits to something the shim can't handle.
        unsafe { linux_api::resource::prlimit64(native_pid, resource, Some(&new_rlim), None) }
            .unwrap();

        Ok(())
    }
1687
1688    /// FIXME: still needed? Time is now updated more granularly in the Thread code
1689    /// when xferring control to/from shim.
1690    fn set_shared_time(host: &Host) {
1691        let mut host_shmem = host.shim_shmem_lock_borrow_mut().unwrap();
1692        host_shmem.max_runahead_time = Worker::max_event_runahead_time(host);
1693        host.shim_shmem()
1694            .sim_time
1695            .store(Worker::current_time().unwrap(), Ordering::Relaxed);
1696    }
1697
1698    /// Deprecated wrapper for `RunnableProcess::shmem`
1699    pub fn shmem(&self) -> impl Deref<Target = ShMemBlock<'static, ProcessShmem>> + '_ {
1700        Ref::map(self.as_runnable().unwrap(), |r| &r.shim_shared_mem_block)
1701    }
1702
1703    /// Resource usage, as returned e.g. by the `getrusage` syscall.
1704    pub fn rusage(&self) -> linux_api::resource::rusage {
1705        warn_once_then_debug!(
1706            "resource usage (rusage) tracking unimplemented; Returning bogus zeroed values"
1707        );
1708        // TODO: Actually track some of these.
1709        // Assuming we want to support `RUSAGE_THREAD` in the `getrusage`
1710        // syscall, we'll actually want to track at the thread level, and either
1711        // increment at both thread and process level at the points where we do
1712        // the tracking, or dynamically iterate over the threads here and sum
1713        // the results.
1714        linux_api::resource::rusage {
1715            ru_utime: linux_api::time::kernel_old_timeval {
1716                tv_sec: 0,
1717                tv_usec: 0,
1718            },
1719            ru_stime: linux_api::time::kernel_old_timeval {
1720                tv_sec: 0,
1721                tv_usec: 0,
1722            },
1723            ru_maxrss: 0,
1724            ru_ixrss: 0,
1725            ru_idrss: 0,
1726            ru_isrss: 0,
1727            ru_minflt: 0,
1728            ru_majflt: 0,
1729            ru_nswap: 0,
1730            ru_inblock: 0,
1731            ru_oublock: 0,
1732            ru_msgsnd: 0,
1733            ru_msgrcv: 0,
1734            ru_nsignals: 0,
1735            ru_nvcsw: 0,
1736            ru_nivcsw: 0,
1737        }
1738    }
1739
1740    /// Signal that will be sent to parent process on exit. Typically `Some(SIGCHLD)`.
1741    pub fn exit_signal(&self) -> Option<Signal> {
1742        self.common().exit_signal
1743    }
1744
1745    /// Signal that will be sent to this process when its parent exits.
1746    pub fn parent_death_signal(&self) -> Option<Signal> {
1747        self.common().parent_death_signal.get()
1748    }
1749
1750    /// Set the signal that should be sent to this process when its parent exits.
1751    pub fn set_parent_death_signal(&self, signal: Option<Signal>) {
1752        self.common().parent_death_signal.set(signal);
1753    }
1754
1755    pub fn current_working_dir(&self) -> impl Deref<Target = CString> + '_ {
1756        Ref::map(self.common(), |common| &common.working_dir)
1757    }
1758
1759    /// Set the process's working directory.
1760    /// This must be kept in sync with the actual working dir of the native process.
1761    /// See <https://github.com/shadow/shadow/issues/2960>
1762    // TODO: This ought to be at the thread level, to support `CLONE_FS`.
1763    pub fn set_current_working_dir(&self, path: CString) {
1764        self.common_mut().working_dir = path;
1765    }
1766
1767    /// Update `self` to complete an `exec` syscall from thread `tid`, replacing
1768    /// the running managed process with `mthread`.
1769    pub fn update_for_exec(&mut self, host: &Host, tid: ThreadId, mthread: ManagedThread) {
1770        let Some(mut runnable) = self.as_runnable_mut() else {
1771            // This could happen if another event runs before the "execve completion" event
1772            // and kills the process. e.g. another thread in the process could run and
1773            // execute the `exit_group` syscall.
1774            log::debug!(
1775                "Process {:?} exited before it could complete execve",
1776                self.id()
1777            );
1778            mthread.kill_and_drop();
1779            return;
1780        };
1781        let old_native_pid = std::mem::replace(&mut runnable.native_pid, mthread.native_pid());
1782
1783        // Kill the previous native process
1784        rustix::process::kill_process(old_native_pid.into(), rustix::process::Signal::Kill)
1785            .expect("Unable to send kill signal to managed process {old_native_pid:?}");
1786        let wait_res = rustix::process::waitpid(Some(old_native_pid.into()), WaitOptions::empty())
1787            .unwrap()
1788            .unwrap();
1789        assert_eq!(
1790            wait_res.terminating_signal(),
1791            Some(Signal::SIGKILL.as_i32().try_into().unwrap())
1792        );
1793
1794        let execing_thread = runnable.threads.borrow_mut().remove(&tid).unwrap();
1795
1796        // Dispose of all threads other than the thread that's running `exec`.
1797        for (_tid, thread) in runnable.threads.replace(BTreeMap::new()) {
1798            // Notify the ManagedThread that the native process has exited.
1799            thread.borrow(host.root()).mthread().handle_process_exit();
1800
1801            thread.explicit_drop_recursive(host.root(), host);
1802        }
1803
1804        // Recreate the `MemoryManager`
1805        {
1806            // We can't safely replace the memory manager if there are outstanding
1807            // unsafe references in C code. There shouldn't be any, though, since
1808            // this is only called from the `execve` and `execveat` syscall handlers,
1809            // which are in Rust.
1810            let unsafe_borrow_mut = runnable.unsafe_borrow_mut.borrow();
1811            let unsafe_borrows = runnable.unsafe_borrows.borrow();
1812            assert!(unsafe_borrow_mut.is_none());
1813            assert!(unsafe_borrows.is_empty());
1814            // Replace the MM, while still holding the references to the unsafe borrows
1815            // to ensure none exist.
1816            runnable
1817                .memory_manager
1818                .replace(unsafe { MemoryManager::new(mthread.native_pid()) });
1819        }
1820
1821        let new_tid = runnable.common.thread_group_leader_id();
1822        log::trace!(
1823            "updating for exec; pid:{pid}, tid:{tid:?}, new_tid:{new_tid:?}",
1824            pid = runnable.common.id
1825        );
1826        execing_thread
1827            .borrow_mut(host.root())
1828            .update_for_exec(host, mthread, new_tid);
1829
1830        runnable
1831            .threads
1832            .borrow_mut()
1833            .insert(new_tid, execing_thread);
1834
1835        // Exit signal is reset to SIGCHLD.
1836        runnable.common.exit_signal = Some(Signal::SIGCHLD);
1837
1838        // Reset signal actions to default.
1839        // `execve(2)`:
1840        // POSIX.1 specifies that the dispositions of any signals that
1841        // are ignored or set to the default are left unchanged.  POSIX.1
1842        // specifies one exception: if SIGCHLD is being ignored, then an
1843        // implementation may leave the disposition unchanged or reset it
1844        // to the default; Linux does the former.
1845        let host_shmem_prot = host.shim_shmem_lock_borrow_mut().unwrap();
1846        let mut shmem_prot = runnable
1847            .shim_shared_mem_block
1848            .protected
1849            .borrow_mut(&host_shmem_prot.root);
1850        for signal in Signal::standard_signals() {
1851            let current_action = unsafe { shmem_prot.signal_action(signal) };
1852            if !(current_action.is_default()
1853                || current_action.is_ignore()
1854                || signal == Signal::SIGCHLD && current_action.is_ignore())
1855            {
1856                unsafe {
1857                    *shmem_prot.signal_action_mut(signal) = linux_api::signal::sigaction::new_raw(
1858                        linux_api::signal::SignalHandler::SigDfl,
1859                        SigActionFlags::empty(),
1860                        sigset_t::EMPTY,
1861                        None,
1862                    )
1863                };
1864            }
1865        }
1866    }
1867}
1868
1869impl Drop for Process {
1870    fn drop(&mut self) {
1871        // Should have been explicitly dropped.
1872        debug_assert!(self.state.borrow().is_none());
1873    }
1874}
1875
1876impl ExplicitDrop for Process {
1877    type ExplicitDropParam = Host;
1878    type ExplicitDropResult = ();
1879
1880    fn explicit_drop(mut self, host: &Self::ExplicitDropParam) -> Self::ExplicitDropResult {
1881        // Should normally only be dropped in the zombie state.
1882        debug_assert!(self.as_zombie().is_some() || std::thread::panicking());
1883
1884        let state = self.state.get_mut().take().unwrap();
1885        state.explicit_drop(host);
1886    }
1887}
1888
/// Tracks a memory reference made by a legacy C memory-read API.
///
/// Both fields hold lifetimes transmuted to `'static`; they actually borrow
/// from the owning process's `MemoryManager` and must never outlive it.
struct UnsafeBorrow {
    // Must come before `manager`, so that it's dropped first, since it's
    // borrowed from it.
    _memory: ProcessMemoryRef<'static, u8>,
    _manager: Ref<'static, MemoryManager>,
}
1896
impl UnsafeBorrow {
    /// Creates a raw readable pointer, and saves an instance of `Self` into
    /// `process` for later clean-up.
    ///
    /// # Safety
    ///
    /// The pointer is invalidated when one of the Process memory flush methods is called.
    unsafe fn readable_ptr(
        process: &Process,
        ptr: ForeignArrayPtr<u8>,
    ) -> Result<*const c_void, Errno> {
        let runnable = process.as_runnable().unwrap();
        let manager = runnable.memory_manager.borrow();
        // SAFETY: We ensure that the `memory` is dropped before the `manager`,
        // and `Process` ensures that this whole object is dropped before
        // `MemoryManager` can be moved, freed, etc.
        let manager = unsafe {
            std::mem::transmute::<Ref<'_, MemoryManager>, Ref<'static, MemoryManager>>(manager)
        };
        let memory = manager.memory_ref(ptr)?;
        let memory = unsafe {
            std::mem::transmute::<ProcessMemoryRef<'_, u8>, ProcessMemoryRef<'static, u8>>(memory)
        };
        let vptr = memory.as_ptr() as *mut c_void;
        // Register the borrow with the process so a later flush releases it.
        runnable.unsafe_borrows.borrow_mut().push(Self {
            _manager: manager,
            _memory: memory,
        });
        Ok(vptr)
    }

    /// Creates a raw readable string, and saves an instance of `Self` into
    /// `process` for later clean-up.
    ///
    /// Returns the pointer and the length of the accessible memory prefix
    /// (which contains at least one NUL byte), not necessarily `strlen`.
    ///
    /// # Safety
    ///
    /// The pointer is invalidated when one of the Process memory flush methods is called.
    unsafe fn readable_string(
        process: &Process,
        ptr: ForeignArrayPtr<c_char>,
    ) -> Result<(*const c_char, libc::size_t), Errno> {
        let runnable = process.as_runnable().unwrap();
        let manager = runnable.memory_manager.borrow();
        // SAFETY: We ensure that the `memory` is dropped before the `manager`,
        // and `Process` ensures that this whole object is dropped before
        // `MemoryManager` can be moved, freed, etc.
        let manager = unsafe {
            std::mem::transmute::<Ref<'_, MemoryManager>, Ref<'static, MemoryManager>>(manager)
        };
        let ptr = ptr.cast_u8();
        let memory = manager.memory_ref_prefix(ptr)?;
        let memory = unsafe {
            std::mem::transmute::<ProcessMemoryRef<'_, u8>, ProcessMemoryRef<'static, u8>>(memory)
        };
        // Require a NUL terminator within the accessible prefix.
        if !memory.contains(&0) {
            return Err(Errno::ENAMETOOLONG);
        }
        // The u8 -> c_char pointer cast below relies on the two having the same size.
        assert_eq!(std::mem::size_of::<c_char>(), std::mem::size_of::<u8>());
        let ptr = memory.as_ptr() as *const c_char;
        let len = memory.len();
        // Register the borrow with the process so a later flush releases it.
        runnable.unsafe_borrows.borrow_mut().push(Self {
            _manager: manager,
            _memory: memory,
        });
        Ok((ptr, len))
    }
}
1964
// Safety: Normally the Ref would make this non-Send, since it could end up
// trying to manipulate the source RefCell (which is !Sync) from multiple
// threads.  We ensure that these objects never escape Process, which itself is
// non-Sync, ensuring this doesn't happen.
//
// This is admittedly hand-wavy and making some assumptions about the
// implementation of RefCell, but this whole type is temporary scaffolding to
// support legacy C code.
unsafe impl Send for UnsafeBorrow {}
1974
/// Tracks a memory reference made by a legacy C memory-write API.
///
/// Both fields hold lifetimes transmuted to `'static`; they actually borrow
/// from the owning process's `MemoryManager` and must never outlive it.
struct UnsafeBorrowMut {
    // Must come before `manager`, so that it's dropped first, since it's
    // borrowed from it.
    // `Option` so that `flush`/`noflush` can move the reference out and
    // consume it explicitly.
    memory: Option<ProcessMemoryRefMut<'static, u8>>,
    _manager: RefMut<'static, MemoryManager>,
}
1982
impl UnsafeBorrowMut {
    /// Creates a raw writable pointer, and saves an instance of `Self` into
    /// `process` for later clean-up. The initial contents of the pointer is unspecified.
    ///
    /// # Safety
    ///
    /// The pointer is invalidated when one of the Process memory flush methods is called.
    unsafe fn writable_ptr(
        process: &Process,
        ptr: ForeignArrayPtr<u8>,
    ) -> Result<*mut c_void, Errno> {
        let runnable = process.as_runnable().unwrap();
        let manager = runnable.memory_manager.borrow_mut();
        // SAFETY: We ensure that the `memory` is dropped before the `manager`,
        // and `Process` ensures that this whole object is dropped before
        // `MemoryManager` can be moved, freed, etc.
        let mut manager = unsafe {
            std::mem::transmute::<RefMut<'_, MemoryManager>, RefMut<'static, MemoryManager>>(
                manager,
            )
        };
        let memory = manager.memory_ref_mut_uninit(ptr)?;
        let mut memory = unsafe {
            std::mem::transmute::<ProcessMemoryRefMut<'_, u8>, ProcessMemoryRefMut<'static, u8>>(
                memory,
            )
        };
        let vptr = memory.as_mut_ptr() as *mut c_void;
        // Register the borrow for later flush. Unlike the read case there can
        // be at most one outstanding mutable borrow.
        let prev = runnable.unsafe_borrow_mut.borrow_mut().replace(Self {
            _manager: manager,
            memory: Some(memory),
        });
        assert!(prev.is_none());
        Ok(vptr)
    }

    /// Creates a raw mutable pointer, and saves an instance of `Self` into
    /// `process` for later clean-up.
    ///
    /// # Safety
    ///
    /// The pointer is invalidated when one of the Process memory flush methods is called.
    unsafe fn mutable_ptr(
        process: &Process,
        ptr: ForeignArrayPtr<u8>,
    ) -> Result<*mut c_void, Errno> {
        let runnable = process.as_runnable().unwrap();
        let manager = runnable.memory_manager.borrow_mut();
        // SAFETY: We ensure that the `memory` is dropped before the `manager`,
        // and `Process` ensures that this whole object is dropped before
        // `MemoryManager` can be moved, freed, etc.
        let mut manager = unsafe {
            std::mem::transmute::<RefMut<'_, MemoryManager>, RefMut<'static, MemoryManager>>(
                manager,
            )
        };
        // Unlike `writable_ptr`, this initializes the mapping with the
        // current contents of process memory (`memory_ref_mut` vs
        // `memory_ref_mut_uninit`).
        let memory = manager.memory_ref_mut(ptr)?;
        let mut memory = unsafe {
            std::mem::transmute::<ProcessMemoryRefMut<'_, u8>, ProcessMemoryRefMut<'static, u8>>(
                memory,
            )
        };
        let vptr = memory.as_mut_ptr() as *mut c_void;
        // Register the borrow for later flush; at most one may be outstanding.
        let prev = runnable.unsafe_borrow_mut.borrow_mut().replace(Self {
            _manager: manager,
            memory: Some(memory),
        });
        assert!(prev.is_none());
        Ok(vptr)
    }

    /// Free this reference, writing back to process memory.
    fn flush(mut self) -> Result<(), Errno> {
        self.memory.take().unwrap().flush()
    }

    /// Free this reference without writing back to process memory.
    fn noflush(mut self) {
        self.memory.take().unwrap().noflush()
    }
}
2064
// Safety: Normally the RefMut would make this non-Send, since it could end up
// trying to manipulate the source RefCell (which is !Sync) from multiple
// threads.  We ensure that these objects never escape Process, which itself is
// non-Sync, ensuring this doesn't happen.
//
// This is admittedly hand-wavy and making some assumptions about the implementation of
// RefCell, but this whole type is temporary scaffolding to support legacy C code.
unsafe impl Send for UnsafeBorrowMut {}
2073
2074fn make_name(host: &Host, exe_name: &str, id: ProcessId) -> CString {
2075    CString::new(format!(
2076        "{host_name}.{exe_name}.{id}",
2077        host_name = host.name(),
2078        exe_name = exe_name,
2079        id = u32::from(id)
2080    ))
2081    .unwrap()
2082}
2083
2084mod export {
2085    use std::os::raw::c_void;
2086
2087    use libc::size_t;
2088    use log::trace;
2089    use shadow_shim_helper_rs::notnull::*;
2090    use shadow_shim_helper_rs::shim_shmem::export::ShimShmemProcess;
2091    use shadow_shim_helper_rs::syscall_types::UntypedForeignPtr;
2092
2093    use super::*;
2094    use crate::utility::HostTreePointer;
2095
    /// Copy `n` bytes from `src` to `dst`. Returns 0 on success or -EFAULT if any of
    /// the specified range couldn't be accessed. Always succeeds with n==0.
    ///
    /// NOTE(review): unlike its siblings (e.g. `process_writePtr`), this is not
    /// declared `unsafe fn`, yet it dereferences `proc` and builds a slice from
    /// `dst` — callers must still uphold those pointer requirements. Consider
    /// making it `unsafe` for consistency.
    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn process_readPtr(
        proc: *const Process,
        dst: *mut c_void,
        src: UntypedForeignPtr,
        n: usize,
    ) -> i32 {
        let proc = unsafe { proc.as_ref().unwrap() };
        let src = ForeignArrayPtr::new(src.cast::<u8>(), n);
        // SAFETY: caller must provide a `dst` valid for `n` writable bytes.
        let dst = unsafe { std::slice::from_raw_parts_mut(notnull_mut_debug(dst) as *mut u8, n) };

        match proc.memory_borrow().copy_from_ptr(dst, src) {
            Ok(_) => 0,
            Err(e) => {
                trace!("Couldn't read {src:?} into {dst:?}: {e:?}");
                e.to_negated_i32()
            }
        }
    }
2117
    /// Copy `n` bytes from `src` to `dst`. Returns 0 on success or -EFAULT if any of
    /// the specified range couldn't be accessed. The write is flushed immediately.
    ///
    /// # Safety
    ///
    /// `proc` must point to a valid `Process`, and `src` must be valid for
    /// reading `n` bytes.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn process_writePtr(
        proc: *const Process,
        dst: UntypedForeignPtr,
        src: *const c_void,
        n: usize,
    ) -> i32 {
        let proc = unsafe { proc.as_ref().unwrap() };
        let dst = ForeignArrayPtr::new(dst.cast::<u8>(), n);
        // SAFETY: caller guarantees `src` is readable for `n` bytes.
        let src = unsafe { std::slice::from_raw_parts(notnull_debug(src) as *const u8, n) };
        match proc.memory_borrow_mut().copy_to_ptr(dst, src) {
            Ok(_) => 0,
            Err(e) => {
                trace!("Couldn't write {src:?} into {dst:?}: {e:?}");
                e.to_negated_i32()
            }
        }
    }
2138
    /// Make the data at plugin_src available in shadow's address space.
    ///
    /// The returned pointer is invalidated when one of the process memory flush
    /// methods is called; typically after a syscall has completed.
    ///
    /// Returns NULL if the requested range couldn't be accessed.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn process_getReadablePtr(
        proc: *const Process,
        plugin_src: UntypedForeignPtr,
        n: usize,
    ) -> *const c_void {
        let proc = unsafe { proc.as_ref().unwrap() };
        let plugin_src = ForeignArrayPtr::new(plugin_src.cast::<u8>(), n);
        // `UnsafeBorrow` registers itself with the process for later cleanup.
        unsafe { UnsafeBorrow::readable_ptr(proc, plugin_src).unwrap_or(std::ptr::null()) }
    }
2153
    /// Returns a writable pointer corresponding to the named region. The
    /// initial contents of the returned memory are unspecified.
    ///
    /// The returned pointer is invalidated when one of the process memory flush
    /// methods is called; typically after a syscall has completed.
    ///
    /// Returns NULL if the requested range couldn't be accessed.
    ///
    /// CAUTION: if the unspecified contents aren't overwritten, and the pointer
    /// isn't explicitly freed via `process_freePtrsWithoutFlushing`, those
    /// unspecified contents may be written back into process memory.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn process_getWriteablePtr(
        proc: *const Process,
        plugin_src: UntypedForeignPtr,
        n: usize,
    ) -> *mut c_void {
        let proc = unsafe { proc.as_ref().unwrap() };
        let plugin_src = ForeignArrayPtr::new(plugin_src.cast::<u8>(), n);
        // `UnsafeBorrowMut` registers itself with the process; only one mutable
        // borrow may be outstanding at a time.
        unsafe { UnsafeBorrowMut::writable_ptr(proc, plugin_src).unwrap_or(std::ptr::null_mut()) }
    }
2173
    /// Returns a writeable pointer corresponding to the specified src. Use when
    /// the data at the given address needs to be both read and written.
    ///
    /// The returned pointer is invalidated when one of the process memory flush
    /// methods is called; typically after a syscall has completed.
    ///
    /// Returns NULL if the requested range couldn't be accessed.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn process_getMutablePtr(
        proc: *const Process,
        plugin_src: UntypedForeignPtr,
        n: usize,
    ) -> *mut c_void {
        let proc = unsafe { proc.as_ref().unwrap() };
        let plugin_src = ForeignArrayPtr::new(plugin_src.cast::<u8>(), n);
        // Unlike `process_getWriteablePtr`, the returned memory is initialized
        // with the current contents of process memory.
        unsafe { UnsafeBorrowMut::mutable_ptr(proc, plugin_src).unwrap_or(std::ptr::null_mut()) }
    }
2189
    /// Reads up to `maxlen` bytes into `strbuf`.
    ///
    /// Returns:
    /// strlen(strbuf) on success.
    /// -ENAMETOOLONG if there was no NULL byte in the first `maxlen` characters.
    /// -EFAULT if the string extends beyond the accessible address space.
    ///
    /// # Safety
    ///
    /// `proc` must point to a valid `Process` and `strbuf` must be writable
    /// for `maxlen` bytes.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn process_readString(
        proc: *const Process,
        strbuf: *mut libc::c_char,
        ptr: UntypedForeignPtr,
        maxlen: libc::size_t,
    ) -> libc::ssize_t {
        let proc = unsafe { proc.as_ref().unwrap() };
        let memory_manager = proc.memory_borrow();
        // SAFETY: caller guarantees `strbuf` is writable for `maxlen` bytes.
        let buf =
            unsafe { std::slice::from_raw_parts_mut(notnull_mut_debug(strbuf) as *mut u8, maxlen) };
        let cstr = match memory_manager
            .copy_str_from_ptr(buf, ForeignArrayPtr::new(ptr.cast::<u8>(), maxlen))
        {
            Ok(cstr) => cstr,
            Err(e) => return e.to_negated_i32() as isize,
        };
        // Length excludes the NUL terminator, matching strlen semantics.
        cstr.to_bytes().len().try_into().unwrap()
    }
2215
    /// Makes a NUL-terminated string at `plugin_src` readable in shadow's
    /// address space, without copying into a caller-provided buffer.
    ///
    /// Returns:
    /// 0 on success; `*out_str` is set to a readable pointer and, if
    /// `out_strlen` is non-NULL, `*out_strlen` is set to the length returned by
    /// `UnsafeBorrow::readable_string`.
    /// NOTE(review): that length is the size of the accessible memory prefix,
    /// not necessarily strlen(*out_str) — confirm against callers.
    /// -ENAMETOOLONG if there was no NULL byte in the first `n` characters.
    /// -EFAULT if the string extends beyond the accessible address space.
    ///
    /// The pointer is invalidated when one of the process memory flush methods
    /// is called.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn process_getReadableString(
        proc: *const Process,
        plugin_src: UntypedForeignPtr,
        n: usize,
        out_str: *mut *const c_char,
        out_strlen: *mut size_t,
    ) -> i32 {
        let proc = unsafe { proc.as_ref().unwrap() };
        let ptr = ForeignArrayPtr::new(plugin_src.cast::<c_char>(), n);
        match unsafe { UnsafeBorrow::readable_string(proc, ptr) } {
            Ok((str, strlen)) => {
                // `out_str` is mandatory; `out_strlen` is optional.
                assert!(!out_str.is_null());
                unsafe { out_str.write(str) };
                if !out_strlen.is_null() {
                    unsafe { out_strlen.write(strlen) };
                }
                0
            }
            Err(e) => e.to_negated_i32(),
        }
    }
2244
2245    /// Returns the processID that was assigned to us in process_new
2246    #[unsafe(no_mangle)]
2247    pub unsafe extern "C-unwind" fn process_getProcessID(proc: *const Process) -> libc::pid_t {
2248        let proc = unsafe { proc.as_ref().unwrap() };
2249        proc.id().into()
2250    }
2251
2252    #[unsafe(no_mangle)]
2253    pub unsafe extern "C-unwind" fn process_getName(proc: *const Process) -> *const c_char {
2254        let proc = unsafe { proc.as_ref().unwrap() };
2255        proc.common().name.as_ptr()
2256    }
2257
    /// Safety:
    ///
    /// The returned pointer is invalidated when the host shmem lock is released, e.g. via
    /// Host::unlock_shmem.
    ///
    /// Panics if the process is not in the runnable state.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn process_getSharedMem(
        proc: *const Process,
    ) -> *const ShimShmemProcess {
        let proc = unsafe { proc.as_ref().unwrap() };
        // Deref the shmem block to its contents and hand out a raw pointer;
        // the runnable-state borrow is released when this function returns.
        std::ptr::from_ref(proc.as_runnable().unwrap().shim_shared_mem_block.deref())
    }
2269
2270    #[unsafe(no_mangle)]
2271    pub unsafe extern "C-unwind" fn process_getWorkingDir(proc: *const Process) -> *const c_char {
2272        let proc = unsafe { proc.as_ref().unwrap() };
2273        proc.common().working_dir.as_ptr()
2274    }
2275
2276    #[unsafe(no_mangle)]
2277    pub unsafe extern "C-unwind" fn process_straceLoggingMode(
2278        proc: *const Process,
2279    ) -> StraceFmtMode {
2280        let proc = unsafe { proc.as_ref().unwrap() };
2281        proc.strace_logging_options().into()
2282    }
2283
2284    #[unsafe(no_mangle)]
2285    pub unsafe extern "C-unwind" fn process_getNativePid(proc: *const Process) -> libc::pid_t {
2286        let proc = unsafe { proc.as_ref().unwrap() };
2287        proc.native_pid().as_raw_nonzero().get()
2288    }
2289
2290    /// Flushes and invalidates all previously returned readable/writable plugin
2291    /// pointers, as if returning control to the plugin. This can be useful in
2292    /// conjunction with `thread_nativeSyscall` operations that touch memory, or
2293    /// to gracefully handle failed writes.
2294    ///
2295    /// Returns 0 on success or a negative errno on failure.
2296    #[unsafe(no_mangle)]
2297    pub unsafe extern "C-unwind" fn process_flushPtrs(proc: *const Process) -> i32 {
2298        let proc = unsafe { proc.as_ref().unwrap() };
2299        match proc.free_unsafe_borrows_flush() {
2300            Ok(_) => 0,
2301            Err(e) => e.to_negated_i32(),
2302        }
2303    }
2304
    /// Frees all readable/writable foreign pointers. Unlike process_flushPtrs, any
    /// previously returned writable pointer is *not* written back. Useful
    /// if an uninitialized writable pointer was obtained via `process_getWriteablePtr`,
    /// and we end up not wanting to write anything after all (in particular, don't
    /// write back whatever garbage data was in the uninitialized buffer).
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn process_freePtrsWithoutFlushing(proc: *const Process) {
        let proc = unsafe { proc.as_ref().unwrap() };
        proc.free_unsafe_borrows_noflush();
    }
2315
2316    #[unsafe(no_mangle)]
2317    pub unsafe extern "C-unwind" fn process_getThread(
2318        proc: *const Process,
2319        tid: libc::pid_t,
2320    ) -> *const Thread {
2321        let proc = unsafe { proc.as_ref().unwrap() };
2322        Worker::with_active_host(|host| {
2323            let tid = ThreadId::try_from(tid).unwrap();
2324            let Some(thread) = proc.thread_borrow(tid) else {
2325                return std::ptr::null();
2326            };
2327            let thread = thread.borrow(host.root());
2328            &*thread
2329        })
2330        .unwrap()
2331    }
2332
2333    /// Returns a pointer to an arbitrary live thread in the process.
2334    #[unsafe(no_mangle)]
2335    pub unsafe extern "C-unwind" fn process_firstLiveThread(proc: *const Process) -> *const Thread {
2336        let proc = unsafe { proc.as_ref().unwrap() };
2337        Worker::with_active_host(|host| {
2338            let Some(thread) = proc.first_live_thread_borrow(host.root()) else {
2339                return std::ptr::null();
2340            };
2341            let thread = thread.borrow(host.root());
2342            &*thread
2343        })
2344        .unwrap()
2345    }
2346
2347    #[unsafe(no_mangle)]
2348    pub unsafe extern "C-unwind" fn process_isRunning(proc: *const Process) -> bool {
2349        let proc = unsafe { proc.as_ref().unwrap() };
2350        proc.is_running()
2351    }
2352
2353    // FIXME: still needed? Time is now updated more granularly in the Thread code
2354    // when xferring control to/from shim.
2355    #[unsafe(no_mangle)]
2356    pub unsafe extern "C-unwind" fn process_setSharedTime() {
2357        Worker::with_active_host(Process::set_shared_time).unwrap();
2358    }
2359
2360    #[unsafe(no_mangle)]
2361    pub unsafe extern "C-unwind" fn process_getPhysicalAddress(
2362        proc: *const Process,
2363        vptr: UntypedForeignPtr,
2364    ) -> ManagedPhysicalMemoryAddr {
2365        let proc = unsafe { proc.as_ref().unwrap() };
2366        proc.physical_address(vptr)
2367    }
2368
2369    #[unsafe(no_mangle)]
2370    pub unsafe extern "C-unwind" fn process_addChildEventListener(
2371        host: *const Host,
2372        process: *const Process,
2373        listener: *mut cshadow::StatusListener,
2374    ) {
2375        let host = unsafe { host.as_ref().unwrap() };
2376        let process = unsafe { process.as_ref().unwrap() };
2377        let listener = HostTreePointer::new_for_host(host.id(), listener);
2378        process
2379            .borrow_as_runnable()
2380            .unwrap()
2381            .child_process_event_listeners
2382            .borrow_mut()
2383            .add_legacy_listener(listener)
2384    }
2385
2386    #[unsafe(no_mangle)]
2387    pub unsafe extern "C-unwind" fn process_removeChildEventListener(
2388        _host: *const Host,
2389        process: *const Process,
2390        listener: *mut cshadow::StatusListener,
2391    ) {
2392        let process = unsafe { process.as_ref().unwrap() };
2393        process
2394            .borrow_as_runnable()
2395            .unwrap()
2396            .child_process_event_listeners
2397            .borrow_mut()
2398            .remove_legacy_listener(listener)
2399    }
2400}