shadow_rs/host/managed_thread.rs

1//! A thread of a managed process.
2//!
3//! This contains the code where the simulator can create or communicate with a managed process.
4
5use std::cell::{Cell, RefCell};
6use std::ffi::{CStr, CString};
7use std::io::Write;
8use std::os::fd::AsRawFd;
9use std::os::unix::prelude::OsStrExt;
10use std::path::PathBuf;
11use std::sync::{Arc, atomic};
12
13use linux_api::errno::Errno;
14use linux_api::posix_types::Pid;
15use linux_api::sched::CloneFlags;
16use linux_api::signal::tgkill;
17use log::{Level, debug, error, log_enabled, trace};
18use rand::Rng as _;
19use rustix::pipe::PipeFlags;
20use rustix::process::WaitOptions;
21use shadow_shim_helper_rs::ipc::IPCData;
22use shadow_shim_helper_rs::shim_event::{
23    ShimEventAddThreadReq, ShimEventAddThreadRes, ShimEventStartRes, ShimEventSyscall,
24    ShimEventSyscallComplete, ShimEventToShadow, ShimEventToShim,
25};
26use shadow_shim_helper_rs::syscall_types::{ForeignPtr, SyscallArgs, SyscallReg};
27use shadow_shmem::allocator::ShMemBlock;
28use vasi_sync::scchannel::SelfContainedChannelError;
29
30use super::context::ThreadContext;
31use super::host::Host;
32use super::syscall::condition::SyscallCondition;
33use crate::core::worker::{WORKER_SHARED, Worker};
34use crate::cshadow;
35use crate::host::syscall::handler::SyscallHandler;
36use crate::host::syscall::types::{ForeignArrayPtr, SyscallReturn};
37use crate::utility::{VerifyPluginPathError, inject_preloads, syscall, verify_plugin_path};
38
/// The ManagedThread's state after having been allowed to execute some code.
#[derive(Debug)]
#[must_use]
pub enum ResumeResult {
    /// Blocked on a SyscallCondition (the emulated syscall couldn't complete yet).
    Blocked(SyscallCondition),
    /// The native thread has exited with the given code.
    ExitedThread(i32),
    /// The thread's process has exited.
    ExitedProcess,
}
50
/// Simulator-side handle for a native thread running inside a managed
/// (plugin) process. Communicates with the in-process shim over a
/// shared-memory IPC channel.
pub struct ManagedThread {
    // Shared-memory IPC channel used to exchange events with the shim.
    ipc_shmem: Arc<ShMemBlock<'static, IPCData>>,
    // Whether the native thread is still considered running; cleared by
    // `cleanup_after_exit_initiated` once the native thread has exited.
    is_running: Cell<bool>,
    // Exit code captured when the thread makes a SYS_exit syscall; `None` until then.
    return_code: Cell<Option<i32>>,

    /* holds the event for the most recent call from the plugin/shim */
    current_event: RefCell<ShimEventToShadow>,

    // Native (kernel) pid of the process this thread belongs to.
    native_pid: linux_api::posix_types::Pid,
    // Native (kernel) tid of the thread backing this object.
    native_tid: linux_api::posix_types::Pid,

    // Value storing the current CPU affinity of the thread (more precisely,
    // of the native thread backing this thread object). This value will be set
    // to AFFINITY_UNINIT if CPU pinning is not enabled or if the thread has
    // not yet been pinned to a CPU.
    affinity: Cell<i32>,
}
68
69impl ManagedThread {
    /// The native (kernel) pid of the process this thread belongs to.
    pub fn native_pid(&self) -> linux_api::posix_types::Pid {
        self.native_pid
    }
73
    /// The native (kernel) tid of the thread backing this object.
    pub fn native_tid(&self) -> linux_api::posix_types::Pid {
        self.native_tid
    }
77
78    /// Make the specified syscall on the native thread.
79    ///
80    /// Panics if the native thread is dead or dies during the syscall,
81    /// including if the syscall itself is SYS_exit or SYS_exit_group.
82    pub fn native_syscall(&self, ctx: &ThreadContext, n: i64, args: &[SyscallReg]) -> SyscallReg {
83        let mut syscall_args = SyscallArgs {
84            number: n,
85            args: [SyscallReg::from(0u64); 6],
86        };
87        syscall_args.args[..args.len()].copy_from_slice(args);
88        match self.continue_plugin(
89            ctx.host,
90            &ShimEventToShim::Syscall(ShimEventSyscall { syscall_args }),
91        ) {
92            ShimEventToShadow::SyscallComplete(res) => res.retval,
93            other => panic!("Unexpected response from plugin: {other:?}"),
94        }
95    }
96
    /// Spawn a new native process running `plugin_path` and return a
    /// `ManagedThread` managing its initial thread.
    ///
    /// Allocates a shared-memory IPC channel, launches the process via
    /// `spawn_native`, registers the child with the pid watcher (so the IPC
    /// channel is closed if the child dies), and then blocks until the shim in
    /// the child sends its start request.
    ///
    /// Returns an `Errno` if spawning fails, or `EPERM` as a best-effort guess
    /// if the child exits with status 127 (posix_spawn's exec-failure code)
    /// before the shim initializes.
    pub fn spawn(
        plugin_path: &CStr,
        argv: Vec<CString>,
        envv: Vec<CString>,
        strace_file: Option<&std::fs::File>,
        log_file: &std::fs::File,
        injected_preloads: &[PathBuf],
    ) -> Result<Self, Errno> {
        debug!(
            "spawning new mthread '{plugin_path:?}' with environment '{envv:?}', arguments '{argv:?}'"
        );

        // Ensure the configured preload libraries (e.g. the shim) are present
        // in the child's environment.
        let envv = inject_preloads(envv, injected_preloads);

        debug!("env after preload injection: {envv:?}");

        // Shared-memory block used for IPC with the shim in the child process.
        let ipc_shmem = Arc::new(shadow_shmem::allocator::shmalloc(IPCData::new()));

        let child_pid =
            Self::spawn_native(plugin_path, argv, envv, strace_file, log_file, &ipc_shmem)?;

        // In Linux, the PID is equal to the TID of its first thread.
        let native_pid = child_pid;
        let native_tid = child_pid;

        // Configure the child_pid_watcher to close the IPC channel when the child dies.
        {
            let worker = WORKER_SHARED.borrow();
            let watcher = worker.as_ref().unwrap().child_pid_watcher();

            watcher.register_pid(child_pid);
            let ipc = ipc_shmem.clone();
            watcher.register_callback(child_pid, move |_pid| {
                ipc.from_plugin().close_writer();
            })
        };

        trace!("waiting for start event from shim with native pid {native_pid:?}");
        let start_req = ipc_shmem.from_plugin().receive().unwrap();
        match &start_req {
            ShimEventToShadow::StartReq(_) => {
                // Expected result; shim is ready to initialize.
            }
            ShimEventToShadow::ProcessDeath => {
                // The process died before initializing the shim.
                //
                // Reap the dead process and return an error.
                let status =
                    rustix::process::waitpid(Some(native_pid.into()), WaitOptions::empty())
                        .unwrap()
                        .unwrap();
                if status.exit_status() == Some(127) {
                    // posix_spawn(3):
                    // > If  the child  fails  in  any  of the
                    // > housekeeping steps described below, or fails to
                    // > execute the desired file, it exits with a status of
                    // > 127.
                    debug!("posix_spawn failed to exec the process");
                    // Assume that execve failed, and return a plausible reason
                    // why it might have done so.
                    // TODO: replace our usage of posix_spawn with a custom
                    // implementation that can return the execve failure code?
                    return Err(Errno::EPERM);
                }
                // TODO: handle more gracefully.
                // * The native stdout/stderr might have a clue as to
                // why the process died.  Consider logging a hint to
                // check it (currently in the corresponding shimlog), or
                // directly capture it and display it here.
                // https://github.com/shadow/shadow/issues/3142
                // * Consider logging a warning here and continuing on to handle
                // the managed process exit normally. e.g. when this happens
                // as part of an emulated `execve`, we might want to continue
                // the simulation.
                panic!("Child process died unexpectedly before initialization: {status:?}");
            }
            other => panic!("Unexpected result from shim: {other:?}"),
        };

        Ok(Self {
            ipc_shmem,
            is_running: Cell::new(true),
            return_code: Cell::new(None),
            current_event: RefCell::new(start_req),
            native_pid,
            native_tid,
            affinity: Cell::new(cshadow::AFFINITY_UNINIT),
        })
    }
186
    /// Allow the managed thread to execute until it blocks on a
    /// `SyscallCondition`, exits, or its process exits.
    ///
    /// Loops handling events from the shim: answers the initial start request,
    /// emulates syscalls via `syscall_handler`, and handles process death.
    /// Each iteration replaces `self.current_event` with the shim's next event.
    pub fn resume(
        &self,
        ctx: &ThreadContext,
        syscall_handler: &mut SyscallHandler,
    ) -> ResumeResult {
        debug_assert!(self.is_running());

        self.sync_affinity_with_worker();

        // Flush any pending writes, e.g. from a previous mthread that exited
        // without flushing.
        ctx.process.free_unsafe_borrows_flush().unwrap();

        loop {
            let mut current_event = self.current_event.borrow_mut();
            let last_event = *current_event;
            *current_event = match last_event {
                ShimEventToShadow::StartReq(start_req) => {
                    // Write the serialized thread shmem handle directly to shim
                    // memory.
                    ctx.process
                        .memory_borrow_mut()
                        .write(
                            start_req.thread_shmem_block_to_init,
                            &ctx.thread.shmem().serialize(),
                        )
                        .unwrap();

                    if !start_req.process_shmem_block_to_init.is_null() {
                        // Write the serialized process shmem handle directly to
                        // shim memory.
                        ctx.process
                            .memory_borrow_mut()
                            .write(
                                start_req.process_shmem_block_to_init,
                                &ctx.process.shmem().serialize(),
                            )
                            .unwrap();
                    }

                    if !start_req.initial_working_dir_to_init.is_null() {
                        // Write the working dir.
                        let mut mem = ctx.process.memory_borrow_mut();
                        let mut writer = mem.writer(ForeignArrayPtr::new(
                            start_req.initial_working_dir_to_init,
                            start_req.initial_working_dir_to_init_len,
                        ));
                        writer
                            .write_all(ctx.process.current_working_dir().to_bytes_with_nul())
                            .unwrap();
                        writer.flush().unwrap();
                    }

                    // send the message to the shim to call main().
                    trace!("sending start event code to shim");
                    self.continue_plugin(
                        ctx.host,
                        &ShimEventToShim::StartRes(ShimEventStartRes {
                            auxvec_random: ctx.host.random_mut().random(),
                        }),
                    )
                }
                ShimEventToShadow::ProcessDeath => {
                    // The native threads are all dead or zombies. Nothing to do but
                    // clean up.
                    self.cleanup_after_exit_initiated();
                    return ResumeResult::ExitedProcess;
                }
                ShimEventToShadow::Syscall(syscall) => {
                    // Emulate the given syscall.

                    // `exit` is tricky since it only exits the *mthread*, and we don't have a way
                    // to be notified that the mthread has exited. We have to "fire and forget"
                    // the command to execute the syscall natively.
                    //
                    // TODO: We could use a tid futex in shared memory, as set by
                    // `set_tid_address`, to block here until the thread has
                    // actually exited.
                    if syscall.syscall_args.number == libc::SYS_exit {
                        let return_code = syscall.syscall_args.args[0].into();
                        debug!("Short-circuiting syscall exit({return_code})");
                        self.return_code.set(Some(return_code));
                        // Tell mthread to go ahead and make the exit syscall itself.
                        // We *don't* call `_managedthread_continuePlugin` here,
                        // since that'd release the ShimSharedMemHostLock, and we
                        // aren't going to get a message back to know when it'd be
                        // safe to take it again.
                        self.ipc_shmem
                            .to_plugin()
                            .send(ShimEventToShim::SyscallDoNative);
                        self.cleanup_after_exit_initiated();
                        return ResumeResult::ExitedThread(return_code);
                    }

                    let scr = syscall_handler.syscall(ctx, &syscall.syscall_args).into();

                    // remove the mthread's old syscall condition since it's no longer needed
                    ctx.thread.cleanup_syscall_condition();

                    assert!(self.is_running());

                    // Flush any writes that legacy C syscallhandlers may have
                    // made.
                    ctx.process.free_unsafe_borrows_flush().unwrap();

                    match scr {
                        SyscallReturn::Block(b) => {
                            return ResumeResult::Blocked(unsafe {
                                SyscallCondition::consume_from_c(b.cond)
                            });
                        }
                        SyscallReturn::Done(d) => self.continue_plugin(
                            ctx.host,
                            &ShimEventToShim::SyscallComplete(ShimEventSyscallComplete {
                                retval: d.retval,
                                restartable: d.restartable,
                            }),
                        ),
                        SyscallReturn::Native => {
                            self.continue_plugin(ctx.host, &ShimEventToShim::SyscallDoNative)
                        }
                    }
                }
                ShimEventToShadow::AddThreadRes(res) => {
                    // We get here in the child process after forking.

                    // Child should have gotten 0 back from its native clone syscall.
                    assert_eq!(res.clone_res, 0);

                    // Complete the virtualized clone syscall.
                    self.continue_plugin(
                        ctx.host,
                        &ShimEventToShim::SyscallComplete(ShimEventSyscallComplete {
                            retval: 0.into(),
                            restartable: false,
                        }),
                    )
                }
                e @ ShimEventToShadow::SyscallComplete(_) => panic!("Unexpected event: {e:?}"),
            };
            assert!(self.is_running());
        }
    }
330
331    pub fn handle_process_exit(&self) {
332        // TODO: Only do this once per process; maybe by moving into `Process`.
333        WORKER_SHARED
334            .borrow()
335            .as_ref()
336            .unwrap()
337            .child_pid_watcher()
338            .unregister_pid(self.native_pid());
339
340        self.cleanup_after_exit_initiated();
341    }
342
    /// The exit code recorded when this thread made its `exit` syscall, or
    /// `None` if it hasn't exited that way.
    pub fn return_code(&self) -> Option<i32> {
        self.return_code.get()
    }
346
    /// Whether the native thread is still considered running.
    pub fn is_running(&self) -> bool {
        self.is_running.get()
    }
350
351    /// Execute the specified `clone` syscall in `self`, and use create a new
352    /// `ManagedThread` object to manage it. The new thread will be managed
353    /// by Shadow, and suitable for use with `Thread::wrap_mthread`.
354    ///
355    /// If the `clone` syscall fails, the native error is returned.
356    pub fn native_clone(
357        &self,
358        ctx: &ThreadContext,
359        flags: CloneFlags,
360        child_stack: ForeignPtr<()>,
361        ptid: ForeignPtr<libc::pid_t>,
362        ctid: ForeignPtr<libc::pid_t>,
363        newtls: libc::c_ulong,
364    ) -> Result<ManagedThread, linux_api::errno::Errno> {
365        let child_ipc_shmem = Arc::new(shadow_shmem::allocator::shmalloc(IPCData::new()));
366
367        // Send the IPC block for the new mthread to use.
368        let clone_res: i64 = match self.continue_plugin(
369            ctx.host,
370            &ShimEventToShim::AddThreadReq(ShimEventAddThreadReq {
371                ipc_block: child_ipc_shmem.serialize(),
372                flags: flags.bits(),
373                child_stack,
374                ptid: ptid.cast::<()>(),
375                ctid: ctid.cast::<()>(),
376                newtls,
377            }),
378        ) {
379            ShimEventToShadow::AddThreadRes(ShimEventAddThreadRes { clone_res }) => clone_res,
380            r => panic!("Unexpected result: {r:?}"),
381        };
382        let clone_res: SyscallReg = syscall::raw_return_value_to_result(clone_res)?;
383        let child_native_tid = Pid::from_raw(libc::pid_t::from(clone_res)).unwrap();
384        trace!("native clone treated tid {child_native_tid:?}");
385
386        trace!("waiting for start event from shim with native tid {child_native_tid:?}");
387        let start_req = child_ipc_shmem.from_plugin().receive().unwrap();
388        match &start_req {
389            ShimEventToShadow::StartReq(_) => (),
390            other => panic!("Unexpected result from shim: {other:?}"),
391        };
392
393        let native_pid = if flags.contains(CloneFlags::CLONE_THREAD) {
394            self.native_pid
395        } else {
396            child_native_tid
397        };
398
399        if !flags.contains(CloneFlags::CLONE_THREAD) {
400            // Child is a new process; register it.
401            WORKER_SHARED
402                .borrow()
403                .as_ref()
404                .unwrap()
405                .child_pid_watcher()
406                .register_pid(native_pid);
407        }
408
409        // Register the child thread's IPC block with the ChildPidWatcher.
410        {
411            let child_ipc_shmem = child_ipc_shmem.clone();
412            WORKER_SHARED
413                .borrow()
414                .as_ref()
415                .unwrap()
416                .child_pid_watcher()
417                .register_callback(native_pid, move |_pid| {
418                    child_ipc_shmem.from_plugin().close_writer();
419                })
420        };
421
422        Ok(Self {
423            ipc_shmem: child_ipc_shmem,
424            is_running: Cell::new(true),
425            return_code: Cell::new(None),
426            current_event: RefCell::new(start_req),
427            native_pid,
428            native_tid: child_native_tid,
429            // TODO: can we assume it's inherited from the current thread affinity?
430            affinity: Cell::new(cshadow::AFFINITY_UNINIT),
431        })
432    }
433
    /// Transfer control to the managed thread: send `event` to the shim, then
    /// block until the shim sends the next event back to Shadow.
    ///
    /// Updates the shared runahead time and simulation time before yielding,
    /// and releases the host's shim shared-memory lock while the plugin runs
    /// (reacquiring it on return). If the IPC channel's writer is closed —
    /// i.e. the managed process died — returns
    /// `ShimEventToShadow::ProcessDeath` instead.
    #[must_use]
    fn continue_plugin(&self, host: &Host, event: &ShimEventToShim) -> ShimEventToShadow {
        // Update shared state before transferring control.
        host.shim_shmem_lock_borrow_mut().unwrap().max_runahead_time =
            Worker::max_event_runahead_time(host);
        host.shim_shmem()
            .sim_time
            .store(Worker::current_time().unwrap(), atomic::Ordering::Relaxed);

        // Release lock so that plugin can take it. Reacquired in `wait_for_next_event`.
        host.unlock_shmem();

        self.ipc_shmem.to_plugin().send(*event);

        let event = match self.ipc_shmem.from_plugin().receive() {
            Ok(e) => e,
            Err(SelfContainedChannelError::WriterIsClosed) => ShimEventToShadow::ProcessDeath,
        };

        // Reacquire the shared memory lock, now that the shim has yielded control
        // back to us.
        host.lock_shmem();

        // Update time, which may have been incremented in the shim.
        let shim_time = host.shim_shmem().sim_time.load(atomic::Ordering::Relaxed);
        if log_enabled!(Level::Trace) {
            let worker_time = Worker::current_time().unwrap();
            if shim_time != worker_time {
                trace!(
                    "Updating time from {worker_time:?} to {shim_time:?} (+{:?})",
                    shim_time - worker_time
                );
            }
        }
        Worker::set_current_time(shim_time);

        event
    }
472
473    /// To be called after we expect the native thread to have exited, or to
474    /// exit imminently.
475    fn cleanup_after_exit_initiated(&self) {
476        if !self.is_running.get() {
477            return;
478        }
479        self.wait_for_native_exit();
480        trace!("child {:?} exited", self.native_tid());
481        self.is_running.set(false);
482    }
483
    /// Wait until the managed thread is no longer running.
    ///
    /// Polls with `tgkill(sig=None)` (an existence probe that sends no
    /// signal), plus `/proc/<pid>/stat` for the thread-group leader, which can
    /// linger as a zombie while other threads exit.
    fn wait_for_native_exit(&self) {
        let native_pid = self.native_pid();
        let native_tid = self.native_tid();

        // We use `tgkill` and `/proc/x/stat` to detect whether the thread is still running,
        // looping until it doesn't.
        //
        // Alternatively we could use `set_tid_address` or `set_robust_list` to
        // be notified on a futex. Those are a bit underdocumented and fragile,
        // though. In practice this shouldn't have to loop significantly.
        trace!("Waiting for native thread {native_pid:?}.{native_tid:?} to exit");
        loop {
            if self.ipc_shmem.from_plugin().writer_is_closed() {
                // This indicates that the whole process has stopped executing;
                // no need to poll the individual thread.
                break;
            }
            match tgkill(native_pid, native_tid, None) {
                Err(Errno::ESRCH) => {
                    trace!("Thread is done exiting; proceeding with cleanup");
                    break;
                }
                Err(e) => {
                    error!("Unexpected tgkill error: {e:?}");
                    break;
                }
                Ok(()) if native_pid == native_tid => {
                    // Thread leader could be in a zombie state waiting for
                    // the other threads to exit.
                    let filename = format!("/proc/{}/stat", native_pid.as_raw_nonzero().get());
                    let stat = match std::fs::read_to_string(filename) {
                        Err(e) => {
                            // Stat file gone means the process is fully reaped.
                            assert!(e.kind() == std::io::ErrorKind::NotFound);
                            trace!("tgl {native_pid:?} is fully dead");
                            break;
                        }
                        Ok(s) => s,
                    };
                    // The process state field follows the parenthesized comm
                    // field; "Z" marks a zombie.
                    if stat.contains(") Z") {
                        trace!("tgl {native_pid:?} is a zombie");
                        break;
                    }
                    // Still alive and in a non-zombie state; continue
                }
                Ok(()) => {
                    // Thread is still alive; continue.
                }
            };
            std::thread::yield_now();
        }
    }
536
    /// Pin the native thread to the current worker's CPU (when pinning is
    /// enabled), caching the result in `self.affinity`.
    fn sync_affinity_with_worker(&self) {
        // AFFINITY_UNINIT signals "no pinning" to the C helper.
        let current_affinity = scheduler::core_affinity()
            .map(|x| i32::try_from(x).unwrap())
            .unwrap_or(cshadow::AFFINITY_UNINIT);
        // SAFETY: plain FFI call into the C affinity helper; takes raw ints
        // and returns the (possibly unchanged) affinity value.
        self.affinity.set(unsafe {
            cshadow::affinity_setProcessAffinity(
                self.native_tid().as_raw_nonzero().get(),
                current_affinity,
                self.affinity.get(),
            )
        });
    }
549
550    fn spawn_native(
551        plugin_path: &CStr,
552        argv: Vec<CString>,
553        envv: Vec<CString>,
554        strace_file: Option<&std::fs::File>,
555        shimlog_file: &std::fs::File,
556        shmem_block: &ShMemBlock<IPCData>,
557    ) -> Result<Pid, Errno> {
558        // Preemptively check for likely reasons that execve might fail.
559        // In particular we want to ensure that we  don't launch a statically
560        // linked executable, since we'd then deadlock the whole simulation
561        // waiting for the plugin to initialize.
562        //
563        // This is also helpful since we can't retrieve specific `execve` errors
564        // through `posix_spawn`.
565        fn map_verify_err(e: VerifyPluginPathError) -> Errno {
566            match e {
567                // execve(2): ENOENT The file pathname [...] does not exist.
568                VerifyPluginPathError::NotFound => Errno::ENOENT,
569                // execve(2): EACCES The file or a script interpreter is not a regular file.
570                VerifyPluginPathError::NotFile => Errno::EACCES,
571                // execve(2): EACCES Execute permission is denied for the file or a script or ELF interpreter.
572                VerifyPluginPathError::NotExecutable => Errno::EACCES,
573                // execve(2): ENOEXEC An executable is not in a recognized
574                // format, is for the wrong architecture, or has some other
575                // format error that means it cannot be executed.
576                VerifyPluginPathError::UnknownFileType => Errno::ENOEXEC,
577                VerifyPluginPathError::NotDynamicallyLinkedElf => Errno::ENOEXEC,
578                VerifyPluginPathError::IncompatibleInterpreter(e) => map_verify_err(*e),
579                // execve(2): EACCES Search permission is denied on a component
580                // of the path prefix of pathname or the name of a script
581                // interpreter.
582                VerifyPluginPathError::PathPermissionDenied => Errno::EACCES,
583                VerifyPluginPathError::UnhandledIoError(_) => {
584                    // Arbitrary error that should be handled by callers.
585                    Errno::ENOEXEC
586                }
587            }
588        }
589        verify_plugin_path(std::ffi::OsStr::from_bytes(plugin_path.to_bytes()))
590            .map_err(map_verify_err)?;
591
592        // posix_spawn is documented as taking pointers to *mutable* char for argv and
593        // envv. It *probably* doesn't actually mutate them, but we
594        // conservatively give it what it asks for. We have to "reconstitute"
595        // the CString's after the fork + exec to deallocate them.
596        let argv_ptrs: Vec<*mut i8> = argv
597            .into_iter()
598            .map(CString::into_raw)
599            // the last element of argv must be NULL
600            .chain(std::iter::once(std::ptr::null_mut()))
601            .collect();
602        let envv_ptrs: Vec<*mut i8> = envv
603            .into_iter()
604            .map(CString::into_raw)
605            // the last element of argv must be NULL
606            .chain(std::iter::once(std::ptr::null_mut()))
607            .collect();
608
609        let mut file_actions: libc::posix_spawn_file_actions_t = shadow_pod::zeroed();
610        Errno::result_from_libc_errnum(unsafe {
611            libc::posix_spawn_file_actions_init(&mut file_actions)
612        })
613        .unwrap();
614
615        // Set up stdin
616        let (stdin_reader, stdin_writer) = rustix::pipe::pipe_with(PipeFlags::CLOEXEC).unwrap();
617        Errno::result_from_libc_errnum(unsafe {
618            libc::posix_spawn_file_actions_adddup2(
619                &mut file_actions,
620                stdin_reader.as_raw_fd(),
621                libc::STDIN_FILENO,
622            )
623        })
624        .unwrap();
625
626        // Dup straceFd; the dup'd descriptor won't have O_CLOEXEC set.
627        //
628        // Since dup2 is a no-op when the new and old file descriptors are equal, we have
629        // to arrange to call dup2 twice - first to a temporary descriptor, and then back
630        // to the original descriptor number.
631        //
632        // Here we use STDOUT_FILENO as the temporary descriptor, since we later
633        // replace that below.
634        //
635        // Once we drop support for platforms with glibc older than 2.29, we *could*
636        // consider taking advantage of a new feature that would let us just use a
637        // single `posix_spawn_file_actions_adddup2` call with equal descriptors.
638        // OTOH it's a non-standard extension, and I think ultimately uses the same
639        // number of syscalls, so it might be better to continue using this slightly
640        // more awkward method anyway.
641        // https://github.com/bminor/glibc/commit/805334b26c7e6e83557234f2008497c72176a6cd
642        // https://austingroupbugs.net/view.php?id=411
643        if let Some(strace_file) = strace_file {
644            Errno::result_from_libc_errnum(unsafe {
645                libc::posix_spawn_file_actions_adddup2(
646                    &mut file_actions,
647                    strace_file.as_raw_fd(),
648                    libc::STDOUT_FILENO,
649                )
650            })
651            .unwrap();
652            Errno::result_from_libc_errnum(unsafe {
653                libc::posix_spawn_file_actions_adddup2(
654                    &mut file_actions,
655                    libc::STDOUT_FILENO,
656                    strace_file.as_raw_fd(),
657                )
658            })
659            .unwrap();
660        }
661
662        // set stdout/stderr as the shim log. This also clears the FD_CLOEXEC flag.
663        Errno::result_from_libc_errnum(unsafe {
664            libc::posix_spawn_file_actions_adddup2(
665                &mut file_actions,
666                shimlog_file.as_raw_fd(),
667                libc::STDOUT_FILENO,
668            )
669        })
670        .unwrap();
671        Errno::result_from_libc_errnum(unsafe {
672            libc::posix_spawn_file_actions_adddup2(
673                &mut file_actions,
674                shimlog_file.as_raw_fd(),
675                libc::STDERR_FILENO,
676            )
677        })
678        .unwrap();
679
680        let mut spawn_attr: libc::posix_spawnattr_t = shadow_pod::zeroed();
681        Errno::result_from_libc_errnum(unsafe { libc::posix_spawnattr_init(&mut spawn_attr) })
682            .unwrap();
683
684        // In versions of glibc before 2.24, we need this to tell posix_spawn
685        // to use vfork instead of fork. In later versions it's a no-op.
686        Errno::result_from_libc_errnum(unsafe {
687            libc::posix_spawnattr_setflags(
688                &mut spawn_attr,
689                libc::POSIX_SPAWN_USEVFORK.try_into().unwrap(),
690            )
691        })
692        .unwrap();
693
694        let child_pid_res = {
695            let mut child_pid = -1;
696            Errno::result_from_libc_errnum(unsafe {
697                libc::posix_spawn(
698                    &mut child_pid,
699                    plugin_path.as_ptr(),
700                    &file_actions,
701                    &spawn_attr,
702                    argv_ptrs.as_ptr(),
703                    envv_ptrs.as_ptr(),
704                )
705            })
706            .map(|_| Pid::from_raw(child_pid).unwrap_or_else(|| panic!("Invalid pid: {child_pid}")))
707        };
708
709        // Write the serialized shmem descriptor to the stdin pipe. The pipe
710        // buffer should be large enough that we can write it all without having
711        // to wait for data to be read.
712        if child_pid_res.is_ok() {
713            // we avoid using the rustix write wrapper here, since we can't guarantee
714            // that all bytes of the serialized shmem block are initd, and hence
715            // can't safely construct the &[u8] that it wants.
716            let serialized = shmem_block.serialize();
717            let serialized_bytes = shadow_pod::as_u8_slice(&serialized);
718            let written = Errno::result_from_libc_errno(-1, unsafe {
719                libc::write(
720                    stdin_writer.as_raw_fd(),
721                    serialized_bytes.as_ptr().cast(),
722                    serialized_bytes.len(),
723                )
724            })
725            .unwrap();
726            // TODO: loop if needed. Shouldn't be in practice, though.
727            assert_eq!(written, isize::try_from(serialized_bytes.len()).unwrap());
728        }
729
730        Errno::result_from_libc_errnum(unsafe {
731            libc::posix_spawn_file_actions_destroy(&mut file_actions)
732        })
733        .unwrap();
734        Errno::result_from_libc_errnum(unsafe { libc::posix_spawnattr_destroy(&mut spawn_attr) })
735            .unwrap();
736
737        // Drop the cloned argv and env.
738        drop(
739            argv_ptrs
740                .into_iter()
741                .filter(|p| !p.is_null())
742                .map(|p| unsafe { CString::from_raw(p) }),
743        );
744        drop(
745            envv_ptrs
746                .into_iter()
747                .filter(|p| !p.is_null())
748                .map(|p| unsafe { CString::from_raw(p) }),
749        );
750
751        debug!(
752            "starting process {}, result: {child_pid_res:?}",
753            plugin_path.to_str().unwrap()
754        );
755
756        child_pid_res
757    }
758
759    /// `ManagedThread` panics if dropped while the underlying process is still running,
760    /// since otherwise that process could continue writing to shared memory regions
761    /// that shadow reallocates.
762    ///
763    /// This method kills the process that `self` belongs to (not just the
764    /// thread!) and then drops `self`.
765    pub fn kill_and_drop(self) {
766        if let Err(err) =
767            rustix::process::kill_process(self.native_pid().into(), rustix::process::Signal::Kill)
768        {
769            log::warn!(
770                "Couldn't kill managed process {:?}. kill: {:?}",
771                self.native_pid(),
772                err
773            );
774        }
775        self.handle_process_exit();
776    }
777}
778
impl Drop for ManagedThread {
    fn drop(&mut self) {
        // Dropping while the thread is running is unsound because the running
        // thread still has access to shared memory regions that will be
        // deallocated, and potentially reallocated for another purpose. The
        // running thread accessing a deallocated or repurposed memory region
        // can cause numerous problems.
        //
        // Use `kill_and_drop()` to dispose of a thread that may still be running.
        assert!(!self.is_running());
    }
}