1use std::cell::{Cell, RefCell};
6use std::ffi::{CStr, CString};
7use std::io::Write;
8use std::os::fd::AsRawFd;
9use std::os::unix::prelude::OsStrExt;
10use std::path::PathBuf;
11use std::sync::{Arc, atomic};
12
13use linux_api::errno::Errno;
14use linux_api::posix_types::Pid;
15use linux_api::sched::CloneFlags;
16use linux_api::signal::tgkill;
17use log::{Level, debug, error, log_enabled, trace};
18use rand::Rng as _;
19use rustix::pipe::PipeFlags;
20use rustix::process::WaitOptions;
21use shadow_shim_helper_rs::ipc::IPCData;
22use shadow_shim_helper_rs::shim_event::{
23 ShimEventAddThreadReq, ShimEventAddThreadRes, ShimEventStartRes, ShimEventSyscall,
24 ShimEventSyscallComplete, ShimEventToShadow, ShimEventToShim,
25};
26use shadow_shim_helper_rs::syscall_types::{ForeignPtr, SyscallArgs, SyscallReg};
27use shadow_shmem::allocator::ShMemBlock;
28use vasi_sync::scchannel::SelfContainedChannelError;
29
30use super::context::ThreadContext;
31use super::host::Host;
32use super::syscall::condition::SyscallCondition;
33use crate::core::worker::{WORKER_SHARED, Worker};
34use crate::cshadow;
35use crate::host::syscall::handler::SyscallHandler;
36use crate::host::syscall::types::{ForeignArrayPtr, SyscallReturn};
37use crate::utility::{VerifyPluginPathError, inject_preloads, syscall, verify_plugin_path};
38
39#[derive(Debug)]
41#[must_use]
42pub enum ResumeResult {
43 Blocked(SyscallCondition),
45 ExitedThread(i32),
47 ExitedProcess,
49}
50
51pub struct ManagedThread {
52 ipc_shmem: Arc<ShMemBlock<'static, IPCData>>,
53 is_running: Cell<bool>,
54 return_code: Cell<Option<i32>>,
55
56 current_event: RefCell<ShimEventToShadow>,
58
59 native_pid: linux_api::posix_types::Pid,
60 native_tid: linux_api::posix_types::Pid,
61
62 affinity: Cell<i32>,
67}
68
69impl ManagedThread {
70 pub fn native_pid(&self) -> linux_api::posix_types::Pid {
71 self.native_pid
72 }
73
74 pub fn native_tid(&self) -> linux_api::posix_types::Pid {
75 self.native_tid
76 }
77
78 pub fn native_syscall(&self, ctx: &ThreadContext, n: i64, args: &[SyscallReg]) -> SyscallReg {
83 let mut syscall_args = SyscallArgs {
84 number: n,
85 args: [SyscallReg::from(0u64); 6],
86 };
87 syscall_args.args[..args.len()].copy_from_slice(args);
88 match self.continue_plugin(
89 ctx.host,
90 &ShimEventToShim::Syscall(ShimEventSyscall { syscall_args }),
91 ) {
92 ShimEventToShadow::SyscallComplete(res) => res.retval,
93 other => panic!("Unexpected response from plugin: {other:?}"),
94 }
95 }
96
97 pub fn spawn(
98 plugin_path: &CStr,
99 argv: Vec<CString>,
100 envv: Vec<CString>,
101 strace_file: Option<&std::fs::File>,
102 log_file: &std::fs::File,
103 injected_preloads: &[PathBuf],
104 ) -> Result<Self, Errno> {
105 debug!(
106 "spawning new mthread '{plugin_path:?}' with environment '{envv:?}', arguments '{argv:?}'"
107 );
108
109 let envv = inject_preloads(envv, injected_preloads);
110
111 debug!("env after preload injection: {envv:?}");
112
113 let ipc_shmem = Arc::new(shadow_shmem::allocator::shmalloc(IPCData::new()));
114
115 let child_pid =
116 Self::spawn_native(plugin_path, argv, envv, strace_file, log_file, &ipc_shmem)?;
117
118 let native_pid = child_pid;
120 let native_tid = child_pid;
121
122 {
124 let worker = WORKER_SHARED.borrow();
125 let watcher = worker.as_ref().unwrap().child_pid_watcher();
126
127 watcher.register_pid(child_pid);
128 let ipc = ipc_shmem.clone();
129 watcher.register_callback(child_pid, move |_pid| {
130 ipc.from_plugin().close_writer();
131 })
132 };
133
134 trace!("waiting for start event from shim with native pid {native_pid:?}");
135 let start_req = ipc_shmem.from_plugin().receive().unwrap();
136 match &start_req {
137 ShimEventToShadow::StartReq(_) => {
138 }
140 ShimEventToShadow::ProcessDeath => {
141 let status =
145 rustix::process::waitpid(Some(native_pid.into()), WaitOptions::empty())
146 .unwrap()
147 .unwrap();
148 if status.exit_status() == Some(127) {
149 debug!("posix_spawn failed to exec the process");
155 return Err(Errno::EPERM);
160 }
161 panic!("Child process died unexpectedly before initialization: {status:?}");
172 }
173 other => panic!("Unexpected result from shim: {other:?}"),
174 };
175
176 Ok(Self {
177 ipc_shmem,
178 is_running: Cell::new(true),
179 return_code: Cell::new(None),
180 current_event: RefCell::new(start_req),
181 native_pid,
182 native_tid,
183 affinity: Cell::new(cshadow::AFFINITY_UNINIT),
184 })
185 }
186
187 pub fn resume(
188 &self,
189 ctx: &ThreadContext,
190 syscall_handler: &mut SyscallHandler,
191 ) -> ResumeResult {
192 debug_assert!(self.is_running());
193
194 self.sync_affinity_with_worker();
195
196 ctx.process.free_unsafe_borrows_flush().unwrap();
199
200 loop {
201 let mut current_event = self.current_event.borrow_mut();
202 let last_event = *current_event;
203 *current_event = match last_event {
204 ShimEventToShadow::StartReq(start_req) => {
205 ctx.process
208 .memory_borrow_mut()
209 .write(
210 start_req.thread_shmem_block_to_init,
211 &ctx.thread.shmem().serialize(),
212 )
213 .unwrap();
214
215 if !start_req.process_shmem_block_to_init.is_null() {
216 ctx.process
219 .memory_borrow_mut()
220 .write(
221 start_req.process_shmem_block_to_init,
222 &ctx.process.shmem().serialize(),
223 )
224 .unwrap();
225 }
226
227 if !start_req.initial_working_dir_to_init.is_null() {
228 let mut mem = ctx.process.memory_borrow_mut();
230 let mut writer = mem.writer(ForeignArrayPtr::new(
231 start_req.initial_working_dir_to_init,
232 start_req.initial_working_dir_to_init_len,
233 ));
234 writer
235 .write_all(ctx.process.current_working_dir().to_bytes_with_nul())
236 .unwrap();
237 writer.flush().unwrap();
238 }
239
240 trace!("sending start event code to shim");
242 self.continue_plugin(
243 ctx.host,
244 &ShimEventToShim::StartRes(ShimEventStartRes {
245 auxvec_random: ctx.host.random_mut().random(),
246 }),
247 )
248 }
249 ShimEventToShadow::ProcessDeath => {
250 self.cleanup_after_exit_initiated();
253 return ResumeResult::ExitedProcess;
254 }
255 ShimEventToShadow::Syscall(syscall) => {
256 if syscall.syscall_args.number == libc::SYS_exit {
266 let return_code = syscall.syscall_args.args[0].into();
267 debug!("Short-circuiting syscall exit({return_code})");
268 self.return_code.set(Some(return_code));
269 self.ipc_shmem
275 .to_plugin()
276 .send(ShimEventToShim::SyscallDoNative);
277 self.cleanup_after_exit_initiated();
278 return ResumeResult::ExitedThread(return_code);
279 }
280
281 let scr = syscall_handler.syscall(ctx, &syscall.syscall_args).into();
282
283 ctx.thread.cleanup_syscall_condition();
285
286 assert!(self.is_running());
287
288 ctx.process.free_unsafe_borrows_flush().unwrap();
291
292 match scr {
293 SyscallReturn::Block(b) => {
294 return ResumeResult::Blocked(unsafe {
295 SyscallCondition::consume_from_c(b.cond)
296 });
297 }
298 SyscallReturn::Done(d) => self.continue_plugin(
299 ctx.host,
300 &ShimEventToShim::SyscallComplete(ShimEventSyscallComplete {
301 retval: d.retval,
302 restartable: d.restartable,
303 }),
304 ),
305 SyscallReturn::Native => {
306 self.continue_plugin(ctx.host, &ShimEventToShim::SyscallDoNative)
307 }
308 }
309 }
310 ShimEventToShadow::AddThreadRes(res) => {
311 assert_eq!(res.clone_res, 0);
315
316 self.continue_plugin(
318 ctx.host,
319 &ShimEventToShim::SyscallComplete(ShimEventSyscallComplete {
320 retval: 0.into(),
321 restartable: false,
322 }),
323 )
324 }
325 e @ ShimEventToShadow::SyscallComplete(_) => panic!("Unexpected event: {e:?}"),
326 };
327 assert!(self.is_running());
328 }
329 }
330
331 pub fn handle_process_exit(&self) {
332 WORKER_SHARED
334 .borrow()
335 .as_ref()
336 .unwrap()
337 .child_pid_watcher()
338 .unregister_pid(self.native_pid());
339
340 self.cleanup_after_exit_initiated();
341 }
342
343 pub fn return_code(&self) -> Option<i32> {
344 self.return_code.get()
345 }
346
347 pub fn is_running(&self) -> bool {
348 self.is_running.get()
349 }
350
351 pub fn native_clone(
357 &self,
358 ctx: &ThreadContext,
359 flags: CloneFlags,
360 child_stack: ForeignPtr<()>,
361 ptid: ForeignPtr<libc::pid_t>,
362 ctid: ForeignPtr<libc::pid_t>,
363 newtls: libc::c_ulong,
364 ) -> Result<ManagedThread, linux_api::errno::Errno> {
365 let child_ipc_shmem = Arc::new(shadow_shmem::allocator::shmalloc(IPCData::new()));
366
367 let clone_res: i64 = match self.continue_plugin(
369 ctx.host,
370 &ShimEventToShim::AddThreadReq(ShimEventAddThreadReq {
371 ipc_block: child_ipc_shmem.serialize(),
372 flags: flags.bits(),
373 child_stack,
374 ptid: ptid.cast::<()>(),
375 ctid: ctid.cast::<()>(),
376 newtls,
377 }),
378 ) {
379 ShimEventToShadow::AddThreadRes(ShimEventAddThreadRes { clone_res }) => clone_res,
380 r => panic!("Unexpected result: {r:?}"),
381 };
382 let clone_res: SyscallReg = syscall::raw_return_value_to_result(clone_res)?;
383 let child_native_tid = Pid::from_raw(libc::pid_t::from(clone_res)).unwrap();
384 trace!("native clone treated tid {child_native_tid:?}");
385
386 trace!("waiting for start event from shim with native tid {child_native_tid:?}");
387 let start_req = child_ipc_shmem.from_plugin().receive().unwrap();
388 match &start_req {
389 ShimEventToShadow::StartReq(_) => (),
390 other => panic!("Unexpected result from shim: {other:?}"),
391 };
392
393 let native_pid = if flags.contains(CloneFlags::CLONE_THREAD) {
394 self.native_pid
395 } else {
396 child_native_tid
397 };
398
399 if !flags.contains(CloneFlags::CLONE_THREAD) {
400 WORKER_SHARED
402 .borrow()
403 .as_ref()
404 .unwrap()
405 .child_pid_watcher()
406 .register_pid(native_pid);
407 }
408
409 {
411 let child_ipc_shmem = child_ipc_shmem.clone();
412 WORKER_SHARED
413 .borrow()
414 .as_ref()
415 .unwrap()
416 .child_pid_watcher()
417 .register_callback(native_pid, move |_pid| {
418 child_ipc_shmem.from_plugin().close_writer();
419 })
420 };
421
422 Ok(Self {
423 ipc_shmem: child_ipc_shmem,
424 is_running: Cell::new(true),
425 return_code: Cell::new(None),
426 current_event: RefCell::new(start_req),
427 native_pid,
428 native_tid: child_native_tid,
429 affinity: Cell::new(cshadow::AFFINITY_UNINIT),
431 })
432 }
433
434 #[must_use]
435 fn continue_plugin(&self, host: &Host, event: &ShimEventToShim) -> ShimEventToShadow {
436 host.shim_shmem_lock_borrow_mut().unwrap().max_runahead_time =
438 Worker::max_event_runahead_time(host);
439 host.shim_shmem()
440 .sim_time
441 .store(Worker::current_time().unwrap(), atomic::Ordering::Relaxed);
442
443 host.unlock_shmem();
445
446 self.ipc_shmem.to_plugin().send(*event);
447
448 let event = match self.ipc_shmem.from_plugin().receive() {
449 Ok(e) => e,
450 Err(SelfContainedChannelError::WriterIsClosed) => ShimEventToShadow::ProcessDeath,
451 };
452
453 host.lock_shmem();
456
457 let shim_time = host.shim_shmem().sim_time.load(atomic::Ordering::Relaxed);
459 if log_enabled!(Level::Trace) {
460 let worker_time = Worker::current_time().unwrap();
461 if shim_time != worker_time {
462 trace!(
463 "Updating time from {worker_time:?} to {shim_time:?} (+{:?})",
464 shim_time - worker_time
465 );
466 }
467 }
468 Worker::set_current_time(shim_time);
469
470 event
471 }
472
473 fn cleanup_after_exit_initiated(&self) {
476 if !self.is_running.get() {
477 return;
478 }
479 self.wait_for_native_exit();
480 trace!("child {:?} exited", self.native_tid());
481 self.is_running.set(false);
482 }
483
484 fn wait_for_native_exit(&self) {
486 let native_pid = self.native_pid();
487 let native_tid = self.native_tid();
488
489 trace!("Waiting for native thread {native_pid:?}.{native_tid:?} to exit");
496 loop {
497 if self.ipc_shmem.from_plugin().writer_is_closed() {
498 break;
501 }
502 match tgkill(native_pid, native_tid, None) {
503 Err(Errno::ESRCH) => {
504 trace!("Thread is done exiting; proceeding with cleanup");
505 break;
506 }
507 Err(e) => {
508 error!("Unexpected tgkill error: {e:?}");
509 break;
510 }
511 Ok(()) if native_pid == native_tid => {
512 let filename = format!("/proc/{}/stat", native_pid.as_raw_nonzero().get());
515 let stat = match std::fs::read_to_string(filename) {
516 Err(e) => {
517 assert!(e.kind() == std::io::ErrorKind::NotFound);
518 trace!("tgl {native_pid:?} is fully dead");
519 break;
520 }
521 Ok(s) => s,
522 };
523 if stat.contains(") Z") {
524 trace!("tgl {native_pid:?} is a zombie");
525 break;
526 }
527 }
529 Ok(()) => {
530 }
532 };
533 std::thread::yield_now();
534 }
535 }
536
537 fn sync_affinity_with_worker(&self) {
538 let current_affinity = scheduler::core_affinity()
539 .map(|x| i32::try_from(x).unwrap())
540 .unwrap_or(cshadow::AFFINITY_UNINIT);
541 self.affinity.set(unsafe {
542 cshadow::affinity_setProcessAffinity(
543 self.native_tid().as_raw_nonzero().get(),
544 current_affinity,
545 self.affinity.get(),
546 )
547 });
548 }
549
550 fn spawn_native(
551 plugin_path: &CStr,
552 argv: Vec<CString>,
553 envv: Vec<CString>,
554 strace_file: Option<&std::fs::File>,
555 shimlog_file: &std::fs::File,
556 shmem_block: &ShMemBlock<IPCData>,
557 ) -> Result<Pid, Errno> {
558 fn map_verify_err(e: VerifyPluginPathError) -> Errno {
566 match e {
567 VerifyPluginPathError::NotFound => Errno::ENOENT,
569 VerifyPluginPathError::NotFile => Errno::EACCES,
571 VerifyPluginPathError::NotExecutable => Errno::EACCES,
573 VerifyPluginPathError::UnknownFileType => Errno::ENOEXEC,
577 VerifyPluginPathError::NotDynamicallyLinkedElf => Errno::ENOEXEC,
578 VerifyPluginPathError::IncompatibleInterpreter(e) => map_verify_err(*e),
579 VerifyPluginPathError::PathPermissionDenied => Errno::EACCES,
583 VerifyPluginPathError::UnhandledIoError(_) => {
584 Errno::ENOEXEC
586 }
587 }
588 }
589 verify_plugin_path(std::ffi::OsStr::from_bytes(plugin_path.to_bytes()))
590 .map_err(map_verify_err)?;
591
592 let argv_ptrs: Vec<*mut i8> = argv
597 .into_iter()
598 .map(CString::into_raw)
599 .chain(std::iter::once(std::ptr::null_mut()))
601 .collect();
602 let envv_ptrs: Vec<*mut i8> = envv
603 .into_iter()
604 .map(CString::into_raw)
605 .chain(std::iter::once(std::ptr::null_mut()))
607 .collect();
608
609 let mut file_actions: libc::posix_spawn_file_actions_t = shadow_pod::zeroed();
610 Errno::result_from_libc_errnum(unsafe {
611 libc::posix_spawn_file_actions_init(&mut file_actions)
612 })
613 .unwrap();
614
615 let (stdin_reader, stdin_writer) = rustix::pipe::pipe_with(PipeFlags::CLOEXEC).unwrap();
617 Errno::result_from_libc_errnum(unsafe {
618 libc::posix_spawn_file_actions_adddup2(
619 &mut file_actions,
620 stdin_reader.as_raw_fd(),
621 libc::STDIN_FILENO,
622 )
623 })
624 .unwrap();
625
626 if let Some(strace_file) = strace_file {
644 Errno::result_from_libc_errnum(unsafe {
645 libc::posix_spawn_file_actions_adddup2(
646 &mut file_actions,
647 strace_file.as_raw_fd(),
648 libc::STDOUT_FILENO,
649 )
650 })
651 .unwrap();
652 Errno::result_from_libc_errnum(unsafe {
653 libc::posix_spawn_file_actions_adddup2(
654 &mut file_actions,
655 libc::STDOUT_FILENO,
656 strace_file.as_raw_fd(),
657 )
658 })
659 .unwrap();
660 }
661
662 Errno::result_from_libc_errnum(unsafe {
664 libc::posix_spawn_file_actions_adddup2(
665 &mut file_actions,
666 shimlog_file.as_raw_fd(),
667 libc::STDOUT_FILENO,
668 )
669 })
670 .unwrap();
671 Errno::result_from_libc_errnum(unsafe {
672 libc::posix_spawn_file_actions_adddup2(
673 &mut file_actions,
674 shimlog_file.as_raw_fd(),
675 libc::STDERR_FILENO,
676 )
677 })
678 .unwrap();
679
680 let mut spawn_attr: libc::posix_spawnattr_t = shadow_pod::zeroed();
681 Errno::result_from_libc_errnum(unsafe { libc::posix_spawnattr_init(&mut spawn_attr) })
682 .unwrap();
683
684 Errno::result_from_libc_errnum(unsafe {
687 libc::posix_spawnattr_setflags(
688 &mut spawn_attr,
689 libc::POSIX_SPAWN_USEVFORK.try_into().unwrap(),
690 )
691 })
692 .unwrap();
693
694 let child_pid_res = {
695 let mut child_pid = -1;
696 Errno::result_from_libc_errnum(unsafe {
697 libc::posix_spawn(
698 &mut child_pid,
699 plugin_path.as_ptr(),
700 &file_actions,
701 &spawn_attr,
702 argv_ptrs.as_ptr(),
703 envv_ptrs.as_ptr(),
704 )
705 })
706 .map(|_| Pid::from_raw(child_pid).unwrap_or_else(|| panic!("Invalid pid: {child_pid}")))
707 };
708
709 if child_pid_res.is_ok() {
713 let serialized = shmem_block.serialize();
717 let serialized_bytes = shadow_pod::as_u8_slice(&serialized);
718 let written = Errno::result_from_libc_errno(-1, unsafe {
719 libc::write(
720 stdin_writer.as_raw_fd(),
721 serialized_bytes.as_ptr().cast(),
722 serialized_bytes.len(),
723 )
724 })
725 .unwrap();
726 assert_eq!(written, isize::try_from(serialized_bytes.len()).unwrap());
728 }
729
730 Errno::result_from_libc_errnum(unsafe {
731 libc::posix_spawn_file_actions_destroy(&mut file_actions)
732 })
733 .unwrap();
734 Errno::result_from_libc_errnum(unsafe { libc::posix_spawnattr_destroy(&mut spawn_attr) })
735 .unwrap();
736
737 drop(
739 argv_ptrs
740 .into_iter()
741 .filter(|p| !p.is_null())
742 .map(|p| unsafe { CString::from_raw(p) }),
743 );
744 drop(
745 envv_ptrs
746 .into_iter()
747 .filter(|p| !p.is_null())
748 .map(|p| unsafe { CString::from_raw(p) }),
749 );
750
751 debug!(
752 "starting process {}, result: {child_pid_res:?}",
753 plugin_path.to_str().unwrap()
754 );
755
756 child_pid_res
757 }
758
759 pub fn kill_and_drop(self) {
766 if let Err(err) =
767 rustix::process::kill_process(self.native_pid().into(), rustix::process::Signal::Kill)
768 {
769 log::warn!(
770 "Couldn't kill managed process {:?}. kill: {:?}",
771 self.native_pid(),
772 err
773 );
774 }
775 self.handle_process_exit();
776 }
777}
778
779impl Drop for ManagedThread {
780 fn drop(&mut self) {
781 assert!(!self.is_running());
787 }
788}