1use std::cell::{Cell, RefCell};
6use std::ffi::{CStr, CString};
7use std::io::Write;
8use std::os::fd::AsRawFd;
9use std::os::unix::prelude::OsStrExt;
10use std::path::PathBuf;
11use std::sync::{Arc, atomic};
12
13use linux_api::errno::Errno;
14use linux_api::posix_types::Pid;
15use linux_api::sched::CloneFlags;
16use linux_api::signal::tgkill;
17use log::{Level, debug, error, log_enabled, trace};
18use rand::Rng as _;
19use rustix::pipe::PipeFlags;
20use rustix::process::WaitOptions;
21use shadow_shim_helper_rs::ipc::IPCData;
22use shadow_shim_helper_rs::shim_event::{
23 ShimEventAddThreadReq, ShimEventAddThreadRes, ShimEventStartRes, ShimEventSyscall,
24 ShimEventSyscallComplete, ShimEventToShadow, ShimEventToShim,
25};
26use shadow_shim_helper_rs::syscall_types::{ForeignPtr, SyscallArgs, SyscallReg};
27use shadow_shmem::allocator::ShMemBlock;
28use vasi_sync::scchannel::SelfContainedChannelError;
29
30use super::context::ThreadContext;
31use super::host::Host;
32use super::syscall::condition::SyscallCondition;
33use crate::core::worker::{WORKER_SHARED, Worker};
34use crate::cshadow;
35use crate::host::syscall::handler::SyscallHandler;
36use crate::host::syscall::types::{ForeignArrayPtr, SyscallReturn};
37use crate::utility::{VerifyPluginPathError, inject_preloads, syscall, verify_plugin_path};
38
39#[derive(Debug)]
41#[must_use]
42pub enum ResumeResult {
43 Blocked(SyscallCondition),
45 ExitedThread(i32),
47 ExitedProcess,
49}
50
51pub struct ManagedThread {
52 ipc_shmem: Arc<ShMemBlock<'static, IPCData>>,
53 is_running: Cell<bool>,
54 return_code: Cell<Option<i32>>,
55
56 current_event: RefCell<ShimEventToShadow>,
58
59 native_pid: linux_api::posix_types::Pid,
60 native_tid: linux_api::posix_types::Pid,
61
62 affinity: Cell<i32>,
67}
68
69impl ManagedThread {
70 pub fn native_pid(&self) -> linux_api::posix_types::Pid {
71 self.native_pid
72 }
73
74 pub fn native_tid(&self) -> linux_api::posix_types::Pid {
75 self.native_tid
76 }
77
78 pub fn native_syscall(&self, ctx: &ThreadContext, n: i64, args: &[SyscallReg]) -> SyscallReg {
83 let mut syscall_args = SyscallArgs {
84 number: n,
85 args: [SyscallReg::from(0u64); 6],
86 };
87 syscall_args.args[..args.len()].copy_from_slice(args);
88 match self.continue_plugin(
89 ctx.host,
90 &ShimEventToShim::Syscall(ShimEventSyscall { syscall_args }),
91 ) {
92 ShimEventToShadow::SyscallComplete(res) => res.retval,
93 other => panic!("Unexpected response from plugin: {other:?}"),
94 }
95 }
96
97 pub fn spawn(
98 plugin_path: &CStr,
99 argv: Vec<CString>,
100 envv: Vec<CString>,
101 strace_file: Option<&std::fs::File>,
102 log_file: &std::fs::File,
103 injected_preloads: &[PathBuf],
104 ) -> Result<Self, Errno> {
105 debug!(
106 "spawning new mthread '{plugin_path:?}' with environment '{envv:?}', arguments '{argv:?}'"
107 );
108
109 let envv = inject_preloads(envv, injected_preloads);
110
111 debug!("env after preload injection: {envv:?}");
112
113 let ipc_shmem = Arc::new(shadow_shmem::allocator::shmalloc(IPCData::new()));
114
115 let child_pid =
116 Self::spawn_native(plugin_path, argv, envv, strace_file, log_file, &ipc_shmem)?;
117
118 let native_pid = child_pid;
120 let native_tid = child_pid;
121
122 {
124 let worker = WORKER_SHARED.borrow();
125 let watcher = worker.as_ref().unwrap().child_pid_watcher();
126
127 watcher.register_pid(child_pid);
128 let ipc = ipc_shmem.clone();
129 watcher.register_callback(child_pid, move |_pid| {
130 ipc.from_plugin().close_writer();
131 })
132 };
133
134 trace!(
135 "waiting for start event from shim with native pid {:?}",
136 native_pid
137 );
138 let start_req = ipc_shmem.from_plugin().receive().unwrap();
139 match &start_req {
140 ShimEventToShadow::StartReq(_) => {
141 }
143 ShimEventToShadow::ProcessDeath => {
144 let status =
148 rustix::process::waitpid(Some(native_pid.into()), WaitOptions::empty())
149 .unwrap()
150 .unwrap();
151 if status.exit_status() == Some(127) {
152 debug!("posix_spawn failed to exec the process");
158 return Err(Errno::EPERM);
163 }
164 panic!("Child process died unexpectedly before initialization: {status:?}");
175 }
176 other => panic!("Unexpected result from shim: {other:?}"),
177 };
178
179 Ok(Self {
180 ipc_shmem,
181 is_running: Cell::new(true),
182 return_code: Cell::new(None),
183 current_event: RefCell::new(start_req),
184 native_pid,
185 native_tid,
186 affinity: Cell::new(cshadow::AFFINITY_UNINIT),
187 })
188 }
189
190 pub fn resume(
191 &self,
192 ctx: &ThreadContext,
193 syscall_handler: &mut SyscallHandler,
194 ) -> ResumeResult {
195 debug_assert!(self.is_running());
196
197 self.sync_affinity_with_worker();
198
199 ctx.process.free_unsafe_borrows_flush().unwrap();
202
203 loop {
204 let mut current_event = self.current_event.borrow_mut();
205 let last_event = *current_event;
206 *current_event = match last_event {
207 ShimEventToShadow::StartReq(start_req) => {
208 ctx.process
211 .memory_borrow_mut()
212 .write(
213 start_req.thread_shmem_block_to_init,
214 &ctx.thread.shmem().serialize(),
215 )
216 .unwrap();
217
218 if !start_req.process_shmem_block_to_init.is_null() {
219 ctx.process
222 .memory_borrow_mut()
223 .write(
224 start_req.process_shmem_block_to_init,
225 &ctx.process.shmem().serialize(),
226 )
227 .unwrap();
228 }
229
230 if !start_req.initial_working_dir_to_init.is_null() {
231 let mut mem = ctx.process.memory_borrow_mut();
233 let mut writer = mem.writer(ForeignArrayPtr::new(
234 start_req.initial_working_dir_to_init,
235 start_req.initial_working_dir_to_init_len,
236 ));
237 writer
238 .write_all(ctx.process.current_working_dir().to_bytes_with_nul())
239 .unwrap();
240 writer.flush().unwrap();
241 }
242
243 trace!("sending start event code to shim");
245 self.continue_plugin(
246 ctx.host,
247 &ShimEventToShim::StartRes(ShimEventStartRes {
248 auxvec_random: ctx.host.random_mut().random(),
249 }),
250 )
251 }
252 ShimEventToShadow::ProcessDeath => {
253 self.cleanup_after_exit_initiated();
256 return ResumeResult::ExitedProcess;
257 }
258 ShimEventToShadow::Syscall(syscall) => {
259 if syscall.syscall_args.number == libc::SYS_exit {
269 let return_code = syscall.syscall_args.args[0].into();
270 debug!("Short-circuiting syscall exit({return_code})");
271 self.return_code.set(Some(return_code));
272 self.ipc_shmem
278 .to_plugin()
279 .send(ShimEventToShim::SyscallDoNative);
280 self.cleanup_after_exit_initiated();
281 return ResumeResult::ExitedThread(return_code);
282 }
283
284 let scr = syscall_handler.syscall(ctx, &syscall.syscall_args).into();
285
286 ctx.thread.cleanup_syscall_condition();
288
289 assert!(self.is_running());
290
291 ctx.process.free_unsafe_borrows_flush().unwrap();
294
295 match scr {
296 SyscallReturn::Block(b) => {
297 return ResumeResult::Blocked(unsafe {
298 SyscallCondition::consume_from_c(b.cond)
299 });
300 }
301 SyscallReturn::Done(d) => self.continue_plugin(
302 ctx.host,
303 &ShimEventToShim::SyscallComplete(ShimEventSyscallComplete {
304 retval: d.retval,
305 restartable: d.restartable,
306 }),
307 ),
308 SyscallReturn::Native => {
309 self.continue_plugin(ctx.host, &ShimEventToShim::SyscallDoNative)
310 }
311 }
312 }
313 ShimEventToShadow::AddThreadRes(res) => {
314 assert_eq!(res.clone_res, 0);
318
319 self.continue_plugin(
321 ctx.host,
322 &ShimEventToShim::SyscallComplete(ShimEventSyscallComplete {
323 retval: 0.into(),
324 restartable: false,
325 }),
326 )
327 }
328 e @ ShimEventToShadow::SyscallComplete(_) => panic!("Unexpected event: {e:?}"),
329 };
330 assert!(self.is_running());
331 }
332 }
333
334 pub fn handle_process_exit(&self) {
335 WORKER_SHARED
337 .borrow()
338 .as_ref()
339 .unwrap()
340 .child_pid_watcher()
341 .unregister_pid(self.native_pid());
342
343 self.cleanup_after_exit_initiated();
344 }
345
346 pub fn return_code(&self) -> Option<i32> {
347 self.return_code.get()
348 }
349
350 pub fn is_running(&self) -> bool {
351 self.is_running.get()
352 }
353
354 pub fn native_clone(
360 &self,
361 ctx: &ThreadContext,
362 flags: CloneFlags,
363 child_stack: ForeignPtr<()>,
364 ptid: ForeignPtr<libc::pid_t>,
365 ctid: ForeignPtr<libc::pid_t>,
366 newtls: libc::c_ulong,
367 ) -> Result<ManagedThread, linux_api::errno::Errno> {
368 let child_ipc_shmem = Arc::new(shadow_shmem::allocator::shmalloc(IPCData::new()));
369
370 let clone_res: i64 = match self.continue_plugin(
372 ctx.host,
373 &ShimEventToShim::AddThreadReq(ShimEventAddThreadReq {
374 ipc_block: child_ipc_shmem.serialize(),
375 flags: flags.bits(),
376 child_stack,
377 ptid: ptid.cast::<()>(),
378 ctid: ctid.cast::<()>(),
379 newtls,
380 }),
381 ) {
382 ShimEventToShadow::AddThreadRes(ShimEventAddThreadRes { clone_res }) => clone_res,
383 r => panic!("Unexpected result: {r:?}"),
384 };
385 let clone_res: SyscallReg = syscall::raw_return_value_to_result(clone_res)?;
386 let child_native_tid = Pid::from_raw(libc::pid_t::from(clone_res)).unwrap();
387 trace!("native clone treated tid {child_native_tid:?}");
388
389 trace!(
390 "waiting for start event from shim with native tid {:?}",
391 child_native_tid
392 );
393 let start_req = child_ipc_shmem.from_plugin().receive().unwrap();
394 match &start_req {
395 ShimEventToShadow::StartReq(_) => (),
396 other => panic!("Unexpected result from shim: {other:?}"),
397 };
398
399 let native_pid = if flags.contains(CloneFlags::CLONE_THREAD) {
400 self.native_pid
401 } else {
402 child_native_tid
403 };
404
405 if !flags.contains(CloneFlags::CLONE_THREAD) {
406 WORKER_SHARED
408 .borrow()
409 .as_ref()
410 .unwrap()
411 .child_pid_watcher()
412 .register_pid(native_pid);
413 }
414
415 {
417 let child_ipc_shmem = child_ipc_shmem.clone();
418 WORKER_SHARED
419 .borrow()
420 .as_ref()
421 .unwrap()
422 .child_pid_watcher()
423 .register_callback(native_pid, move |_pid| {
424 child_ipc_shmem.from_plugin().close_writer();
425 })
426 };
427
428 Ok(Self {
429 ipc_shmem: child_ipc_shmem,
430 is_running: Cell::new(true),
431 return_code: Cell::new(None),
432 current_event: RefCell::new(start_req),
433 native_pid,
434 native_tid: child_native_tid,
435 affinity: Cell::new(cshadow::AFFINITY_UNINIT),
437 })
438 }
439
440 #[must_use]
441 fn continue_plugin(&self, host: &Host, event: &ShimEventToShim) -> ShimEventToShadow {
442 host.shim_shmem_lock_borrow_mut().unwrap().max_runahead_time =
444 Worker::max_event_runahead_time(host);
445 host.shim_shmem()
446 .sim_time
447 .store(Worker::current_time().unwrap(), atomic::Ordering::Relaxed);
448
449 host.unlock_shmem();
451
452 self.ipc_shmem.to_plugin().send(*event);
453
454 let event = match self.ipc_shmem.from_plugin().receive() {
455 Ok(e) => e,
456 Err(SelfContainedChannelError::WriterIsClosed) => ShimEventToShadow::ProcessDeath,
457 };
458
459 host.lock_shmem();
462
463 let shim_time = host.shim_shmem().sim_time.load(atomic::Ordering::Relaxed);
465 if log_enabled!(Level::Trace) {
466 let worker_time = Worker::current_time().unwrap();
467 if shim_time != worker_time {
468 trace!(
469 "Updating time from {worker_time:?} to {shim_time:?} (+{:?})",
470 shim_time - worker_time
471 );
472 }
473 }
474 Worker::set_current_time(shim_time);
475
476 event
477 }
478
479 fn cleanup_after_exit_initiated(&self) {
482 if !self.is_running.get() {
483 return;
484 }
485 self.wait_for_native_exit();
486 trace!("child {:?} exited", self.native_tid());
487 self.is_running.set(false);
488 }
489
490 fn wait_for_native_exit(&self) {
492 let native_pid = self.native_pid();
493 let native_tid = self.native_tid();
494
495 trace!("Waiting for native thread {native_pid:?}.{native_tid:?} to exit");
502 loop {
503 if self.ipc_shmem.from_plugin().writer_is_closed() {
504 break;
507 }
508 match tgkill(native_pid, native_tid, None) {
509 Err(Errno::ESRCH) => {
510 trace!("Thread is done exiting; proceeding with cleanup");
511 break;
512 }
513 Err(e) => {
514 error!("Unexpected tgkill error: {:?}", e);
515 break;
516 }
517 Ok(()) if native_pid == native_tid => {
518 let filename = format!("/proc/{}/stat", native_pid.as_raw_nonzero().get());
521 let stat = match std::fs::read_to_string(filename) {
522 Err(e) => {
523 assert!(e.kind() == std::io::ErrorKind::NotFound);
524 trace!("tgl {native_pid:?} is fully dead");
525 break;
526 }
527 Ok(s) => s,
528 };
529 if stat.contains(") Z") {
530 trace!("tgl {native_pid:?} is a zombie");
531 break;
532 }
533 }
535 Ok(()) => {
536 }
538 };
539 std::thread::yield_now();
540 }
541 }
542
543 fn sync_affinity_with_worker(&self) {
544 let current_affinity = scheduler::core_affinity()
545 .map(|x| i32::try_from(x).unwrap())
546 .unwrap_or(cshadow::AFFINITY_UNINIT);
547 self.affinity.set(unsafe {
548 cshadow::affinity_setProcessAffinity(
549 self.native_tid().as_raw_nonzero().get(),
550 current_affinity,
551 self.affinity.get(),
552 )
553 });
554 }
555
556 fn spawn_native(
557 plugin_path: &CStr,
558 argv: Vec<CString>,
559 envv: Vec<CString>,
560 strace_file: Option<&std::fs::File>,
561 shimlog_file: &std::fs::File,
562 shmem_block: &ShMemBlock<IPCData>,
563 ) -> Result<Pid, Errno> {
564 fn map_verify_err(e: VerifyPluginPathError) -> Errno {
572 match e {
573 VerifyPluginPathError::NotFound => Errno::ENOENT,
575 VerifyPluginPathError::NotFile => Errno::EACCES,
577 VerifyPluginPathError::NotExecutable => Errno::EACCES,
579 VerifyPluginPathError::UnknownFileType => Errno::ENOEXEC,
583 VerifyPluginPathError::NotDynamicallyLinkedElf => Errno::ENOEXEC,
584 VerifyPluginPathError::IncompatibleInterpreter(e) => map_verify_err(*e),
585 VerifyPluginPathError::PathPermissionDenied => Errno::EACCES,
589 VerifyPluginPathError::UnhandledIoError(_) => {
590 Errno::ENOEXEC
592 }
593 }
594 }
595 verify_plugin_path(std::ffi::OsStr::from_bytes(plugin_path.to_bytes()))
596 .map_err(map_verify_err)?;
597
598 let argv_ptrs: Vec<*mut i8> = argv
603 .into_iter()
604 .map(CString::into_raw)
605 .chain(std::iter::once(std::ptr::null_mut()))
607 .collect();
608 let envv_ptrs: Vec<*mut i8> = envv
609 .into_iter()
610 .map(CString::into_raw)
611 .chain(std::iter::once(std::ptr::null_mut()))
613 .collect();
614
615 let mut file_actions: libc::posix_spawn_file_actions_t = shadow_pod::zeroed();
616 Errno::result_from_libc_errnum(unsafe {
617 libc::posix_spawn_file_actions_init(&mut file_actions)
618 })
619 .unwrap();
620
621 let (stdin_reader, stdin_writer) = rustix::pipe::pipe_with(PipeFlags::CLOEXEC).unwrap();
623 Errno::result_from_libc_errnum(unsafe {
624 libc::posix_spawn_file_actions_adddup2(
625 &mut file_actions,
626 stdin_reader.as_raw_fd(),
627 libc::STDIN_FILENO,
628 )
629 })
630 .unwrap();
631
632 if let Some(strace_file) = strace_file {
650 Errno::result_from_libc_errnum(unsafe {
651 libc::posix_spawn_file_actions_adddup2(
652 &mut file_actions,
653 strace_file.as_raw_fd(),
654 libc::STDOUT_FILENO,
655 )
656 })
657 .unwrap();
658 Errno::result_from_libc_errnum(unsafe {
659 libc::posix_spawn_file_actions_adddup2(
660 &mut file_actions,
661 libc::STDOUT_FILENO,
662 strace_file.as_raw_fd(),
663 )
664 })
665 .unwrap();
666 }
667
668 Errno::result_from_libc_errnum(unsafe {
670 libc::posix_spawn_file_actions_adddup2(
671 &mut file_actions,
672 shimlog_file.as_raw_fd(),
673 libc::STDOUT_FILENO,
674 )
675 })
676 .unwrap();
677 Errno::result_from_libc_errnum(unsafe {
678 libc::posix_spawn_file_actions_adddup2(
679 &mut file_actions,
680 shimlog_file.as_raw_fd(),
681 libc::STDERR_FILENO,
682 )
683 })
684 .unwrap();
685
686 let mut spawn_attr: libc::posix_spawnattr_t = shadow_pod::zeroed();
687 Errno::result_from_libc_errnum(unsafe { libc::posix_spawnattr_init(&mut spawn_attr) })
688 .unwrap();
689
690 Errno::result_from_libc_errnum(unsafe {
693 libc::posix_spawnattr_setflags(
694 &mut spawn_attr,
695 libc::POSIX_SPAWN_USEVFORK.try_into().unwrap(),
696 )
697 })
698 .unwrap();
699
700 let child_pid_res = {
701 let mut child_pid = -1;
702 Errno::result_from_libc_errnum(unsafe {
703 libc::posix_spawn(
704 &mut child_pid,
705 plugin_path.as_ptr(),
706 &file_actions,
707 &spawn_attr,
708 argv_ptrs.as_ptr(),
709 envv_ptrs.as_ptr(),
710 )
711 })
712 .map(|_| Pid::from_raw(child_pid).unwrap_or_else(|| panic!("Invalid pid: {child_pid}")))
713 };
714
715 if child_pid_res.is_ok() {
719 let serialized = shmem_block.serialize();
723 let serialized_bytes = shadow_pod::as_u8_slice(&serialized);
724 let written = Errno::result_from_libc_errno(-1, unsafe {
725 libc::write(
726 stdin_writer.as_raw_fd(),
727 serialized_bytes.as_ptr().cast(),
728 serialized_bytes.len(),
729 )
730 })
731 .unwrap();
732 assert_eq!(written, isize::try_from(serialized_bytes.len()).unwrap());
734 }
735
736 Errno::result_from_libc_errnum(unsafe {
737 libc::posix_spawn_file_actions_destroy(&mut file_actions)
738 })
739 .unwrap();
740 Errno::result_from_libc_errnum(unsafe { libc::posix_spawnattr_destroy(&mut spawn_attr) })
741 .unwrap();
742
743 drop(
745 argv_ptrs
746 .into_iter()
747 .filter(|p| !p.is_null())
748 .map(|p| unsafe { CString::from_raw(p) }),
749 );
750 drop(
751 envv_ptrs
752 .into_iter()
753 .filter(|p| !p.is_null())
754 .map(|p| unsafe { CString::from_raw(p) }),
755 );
756
757 debug!(
758 "starting process {}, result: {child_pid_res:?}",
759 plugin_path.to_str().unwrap()
760 );
761
762 child_pid_res
763 }
764
765 pub fn kill_and_drop(self) {
772 if let Err(err) =
773 rustix::process::kill_process(self.native_pid().into(), rustix::process::Signal::Kill)
774 {
775 log::warn!(
776 "Couldn't kill managed process {:?}. kill: {:?}",
777 self.native_pid(),
778 err
779 );
780 }
781 self.handle_process_exit();
782 }
783}
784
785impl Drop for ManagedThread {
786 fn drop(&mut self) {
787 assert!(!self.is_running());
793 }
794}