shadow_shim/
syscall.rs

1use core::fmt::Write;
2use core::sync::atomic;
3
4use formatting_nostd::{BorrowedFdWriter, FormatBuffer};
5use linux_api::errno::Errno;
6use linux_api::ucontext::ucontext;
7use linux_syscall::{Result64, syscall};
8use rustix::fd::BorrowedFd;
9use shadow_shim_helper_rs::emulated_time::EmulatedTime;
10use shadow_shim_helper_rs::option::FfiOption;
11use shadow_shim_helper_rs::shim_event::{
12    ShimEventAddThreadRes, ShimEventSyscall, ShimEventSyscallComplete, ShimEventToShadow,
13    ShimEventToShim,
14};
15use shadow_shim_helper_rs::syscall_types::{SyscallArgs, SyscallReg};
16use shadow_shim_helper_rs::util::time::TimeParts;
17
18use crate::{bindings, global_host_shmem, tls_ipc, tls_thread_shmem};
19
20/// # Safety
21///
22/// The specified syscall must be safe to make.
23unsafe fn native_syscall(args: &SyscallArgs) -> SyscallReg {
24    if args.number == libc::SYS_clone {
25        panic!("Shouldn't get here. Should have gone through ShimEventAddThreadReq");
26    } else if args.number == libc::SYS_exit {
27        let exit_status = i32::from(args.args[0]);
28        // This thread is exiting. Arrange for its thread-local-storage and
29        // signal stack to be freed.
30        unsafe { bindings::shim_freeSignalStack() };
31        // SAFETY: We don't try to recover from panics.
32        // TODO: make shim fully no_std and install a panic handler that aborts.
33        // https://doc.rust-lang.org/nomicon/panic-handler.html
34        unsafe { crate::release_and_exit_current_thread(exit_status) };
35    } else {
36        let Ok(num) = u32::try_from(args.number) else {
37            return Errno::ENOSYS.to_negated_i32().into();
38        };
39        let rv: i64 = match unsafe {
40            syscall!(
41                num,
42                u64::from(args.args[0]),
43                u64::from(args.args[1]),
44                u64::from(args.args[2]),
45                u64::from(args.args[3]),
46                u64::from(args.args[4]),
47                u64::from(args.args[5])
48            )
49        }
50        .try_i64()
51        {
52            Ok(x) => x,
53            Err(e) => -i64::from(e.get()),
54        };
55        rv.into()
56    }
57}
58
59/// # Safety
60///
61/// `ctx` must be valid if provided.
62pub(crate) unsafe fn emulated_syscall_event(
63    mut ctx: Option<&mut ucontext>,
64    syscall_event: &ShimEventSyscall,
65) -> SyscallReg {
66    log::trace!(
67        "sending syscall {} event",
68        syscall_event.syscall_args.number
69    );
70
71    crate::tls_ipc::with(|ipc| {
72        ipc.to_shadow()
73            .send(ShimEventToShadow::Syscall(*syscall_event))
74    });
75
76    loop {
77        log::trace!("waiting for event");
78        let res = crate::tls_ipc::with(|ipc| ipc.from_shadow().receive().unwrap());
79        log::trace!("got response {res:?}");
80        match res {
81            ShimEventToShim::SyscallComplete(syscall_complete) => {
82                // Shadow has returned a result for the emulated syscall
83
84                if crate::global_host_shmem::try_get().is_none() {
85                    // We should only get here during early initialization. We don't have what
86                    // we need to process signals yet, so just return the result.
87                    return syscall_complete.retval;
88                }
89
90                if let Some(ctx) = ctx.as_mut() {
91                    // Set the syscall return value now, before potentially
92                    // invoking signal handlers. This appears to be the behavior
93                    // in the kernel; i.e. a handler for a signal that
94                    // is interrupted a blocking syscall should see the syscall
95                    // result (-EINTR) in the context passed to that handler.
96                    ctx.uc_mcontext.rax = syscall_complete.retval.into();
97                }
98
99                // SAFETY: `ctx` should be valid if present.
100                let all_sigactions_had_sa_restart =
101                    unsafe { crate::signals::process_signals(ctx.as_deref_mut()) };
102
103                if i64::from(syscall_complete.retval) == Errno::EINTR.to_negated_i64()
104                    && all_sigactions_had_sa_restart
105                    && syscall_complete.restartable
106                {
107                    // Restart syscall interrupted syscall
108                    crate::tls_ipc::with(|ipc| {
109                        ipc.to_shadow()
110                            .send(ShimEventToShadow::Syscall(*syscall_event))
111                    });
112                    continue;
113                } else {
114                    // Return syscall result
115                    return syscall_complete.retval;
116                }
117            }
118            ShimEventToShim::SyscallDoNative => {
119                // "Emulate" the syscall by executing it natively.
120
121                let rv = unsafe { native_syscall(&syscall_event.syscall_args) };
122
123                if let FfiOption::Some(strace_fd) =
124                    crate::tls_process_shmem::with(|process| process.strace_fd)
125                {
126                    let emulated_time = global_host_shmem::get()
127                        .sim_time
128                        .load(atomic::Ordering::Relaxed)
129                        - EmulatedTime::SIMULATION_START;
130                    let tid = tls_thread_shmem::with(|thread| thread.tid);
131                    let parts = TimeParts::from_nanos(emulated_time.as_nanos());
132                    let mut buffer = FormatBuffer::<200>::new();
133                    writeln!(
134                        &mut buffer,
135                        "{} [tid {}] ^^^ = {:?}",
136                        parts.fmt_hr_min_sec_nano(),
137                        tid,
138                        rv
139                    )
140                    .unwrap();
141                    // SAFETY: file descriptor should be valid and open.
142                    let strace_fd = unsafe { BorrowedFd::borrow_raw(strace_fd) };
143                    let mut strace_file_writer = BorrowedFdWriter::new(strace_fd);
144                    if let Err(e) = strace_file_writer.write_str(buffer.as_str()) {
145                        log::warn!("Couldn't write to strace_fd:{strace_fd:?}: {e:?}");
146                    }
147                }
148
149                return rv;
150            }
151            ShimEventToShim::Syscall(syscall) => {
152                // Execute the syscall and return the result to Shadow.
153
154                let res = unsafe { native_syscall(&syscall.syscall_args) };
155                tls_ipc::with(|ipc| {
156                    ipc.to_shadow().send(ShimEventToShadow::SyscallComplete(
157                        ShimEventSyscallComplete {
158                            retval: res,
159                            restartable: false,
160                        },
161                    ))
162                });
163            }
164            ShimEventToShim::AddThreadReq(r) => {
165                // Create a new native thread under our control
166
167                let clone_res = unsafe { crate::clone::do_clone(ctx.as_mut().unwrap(), &r) };
168                tls_ipc::with(|ipc| {
169                    ipc.to_shadow()
170                        .send(ShimEventToShadow::AddThreadRes(ShimEventAddThreadRes {
171                            clone_res,
172                        }))
173                })
174            }
175            e @ ShimEventToShim::StartRes(_) => {
176                panic!("Unexpected event: {e:?}");
177            }
178        }
179    }
180}
181
182pub mod export {
183    use crate::ExecutionContext;
184
185    use super::*;
186
187    /// # Safety
188    ///
189    /// `ctx` must be valid if provided.
190    #[unsafe(no_mangle)]
191    pub unsafe extern "C-unwind" fn shim_emulated_syscallv(
192        ctx: *mut libc::ucontext_t,
193        n: core::ffi::c_long,
194        mut args: va_list::VaList,
195    ) -> core::ffi::c_long {
196        let _prev = ExecutionContext::Shadow.enter();
197
198        let syscall_args = SyscallArgs {
199            number: n,
200            args: core::array::from_fn(|_| {
201                // SAFETY: syscall args all "fit" in an i64. Reading more arguments
202                // than actually provided is sound because any bit pattern is a
203                // valid i64.
204                let arg = unsafe { args.get::<i64>() };
205                SyscallReg::from(arg)
206            }),
207        };
208
209        let event = ShimEventSyscall { syscall_args };
210
211        let ctx = ctx.cast::<ucontext>();
212        let ctx = unsafe { ctx.as_mut() };
213        let retval = unsafe { emulated_syscall_event(ctx, &event) };
214
215        retval.into()
216    }
217
218    /// # Safety
219    ///
220    /// The specified syscall must be safe to make.
221    #[unsafe(no_mangle)]
222    pub unsafe extern "C-unwind" fn shim_native_syscallv(
223        n: core::ffi::c_long,
224        mut args: va_list::VaList,
225    ) -> core::ffi::c_long {
226        let syscall_args = SyscallArgs {
227            number: n,
228            args: core::array::from_fn(|_| {
229                // SAFETY: syscall args all "fit" in an i64. Reading more arguments
230                // than actually provided is sound because any bit pattern is a
231                // valid i64.
232                let arg = unsafe { args.get::<i64>() };
233                SyscallReg::from(arg)
234            }),
235        };
236        // SAFETY: Ensured by caller.
237        let rv = unsafe { native_syscall(&syscall_args) };
238        rv.into()
239    }
240}