shadow_shim/
syscall.rs

1use core::fmt::Write;
2use core::sync::atomic;
3
4use formatting_nostd::{BorrowedFdWriter, FormatBuffer};
5use linux_api::errno::Errno;
6use linux_api::ucontext::ucontext;
7use rustix::fd::BorrowedFd;
8use shadow_shim_helper_rs::emulated_time::EmulatedTime;
9use shadow_shim_helper_rs::option::FfiOption;
10use shadow_shim_helper_rs::shim_event::{
11    ShimEventAddThreadRes, ShimEventSyscall, ShimEventSyscallComplete, ShimEventToShadow,
12    ShimEventToShim,
13};
14use shadow_shim_helper_rs::syscall_types::{SyscallArgs, SyscallReg};
15use shadow_shim_helper_rs::util::time::TimeParts;
16
17use crate::{bindings, global_host_shmem, tls_ipc, tls_thread_shmem};
18
19/// # Safety
20///
21/// The specified syscall must be safe to make.
22unsafe fn native_syscall(args: &SyscallArgs) -> SyscallReg {
23    if args.number == libc::SYS_clone {
24        panic!("Shouldn't get here. Should have gone through ShimEventAddThreadReq");
25    } else if args.number == libc::SYS_exit {
26        let exit_status = i32::from(args.args[0]);
27        // This thread is exiting. Arrange for its thread-local-storage and
28        // signal stack to be freed.
29        unsafe { bindings::shim_freeSignalStack() };
30        // SAFETY: We don't try to recover from panics.
31        // TODO: make shim fully no_std and install a panic handler that aborts.
32        // https://doc.rust-lang.org/nomicon/panic-handler.html
33        unsafe { crate::release_and_exit_current_thread(exit_status) };
34    } else {
35        let rv: i64;
36        // SAFETY: Caller is responsible for ensuring this syscall is safe to make.
37        unsafe {
38            core::arch::asm!(
39                "syscall",
40                inout("rax") args.number => rv,
41                in("rdi") u64::from(args.args[0]),
42                in("rsi") u64::from(args.args[1]),
43                in("rdx") u64::from(args.args[2]),
44                in("r10") u64::from(args.args[3]),
45                in("r8") u64::from(args.args[4]),
46                in("r9") u64::from(args.args[5]))
47        };
48        return rv.into();
49    }
50}
51
52/// # Safety
53///
54/// `ctx` must be valid if provided.
55pub(crate) unsafe fn emulated_syscall_event(
56    mut ctx: Option<&mut ucontext>,
57    syscall_event: &ShimEventSyscall,
58) -> SyscallReg {
59    log::trace!(
60        "sending syscall {} event",
61        syscall_event.syscall_args.number
62    );
63
64    crate::tls_ipc::with(|ipc| {
65        ipc.to_shadow()
66            .send(ShimEventToShadow::Syscall(*syscall_event))
67    });
68
69    loop {
70        log::trace!("waiting for event");
71        let res = crate::tls_ipc::with(|ipc| ipc.from_shadow().receive().unwrap());
72        log::trace!("got response {res:?}");
73        match res {
74            ShimEventToShim::SyscallComplete(syscall_complete) => {
75                // Shadow has returned a result for the emulated syscall
76
77                if crate::global_host_shmem::try_get().is_none() {
78                    // We should only get here during early initialization. We don't have what
79                    // we need to process signals yet, so just return the result.
80                    return syscall_complete.retval;
81                }
82
83                if let Some(ctx) = ctx.as_mut() {
84                    // Set the syscall return value now, before potentially
85                    // invoking signal handlers. This appears to be the behavior
86                    // in the kernel; i.e. a handler for a signal that
87                    // is interrupted a blocking syscall should see the syscall
88                    // result (-EINTR) in the context passed to that handler.
89                    ctx.uc_mcontext.rax = syscall_complete.retval.into();
90                }
91
92                // SAFETY: `ctx` should be valid if present.
93                let all_sigactions_had_sa_restart =
94                    unsafe { crate::signals::process_signals(ctx.as_deref_mut()) };
95
96                if i64::from(syscall_complete.retval) == Errno::EINTR.to_negated_i64()
97                    && all_sigactions_had_sa_restart
98                    && syscall_complete.restartable
99                {
100                    // Restart syscall interrupted syscall
101                    crate::tls_ipc::with(|ipc| {
102                        ipc.to_shadow()
103                            .send(ShimEventToShadow::Syscall(*syscall_event))
104                    });
105                    continue;
106                } else {
107                    // Return syscall result
108                    return syscall_complete.retval;
109                }
110            }
111            ShimEventToShim::SyscallDoNative => {
112                // "Emulate" the syscall by executing it natively.
113
114                let rv = unsafe { native_syscall(&syscall_event.syscall_args) };
115
116                if let FfiOption::Some(strace_fd) =
117                    crate::tls_process_shmem::with(|process| process.strace_fd)
118                {
119                    let emulated_time = global_host_shmem::get()
120                        .sim_time
121                        .load(atomic::Ordering::Relaxed)
122                        - EmulatedTime::SIMULATION_START;
123                    let tid = tls_thread_shmem::with(|thread| thread.tid);
124                    let parts = TimeParts::from_nanos(emulated_time.as_nanos());
125                    let mut buffer = FormatBuffer::<200>::new();
126                    writeln!(
127                        &mut buffer,
128                        "{} [tid {}] ^^^ = {:?}",
129                        parts.fmt_hr_min_sec_nano(),
130                        tid,
131                        rv
132                    )
133                    .unwrap();
134                    // SAFETY: file descriptor should be valid and open.
135                    let strace_fd = unsafe { BorrowedFd::borrow_raw(strace_fd) };
136                    let mut strace_file_writer = BorrowedFdWriter::new(strace_fd);
137                    if let Err(e) = strace_file_writer.write_str(buffer.as_str()) {
138                        log::warn!("Couldn't write to strace_fd:{strace_fd:?}: {e:?}");
139                    }
140                }
141
142                return rv;
143            }
144            ShimEventToShim::Syscall(syscall) => {
145                // Execute the syscall and return the result to Shadow.
146
147                let res = unsafe { native_syscall(&syscall.syscall_args) };
148                tls_ipc::with(|ipc| {
149                    ipc.to_shadow().send(ShimEventToShadow::SyscallComplete(
150                        ShimEventSyscallComplete {
151                            retval: res,
152                            restartable: false,
153                        },
154                    ))
155                });
156            }
157            ShimEventToShim::AddThreadReq(r) => {
158                // Create a new native thread under our control
159
160                let clone_res = unsafe { crate::clone::do_clone(ctx.as_mut().unwrap(), &r) };
161                tls_ipc::with(|ipc| {
162                    ipc.to_shadow()
163                        .send(ShimEventToShadow::AddThreadRes(ShimEventAddThreadRes {
164                            clone_res,
165                        }))
166                })
167            }
168            e @ ShimEventToShim::StartRes(_) => {
169                panic!("Unexpected event: {e:?}");
170            }
171        }
172    }
173}
174
175pub mod export {
176    use crate::ExecutionContext;
177
178    use super::*;
179
180    /// # Safety
181    ///
182    /// `ctx` must be valid if provided.
183    #[unsafe(no_mangle)]
184    pub unsafe extern "C-unwind" fn shim_emulated_syscallv(
185        ctx: *mut libc::ucontext_t,
186        n: core::ffi::c_long,
187        mut args: va_list::VaList,
188    ) -> core::ffi::c_long {
189        let _prev = ExecutionContext::Shadow.enter();
190
191        let syscall_args = SyscallArgs {
192            number: n,
193            args: core::array::from_fn(|_| {
194                // SAFETY: syscall args all "fit" in an i64. Reading more arguments
195                // than actually provided is sound because any bit pattern is a
196                // valid i64.
197                let arg = unsafe { args.get::<i64>() };
198                SyscallReg::from(arg)
199            }),
200        };
201
202        let event = ShimEventSyscall { syscall_args };
203
204        let ctx = ctx.cast::<ucontext>();
205        let ctx = unsafe { ctx.as_mut() };
206        let retval = unsafe { emulated_syscall_event(ctx, &event) };
207
208        retval.into()
209    }
210
211    /// # Safety
212    ///
213    /// The specified syscall must be safe to make.
214    #[unsafe(no_mangle)]
215    pub unsafe extern "C-unwind" fn shim_native_syscallv(
216        n: core::ffi::c_long,
217        mut args: va_list::VaList,
218    ) -> core::ffi::c_long {
219        let syscall_args = SyscallArgs {
220            number: n,
221            args: core::array::from_fn(|_| {
222                // SAFETY: syscall args all "fit" in an i64. Reading more arguments
223                // than actually provided is sound because any bit pattern is a
224                // valid i64.
225                let arg = unsafe { args.get::<i64>() };
226                SyscallReg::from(arg)
227            }),
228        };
229        // SAFETY: Ensured by caller.
230        let rv = unsafe { native_syscall(&syscall_args) };
231        rv.into()
232    }
233}