shadow_rs/host/syscall/handler/
mod.rs

1use std::borrow::Cow;
2
3#[cfg(feature = "perf_timers")]
4use std::time::Duration;
5
6use linux_api::errno::Errno;
7use linux_api::syscall::SyscallNum;
8use shadow_shim_helper_rs::HostId;
9use shadow_shim_helper_rs::shadow_syscalls::ShadowSyscallNum;
10use shadow_shim_helper_rs::simulation_time::SimulationTime;
11use shadow_shim_helper_rs::syscall_types::SyscallArgs;
12use shadow_shim_helper_rs::syscall_types::SyscallReg;
13use shadow_shim_helper_rs::util::SendPointer;
14
15use crate::core::worker::Worker;
16use crate::cshadow as c;
17use crate::host::context::ThreadContext;
18use crate::host::descriptor::Descriptor;
19use crate::host::descriptor::descriptor_table::{DescriptorHandle, DescriptorTable};
20use crate::host::process::ProcessId;
21use crate::host::syscall::formatter::log_syscall_simple;
22use crate::host::syscall::is_shadow_syscall;
23use crate::host::syscall::types::SyscallReturn;
24use crate::host::syscall::types::{SyscallError, SyscallResult};
25use crate::host::thread::ThreadId;
26use crate::utility::counter::Counter;
27
28#[cfg(feature = "perf_timers")]
29use crate::utility::perf_timer::PerfTimer;
30
31mod clone;
32mod close_range;
33mod epoll;
34mod eventfd;
35mod fcntl;
36mod file;
37mod fileat;
38mod futex;
39mod ioctl;
40mod mman;
41mod poll;
42mod prctl;
43mod random;
44mod resource;
45mod sched;
46mod select;
47mod shadow;
48mod signal;
49mod socket;
50mod stat;
51mod sysinfo;
52mod time;
53mod timerfd;
54mod uio;
55mod unistd;
56mod wait;
57
/// Function-pointer type of a legacy syscall handler implemented in C.
///
/// The function is given a raw pointer to the `SyscallHandler` and a raw pointer to the
/// `SyscallArgs` of the syscall being handled. It returns a `SyscallReturn`, which
/// `SyscallHandler::legacy_syscall` converts into a `SyscallResult`.
type LegacySyscallFn =
    unsafe extern "C-unwind" fn(*mut SyscallHandler, *const SyscallArgs) -> SyscallReturn;
60
/// Syscall handling state for a single thread.
///
/// A handler is created for one specific host, process, and thread, and `syscall()` asserts that
/// it is only ever invoked with that same combination. Besides the identifiers, it tracks
/// whether a syscall is currently blocked, a completed result that is being held back, and
/// counters that are reported when the handler is dropped.
// Will eventually contain syscall handler state once migrated from the C handler.
pub struct SyscallHandler {
    /// The host that this `SyscallHandler` belongs to. Intended to be used for logging.
    host_id: HostId,
    /// The process that this `SyscallHandler` belongs to. Intended to be used for logging.
    process_id: ProcessId,
    /// The thread that this `SyscallHandler` belongs to. Intended to be used for logging.
    thread_id: ThreadId,
    /// The total number of syscalls that we have handled. Invocations that end up blocking are
    /// not counted; the total is logged when the handler is dropped.
    num_syscalls: u64,
    /// A counter for individual syscalls, keyed by syscall name. `None` when per-syscall counting
    /// was not requested in `new()`.
    syscall_counter: Option<Counter>,
    /// If we are currently blocking a specific syscall, i.e., waiting for a socket to be
    /// readable/writable or waiting for a timeout, the syscall number of that function is stored
    /// here. Will be `None` if a syscall is not currently blocked.
    blocked_syscall: Option<SyscallNum>,
    /// In some cases the syscall handler completes, but we block the caller anyway to move time
    /// forward. This stores the result of the completed syscall, to be returned when the caller
    /// resumes.
    pending_result: Option<SyscallResult>,
    /// We use this epoll to service syscalls that need to block on the status of multiple
    /// descriptors, like poll. Allocated with `c::epoll_new()` in `new()` and released with
    /// `c::legacyfile_unref()` when the handler is dropped.
    epoll: SendPointer<c::Epoll>,
    /// The cumulative time consumed while handling the current syscall. This includes the time from
    /// previous calls that ended up blocking.
    #[cfg(feature = "perf_timers")]
    perf_duration_current: Duration,
    /// The total time elapsed while handling all syscalls.
    #[cfg(feature = "perf_timers")]
    perf_duration_total: Duration,
}
92
93impl SyscallHandler {
94    pub fn new(
95        host_id: HostId,
96        process_id: ProcessId,
97        thread_id: ThreadId,
98        count_syscalls: bool,
99    ) -> SyscallHandler {
100        SyscallHandler {
101            host_id,
102            process_id,
103            thread_id,
104            num_syscalls: 0,
105            syscall_counter: count_syscalls.then(Counter::new),
106            blocked_syscall: None,
107            pending_result: None,
108            epoll: unsafe { SendPointer::new(c::epoll_new()) },
109            #[cfg(feature = "perf_timers")]
110            perf_duration_current: Duration::ZERO,
111            #[cfg(feature = "perf_timers")]
112            perf_duration_total: Duration::ZERO,
113        }
114    }
115
116    pub fn syscall(&mut self, ctx: &ThreadContext, args: &SyscallArgs) -> SyscallResult {
117        // it wouldn't make sense if we were given a different host, process, and thread
118        assert_eq!(ctx.host.id(), self.host_id);
119        assert_eq!(ctx.process.id(), self.process_id);
120        assert_eq!(ctx.thread.id(), self.thread_id);
121
122        let syscall = SyscallNum::new(args.number.try_into().unwrap());
123        let syscall_name = syscall.to_str().unwrap_or("unknown-syscall");
124
125        // make sure that we either don't have a blocked syscall, or if we blocked a syscall, then
126        // that same syscall should be executed again when it becomes unblocked
127        if let Some(blocked_syscall) = self.blocked_syscall {
128            if blocked_syscall != syscall {
129                panic!(
130                    "We blocked syscall {blocked_syscall} but syscall {syscall} is unexpectedly being invoked"
131                );
132            }
133        }
134
135        // were we previously blocked on this same syscall?
136        let was_blocked = self.blocked_syscall.is_some();
137
138        if let Some(pending_result) = self.pending_result.take() {
139            // The syscall was already completed, but we delayed the response to yield the CPU.
140            // Return that response now.
141            log::trace!("Returning delayed result");
142            assert!(!matches!(pending_result, Err(SyscallError::Blocked(_))));
143
144            self.blocked_syscall = None;
145            self.pending_result = None;
146
147            return pending_result;
148        }
149
150        log::trace!(
151            "SYSCALL_HANDLER_PRE: {} ({}){} — ({}, tid={})",
152            syscall_name,
153            args.number,
154            if was_blocked {
155                " (previously BLOCKed)"
156            } else {
157                ""
158            },
159            &*ctx.process.name(),
160            ctx.thread.id(),
161        );
162
163        // Count the frequency of each syscall, but only on the initial call. This avoids double
164        // counting in the case where the initial call blocked at first, but then later became
165        // unblocked and is now being handled again here.
166        if let Some(syscall_counter) = self.syscall_counter.as_mut() {
167            if !was_blocked {
168                syscall_counter.add_one(syscall_name);
169            }
170        }
171
172        #[cfg(feature = "perf_timers")]
173        let timer = PerfTimer::new_started();
174
175        let mut rv = self.run_handler(ctx, args);
176
177        #[cfg(feature = "perf_timers")]
178        {
179            // add the cumulative elapsed seconds
180            self.perf_duration_current += timer.elapsed();
181
182            log::debug!(
183                "Handling syscall {} ({}) took cumulative {} ms",
184                syscall_name,
185                args.number,
186                self.perf_duration_current.as_millis(),
187            );
188        }
189
190        if !matches!(rv, Err(SyscallError::Blocked(_))) {
191            // the syscall completed, count it and the cumulative time to complete it
192            self.num_syscalls += 1;
193
194            #[cfg(feature = "perf_timers")]
195            {
196                self.perf_duration_total += self.perf_duration_current;
197                self.perf_duration_current = Duration::ZERO;
198            }
199        }
200
201        if log::log_enabled!(log::Level::Trace) {
202            let rv_formatted = match &rv {
203                Ok(reg) => format!("{}", i64::from(*reg)),
204                Err(SyscallError::Failed(failed)) => {
205                    let errno = failed.errno;
206                    format!("{} ({errno})", errno.to_negated_i64())
207                }
208                Err(SyscallError::Native) => "<native>".to_string(),
209                Err(SyscallError::Blocked(_)) => "<blocked>".to_string(),
210            };
211
212            log::trace!(
213                "SYSCALL_HANDLER_POST: {} ({}) result {}{} — ({}, tid={})",
214                syscall_name,
215                args.number,
216                if was_blocked { "BLOCK -> " } else { "" },
217                rv_formatted,
218                &*ctx.process.name(),
219                ctx.thread.id(),
220            );
221        }
222
223        // If the syscall would be blocked, but there's a signal pending, fail with
224        // EINTR instead. The shim-side code will run the signal handlers and then
225        // either return the EINTR or restart the syscall (See SA_RESTART in
226        // signal(7)).
227        //
228        // We do this check *after* (not before) trying the syscall so that we don't
229        // "interrupt" a syscall that wouldn't have blocked in the first place, or
230        // that can return a "partial" result when interrupted. e.g. consider the
231        // sequence:
232        //
233        // * Thread is blocked on reading a file descriptor.
234        // * The read becomes ready and the thread is scheduled to run.
235        // * The thread receives an unblocked signal.
236        // * The thread runs again.
237        //
238        // In this scenario, the `read` call should be allowed to complete successfully.
239        // from signal(7):  "If an I/O call on a slow device has already transferred
240        // some data by the time it is interrupted by a signal handler, then the
241        // call will return a success  status  (normally,  the  number of bytes
242        // transferred)."
243
244        if let Err(SyscallError::Blocked(ref blocked)) = rv {
245            // the syscall wants to block, but is there a signal pending?
246            let is_unblocked_signal_pending = ctx
247                .thread
248                .unblocked_signal_pending(ctx.process, &ctx.host.shim_shmem_lock_borrow().unwrap());
249
250            if is_unblocked_signal_pending {
251                // return EINTR instead
252                rv = Err(SyscallError::new_interrupted(blocked.restartable));
253            }
254        }
255
256        // we only use unsafe borrows from C code, and we should have only called into C syscall
257        // handlers through `Self::legacy_syscall` which should have already flushed the pointers,
258        // but we may as well do it again here just to be safe
259        if rv.is_err() {
260            // the syscall didn't complete successfully; don't write back pointers
261            log::trace!(
262                "Syscall didn't complete successfully; discarding plugin ptrs without writing back"
263            );
264            ctx.process.free_unsafe_borrows_noflush();
265        } else {
266            ctx.process
267                .free_unsafe_borrows_flush()
268                .expect("flushing syscall ptrs");
269        }
270
271        if ctx.process.is_running() && !matches!(rv, Err(SyscallError::Blocked(_))) {
272            let host_shmem = ctx.host.shim_shmem();
273            let mut host_shmem_prot = ctx.host.shim_shmem_lock_borrow_mut().unwrap();
274
275            // increment unblocked syscall latency, but only for non-shadow-syscalls, since the
276            // latter are part of Shadow's internal plumbing; they shouldn't necessarily "consume"
277            // time
278            if ctx.host.shim_shmem().model_unblocked_syscall_latency && !is_shadow_syscall(syscall)
279            {
280                host_shmem_prot.unapplied_cpu_latency += host_shmem.unblocked_syscall_latency;
281            }
282
283            log::trace!(
284                "Unapplied CPU latency amt={}ns max={}ns",
285                host_shmem_prot.unapplied_cpu_latency.as_nanos(),
286                host_shmem.max_unapplied_cpu_latency.as_nanos()
287            );
288
289            if host_shmem_prot.unapplied_cpu_latency > host_shmem.max_unapplied_cpu_latency {
290                let new_time = Worker::current_time().unwrap()
291                    + core::mem::replace(
292                        &mut host_shmem_prot.unapplied_cpu_latency,
293                        SimulationTime::ZERO,
294                    );
295                if new_time <= Worker::max_event_runahead_time(ctx.host) {
296                    // The new time is early enough that we can safely just increment to that time.
297                    // i.e. there are no threads or other events scheduled to
298                    // run on this worker before `new_time`.
299                    log::trace!(
300                        "Reached max-unapplied-cpu-latency, but not max runahead; Incrementing time"
301                    );
302                    Worker::set_current_time(new_time);
303                } else {
304                    // We can't safely increment to the new time, e.g. because
305                    // there are other events or the end of the current
306                    // scheduler round scheduled to happen first.  Reschedule
307                    // the current thread to run at the new time instead of
308                    // incrementing it.
309                    log::trace!(
310                        "Reached max-unapplied-cpu-latency, and max runahead; Rescheduling"
311                    );
312
313                    // Save the syscall result so that we can return it later
314                    // instead of re-executing the syscall.
315                    assert!(self.pending_result.is_none());
316                    self.pending_result = Some(rv);
317
318                    rv = Err(SyscallError::new_blocked_until(new_time, false));
319                }
320            }
321        }
322
323        if matches!(rv, Err(SyscallError::Blocked(_))) {
324            // we are blocking: store the syscall number so we know to expect the same syscall again
325            // when it unblocks
326            self.blocked_syscall = Some(syscall);
327        } else {
328            self.blocked_syscall = None;
329        }
330
331        rv
332    }
333
    /// Dispatches the syscall described by `args` to the function that implements it.
    ///
    /// The syscall number falls into one of five groups, checked in this order:
    ///
    /// * syscalls emulated by Shadow, which are run through the `handle!` macro,
    /// * Shadow's own custom syscalls (`ShadowSyscallNum`), also run through `handle!`,
    /// * syscalls that should never get here because the shim handles them, which panic,
    /// * syscalls that are left to native Linux, which return `Err(SyscallError::Native)`,
    /// * everything else, which is reported as unsupported and fails with `ENOSYS`.
    ///
    /// In every non-panicking group the call is also written to the strace log through the
    /// logging helpers used below.
    ///
    /// # Panics
    ///
    /// Panics if `args.number` cannot be converted to the type taken by `SyscallNum::new`, if the
    /// syscall should have been handled in the shim, or if writing the strace log entry fails.
    // NOTE(review): the `allow` is presumably needed because the `SyscallNum::NR_*` constants
    // matched below are not upper-case; confirm against the lint output.
    #[allow(non_upper_case_globals)]
    fn run_handler(&mut self, ctx: &ThreadContext, args: &SyscallArgs) -> SyscallResult {
        // bundle everything that the individual handler functions need
        let mut ctx = SyscallContext {
            objs: ctx,
            args,
            handler: self,
        };

        let syscall = SyscallNum::new(ctx.args.number.try_into().unwrap());
        let syscall_name = syscall.to_str().unwrap_or("unknown-syscall");

        // `handle!(name)` calls `Self::name` through `SyscallHandlerFn::call` and, if strace
        // logging is enabled for the process, logs the call and its result using the
        // `SyscallLogger` function of the same name. It evaluates to the handler's result.
        macro_rules! handle {
            ($f:ident) => {{
                let rv = SyscallHandlerFn::call(Self::$f, &mut ctx);

                // log the syscall if enabled
                if let Some(strace_fmt_options) = ctx.objs.process.strace_logging_options() {
                    ctx.objs.process.with_strace_file(|file| {
                        crate::utility::macros::SyscallLogger::$f(
                            file,
                            ctx.args.args,
                            &rv,
                            strace_fmt_options,
                            ctx.objs.thread.id(),
                            &*ctx.objs.process.memory_borrow(),
                        )
                        .unwrap();
                    });
                }

                rv
            }};
        }

        match syscall {
            // SHADOW-HANDLED SYSCALLS
            //
            SyscallNum::NR_accept => handle!(accept),
            SyscallNum::NR_accept4 => handle!(accept4),
            SyscallNum::NR_alarm => handle!(alarm),
            SyscallNum::NR_bind => handle!(bind),
            SyscallNum::NR_brk => handle!(brk),
            SyscallNum::NR_capget => handle!(capget),
            SyscallNum::NR_capset => handle!(capset),
            SyscallNum::NR_chdir => handle!(chdir),
            SyscallNum::NR_clock_getres => handle!(clock_getres),
            SyscallNum::NR_clock_nanosleep => handle!(clock_nanosleep),
            SyscallNum::NR_clone => handle!(clone),
            SyscallNum::NR_clone3 => handle!(clone3),
            SyscallNum::NR_close => handle!(close),
            SyscallNum::NR_close_range => handle!(close_range),
            SyscallNum::NR_connect => handle!(connect),
            SyscallNum::NR_creat => handle!(creat),
            SyscallNum::NR_dup => handle!(dup),
            SyscallNum::NR_dup2 => handle!(dup2),
            SyscallNum::NR_dup3 => handle!(dup3),
            SyscallNum::NR_epoll_create => handle!(epoll_create),
            SyscallNum::NR_epoll_create1 => handle!(epoll_create1),
            SyscallNum::NR_epoll_ctl => handle!(epoll_ctl),
            SyscallNum::NR_epoll_pwait => handle!(epoll_pwait),
            SyscallNum::NR_epoll_pwait2 => handle!(epoll_pwait2),
            SyscallNum::NR_epoll_wait => handle!(epoll_wait),
            SyscallNum::NR_eventfd => handle!(eventfd),
            SyscallNum::NR_eventfd2 => handle!(eventfd2),
            SyscallNum::NR_execve => handle!(execve),
            SyscallNum::NR_execveat => handle!(execveat),
            SyscallNum::NR_exit_group => handle!(exit_group),
            SyscallNum::NR_faccessat => handle!(faccessat),
            SyscallNum::NR_fadvise64 => handle!(fadvise64),
            SyscallNum::NR_fallocate => handle!(fallocate),
            SyscallNum::NR_fchmod => handle!(fchmod),
            SyscallNum::NR_fchmodat => handle!(fchmodat),
            SyscallNum::NR_fchmodat2 => handle!(fchmodat2),
            SyscallNum::NR_fchown => handle!(fchown),
            SyscallNum::NR_fchownat => handle!(fchownat),
            SyscallNum::NR_fcntl => handle!(fcntl),
            SyscallNum::NR_fdatasync => handle!(fdatasync),
            SyscallNum::NR_fgetxattr => handle!(fgetxattr),
            SyscallNum::NR_flistxattr => handle!(flistxattr),
            SyscallNum::NR_flock => handle!(flock),
            SyscallNum::NR_fork => handle!(fork),
            SyscallNum::NR_fremovexattr => handle!(fremovexattr),
            SyscallNum::NR_fsetxattr => handle!(fsetxattr),
            SyscallNum::NR_fstat => handle!(fstat),
            SyscallNum::NR_fstatfs => handle!(fstatfs),
            SyscallNum::NR_fsync => handle!(fsync),
            SyscallNum::NR_ftruncate => handle!(ftruncate),
            SyscallNum::NR_futex => handle!(futex),
            SyscallNum::NR_futimesat => handle!(futimesat),
            SyscallNum::NR_get_robust_list => handle!(get_robust_list),
            SyscallNum::NR_getdents => handle!(getdents),
            SyscallNum::NR_getdents64 => handle!(getdents64),
            SyscallNum::NR_getitimer => handle!(getitimer),
            SyscallNum::NR_getpeername => handle!(getpeername),
            SyscallNum::NR_getpgid => handle!(getpgid),
            SyscallNum::NR_getpgrp => handle!(getpgrp),
            SyscallNum::NR_getpid => handle!(getpid),
            SyscallNum::NR_getppid => handle!(getppid),
            SyscallNum::NR_getrandom => handle!(getrandom),
            SyscallNum::NR_getsid => handle!(getsid),
            SyscallNum::NR_getsockname => handle!(getsockname),
            SyscallNum::NR_getsockopt => handle!(getsockopt),
            SyscallNum::NR_gettid => handle!(gettid),
            SyscallNum::NR_ioctl => handle!(ioctl),
            SyscallNum::NR_kill => handle!(kill),
            SyscallNum::NR_linkat => handle!(linkat),
            SyscallNum::NR_listen => handle!(listen),
            SyscallNum::NR_lseek => handle!(lseek),
            SyscallNum::NR_mkdirat => handle!(mkdirat),
            SyscallNum::NR_mknodat => handle!(mknodat),
            SyscallNum::NR_mmap => handle!(mmap),
            SyscallNum::NR_mprotect => handle!(mprotect),
            SyscallNum::NR_mremap => handle!(mremap),
            SyscallNum::NR_munmap => handle!(munmap),
            SyscallNum::NR_nanosleep => handle!(nanosleep),
            SyscallNum::NR_newfstatat => handle!(newfstatat),
            SyscallNum::NR_open => handle!(open),
            SyscallNum::NR_openat => handle!(openat),
            SyscallNum::NR_pipe => handle!(pipe),
            SyscallNum::NR_pipe2 => handle!(pipe2),
            SyscallNum::NR_poll => handle!(poll),
            SyscallNum::NR_ppoll => handle!(ppoll),
            SyscallNum::NR_prctl => handle!(prctl),
            SyscallNum::NR_pread64 => handle!(pread64),
            SyscallNum::NR_preadv => handle!(preadv),
            SyscallNum::NR_preadv2 => handle!(preadv2),
            SyscallNum::NR_prlimit64 => handle!(prlimit64),
            SyscallNum::NR_pselect6 => handle!(pselect6),
            SyscallNum::NR_pwrite64 => handle!(pwrite64),
            SyscallNum::NR_pwritev => handle!(pwritev),
            SyscallNum::NR_pwritev2 => handle!(pwritev2),
            SyscallNum::NR_read => handle!(read),
            SyscallNum::NR_readahead => handle!(readahead),
            SyscallNum::NR_readlinkat => handle!(readlinkat),
            SyscallNum::NR_readv => handle!(readv),
            SyscallNum::NR_recvfrom => handle!(recvfrom),
            SyscallNum::NR_recvmsg => handle!(recvmsg),
            SyscallNum::NR_renameat => handle!(renameat),
            SyscallNum::NR_renameat2 => handle!(renameat2),
            SyscallNum::NR_rseq => handle!(rseq),
            SyscallNum::NR_rt_sigaction => handle!(rt_sigaction),
            SyscallNum::NR_rt_sigprocmask => handle!(rt_sigprocmask),
            SyscallNum::NR_sched_getaffinity => handle!(sched_getaffinity),
            SyscallNum::NR_sched_setaffinity => handle!(sched_setaffinity),
            SyscallNum::NR_select => handle!(select),
            SyscallNum::NR_sendmsg => handle!(sendmsg),
            SyscallNum::NR_sendto => handle!(sendto),
            SyscallNum::NR_set_robust_list => handle!(set_robust_list),
            SyscallNum::NR_set_tid_address => handle!(set_tid_address),
            SyscallNum::NR_setitimer => handle!(setitimer),
            SyscallNum::NR_setpgid => handle!(setpgid),
            SyscallNum::NR_setsid => handle!(setsid),
            SyscallNum::NR_setsockopt => handle!(setsockopt),
            SyscallNum::NR_shutdown => handle!(shutdown),
            SyscallNum::NR_sigaltstack => handle!(sigaltstack),
            SyscallNum::NR_socket => handle!(socket),
            SyscallNum::NR_socketpair => handle!(socketpair),
            SyscallNum::NR_statx => handle!(statx),
            SyscallNum::NR_symlinkat => handle!(symlinkat),
            SyscallNum::NR_sync_file_range => handle!(sync_file_range),
            SyscallNum::NR_syncfs => handle!(syncfs),
            SyscallNum::NR_sysinfo => handle!(sysinfo),
            SyscallNum::NR_tgkill => handle!(tgkill),
            SyscallNum::NR_timerfd_create => handle!(timerfd_create),
            SyscallNum::NR_timerfd_gettime => handle!(timerfd_gettime),
            SyscallNum::NR_timerfd_settime => handle!(timerfd_settime),
            SyscallNum::NR_tkill => handle!(tkill),
            SyscallNum::NR_uname => handle!(uname),
            SyscallNum::NR_unlinkat => handle!(unlinkat),
            SyscallNum::NR_utimensat => handle!(utimensat),
            SyscallNum::NR_vfork => handle!(vfork),
            SyscallNum::NR_waitid => handle!(waitid),
            SyscallNum::NR_wait4 => handle!(wait4),
            SyscallNum::NR_write => handle!(write),
            SyscallNum::NR_writev => handle!(writev),
            //
            // CUSTOM SHADOW-SPECIFIC SYSCALLS
            //
            // The guard makes this arm apply only to numbers that convert to a
            // `ShadowSyscallNum`; every other number falls through to the arms below.
            x if ShadowSyscallNum::try_from(x).is_ok() => {
                match ShadowSyscallNum::try_from(x).expect("Conversion just succeeded above") {
                    ShadowSyscallNum::hostname_to_addr_ipv4 => {
                        handle!(shadow_hostname_to_addr_ipv4)
                    }
                    ShadowSyscallNum::init_memory_manager => {
                        handle!(shadow_init_memory_manager)
                    }
                    ShadowSyscallNum::shadow_yield => handle!(shadow_yield),
                }
            }
            //
            // SHIM-ONLY SYSCALLS
            //
            SyscallNum::NR_clock_gettime
            | SyscallNum::NR_gettimeofday
            | SyscallNum::NR_sched_yield
            | SyscallNum::NR_time => {
                panic!(
                    "Syscall {} ({}) should have been handled in the shim",
                    syscall_name, ctx.args.number,
                )
            }
            //
            // NATIVE LINUX-HANDLED SYSCALLS
            //
            SyscallNum::NR_access
            | SyscallNum::NR_arch_prctl
            | SyscallNum::NR_chmod
            | SyscallNum::NR_chown
            | SyscallNum::NR_exit
            | SyscallNum::NR_getcwd
            | SyscallNum::NR_geteuid
            | SyscallNum::NR_getegid
            | SyscallNum::NR_getgid
            | SyscallNum::NR_getgroups
            | SyscallNum::NR_getresgid
            | SyscallNum::NR_getresuid
            | SyscallNum::NR_getrlimit
            | SyscallNum::NR_getuid
            | SyscallNum::NR_getxattr
            | SyscallNum::NR_lchown
            | SyscallNum::NR_lgetxattr
            | SyscallNum::NR_link
            | SyscallNum::NR_listxattr
            | SyscallNum::NR_llistxattr
            | SyscallNum::NR_lremovexattr
            | SyscallNum::NR_lsetxattr
            | SyscallNum::NR_lstat
            | SyscallNum::NR_madvise
            | SyscallNum::NR_mkdir
            | SyscallNum::NR_mknod
            | SyscallNum::NR_readlink
            | SyscallNum::NR_removexattr
            | SyscallNum::NR_rename
            | SyscallNum::NR_rmdir
            | SyscallNum::NR_rt_sigreturn
            | SyscallNum::NR_setfsgid
            | SyscallNum::NR_setfsuid
            | SyscallNum::NR_setgid
            | SyscallNum::NR_setregid
            | SyscallNum::NR_setresgid
            | SyscallNum::NR_setresuid
            | SyscallNum::NR_setreuid
            | SyscallNum::NR_setrlimit
            | SyscallNum::NR_setuid
            | SyscallNum::NR_setxattr
            | SyscallNum::NR_stat
            | SyscallNum::NR_statfs
            | SyscallNum::NR_symlink
            | SyscallNum::NR_truncate
            | SyscallNum::NR_unlink
            | SyscallNum::NR_utime
            | SyscallNum::NR_utimes => {
                log::trace!("Native syscall {} ({})", syscall_name, ctx.args.number);

                let rv = Err(SyscallError::Native);

                // the arguments are not formatted for native syscalls, hence the "..."
                log_syscall_simple(
                    ctx.objs.process,
                    ctx.objs.process.strace_logging_options(),
                    ctx.objs.thread.id(),
                    syscall_name,
                    "...",
                    &rv,
                )
                .unwrap();

                rv
            }
            //
            // UNSUPPORTED SYSCALL
            //
            _ => {
                log_once_per_value_at_level!(
                    syscall,
                    SyscallNum,
                    log::Level::Warn,
                    log::Level::Debug,
                    "Detected unsupported syscall {} ({}) called from thread {} in process {} on host {}",
                    syscall_name,
                    ctx.args.number,
                    ctx.objs.thread.id(),
                    &*ctx.objs.process.plugin_name(),
                    ctx.objs.host.name(),
                );

                let rv = Err(Errno::ENOSYS.into());

                let (syscall_name, syscall_args) = match syscall.to_str() {
                    // log it in the form "poll(...)"
                    Some(syscall_name) => (syscall_name, Cow::Borrowed("...")),
                    // log it in the form "syscall(X, ...)"
                    None => ("syscall", Cow::Owned(format!("{}, ...", ctx.args.number))),
                };

                log_syscall_simple(
                    ctx.objs.process,
                    ctx.objs.process.strace_logging_options(),
                    ctx.objs.thread.id(),
                    syscall_name,
                    &syscall_args,
                    &rv,
                )
                .unwrap();

                rv
            }
        }
    }
642
    /// Did the last syscall result in `SyscallError::Blocked`? If called from a syscall handler and
    /// `is_blocked()` returns `true`, then the current syscall is the same syscall that previously
    /// blocked. For example, if currently running the `connect` syscall handler and `is_blocked()`
    /// is `true`, then the previous syscall handler that ran was also `connect` and it returned
    /// `SyscallError::Blocked`.
    ///
    /// The answer comes from `blocked_syscall`, which `syscall()` sets when it returns a blocked
    /// result and clears when it returns anything else.
    pub fn is_blocked(&self) -> bool {
        self.blocked_syscall.is_some()
    }
651
652    /// Internal helper that returns the `Descriptor` for the fd if it exists, otherwise returns
653    /// EBADF.
654    fn get_descriptor(
655        descriptor_table: &DescriptorTable,
656        fd: impl TryInto<DescriptorHandle>,
657    ) -> Result<&Descriptor, linux_api::errno::Errno> {
658        // check that fd is within bounds
659        let fd = fd.try_into().or(Err(linux_api::errno::Errno::EBADF))?;
660
661        match descriptor_table.get(fd) {
662            Some(desc) => Ok(desc),
663            None => Err(linux_api::errno::Errno::EBADF),
664        }
665    }
666
667    /// Internal helper that returns the `Descriptor` for the fd if it exists, otherwise returns
668    /// EBADF.
669    fn get_descriptor_mut(
670        descriptor_table: &mut DescriptorTable,
671        fd: impl TryInto<DescriptorHandle>,
672    ) -> Result<&mut Descriptor, linux_api::errno::Errno> {
673        // check that fd is within bounds
674        let fd = fd.try_into().or(Err(linux_api::errno::Errno::EBADF))?;
675
676        match descriptor_table.get_mut(fd) {
677            Some(desc) => Ok(desc),
678            None => Err(linux_api::errno::Errno::EBADF),
679        }
680    }
681
682    /// Run a legacy C syscall handler.
683    fn legacy_syscall<T: From<SyscallReg>>(
684        syscall: LegacySyscallFn,
685        ctx: &mut SyscallContext,
686    ) -> Result<T, SyscallError> {
687        let rv: SyscallResult =
688            unsafe { syscall(ctx.handler, std::ptr::from_ref(ctx.args)) }.into();
689
690        // we need to flush pointers here so that the syscall formatter can reliably borrow process
691        // memory without an incompatible borrow
692        if rv.is_err() {
693            // the syscall didn't complete successfully; don't write back pointers
694            log::trace!(
695                "Syscall didn't complete successfully; discarding plugin ptrs without writing back."
696            );
697            ctx.objs.process.free_unsafe_borrows_noflush();
698        } else {
699            ctx.objs
700                .process
701                .free_unsafe_borrows_flush()
702                .expect("flushing syscall ptrs");
703        }
704
705        rv.map(Into::into)
706    }
707}
708
709impl std::ops::Drop for SyscallHandler {
710    fn drop(&mut self) {
711        #[cfg(feature = "perf_timers")]
712        log::debug!(
713            "Handled {} syscalls in {} seconds",
714            self.num_syscalls,
715            self.perf_duration_total.as_secs()
716        );
717        #[cfg(not(feature = "perf_timers"))]
718        log::debug!("Handled {} syscalls", self.num_syscalls);
719
720        if let Some(syscall_counter) = self.syscall_counter.as_mut() {
721            // log the plugin thread specific counts
722            log::debug!(
723                "Thread {} syscall counts: {}",
724                self.thread_id,
725                syscall_counter,
726            );
727
728            // add up the counts at the worker level
729            Worker::add_syscall_counts(syscall_counter);
730        }
731
732        unsafe { c::legacyfile_unref(self.epoll.ptr() as *mut std::ffi::c_void) };
733    }
734}
735
/// The borrowed state handed to a syscall handler function for the duration of one syscall.
///
/// Every [`SyscallHandlerFn`] implementation receives a `&mut SyscallContext` as its first
/// argument.
pub struct SyscallContext<'a, 'b> {
    /// The thread context the syscall runs in; handlers reach the process through this (for
    /// example `objs.process`).
    pub objs: &'a ThreadContext<'b>,
    /// The arguments of the syscall being handled. Individual argument registers are read by
    /// index with `args.get(i)`.
    pub args: &'a SyscallArgs,
    /// The `SyscallHandler` servicing this syscall. Legacy C handlers are passed a raw pointer
    /// to it.
    pub handler: &'a mut SyscallHandler,
}
741
/// A function that can be invoked as a syscall handler.
///
/// The type parameter `T` is a marker: the implementations in this file use a tuple of the
/// handler's argument types (`()`, `(T1,)`, `(T1, T2)`, ...) so that functions taking different
/// numbers of syscall arguments each get their own implementation.
pub trait SyscallHandlerFn<T> {
    /// Consumes the handler and runs it with the given context, returning the syscall result.
    fn call(self, ctx: &mut SyscallContext) -> SyscallResult;
}
745
746impl<F, E, T0> SyscallHandlerFn<()> for F
747where
748    F: Fn(&mut SyscallContext) -> Result<T0, E>,
749    E: Into<SyscallError>,
750    T0: Into<SyscallReg>,
751{
752    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
753        self(ctx).map(Into::into).map_err(Into::into)
754    }
755}
756
757impl<F, E, T0, T1> SyscallHandlerFn<(T1,)> for F
758where
759    F: Fn(&mut SyscallContext, T1) -> Result<T0, E>,
760    E: Into<SyscallError>,
761    T0: Into<SyscallReg>,
762    T1: From<SyscallReg>,
763{
764    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
765        self(ctx, ctx.args.get(0).into())
766            .map(Into::into)
767            .map_err(Into::into)
768    }
769}
770
771impl<F, E, T0, T1, T2> SyscallHandlerFn<(T1, T2)> for F
772where
773    F: Fn(&mut SyscallContext, T1, T2) -> Result<T0, E>,
774    E: Into<SyscallError>,
775    T0: Into<SyscallReg>,
776    T1: From<SyscallReg>,
777    T2: From<SyscallReg>,
778{
779    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
780        self(ctx, ctx.args.get(0).into(), ctx.args.get(1).into())
781            .map(Into::into)
782            .map_err(Into::into)
783    }
784}
785
786impl<F, E, T0, T1, T2, T3> SyscallHandlerFn<(T1, T2, T3)> for F
787where
788    F: Fn(&mut SyscallContext, T1, T2, T3) -> Result<T0, E>,
789    E: Into<SyscallError>,
790    T0: Into<SyscallReg>,
791    T1: From<SyscallReg>,
792    T2: From<SyscallReg>,
793    T3: From<SyscallReg>,
794{
795    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
796        self(
797            ctx,
798            ctx.args.get(0).into(),
799            ctx.args.get(1).into(),
800            ctx.args.get(2).into(),
801        )
802        .map(Into::into)
803        .map_err(Into::into)
804    }
805}
806
807impl<F, E, T0, T1, T2, T3, T4> SyscallHandlerFn<(T1, T2, T3, T4)> for F
808where
809    F: Fn(&mut SyscallContext, T1, T2, T3, T4) -> Result<T0, E>,
810    E: Into<SyscallError>,
811    T0: Into<SyscallReg>,
812    T1: From<SyscallReg>,
813    T2: From<SyscallReg>,
814    T3: From<SyscallReg>,
815    T4: From<SyscallReg>,
816{
817    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
818        self(
819            ctx,
820            ctx.args.get(0).into(),
821            ctx.args.get(1).into(),
822            ctx.args.get(2).into(),
823            ctx.args.get(3).into(),
824        )
825        .map(Into::into)
826        .map_err(Into::into)
827    }
828}
829
830impl<F, E, T0, T1, T2, T3, T4, T5> SyscallHandlerFn<(T1, T2, T3, T4, T5)> for F
831where
832    F: Fn(&mut SyscallContext, T1, T2, T3, T4, T5) -> Result<T0, E>,
833    E: Into<SyscallError>,
834    T0: Into<SyscallReg>,
835    T1: From<SyscallReg>,
836    T2: From<SyscallReg>,
837    T3: From<SyscallReg>,
838    T4: From<SyscallReg>,
839    T5: From<SyscallReg>,
840{
841    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
842        self(
843            ctx,
844            ctx.args.get(0).into(),
845            ctx.args.get(1).into(),
846            ctx.args.get(2).into(),
847            ctx.args.get(3).into(),
848            ctx.args.get(4).into(),
849        )
850        .map(Into::into)
851        .map_err(Into::into)
852    }
853}
854
855impl<F, E, T0, T1, T2, T3, T4, T5, T6> SyscallHandlerFn<(T1, T2, T3, T4, T5, T6)> for F
856where
857    F: Fn(&mut SyscallContext, T1, T2, T3, T4, T5, T6) -> Result<T0, E>,
858    E: Into<SyscallError>,
859    T0: Into<SyscallReg>,
860    T1: From<SyscallReg>,
861    T2: From<SyscallReg>,
862    T3: From<SyscallReg>,
863    T4: From<SyscallReg>,
864    T5: From<SyscallReg>,
865    T6: From<SyscallReg>,
866{
867    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
868        self(
869            ctx,
870            ctx.args.get(0).into(),
871            ctx.args.get(1).into(),
872            ctx.args.get(2).into(),
873            ctx.args.get(3).into(),
874            ctx.args.get(4).into(),
875            ctx.args.get(5).into(),
876        )
877        .map(Into::into)
878        .map_err(Into::into)
879    }
880}
881
882mod export {
883    use crate::host::host::Host;
884    use crate::host::process::Process;
885    use crate::host::thread::Thread;
886
887    use super::*;
888
889    /// Returns a pointer to the current running host. The returned pointer is invalidated the next
890    /// time the worker switches hosts. Rust syscall handlers should get the host from the
891    /// [`SyscallContext`] instead.
892    #[unsafe(no_mangle)]
893    pub extern "C-unwind" fn rustsyscallhandler_getHost(sys: *const SyscallHandler) -> *const Host {
894        let sys = unsafe { sys.as_ref() }.unwrap();
895        Worker::with_active_host(|h| {
896            assert_eq!(h.id(), sys.host_id);
897            std::ptr::from_ref(h)
898        })
899        .unwrap()
900    }
901
902    /// Returns a pointer to the current running process. The returned pointer is invalidated the
903    /// next time the worker switches processes. Rust syscall handlers should get the process from
904    /// the [`SyscallContext`] instead.
905    #[unsafe(no_mangle)]
906    pub extern "C-unwind" fn rustsyscallhandler_getProcess(
907        sys: *const SyscallHandler,
908    ) -> *const Process {
909        let sys = unsafe { sys.as_ref() }.unwrap();
910        Worker::with_active_process(|p| {
911            assert_eq!(p.id(), sys.process_id);
912            std::ptr::from_ref(p)
913        })
914        .unwrap()
915    }
916
917    /// Returns a pointer to the current running thread. The returned pointer is invalidated the
918    /// next time the worker switches threads. Rust syscall handlers should get the thread from the
919    /// [`SyscallContext`] instead.
920    #[unsafe(no_mangle)]
921    pub extern "C-unwind" fn rustsyscallhandler_getThread(
922        sys: *const SyscallHandler,
923    ) -> *const Thread {
924        let sys = unsafe { sys.as_ref() }.unwrap();
925        Worker::with_active_thread(|t| {
926            assert_eq!(t.id(), sys.thread_id);
927            std::ptr::from_ref(t)
928        })
929        .unwrap()
930    }
931
932    #[unsafe(no_mangle)]
933    pub extern "C-unwind" fn rustsyscallhandler_wasBlocked(sys: *const SyscallHandler) -> bool {
934        let sys = unsafe { sys.as_ref() }.unwrap();
935        sys.is_blocked()
936    }
937
938    #[unsafe(no_mangle)]
939    pub extern "C-unwind" fn rustsyscallhandler_didListenTimeoutExpire(
940        sys: *const SyscallHandler,
941    ) -> bool {
942        let sys = unsafe { sys.as_ref() }.unwrap();
943
944        // will be `None` if the syscall condition doesn't exist or there's no timeout
945        let timeout = Worker::with_active_thread(|t| {
946            assert_eq!(t.id(), sys.thread_id);
947            t.syscall_condition().and_then(|x| x.timeout())
948        })
949        .unwrap();
950
951        // true if there is a timeout and it's before or at the current time
952        timeout
953            .map(|timeout| Worker::current_time().unwrap() >= timeout)
954            .unwrap_or(false)
955    }
956
957    #[unsafe(no_mangle)]
958    pub extern "C-unwind" fn rustsyscallhandler_getEpoll(
959        sys: *const SyscallHandler,
960    ) -> *mut c::Epoll {
961        let sys = unsafe { sys.as_ref() }.unwrap();
962        sys.epoll.ptr()
963    }
964}