shadow_rs/host/syscall/handler/
mod.rs

1use std::borrow::Cow;
2
3#[cfg(feature = "perf_timers")]
4use std::time::Duration;
5
6use linux_api::errno::Errno;
7use linux_api::syscall::SyscallNum;
8use shadow_shim_helper_rs::HostId;
9use shadow_shim_helper_rs::shadow_syscalls::ShadowSyscallNum;
10use shadow_shim_helper_rs::simulation_time::SimulationTime;
11use shadow_shim_helper_rs::syscall_types::SyscallArgs;
12use shadow_shim_helper_rs::syscall_types::SyscallReg;
13use shadow_shim_helper_rs::util::SendPointer;
14
15use crate::core::worker::Worker;
16use crate::cshadow as c;
17use crate::host::context::ThreadContext;
18use crate::host::descriptor::Descriptor;
19use crate::host::descriptor::descriptor_table::{DescriptorHandle, DescriptorTable};
20use crate::host::process::ProcessId;
21use crate::host::syscall::formatter::log_syscall_simple;
22use crate::host::syscall::is_shadow_syscall;
23use crate::host::syscall::types::SyscallReturn;
24use crate::host::syscall::types::{SyscallError, SyscallResult};
25use crate::host::thread::ThreadId;
26use crate::utility::counter::Counter;
27
28#[cfg(feature = "perf_timers")]
29use crate::utility::perf_timer::PerfTimer;
30
31mod clone;
32mod close_range;
33mod epoll;
34mod eventfd;
35mod fcntl;
36mod file;
37mod fileat;
38mod futex;
39mod ioctl;
40mod mman;
41mod poll;
42mod prctl;
43mod random;
44mod resource;
45mod sched;
46mod select;
47mod shadow;
48mod signal;
49mod socket;
50mod stat;
51mod sysinfo;
52mod time;
53mod timerfd;
54mod uio;
55mod unistd;
56mod wait;
57
/// Signature of a legacy C syscall handler: an `unsafe extern "C-unwind"` function that takes a
/// raw pointer to the [`SyscallHandler`] and a raw pointer to the [`SyscallArgs`], and returns a
/// [`SyscallReturn`]. Functions of this type are invoked through
/// `SyscallHandler::legacy_syscall`.
type LegacySyscallFn =
    unsafe extern "C-unwind" fn(*mut SyscallHandler, *const SyscallArgs) -> SyscallReturn;
60
/// Syscall handling state for a single thread. The host, process, and thread ids stored here are
/// asserted against the calling context at the start of every `SyscallHandler::syscall` call.
// Will eventually contain syscall handler state once migrated from the c handler
pub struct SyscallHandler {
    /// The host that this `SyscallHandler` belongs to. Intended to be used for logging.
    host_id: HostId,
    /// The process that this `SyscallHandler` belongs to. Intended to be used for logging.
    process_id: ProcessId,
    /// The thread that this `SyscallHandler` belongs to. Intended to be used for logging.
    thread_id: ThreadId,
    /// The total number of syscalls that we have handled. Only incremented when a handler run
    /// finishes with a result other than `SyscallError::Blocked`.
    num_syscalls: u64,
    /// A counter for individual syscalls. `None` when syscall counting was not requested in
    /// `new()`.
    syscall_counter: Option<Counter>,
    /// If we are currently blocking a specific syscall, i.e., waiting for a socket to be
    /// readable/writable or waiting for a timeout, the syscall number of that function is stored
    /// here. Will be `None` if a syscall is not currently blocked.
    blocked_syscall: Option<SyscallNum>,
    /// In some cases the syscall handler completes, but we block the caller anyway to move time
    /// forward. This stores the result of the completed syscall, to be returned when the caller
    /// resumes.
    pending_result: Option<SyscallResult>,
    /// We use this epoll to service syscalls that need to block on the status of multiple
    /// descriptors, like poll. Allocated with `c::epoll_new()` in `new()` and released when the
    /// handler is dropped.
    epoll: SendPointer<c::Epoll>,
    /// The cumulative time consumed while handling the current syscall. This includes the time from
    /// previous calls that ended up blocking.
    #[cfg(feature = "perf_timers")]
    perf_duration_current: Duration,
    /// The total time elapsed while handling all syscalls.
    #[cfg(feature = "perf_timers")]
    perf_duration_total: Duration,
}
92
93impl SyscallHandler {
94    pub fn new(
95        host_id: HostId,
96        process_id: ProcessId,
97        thread_id: ThreadId,
98        count_syscalls: bool,
99    ) -> SyscallHandler {
100        SyscallHandler {
101            host_id,
102            process_id,
103            thread_id,
104            num_syscalls: 0,
105            syscall_counter: count_syscalls.then(Counter::new),
106            blocked_syscall: None,
107            pending_result: None,
108            epoll: unsafe { SendPointer::new(c::epoll_new()) },
109            #[cfg(feature = "perf_timers")]
110            perf_duration_current: Duration::ZERO,
111            #[cfg(feature = "perf_timers")]
112            perf_duration_total: Duration::ZERO,
113        }
114    }
115
116    pub fn syscall(&mut self, ctx: &ThreadContext, args: &SyscallArgs) -> SyscallResult {
117        // it wouldn't make sense if we were given a different host, process, and thread
118        assert_eq!(ctx.host.id(), self.host_id);
119        assert_eq!(ctx.process.id(), self.process_id);
120        assert_eq!(ctx.thread.id(), self.thread_id);
121
122        let syscall = SyscallNum::new(args.number.try_into().unwrap());
123        let syscall_name = syscall.to_str().unwrap_or("unknown-syscall");
124
125        // make sure that we either don't have a blocked syscall, or if we blocked a syscall, then
126        // that same syscall should be executed again when it becomes unblocked
127        if let Some(blocked_syscall) = self.blocked_syscall {
128            if blocked_syscall != syscall {
129                panic!(
130                    "We blocked syscall {blocked_syscall} but syscall {syscall} is unexpectedly being invoked"
131                );
132            }
133        }
134
135        // were we previously blocked on this same syscall?
136        let was_blocked = self.blocked_syscall.is_some();
137
138        if let Some(pending_result) = self.pending_result.take() {
139            // The syscall was already completed, but we delayed the response to yield the CPU.
140            // Return that response now.
141            log::trace!("Returning delayed result");
142            assert!(!matches!(pending_result, Err(SyscallError::Blocked(_))));
143
144            self.blocked_syscall = None;
145            self.pending_result = None;
146
147            return pending_result;
148        }
149
150        log::trace!(
151            "SYSCALL_HANDLER_PRE: {} ({}){} — ({}, tid={})",
152            syscall_name,
153            args.number,
154            if was_blocked {
155                " (previously BLOCKed)"
156            } else {
157                ""
158            },
159            &*ctx.process.name(),
160            ctx.thread.id(),
161        );
162
163        // Count the frequency of each syscall, but only on the initial call. This avoids double
164        // counting in the case where the initial call blocked at first, but then later became
165        // unblocked and is now being handled again here.
166        if let Some(syscall_counter) = self.syscall_counter.as_mut() {
167            if !was_blocked {
168                syscall_counter.add_one(syscall_name);
169            }
170        }
171
172        #[cfg(feature = "perf_timers")]
173        let timer = PerfTimer::new_started();
174
175        let mut rv = self.run_handler(ctx, args);
176
177        #[cfg(feature = "perf_timers")]
178        {
179            // add the cumulative elapsed seconds
180            self.perf_duration_current += timer.elapsed();
181
182            log::debug!(
183                "Handling syscall {} ({}) took cumulative {} ms",
184                syscall_name,
185                args.number,
186                self.perf_duration_current.as_millis(),
187            );
188        }
189
190        if !matches!(rv, Err(SyscallError::Blocked(_))) {
191            // the syscall completed, count it and the cumulative time to complete it
192            self.num_syscalls += 1;
193
194            #[cfg(feature = "perf_timers")]
195            {
196                self.perf_duration_total += self.perf_duration_current;
197                self.perf_duration_current = Duration::ZERO;
198            }
199        }
200
201        if log::log_enabled!(log::Level::Trace) {
202            let rv_formatted = match &rv {
203                Ok(reg) => format!("{}", i64::from(*reg)),
204                Err(SyscallError::Failed(failed)) => {
205                    let errno = failed.errno;
206                    format!("{} ({errno})", errno.to_negated_i64())
207                }
208                Err(SyscallError::Native) => "<native>".to_string(),
209                Err(SyscallError::Blocked(_)) => "<blocked>".to_string(),
210            };
211
212            log::trace!(
213                "SYSCALL_HANDLER_POST: {} ({}) result {}{} — ({}, tid={})",
214                syscall_name,
215                args.number,
216                if was_blocked { "BLOCK -> " } else { "" },
217                rv_formatted,
218                &*ctx.process.name(),
219                ctx.thread.id(),
220            );
221        }
222
223        // If the syscall would be blocked, but there's a signal pending, fail with
224        // EINTR instead. The shim-side code will run the signal handlers and then
225        // either return the EINTR or restart the syscall (See SA_RESTART in
226        // signal(7)).
227        //
228        // We do this check *after* (not before) trying the syscall so that we don't
229        // "interrupt" a syscall that wouldn't have blocked in the first place, or
230        // that can return a "partial" result when interrupted. e.g. consider the
231        // sequence:
232        //
233        // * Thread is blocked on reading a file descriptor.
234        // * The read becomes ready and the thread is scheduled to run.
235        // * The thread receives an unblocked signal.
236        // * The thread runs again.
237        //
238        // In this scenario, the `read` call should be allowed to complete successfully.
239        // from signal(7):  "If an I/O call on a slow device has already transferred
240        // some data by the time it is interrupted by a signal handler, then the
241        // call will return a success  status  (normally,  the  number of bytes
242        // transferred)."
243
244        if let Err(SyscallError::Blocked(ref blocked)) = rv {
245            // the syscall wants to block, but is there a signal pending?
246            let is_unblocked_signal_pending = ctx
247                .thread
248                .unblocked_signal_pending(ctx.process, &ctx.host.shim_shmem_lock_borrow().unwrap());
249
250            if is_unblocked_signal_pending {
251                // return EINTR instead
252                rv = Err(SyscallError::new_interrupted(blocked.restartable));
253            }
254        }
255
256        // we only use unsafe borrows from C code, and we should have only called into C syscall
257        // handlers through `Self::legacy_syscall` which should have already flushed the pointers,
258        // but we may as well do it again here just to be safe
259        if rv.is_err() {
260            // the syscall didn't complete successfully; don't write back pointers
261            log::trace!(
262                "Syscall didn't complete successfully; discarding plugin ptrs without writing back"
263            );
264            ctx.process.free_unsafe_borrows_noflush();
265        } else {
266            ctx.process
267                .free_unsafe_borrows_flush()
268                .expect("flushing syscall ptrs");
269        }
270
271        if ctx.process.is_running() && !matches!(rv, Err(SyscallError::Blocked(_))) {
272            let host_shmem = ctx.host.shim_shmem();
273            let mut host_shmem_prot = ctx.host.shim_shmem_lock_borrow_mut().unwrap();
274
275            // increment unblocked syscall latency, but only for non-shadow-syscalls, since the
276            // latter are part of Shadow's internal plumbing; they shouldn't necessarily "consume"
277            // time
278            if ctx.host.shim_shmem().model_unblocked_syscall_latency && !is_shadow_syscall(syscall)
279            {
280                host_shmem_prot.unapplied_cpu_latency += host_shmem.unblocked_syscall_latency;
281            }
282
283            log::trace!(
284                "Unapplied CPU latency amt={}ns max={}ns",
285                host_shmem_prot.unapplied_cpu_latency.as_nanos(),
286                host_shmem.max_unapplied_cpu_latency.as_nanos()
287            );
288
289            if host_shmem_prot.unapplied_cpu_latency > host_shmem.max_unapplied_cpu_latency {
290                let new_time = Worker::current_time().unwrap()
291                    + core::mem::replace(
292                        &mut host_shmem_prot.unapplied_cpu_latency,
293                        SimulationTime::ZERO,
294                    );
295                if new_time <= Worker::max_event_runahead_time(ctx.host) {
296                    // The new time is early enough that we can safely just increment to that time.
297                    // i.e. there are no threads or other events scheduled to
298                    // run on this worker before `new_time`.
299                    log::trace!(
300                        "Reached max-unapplied-cpu-latency, but not max runahead; Incrementing time"
301                    );
302                    Worker::set_current_time(new_time);
303                } else {
304                    // We can't safely increment to the new time, e.g. because
305                    // there are other events or the end of the current
306                    // scheduler round scheduled to happen first.  Reschedule
307                    // the current thread to run at the new time instead of
308                    // incrementing it.
309                    log::trace!(
310                        "Reached max-unapplied-cpu-latency, and max runahead; Rescheduling"
311                    );
312
313                    // Save the syscall result so that we can return it later
314                    // instead of re-executing the syscall.
315                    assert!(self.pending_result.is_none());
316                    self.pending_result = Some(rv);
317
318                    rv = Err(SyscallError::new_blocked_until(new_time, false));
319                }
320            }
321        }
322
323        if matches!(rv, Err(SyscallError::Blocked(_))) {
324            // we are blocking: store the syscall number so we know to expect the same syscall again
325            // when it unblocks
326            self.blocked_syscall = Some(syscall);
327        } else {
328            self.blocked_syscall = None;
329        }
330
331        rv
332    }
333
334    #[allow(non_upper_case_globals)]
335    fn run_handler(&mut self, ctx: &ThreadContext, args: &SyscallArgs) -> SyscallResult {
336        let mut ctx = SyscallContext {
337            objs: ctx,
338            args,
339            handler: self,
340        };
341
342        let syscall = SyscallNum::new(ctx.args.number.try_into().unwrap());
343        let syscall_name = syscall.to_str().unwrap_or("unknown-syscall");
344
345        macro_rules! handle {
346            ($f:ident) => {{
347                let rv = SyscallHandlerFn::call(Self::$f, &mut ctx);
348
349                // log the syscall if enabled
350                if let Some(strace_fmt_options) = ctx.objs.process.strace_logging_options() {
351                    ctx.objs.process.with_strace_file(|file| {
352                        crate::utility::macros::SyscallLogger::$f(
353                            file,
354                            ctx.args.args,
355                            &rv,
356                            strace_fmt_options,
357                            ctx.objs.thread.id(),
358                            &*ctx.objs.process.memory_borrow(),
359                        )
360                        .unwrap();
361                    });
362                }
363
364                rv
365            }};
366        }
367
368        match syscall {
369            // SHADOW-HANDLED SYSCALLS
370            //
371            SyscallNum::NR_accept => handle!(accept),
372            SyscallNum::NR_accept4 => handle!(accept4),
373            SyscallNum::NR_alarm => handle!(alarm),
374            SyscallNum::NR_bind => handle!(bind),
375            SyscallNum::NR_brk => handle!(brk),
376            SyscallNum::NR_capget => handle!(capget),
377            SyscallNum::NR_capset => handle!(capset),
378            SyscallNum::NR_chdir => handle!(chdir),
379            SyscallNum::NR_clock_getres => handle!(clock_getres),
380            SyscallNum::NR_clock_nanosleep => handle!(clock_nanosleep),
381            SyscallNum::NR_clone => handle!(clone),
382            SyscallNum::NR_clone3 => handle!(clone3),
383            SyscallNum::NR_close => handle!(close),
384            SyscallNum::NR_close_range => handle!(close_range),
385            SyscallNum::NR_connect => handle!(connect),
386            SyscallNum::NR_creat => handle!(creat),
387            SyscallNum::NR_dup => handle!(dup),
388            SyscallNum::NR_dup2 => handle!(dup2),
389            SyscallNum::NR_dup3 => handle!(dup3),
390            SyscallNum::NR_epoll_create => handle!(epoll_create),
391            SyscallNum::NR_epoll_create1 => handle!(epoll_create1),
392            SyscallNum::NR_epoll_ctl => handle!(epoll_ctl),
393            SyscallNum::NR_epoll_pwait => handle!(epoll_pwait),
394            SyscallNum::NR_epoll_pwait2 => handle!(epoll_pwait2),
395            SyscallNum::NR_epoll_wait => handle!(epoll_wait),
396            SyscallNum::NR_eventfd => handle!(eventfd),
397            SyscallNum::NR_eventfd2 => handle!(eventfd2),
398            SyscallNum::NR_execve => handle!(execve),
399            SyscallNum::NR_execveat => handle!(execveat),
400            SyscallNum::NR_exit_group => handle!(exit_group),
401            SyscallNum::NR_faccessat => handle!(faccessat),
402            SyscallNum::NR_faccessat2 => handle!(faccessat2),
403            SyscallNum::NR_fadvise64 => handle!(fadvise64),
404            SyscallNum::NR_fallocate => handle!(fallocate),
405            SyscallNum::NR_fchmod => handle!(fchmod),
406            SyscallNum::NR_fchmodat => handle!(fchmodat),
407            SyscallNum::NR_fchmodat2 => handle!(fchmodat2),
408            SyscallNum::NR_fchown => handle!(fchown),
409            SyscallNum::NR_fchownat => handle!(fchownat),
410            SyscallNum::NR_fcntl => handle!(fcntl),
411            SyscallNum::NR_fdatasync => handle!(fdatasync),
412            SyscallNum::NR_fgetxattr => handle!(fgetxattr),
413            SyscallNum::NR_flistxattr => handle!(flistxattr),
414            SyscallNum::NR_flock => handle!(flock),
415            SyscallNum::NR_fork => handle!(fork),
416            SyscallNum::NR_fremovexattr => handle!(fremovexattr),
417            SyscallNum::NR_fsetxattr => handle!(fsetxattr),
418            SyscallNum::NR_fstat => handle!(fstat),
419            SyscallNum::NR_fstatfs => handle!(fstatfs),
420            SyscallNum::NR_fsync => handle!(fsync),
421            SyscallNum::NR_ftruncate => handle!(ftruncate),
422            SyscallNum::NR_futex => handle!(futex),
423            SyscallNum::NR_futimesat => handle!(futimesat),
424            SyscallNum::NR_get_robust_list => handle!(get_robust_list),
425            SyscallNum::NR_getdents => handle!(getdents),
426            SyscallNum::NR_getdents64 => handle!(getdents64),
427            SyscallNum::NR_getitimer => handle!(getitimer),
428            SyscallNum::NR_getpeername => handle!(getpeername),
429            SyscallNum::NR_getpgid => handle!(getpgid),
430            SyscallNum::NR_getpgrp => handle!(getpgrp),
431            SyscallNum::NR_getpid => handle!(getpid),
432            SyscallNum::NR_getppid => handle!(getppid),
433            SyscallNum::NR_getrandom => handle!(getrandom),
434            SyscallNum::NR_getsid => handle!(getsid),
435            SyscallNum::NR_getsockname => handle!(getsockname),
436            SyscallNum::NR_getsockopt => handle!(getsockopt),
437            SyscallNum::NR_gettid => handle!(gettid),
438            SyscallNum::NR_ioctl => handle!(ioctl),
439            SyscallNum::NR_kill => handle!(kill),
440            SyscallNum::NR_linkat => handle!(linkat),
441            SyscallNum::NR_listen => handle!(listen),
442            SyscallNum::NR_lseek => handle!(lseek),
443            SyscallNum::NR_mkdirat => handle!(mkdirat),
444            SyscallNum::NR_mknodat => handle!(mknodat),
445            SyscallNum::NR_mmap => handle!(mmap),
446            SyscallNum::NR_mprotect => handle!(mprotect),
447            SyscallNum::NR_mremap => handle!(mremap),
448            SyscallNum::NR_munmap => handle!(munmap),
449            SyscallNum::NR_nanosleep => handle!(nanosleep),
450            SyscallNum::NR_newfstatat => handle!(newfstatat),
451            SyscallNum::NR_open => handle!(open),
452            SyscallNum::NR_openat => handle!(openat),
453            SyscallNum::NR_pipe => handle!(pipe),
454            SyscallNum::NR_pipe2 => handle!(pipe2),
455            SyscallNum::NR_poll => handle!(poll),
456            SyscallNum::NR_ppoll => handle!(ppoll),
457            SyscallNum::NR_prctl => handle!(prctl),
458            SyscallNum::NR_pread64 => handle!(pread64),
459            SyscallNum::NR_preadv => handle!(preadv),
460            SyscallNum::NR_preadv2 => handle!(preadv2),
461            SyscallNum::NR_prlimit64 => handle!(prlimit64),
462            SyscallNum::NR_pselect6 => handle!(pselect6),
463            SyscallNum::NR_pwrite64 => handle!(pwrite64),
464            SyscallNum::NR_pwritev => handle!(pwritev),
465            SyscallNum::NR_pwritev2 => handle!(pwritev2),
466            SyscallNum::NR_read => handle!(read),
467            SyscallNum::NR_readahead => handle!(readahead),
468            SyscallNum::NR_readlinkat => handle!(readlinkat),
469            SyscallNum::NR_readv => handle!(readv),
470            SyscallNum::NR_recvfrom => handle!(recvfrom),
471            SyscallNum::NR_recvmsg => handle!(recvmsg),
472            SyscallNum::NR_renameat => handle!(renameat),
473            SyscallNum::NR_renameat2 => handle!(renameat2),
474            SyscallNum::NR_rseq => handle!(rseq),
475            SyscallNum::NR_rt_sigaction => handle!(rt_sigaction),
476            SyscallNum::NR_rt_sigprocmask => handle!(rt_sigprocmask),
477            SyscallNum::NR_sched_getaffinity => handle!(sched_getaffinity),
478            SyscallNum::NR_sched_setaffinity => handle!(sched_setaffinity),
479            SyscallNum::NR_select => handle!(select),
480            SyscallNum::NR_sendmsg => handle!(sendmsg),
481            SyscallNum::NR_sendto => handle!(sendto),
482            SyscallNum::NR_set_robust_list => handle!(set_robust_list),
483            SyscallNum::NR_set_tid_address => handle!(set_tid_address),
484            SyscallNum::NR_setitimer => handle!(setitimer),
485            SyscallNum::NR_setpgid => handle!(setpgid),
486            SyscallNum::NR_setsid => handle!(setsid),
487            SyscallNum::NR_setsockopt => handle!(setsockopt),
488            SyscallNum::NR_shutdown => handle!(shutdown),
489            SyscallNum::NR_sigaltstack => handle!(sigaltstack),
490            SyscallNum::NR_socket => handle!(socket),
491            SyscallNum::NR_socketpair => handle!(socketpair),
492            SyscallNum::NR_statx => handle!(statx),
493            SyscallNum::NR_symlinkat => handle!(symlinkat),
494            SyscallNum::NR_sync_file_range => handle!(sync_file_range),
495            SyscallNum::NR_syncfs => handle!(syncfs),
496            SyscallNum::NR_sysinfo => handle!(sysinfo),
497            SyscallNum::NR_tgkill => handle!(tgkill),
498            SyscallNum::NR_timerfd_create => handle!(timerfd_create),
499            SyscallNum::NR_timerfd_gettime => handle!(timerfd_gettime),
500            SyscallNum::NR_timerfd_settime => handle!(timerfd_settime),
501            SyscallNum::NR_tkill => handle!(tkill),
502            SyscallNum::NR_uname => handle!(uname),
503            SyscallNum::NR_unlinkat => handle!(unlinkat),
504            SyscallNum::NR_utimensat => handle!(utimensat),
505            SyscallNum::NR_vfork => handle!(vfork),
506            SyscallNum::NR_waitid => handle!(waitid),
507            SyscallNum::NR_wait4 => handle!(wait4),
508            SyscallNum::NR_write => handle!(write),
509            SyscallNum::NR_writev => handle!(writev),
510            //
511            // CUSTOM SHADOW-SPECIFIC SYSCALLS
512            //
513            x if ShadowSyscallNum::try_from(x).is_ok() => {
514                match ShadowSyscallNum::try_from(x).expect("Conversion just succeeded above") {
515                    ShadowSyscallNum::hostname_to_addr_ipv4 => {
516                        handle!(shadow_hostname_to_addr_ipv4)
517                    }
518                    ShadowSyscallNum::init_memory_manager => {
519                        handle!(shadow_init_memory_manager)
520                    }
521                    ShadowSyscallNum::shadow_yield => handle!(shadow_yield),
522                }
523            }
524            //
525            // SHIM-ONLY SYSCALLS
526            //
527            SyscallNum::NR_clock_gettime
528            | SyscallNum::NR_gettimeofday
529            | SyscallNum::NR_sched_yield
530            | SyscallNum::NR_time => {
531                panic!(
532                    "Syscall {} ({}) should have been handled in the shim",
533                    syscall_name, ctx.args.number,
534                )
535            }
536            //
537            // NATIVE LINUX-HANDLED SYSCALLS
538            //
539            SyscallNum::NR_access
540            | SyscallNum::NR_arch_prctl
541            | SyscallNum::NR_chmod
542            | SyscallNum::NR_chown
543            | SyscallNum::NR_exit
544            | SyscallNum::NR_getcwd
545            | SyscallNum::NR_geteuid
546            | SyscallNum::NR_getegid
547            | SyscallNum::NR_getgid
548            | SyscallNum::NR_getgroups
549            | SyscallNum::NR_getresgid
550            | SyscallNum::NR_getresuid
551            | SyscallNum::NR_getrlimit
552            | SyscallNum::NR_getuid
553            | SyscallNum::NR_getxattr
554            | SyscallNum::NR_lchown
555            | SyscallNum::NR_lgetxattr
556            | SyscallNum::NR_link
557            | SyscallNum::NR_listxattr
558            | SyscallNum::NR_llistxattr
559            | SyscallNum::NR_lremovexattr
560            | SyscallNum::NR_lsetxattr
561            | SyscallNum::NR_lstat
562            | SyscallNum::NR_madvise
563            | SyscallNum::NR_mkdir
564            | SyscallNum::NR_mknod
565            | SyscallNum::NR_readlink
566            | SyscallNum::NR_removexattr
567            | SyscallNum::NR_rename
568            | SyscallNum::NR_rmdir
569            | SyscallNum::NR_rt_sigreturn
570            | SyscallNum::NR_setfsgid
571            | SyscallNum::NR_setfsuid
572            | SyscallNum::NR_setgid
573            | SyscallNum::NR_setregid
574            | SyscallNum::NR_setresgid
575            | SyscallNum::NR_setresuid
576            | SyscallNum::NR_setreuid
577            | SyscallNum::NR_setrlimit
578            | SyscallNum::NR_setuid
579            | SyscallNum::NR_setxattr
580            | SyscallNum::NR_stat
581            | SyscallNum::NR_statfs
582            | SyscallNum::NR_symlink
583            | SyscallNum::NR_truncate
584            | SyscallNum::NR_unlink
585            | SyscallNum::NR_utime
586            | SyscallNum::NR_utimes => {
587                log::trace!("Native syscall {} ({})", syscall_name, ctx.args.number);
588
589                let rv = Err(SyscallError::Native);
590
591                log_syscall_simple(
592                    ctx.objs.process,
593                    ctx.objs.process.strace_logging_options(),
594                    ctx.objs.thread.id(),
595                    syscall_name,
596                    "...",
597                    &rv,
598                )
599                .unwrap();
600
601                rv
602            }
603            //
604            // UNSUPPORTED SYSCALL
605            //
606            _ => {
607                log_once_per_value_at_level!(
608                    syscall,
609                    SyscallNum,
610                    log::Level::Warn,
611                    log::Level::Debug,
612                    "Detected unsupported syscall {} ({}) called from thread {} in process {} on host {}",
613                    syscall_name,
614                    ctx.args.number,
615                    ctx.objs.thread.id(),
616                    &*ctx.objs.process.plugin_name(),
617                    ctx.objs.host.name(),
618                );
619
620                let rv = Err(Errno::ENOSYS.into());
621
622                let (syscall_name, syscall_args) = match syscall.to_str() {
623                    // log it in the form "poll(...)"
624                    Some(syscall_name) => (syscall_name, Cow::Borrowed("...")),
625                    // log it in the form "syscall(X, ...)"
626                    None => ("syscall", Cow::Owned(format!("{}, ...", ctx.args.number))),
627                };
628
629                log_syscall_simple(
630                    ctx.objs.process,
631                    ctx.objs.process.strace_logging_options(),
632                    ctx.objs.thread.id(),
633                    syscall_name,
634                    &syscall_args,
635                    &rv,
636                )
637                .unwrap();
638
639                rv
640            }
641        }
642    }
643
    /// Did the last syscall result in `SyscallError::Blocked`? If called from a syscall handler and
    /// `is_blocked()` returns `true`, then the current syscall is the same syscall that previously
    /// blocked. For example, if currently running the `connect` syscall handler and `is_blocked()`
    /// is `true`, then the previous syscall handler that ran was also `connect` and it returned
    /// `SyscallError::Blocked`.
    ///
    /// This simply reports whether a blocked syscall number is currently recorded on the handler.
    pub fn is_blocked(&self) -> bool {
        self.blocked_syscall.is_some()
    }
652
653    /// Internal helper that returns the `Descriptor` for the fd if it exists, otherwise returns
654    /// EBADF.
655    fn get_descriptor(
656        descriptor_table: &DescriptorTable,
657        fd: impl TryInto<DescriptorHandle>,
658    ) -> Result<&Descriptor, linux_api::errno::Errno> {
659        // check that fd is within bounds
660        let fd = fd.try_into().or(Err(linux_api::errno::Errno::EBADF))?;
661
662        match descriptor_table.get(fd) {
663            Some(desc) => Ok(desc),
664            None => Err(linux_api::errno::Errno::EBADF),
665        }
666    }
667
668    /// Internal helper that returns the `Descriptor` for the fd if it exists, otherwise returns
669    /// EBADF.
670    fn get_descriptor_mut(
671        descriptor_table: &mut DescriptorTable,
672        fd: impl TryInto<DescriptorHandle>,
673    ) -> Result<&mut Descriptor, linux_api::errno::Errno> {
674        // check that fd is within bounds
675        let fd = fd.try_into().or(Err(linux_api::errno::Errno::EBADF))?;
676
677        match descriptor_table.get_mut(fd) {
678            Some(desc) => Ok(desc),
679            None => Err(linux_api::errno::Errno::EBADF),
680        }
681    }
682
683    /// Run a legacy C syscall handler.
684    fn legacy_syscall<T: From<SyscallReg>>(
685        syscall: LegacySyscallFn,
686        ctx: &mut SyscallContext,
687    ) -> Result<T, SyscallError> {
688        let rv: SyscallResult =
689            unsafe { syscall(ctx.handler, std::ptr::from_ref(ctx.args)) }.into();
690
691        // we need to flush pointers here so that the syscall formatter can reliably borrow process
692        // memory without an incompatible borrow
693        if rv.is_err() {
694            // the syscall didn't complete successfully; don't write back pointers
695            log::trace!(
696                "Syscall didn't complete successfully; discarding plugin ptrs without writing back."
697            );
698            ctx.objs.process.free_unsafe_borrows_noflush();
699        } else {
700            ctx.objs
701                .process
702                .free_unsafe_borrows_flush()
703                .expect("flushing syscall ptrs");
704        }
705
706        rv.map(Into::into)
707    }
708}
709
710impl std::ops::Drop for SyscallHandler {
711    fn drop(&mut self) {
712        #[cfg(feature = "perf_timers")]
713        log::debug!(
714            "Handled {} syscalls in {} seconds",
715            self.num_syscalls,
716            self.perf_duration_total.as_secs()
717        );
718        #[cfg(not(feature = "perf_timers"))]
719        log::debug!("Handled {} syscalls", self.num_syscalls);
720
721        if let Some(syscall_counter) = self.syscall_counter.as_mut() {
722            // log the plugin thread specific counts
723            log::debug!(
724                "Thread {} syscall counts: {}",
725                self.thread_id,
726                syscall_counter,
727            );
728
729            // add up the counts at the worker level
730            Worker::add_syscall_counts(syscall_counter);
731        }
732
733        unsafe { c::legacyfile_unref(self.epoll.ptr() as *mut std::ffi::c_void) };
734    }
735}
736
/// The state handed to a syscall handler function for a single syscall invocation: the thread
/// context the syscall runs in, the syscall's arguments, and the [`SyscallHandler`] itself.
pub struct SyscallContext<'a, 'b> {
    /// The thread context for this syscall (for example, `objs.process` gives access to the
    /// process).
    pub objs: &'a ThreadContext<'b>,
    /// The arguments of the syscall; individual registers are read with `args.get(i)`.
    pub args: &'a SyscallArgs,
    /// The handler that is servicing this syscall.
    pub handler: &'a mut SyscallHandler,
}
742
/// A function that can be run as a syscall handler given only a [`SyscallContext`].
///
/// The type parameter `T` is a marker for the tuple of argument types that the function takes
/// after the context (`()`, `(T1,)`, `(T1, T2)`, ...). It lets the trait be implemented
/// separately for functions with different numbers of arguments.
pub trait SyscallHandlerFn<T> {
    /// Runs the handler with `ctx` and returns its outcome as a [`SyscallResult`].
    fn call(self, ctx: &mut SyscallContext) -> SyscallResult;
}
746
747impl<F, E, T0> SyscallHandlerFn<()> for F
748where
749    F: Fn(&mut SyscallContext) -> Result<T0, E>,
750    E: Into<SyscallError>,
751    T0: Into<SyscallReg>,
752{
753    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
754        self(ctx).map(Into::into).map_err(Into::into)
755    }
756}
757
758impl<F, E, T0, T1> SyscallHandlerFn<(T1,)> for F
759where
760    F: Fn(&mut SyscallContext, T1) -> Result<T0, E>,
761    E: Into<SyscallError>,
762    T0: Into<SyscallReg>,
763    T1: From<SyscallReg>,
764{
765    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
766        self(ctx, ctx.args.get(0).into())
767            .map(Into::into)
768            .map_err(Into::into)
769    }
770}
771
772impl<F, E, T0, T1, T2> SyscallHandlerFn<(T1, T2)> for F
773where
774    F: Fn(&mut SyscallContext, T1, T2) -> Result<T0, E>,
775    E: Into<SyscallError>,
776    T0: Into<SyscallReg>,
777    T1: From<SyscallReg>,
778    T2: From<SyscallReg>,
779{
780    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
781        self(ctx, ctx.args.get(0).into(), ctx.args.get(1).into())
782            .map(Into::into)
783            .map_err(Into::into)
784    }
785}
786
787impl<F, E, T0, T1, T2, T3> SyscallHandlerFn<(T1, T2, T3)> for F
788where
789    F: Fn(&mut SyscallContext, T1, T2, T3) -> Result<T0, E>,
790    E: Into<SyscallError>,
791    T0: Into<SyscallReg>,
792    T1: From<SyscallReg>,
793    T2: From<SyscallReg>,
794    T3: From<SyscallReg>,
795{
796    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
797        self(
798            ctx,
799            ctx.args.get(0).into(),
800            ctx.args.get(1).into(),
801            ctx.args.get(2).into(),
802        )
803        .map(Into::into)
804        .map_err(Into::into)
805    }
806}
807
808impl<F, E, T0, T1, T2, T3, T4> SyscallHandlerFn<(T1, T2, T3, T4)> for F
809where
810    F: Fn(&mut SyscallContext, T1, T2, T3, T4) -> Result<T0, E>,
811    E: Into<SyscallError>,
812    T0: Into<SyscallReg>,
813    T1: From<SyscallReg>,
814    T2: From<SyscallReg>,
815    T3: From<SyscallReg>,
816    T4: From<SyscallReg>,
817{
818    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
819        self(
820            ctx,
821            ctx.args.get(0).into(),
822            ctx.args.get(1).into(),
823            ctx.args.get(2).into(),
824            ctx.args.get(3).into(),
825        )
826        .map(Into::into)
827        .map_err(Into::into)
828    }
829}
830
831impl<F, E, T0, T1, T2, T3, T4, T5> SyscallHandlerFn<(T1, T2, T3, T4, T5)> for F
832where
833    F: Fn(&mut SyscallContext, T1, T2, T3, T4, T5) -> Result<T0, E>,
834    E: Into<SyscallError>,
835    T0: Into<SyscallReg>,
836    T1: From<SyscallReg>,
837    T2: From<SyscallReg>,
838    T3: From<SyscallReg>,
839    T4: From<SyscallReg>,
840    T5: From<SyscallReg>,
841{
842    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
843        self(
844            ctx,
845            ctx.args.get(0).into(),
846            ctx.args.get(1).into(),
847            ctx.args.get(2).into(),
848            ctx.args.get(3).into(),
849            ctx.args.get(4).into(),
850        )
851        .map(Into::into)
852        .map_err(Into::into)
853    }
854}
855
856impl<F, E, T0, T1, T2, T3, T4, T5, T6> SyscallHandlerFn<(T1, T2, T3, T4, T5, T6)> for F
857where
858    F: Fn(&mut SyscallContext, T1, T2, T3, T4, T5, T6) -> Result<T0, E>,
859    E: Into<SyscallError>,
860    T0: Into<SyscallReg>,
861    T1: From<SyscallReg>,
862    T2: From<SyscallReg>,
863    T3: From<SyscallReg>,
864    T4: From<SyscallReg>,
865    T5: From<SyscallReg>,
866    T6: From<SyscallReg>,
867{
868    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
869        self(
870            ctx,
871            ctx.args.get(0).into(),
872            ctx.args.get(1).into(),
873            ctx.args.get(2).into(),
874            ctx.args.get(3).into(),
875            ctx.args.get(4).into(),
876            ctx.args.get(5).into(),
877        )
878        .map(Into::into)
879        .map_err(Into::into)
880    }
881}
882
883mod export {
884    use crate::host::host::Host;
885    use crate::host::process::Process;
886    use crate::host::thread::Thread;
887
888    use super::*;
889
890    /// Returns a pointer to the current running host. The returned pointer is invalidated the next
891    /// time the worker switches hosts. Rust syscall handlers should get the host from the
892    /// [`SyscallContext`] instead.
893    #[unsafe(no_mangle)]
894    pub extern "C-unwind" fn rustsyscallhandler_getHost(sys: *const SyscallHandler) -> *const Host {
895        let sys = unsafe { sys.as_ref() }.unwrap();
896        Worker::with_active_host(|h| {
897            assert_eq!(h.id(), sys.host_id);
898            std::ptr::from_ref(h)
899        })
900        .unwrap()
901    }
902
903    /// Returns a pointer to the current running process. The returned pointer is invalidated the
904    /// next time the worker switches processes. Rust syscall handlers should get the process from
905    /// the [`SyscallContext`] instead.
906    #[unsafe(no_mangle)]
907    pub extern "C-unwind" fn rustsyscallhandler_getProcess(
908        sys: *const SyscallHandler,
909    ) -> *const Process {
910        let sys = unsafe { sys.as_ref() }.unwrap();
911        Worker::with_active_process(|p| {
912            assert_eq!(p.id(), sys.process_id);
913            std::ptr::from_ref(p)
914        })
915        .unwrap()
916    }
917
918    /// Returns a pointer to the current running thread. The returned pointer is invalidated the
919    /// next time the worker switches threads. Rust syscall handlers should get the thread from the
920    /// [`SyscallContext`] instead.
921    #[unsafe(no_mangle)]
922    pub extern "C-unwind" fn rustsyscallhandler_getThread(
923        sys: *const SyscallHandler,
924    ) -> *const Thread {
925        let sys = unsafe { sys.as_ref() }.unwrap();
926        Worker::with_active_thread(|t| {
927            assert_eq!(t.id(), sys.thread_id);
928            std::ptr::from_ref(t)
929        })
930        .unwrap()
931    }
932
933    #[unsafe(no_mangle)]
934    pub extern "C-unwind" fn rustsyscallhandler_wasBlocked(sys: *const SyscallHandler) -> bool {
935        let sys = unsafe { sys.as_ref() }.unwrap();
936        sys.is_blocked()
937    }
938
939    #[unsafe(no_mangle)]
940    pub extern "C-unwind" fn rustsyscallhandler_didListenTimeoutExpire(
941        sys: *const SyscallHandler,
942    ) -> bool {
943        let sys = unsafe { sys.as_ref() }.unwrap();
944
945        // will be `None` if the syscall condition doesn't exist or there's no timeout
946        let timeout = Worker::with_active_thread(|t| {
947            assert_eq!(t.id(), sys.thread_id);
948            t.syscall_condition().and_then(|x| x.timeout())
949        })
950        .unwrap();
951
952        // true if there is a timeout and it's before or at the current time
953        timeout
954            .map(|timeout| Worker::current_time().unwrap() >= timeout)
955            .unwrap_or(false)
956    }
957
958    #[unsafe(no_mangle)]
959    pub extern "C-unwind" fn rustsyscallhandler_getEpoll(
960        sys: *const SyscallHandler,
961    ) -> *mut c::Epoll {
962        let sys = unsafe { sys.as_ref() }.unwrap();
963        sys.epoll.ptr()
964    }
965}