shadow_rs/host/syscall/handler/
mod.rs

1use std::borrow::Cow;
2
3#[cfg(feature = "perf_timers")]
4use std::time::Duration;
5
6use linux_api::errno::Errno;
7use linux_api::syscall::SyscallNum;
8use shadow_shim_helper_rs::HostId;
9use shadow_shim_helper_rs::shadow_syscalls::ShadowSyscallNum;
10use shadow_shim_helper_rs::simulation_time::SimulationTime;
11use shadow_shim_helper_rs::syscall_types::SyscallArgs;
12use shadow_shim_helper_rs::syscall_types::SyscallReg;
13use shadow_shim_helper_rs::util::SendPointer;
14
15use crate::core::worker::Worker;
16use crate::cshadow as c;
17use crate::host::context::ThreadContext;
18use crate::host::descriptor::Descriptor;
19use crate::host::descriptor::descriptor_table::{DescriptorHandle, DescriptorTable};
20use crate::host::process::ProcessId;
21use crate::host::syscall::formatter::log_syscall_simple;
22use crate::host::syscall::is_shadow_syscall;
23use crate::host::syscall::types::SyscallReturn;
24use crate::host::syscall::types::{SyscallError, SyscallResult};
25use crate::host::thread::ThreadId;
26use crate::utility::counter::Counter;
27
28#[cfg(feature = "perf_timers")]
29use crate::utility::perf_timer::PerfTimer;
30
31mod clone;
32mod close_range;
33mod epoll;
34mod eventfd;
35mod fcntl;
36mod file;
37mod fileat;
38mod futex;
39mod ioctl;
40mod mman;
41mod poll;
42mod prctl;
43mod random;
44mod resource;
45mod sched;
46mod select;
47mod shadow;
48mod signal;
49mod socket;
50mod stat;
51mod sysinfo;
52mod time;
53mod timerfd;
54mod uio;
55mod unistd;
56mod wait;
57
/// Signature of a legacy C syscall handler: an `unsafe extern "C-unwind"` function that receives a
/// raw pointer to the `SyscallHandler` and a raw pointer to the syscall's `SyscallArgs`, and
/// returns a `SyscallReturn`. Functions of this type are invoked through
/// `SyscallHandler::legacy_syscall`.
type LegacySyscallFn =
    unsafe extern "C-unwind" fn(*mut SyscallHandler, *const SyscallArgs) -> SyscallReturn;
60
/// State used to handle the syscalls of a single thread: the identity of the owning
/// host/process/thread, bookkeeping for a syscall that is blocked or whose result is being
/// delayed, and syscall statistics.
///
/// Will eventually contain syscall handler state once migrated from the c handler.
pub struct SyscallHandler {
    /// The host that this `SyscallHandler` belongs to. Intended to be used for logging.
    host_id: HostId,
    /// The process that this `SyscallHandler` belongs to. Intended to be used for logging.
    process_id: ProcessId,
    /// The thread that this `SyscallHandler` belongs to. Intended to be used for logging.
    thread_id: ThreadId,
    /// The total number of syscalls that we have handled.
    num_syscalls: u64,
    /// A counter for individual syscalls. `None` when syscall counting was not requested in
    /// `SyscallHandler::new`.
    syscall_counter: Option<Counter>,
    /// If we are currently blocking a specific syscall, i.e., waiting for a socket to be
    /// readable/writable or waiting for a timeout, the syscall number of that function is stored
    /// here. Will be `None` if a syscall is not currently blocked.
    blocked_syscall: Option<SyscallNum>,
    /// In some cases the syscall handler completes, but we block the caller anyway to move time
    /// forward. This stores the result of the completed syscall, to be returned when the caller
    /// resumes.
    pending_result: Option<SyscallResult>,
    /// We use this epoll to service syscalls that need to block on the status of multiple
    /// descriptors, like poll. Created with `c::epoll_new()` and released in `Drop`.
    epoll: SendPointer<c::Epoll>,
    /// The cumulative time consumed while handling the current syscall. This includes the time from
    /// previous calls that ended up blocking.
    #[cfg(feature = "perf_timers")]
    perf_duration_current: Duration,
    /// The total time elapsed while handling all syscalls.
    #[cfg(feature = "perf_timers")]
    perf_duration_total: Duration,
}
92
93impl SyscallHandler {
94    pub fn new(
95        host_id: HostId,
96        process_id: ProcessId,
97        thread_id: ThreadId,
98        count_syscalls: bool,
99    ) -> SyscallHandler {
100        SyscallHandler {
101            host_id,
102            process_id,
103            thread_id,
104            num_syscalls: 0,
105            syscall_counter: count_syscalls.then(Counter::new),
106            blocked_syscall: None,
107            pending_result: None,
108            epoll: unsafe { SendPointer::new(c::epoll_new()) },
109            #[cfg(feature = "perf_timers")]
110            perf_duration_current: Duration::ZERO,
111            #[cfg(feature = "perf_timers")]
112            perf_duration_total: Duration::ZERO,
113        }
114    }
115
116    pub fn syscall(&mut self, ctx: &ThreadContext, args: &SyscallArgs) -> SyscallResult {
117        // it wouldn't make sense if we were given a different host, process, and thread
118        assert_eq!(ctx.host.id(), self.host_id);
119        assert_eq!(ctx.process.id(), self.process_id);
120        assert_eq!(ctx.thread.id(), self.thread_id);
121
122        let syscall = SyscallNum::new(args.number.try_into().unwrap());
123        let syscall_name = syscall.to_str().unwrap_or("unknown-syscall");
124
125        // make sure that we either don't have a blocked syscall, or if we blocked a syscall, then
126        // that same syscall should be executed again when it becomes unblocked
127        if let Some(blocked_syscall) = self.blocked_syscall {
128            if blocked_syscall != syscall {
129                panic!(
130                    "We blocked syscall {blocked_syscall} but syscall {syscall} is unexpectedly being invoked"
131                );
132            }
133        }
134
135        // were we previously blocked on this same syscall?
136        let was_blocked = self.blocked_syscall.is_some();
137
138        if let Some(pending_result) = self.pending_result.take() {
139            // The syscall was already completed, but we delayed the response to yield the CPU.
140            // Return that response now.
141            log::trace!("Returning delayed result");
142            assert!(!matches!(pending_result, Err(SyscallError::Blocked(_))));
143
144            self.blocked_syscall = None;
145            self.pending_result = None;
146
147            return pending_result;
148        }
149
150        log::trace!(
151            "SYSCALL_HANDLER_PRE: {} ({}){} — ({}, tid={})",
152            syscall_name,
153            args.number,
154            if was_blocked {
155                " (previously BLOCKed)"
156            } else {
157                ""
158            },
159            &*ctx.process.name(),
160            ctx.thread.id(),
161        );
162
163        // Count the frequency of each syscall, but only on the initial call. This avoids double
164        // counting in the case where the initial call blocked at first, but then later became
165        // unblocked and is now being handled again here.
166        if let Some(syscall_counter) = self.syscall_counter.as_mut() {
167            if !was_blocked {
168                syscall_counter.add_one(syscall_name);
169            }
170        }
171
172        #[cfg(feature = "perf_timers")]
173        let timer = PerfTimer::new_started();
174
175        let mut rv = self.run_handler(ctx, args);
176
177        #[cfg(feature = "perf_timers")]
178        {
179            // add the cumulative elapsed seconds
180            self.perf_duration_current += timer.elapsed();
181
182            log::debug!(
183                "Handling syscall {} ({}) took cumulative {} ms",
184                syscall_name,
185                args.number,
186                self.perf_duration_current.as_millis(),
187            );
188        }
189
190        if !matches!(rv, Err(SyscallError::Blocked(_))) {
191            // the syscall completed, count it and the cumulative time to complete it
192            self.num_syscalls += 1;
193
194            #[cfg(feature = "perf_timers")]
195            {
196                self.perf_duration_total += self.perf_duration_current;
197                self.perf_duration_current = Duration::ZERO;
198            }
199        }
200
201        if log::log_enabled!(log::Level::Trace) {
202            let rv_formatted = match &rv {
203                Ok(reg) => format!("{}", i64::from(*reg)),
204                Err(SyscallError::Failed(failed)) => {
205                    let errno = failed.errno;
206                    format!("{} ({errno})", errno.to_negated_i64())
207                }
208                Err(SyscallError::Native) => "<native>".to_string(),
209                Err(SyscallError::Blocked(_)) => "<blocked>".to_string(),
210            };
211
212            log::trace!(
213                "SYSCALL_HANDLER_POST: {} ({}) result {}{} — ({}, tid={})",
214                syscall_name,
215                args.number,
216                if was_blocked { "BLOCK -> " } else { "" },
217                rv_formatted,
218                &*ctx.process.name(),
219                ctx.thread.id(),
220            );
221        }
222
223        // If the syscall would be blocked, but there's a signal pending, fail with
224        // EINTR instead. The shim-side code will run the signal handlers and then
225        // either return the EINTR or restart the syscall (See SA_RESTART in
226        // signal(7)).
227        //
228        // We do this check *after* (not before) trying the syscall so that we don't
229        // "interrupt" a syscall that wouldn't have blocked in the first place, or
230        // that can return a "partial" result when interrupted. e.g. consider the
231        // sequence:
232        //
233        // * Thread is blocked on reading a file descriptor.
234        // * The read becomes ready and the thread is scheduled to run.
235        // * The thread receives an unblocked signal.
236        // * The thread runs again.
237        //
238        // In this scenario, the `read` call should be allowed to complete successfully.
239        // from signal(7):  "If an I/O call on a slow device has already transferred
240        // some data by the time it is interrupted by a signal handler, then the
241        // call will return a success  status  (normally,  the  number of bytes
242        // transferred)."
243
244        if let Err(SyscallError::Blocked(ref blocked)) = rv {
245            // the syscall wants to block, but is there a signal pending?
246            let is_unblocked_signal_pending = ctx
247                .thread
248                .unblocked_signal_pending(ctx.process, &ctx.host.shim_shmem_lock_borrow().unwrap());
249
250            if is_unblocked_signal_pending {
251                // return EINTR instead
252                rv = Err(SyscallError::new_interrupted(blocked.restartable));
253            }
254        }
255
256        // we only use unsafe borrows from C code, and we should have only called into C syscall
257        // handlers through `Self::legacy_syscall` which should have already flushed the pointers,
258        // but we may as well do it again here just to be safe
259        if rv.is_err() {
260            // the syscall didn't complete successfully; don't write back pointers
261            log::trace!(
262                "Syscall didn't complete successfully; discarding plugin ptrs without writing back"
263            );
264            ctx.process.free_unsafe_borrows_noflush();
265        } else {
266            ctx.process
267                .free_unsafe_borrows_flush()
268                .expect("flushing syscall ptrs");
269        }
270
271        if ctx.process.is_running() && !matches!(rv, Err(SyscallError::Blocked(_))) {
272            let host_shmem = ctx.host.shim_shmem();
273            let mut host_shmem_prot = ctx.host.shim_shmem_lock_borrow_mut().unwrap();
274
275            // increment unblocked syscall latency, but only for non-shadow-syscalls, since the
276            // latter are part of Shadow's internal plumbing; they shouldn't necessarily "consume"
277            // time
278            if ctx.host.shim_shmem().model_unblocked_syscall_latency && !is_shadow_syscall(syscall)
279            {
280                host_shmem_prot.unapplied_cpu_latency += host_shmem.unblocked_syscall_latency;
281            }
282
283            log::trace!(
284                "Unapplied CPU latency amt={}ns max={}ns",
285                host_shmem_prot.unapplied_cpu_latency.as_nanos(),
286                host_shmem.max_unapplied_cpu_latency.as_nanos()
287            );
288
289            if host_shmem_prot.unapplied_cpu_latency > host_shmem.max_unapplied_cpu_latency {
290                let new_time = Worker::current_time().unwrap()
291                    + core::mem::replace(
292                        &mut host_shmem_prot.unapplied_cpu_latency,
293                        SimulationTime::ZERO,
294                    );
295                if new_time <= Worker::max_event_runahead_time(ctx.host) {
296                    // The new time is early enough that we can safely just increment to that time.
297                    // i.e. there are no threads or other events scheduled to
298                    // run on this worker before `new_time`.
299                    log::trace!(
300                        "Reached max-unapplied-cpu-latency, but not max runahead; Incrementing time"
301                    );
302                    Worker::set_current_time(new_time);
303                } else {
304                    // We can't safely increment to the new time, e.g. because
305                    // there are other events or the end of the current
306                    // scheduler round scheduled to happen first.  Reschedule
307                    // the current thread to run at the new time instead of
308                    // incrementing it.
309                    log::trace!(
310                        "Reached max-unapplied-cpu-latency, and max runahead; Rescheduling"
311                    );
312
313                    // Save the syscall result so that we can return it later
314                    // instead of re-executing the syscall.
315                    assert!(self.pending_result.is_none());
316                    self.pending_result = Some(rv);
317
318                    rv = Err(SyscallError::new_blocked_until(new_time, false));
319                }
320            }
321        }
322
323        if matches!(rv, Err(SyscallError::Blocked(_))) {
324            // we are blocking: store the syscall number so we know to expect the same syscall again
325            // when it unblocks
326            self.blocked_syscall = Some(syscall);
327        } else {
328            self.blocked_syscall = None;
329        }
330
331        rv
332    }
333
334    #[allow(non_upper_case_globals)]
335    fn run_handler(&mut self, ctx: &ThreadContext, args: &SyscallArgs) -> SyscallResult {
336        let mut ctx = SyscallContext {
337            objs: ctx,
338            args,
339            handler: self,
340        };
341
342        let syscall = SyscallNum::new(ctx.args.number.try_into().unwrap());
343        let syscall_name = syscall.to_str().unwrap_or("unknown-syscall");
344
345        macro_rules! handle {
346            ($f:ident) => {{
347                let rv = SyscallHandlerFn::call(Self::$f, &mut ctx);
348
349                // log the syscall if enabled
350                if let Some(strace_fmt_options) = ctx.objs.process.strace_logging_options() {
351                    ctx.objs.process.with_strace_file(|file| {
352                        crate::utility::macros::SyscallLogger::$f(
353                            file,
354                            ctx.args.args,
355                            &rv,
356                            strace_fmt_options,
357                            ctx.objs.thread.id(),
358                            &*ctx.objs.process.memory_borrow(),
359                        )
360                        .unwrap();
361                    });
362                }
363
364                rv
365            }};
366        }
367
368        match syscall {
369            // SHADOW-HANDLED SYSCALLS
370            //
371            SyscallNum::NR_accept => handle!(accept),
372            SyscallNum::NR_accept4 => handle!(accept4),
373            SyscallNum::NR_arch_prctl => handle!(arch_prctl),
374            SyscallNum::NR_alarm => handle!(alarm),
375            SyscallNum::NR_bind => handle!(bind),
376            SyscallNum::NR_brk => handle!(brk),
377            SyscallNum::NR_capget => handle!(capget),
378            SyscallNum::NR_capset => handle!(capset),
379            SyscallNum::NR_chdir => handle!(chdir),
380            SyscallNum::NR_clock_getres => handle!(clock_getres),
381            SyscallNum::NR_clock_nanosleep => handle!(clock_nanosleep),
382            SyscallNum::NR_clone => handle!(clone),
383            SyscallNum::NR_clone3 => handle!(clone3),
384            SyscallNum::NR_close => handle!(close),
385            SyscallNum::NR_close_range => handle!(close_range),
386            SyscallNum::NR_connect => handle!(connect),
387            SyscallNum::NR_creat => handle!(creat),
388            SyscallNum::NR_dup => handle!(dup),
389            SyscallNum::NR_dup2 => handle!(dup2),
390            SyscallNum::NR_dup3 => handle!(dup3),
391            SyscallNum::NR_epoll_create => handle!(epoll_create),
392            SyscallNum::NR_epoll_create1 => handle!(epoll_create1),
393            SyscallNum::NR_epoll_ctl => handle!(epoll_ctl),
394            SyscallNum::NR_epoll_pwait => handle!(epoll_pwait),
395            SyscallNum::NR_epoll_pwait2 => handle!(epoll_pwait2),
396            SyscallNum::NR_epoll_wait => handle!(epoll_wait),
397            SyscallNum::NR_eventfd => handle!(eventfd),
398            SyscallNum::NR_eventfd2 => handle!(eventfd2),
399            SyscallNum::NR_execve => handle!(execve),
400            SyscallNum::NR_execveat => handle!(execveat),
401            SyscallNum::NR_exit_group => handle!(exit_group),
402            SyscallNum::NR_faccessat => handle!(faccessat),
403            SyscallNum::NR_faccessat2 => handle!(faccessat2),
404            SyscallNum::NR_fadvise64 => handle!(fadvise64),
405            SyscallNum::NR_fallocate => handle!(fallocate),
406            SyscallNum::NR_fchmod => handle!(fchmod),
407            SyscallNum::NR_fchmodat => handle!(fchmodat),
408            SyscallNum::NR_fchmodat2 => handle!(fchmodat2),
409            SyscallNum::NR_fchown => handle!(fchown),
410            SyscallNum::NR_fchownat => handle!(fchownat),
411            SyscallNum::NR_fcntl => handle!(fcntl),
412            SyscallNum::NR_fdatasync => handle!(fdatasync),
413            SyscallNum::NR_fgetxattr => handle!(fgetxattr),
414            SyscallNum::NR_flistxattr => handle!(flistxattr),
415            SyscallNum::NR_flock => handle!(flock),
416            SyscallNum::NR_fork => handle!(fork),
417            SyscallNum::NR_fremovexattr => handle!(fremovexattr),
418            SyscallNum::NR_fsetxattr => handle!(fsetxattr),
419            SyscallNum::NR_fstat => handle!(fstat),
420            SyscallNum::NR_fstatfs => handle!(fstatfs),
421            SyscallNum::NR_fsync => handle!(fsync),
422            SyscallNum::NR_ftruncate => handle!(ftruncate),
423            SyscallNum::NR_futex => handle!(futex),
424            SyscallNum::NR_futimesat => handle!(futimesat),
425            SyscallNum::NR_get_robust_list => handle!(get_robust_list),
426            SyscallNum::NR_getdents => handle!(getdents),
427            SyscallNum::NR_getdents64 => handle!(getdents64),
428            SyscallNum::NR_getitimer => handle!(getitimer),
429            SyscallNum::NR_getpeername => handle!(getpeername),
430            SyscallNum::NR_getpgid => handle!(getpgid),
431            SyscallNum::NR_getpgrp => handle!(getpgrp),
432            SyscallNum::NR_getpid => handle!(getpid),
433            SyscallNum::NR_getppid => handle!(getppid),
434            SyscallNum::NR_getrandom => handle!(getrandom),
435            SyscallNum::NR_getsid => handle!(getsid),
436            SyscallNum::NR_getsockname => handle!(getsockname),
437            SyscallNum::NR_getsockopt => handle!(getsockopt),
438            SyscallNum::NR_gettid => handle!(gettid),
439            SyscallNum::NR_ioctl => handle!(ioctl),
440            SyscallNum::NR_kill => handle!(kill),
441            SyscallNum::NR_linkat => handle!(linkat),
442            SyscallNum::NR_listen => handle!(listen),
443            SyscallNum::NR_lseek => handle!(lseek),
444            SyscallNum::NR_mkdirat => handle!(mkdirat),
445            SyscallNum::NR_mknodat => handle!(mknodat),
446            SyscallNum::NR_mmap => handle!(mmap),
447            SyscallNum::NR_mprotect => handle!(mprotect),
448            SyscallNum::NR_mremap => handle!(mremap),
449            SyscallNum::NR_munmap => handle!(munmap),
450            SyscallNum::NR_nanosleep => handle!(nanosleep),
451            SyscallNum::NR_newfstatat => handle!(newfstatat),
452            SyscallNum::NR_open => handle!(open),
453            SyscallNum::NR_openat => handle!(openat),
454            SyscallNum::NR_pipe => handle!(pipe),
455            SyscallNum::NR_pipe2 => handle!(pipe2),
456            SyscallNum::NR_poll => handle!(poll),
457            SyscallNum::NR_ppoll => handle!(ppoll),
458            SyscallNum::NR_prctl => handle!(prctl),
459            SyscallNum::NR_pread64 => handle!(pread64),
460            SyscallNum::NR_preadv => handle!(preadv),
461            SyscallNum::NR_preadv2 => handle!(preadv2),
462            SyscallNum::NR_prlimit64 => handle!(prlimit64),
463            SyscallNum::NR_pselect6 => handle!(pselect6),
464            SyscallNum::NR_pwrite64 => handle!(pwrite64),
465            SyscallNum::NR_pwritev => handle!(pwritev),
466            SyscallNum::NR_pwritev2 => handle!(pwritev2),
467            SyscallNum::NR_read => handle!(read),
468            SyscallNum::NR_readahead => handle!(readahead),
469            SyscallNum::NR_readlinkat => handle!(readlinkat),
470            SyscallNum::NR_readv => handle!(readv),
471            SyscallNum::NR_recvfrom => handle!(recvfrom),
472            SyscallNum::NR_recvmsg => handle!(recvmsg),
473            SyscallNum::NR_renameat => handle!(renameat),
474            SyscallNum::NR_renameat2 => handle!(renameat2),
475            SyscallNum::NR_rseq => handle!(rseq),
476            SyscallNum::NR_rt_sigaction => handle!(rt_sigaction),
477            SyscallNum::NR_rt_sigprocmask => handle!(rt_sigprocmask),
478            SyscallNum::NR_sched_getaffinity => handle!(sched_getaffinity),
479            SyscallNum::NR_sched_setaffinity => handle!(sched_setaffinity),
480            SyscallNum::NR_select => handle!(select),
481            SyscallNum::NR_sendmsg => handle!(sendmsg),
482            SyscallNum::NR_sendto => handle!(sendto),
483            SyscallNum::NR_set_robust_list => handle!(set_robust_list),
484            SyscallNum::NR_set_tid_address => handle!(set_tid_address),
485            SyscallNum::NR_setitimer => handle!(setitimer),
486            SyscallNum::NR_setpgid => handle!(setpgid),
487            SyscallNum::NR_setsid => handle!(setsid),
488            SyscallNum::NR_setsockopt => handle!(setsockopt),
489            SyscallNum::NR_shutdown => handle!(shutdown),
490            SyscallNum::NR_sigaltstack => handle!(sigaltstack),
491            SyscallNum::NR_socket => handle!(socket),
492            SyscallNum::NR_socketpair => handle!(socketpair),
493            SyscallNum::NR_statx => handle!(statx),
494            SyscallNum::NR_symlinkat => handle!(symlinkat),
495            SyscallNum::NR_sync_file_range => handle!(sync_file_range),
496            SyscallNum::NR_syncfs => handle!(syncfs),
497            SyscallNum::NR_sysinfo => handle!(sysinfo),
498            SyscallNum::NR_tgkill => handle!(tgkill),
499            SyscallNum::NR_timerfd_create => handle!(timerfd_create),
500            SyscallNum::NR_timerfd_gettime => handle!(timerfd_gettime),
501            SyscallNum::NR_timerfd_settime => handle!(timerfd_settime),
502            SyscallNum::NR_tkill => handle!(tkill),
503            SyscallNum::NR_uname => handle!(uname),
504            SyscallNum::NR_unlinkat => handle!(unlinkat),
505            SyscallNum::NR_utimensat => handle!(utimensat),
506            SyscallNum::NR_vfork => handle!(vfork),
507            SyscallNum::NR_waitid => handle!(waitid),
508            SyscallNum::NR_wait4 => handle!(wait4),
509            SyscallNum::NR_write => handle!(write),
510            SyscallNum::NR_writev => handle!(writev),
511            //
512            // CUSTOM SHADOW-SPECIFIC SYSCALLS
513            //
514            x if ShadowSyscallNum::try_from(x).is_ok() => {
515                match ShadowSyscallNum::try_from(x).expect("Conversion just succeeded above") {
516                    ShadowSyscallNum::hostname_to_addr_ipv4 => {
517                        handle!(shadow_hostname_to_addr_ipv4)
518                    }
519                    ShadowSyscallNum::init_memory_manager => {
520                        handle!(shadow_init_memory_manager)
521                    }
522                    ShadowSyscallNum::shadow_yield => handle!(shadow_yield),
523                }
524            }
525            //
526            // SHIM-ONLY SYSCALLS
527            //
528            SyscallNum::NR_clock_gettime
529            | SyscallNum::NR_gettimeofday
530            | SyscallNum::NR_sched_yield
531            | SyscallNum::NR_time => {
532                panic!(
533                    "Syscall {} ({}) should have been handled in the shim",
534                    syscall_name, ctx.args.number,
535                )
536            }
537            //
538            // NATIVE LINUX-HANDLED SYSCALLS
539            //
540            SyscallNum::NR_access
541            | SyscallNum::NR_chmod
542            | SyscallNum::NR_chown
543            | SyscallNum::NR_exit
544            | SyscallNum::NR_getcwd
545            | SyscallNum::NR_geteuid
546            | SyscallNum::NR_getegid
547            | SyscallNum::NR_getgid
548            | SyscallNum::NR_getgroups
549            | SyscallNum::NR_getresgid
550            | SyscallNum::NR_getresuid
551            | SyscallNum::NR_getrlimit
552            | SyscallNum::NR_getuid
553            | SyscallNum::NR_getxattr
554            | SyscallNum::NR_lchown
555            | SyscallNum::NR_lgetxattr
556            | SyscallNum::NR_link
557            | SyscallNum::NR_listxattr
558            | SyscallNum::NR_llistxattr
559            | SyscallNum::NR_lremovexattr
560            | SyscallNum::NR_lsetxattr
561            | SyscallNum::NR_lstat
562            | SyscallNum::NR_madvise
563            | SyscallNum::NR_mkdir
564            | SyscallNum::NR_mknod
565            | SyscallNum::NR_readlink
566            | SyscallNum::NR_removexattr
567            | SyscallNum::NR_rename
568            | SyscallNum::NR_rmdir
569            | SyscallNum::NR_rt_sigreturn
570            | SyscallNum::NR_setfsgid
571            | SyscallNum::NR_setfsuid
572            | SyscallNum::NR_setgid
573            | SyscallNum::NR_setregid
574            | SyscallNum::NR_setresgid
575            | SyscallNum::NR_setresuid
576            | SyscallNum::NR_setreuid
577            | SyscallNum::NR_setrlimit
578            | SyscallNum::NR_setuid
579            | SyscallNum::NR_setxattr
580            | SyscallNum::NR_stat
581            | SyscallNum::NR_statfs
582            | SyscallNum::NR_symlink
583            | SyscallNum::NR_truncate
584            | SyscallNum::NR_unlink
585            | SyscallNum::NR_utime
586            | SyscallNum::NR_utimes => {
587                log::trace!("Native syscall {} ({})", syscall_name, ctx.args.number);
588
589                let rv = Err(SyscallError::Native);
590
591                log_syscall_simple(
592                    ctx.objs.process,
593                    ctx.objs.process.strace_logging_options(),
594                    ctx.objs.thread.id(),
595                    syscall_name,
596                    "...",
597                    &rv,
598                )
599                .unwrap();
600
601                rv
602            }
603            //
604            // UNSUPPORTED SYSCALL
605            //
606            _ => {
607                log_once_per_value_at_level!(
608                    syscall,
609                    SyscallNum,
610                    log::Level::Warn,
611                    log::Level::Debug,
612                    "Detected unsupported syscall {} ({}) called from thread {} in process {} on host {}",
613                    syscall_name,
614                    ctx.args.number,
615                    ctx.objs.thread.id(),
616                    &*ctx.objs.process.plugin_name(),
617                    ctx.objs.host.name(),
618                );
619
620                let rv = Err(Errno::ENOSYS.into());
621
622                let (syscall_name, syscall_args) = match syscall.to_str() {
623                    // log it in the form "poll(...)"
624                    Some(syscall_name) => (syscall_name, Cow::Borrowed("...")),
625                    // log it in the form "syscall(X, ...)"
626                    None => ("syscall", Cow::Owned(format!("{}, ...", ctx.args.number))),
627                };
628
629                log_syscall_simple(
630                    ctx.objs.process,
631                    ctx.objs.process.strace_logging_options(),
632                    ctx.objs.thread.id(),
633                    syscall_name,
634                    &syscall_args,
635                    &rv,
636                )
637                .unwrap();
638
639                rv
640            }
641        }
642    }
643
    /// Did the last syscall result in `SyscallError::Blocked`? If called from a syscall handler and
    /// `is_blocked()` returns `true`, then the current syscall is the same syscall that previously
    /// blocked. For example, if currently running the `connect` syscall handler and `is_blocked()`
    /// is `true`, then the previous syscall handler that ran was also `connect` and it returned
    /// `SyscallError::Blocked`.
    ///
    /// This only reports whether a blocked syscall number is currently recorded; the number itself
    /// is not exposed.
    pub fn is_blocked(&self) -> bool {
        self.blocked_syscall.is_some()
    }
652
653    pub fn did_listen_timeout_expire(&self) -> bool {
654        // will be `None` if the syscall condition doesn't exist or there's no timeout
655        let timeout = Worker::with_active_thread(|t| {
656            assert_eq!(t.id(), self.thread_id);
657            t.syscall_condition().and_then(|x| x.timeout())
658        })
659        .unwrap();
660
661        // true if there is a timeout and it's before or at the current time
662        timeout
663            .map(|timeout| Worker::current_time().unwrap() >= timeout)
664            .unwrap_or(false)
665    }
666
667    /// Internal helper that returns the `Descriptor` for the fd if it exists, otherwise returns
668    /// EBADF.
669    fn get_descriptor(
670        descriptor_table: &DescriptorTable,
671        fd: impl TryInto<DescriptorHandle>,
672    ) -> Result<&Descriptor, linux_api::errno::Errno> {
673        // check that fd is within bounds
674        let fd = fd.try_into().or(Err(linux_api::errno::Errno::EBADF))?;
675
676        match descriptor_table.get(fd) {
677            Some(desc) => Ok(desc),
678            None => Err(linux_api::errno::Errno::EBADF),
679        }
680    }
681
682    /// Internal helper that returns the `Descriptor` for the fd if it exists, otherwise returns
683    /// EBADF.
684    fn get_descriptor_mut(
685        descriptor_table: &mut DescriptorTable,
686        fd: impl TryInto<DescriptorHandle>,
687    ) -> Result<&mut Descriptor, linux_api::errno::Errno> {
688        // check that fd is within bounds
689        let fd = fd.try_into().or(Err(linux_api::errno::Errno::EBADF))?;
690
691        match descriptor_table.get_mut(fd) {
692            Some(desc) => Ok(desc),
693            None => Err(linux_api::errno::Errno::EBADF),
694        }
695    }
696
697    /// Run a legacy C syscall handler.
698    fn legacy_syscall<T: From<SyscallReg>>(
699        syscall: LegacySyscallFn,
700        ctx: &mut SyscallContext,
701    ) -> Result<T, SyscallError> {
702        let rv: SyscallResult =
703            unsafe { syscall(ctx.handler, std::ptr::from_ref(ctx.args)) }.into();
704
705        // we need to flush pointers here so that the syscall formatter can reliably borrow process
706        // memory without an incompatible borrow
707        if rv.is_err() {
708            // the syscall didn't complete successfully; don't write back pointers
709            log::trace!(
710                "Syscall didn't complete successfully; discarding plugin ptrs without writing back."
711            );
712            ctx.objs.process.free_unsafe_borrows_noflush();
713        } else {
714            ctx.objs
715                .process
716                .free_unsafe_borrows_flush()
717                .expect("flushing syscall ptrs");
718        }
719
720        rv.map(Into::into)
721    }
722}
723
724impl std::ops::Drop for SyscallHandler {
725    fn drop(&mut self) {
726        #[cfg(feature = "perf_timers")]
727        log::debug!(
728            "Handled {} syscalls in {} seconds",
729            self.num_syscalls,
730            self.perf_duration_total.as_secs()
731        );
732        #[cfg(not(feature = "perf_timers"))]
733        log::debug!("Handled {} syscalls", self.num_syscalls);
734
735        if let Some(syscall_counter) = self.syscall_counter.as_mut() {
736            // log the plugin thread specific counts
737            log::debug!(
738                "Thread {} syscall counts: {}",
739                self.thread_id,
740                syscall_counter,
741            );
742
743            // add up the counts at the worker level
744            Worker::add_syscall_counts(syscall_counter);
745        }
746
747        unsafe { c::legacyfile_unref(self.epoll.ptr() as *mut std::ffi::c_void) };
748    }
749}
750
751pub struct SyscallContext<'a, 'b> {
752    pub objs: &'a ThreadContext<'b>,
753    pub args: &'a SyscallArgs,
754    pub handler: &'a mut SyscallHandler,
755}
756
757pub trait SyscallHandlerFn<T> {
758    fn call(self, ctx: &mut SyscallContext) -> SyscallResult;
759}
760
761impl<F, E, T0> SyscallHandlerFn<()> for F
762where
763    F: Fn(&mut SyscallContext) -> Result<T0, E>,
764    E: Into<SyscallError>,
765    T0: Into<SyscallReg>,
766{
767    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
768        self(ctx).map(Into::into).map_err(Into::into)
769    }
770}
771
772impl<F, E, T0, T1> SyscallHandlerFn<(T1,)> for F
773where
774    F: Fn(&mut SyscallContext, T1) -> Result<T0, E>,
775    E: Into<SyscallError>,
776    T0: Into<SyscallReg>,
777    T1: From<SyscallReg>,
778{
779    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
780        self(ctx, ctx.args.get(0).into())
781            .map(Into::into)
782            .map_err(Into::into)
783    }
784}
785
786impl<F, E, T0, T1, T2> SyscallHandlerFn<(T1, T2)> for F
787where
788    F: Fn(&mut SyscallContext, T1, T2) -> Result<T0, E>,
789    E: Into<SyscallError>,
790    T0: Into<SyscallReg>,
791    T1: From<SyscallReg>,
792    T2: From<SyscallReg>,
793{
794    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
795        self(ctx, ctx.args.get(0).into(), ctx.args.get(1).into())
796            .map(Into::into)
797            .map_err(Into::into)
798    }
799}
800
801impl<F, E, T0, T1, T2, T3> SyscallHandlerFn<(T1, T2, T3)> for F
802where
803    F: Fn(&mut SyscallContext, T1, T2, T3) -> Result<T0, E>,
804    E: Into<SyscallError>,
805    T0: Into<SyscallReg>,
806    T1: From<SyscallReg>,
807    T2: From<SyscallReg>,
808    T3: From<SyscallReg>,
809{
810    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
811        self(
812            ctx,
813            ctx.args.get(0).into(),
814            ctx.args.get(1).into(),
815            ctx.args.get(2).into(),
816        )
817        .map(Into::into)
818        .map_err(Into::into)
819    }
820}
821
822impl<F, E, T0, T1, T2, T3, T4> SyscallHandlerFn<(T1, T2, T3, T4)> for F
823where
824    F: Fn(&mut SyscallContext, T1, T2, T3, T4) -> Result<T0, E>,
825    E: Into<SyscallError>,
826    T0: Into<SyscallReg>,
827    T1: From<SyscallReg>,
828    T2: From<SyscallReg>,
829    T3: From<SyscallReg>,
830    T4: From<SyscallReg>,
831{
832    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
833        self(
834            ctx,
835            ctx.args.get(0).into(),
836            ctx.args.get(1).into(),
837            ctx.args.get(2).into(),
838            ctx.args.get(3).into(),
839        )
840        .map(Into::into)
841        .map_err(Into::into)
842    }
843}
844
845impl<F, E, T0, T1, T2, T3, T4, T5> SyscallHandlerFn<(T1, T2, T3, T4, T5)> for F
846where
847    F: Fn(&mut SyscallContext, T1, T2, T3, T4, T5) -> Result<T0, E>,
848    E: Into<SyscallError>,
849    T0: Into<SyscallReg>,
850    T1: From<SyscallReg>,
851    T2: From<SyscallReg>,
852    T3: From<SyscallReg>,
853    T4: From<SyscallReg>,
854    T5: From<SyscallReg>,
855{
856    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
857        self(
858            ctx,
859            ctx.args.get(0).into(),
860            ctx.args.get(1).into(),
861            ctx.args.get(2).into(),
862            ctx.args.get(3).into(),
863            ctx.args.get(4).into(),
864        )
865        .map(Into::into)
866        .map_err(Into::into)
867    }
868}
869
870impl<F, E, T0, T1, T2, T3, T4, T5, T6> SyscallHandlerFn<(T1, T2, T3, T4, T5, T6)> for F
871where
872    F: Fn(&mut SyscallContext, T1, T2, T3, T4, T5, T6) -> Result<T0, E>,
873    E: Into<SyscallError>,
874    T0: Into<SyscallReg>,
875    T1: From<SyscallReg>,
876    T2: From<SyscallReg>,
877    T3: From<SyscallReg>,
878    T4: From<SyscallReg>,
879    T5: From<SyscallReg>,
880    T6: From<SyscallReg>,
881{
882    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
883        self(
884            ctx,
885            ctx.args.get(0).into(),
886            ctx.args.get(1).into(),
887            ctx.args.get(2).into(),
888            ctx.args.get(3).into(),
889            ctx.args.get(4).into(),
890            ctx.args.get(5).into(),
891        )
892        .map(Into::into)
893        .map_err(Into::into)
894    }
895}
896
897mod export {
898    use crate::host::host::Host;
899    use crate::host::process::Process;
900    use crate::host::thread::Thread;
901
902    use super::*;
903
904    /// Returns a pointer to the current running host. The returned pointer is invalidated the next
905    /// time the worker switches hosts. Rust syscall handlers should get the host from the
906    /// [`SyscallContext`] instead.
907    #[unsafe(no_mangle)]
908    pub extern "C-unwind" fn rustsyscallhandler_getHost(sys: *const SyscallHandler) -> *const Host {
909        let sys = unsafe { sys.as_ref() }.unwrap();
910        Worker::with_active_host(|h| {
911            assert_eq!(h.id(), sys.host_id);
912            std::ptr::from_ref(h)
913        })
914        .unwrap()
915    }
916
917    /// Returns a pointer to the current running process. The returned pointer is invalidated the
918    /// next time the worker switches processes. Rust syscall handlers should get the process from
919    /// the [`SyscallContext`] instead.
920    #[unsafe(no_mangle)]
921    pub extern "C-unwind" fn rustsyscallhandler_getProcess(
922        sys: *const SyscallHandler,
923    ) -> *const Process {
924        let sys = unsafe { sys.as_ref() }.unwrap();
925        Worker::with_active_process(|p| {
926            assert_eq!(p.id(), sys.process_id);
927            std::ptr::from_ref(p)
928        })
929        .unwrap()
930    }
931
932    /// Returns a pointer to the current running thread. The returned pointer is invalidated the
933    /// next time the worker switches threads. Rust syscall handlers should get the thread from the
934    /// [`SyscallContext`] instead.
935    #[unsafe(no_mangle)]
936    pub extern "C-unwind" fn rustsyscallhandler_getThread(
937        sys: *const SyscallHandler,
938    ) -> *const Thread {
939        let sys = unsafe { sys.as_ref() }.unwrap();
940        Worker::with_active_thread(|t| {
941            assert_eq!(t.id(), sys.thread_id);
942            std::ptr::from_ref(t)
943        })
944        .unwrap()
945    }
946
947    #[unsafe(no_mangle)]
948    pub extern "C-unwind" fn rustsyscallhandler_wasBlocked(sys: *const SyscallHandler) -> bool {
949        let sys = unsafe { sys.as_ref() }.unwrap();
950        sys.is_blocked()
951    }
952
953    #[unsafe(no_mangle)]
954    pub extern "C-unwind" fn rustsyscallhandler_didListenTimeoutExpire(
955        sys: *const SyscallHandler,
956    ) -> bool {
957        let sys = unsafe { sys.as_ref() }.unwrap();
958        sys.did_listen_timeout_expire()
959    }
960
961    #[unsafe(no_mangle)]
962    pub extern "C-unwind" fn rustsyscallhandler_getEpoll(
963        sys: *const SyscallHandler,
964    ) -> *mut c::Epoll {
965        let sys = unsafe { sys.as_ref() }.unwrap();
966        sys.epoll.ptr()
967    }
968}