// shadow_rs/host/syscall/handler/mod.rs

1use std::borrow::Cow;
2
3#[cfg(feature = "perf_timers")]
4use std::time::Duration;
5
6use linux_api::errno::Errno;
7use linux_api::syscall::SyscallNum;
8use shadow_shim_helper_rs::HostId;
9use shadow_shim_helper_rs::shadow_syscalls::ShadowSyscallNum;
10use shadow_shim_helper_rs::simulation_time::SimulationTime;
11use shadow_shim_helper_rs::syscall_types::SyscallArgs;
12use shadow_shim_helper_rs::syscall_types::SyscallReg;
13use shadow_shim_helper_rs::util::SendPointer;
14
15use crate::core::worker::Worker;
16use crate::cshadow as c;
17use crate::host::context::ThreadContext;
18use crate::host::descriptor::Descriptor;
19use crate::host::descriptor::descriptor_table::{DescriptorHandle, DescriptorTable};
20use crate::host::process::ProcessId;
21use crate::host::syscall::formatter::log_syscall_simple;
22use crate::host::syscall::is_shadow_syscall;
23use crate::host::syscall::types::SyscallReturn;
24use crate::host::syscall::types::{SyscallError, SyscallResult};
25use crate::host::thread::ThreadId;
26use crate::utility::counter::Counter;
27
28#[cfg(feature = "perf_timers")]
29use crate::utility::perf_timer::PerfTimer;
30
31mod clone;
32mod close_range;
33mod epoll;
34mod eventfd;
35mod fcntl;
36mod file;
37mod fileat;
38mod futex;
39mod ioctl;
40mod mman;
41mod poll;
42mod prctl;
43mod random;
44mod resource;
45mod sched;
46mod select;
47mod shadow;
48mod signal;
49mod socket;
50mod stat;
51mod sysinfo;
52mod time;
53mod timerfd;
54mod uio;
55mod unistd;
56mod wait;
57
58type LegacySyscallFn =
59    unsafe extern "C-unwind" fn(*mut SyscallHandler, *const SyscallArgs) -> SyscallReturn;
60
61// Will eventually contain syscall handler state once migrated from the c handler
62pub struct SyscallHandler {
63    /// The host that this `SyscallHandler` belongs to. Intended to be used for logging.
64    host_id: HostId,
65    /// The process that this `SyscallHandler` belongs to. Intended to be used for logging.
66    process_id: ProcessId,
67    /// The thread that this `SyscallHandler` belongs to. Intended to be used for logging.
68    thread_id: ThreadId,
69    /// The total number of syscalls that we have handled.
70    num_syscalls: u64,
71    /// A counter for individual syscalls.
72    syscall_counter: Option<Counter>,
73    /// If we are currently blocking a specific syscall, i.e., waiting for a socket to be
74    /// readable/writable or waiting for a timeout, the syscall number of that function is stored
75    /// here. Will be `None` if a syscall is not currently blocked.
76    blocked_syscall: Option<SyscallNum>,
77    /// In some cases the syscall handler completes, but we block the caller anyway to move time
78    /// forward. This stores the result of the completed syscall, to be returned when the caller
79    /// resumes.
80    pending_result: Option<SyscallResult>,
81    /// We use this epoll to service syscalls that need to block on the status of multiple
82    /// descriptors, like poll.
83    epoll: SendPointer<c::Epoll>,
84    /// The cumulative time consumed while handling the current syscall. This includes the time from
85    /// previous calls that ended up blocking.
86    #[cfg(feature = "perf_timers")]
87    perf_duration_current: Duration,
88    /// The total time elapsed while handling all syscalls.
89    #[cfg(feature = "perf_timers")]
90    perf_duration_total: Duration,
91}
92
93impl SyscallHandler {
94    pub fn new(
95        host_id: HostId,
96        process_id: ProcessId,
97        thread_id: ThreadId,
98        count_syscalls: bool,
99    ) -> SyscallHandler {
100        SyscallHandler {
101            host_id,
102            process_id,
103            thread_id,
104            num_syscalls: 0,
105            syscall_counter: count_syscalls.then(Counter::new),
106            blocked_syscall: None,
107            pending_result: None,
108            epoll: unsafe { SendPointer::new(c::epoll_new()) },
109            #[cfg(feature = "perf_timers")]
110            perf_duration_current: Duration::ZERO,
111            #[cfg(feature = "perf_timers")]
112            perf_duration_total: Duration::ZERO,
113        }
114    }
115
116    pub fn syscall(&mut self, ctx: &ThreadContext, args: &SyscallArgs) -> SyscallResult {
117        // it wouldn't make sense if we were given a different host, process, and thread
118        assert_eq!(ctx.host.id(), self.host_id);
119        assert_eq!(ctx.process.id(), self.process_id);
120        assert_eq!(ctx.thread.id(), self.thread_id);
121
122        let syscall = SyscallNum::new(args.number.try_into().unwrap());
123        let syscall_name = syscall.to_str().unwrap_or("unknown-syscall");
124
125        // make sure that we either don't have a blocked syscall, or if we blocked a syscall, then
126        // that same syscall should be executed again when it becomes unblocked
127        if let Some(blocked_syscall) = self.blocked_syscall
128            && blocked_syscall != syscall
129        {
130            panic!(
131                "We blocked syscall {blocked_syscall} but syscall {syscall} is unexpectedly being invoked"
132            );
133        }
134
135        // were we previously blocked on this same syscall?
136        let was_blocked = self.blocked_syscall.is_some();
137
138        if let Some(pending_result) = self.pending_result.take() {
139            // The syscall was already completed, but we delayed the response to yield the CPU.
140            // Return that response now.
141            log::trace!("Returning delayed result");
142            assert!(!matches!(pending_result, Err(SyscallError::Blocked(_))));
143
144            self.blocked_syscall = None;
145            self.pending_result = None;
146
147            return pending_result;
148        }
149
150        log::trace!(
151            "SYSCALL_HANDLER_PRE: {} ({}){} — ({}, tid={})",
152            syscall_name,
153            args.number,
154            if was_blocked {
155                " (previously BLOCKed)"
156            } else {
157                ""
158            },
159            &*ctx.process.name(),
160            ctx.thread.id(),
161        );
162
163        // Count the frequency of each syscall, but only on the initial call. This avoids double
164        // counting in the case where the initial call blocked at first, but then later became
165        // unblocked and is now being handled again here.
166        if let Some(syscall_counter) = self.syscall_counter.as_mut()
167            && !was_blocked
168        {
169            syscall_counter.add_one(syscall_name);
170        }
171
172        #[cfg(feature = "perf_timers")]
173        let timer = PerfTimer::new_started();
174
175        let mut rv = self.run_handler(ctx, args);
176
177        #[cfg(feature = "perf_timers")]
178        {
179            // add the cumulative elapsed seconds
180            self.perf_duration_current += timer.elapsed();
181
182            log::debug!(
183                "Handling syscall {} ({}) took cumulative {} ms",
184                syscall_name,
185                args.number,
186                self.perf_duration_current.as_millis(),
187            );
188        }
189
190        if !matches!(rv, Err(SyscallError::Blocked(_))) {
191            // the syscall completed, count it and the cumulative time to complete it
192            self.num_syscalls += 1;
193
194            #[cfg(feature = "perf_timers")]
195            {
196                self.perf_duration_total += self.perf_duration_current;
197                self.perf_duration_current = Duration::ZERO;
198            }
199        }
200
201        if log::log_enabled!(log::Level::Trace) {
202            let rv_formatted = match &rv {
203                Ok(reg) => format!("{}", i64::from(*reg)),
204                Err(SyscallError::Failed(failed)) => {
205                    let errno = failed.errno;
206                    format!("{} ({errno})", errno.to_negated_i64())
207                }
208                Err(SyscallError::Native) => "<native>".to_string(),
209                Err(SyscallError::Blocked(_)) => "<blocked>".to_string(),
210            };
211
212            log::trace!(
213                "SYSCALL_HANDLER_POST: {} ({}) result {}{} — ({}, tid={})",
214                syscall_name,
215                args.number,
216                if was_blocked { "BLOCK -> " } else { "" },
217                rv_formatted,
218                &*ctx.process.name(),
219                ctx.thread.id(),
220            );
221        }
222
223        // If the syscall would be blocked, but there's a signal pending, fail with
224        // EINTR instead. The shim-side code will run the signal handlers and then
225        // either return the EINTR or restart the syscall (See SA_RESTART in
226        // signal(7)).
227        //
228        // We do this check *after* (not before) trying the syscall so that we don't
229        // "interrupt" a syscall that wouldn't have blocked in the first place, or
230        // that can return a "partial" result when interrupted. e.g. consider the
231        // sequence:
232        //
233        // * Thread is blocked on reading a file descriptor.
234        // * The read becomes ready and the thread is scheduled to run.
235        // * The thread receives an unblocked signal.
236        // * The thread runs again.
237        //
238        // In this scenario, the `read` call should be allowed to complete successfully.
239        // from signal(7):  "If an I/O call on a slow device has already transferred
240        // some data by the time it is interrupted by a signal handler, then the
241        // call will return a success  status  (normally,  the  number of bytes
242        // transferred)."
243
244        if let Err(SyscallError::Blocked(ref blocked)) = rv {
245            // the syscall wants to block, but is there a signal pending?
246            let is_unblocked_signal_pending = ctx
247                .thread
248                .unblocked_signal_pending(ctx.process, &ctx.host.shim_shmem_lock_borrow().unwrap());
249
250            if is_unblocked_signal_pending {
251                // return EINTR instead
252                rv = Err(SyscallError::new_interrupted(blocked.restartable));
253            }
254        }
255
256        // we only use unsafe borrows from C code, and we should have only called into C syscall
257        // handlers through `Self::legacy_syscall` which should have already flushed the pointers,
258        // but we may as well do it again here just to be safe
259        if rv.is_err() {
260            // the syscall didn't complete successfully; don't write back pointers
261            log::trace!(
262                "Syscall didn't complete successfully; discarding plugin ptrs without writing back"
263            );
264            ctx.process.free_unsafe_borrows_noflush();
265        } else {
266            ctx.process
267                .free_unsafe_borrows_flush()
268                .expect("flushing syscall ptrs");
269        }
270
271        if ctx.process.is_running() && !matches!(rv, Err(SyscallError::Blocked(_))) {
272            let host_shmem = ctx.host.shim_shmem();
273            let mut host_shmem_prot = ctx.host.shim_shmem_lock_borrow_mut().unwrap();
274
275            // increment unblocked syscall latency, but only for non-shadow-syscalls, since the
276            // latter are part of Shadow's internal plumbing; they shouldn't necessarily "consume"
277            // time
278            if ctx.host.shim_shmem().model_unblocked_syscall_latency && !is_shadow_syscall(syscall)
279            {
280                host_shmem_prot.unapplied_cpu_latency += host_shmem.unblocked_syscall_latency;
281            }
282
283            log::trace!(
284                "Unapplied CPU latency amt={}ns max={}ns",
285                host_shmem_prot.unapplied_cpu_latency.as_nanos(),
286                host_shmem.max_unapplied_cpu_latency.as_nanos()
287            );
288
289            if host_shmem_prot.unapplied_cpu_latency > host_shmem.max_unapplied_cpu_latency {
290                let new_time = Worker::current_time().unwrap()
291                    + core::mem::replace(
292                        &mut host_shmem_prot.unapplied_cpu_latency,
293                        SimulationTime::ZERO,
294                    );
295                if new_time <= Worker::max_event_runahead_time(ctx.host) {
296                    // The new time is early enough that we can safely just increment to that time.
297                    // i.e. there are no threads or other events scheduled to
298                    // run on this worker before `new_time`.
299                    log::trace!(
300                        "Reached max-unapplied-cpu-latency, but not max runahead; Incrementing time"
301                    );
302                    Worker::set_current_time(new_time);
303                } else {
304                    // We can't safely increment to the new time, e.g. because
305                    // there are other events or the end of the current
306                    // scheduler round scheduled to happen first.  Reschedule
307                    // the current thread to run at the new time instead of
308                    // incrementing it.
309                    log::trace!(
310                        "Reached max-unapplied-cpu-latency, and max runahead; Rescheduling"
311                    );
312
313                    // Save the syscall result so that we can return it later
314                    // instead of re-executing the syscall.
315                    assert!(self.pending_result.is_none());
316                    self.pending_result = Some(rv);
317
318                    rv = Err(SyscallError::new_blocked_until(new_time, false));
319                }
320            }
321        }
322
323        if matches!(rv, Err(SyscallError::Blocked(_))) {
324            // we are blocking: store the syscall number so we know to expect the same syscall again
325            // when it unblocks
326            self.blocked_syscall = Some(syscall);
327        } else {
328            self.blocked_syscall = None;
329        }
330
331        rv
332    }
333
334    #[allow(non_upper_case_globals)]
335    fn run_handler(&mut self, ctx: &ThreadContext, args: &SyscallArgs) -> SyscallResult {
336        let mut ctx = SyscallContext {
337            objs: ctx,
338            args,
339            handler: self,
340        };
341
342        let syscall = SyscallNum::new(ctx.args.number.try_into().unwrap());
343        let syscall_name = syscall.to_str().unwrap_or("unknown-syscall");
344
345        macro_rules! handle {
346            ($f:ident) => {{
347                let rv = SyscallHandlerFn::call(Self::$f, &mut ctx);
348
349                // log the syscall if enabled
350                if let Some(strace_fmt_options) = ctx.objs.process.strace_logging_options() {
351                    ctx.objs.process.with_strace_file(|file| {
352                        crate::utility::macros::SyscallLogger::$f(
353                            file,
354                            ctx.args.args,
355                            &rv,
356                            strace_fmt_options,
357                            ctx.objs.thread.id(),
358                            &*ctx.objs.process.memory_borrow(),
359                        )
360                        .unwrap();
361                    });
362                }
363
364                rv
365            }};
366        }
367
368        match syscall {
369            // SHADOW-HANDLED SYSCALLS
370            //
371            SyscallNum::NR_accept => handle!(accept),
372            SyscallNum::NR_accept4 => handle!(accept4),
373            SyscallNum::NR_arch_prctl => handle!(arch_prctl),
374            SyscallNum::NR_alarm => handle!(alarm),
375            SyscallNum::NR_bind => handle!(bind),
376            SyscallNum::NR_brk => handle!(brk),
377            SyscallNum::NR_capget => handle!(capget),
378            SyscallNum::NR_capset => handle!(capset),
379            SyscallNum::NR_chdir => handle!(chdir),
380            SyscallNum::NR_clock_getres => handle!(clock_getres),
381            SyscallNum::NR_clock_nanosleep => handle!(clock_nanosleep),
382            SyscallNum::NR_clone => handle!(clone),
383            SyscallNum::NR_clone3 => handle!(clone3),
384            SyscallNum::NR_close => handle!(close),
385            SyscallNum::NR_close_range => handle!(close_range),
386            SyscallNum::NR_connect => handle!(connect),
387            SyscallNum::NR_creat => handle!(creat),
388            SyscallNum::NR_dup => handle!(dup),
389            SyscallNum::NR_dup2 => handle!(dup2),
390            SyscallNum::NR_dup3 => handle!(dup3),
391            SyscallNum::NR_epoll_create => handle!(epoll_create),
392            SyscallNum::NR_epoll_create1 => handle!(epoll_create1),
393            SyscallNum::NR_epoll_ctl => handle!(epoll_ctl),
394            SyscallNum::NR_epoll_pwait => handle!(epoll_pwait),
395            SyscallNum::NR_epoll_pwait2 => handle!(epoll_pwait2),
396            SyscallNum::NR_epoll_wait => handle!(epoll_wait),
397            SyscallNum::NR_eventfd => handle!(eventfd),
398            SyscallNum::NR_eventfd2 => handle!(eventfd2),
399            SyscallNum::NR_execve => handle!(execve),
400            SyscallNum::NR_execveat => handle!(execveat),
401            SyscallNum::NR_exit_group => handle!(exit_group),
402            SyscallNum::NR_faccessat => handle!(faccessat),
403            SyscallNum::NR_faccessat2 => handle!(faccessat2),
404            SyscallNum::NR_fadvise64 => handle!(fadvise64),
405            SyscallNum::NR_fallocate => handle!(fallocate),
406            SyscallNum::NR_fchmod => handle!(fchmod),
407            SyscallNum::NR_fchmodat => handle!(fchmodat),
408            SyscallNum::NR_fchmodat2 => handle!(fchmodat2),
409            SyscallNum::NR_fchown => handle!(fchown),
410            SyscallNum::NR_fchownat => handle!(fchownat),
411            SyscallNum::NR_fcntl => handle!(fcntl),
412            SyscallNum::NR_fdatasync => handle!(fdatasync),
413            SyscallNum::NR_fgetxattr => handle!(fgetxattr),
414            SyscallNum::NR_flistxattr => handle!(flistxattr),
415            SyscallNum::NR_flock => handle!(flock),
416            SyscallNum::NR_fork => handle!(fork),
417            SyscallNum::NR_fremovexattr => handle!(fremovexattr),
418            SyscallNum::NR_fsetxattr => handle!(fsetxattr),
419            SyscallNum::NR_fstat => handle!(fstat),
420            SyscallNum::NR_fstatfs => handle!(fstatfs),
421            SyscallNum::NR_fsync => handle!(fsync),
422            SyscallNum::NR_ftruncate => handle!(ftruncate),
423            SyscallNum::NR_futex => handle!(futex),
424            SyscallNum::NR_futimesat => handle!(futimesat),
425            SyscallNum::NR_get_robust_list => handle!(get_robust_list),
426            SyscallNum::NR_getdents => handle!(getdents),
427            SyscallNum::NR_getdents64 => handle!(getdents64),
428            SyscallNum::NR_getitimer => handle!(getitimer),
429            SyscallNum::NR_getpeername => handle!(getpeername),
430            SyscallNum::NR_getpgid => handle!(getpgid),
431            SyscallNum::NR_getpgrp => handle!(getpgrp),
432            SyscallNum::NR_getpid => handle!(getpid),
433            SyscallNum::NR_getppid => handle!(getppid),
434            SyscallNum::NR_getrandom => handle!(getrandom),
435            SyscallNum::NR_getrlimit => handle!(getrlimit),
436            SyscallNum::NR_getsid => handle!(getsid),
437            SyscallNum::NR_getsockname => handle!(getsockname),
438            SyscallNum::NR_getsockopt => handle!(getsockopt),
439            SyscallNum::NR_gettid => handle!(gettid),
440            SyscallNum::NR_ioctl => handle!(ioctl),
441            SyscallNum::NR_kill => handle!(kill),
442            SyscallNum::NR_linkat => handle!(linkat),
443            SyscallNum::NR_listen => handle!(listen),
444            SyscallNum::NR_lseek => handle!(lseek),
445            SyscallNum::NR_mkdirat => handle!(mkdirat),
446            SyscallNum::NR_mknodat => handle!(mknodat),
447            SyscallNum::NR_mmap => handle!(mmap),
448            SyscallNum::NR_mprotect => handle!(mprotect),
449            SyscallNum::NR_mremap => handle!(mremap),
450            SyscallNum::NR_munmap => handle!(munmap),
451            SyscallNum::NR_nanosleep => handle!(nanosleep),
452            SyscallNum::NR_newfstatat => handle!(newfstatat),
453            SyscallNum::NR_open => handle!(open),
454            SyscallNum::NR_openat => handle!(openat),
455            SyscallNum::NR_pipe => handle!(pipe),
456            SyscallNum::NR_pipe2 => handle!(pipe2),
457            SyscallNum::NR_poll => handle!(poll),
458            SyscallNum::NR_ppoll => handle!(ppoll),
459            SyscallNum::NR_prctl => handle!(prctl),
460            SyscallNum::NR_pread64 => handle!(pread64),
461            SyscallNum::NR_preadv => handle!(preadv),
462            SyscallNum::NR_preadv2 => handle!(preadv2),
463            SyscallNum::NR_prlimit64 => handle!(prlimit64),
464            SyscallNum::NR_pselect6 => handle!(pselect6),
465            SyscallNum::NR_pwrite64 => handle!(pwrite64),
466            SyscallNum::NR_pwritev => handle!(pwritev),
467            SyscallNum::NR_pwritev2 => handle!(pwritev2),
468            SyscallNum::NR_read => handle!(read),
469            SyscallNum::NR_readahead => handle!(readahead),
470            SyscallNum::NR_readlink => handle!(readlink),
471            SyscallNum::NR_readlinkat => handle!(readlinkat),
472            SyscallNum::NR_readv => handle!(readv),
473            SyscallNum::NR_recvfrom => handle!(recvfrom),
474            SyscallNum::NR_recvmsg => handle!(recvmsg),
475            SyscallNum::NR_renameat => handle!(renameat),
476            SyscallNum::NR_renameat2 => handle!(renameat2),
477            SyscallNum::NR_rseq => handle!(rseq),
478            SyscallNum::NR_rt_sigaction => handle!(rt_sigaction),
479            SyscallNum::NR_rt_sigprocmask => handle!(rt_sigprocmask),
480            SyscallNum::NR_sched_getparam => handle!(sched_getparam),
481            SyscallNum::NR_sched_getscheduler => handle!(sched_getscheduler),
482            SyscallNum::NR_sched_getaffinity => handle!(sched_getaffinity),
483            SyscallNum::NR_sched_setparam => handle!(sched_setparam),
484            SyscallNum::NR_sched_setaffinity => handle!(sched_setaffinity),
485            SyscallNum::NR_sched_setscheduler => handle!(sched_setscheduler),
486            SyscallNum::NR_select => handle!(select),
487            SyscallNum::NR_sendmsg => handle!(sendmsg),
488            SyscallNum::NR_sendto => handle!(sendto),
489            SyscallNum::NR_setrlimit => handle!(setrlimit),
490            SyscallNum::NR_set_robust_list => handle!(set_robust_list),
491            SyscallNum::NR_set_tid_address => handle!(set_tid_address),
492            SyscallNum::NR_setitimer => handle!(setitimer),
493            SyscallNum::NR_setpgid => handle!(setpgid),
494            SyscallNum::NR_setsid => handle!(setsid),
495            SyscallNum::NR_setsockopt => handle!(setsockopt),
496            SyscallNum::NR_shutdown => handle!(shutdown),
497            SyscallNum::NR_sigaltstack => handle!(sigaltstack),
498            SyscallNum::NR_socket => handle!(socket),
499            SyscallNum::NR_socketpair => handle!(socketpair),
500            SyscallNum::NR_statx => handle!(statx),
501            SyscallNum::NR_symlinkat => handle!(symlinkat),
502            SyscallNum::NR_sync_file_range => handle!(sync_file_range),
503            SyscallNum::NR_syncfs => handle!(syncfs),
504            SyscallNum::NR_sysinfo => handle!(sysinfo),
505            SyscallNum::NR_tgkill => handle!(tgkill),
506            SyscallNum::NR_timerfd_create => handle!(timerfd_create),
507            SyscallNum::NR_timerfd_gettime => handle!(timerfd_gettime),
508            SyscallNum::NR_timerfd_settime => handle!(timerfd_settime),
509            SyscallNum::NR_tkill => handle!(tkill),
510            SyscallNum::NR_uname => handle!(uname),
511            SyscallNum::NR_unlinkat => handle!(unlinkat),
512            SyscallNum::NR_utimensat => handle!(utimensat),
513            SyscallNum::NR_vfork => handle!(vfork),
514            SyscallNum::NR_waitid => handle!(waitid),
515            SyscallNum::NR_wait4 => handle!(wait4),
516            SyscallNum::NR_write => handle!(write),
517            SyscallNum::NR_writev => handle!(writev),
518            //
519            // CUSTOM SHADOW-SPECIFIC SYSCALLS
520            //
521            x if ShadowSyscallNum::try_from(x).is_ok() => {
522                match ShadowSyscallNum::try_from(x).expect("Conversion just succeeded above") {
523                    ShadowSyscallNum::hostname_to_addr_ipv4 => {
524                        handle!(shadow_hostname_to_addr_ipv4)
525                    }
526                    ShadowSyscallNum::init_memory_manager => {
527                        handle!(shadow_init_memory_manager)
528                    }
529                    ShadowSyscallNum::shadow_yield => handle!(shadow_yield),
530                }
531            }
532            //
533            // SHIM-ONLY SYSCALLS
534            //
535            SyscallNum::NR_clock_gettime
536            | SyscallNum::NR_gettimeofday
537            | SyscallNum::NR_sched_yield
538            | SyscallNum::NR_time => {
539                panic!(
540                    "Syscall {} ({}) should have been handled in the shim",
541                    syscall_name, ctx.args.number,
542                )
543            }
544            //
545            // NATIVE LINUX-HANDLED SYSCALLS
546            //
547            SyscallNum::NR_access
548            | SyscallNum::NR_chmod
549            | SyscallNum::NR_chown
550            | SyscallNum::NR_exit
551            | SyscallNum::NR_getcwd
552            | SyscallNum::NR_geteuid
553            | SyscallNum::NR_getegid
554            | SyscallNum::NR_getgid
555            | SyscallNum::NR_getgroups
556            | SyscallNum::NR_getresgid
557            | SyscallNum::NR_getresuid
558            | SyscallNum::NR_getuid
559            | SyscallNum::NR_getxattr
560            | SyscallNum::NR_lchown
561            | SyscallNum::NR_lgetxattr
562            | SyscallNum::NR_link
563            | SyscallNum::NR_listxattr
564            | SyscallNum::NR_llistxattr
565            | SyscallNum::NR_lremovexattr
566            | SyscallNum::NR_lsetxattr
567            | SyscallNum::NR_lstat
568            | SyscallNum::NR_madvise
569            | SyscallNum::NR_mkdir
570            | SyscallNum::NR_mknod
571            | SyscallNum::NR_removexattr
572            | SyscallNum::NR_rename
573            | SyscallNum::NR_rmdir
574            | SyscallNum::NR_rt_sigreturn
575            | SyscallNum::NR_setfsgid
576            | SyscallNum::NR_setfsuid
577            | SyscallNum::NR_setgid
578            | SyscallNum::NR_setregid
579            | SyscallNum::NR_setresgid
580            | SyscallNum::NR_setresuid
581            | SyscallNum::NR_setreuid
582            | SyscallNum::NR_setuid
583            | SyscallNum::NR_setxattr
584            | SyscallNum::NR_stat
585            | SyscallNum::NR_statfs
586            | SyscallNum::NR_symlink
587            | SyscallNum::NR_truncate
588            | SyscallNum::NR_unlink
589            | SyscallNum::NR_utime
590            | SyscallNum::NR_utimes => {
591                log::trace!("Native syscall {} ({})", syscall_name, ctx.args.number);
592
593                let rv = Err(SyscallError::Native);
594
595                log_syscall_simple(
596                    ctx.objs.process,
597                    ctx.objs.process.strace_logging_options(),
598                    ctx.objs.thread.id(),
599                    syscall_name,
600                    "...",
601                    &rv,
602                )
603                .unwrap();
604
605                rv
606            }
607            //
608            // UNSUPPORTED SYSCALL
609            //
610            _ => {
611                log_once_per_value_at_level!(
612                    syscall,
613                    SyscallNum,
614                    log::Level::Warn,
615                    log::Level::Debug,
616                    "Detected unsupported syscall {} ({}) called from thread {} in process {} on host {}",
617                    syscall_name,
618                    ctx.args.number,
619                    ctx.objs.thread.id(),
620                    &*ctx.objs.process.plugin_name(),
621                    ctx.objs.host.name(),
622                );
623
624                let rv = Err(Errno::ENOSYS.into());
625
626                let (syscall_name, syscall_args) = match syscall.to_str() {
627                    // log it in the form "poll(...)"
628                    Some(syscall_name) => (syscall_name, Cow::Borrowed("...")),
629                    // log it in the form "syscall(X, ...)"
630                    None => ("syscall", Cow::Owned(format!("{}, ...", ctx.args.number))),
631                };
632
633                log_syscall_simple(
634                    ctx.objs.process,
635                    ctx.objs.process.strace_logging_options(),
636                    ctx.objs.thread.id(),
637                    syscall_name,
638                    &syscall_args,
639                    &rv,
640                )
641                .unwrap();
642
643                rv
644            }
645        }
646    }
647
648    /// Did the last syscall result in `SyscallError::Blocked`? If called from a syscall handler and
649    /// `is_blocked()` returns `true`, then the current syscall is the same syscall that previously
650    /// blocked. For example, if currently running the `connect` syscall handler and `is_blocked()`
651    /// is `true`, then the previous syscall handler that ran was also `connect` and it returned
652    /// `SyscallError::Blocked`.
653    pub fn is_blocked(&self) -> bool {
654        self.blocked_syscall.is_some()
655    }
656
657    pub fn did_listen_timeout_expire(&self) -> bool {
658        // will be `None` if the syscall condition doesn't exist or there's no timeout
659        let timeout = Worker::with_active_thread(|t| {
660            assert_eq!(t.id(), self.thread_id);
661            t.syscall_condition().and_then(|x| x.timeout())
662        })
663        .unwrap();
664
665        // true if there is a timeout and it's before or at the current time
666        timeout
667            .map(|timeout| Worker::current_time().unwrap() >= timeout)
668            .unwrap_or(false)
669    }
670
671    /// Internal helper that returns the `Descriptor` for the fd if it exists, otherwise returns
672    /// EBADF.
673    fn get_descriptor(
674        descriptor_table: &DescriptorTable,
675        fd: impl TryInto<DescriptorHandle>,
676    ) -> Result<&Descriptor, linux_api::errno::Errno> {
677        // check that fd is within bounds
678        let fd = fd.try_into().or(Err(linux_api::errno::Errno::EBADF))?;
679
680        match descriptor_table.get(fd) {
681            Some(desc) => Ok(desc),
682            None => Err(linux_api::errno::Errno::EBADF),
683        }
684    }
685
686    /// Internal helper that returns the `Descriptor` for the fd if it exists, otherwise returns
687    /// EBADF.
688    fn get_descriptor_mut(
689        descriptor_table: &mut DescriptorTable,
690        fd: impl TryInto<DescriptorHandle>,
691    ) -> Result<&mut Descriptor, linux_api::errno::Errno> {
692        // check that fd is within bounds
693        let fd = fd.try_into().or(Err(linux_api::errno::Errno::EBADF))?;
694
695        match descriptor_table.get_mut(fd) {
696            Some(desc) => Ok(desc),
697            None => Err(linux_api::errno::Errno::EBADF),
698        }
699    }
700
701    /// Run a legacy C syscall handler.
702    fn legacy_syscall<T: From<SyscallReg>>(
703        syscall: LegacySyscallFn,
704        ctx: &mut SyscallContext,
705    ) -> Result<T, SyscallError> {
706        let rv: SyscallResult =
707            unsafe { syscall(ctx.handler, std::ptr::from_ref(ctx.args)) }.into();
708
709        // we need to flush pointers here so that the syscall formatter can reliably borrow process
710        // memory without an incompatible borrow
711        if rv.is_err() {
712            // the syscall didn't complete successfully; don't write back pointers
713            log::trace!(
714                "Syscall didn't complete successfully; discarding plugin ptrs without writing back."
715            );
716            ctx.objs.process.free_unsafe_borrows_noflush();
717        } else {
718            ctx.objs
719                .process
720                .free_unsafe_borrows_flush()
721                .expect("flushing syscall ptrs");
722        }
723
724        rv.map(Into::into)
725    }
726}
727
728impl std::ops::Drop for SyscallHandler {
729    fn drop(&mut self) {
730        #[cfg(feature = "perf_timers")]
731        log::debug!(
732            "Handled {} syscalls in {} seconds",
733            self.num_syscalls,
734            self.perf_duration_total.as_secs()
735        );
736        #[cfg(not(feature = "perf_timers"))]
737        log::debug!("Handled {} syscalls", self.num_syscalls);
738
739        if let Some(syscall_counter) = self.syscall_counter.as_mut() {
740            // log the plugin thread specific counts
741            log::debug!(
742                "Thread {} syscall counts: {}",
743                self.thread_id,
744                syscall_counter,
745            );
746
747            // add up the counts at the worker level
748            Worker::add_syscall_counts(syscall_counter);
749        }
750
751        unsafe { c::legacyfile_unref(self.epoll.ptr() as *mut std::ffi::c_void) };
752    }
753}
754
/// Bundles the state that is passed to a syscall handler function.
pub struct SyscallContext<'a, 'b> {
    /// The thread context for the call; `legacy_syscall` reaches the process through it.
    pub objs: &'a ThreadContext<'b>,
    /// The syscall arguments; the `SyscallHandlerFn` impls read individual arguments from here
    /// and convert them to the handler's parameter types.
    pub args: &'a SyscallArgs,
    /// Exclusive borrow of the syscall handler; `legacy_syscall` passes it to the C handler.
    pub handler: &'a mut SyscallHandler,
}
760
/// A function that can be run as a syscall handler given only a [`SyscallContext`].
///
/// `T` does not appear in `call`. The implementations in this module set it to the tuple of the
/// handler's argument types (`()`, `(T1,)`, `(T1, T2)`, ...), so each arity gets its own impl.
pub trait SyscallHandlerFn<T> {
    /// Consumes the handler and runs it against `ctx`, producing a [`SyscallResult`].
    fn call(self, ctx: &mut SyscallContext) -> SyscallResult;
}
764
765impl<F, E, T0> SyscallHandlerFn<()> for F
766where
767    F: Fn(&mut SyscallContext) -> Result<T0, E>,
768    E: Into<SyscallError>,
769    T0: Into<SyscallReg>,
770{
771    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
772        self(ctx).map(Into::into).map_err(Into::into)
773    }
774}
775
776impl<F, E, T0, T1> SyscallHandlerFn<(T1,)> for F
777where
778    F: Fn(&mut SyscallContext, T1) -> Result<T0, E>,
779    E: Into<SyscallError>,
780    T0: Into<SyscallReg>,
781    T1: From<SyscallReg>,
782{
783    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
784        self(ctx, ctx.args.get(0).into())
785            .map(Into::into)
786            .map_err(Into::into)
787    }
788}
789
790impl<F, E, T0, T1, T2> SyscallHandlerFn<(T1, T2)> for F
791where
792    F: Fn(&mut SyscallContext, T1, T2) -> Result<T0, E>,
793    E: Into<SyscallError>,
794    T0: Into<SyscallReg>,
795    T1: From<SyscallReg>,
796    T2: From<SyscallReg>,
797{
798    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
799        self(ctx, ctx.args.get(0).into(), ctx.args.get(1).into())
800            .map(Into::into)
801            .map_err(Into::into)
802    }
803}
804
805impl<F, E, T0, T1, T2, T3> SyscallHandlerFn<(T1, T2, T3)> for F
806where
807    F: Fn(&mut SyscallContext, T1, T2, T3) -> Result<T0, E>,
808    E: Into<SyscallError>,
809    T0: Into<SyscallReg>,
810    T1: From<SyscallReg>,
811    T2: From<SyscallReg>,
812    T3: From<SyscallReg>,
813{
814    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
815        self(
816            ctx,
817            ctx.args.get(0).into(),
818            ctx.args.get(1).into(),
819            ctx.args.get(2).into(),
820        )
821        .map(Into::into)
822        .map_err(Into::into)
823    }
824}
825
826impl<F, E, T0, T1, T2, T3, T4> SyscallHandlerFn<(T1, T2, T3, T4)> for F
827where
828    F: Fn(&mut SyscallContext, T1, T2, T3, T4) -> Result<T0, E>,
829    E: Into<SyscallError>,
830    T0: Into<SyscallReg>,
831    T1: From<SyscallReg>,
832    T2: From<SyscallReg>,
833    T3: From<SyscallReg>,
834    T4: From<SyscallReg>,
835{
836    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
837        self(
838            ctx,
839            ctx.args.get(0).into(),
840            ctx.args.get(1).into(),
841            ctx.args.get(2).into(),
842            ctx.args.get(3).into(),
843        )
844        .map(Into::into)
845        .map_err(Into::into)
846    }
847}
848
849impl<F, E, T0, T1, T2, T3, T4, T5> SyscallHandlerFn<(T1, T2, T3, T4, T5)> for F
850where
851    F: Fn(&mut SyscallContext, T1, T2, T3, T4, T5) -> Result<T0, E>,
852    E: Into<SyscallError>,
853    T0: Into<SyscallReg>,
854    T1: From<SyscallReg>,
855    T2: From<SyscallReg>,
856    T3: From<SyscallReg>,
857    T4: From<SyscallReg>,
858    T5: From<SyscallReg>,
859{
860    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
861        self(
862            ctx,
863            ctx.args.get(0).into(),
864            ctx.args.get(1).into(),
865            ctx.args.get(2).into(),
866            ctx.args.get(3).into(),
867            ctx.args.get(4).into(),
868        )
869        .map(Into::into)
870        .map_err(Into::into)
871    }
872}
873
874impl<F, E, T0, T1, T2, T3, T4, T5, T6> SyscallHandlerFn<(T1, T2, T3, T4, T5, T6)> for F
875where
876    F: Fn(&mut SyscallContext, T1, T2, T3, T4, T5, T6) -> Result<T0, E>,
877    E: Into<SyscallError>,
878    T0: Into<SyscallReg>,
879    T1: From<SyscallReg>,
880    T2: From<SyscallReg>,
881    T3: From<SyscallReg>,
882    T4: From<SyscallReg>,
883    T5: From<SyscallReg>,
884    T6: From<SyscallReg>,
885{
886    fn call(self, ctx: &mut SyscallContext) -> SyscallResult {
887        self(
888            ctx,
889            ctx.args.get(0).into(),
890            ctx.args.get(1).into(),
891            ctx.args.get(2).into(),
892            ctx.args.get(3).into(),
893            ctx.args.get(4).into(),
894            ctx.args.get(5).into(),
895        )
896        .map(Into::into)
897        .map_err(Into::into)
898    }
899}
900
901mod export {
902    use crate::host::host::Host;
903    use crate::host::process::Process;
904    use crate::host::thread::Thread;
905
906    use super::*;
907
908    /// Returns a pointer to the current running host. The returned pointer is invalidated the next
909    /// time the worker switches hosts. Rust syscall handlers should get the host from the
910    /// [`SyscallContext`] instead.
911    #[unsafe(no_mangle)]
912    pub extern "C-unwind" fn rustsyscallhandler_getHost(sys: *const SyscallHandler) -> *const Host {
913        let sys = unsafe { sys.as_ref() }.unwrap();
914        Worker::with_active_host(|h| {
915            assert_eq!(h.id(), sys.host_id);
916            std::ptr::from_ref(h)
917        })
918        .unwrap()
919    }
920
921    /// Returns a pointer to the current running process. The returned pointer is invalidated the
922    /// next time the worker switches processes. Rust syscall handlers should get the process from
923    /// the [`SyscallContext`] instead.
924    #[unsafe(no_mangle)]
925    pub extern "C-unwind" fn rustsyscallhandler_getProcess(
926        sys: *const SyscallHandler,
927    ) -> *const Process {
928        let sys = unsafe { sys.as_ref() }.unwrap();
929        Worker::with_active_process(|p| {
930            assert_eq!(p.id(), sys.process_id);
931            std::ptr::from_ref(p)
932        })
933        .unwrap()
934    }
935
936    /// Returns a pointer to the current running thread. The returned pointer is invalidated the
937    /// next time the worker switches threads. Rust syscall handlers should get the thread from the
938    /// [`SyscallContext`] instead.
939    #[unsafe(no_mangle)]
940    pub extern "C-unwind" fn rustsyscallhandler_getThread(
941        sys: *const SyscallHandler,
942    ) -> *const Thread {
943        let sys = unsafe { sys.as_ref() }.unwrap();
944        Worker::with_active_thread(|t| {
945            assert_eq!(t.id(), sys.thread_id);
946            std::ptr::from_ref(t)
947        })
948        .unwrap()
949    }
950
951    #[unsafe(no_mangle)]
952    pub extern "C-unwind" fn rustsyscallhandler_wasBlocked(sys: *const SyscallHandler) -> bool {
953        let sys = unsafe { sys.as_ref() }.unwrap();
954        sys.is_blocked()
955    }
956
957    #[unsafe(no_mangle)]
958    pub extern "C-unwind" fn rustsyscallhandler_didListenTimeoutExpire(
959        sys: *const SyscallHandler,
960    ) -> bool {
961        let sys = unsafe { sys.as_ref() }.unwrap();
962        sys.did_listen_timeout_expire()
963    }
964
965    #[unsafe(no_mangle)]
966    pub extern "C-unwind" fn rustsyscallhandler_getEpoll(
967        sys: *const SyscallHandler,
968    ) -> *mut c::Epoll {
969        let sys = unsafe { sys.as_ref() }.unwrap();
970        sys.epoll.ptr()
971    }
972}