shadow_rs/host/syscall/handler/
sched.rs

1use std::mem::MaybeUninit;
2
3use linux_api::errno::Errno;
4use linux_api::posix_types::kernel_pid_t;
5use linux_api::rseq::rseq;
6use log::warn;
7use shadow_shim_helper_rs::syscall_types::ForeignPtr;
8
9use crate::host::syscall::handler::{SyscallContext, SyscallHandler};
10use crate::host::syscall::types::ForeignArrayPtr;
11use crate::host::thread::ThreadId;
12
/// CPU index written into a thread's rseq area. We always report that the thread is running on
/// CPU 0, Node 0.
const CURRENT_CPU: u32 = 0;

/// Bit in the `flags` argument of the `rseq` syscall that requests unregistration.
const RSEQ_FLAG_UNREGISTER: i32 = 1 << 0;
17
18impl SyscallHandler {
19    log_syscall!(
20        sched_getaffinity,
21        /* rv */ i32,
22        /* pid */ kernel_pid_t,
23        /* cpusetsize */ usize,
24        /* mask */ *const std::ffi::c_void,
25    );
26    pub fn sched_getaffinity(
27        ctx: &mut SyscallContext,
28        tid: kernel_pid_t,
29        cpusetsize: usize,
30        // sched_getaffinity(2):
31        // > The underlying system calls (which represent CPU masks as bit masks
32        // > of type unsigned long *) impose no restriction on the size of the CPU
33        // > mask
34        mask_ptr: ForeignPtr<std::ffi::c_ulong>,
35    ) -> Result<std::ffi::c_int, Errno> {
36        let mask_ptr = mask_ptr.cast::<u8>();
37        let mask_ptr = ForeignArrayPtr::new(mask_ptr, cpusetsize);
38
39        let tid = ThreadId::try_from(tid).or(Err(Errno::ESRCH))?;
40        if !ctx.objs.host.has_thread(tid) && kernel_pid_t::from(tid) != 0 {
41            return Err(Errno::ESRCH);
42        }
43
44        // Shadow doesn't have users, so no need to check for permissions
45
46        if cpusetsize == 0 {
47            return Err(Errno::EINVAL);
48        }
49
50        let mut mem = ctx.objs.process.memory_borrow_mut();
51        let mut mask = mem.memory_ref_mut(mask_ptr)?;
52
53        // this assumes little endian
54        let bytes_written = 1;
55        mask[0] = 1;
56
57        mask.flush()?;
58
59        Ok(bytes_written)
60    }
61
62    log_syscall!(
63        sched_setaffinity,
64        /* rv */ i32,
65        /* pid */ kernel_pid_t,
66        /* cpusetsize */ usize,
67        /* mask */ *const std::ffi::c_void,
68    );
69    pub fn sched_setaffinity(
70        ctx: &mut SyscallContext,
71        tid: kernel_pid_t,
72        cpusetsize: usize,
73        // sched_getaffinity(2):
74        // > The underlying system calls (which represent CPU masks as bit masks
75        // > of type unsigned long *) impose no restriction on the size of the CPU
76        // > mask
77        mask_ptr: ForeignPtr<std::ffi::c_ulong>,
78    ) -> Result<(), Errno> {
79        let mask_ptr = mask_ptr.cast::<u8>();
80        let mask_ptr = ForeignArrayPtr::new(mask_ptr, cpusetsize);
81
82        let tid = ThreadId::try_from(tid).or(Err(Errno::ESRCH))?;
83        if !ctx.objs.host.has_thread(tid) && kernel_pid_t::from(tid) != 0 {
84            return Err(Errno::ESRCH);
85        };
86
87        // Shadow doesn't have users, so no need to check for permissions
88
89        if cpusetsize == 0 {
90            return Err(Errno::EINVAL);
91        }
92
93        let mem = ctx.objs.process.memory_borrow_mut();
94        let mask = mem.memory_ref(mask_ptr)?;
95
96        // this assumes little endian
97        if mask[0] & 0x01 == 0 {
98            return Err(Errno::EINVAL);
99        }
100
101        Ok(())
102    }
103
104    log_syscall!(
105        rseq,
106        /* rv */ i32,
107        /* rseq */ *const std::ffi::c_void,
108        /* rseq_len */ u32,
109        /* flags */ i32,
110        /* sig */ u32,
111    );
112    pub fn rseq(
113        ctx: &mut SyscallContext,
114        rseq_ptr: ForeignPtr<MaybeUninit<u8>>,
115        rseq_len: u32,
116        flags: std::ffi::c_int,
117        _sig: u32,
118    ) -> Result<(), Errno> {
119        // we won't need more bytes than the size of the `rseq` struct
120        let rseq_len = rseq_len.try_into().unwrap();
121        let rseq_len = std::cmp::min(rseq_len, std::mem::size_of::<rseq>());
122
123        if flags & (!RSEQ_FLAG_UNREGISTER) != 0 {
124            warn!("Unrecognized rseq flags: {flags}");
125            return Err(Errno::EINVAL);
126        }
127        if flags & RSEQ_FLAG_UNREGISTER != 0 {
128            // TODO:
129            // * Validate that an rseq was previously registered
130            // * Validate that `sig` matches registration
131            // * Set the cpu_id of the previously registerd rseq to the uninitialized
132            //   state.
133            return Ok(());
134        }
135
136        // The `rseq` struct is designed to grow as linux needs to add more features, so we can't
137        // assume that the application making the rseq syscall is using the exact same struct as we
138        // have available in the linux_api crate (the calling application's rseq struct may have
139        // more or fewer fields). Furthermore, the rseq struct ends with a "flexible array member",
140        // which means that the rseq struct cannot be `Copy` and therefore not `Pod`.
141        //
142        // Instead, we should treat the rseq struct as a bunch of bytes and write to individual
143        // fields if possible without making assumptions about the size of the data.
144        let mut mem = ctx.objs.process.memory_borrow_mut();
145        let mut rseq_mem = mem.memory_ref_mut(ForeignArrayPtr::new(rseq_ptr, rseq_len))?;
146        let rseq_bytes = &mut *rseq_mem;
147
148        // rseq is mostly unimplemented, but also mostly unneeded in Shadow.
149        // We'd only need to implement the "real" functionality if we ever implement
150        // true preemption, in which case we'd need to do something if we ever pre-empted
151        // while the user code was in a restartable sequence. As it is, Shadow only
152        // reschedules threads at system calls, and system calls are disallowed inside
153        // restartable sequences.
154        //
155        // TODO: One place where Shadow might need to implement rseq recovery is
156        // if a hardware-based signal is delivered in the middle of an
157        // interruptible sequence.  e.g. the code in the rseq accesses an
158        // invalid address, raising SIGSEGV, but then catching it and recovering
159        // in a handler.
160        // https://github.com/shadow/shadow/issues/2139
161        //
162        // For now we just update to reflect that the thread is running on CPU 0.
163
164        let Some((cpu_id, cpu_id_start)) = field_project!(rseq_bytes, rseq, (cpu_id, cpu_id_start))
165        else {
166            return Err(Errno::EINVAL);
167        };
168
169        cpu_id.write(CURRENT_CPU);
170        cpu_id_start.write(CURRENT_CPU);
171
172        rseq_mem.flush()?;
173
174        Ok(())
175    }
176}