// shadow_rs/host/syscall/handler/sched.rs
1use std::mem::MaybeUninit;
2
3use linux_api::errno::Errno;
4use linux_api::posix_types::kernel_pid_t;
5use linux_api::rseq::rseq;
6use log::warn;
7use shadow_shim_helper_rs::syscall_types::ForeignPtr;
8
9use crate::host::syscall::handler::{SyscallContext, SyscallHandler};
10use crate::host::syscall::type_formatting::SyscallNonDeterministicArg;
11use crate::host::syscall::types::ForeignArrayPtr;
12use crate::host::thread::ThreadId;
13
14// We always report that the thread is running on CPU 0, Node 0
15const CURRENT_CPU: u32 = 0;
16
17const RSEQ_FLAG_UNREGISTER: i32 = 1;
18
19impl SyscallHandler {
20    log_syscall!(
21        sched_getaffinity,
22        /* rv */ i32,
23        // Non-deterministic due to https://github.com/shadow/shadow/issues/3626
24        /* pid */
25        SyscallNonDeterministicArg<kernel_pid_t>,
26        /* cpusetsize */ usize,
27        /* mask */ *const std::ffi::c_void,
28    );
29    pub fn sched_getaffinity(
30        ctx: &mut SyscallContext,
31        tid: kernel_pid_t,
32        cpusetsize: usize,
33        // sched_getaffinity(2):
34        // > The underlying system calls (which represent CPU masks as bit masks
35        // > of type unsigned long *) impose no restriction on the size of the CPU
36        // > mask
37        mask_ptr: ForeignPtr<std::ffi::c_ulong>,
38    ) -> Result<std::ffi::c_int, Errno> {
39        let mask_ptr = mask_ptr.cast::<u8>();
40        let mask_ptr = ForeignArrayPtr::new(mask_ptr, cpusetsize);
41
42        let tid = ThreadId::try_from(tid).or(Err(Errno::ESRCH))?;
43        if !ctx.objs.host.has_thread(tid) && kernel_pid_t::from(tid) != 0 {
44            return Err(Errno::ESRCH);
45        }
46
47        // Shadow doesn't have users, so no need to check for permissions
48
49        if cpusetsize == 0 {
50            return Err(Errno::EINVAL);
51        }
52
53        let mut mem = ctx.objs.process.memory_borrow_mut();
54        let mut mask = mem.memory_ref_mut(mask_ptr)?;
55
56        // this assumes little endian
57        let bytes_written = 1;
58        mask[0] = 1;
59
60        mask.flush()?;
61
62        Ok(bytes_written)
63    }
64
65    log_syscall!(
66        sched_setaffinity,
67        /* rv */ i32,
68        /* pid */ kernel_pid_t,
69        /* cpusetsize */ usize,
70        /* mask */ *const std::ffi::c_void,
71    );
72    pub fn sched_setaffinity(
73        ctx: &mut SyscallContext,
74        tid: kernel_pid_t,
75        cpusetsize: usize,
76        // sched_getaffinity(2):
77        // > The underlying system calls (which represent CPU masks as bit masks
78        // > of type unsigned long *) impose no restriction on the size of the CPU
79        // > mask
80        mask_ptr: ForeignPtr<std::ffi::c_ulong>,
81    ) -> Result<(), Errno> {
82        let mask_ptr = mask_ptr.cast::<u8>();
83        let mask_ptr = ForeignArrayPtr::new(mask_ptr, cpusetsize);
84
85        let tid = ThreadId::try_from(tid).or(Err(Errno::ESRCH))?;
86        if !ctx.objs.host.has_thread(tid) && kernel_pid_t::from(tid) != 0 {
87            return Err(Errno::ESRCH);
88        };
89
90        // Shadow doesn't have users, so no need to check for permissions
91
92        if cpusetsize == 0 {
93            return Err(Errno::EINVAL);
94        }
95
96        let mem = ctx.objs.process.memory_borrow_mut();
97        let mask = mem.memory_ref(mask_ptr)?;
98
99        // this assumes little endian
100        if mask[0] & 0x01 == 0 {
101            return Err(Errno::EINVAL);
102        }
103
104        Ok(())
105    }
106
107    log_syscall!(
108        rseq,
109        /* rv */ i32,
110        /* rseq */ *const std::ffi::c_void,
111        /* rseq_len */ u32,
112        /* flags */ i32,
113        /* sig */ u32,
114    );
115    pub fn rseq(
116        ctx: &mut SyscallContext,
117        rseq_ptr: ForeignPtr<MaybeUninit<u8>>,
118        rseq_len: u32,
119        flags: std::ffi::c_int,
120        _sig: u32,
121    ) -> Result<(), Errno> {
122        // we won't need more bytes than the size of the `rseq` struct
123        let rseq_len = rseq_len.try_into().unwrap();
124        let rseq_len = std::cmp::min(rseq_len, std::mem::size_of::<rseq>());
125
126        if flags & (!RSEQ_FLAG_UNREGISTER) != 0 {
127            warn!("Unrecognized rseq flags: {flags}");
128            return Err(Errno::EINVAL);
129        }
130        if flags & RSEQ_FLAG_UNREGISTER != 0 {
131            // TODO:
132            // * Validate that an rseq was previously registered
133            // * Validate that `sig` matches registration
134            // * Set the cpu_id of the previously registerd rseq to the uninitialized
135            //   state.
136            return Ok(());
137        }
138
139        // The `rseq` struct is designed to grow as linux needs to add more features, so we can't
140        // assume that the application making the rseq syscall is using the exact same struct as we
141        // have available in the linux_api crate (the calling application's rseq struct may have
142        // more or fewer fields). Furthermore, the rseq struct ends with a "flexible array member",
143        // which means that the rseq struct cannot be `Copy` and therefore not `Pod`.
144        //
145        // Instead, we should treat the rseq struct as a bunch of bytes and write to individual
146        // fields if possible without making assumptions about the size of the data.
147        let mut mem = ctx.objs.process.memory_borrow_mut();
148        let mut rseq_mem = mem.memory_ref_mut(ForeignArrayPtr::new(rseq_ptr, rseq_len))?;
149        let rseq_bytes = &mut *rseq_mem;
150
151        // rseq is mostly unimplemented, but also mostly unneeded in Shadow.
152        // We'd only need to implement the "real" functionality if we ever implement
153        // true preemption, in which case we'd need to do something if we ever pre-empted
154        // while the user code was in a restartable sequence. As it is, Shadow only
155        // reschedules threads at system calls, and system calls are disallowed inside
156        // restartable sequences.
157        //
158        // TODO: One place where Shadow might need to implement rseq recovery is
159        // if a hardware-based signal is delivered in the middle of an
160        // interruptible sequence.  e.g. the code in the rseq accesses an
161        // invalid address, raising SIGSEGV, but then catching it and recovering
162        // in a handler.
163        // https://github.com/shadow/shadow/issues/2139
164        //
165        // For now we just update to reflect that the thread is running on CPU 0.
166
167        let Some((cpu_id, cpu_id_start)) = field_project!(rseq_bytes, rseq, (cpu_id, cpu_id_start))
168        else {
169            return Err(Errno::EINVAL);
170        };
171
172        cpu_id.write(CURRENT_CPU);
173        cpu_id_start.write(CURRENT_CPU);
174
175        rseq_mem.flush()?;
176
177        Ok(())
178    }
179}