shadow_rs/host/syscall/handler/sched.rs
1use std::mem::MaybeUninit;
2
3use linux_api::errno::Errno;
4use linux_api::posix_types::kernel_pid_t;
5use linux_api::rseq::rseq;
6use log::warn;
7use shadow_shim_helper_rs::syscall_types::ForeignPtr;
8
9use crate::host::syscall::handler::{SyscallContext, SyscallHandler};
10use crate::host::syscall::type_formatting::SyscallNonDeterministicArg;
11use crate::host::syscall::types::ForeignArrayPtr;
12use crate::host::thread::ThreadId;
13
// Shadow models a single-CPU machine: every thread is always reported as
// running on CPU 0, NUMA node 0.
const CURRENT_CPU: u32 = 0;

// `flags` bit for the rseq syscall requesting that a previously-registered
// rseq area be unregistered (see the unregister branch in `rseq` below).
const RSEQ_FLAG_UNREGISTER: i32 = 1;
19impl SyscallHandler {
20 log_syscall!(
21 sched_getaffinity,
22 /* rv */ i32,
23 // Non-deterministic due to https://github.com/shadow/shadow/issues/3626
24 /* pid */
25 SyscallNonDeterministicArg<kernel_pid_t>,
26 /* cpusetsize */ usize,
27 /* mask */ *const std::ffi::c_void,
28 );
29 pub fn sched_getaffinity(
30 ctx: &mut SyscallContext,
31 tid: kernel_pid_t,
32 cpusetsize: usize,
33 // sched_getaffinity(2):
34 // > The underlying system calls (which represent CPU masks as bit masks
35 // > of type unsigned long *) impose no restriction on the size of the CPU
36 // > mask
37 mask_ptr: ForeignPtr<std::ffi::c_ulong>,
38 ) -> Result<std::ffi::c_int, Errno> {
39 let mask_ptr = mask_ptr.cast::<u8>();
40 let mask_ptr = ForeignArrayPtr::new(mask_ptr, cpusetsize);
41
42 let tid = ThreadId::try_from(tid).or(Err(Errno::ESRCH))?;
43 if !ctx.objs.host.has_thread(tid) && kernel_pid_t::from(tid) != 0 {
44 return Err(Errno::ESRCH);
45 }
46
47 // Shadow doesn't have users, so no need to check for permissions
48
49 if cpusetsize == 0 {
50 return Err(Errno::EINVAL);
51 }
52
53 let mut mem = ctx.objs.process.memory_borrow_mut();
54 let mut mask = mem.memory_ref_mut(mask_ptr)?;
55
56 // this assumes little endian
57 let bytes_written = 1;
58 mask[0] = 1;
59
60 mask.flush()?;
61
62 Ok(bytes_written)
63 }
64
65 log_syscall!(
66 sched_setaffinity,
67 /* rv */ i32,
68 /* pid */ kernel_pid_t,
69 /* cpusetsize */ usize,
70 /* mask */ *const std::ffi::c_void,
71 );
72 pub fn sched_setaffinity(
73 ctx: &mut SyscallContext,
74 tid: kernel_pid_t,
75 cpusetsize: usize,
76 // sched_getaffinity(2):
77 // > The underlying system calls (which represent CPU masks as bit masks
78 // > of type unsigned long *) impose no restriction on the size of the CPU
79 // > mask
80 mask_ptr: ForeignPtr<std::ffi::c_ulong>,
81 ) -> Result<(), Errno> {
82 let mask_ptr = mask_ptr.cast::<u8>();
83 let mask_ptr = ForeignArrayPtr::new(mask_ptr, cpusetsize);
84
85 let tid = ThreadId::try_from(tid).or(Err(Errno::ESRCH))?;
86 if !ctx.objs.host.has_thread(tid) && kernel_pid_t::from(tid) != 0 {
87 return Err(Errno::ESRCH);
88 };
89
90 // Shadow doesn't have users, so no need to check for permissions
91
92 if cpusetsize == 0 {
93 return Err(Errno::EINVAL);
94 }
95
96 let mem = ctx.objs.process.memory_borrow_mut();
97 let mask = mem.memory_ref(mask_ptr)?;
98
99 // this assumes little endian
100 if mask[0] & 0x01 == 0 {
101 return Err(Errno::EINVAL);
102 }
103
104 Ok(())
105 }
106
107 log_syscall!(
108 rseq,
109 /* rv */ i32,
110 /* rseq */ *const std::ffi::c_void,
111 /* rseq_len */ u32,
112 /* flags */ i32,
113 /* sig */ u32,
114 );
115 pub fn rseq(
116 ctx: &mut SyscallContext,
117 rseq_ptr: ForeignPtr<MaybeUninit<u8>>,
118 rseq_len: u32,
119 flags: std::ffi::c_int,
120 _sig: u32,
121 ) -> Result<(), Errno> {
122 // we won't need more bytes than the size of the `rseq` struct
123 let rseq_len = rseq_len.try_into().unwrap();
124 let rseq_len = std::cmp::min(rseq_len, std::mem::size_of::<rseq>());
125
126 if flags & (!RSEQ_FLAG_UNREGISTER) != 0 {
127 warn!("Unrecognized rseq flags: {flags}");
128 return Err(Errno::EINVAL);
129 }
130 if flags & RSEQ_FLAG_UNREGISTER != 0 {
131 // TODO:
132 // * Validate that an rseq was previously registered
133 // * Validate that `sig` matches registration
134 // * Set the cpu_id of the previously registerd rseq to the uninitialized
135 // state.
136 return Ok(());
137 }
138
139 // The `rseq` struct is designed to grow as linux needs to add more features, so we can't
140 // assume that the application making the rseq syscall is using the exact same struct as we
141 // have available in the linux_api crate (the calling application's rseq struct may have
142 // more or fewer fields). Furthermore, the rseq struct ends with a "flexible array member",
143 // which means that the rseq struct cannot be `Copy` and therefore not `Pod`.
144 //
145 // Instead, we should treat the rseq struct as a bunch of bytes and write to individual
146 // fields if possible without making assumptions about the size of the data.
147 let mut mem = ctx.objs.process.memory_borrow_mut();
148 let mut rseq_mem = mem.memory_ref_mut(ForeignArrayPtr::new(rseq_ptr, rseq_len))?;
149 let rseq_bytes = &mut *rseq_mem;
150
151 // rseq is mostly unimplemented, but also mostly unneeded in Shadow.
152 // We'd only need to implement the "real" functionality if we ever implement
153 // true preemption, in which case we'd need to do something if we ever pre-empted
154 // while the user code was in a restartable sequence. As it is, Shadow only
155 // reschedules threads at system calls, and system calls are disallowed inside
156 // restartable sequences.
157 //
158 // TODO: One place where Shadow might need to implement rseq recovery is
159 // if a hardware-based signal is delivered in the middle of an
160 // interruptible sequence. e.g. the code in the rseq accesses an
161 // invalid address, raising SIGSEGV, but then catching it and recovering
162 // in a handler.
163 // https://github.com/shadow/shadow/issues/2139
164 //
165 // For now we just update to reflect that the thread is running on CPU 0.
166
167 let Some((cpu_id, cpu_id_start)) = field_project!(rseq_bytes, rseq, (cpu_id, cpu_id_start))
168 else {
169 return Err(Errno::EINVAL);
170 };
171
172 cpu_id.write(CURRENT_CPU);
173 cpu_id_start.write(CURRENT_CPU);
174
175 rseq_mem.flush()?;
176
177 Ok(())
178 }
179}