shadow_rs/host/syscall/handler/sched.rs
use std::mem::MaybeUninit;

use linux_api::errno::Errno;
use linux_api::posix_types::kernel_pid_t;
use linux_api::rseq::rseq;
use log::warn;
use shadow_shim_helper_rs::syscall_types::ForeignPtr;

use crate::host::syscall::handler::{SyscallContext, SyscallHandler};
use crate::host::syscall::types::ForeignArrayPtr;
use crate::host::thread::ThreadId;

// We always report that the thread is running on CPU 0, Node 0
const CURRENT_CPU: u32 = 0;

const RSEQ_FLAG_UNREGISTER: i32 = 1;
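// Matches the kernel's RSEQ_FLAG_UNREGISTER value (1 << 0) from <linux/rseq.h>.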

impl SyscallHandler {
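    // Note: the `log_syscall!` invocations below describe each syscall's
    // signature for Shadow's strace-style logging; the argument types mirror
    // the kernel prototypes rather than the Rust handler signatures.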
    log_syscall!(
        sched_getaffinity,
        /* rv */ i32,
        /* pid */ kernel_pid_t,
        /* cpusetsize */ usize,
        /* mask */ *const std::ffi::c_void,
    );
    pub fn sched_getaffinity(
        ctx: &mut SyscallContext,
        tid: kernel_pid_t,
        cpusetsize: usize,
        // sched_getaffinity(2):
        // > The underlying system calls (which represent CPU masks as bit masks
        // > of type unsigned long *) impose no restriction on the size of the CPU
        // > mask
        mask_ptr: ForeignPtr<std::ffi::c_ulong>,
    ) -> Result<std::ffi::c_int, Errno> {
        let mask_ptr = mask_ptr.cast::<u8>();
        let mask_ptr = ForeignArrayPtr::new(mask_ptr, cpusetsize);

        let tid = ThreadId::try_from(tid).or(Err(Errno::ESRCH))?;
        if !ctx.objs.host.has_thread(tid) && kernel_pid_t::from(tid) != 0 {
            return Err(Errno::ESRCH);
        }

        // Shadow doesn't have users, so no need to check for permissions

        if cpusetsize == 0 {
            return Err(Errno::EINVAL);
        }

        let mut mem = ctx.objs.process.memory_borrow_mut();
        let mut mask = mem.memory_ref_mut(mask_ptr)?;

        // This assumes little endian: CPU 0's bit is the least-significant bit
        // of the first `unsigned long` in the mask, i.e. the low bit of the
        // first byte.
        let bytes_written = 1;
        mask[0] = 1;

        mask.flush()?;

        Ok(bytes_written)
    }
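
    // Sketch (assumption, not Shadow code): what a guest would observe through
    // the glibc wrapper, which returns 0 on success even though the raw
    // syscall returns the number of mask bytes written. Under Shadow, only
    // CPU 0 should ever appear set:
    //
    //     let mut set: libc::cpu_set_t = unsafe { std::mem::zeroed() };
    //     let rv = unsafe {
    //         libc::sched_getaffinity(0, std::mem::size_of::<libc::cpu_set_t>(), &mut set)
    //     };
    //     assert_eq!(rv, 0);
    //     assert!(unsafe { libc::CPU_ISSET(0, &set) });
    //     assert!(!unsafe { libc::CPU_ISSET(1, &set) });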

    log_syscall!(
        sched_setaffinity,
        /* rv */ i32,
        /* pid */ kernel_pid_t,
        /* cpusetsize */ usize,
        /* mask */ *const std::ffi::c_void,
    );
    pub fn sched_setaffinity(
        ctx: &mut SyscallContext,
        tid: kernel_pid_t,
        cpusetsize: usize,
        // sched_getaffinity(2):
        // > The underlying system calls (which represent CPU masks as bit masks
        // > of type unsigned long *) impose no restriction on the size of the CPU
        // > mask
        mask_ptr: ForeignPtr<std::ffi::c_ulong>,
    ) -> Result<(), Errno> {
        let mask_ptr = mask_ptr.cast::<u8>();
        let mask_ptr = ForeignArrayPtr::new(mask_ptr, cpusetsize);

        let tid = ThreadId::try_from(tid).or(Err(Errno::ESRCH))?;
        if !ctx.objs.host.has_thread(tid) && kernel_pid_t::from(tid) != 0 {
            return Err(Errno::ESRCH);
        }

        // Shadow doesn't have users, so no need to check for permissions

        if cpusetsize == 0 {
            return Err(Errno::EINVAL);
        }

        // Only reading the mask here, so an immutable borrow suffices.
        let mem = ctx.objs.process.memory_borrow();
        let mask = mem.memory_ref(mask_ptr)?;

        // This assumes little endian: reject any mask that doesn't include
        // CPU 0, the only CPU Shadow reports.
        if mask[0] & 0x01 == 0 {
            return Err(Errno::EINVAL);
        }

        Ok(())
    }
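
    // Sketch (assumption, not Shadow code): the guest-visible behavior is that
    // any mask including CPU 0 is accepted, and any mask excluding it fails:
    //
    //     let mut with_cpu0: libc::cpu_set_t = unsafe { std::mem::zeroed() };
    //     unsafe { libc::CPU_SET(0, &mut with_cpu0) };
    //     let rv = unsafe {
    //         libc::sched_setaffinity(0, std::mem::size_of::<libc::cpu_set_t>(), &with_cpu0)
    //     };
    //     assert_eq!(rv, 0); // accepted: the mask includes CPU 0
    //
    //     let mut without_cpu0: libc::cpu_set_t = unsafe { std::mem::zeroed() };
    //     unsafe { libc::CPU_SET(1, &mut without_cpu0) };
    //     let rv = unsafe {
    //         libc::sched_setaffinity(0, std::mem::size_of::<libc::cpu_set_t>(), &without_cpu0)
    //     };
    //     assert_eq!(rv, -1); // rejected with errno EINVAL by the handler above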

    log_syscall!(
        rseq,
        /* rv */ i32,
        /* rseq */ *const std::ffi::c_void,
        /* rseq_len */ u32,
        /* flags */ i32,
        /* sig */ u32,
    );
    pub fn rseq(
        ctx: &mut SyscallContext,
        rseq_ptr: ForeignPtr<MaybeUninit<u8>>,
        rseq_len: u32,
        flags: std::ffi::c_int,
        _sig: u32,
    ) -> Result<(), Errno> {
        // we won't need more bytes than the size of the `rseq` struct
        let rseq_len = rseq_len.try_into().unwrap();
        let rseq_len = std::cmp::min(rseq_len, std::mem::size_of::<rseq>());

        if flags & (!RSEQ_FLAG_UNREGISTER) != 0 {
            warn!("Unrecognized rseq flags: {flags}");
            return Err(Errno::EINVAL);
        }
        if flags & RSEQ_FLAG_UNREGISTER != 0 {
            // TODO:
            // * Validate that an rseq was previously registered
            // * Validate that `sig` matches registration
            // * Set the cpu_id of the previously registered rseq to the
            //   uninitialized state.
            return Ok(());
        }

        // The `rseq` struct is designed to grow as Linux adds features, so we
        // can't assume that the application making the rseq syscall is using
        // the exact same struct as we have available in the linux_api crate
        // (the calling application's rseq struct may have more or fewer
        // fields). Furthermore, the rseq struct ends with a "flexible array
        // member", which means it can't be `Copy` and therefore can't be
        // `Pod`.
        //
        // Instead, we treat the rseq struct as a bunch of bytes and write to
        // individual fields where possible, without making assumptions about
        // the size of the data.
        let mut mem = ctx.objs.process.memory_borrow_mut();
        let mut rseq_mem = mem.memory_ref_mut(ForeignArrayPtr::new(rseq_ptr, rseq_len))?;
        let rseq_bytes = &mut *rseq_mem;

        // rseq is mostly unimplemented, but also mostly unneeded in Shadow.
        // We'd only need to implement the "real" functionality if we ever
        // implement true preemption, in which case we'd need to do something
        // if we preempted while the user code was in a restartable sequence.
        // As it is, Shadow only reschedules threads at system calls, and
        // system calls are disallowed inside restartable sequences.
        //
        // TODO: One place where Shadow might need to implement rseq recovery
        // is if a hardware-based signal is delivered in the middle of a
        // restartable sequence, e.g. the code in the rseq accesses an invalid
        // address, raising SIGSEGV, but then catches it and recovers in a
        // handler.
        // https://github.com/shadow/shadow/issues/2139
        //
        // For now we just update to reflect that the thread is running on CPU 0.
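
        // In the kernel's `struct rseq`, `cpu_id_start` is at offset 0 and
        // `cpu_id` at offset 4, so both land well inside the struct prefix we
        // clamped `rseq_len` to above. `field_project!` projects those fields
        // out of the raw byte buffer; judging by the `let ... else` below, it
        // yields `None` when the buffer is too short to contain them.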
        let Some((cpu_id, cpu_id_start)) = field_project!(rseq_bytes, rseq, (cpu_id, cpu_id_start))
        else {
            return Err(Errno::EINVAL);
        };

        cpu_id.write(CURRENT_CPU);
        cpu_id_start.write(CURRENT_CPU);

        rseq_mem.flush()?;

        Ok(())
    }
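
    // Sketch (assumption, not Shadow code): the registration this handler
    // services. A libc typically registers a per-thread rseq area once via the
    // raw syscall and then expects `cpu_id` to hold a valid CPU number, which
    // the handler above satisfies by always reporting CPU 0:
    //
    //     // `rseq_area`: a per-thread, suitably aligned kernel `struct rseq`;
    //     // `SIGNATURE`: a hypothetical abort-handler signature constant.
    //     let rv = unsafe {
    //         libc::syscall(
    //             libc::SYS_rseq,
    //             &mut rseq_area as *mut _,
    //             std::mem::size_of_val(&rseq_area) as u32,
    //             0i32, // flags: register
    //             SIGNATURE,
    //         )
    //     };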
}