// shadow_rs/host/syscall/handler/epoll.rs
1use std::ops::DerefMut;
2use std::sync::Arc;
3
4use linux_api::epoll::{EpollCreateFlags, EpollCtlOp, EpollEvents};
5use linux_api::errno::Errno;
6use linux_api::fcntl::DescriptorFlags;
7use shadow_shim_helper_rs::simulation_time::SimulationTime;
8use shadow_shim_helper_rs::syscall_types::ForeignPtr;
9
10use crate::core::worker::Worker;
11use crate::cshadow;
12use crate::host::descriptor::descriptor_table::DescriptorHandle;
13use crate::host::descriptor::epoll::Epoll;
14use crate::host::descriptor::{CompatFile, Descriptor, File, FileState, OpenFile};
15use crate::host::memory_manager::MemoryManager;
16use crate::host::syscall::handler::{SyscallContext, SyscallHandler};
17use crate::host::syscall::types::{ForeignArrayPtr, SyscallError};
18use crate::utility::callback_queue::CallbackQueue;
19
20impl SyscallHandler {
21 log_syscall!(
22 epoll_create,
23 std::ffi::c_int,
24 std::ffi::c_int,
25 );
26 pub fn epoll_create(
27 ctx: &mut SyscallContext,
28 size: std::ffi::c_int,
29 ) -> Result<DescriptorHandle, Errno> {
30 if size <= 0 {
33 return Err(Errno::EINVAL);
34 }
35
36 Self::epoll_create_helper(ctx, 0)
37 }
38
39 log_syscall!(
40 epoll_create1,
41 std::ffi::c_int,
42 std::ffi::c_int,
43 );
44 pub fn epoll_create1(
45 ctx: &mut SyscallContext,
46 flags: std::ffi::c_int,
47 ) -> Result<DescriptorHandle, Errno> {
48 Self::epoll_create_helper(ctx, flags)
49 }
50
51 fn epoll_create_helper(
52 ctx: &mut SyscallContext,
53 flags: std::ffi::c_int,
54 ) -> Result<DescriptorHandle, Errno> {
55 let Some(flags) = EpollCreateFlags::from_bits(flags) else {
58 log::debug!("Invalid epoll_create flags: {flags}");
59 return Err(Errno::EINVAL);
60 };
61
62 let mut desc_flags = DescriptorFlags::empty();
63
64 if flags.contains(EpollCreateFlags::EPOLL_CLOEXEC) {
65 desc_flags.insert(DescriptorFlags::FD_CLOEXEC);
66 }
67
68 let epoll = Epoll::new();
69 let mut desc = Descriptor::new(CompatFile::New(OpenFile::new(File::Epoll(epoll))));
70 desc.set_flags(desc_flags);
71
72 let fd = ctx
73 .objs
74 .thread
75 .descriptor_table_borrow_mut(ctx.objs.host)
76 .register_descriptor(desc)
77 .or(Err(Errno::ENFILE))?;
78
79 log::trace!("Created epoll fd {fd}");
80
81 Ok(fd)
82 }
83
84 log_syscall!(
85 epoll_ctl,
86 std::ffi::c_int,
87 std::ffi::c_int,
88 std::ffi::c_int,
89 std::ffi::c_int,
90 *const std::ffi::c_void,
91 );
92 pub fn epoll_ctl(
93 ctx: &mut SyscallContext,
94 epfd: std::ffi::c_int,
95 op: std::ffi::c_int,
96 fd: std::ffi::c_int,
97 event_ptr: ForeignPtr<linux_api::epoll::epoll_event>,
98 ) -> Result<(), Errno> {
99 let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
104
105 let (epoll, epoll_canon_handle) = {
107 let desc = Self::get_descriptor(&desc_table, epfd)?;
108
109 let CompatFile::New(epoll) = desc.file() else {
110 return Err(Errno::EINVAL);
111 };
112
113 let epoll_canon_handle = epoll.inner_file().canonical_handle();
114
115 let File::Epoll(epoll) = epoll.inner_file() else {
116 return Err(Errno::EINVAL);
117 };
118
119 (epoll, epoll_canon_handle)
120 };
121
122 let target = {
124 let desc = Self::get_descriptor(&desc_table, fd)?;
125
126 match desc.file() {
131 CompatFile::New(file) => file.inner_file().clone(),
132 CompatFile::Legacy(file) => {
133 let file_type = unsafe { cshadow::legacyfile_getType(file.ptr()) };
134 if file_type == cshadow::_LegacyFileType_DT_FILE {
135 return Err(Errno::EPERM);
137 } else {
138 warn_once_then_trace!(
141 "Attempted to add a legacy file to an epoll file, which \
142 shadow doesn't support"
143 );
144 return Err(Errno::EINVAL);
145 }
146 }
147 }
148 };
149
150 if epoll_canon_handle == target.canonical_handle() {
152 return Err(Errno::EINVAL);
153 }
154
155 let Ok(op) = EpollCtlOp::try_from(op) else {
157 log::debug!("Invalid epoll op: {op}");
158 return Err(Errno::EINVAL);
159 };
160
161 let (events, data) = if op == EpollCtlOp::EPOLL_CTL_DEL {
163 (EpollEvents::empty(), 0)
166 } else {
167 let mem = ctx.objs.process.memory_borrow();
168 let ev = mem.read(event_ptr)?;
169
170 let Some(mut events) = EpollEvents::from_bits(ev.events) else {
171 log::debug!("Invalid epoll_ctl events: {}", { ev.events });
173 return Err(Errno::EINVAL);
174 };
175
176 events.insert(EpollEvents::EPOLLERR | EpollEvents::EPOLLHUP);
178
179 (events, ev.data)
180 };
181
182 log::trace!("Calling epoll_ctl on epoll {epfd} with child {fd}");
183
184 CallbackQueue::queue_and_run_with_legacy(|cb_queue| {
185 let weak_epoll = Arc::downgrade(epoll);
186 epoll
187 .borrow_mut()
188 .ctl(op, fd, target, events, data, weak_epoll, cb_queue)
189 })?;
190 Ok(())
191 }
192
193 log_syscall!(
194 epoll_wait,
195 std::ffi::c_int,
196 std::ffi::c_int,
197 *const std::ffi::c_void,
198 std::ffi::c_int,
199 std::ffi::c_int,
200 );
201 pub fn epoll_wait(
202 ctx: &mut SyscallContext,
203 epfd: std::ffi::c_int,
204 events_ptr: ForeignPtr<linux_api::epoll::epoll_event>,
205 max_events: std::ffi::c_int,
206 timeout: std::ffi::c_int,
207 ) -> Result<std::ffi::c_int, SyscallError> {
208 let timeout = timeout_arg_to_maybe_simtime(timeout)?;
210 Self::epoll_wait_helper(ctx, epfd, events_ptr, max_events, timeout, None)
211 }
212
213 log_syscall!(
214 epoll_pwait,
215 std::ffi::c_int,
216 std::ffi::c_int,
217 *const std::ffi::c_void,
218 std::ffi::c_int,
219 std::ffi::c_int,
220 *const std::ffi::c_void,
221 linux_api::posix_types::kernel_size_t,
222 );
223 pub fn epoll_pwait(
224 ctx: &mut SyscallContext,
225 epfd: std::ffi::c_int,
226 events_ptr: ForeignPtr<linux_api::epoll::epoll_event>,
227 max_events: std::ffi::c_int,
228 timeout: std::ffi::c_int,
229 sigmask_ptr: ForeignPtr<linux_api::signal::sigset_t>,
230 _sigsetsize: linux_api::posix_types::kernel_size_t,
231 ) -> Result<std::ffi::c_int, SyscallError> {
232 let sigmask = if sigmask_ptr.is_null() {
235 None
236 } else {
237 Some(ctx.objs.process.memory_borrow().read(sigmask_ptr)?)
238 };
239
240 let timeout = timeout_arg_to_maybe_simtime(timeout)?;
242 Self::epoll_wait_helper(ctx, epfd, events_ptr, max_events, timeout, sigmask)
243 }
244
245 log_syscall!(
246 epoll_pwait2,
247 std::ffi::c_int,
248 std::ffi::c_int,
249 *const std::ffi::c_void,
250 std::ffi::c_int,
251 *const std::ffi::c_void,
252 *const std::ffi::c_void,
253 linux_api::posix_types::kernel_size_t,
254 );
255 pub fn epoll_pwait2(
256 ctx: &mut SyscallContext,
257 epfd: std::ffi::c_int,
258 events_ptr: ForeignPtr<linux_api::epoll::epoll_event>,
259 max_events: std::ffi::c_int,
260 timeout_ptr: ForeignPtr<linux_api::time::timespec>,
261 sigmask_ptr: ForeignPtr<linux_api::signal::sigset_t>,
262 _sigsetsize: linux_api::posix_types::kernel_size_t,
263 ) -> Result<std::ffi::c_int, SyscallError> {
264 let (sigmask, timeout) = {
265 let mem = ctx.objs.process.memory_borrow();
266
267 let sigmask = if sigmask_ptr.is_null() {
270 None
271 } else {
272 Some(mem.read(sigmask_ptr)?)
273 };
274
275 let timeout = if timeout_ptr.is_null() {
277 None
278 } else {
279 let tspec = mem.read(timeout_ptr)?;
280 let sim_time = SimulationTime::try_from(tspec).map_err(|_| Errno::EINVAL)?;
281 Some(sim_time)
282 };
283
284 (sigmask, timeout)
285 };
286
287 Self::epoll_wait_helper(ctx, epfd, events_ptr, max_events, timeout, sigmask)
288 }
289
290 fn epoll_wait_helper(
291 ctx: &mut SyscallContext,
292 epfd: std::ffi::c_int,
293 events_ptr: ForeignPtr<linux_api::epoll::epoll_event>,
294 max_events: std::ffi::c_int,
295 timeout: Option<SimulationTime>,
296 sigmask: Option<linux_api::signal::sigset_t>,
297 ) -> Result<std::ffi::c_int, SyscallError> {
298 let max_events = {
300 let upper_bound = epoll_max_events_upper_bound();
301
302 if max_events <= 0 || max_events > upper_bound {
303 log::trace!(
304 "Epoll maxevents {max_events} is not greater than 0 \
305 and less than {upper_bound}"
306 );
307 return Err(Errno::EINVAL.into());
308 }
309
310 u32::try_from(max_events).unwrap()
311 };
312
313 if sigmask.is_some() {
315 warn_once_then_trace!(
316 "Epoll pwait called with non-null sigmask, \
317 which is not yet supported by shadow; returning EINVAL"
318 );
319 return Err(Errno::EINVAL.into());
320 }
321
322 let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
324 let epoll = {
325 let desc = Self::get_descriptor(&desc_table, epfd)?;
326
327 let CompatFile::New(epoll) = desc.file() else {
328 return Err(Errno::EINVAL.into());
329 };
330
331 let File::Epoll(epoll) = epoll.inner_file() else {
332 return Err(Errno::EINVAL.into());
333 };
334
335 epoll
336 };
337
338 if epoll.borrow().has_ready_events() {
339 log::trace!("Epoll {epfd} has ready events");
340
341 if events_ptr.is_null() {
346 return Err(Errno::EFAULT.into());
347 }
348
349 let ready = CallbackQueue::queue_and_run_with_legacy(|cb_queue| {
353 epoll
354 .borrow_mut()
355 .collect_ready_events(cb_queue, max_events)
356 });
357 let n_ready = ready.len();
358 if n_ready > max_events as usize {
359 panic!("Epoll should not return more than {max_events} events");
360 }
361
362 let mut mem = ctx.objs.process.memory_borrow_mut();
364 write_events_to_ptr(&mut mem, ready, events_ptr)?;
365
366 log::trace!("Epoll {epfd} returning {n_ready} events");
368 return Ok(n_ready.try_into().unwrap());
369 }
370
371 if let Some(timeout) = timeout {
374 if timeout.is_zero() {
375 log::trace!("No events are ready on epoll {epfd} and the timeout is 0");
376 return Ok(0);
377 }
378 }
379
380 if let Some(cond) = ctx.objs.thread.syscall_condition() {
383 if let Some(abs_timeout) = cond.timeout() {
384 if Worker::current_time().unwrap() >= abs_timeout {
385 log::trace!("No events are ready on epoll {epfd} and the timeout expired");
386 return Ok(0);
387 }
388 }
389 }
390
391 if ctx.objs.thread.unblocked_signal_pending(
393 ctx.objs.process,
394 &ctx.objs.host.shim_shmem_lock_borrow().unwrap(),
395 ) {
396 return Err(SyscallError::new_interrupted(false));
397 }
398
399 let Ok(abs_timeout_opt) = timeout
401 .map(|x| Worker::current_time().unwrap().checked_add(x).ok_or(()))
402 .transpose()
403 else {
404 log::trace!("Epoll wait with invalid timeout {timeout:?} (too large)");
405 return Err(Errno::EINVAL.into());
406 };
407
408 log::trace!("No events are ready on epoll {epfd} and we need to block");
409
410 let mut rv = SyscallError::new_blocked_on_file(
412 File::Epoll(Arc::clone(epoll)),
413 FileState::READABLE,
414 false,
415 );
416
417 if abs_timeout_opt.is_some() {
419 rv.blocked_condition().unwrap().set_timeout(abs_timeout_opt);
420 }
421
422 Err(rv)
423 }
424}
425
426fn timeout_arg_to_maybe_simtime(
427 timeout_ms: std::ffi::c_int,
428) -> Result<Option<SimulationTime>, Errno> {
429 let timeout_ms = (timeout_ms >= 0).then_some(timeout_ms);
431
432 if let Some(timeout_ms) = timeout_ms {
433 let timeout_ms = timeout_ms.try_into().unwrap();
435 let timeout = SimulationTime::try_from_millis(timeout_ms).ok_or(Errno::EINVAL)?;
436 Ok(Some(timeout))
437 } else {
438 Ok(None)
439 }
440}
441
442fn epoll_max_events_upper_bound() -> i32 {
451 let ep_max_events = i32::MAX;
452 let ep_ev_size: i32 = std::mem::size_of::<linux_api::epoll::epoll_event>()
453 .try_into()
454 .unwrap_or(i32::MAX);
455 ep_max_events.saturating_div(ep_ev_size)
456}
457
458fn write_events_to_ptr(
459 mem: &mut MemoryManager,
460 ready: Vec<(EpollEvents, u64)>,
461 events_ptr: ForeignPtr<linux_api::epoll::epoll_event>,
462) -> Result<(), Errno> {
463 let events_ptr = ForeignArrayPtr::new(events_ptr, ready.len());
464 let mut mem_ref = mem.memory_ref_mut(events_ptr)?;
465
466 for ((ev, data), plugin_ev) in ready.iter().zip(mem_ref.deref_mut().iter_mut()) {
467 plugin_ev.events = ev.bits();
468 plugin_ev.data = *data;
469 }
470
471 mem_ref.flush()?;
472
473 Ok(())
474}