shadow_rs/host/syscall/handler/
uio.rs

1use linux_api::errno::Errno;
2use shadow_shim_helper_rs::syscall_types::ForeignPtr;
3
4use crate::cshadow as c;
5use crate::host::descriptor::socket::{RecvmsgArgs, RecvmsgReturn, SendmsgArgs, Socket};
6use crate::host::descriptor::{CompatFile, File, FileState, FileStatus};
7use crate::host::syscall::handler::{SyscallContext, SyscallHandler};
8use crate::host::syscall::io::{self, IoVec};
9use crate::host::syscall::types::{ForeignArrayPtr, SyscallError};
10use crate::utility::callback_queue::CallbackQueue;
11
12impl SyscallHandler {
13    log_syscall!(
14        readv,
15        /* rv */ libc::ssize_t,
16        /* fd */ std::ffi::c_int,
17        /* iov */ *const libc::iovec,
18        /* iovcnt */ std::ffi::c_int,
19    );
20    pub fn readv(
21        ctx: &mut SyscallContext,
22        fd: std::ffi::c_int,
23        iov_ptr: ForeignPtr<libc::iovec>,
24        iov_count: std::ffi::c_int,
25    ) -> Result<libc::ssize_t, SyscallError> {
26        // if we were previously blocked, get the active file from the last syscall handler
27        // invocation since it may no longer exist in the descriptor table
28        let file = ctx
29            .objs
30            .thread
31            .syscall_condition()
32            // if this was for a C descriptor, then there won't be an active file object
33            .and_then(|x| x.active_file().cloned());
34
35        let file = match file {
36            // we were previously blocked, so re-use the file from the previous syscall invocation
37            Some(x) => x,
38            // get the file from the descriptor table, or return early if it doesn't exist
39            None => {
40                let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
41                match Self::get_descriptor(&desc_table, fd)?.file() {
42                    CompatFile::New(file) => file.clone(),
43                    // if it's a legacy file, use the C syscall handler instead
44                    CompatFile::Legacy(_) => {
45                        drop(desc_table);
46                        return Self::legacy_syscall(c::syscallhandler_readv, ctx);
47                    }
48                }
49            }
50        };
51
52        let iov_count = iov_count.try_into().or(Err(Errno::EINVAL))?;
53
54        let iovs = {
55            let mem = ctx.objs.process.memory_borrow_mut();
56            io::read_iovecs(&mem, iov_ptr, iov_count)?
57        };
58        assert_eq!(iovs.len(), iov_count);
59
60        let mut result = Self::readv_helper(ctx, file.inner_file(), &iovs, None, 0);
61
62        // if the syscall will block, keep the file open until the syscall restarts
63        if let Some(err) = result.as_mut().err() {
64            if let Some(cond) = err.blocked_condition() {
65                cond.set_active_file(file);
66            }
67        }
68
69        let bytes_read = result?;
70        Ok(bytes_read)
71    }
72
73    log_syscall!(
74        preadv,
75        /* rv */ libc::ssize_t,
76        /* fd */ std::ffi::c_int,
77        /* iov */ *const libc::iovec,
78        /* iovcnt */ std::ffi::c_int,
79        /* pos_l */ libc::c_ulong,
80        /* pos_h */ libc::c_ulong,
81    );
82    pub fn preadv(
83        ctx: &mut SyscallContext,
84        fd: std::ffi::c_int,
85        iov_ptr: ForeignPtr<libc::iovec>,
86        iov_count: std::ffi::c_int,
87        offset_l: libc::c_ulong,
88        _offset_h: libc::c_ulong,
89    ) -> Result<libc::ssize_t, SyscallError> {
90        // on Linux x86-64, an `unsigned long` is 64 bits, so we can ignore `offset_h`
91        static_assertions::assert_eq_size!(libc::c_ulong, libc::off_t);
92        let offset = offset_l as libc::off_t;
93
94        // if we were previously blocked, get the active file from the last syscall handler
95        // invocation since it may no longer exist in the descriptor table
96        let file = ctx
97            .objs
98            .thread
99            .syscall_condition()
100            // if this was for a C descriptor, then there won't be an active file object
101            .and_then(|x| x.active_file().cloned());
102
103        let file = match file {
104            // we were previously blocked, so re-use the file from the previous syscall invocation
105            Some(x) => x,
106            // get the file from the descriptor table, or return early if it doesn't exist
107            None => {
108                let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
109                match Self::get_descriptor(&desc_table, fd)?.file() {
110                    CompatFile::New(file) => file.clone(),
111                    // if it's a legacy file, use the C syscall handler instead
112                    CompatFile::Legacy(_) => {
113                        drop(desc_table);
114                        return Self::legacy_syscall(c::syscallhandler_preadv, ctx);
115                    }
116                }
117            }
118        };
119
120        // make sure the offset is not negative
121        if offset < 0 {
122            return Err(Errno::EINVAL.into());
123        }
124
125        let iov_count = iov_count.try_into().or(Err(Errno::EINVAL))?;
126
127        let iovs = {
128            let mem = ctx.objs.process.memory_borrow_mut();
129            io::read_iovecs(&mem, iov_ptr, iov_count)?
130        };
131        assert_eq!(iovs.len(), iov_count);
132
133        let mut result = Self::readv_helper(ctx, file.inner_file(), &iovs, Some(offset), 0);
134
135        // if the syscall will block, keep the file open until the syscall restarts
136        if let Some(err) = result.as_mut().err() {
137            if let Some(cond) = err.blocked_condition() {
138                cond.set_active_file(file);
139            }
140        }
141
142        let bytes_read = result?;
143        Ok(bytes_read)
144    }
145
146    log_syscall!(
147        preadv2,
148        /* rv */ libc::ssize_t,
149        /* fd */ std::ffi::c_int,
150        /* iov */ *const libc::iovec,
151        /* iovcnt */ std::ffi::c_int,
152        /* pos_l */ libc::c_ulong,
153        /* pos_h */ libc::c_ulong,
154        /* flags */ std::ffi::c_int,
155    );
156    pub fn preadv2(
157        ctx: &mut SyscallContext,
158        fd: std::ffi::c_int,
159        iov_ptr: ForeignPtr<libc::iovec>,
160        iov_count: std::ffi::c_int,
161        offset_l: libc::c_ulong,
162        _offset_h: libc::c_ulong,
163        flags: std::ffi::c_int,
164    ) -> Result<libc::ssize_t, SyscallError> {
165        // on Linux x86-64, an `unsigned long` is 64 bits, so we can ignore `offset_h`
166        static_assertions::assert_eq_size!(libc::c_ulong, libc::off_t);
167        let offset = offset_l as libc::off_t;
168
169        // if we were previously blocked, get the active file from the last syscall handler
170        // invocation since it may no longer exist in the descriptor table
171        let file = ctx
172            .objs
173            .thread
174            .syscall_condition()
175            // if this was for a C descriptor, then there won't be an active file object
176            .and_then(|x| x.active_file().cloned());
177
178        let file = match file {
179            // we were previously blocked, so re-use the file from the previous syscall invocation
180            Some(x) => x,
181            // get the file from the descriptor table, or return early if it doesn't exist
182            None => {
183                let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
184                match Self::get_descriptor(&desc_table, fd)?.file() {
185                    CompatFile::New(file) => file.clone(),
186                    // if it's a legacy file, use the C syscall handler instead
187                    CompatFile::Legacy(_) => {
188                        drop(desc_table);
189                        return Self::legacy_syscall(c::syscallhandler_preadv2, ctx);
190                    }
191                }
192            }
193        };
194
195        // readv(2): "Unlike preadv() and pwritev(), if the offset argument is -1, then the current
196        // file offset is used and updated."
197        let offset = (offset != -1).then_some(offset);
198
199        // if the offset is set, make sure it's not negative
200        if let Some(offset) = offset {
201            if offset < 0 {
202                return Err(Errno::EINVAL.into());
203            }
204        }
205
206        let iov_count = iov_count.try_into().or(Err(Errno::EINVAL))?;
207
208        let iovs = {
209            let mem = ctx.objs.process.memory_borrow_mut();
210            io::read_iovecs(&mem, iov_ptr, iov_count)?
211        };
212        assert_eq!(iovs.len(), iov_count);
213
214        let mut result = Self::readv_helper(ctx, file.inner_file(), &iovs, offset, flags);
215
216        // if the syscall will block, keep the file open until the syscall restarts
217        if let Some(err) = result.as_mut().err() {
218            if let Some(cond) = err.blocked_condition() {
219                cond.set_active_file(file);
220            }
221        }
222
223        let bytes_read = result?;
224        Ok(bytes_read)
225    }
226
227    pub fn readv_helper(
228        ctx: &mut SyscallContext,
229        file: &File,
230        iovs: &[IoVec],
231        offset: Option<libc::off_t>,
232        flags: std::ffi::c_int,
233    ) -> Result<libc::ssize_t, SyscallError> {
234        let mut mem = ctx.objs.process.memory_borrow_mut();
235
236        // if it's a socket, call recvmsg_helper() instead
237        if let File::Socket(socket) = file {
238            if offset.is_some() {
239                // sockets don't support offsets
240                return Err(Errno::ESPIPE.into());
241            }
242
243            // experimentally, it seems that read() calls on sockets with 0-length buffers will
244            // always return 0, even if there would otherwise be an EWOULDBOCK from a recv() call
245            // (see the `test_zero_len_buf_read_and_recv` and `test_zero_len_msg_read_and_recv`
246            // send/recv tests for examples)
247            if iovs.iter().map(|x| x.len).sum::<usize>() == 0 {
248                return Ok(0);
249            }
250
251            let args = RecvmsgArgs {
252                iovs,
253                control_ptr: ForeignArrayPtr::new(ForeignPtr::null(), 0),
254                flags: 0,
255            };
256
257            // call the socket's recvmsg(), and run any resulting events
258            let RecvmsgReturn { return_val, .. } =
259                CallbackQueue::queue_and_run_with_legacy(|cb_queue| {
260                    Socket::recvmsg(socket, args, &mut mem, cb_queue)
261                })?;
262
263            return Ok(return_val);
264        }
265
266        let file_status = file.borrow().status();
267
268        let result =
269            // call the file's read(), and run any resulting events
270            CallbackQueue::queue_and_run_with_legacy(|cb_queue| {
271                file.borrow_mut().readv(
272                    iovs,
273                    offset,
274                    flags,
275                    &mut mem,
276                    cb_queue,
277                )
278            });
279
280        // if the syscall would block and it's a blocking descriptor
281        if result == Err(Errno::EWOULDBLOCK.into()) && !file_status.contains(FileStatus::NONBLOCK) {
282            // TODO: should we block on the READABLE, HUP, and RDHUP states?
283            // https://github.com/shadow/shadow/issues/2181
284            let wait_for = FileState::READABLE;
285
286            // check that we're not already in the state that we're going to wait for
287            debug_assert!(!file.borrow().state().intersects(wait_for));
288
289            return Err(SyscallError::new_blocked_on_file(
290                file.clone(),
291                wait_for,
292                file.borrow().supports_sa_restart(),
293            ));
294        }
295
296        result
297    }
298
299    log_syscall!(
300        writev,
301        /* rv */ libc::ssize_t,
302        /* fd */ std::ffi::c_int,
303        /* iov */ *const libc::iovec,
304        /* iovcnt */ std::ffi::c_int,
305    );
306    pub fn writev(
307        ctx: &mut SyscallContext,
308        fd: std::ffi::c_int,
309        iov_ptr: ForeignPtr<libc::iovec>,
310        iov_count: std::ffi::c_int,
311    ) -> Result<libc::ssize_t, SyscallError> {
312        // if we were previously blocked, get the active file from the last syscall handler
313        // invocation since it may no longer exist in the descriptor table
314        let file = ctx
315            .objs
316            .thread
317            .syscall_condition()
318            // if this was for a C descriptor, then there won't be an active file object
319            .and_then(|x| x.active_file().cloned());
320
321        let file = match file {
322            // we were previously blocked, so re-use the file from the previous syscall invocation
323            Some(x) => x,
324            // get the file from the descriptor table, or return early if it doesn't exist
325            None => {
326                let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
327                match Self::get_descriptor(&desc_table, fd)?.file() {
328                    CompatFile::New(file) => file.clone(),
329                    // if it's a legacy file, use the C syscall handler instead
330                    CompatFile::Legacy(_) => {
331                        drop(desc_table);
332                        return Self::legacy_syscall(c::syscallhandler_writev, ctx);
333                    }
334                }
335            }
336        };
337
338        let iov_count = iov_count.try_into().or(Err(Errno::EINVAL))?;
339
340        let iovs = {
341            let mem = ctx.objs.process.memory_borrow_mut();
342            io::read_iovecs(&mem, iov_ptr, iov_count)?
343        };
344        assert_eq!(iovs.len(), iov_count);
345
346        let mut result = Self::writev_helper(ctx, file.inner_file(), &iovs, None, 0);
347
348        // if the syscall will block, keep the file open until the syscall restarts
349        if let Some(err) = result.as_mut().err() {
350            if let Some(cond) = err.blocked_condition() {
351                cond.set_active_file(file);
352            }
353        }
354
355        let bytes_written = result?;
356        Ok(bytes_written)
357    }
358
359    log_syscall!(
360        pwritev,
361        /* rv */ libc::ssize_t,
362        /* fd */ std::ffi::c_int,
363        /* iov */ *const libc::iovec,
364        /* iovcnt */ std::ffi::c_int,
365        /* pos_l */ libc::c_ulong,
366        /* pos_h */ libc::c_ulong,
367    );
368    pub fn pwritev(
369        ctx: &mut SyscallContext,
370        fd: std::ffi::c_int,
371        iov_ptr: ForeignPtr<libc::iovec>,
372        iov_count: std::ffi::c_int,
373        offset_l: libc::c_ulong,
374        _offset_h: libc::c_ulong,
375    ) -> Result<libc::ssize_t, SyscallError> {
376        // on Linux x86-64, an `unsigned long` is 64 bits, so we can ignore `offset_h`
377        static_assertions::assert_eq_size!(libc::c_ulong, libc::off_t);
378        let offset = offset_l as libc::off_t;
379
380        // if we were previously blocked, get the active file from the last syscall handler
381        // invocation since it may no longer exist in the descriptor table
382        let file = ctx
383            .objs
384            .thread
385            .syscall_condition()
386            // if this was for a C descriptor, then there won't be an active file object
387            .and_then(|x| x.active_file().cloned());
388
389        let file = match file {
390            // we were previously blocked, so re-use the file from the previous syscall invocation
391            Some(x) => x,
392            // get the file from the descriptor table, or return early if it doesn't exist
393            None => {
394                let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
395                match Self::get_descriptor(&desc_table, fd)?.file() {
396                    CompatFile::New(file) => file.clone(),
397                    // if it's a legacy file, use the C syscall handler instead
398                    CompatFile::Legacy(_) => {
399                        drop(desc_table);
400                        return Self::legacy_syscall(c::syscallhandler_pwritev, ctx);
401                    }
402                }
403            }
404        };
405
406        // make sure the offset is not negative
407        if offset < 0 {
408            return Err(Errno::EINVAL.into());
409        }
410
411        let iov_count = iov_count.try_into().or(Err(Errno::EINVAL))?;
412
413        let iovs = {
414            let mem = ctx.objs.process.memory_borrow_mut();
415            io::read_iovecs(&mem, iov_ptr, iov_count)?
416        };
417        assert_eq!(iovs.len(), iov_count);
418
419        let mut result = Self::writev_helper(ctx, file.inner_file(), &iovs, Some(offset), 0);
420
421        // if the syscall will block, keep the file open until the syscall restarts
422        if let Some(err) = result.as_mut().err() {
423            if let Some(cond) = err.blocked_condition() {
424                cond.set_active_file(file);
425            }
426        }
427
428        let bytes_written = result?;
429        Ok(bytes_written)
430    }
431
432    log_syscall!(
433        pwritev2,
434        /* rv */ libc::ssize_t,
435        /* fd */ std::ffi::c_int,
436        /* iov */ *const libc::iovec,
437        /* iovcnt */ std::ffi::c_int,
438        /* pos_l */ libc::c_ulong,
439        /* pos_h */ libc::c_ulong,
440        /* flags */ std::ffi::c_int,
441    );
442    pub fn pwritev2(
443        ctx: &mut SyscallContext,
444        fd: std::ffi::c_int,
445        iov_ptr: ForeignPtr<libc::iovec>,
446        iov_count: std::ffi::c_int,
447        offset_l: libc::c_ulong,
448        _offset_h: libc::c_ulong,
449        flags: std::ffi::c_int,
450    ) -> Result<libc::ssize_t, SyscallError> {
451        // on Linux x86-64, an `unsigned long` is 64 bits, so we can ignore `offset_h`
452        static_assertions::assert_eq_size!(libc::c_ulong, libc::off_t);
453        let offset = offset_l as libc::off_t;
454
455        // if we were previously blocked, get the active file from the last syscall handler
456        // invocation since it may no longer exist in the descriptor table
457        let file = ctx
458            .objs
459            .thread
460            .syscall_condition()
461            // if this was for a C descriptor, then there won't be an active file object
462            .and_then(|x| x.active_file().cloned());
463
464        let file = match file {
465            // we were previously blocked, so re-use the file from the previous syscall invocation
466            Some(x) => x,
467            // get the file from the descriptor table, or return early if it doesn't exist
468            None => {
469                let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
470                match Self::get_descriptor(&desc_table, fd)?.file() {
471                    CompatFile::New(file) => file.clone(),
472                    // if it's a legacy file, use the C syscall handler instead
473                    CompatFile::Legacy(_) => {
474                        drop(desc_table);
475                        return Self::legacy_syscall(c::syscallhandler_pwritev2, ctx);
476                    }
477                }
478            }
479        };
480
481        // readv(2): "Unlike preadv() and pwritev(), if the offset argument is -1, then the current
482        // file offset is used and updated."
483        let offset = (offset != -1).then_some(offset);
484
485        // if the offset is set, make sure it's not negative
486        if let Some(offset) = offset {
487            if offset < 0 {
488                return Err(Errno::EINVAL.into());
489            }
490        }
491
492        let iov_count = iov_count.try_into().or(Err(Errno::EINVAL))?;
493
494        let iovs = {
495            let mem = ctx.objs.process.memory_borrow_mut();
496            io::read_iovecs(&mem, iov_ptr, iov_count)?
497        };
498        assert_eq!(iovs.len(), iov_count);
499
500        let mut result = Self::writev_helper(ctx, file.inner_file(), &iovs, offset, flags);
501
502        // if the syscall will block, keep the file open until the syscall restarts
503        if let Some(err) = result.as_mut().err() {
504            if let Some(cond) = err.blocked_condition() {
505                cond.set_active_file(file);
506            }
507        }
508
509        let bytes_written = result?;
510        Ok(bytes_written)
511    }
512
513    pub fn writev_helper(
514        ctx: &mut SyscallContext,
515        file: &File,
516        iovs: &[IoVec],
517        offset: Option<libc::off_t>,
518        flags: std::ffi::c_int,
519    ) -> Result<libc::ssize_t, SyscallError> {
520        let mut mem = ctx.objs.process.memory_borrow_mut();
521        let mut rng = ctx.objs.host.random_mut();
522        let net_ns = ctx.objs.host.network_namespace_borrow();
523
524        // if it's a socket, call sendmsg_helper() instead
525        if let File::Socket(socket) = file {
526            if offset.is_some() {
527                // sockets don't support offsets
528                return Err(Errno::ESPIPE.into());
529            }
530
531            let args = SendmsgArgs {
532                addr: None,
533                iovs,
534                control_ptr: ForeignArrayPtr::new(ForeignPtr::null(), 0),
535                flags: 0,
536            };
537
538            // call the socket's sendmsg(), and run any resulting events
539            let bytes_written = CallbackQueue::queue_and_run_with_legacy(|cb_queue| {
540                Socket::sendmsg(socket, args, &mut mem, &net_ns, &mut *rng, cb_queue)
541            })?;
542
543            return Ok(bytes_written);
544        }
545
546        let file_status = file.borrow().status();
547
548        let result =
549            // call the file's write(), and run any resulting events
550            CallbackQueue::queue_and_run_with_legacy(|cb_queue| {
551                file.borrow_mut().writev(
552                    iovs,
553                    offset,
554                    flags,
555                    &mut mem,
556                    cb_queue,
557                )
558            });
559
560        // if the syscall would block and it's a blocking descriptor
561        if result == Err(Errno::EWOULDBLOCK.into()) && !file_status.contains(FileStatus::NONBLOCK) {
562            // TODO: should we block on the WRITABLE and HUP states?
563            // https://github.com/shadow/shadow/issues/2181
564            let wait_for = FileState::WRITABLE;
565
566            // check that we're not already in the state that we're going to wait for
567            debug_assert!(!file.borrow().state().intersects(wait_for));
568
569            return Err(SyscallError::new_blocked_on_file(
570                file.clone(),
571                wait_for,
572                file.borrow().supports_sa_restart(),
573            ));
574        }
575
576        result
577    }
578}