shadow_rs/host/syscall/handler/
uio.rs

1use linux_api::errno::Errno;
2use shadow_shim_helper_rs::syscall_types::ForeignPtr;
3
4use crate::cshadow as c;
5use crate::host::descriptor::socket::{RecvmsgArgs, RecvmsgReturn, SendmsgArgs, Socket};
6use crate::host::descriptor::{CompatFile, File, FileState, FileStatus};
7use crate::host::syscall::handler::{SyscallContext, SyscallHandler};
8use crate::host::syscall::io::{self, IoVec};
9use crate::host::syscall::types::{ForeignArrayPtr, SyscallError};
10use crate::utility::callback_queue::CallbackQueue;
11
12impl SyscallHandler {
13    log_syscall!(
14        readv,
15        /* rv */ libc::ssize_t,
16        /* fd */ std::ffi::c_int,
17        /* iov */ *const libc::iovec,
18        /* iovcnt */ std::ffi::c_int,
19    );
20    pub fn readv(
21        ctx: &mut SyscallContext,
22        fd: std::ffi::c_int,
23        iov_ptr: ForeignPtr<libc::iovec>,
24        iov_count: std::ffi::c_int,
25    ) -> Result<libc::ssize_t, SyscallError> {
26        // if we were previously blocked, get the active file from the last syscall handler
27        // invocation since it may no longer exist in the descriptor table
28        let file = ctx
29            .objs
30            .thread
31            .syscall_condition()
32            // if this was for a C descriptor, then there won't be an active file object
33            .and_then(|x| x.active_file().cloned());
34
35        let file = match file {
36            // we were previously blocked, so re-use the file from the previous syscall invocation
37            Some(x) => x,
38            // get the file from the descriptor table, or return early if it doesn't exist
39            None => {
40                let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
41                match Self::get_descriptor(&desc_table, fd)?.file() {
42                    CompatFile::New(file) => file.clone(),
43                    // if it's a legacy file, use the C syscall handler instead
44                    CompatFile::Legacy(_) => {
45                        drop(desc_table);
46                        return Self::legacy_syscall(c::syscallhandler_readv, ctx);
47                    }
48                }
49            }
50        };
51
52        let iov_count = iov_count.try_into().or(Err(Errno::EINVAL))?;
53
54        let iovs = {
55            let mem = ctx.objs.process.memory_borrow_mut();
56            io::read_iovecs(&mem, iov_ptr, iov_count)?
57        };
58        assert_eq!(iovs.len(), iov_count);
59
60        let mut result = Self::readv_helper(ctx, file.inner_file(), &iovs, None, 0);
61
62        // if the syscall will block, keep the file open until the syscall restarts
63        if let Some(err) = result.as_mut().err() {
64            if let Some(cond) = err.blocked_condition() {
65                cond.set_active_file(file);
66            }
67        }
68
69        let bytes_read = result?;
70        Ok(bytes_read)
71    }
72
73    log_syscall!(
74        preadv,
75        /* rv */ libc::ssize_t,
76        /* fd */ std::ffi::c_int,
77        /* iov */ *const libc::iovec,
78        /* iovcnt */ std::ffi::c_int,
79        /* pos_l */ libc::c_ulong,
80        /* pos_h */ libc::c_ulong,
81    );
82    pub fn preadv(
83        ctx: &mut SyscallContext,
84        fd: std::ffi::c_int,
85        iov_ptr: ForeignPtr<libc::iovec>,
86        iov_count: std::ffi::c_int,
87        offset_l: libc::c_ulong,
88        _offset_h: libc::c_ulong,
89    ) -> Result<libc::ssize_t, SyscallError> {
90        // on Linux x86-64, an `unsigned long` is 64 bits, so we can ignore `offset_h`
91        static_assertions::assert_eq_size!(libc::c_ulong, libc::off_t);
92        let offset = offset_l as libc::off_t;
93
94        // if we were previously blocked, get the active file from the last syscall handler
95        // invocation since it may no longer exist in the descriptor table
96        let file = ctx
97            .objs
98            .thread
99            .syscall_condition()
100            // if this was for a C descriptor, then there won't be an active file object
101            .and_then(|x| x.active_file().cloned());
102
103        let file = match file {
104            // we were previously blocked, so re-use the file from the previous syscall invocation
105            Some(x) => x,
106            // get the file from the descriptor table, or return early if it doesn't exist
107            None => {
108                let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
109                match Self::get_descriptor(&desc_table, fd)?.file() {
110                    CompatFile::New(file) => file.clone(),
111                    // if it's a legacy file, use the C syscall handler instead
112                    CompatFile::Legacy(_) => {
113                        drop(desc_table);
114                        return Self::legacy_syscall(c::syscallhandler_preadv, ctx);
115                    }
116                }
117            }
118        };
119
120        // make sure the offset is not negative
121        if offset < 0 {
122            return Err(Errno::EINVAL.into());
123        }
124
125        let iov_count = iov_count.try_into().or(Err(Errno::EINVAL))?;
126
127        let iovs = {
128            let mem = ctx.objs.process.memory_borrow_mut();
129            io::read_iovecs(&mem, iov_ptr, iov_count)?
130        };
131        assert_eq!(iovs.len(), iov_count);
132
133        let mut result = Self::readv_helper(ctx, file.inner_file(), &iovs, Some(offset), 0);
134
135        // if the syscall will block, keep the file open until the syscall restarts
136        if let Some(err) = result.as_mut().err() {
137            if let Some(cond) = err.blocked_condition() {
138                cond.set_active_file(file);
139            }
140        }
141
142        let bytes_read = result?;
143        Ok(bytes_read)
144    }
145
146    log_syscall!(
147        preadv2,
148        /* rv */ libc::ssize_t,
149        /* fd */ std::ffi::c_int,
150        /* iov */ *const libc::iovec,
151        /* iovcnt */ std::ffi::c_int,
152        /* pos_l */ libc::c_ulong,
153        /* pos_h */ libc::c_ulong,
154        /* flags */ std::ffi::c_int,
155    );
156    pub fn preadv2(
157        ctx: &mut SyscallContext,
158        fd: std::ffi::c_int,
159        iov_ptr: ForeignPtr<libc::iovec>,
160        iov_count: std::ffi::c_int,
161        offset_l: libc::c_ulong,
162        _offset_h: libc::c_ulong,
163        flags: std::ffi::c_int,
164    ) -> Result<libc::ssize_t, SyscallError> {
165        // on Linux x86-64, an `unsigned long` is 64 bits, so we can ignore `offset_h`
166        static_assertions::assert_eq_size!(libc::c_ulong, libc::off_t);
167        let offset = offset_l as libc::off_t;
168
169        // if we were previously blocked, get the active file from the last syscall handler
170        // invocation since it may no longer exist in the descriptor table
171        let file = ctx
172            .objs
173            .thread
174            .syscall_condition()
175            // if this was for a C descriptor, then there won't be an active file object
176            .and_then(|x| x.active_file().cloned());
177
178        let file = match file {
179            // we were previously blocked, so re-use the file from the previous syscall invocation
180            Some(x) => x,
181            // get the file from the descriptor table, or return early if it doesn't exist
182            None => {
183                let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
184                match Self::get_descriptor(&desc_table, fd)?.file() {
185                    CompatFile::New(file) => file.clone(),
186                    // if it's a legacy file, use the C syscall handler instead
187                    CompatFile::Legacy(_) => {
188                        drop(desc_table);
189                        return Self::legacy_syscall(c::syscallhandler_preadv2, ctx);
190                    }
191                }
192            }
193        };
194
195        // readv(2): "Unlike preadv() and pwritev(), if the offset argument is -1, then the current
196        // file offset is used and updated."
197        let offset = (offset != -1).then_some(offset);
198
199        // if the offset is set, make sure it's not negative
200        if let Some(offset) = offset {
201            if offset < 0 {
202                return Err(Errno::EINVAL.into());
203            }
204        }
205
206        let iov_count = iov_count.try_into().or(Err(Errno::EINVAL))?;
207
208        let iovs = {
209            let mem = ctx.objs.process.memory_borrow_mut();
210            io::read_iovecs(&mem, iov_ptr, iov_count)?
211        };
212        assert_eq!(iovs.len(), iov_count);
213
214        let mut result = Self::readv_helper(ctx, file.inner_file(), &iovs, offset, flags);
215
216        // if the syscall will block, keep the file open until the syscall restarts
217        if let Some(err) = result.as_mut().err() {
218            if let Some(cond) = err.blocked_condition() {
219                cond.set_active_file(file);
220            }
221        }
222
223        let bytes_read = result?;
224        Ok(bytes_read)
225    }
226
227    pub fn readv_helper(
228        ctx: &mut SyscallContext,
229        file: &File,
230        iovs: &[IoVec],
231        offset: Option<libc::off_t>,
232        flags: std::ffi::c_int,
233    ) -> Result<libc::ssize_t, SyscallError> {
234        let mut mem = ctx.objs.process.memory_borrow_mut();
235
236        // if it's a socket, call recvmsg_helper() instead
237        if let File::Socket(socket) = file {
238            if offset.is_some() {
239                // sockets don't support offsets
240                return Err(Errno::ESPIPE.into());
241            }
242
243            // experimentally, it seems that read() calls on sockets with 0-length buffers will
244            // always return 0, even if there would otherwise be an EWOULDBOCK from a recv() call
245            // (see the `test_zero_len_buf_read_and_recv` and `test_zero_len_msg_read_and_recv`
246            // send/recv tests for examples)
247            if iovs.iter().map(|x| x.len).sum::<usize>() == 0 {
248                return Ok(0);
249            }
250
251            let args = RecvmsgArgs {
252                iovs,
253                control_ptr: ForeignArrayPtr::new(ForeignPtr::null(), 0),
254                flags: 0,
255            };
256
257            // call the socket's recvmsg(), and run any resulting events
258            let RecvmsgReturn { return_val, .. } =
259                CallbackQueue::queue_and_run_with_legacy(|cb_queue| {
260                    Socket::recvmsg(socket, args, &mut mem, cb_queue)
261                })?;
262
263            return Ok(return_val);
264        }
265
266        let file_status = file.borrow().status();
267
268        let result =
269            // call the file's read(), and run any resulting events
270            CallbackQueue::queue_and_run_with_legacy(|cb_queue| {
271                file.borrow_mut().readv(
272                    iovs,
273                    offset,
274                    flags,
275                    &mut mem,
276                    cb_queue,
277                )
278            });
279
280        // if the syscall would block and it's a blocking descriptor
281        if result == Err(Errno::EWOULDBLOCK.into()) && !file_status.contains(FileStatus::NONBLOCK) {
282            return Err(SyscallError::new_blocked_on_file(
283                file.clone(),
284                FileState::READABLE,
285                file.borrow().supports_sa_restart(),
286            ));
287        }
288
289        result
290    }
291
292    log_syscall!(
293        writev,
294        /* rv */ libc::ssize_t,
295        /* fd */ std::ffi::c_int,
296        /* iov */ *const libc::iovec,
297        /* iovcnt */ std::ffi::c_int,
298    );
299    pub fn writev(
300        ctx: &mut SyscallContext,
301        fd: std::ffi::c_int,
302        iov_ptr: ForeignPtr<libc::iovec>,
303        iov_count: std::ffi::c_int,
304    ) -> Result<libc::ssize_t, SyscallError> {
305        // if we were previously blocked, get the active file from the last syscall handler
306        // invocation since it may no longer exist in the descriptor table
307        let file = ctx
308            .objs
309            .thread
310            .syscall_condition()
311            // if this was for a C descriptor, then there won't be an active file object
312            .and_then(|x| x.active_file().cloned());
313
314        let file = match file {
315            // we were previously blocked, so re-use the file from the previous syscall invocation
316            Some(x) => x,
317            // get the file from the descriptor table, or return early if it doesn't exist
318            None => {
319                let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
320                match Self::get_descriptor(&desc_table, fd)?.file() {
321                    CompatFile::New(file) => file.clone(),
322                    // if it's a legacy file, use the C syscall handler instead
323                    CompatFile::Legacy(_) => {
324                        drop(desc_table);
325                        return Self::legacy_syscall(c::syscallhandler_writev, ctx);
326                    }
327                }
328            }
329        };
330
331        let iov_count = iov_count.try_into().or(Err(Errno::EINVAL))?;
332
333        let iovs = {
334            let mem = ctx.objs.process.memory_borrow_mut();
335            io::read_iovecs(&mem, iov_ptr, iov_count)?
336        };
337        assert_eq!(iovs.len(), iov_count);
338
339        let mut result = Self::writev_helper(ctx, file.inner_file(), &iovs, None, 0);
340
341        // if the syscall will block, keep the file open until the syscall restarts
342        if let Some(err) = result.as_mut().err() {
343            if let Some(cond) = err.blocked_condition() {
344                cond.set_active_file(file);
345            }
346        }
347
348        let bytes_written = result?;
349        Ok(bytes_written)
350    }
351
352    log_syscall!(
353        pwritev,
354        /* rv */ libc::ssize_t,
355        /* fd */ std::ffi::c_int,
356        /* iov */ *const libc::iovec,
357        /* iovcnt */ std::ffi::c_int,
358        /* pos_l */ libc::c_ulong,
359        /* pos_h */ libc::c_ulong,
360    );
361    pub fn pwritev(
362        ctx: &mut SyscallContext,
363        fd: std::ffi::c_int,
364        iov_ptr: ForeignPtr<libc::iovec>,
365        iov_count: std::ffi::c_int,
366        offset_l: libc::c_ulong,
367        _offset_h: libc::c_ulong,
368    ) -> Result<libc::ssize_t, SyscallError> {
369        // on Linux x86-64, an `unsigned long` is 64 bits, so we can ignore `offset_h`
370        static_assertions::assert_eq_size!(libc::c_ulong, libc::off_t);
371        let offset = offset_l as libc::off_t;
372
373        // if we were previously blocked, get the active file from the last syscall handler
374        // invocation since it may no longer exist in the descriptor table
375        let file = ctx
376            .objs
377            .thread
378            .syscall_condition()
379            // if this was for a C descriptor, then there won't be an active file object
380            .and_then(|x| x.active_file().cloned());
381
382        let file = match file {
383            // we were previously blocked, so re-use the file from the previous syscall invocation
384            Some(x) => x,
385            // get the file from the descriptor table, or return early if it doesn't exist
386            None => {
387                let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
388                match Self::get_descriptor(&desc_table, fd)?.file() {
389                    CompatFile::New(file) => file.clone(),
390                    // if it's a legacy file, use the C syscall handler instead
391                    CompatFile::Legacy(_) => {
392                        drop(desc_table);
393                        return Self::legacy_syscall(c::syscallhandler_pwritev, ctx);
394                    }
395                }
396            }
397        };
398
399        // make sure the offset is not negative
400        if offset < 0 {
401            return Err(Errno::EINVAL.into());
402        }
403
404        let iov_count = iov_count.try_into().or(Err(Errno::EINVAL))?;
405
406        let iovs = {
407            let mem = ctx.objs.process.memory_borrow_mut();
408            io::read_iovecs(&mem, iov_ptr, iov_count)?
409        };
410        assert_eq!(iovs.len(), iov_count);
411
412        let mut result = Self::writev_helper(ctx, file.inner_file(), &iovs, Some(offset), 0);
413
414        // if the syscall will block, keep the file open until the syscall restarts
415        if let Some(err) = result.as_mut().err() {
416            if let Some(cond) = err.blocked_condition() {
417                cond.set_active_file(file);
418            }
419        }
420
421        let bytes_written = result?;
422        Ok(bytes_written)
423    }
424
425    log_syscall!(
426        pwritev2,
427        /* rv */ libc::ssize_t,
428        /* fd */ std::ffi::c_int,
429        /* iov */ *const libc::iovec,
430        /* iovcnt */ std::ffi::c_int,
431        /* pos_l */ libc::c_ulong,
432        /* pos_h */ libc::c_ulong,
433        /* flags */ std::ffi::c_int,
434    );
435    pub fn pwritev2(
436        ctx: &mut SyscallContext,
437        fd: std::ffi::c_int,
438        iov_ptr: ForeignPtr<libc::iovec>,
439        iov_count: std::ffi::c_int,
440        offset_l: libc::c_ulong,
441        _offset_h: libc::c_ulong,
442        flags: std::ffi::c_int,
443    ) -> Result<libc::ssize_t, SyscallError> {
444        // on Linux x86-64, an `unsigned long` is 64 bits, so we can ignore `offset_h`
445        static_assertions::assert_eq_size!(libc::c_ulong, libc::off_t);
446        let offset = offset_l as libc::off_t;
447
448        // if we were previously blocked, get the active file from the last syscall handler
449        // invocation since it may no longer exist in the descriptor table
450        let file = ctx
451            .objs
452            .thread
453            .syscall_condition()
454            // if this was for a C descriptor, then there won't be an active file object
455            .and_then(|x| x.active_file().cloned());
456
457        let file = match file {
458            // we were previously blocked, so re-use the file from the previous syscall invocation
459            Some(x) => x,
460            // get the file from the descriptor table, or return early if it doesn't exist
461            None => {
462                let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
463                match Self::get_descriptor(&desc_table, fd)?.file() {
464                    CompatFile::New(file) => file.clone(),
465                    // if it's a legacy file, use the C syscall handler instead
466                    CompatFile::Legacy(_) => {
467                        drop(desc_table);
468                        return Self::legacy_syscall(c::syscallhandler_pwritev2, ctx);
469                    }
470                }
471            }
472        };
473
474        // readv(2): "Unlike preadv() and pwritev(), if the offset argument is -1, then the current
475        // file offset is used and updated."
476        let offset = (offset != -1).then_some(offset);
477
478        // if the offset is set, make sure it's not negative
479        if let Some(offset) = offset {
480            if offset < 0 {
481                return Err(Errno::EINVAL.into());
482            }
483        }
484
485        let iov_count = iov_count.try_into().or(Err(Errno::EINVAL))?;
486
487        let iovs = {
488            let mem = ctx.objs.process.memory_borrow_mut();
489            io::read_iovecs(&mem, iov_ptr, iov_count)?
490        };
491        assert_eq!(iovs.len(), iov_count);
492
493        let mut result = Self::writev_helper(ctx, file.inner_file(), &iovs, offset, flags);
494
495        // if the syscall will block, keep the file open until the syscall restarts
496        if let Some(err) = result.as_mut().err() {
497            if let Some(cond) = err.blocked_condition() {
498                cond.set_active_file(file);
499            }
500        }
501
502        let bytes_written = result?;
503        Ok(bytes_written)
504    }
505
506    pub fn writev_helper(
507        ctx: &mut SyscallContext,
508        file: &File,
509        iovs: &[IoVec],
510        offset: Option<libc::off_t>,
511        flags: std::ffi::c_int,
512    ) -> Result<libc::ssize_t, SyscallError> {
513        let mut mem = ctx.objs.process.memory_borrow_mut();
514        let mut rng = ctx.objs.host.random_mut();
515        let net_ns = ctx.objs.host.network_namespace_borrow();
516
517        // if it's a socket, call sendmsg_helper() instead
518        if let File::Socket(socket) = file {
519            if offset.is_some() {
520                // sockets don't support offsets
521                return Err(Errno::ESPIPE.into());
522            }
523
524            let args = SendmsgArgs {
525                addr: None,
526                iovs,
527                control_ptr: ForeignArrayPtr::new(ForeignPtr::null(), 0),
528                flags: 0,
529            };
530
531            // call the socket's sendmsg(), and run any resulting events
532            let bytes_written = CallbackQueue::queue_and_run_with_legacy(|cb_queue| {
533                Socket::sendmsg(socket, args, &mut mem, &net_ns, &mut *rng, cb_queue)
534            })?;
535
536            return Ok(bytes_written);
537        }
538
539        let file_status = file.borrow().status();
540
541        let result =
542            // call the file's write(), and run any resulting events
543            CallbackQueue::queue_and_run_with_legacy(|cb_queue| {
544                file.borrow_mut().writev(
545                    iovs,
546                    offset,
547                    flags,
548                    &mut mem,
549                    cb_queue,
550                )
551            });
552
553        // if the syscall would block and it's a blocking descriptor
554        if result == Err(Errno::EWOULDBLOCK.into()) && !file_status.contains(FileStatus::NONBLOCK) {
555            return Err(SyscallError::new_blocked_on_file(
556                file.clone(),
557                FileState::WRITABLE,
558                file.borrow().supports_sa_restart(),
559            ));
560        }
561
562        result
563    }
564}