shadow_rs/host/syscall/handler/
mman.rs

1use std::os::unix::ffi::OsStrExt;
2use std::path::PathBuf;
3
4use linux_api::errno::Errno;
5use linux_api::fcntl::OFlag;
6use linux_api::mman::{MapFlags, ProtFlags};
7use shadow_shim_helper_rs::syscall_types::ForeignPtr;
8
9use crate::cshadow as c;
10use crate::host::descriptor::{CompatFile, FileState};
11use crate::host::memory_manager::AllocdMem;
12use crate::host::syscall::handler::{SyscallContext, SyscallHandler, ThreadContext};
13use crate::host::syscall::types::SyscallError;
14
15impl SyscallHandler {
16    log_syscall!(
17        brk,
18        /* rv */ std::ffi::c_int,
19        /* addr */ *const std::ffi::c_void,
20    );
21    pub fn brk(
22        ctx: &mut SyscallContext,
23        addr: ForeignPtr<u8>,
24    ) -> Result<ForeignPtr<u8>, SyscallError> {
25        // delegate to the memory manager
26        let mut memory_manager = ctx.objs.process.memory_borrow_mut();
27        memory_manager.handle_brk(ctx.objs, addr)
28    }
29
30    // <https://github.com/torvalds/linux/tree/v6.3/mm/mremap.c#L895>
31    // ```
32    // SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
33    //                 unsigned long, new_len, unsigned long, flags,
34    //                 unsigned long, new_addr)
35    // ```
36    log_syscall!(
37        mremap,
38        /* rv */ *const std::ffi::c_void,
39        /* old_address */ *const std::ffi::c_void,
40        /* old_size */ std::ffi::c_ulong,
41        /* new_size */ std::ffi::c_ulong,
42        /* flags */ linux_api::mman::MRemapFlags,
43        /* new_address */ *const std::ffi::c_void,
44    );
45    pub fn mremap(
46        ctx: &mut SyscallContext,
47        old_addr: std::ffi::c_ulong,
48        old_size: std::ffi::c_ulong,
49        new_size: std::ffi::c_ulong,
50        flags: std::ffi::c_ulong,
51        new_addr: std::ffi::c_ulong,
52    ) -> Result<ForeignPtr<u8>, SyscallError> {
53        let old_addr: usize = old_addr.try_into().unwrap();
54        let old_size: usize = old_size.try_into().unwrap();
55        let new_size: usize = new_size.try_into().unwrap();
56        let new_addr: usize = new_addr.try_into().unwrap();
57
58        // check for truncated flag bits (use u32 instead of i32 to prevent sign extension when
59        // casting from signed to unsigned)
60        if flags as u32 as u64 != flags {
61            warn_once_then_trace!("Ignoring truncated flags from mremap: {flags}");
62        }
63
64        let flags = flags as i32;
65
66        let old_addr = ForeignPtr::<()>::from(old_addr).cast::<u8>();
67        let new_addr = ForeignPtr::<()>::from(new_addr).cast::<u8>();
68
69        // delegate to the memory manager
70        let mut memory_manager = ctx.objs.process.memory_borrow_mut();
71        memory_manager.handle_mremap(ctx.objs, old_addr, old_size, new_size, flags, new_addr)
72    }
73
74    // <https://github.com/torvalds/linux/tree/v6.3/mm/mmap.c#L2786>
75    // ```
76    // SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
77    // ```
78    log_syscall!(
79        munmap,
80        /* rv */ std::ffi::c_int,
81        /* addr */ *const std::ffi::c_void,
82        /* length */ usize,
83    );
84    pub fn munmap(
85        ctx: &mut SyscallContext,
86        addr: std::ffi::c_ulong,
87        len: usize,
88    ) -> Result<(), SyscallError> {
89        let addr: usize = addr.try_into().unwrap();
90        let addr = ForeignPtr::<()>::from(addr).cast::<u8>();
91
92        // delegate to the memory manager
93        let mut memory_manager = ctx.objs.process.memory_borrow_mut();
94        memory_manager.handle_munmap(ctx.objs, addr, len)
95    }
96
97    // <https://github.com/torvalds/linux/tree/v6.3/mm/mprotect.c#L849>
98    // ```
99    // SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, unsigned long, prot)
100    // ```
101    log_syscall!(
102        mprotect,
103        /* rv */ std::ffi::c_int,
104        /* addr */ *const std::ffi::c_void,
105        /* len */ usize,
106        /* prot */ linux_api::mman::ProtFlags,
107    );
108    pub fn mprotect(
109        ctx: &mut SyscallContext,
110        addr: std::ffi::c_ulong,
111        len: usize,
112        prot: std::ffi::c_ulong,
113    ) -> Result<(), SyscallError> {
114        let addr: usize = addr.try_into().unwrap();
115        let addr = ForeignPtr::<()>::from(addr).cast::<u8>();
116
117        let Some(prot) = ProtFlags::from_bits(prot) else {
118            let unrecognized = ProtFlags::from_bits_retain(prot).difference(ProtFlags::all());
119            log_once_per_value_at_level!(
120                unrecognized,
121                ProtFlags,
122                log::Level::Warn,
123                log::Level::Debug,
124                "Unrecognized prot flag: {:#x}",
125                unrecognized.bits()
126            );
127            return Err(Errno::EINVAL.into());
128        };
129
130        // delegate to the memory manager
131        let mut memory_manager = ctx.objs.process.memory_borrow_mut();
132        memory_manager.handle_mprotect(ctx.objs, addr, len, prot)
133    }
134
135    // <https://github.com/torvalds/linux/tree/v6.3/arch/x86/kernel/sys_x86_64.c#L86>
136    // ```
137    // SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
138    //                 unsigned long, prot, unsigned long, flags,
139    //                 unsigned long, fd, unsigned long, off)
140    // ```
141    log_syscall!(
142        mmap,
143        /* rv */ *const std::ffi::c_void,
144        /* addr */ *const std::ffi::c_void,
145        /* length */ usize,
146        /* prot */ linux_api::mman::ProtFlags,
147        /* flags */ linux_api::mman::MapFlags,
148        /* fd */ std::ffi::c_ulong,
149        /* offset */ std::ffi::c_ulong,
150    );
151    pub fn mmap(
152        ctx: &mut SyscallContext,
153        addr: std::ffi::c_ulong,
154        len: std::ffi::c_ulong,
155        prot: std::ffi::c_ulong,
156        flags: std::ffi::c_ulong,
157        fd: std::ffi::c_ulong,
158        offset: std::ffi::c_ulong,
159    ) -> Result<ForeignPtr<u8>, Errno> {
160        log::trace!("mmap called on fd {fd} for {len} bytes");
161
162        let addr: usize = addr.try_into().unwrap();
163        let addr = ForeignPtr::<()>::from(addr).cast::<u8>();
164
165        let len: usize = len.try_into().unwrap();
166
167        let offset = offset as i64;
168
169        let Some(prot) = ProtFlags::from_bits(prot) else {
170            let unrecognized = ProtFlags::from_bits_retain(prot).difference(ProtFlags::all());
171            log_once_per_value_at_level!(
172                unrecognized,
173                ProtFlags,
174                log::Level::Warn,
175                log::Level::Debug,
176                "Unrecognized prot flag: {:#x}",
177                unrecognized.bits()
178            );
179            return Err(Errno::EINVAL);
180        };
181        let Some(flags) = MapFlags::from_bits(flags) else {
182            let unrecognized = MapFlags::from_bits_retain(flags).difference(MapFlags::all());
183            log_once_per_value_at_level!(
184                unrecognized,
185                MapFlags,
186                log::Level::Warn,
187                log::Level::Debug,
188                "Unrecognized map flag: {:#x}",
189                unrecognized.bits()
190            );
191            return Err(Errno::EINVAL);
192        };
193
194        // at least one of these values is required according to man page
195        let required_flags =
196            MapFlags::MAP_PRIVATE | MapFlags::MAP_SHARED | MapFlags::MAP_SHARED_VALIDATE;
197
198        // need non-zero len, and at least one of the above options
199        if len == 0 || !required_flags.intersects(flags) {
200            log::debug!("Invalid len ({len}), prot ({prot:?}), or flags ({flags:?})");
201            return Err(Errno::EINVAL);
202        }
203
204        // we ignore the fd on anonymous mappings, otherwise it must refer to a regular file
205        // TODO: why does this fd <= 2 exist?
206        if fd <= 2 && !flags.contains(MapFlags::MAP_ANONYMOUS) {
207            log::debug!("Invalid fd {fd} and MAP_ANONYMOUS is not set in flags {flags:?}");
208            return Err(Errno::EBADF);
209        }
210
211        // we only need a file if it's not an anonymous mapping
212        let file = if flags.contains(MapFlags::MAP_ANONYMOUS) {
213            None
214        } else {
215            let file = {
216                // get the descriptor, or return early if it doesn't exist
217                let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
218                let desc = Self::get_descriptor(&desc_table, fd)?;
219
220                let CompatFile::Legacy(file) = desc.file() else {
221                    // this syscall uses a regular file, which is implemented in C
222                    return Err(Errno::EINVAL);
223                };
224
225                file.ptr()
226            };
227
228            assert!(!file.is_null());
229
230            if unsafe { c::legacyfile_getStatus(file) }.contains(FileState::CLOSED) {
231                // A file that is referenced in the descriptor table should never be a closed file.
232                // File handles (fds) are handles to open files, so if we have a file handle to a
233                // closed file, then there's an error somewhere in Shadow. Shadow's TCP sockets do
234                // close themselves even if there are still file handles (see
235                // `_tcp_endOfFileSignalled`), so we can't make this a panic.
236                log::warn!("File {file:p} (fd={fd}) is closed");
237                return Err(Errno::EBADF);
238            }
239
240            if unsafe { c::legacyfile_getType(file) } != c::_LegacyFileType_DT_FILE {
241                log::debug!("Descriptor exists for fd {fd}, but is not a regular file type");
242                return Err(Errno::EACCES);
243            }
244
245            // success; we know we have a file type descriptor
246            Some(file as *mut c::RegularFile)
247        };
248
249        // this fd exists in the plugin and not shadow; make sure to close this before returning (no
250        // RAII)
251        let plugin_fd = file.map(|file| Self::open_plugin_file(ctx.objs, fd, file));
252
253        // the file is None for an anonymous mapping, or a non-null Some otherwise
254        let Ok(plugin_fd) = plugin_fd.transpose() else {
255            log::warn!("mmap on fd {fd} for {len} bytes failed");
256            return Err(Errno::EACCES);
257        };
258
259        // delegate execution of the mmap itself to the memory manager
260        let mut memory_manager = ctx.objs.process.memory_borrow_mut();
261        let mmap_result = memory_manager.do_mmap(
262            ctx.objs,
263            addr,
264            len,
265            prot,
266            flags,
267            plugin_fd.unwrap_or(-1),
268            offset,
269        );
270
271        log::trace!(
272            "Plugin-native mmap syscall at plugin addr {addr:p} with plugin fd {fd} for \
273            {len} bytes returned {mmap_result:?}"
274        );
275
276        // close the file we asked them to open
277        if let Some(plugin_fd) = plugin_fd {
278            Self::close_plugin_file(ctx.objs, plugin_fd);
279        }
280
281        mmap_result
282    }
283
284    fn open_plugin_file(
285        ctx: &ThreadContext,
286        fd: std::ffi::c_ulong,
287        file: *mut c::RegularFile,
288    ) -> Result<i32, ()> {
289        assert!(!file.is_null());
290
291        log::trace!("Trying to open file {fd} in the plugin");
292
293        // Make sure we don't open special files like `/dev/urandom` in the plugin via mmap. We
294        // allow `/etc/localtime`, which should have been swapped with `/usr/share/zoneinfo/Etc/UTC`
295        // in `regularfile_openat`.
296        let file_type = unsafe { c::regularfile_getType(file) };
297        if file_type != c::_FileType_FILE_TYPE_REGULAR
298            && file_type != c::_FileType_FILE_TYPE_LOCALTIME
299        {
300            warn_once_then_debug!("Tried to mmap a non-regular non-localtime file");
301            return Err(());
302        }
303
304        let native_fd = unsafe { c::regularfile_getOSBackedFD(file) };
305
306        // the file is in the shadow process, and we want to open it in the plugin
307        let Some(path) = Self::create_persistent_mmap_path(native_fd) else {
308            log::trace!("RegularFile {fd} has a NULL path");
309            return Err(());
310        };
311
312        let path_bytes = path.as_os_str().as_bytes();
313
314        // TODO: do we really want to continue if we need to truncate the path and we already know
315        // the truncated path will be incorrect?
316
317        // we need enough mem for the string, but no more than PATH_MAX (with space for a NUL)
318        let path_len = std::cmp::min(path_bytes.len(), libc::PATH_MAX as usize - 1);
319        assert!(path_len > 0);
320
321        let path_bytes = &path_bytes[..path_len];
322
323        log::trace!("Opening path '{}' in plugin", path.display());
324
325        // get some memory in the plugin to write the path of the file to open (an extra 1 for NUL);
326        // must free this, but will panic if borrowing the memory manager
327        let plugin_buffer = AllocdMem::<u8>::new(ctx, path_len + 1);
328
329        {
330            let mut mem = ctx.process.memory_borrow_mut();
331
332            // write the path to the plugin
333            if let Err(e) = mem.copy_to_ptr(plugin_buffer.ptr().slice(..path_len), path_bytes) {
334                log::warn!("Unable to write string to allocated buffer: {e}");
335                std::mem::drop(mem);
336                plugin_buffer.free(ctx);
337                return Err(());
338            }
339
340            // write the NUL to the plugin
341            if let Err(e) = mem.copy_to_ptr(plugin_buffer.ptr().slice(path_len..), &[0]) {
342                log::warn!("Unable to write NUL to allocated buffer: {e}");
343                std::mem::drop(mem);
344                plugin_buffer.free(ctx);
345                return Err(());
346            }
347        }
348
349        // attempt to open the file in the plugin with the same flags as what the shadow RegularFile
350        // object has
351
352        // from man 2 open
353        let creation_flags = OFlag::empty()
354            | OFlag::O_CLOEXEC
355            | OFlag::O_CREAT
356            | OFlag::O_DIRECTORY
357            | OFlag::O_EXCL
358            | OFlag::O_NOCTTY
359            | OFlag::O_NOFOLLOW
360            | OFlag::O_TMPFILE
361            | OFlag::O_TRUNC;
362
363        // the flags linux is using
364        let native_flags = OFlag::from_bits_retain(unsafe {
365            libc::fcntl(c::regularfile_getOSBackedFD(file), libc::F_GETFL)
366        });
367
368        // get original flags that were used to open the file
369        let mut flags = OFlag::from_bits_retain(unsafe { c::regularfile_getFlagsAtOpen(file) });
370        // use only the file creation flags, except O_CLOEXEC
371        flags &= creation_flags.difference(OFlag::O_CLOEXEC);
372        // add any file access mode and file status flags that shadow doesn't implement
373        flags |= native_flags.difference(OFlag::from_bits_retain(unsafe { c::SHADOW_FLAG_MASK }));
374        // add any flags that shadow implements
375        flags |= OFlag::from_bits_retain(unsafe { c::regularfile_getShadowFlags(file) });
376        // be careful not to try re-creating or truncating it
377        flags -= OFlag::O_CREAT | OFlag::O_EXCL | OFlag::O_TMPFILE | OFlag::O_TRUNC;
378        // don't use O_NOFOLLOW since it will prevent the plugin from opening the
379        // /proc/<shadow-pid>/fd/<linux-fd> file, which is a symbolic link
380        flags -= OFlag::O_NOFOLLOW;
381
382        let mode = unsafe { c::regularfile_getModeAtOpen(file) };
383
384        // instruct the plugin to open the file at the path we sent
385        let (process_ctx, thread) = ctx.split_thread();
386        let open_result = thread.native_open(
387            &process_ctx,
388            plugin_buffer.ptr().ptr(),
389            flags.bits() as i32,
390            mode as i32,
391        );
392
393        plugin_buffer.free(ctx);
394
395        let open_result = match open_result {
396            Ok(x) => x,
397            Err(e) => {
398                log::trace!(
399                    "Failed to open path '{}' in plugin, error {e}",
400                    path.display()
401                );
402                return Err(());
403            }
404        };
405
406        log::trace!(
407            "Successfully opened path '{}' in plugin, got plugin fd {open_result}",
408            path.display(),
409        );
410
411        Ok(open_result)
412    }
413
414    /// Instruct the plugin to close the file at the given fd.
415    fn close_plugin_file(ctx: &ThreadContext, plugin_fd: i32) {
416        let (ctx, thread) = ctx.split_thread();
417        let result = thread.native_close(&ctx, plugin_fd);
418
419        if let Err(e) = result {
420            log::trace!("Failed to close file at fd {plugin_fd} in plugin, error {e}");
421        } else {
422            log::trace!("Successfully closed file at fd {plugin_fd} in plugin");
423        }
424    }
425
426    /// Get a path to a persistent file that can be mmapped in a child process, where any I/O
427    /// operations on the map will be linked to the original file. Returns a path, or `None` if we
428    /// are unable to create an accessible path.
429    fn create_persistent_mmap_path(native_fd: std::ffi::c_int) -> Option<PathBuf> {
430        assert!(native_fd >= 0);
431
432        // Return a path that is linked to the I/O operations of the file. Our current strategy is
433        // to have the plugin open and map the /proc/<shadow-pid>/fd/<linux-fd> file, which
434        // guarantees that the I/O on the Shadow file object and the new map will be linked to the
435        // linux file. TODO: using procfs in this was may or may not work if trying to mmap a
436        // device.
437        //
438        // NOTE: If we need to change this implementation, there are two tricky cases that need to
439        // be considered: files opened with O_TMPFILE (with a directory pathname), and files that
440        // were opened and then immediately unlinked (so only the anonymous fd remains). The procfs
441        // solution above handles both of these issues.
442
443        let pid_string = std::process::id().to_string();
444        let native_fd_string = native_fd.to_string();
445
446        // We do not use the original file path here, because that path could have been re-linked to
447        // a different file since this file was opened.
448        let path: PathBuf = ["/proc", &pid_string, "fd", &native_fd_string]
449            .iter()
450            .collect();
451
452        // make sure the path is accessible
453        if !path.exists() {
454            log::warn!(
455                "Unable to produce a persistent mmap path for file (linux file {native_fd})"
456            );
457            return None;
458        }
459
460        log::trace!(
461            "RegularFile (linux file {native_fd}) is persistent in procfs at {}",
462            path.display()
463        );
464
465        Some(path)
466    }
467}