1use std::os::unix::ffi::OsStrExt;
2use std::path::PathBuf;
34use linux_api::errno::Errno;
5use linux_api::fcntl::OFlag;
6use linux_api::mman::{MapFlags, ProtFlags};
7use shadow_shim_helper_rs::syscall_types::ForeignPtr;
89use crate::cshadow as c;
10use crate::host::descriptor::{CompatFile, FileState};
11use crate::host::memory_manager::AllocdMem;
12use crate::host::syscall::handler::{SyscallContext, SyscallHandler, ThreadContext};
13use crate::host::syscall::types::SyscallError;
1415impl SyscallHandler {
16log_syscall!(
17 brk,
18/* rv */ std::ffi::c_int,
19/* addr */ *const std::ffi::c_void,
20 );
21pub fn brk(
22 ctx: &mut SyscallContext,
23 addr: ForeignPtr<u8>,
24 ) -> Result<ForeignPtr<u8>, SyscallError> {
25// delegate to the memory manager
26let mut memory_manager = ctx.objs.process.memory_borrow_mut();
27 memory_manager.handle_brk(ctx.objs, addr)
28 }
2930// <https://github.com/torvalds/linux/tree/v6.3/mm/mremap.c#L895>
31 // ```
32 // SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
33 // unsigned long, new_len, unsigned long, flags,
34 // unsigned long, new_addr)
35 // ```
36log_syscall!(
37 mremap,
38/* rv */ *const std::ffi::c_void,
39/* old_address */ *const std::ffi::c_void,
40/* old_size */ std::ffi::c_ulong,
41/* new_size */ std::ffi::c_ulong,
42/* flags */ linux_api::mman::MRemapFlags,
43/* new_address */ *const std::ffi::c_void,
44 );
45pub fn mremap(
46 ctx: &mut SyscallContext,
47 old_addr: std::ffi::c_ulong,
48 old_size: std::ffi::c_ulong,
49 new_size: std::ffi::c_ulong,
50 flags: std::ffi::c_ulong,
51 new_addr: std::ffi::c_ulong,
52 ) -> Result<ForeignPtr<u8>, SyscallError> {
53let old_addr: usize = old_addr.try_into().unwrap();
54let old_size: usize = old_size.try_into().unwrap();
55let new_size: usize = new_size.try_into().unwrap();
56let new_addr: usize = new_addr.try_into().unwrap();
5758// check for truncated flag bits (use u32 instead of i32 to prevent sign extension when
59 // casting from signed to unsigned)
60if flags as u32 as u64 != flags {
61warn_once_then_trace!("Ignoring truncated flags from mremap: {flags}");
62 }
6364let flags = flags as i32;
6566let old_addr = ForeignPtr::<()>::from(old_addr).cast::<u8>();
67let new_addr = ForeignPtr::<()>::from(new_addr).cast::<u8>();
6869// delegate to the memory manager
70let mut memory_manager = ctx.objs.process.memory_borrow_mut();
71 memory_manager.handle_mremap(ctx.objs, old_addr, old_size, new_size, flags, new_addr)
72 }
7374// <https://github.com/torvalds/linux/tree/v6.3/mm/mmap.c#L2786>
75 // ```
76 // SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
77 // ```
78log_syscall!(
79 munmap,
80/* rv */ std::ffi::c_int,
81/* addr */ *const std::ffi::c_void,
82/* length */ usize,
83 );
84pub fn munmap(
85 ctx: &mut SyscallContext,
86 addr: std::ffi::c_ulong,
87 len: usize,
88 ) -> Result<(), SyscallError> {
89let addr: usize = addr.try_into().unwrap();
90let addr = ForeignPtr::<()>::from(addr).cast::<u8>();
9192// delegate to the memory manager
93let mut memory_manager = ctx.objs.process.memory_borrow_mut();
94 memory_manager.handle_munmap(ctx.objs, addr, len)
95 }
9697// <https://github.com/torvalds/linux/tree/v6.3/mm/mprotect.c#L849>
98 // ```
99 // SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, unsigned long, prot)
100 // ```
101log_syscall!(
102 mprotect,
103/* rv */ std::ffi::c_int,
104/* addr */ *const std::ffi::c_void,
105/* len */ usize,
106/* prot */ linux_api::mman::ProtFlags,
107 );
108pub fn mprotect(
109 ctx: &mut SyscallContext,
110 addr: std::ffi::c_ulong,
111 len: usize,
112 prot: std::ffi::c_ulong,
113 ) -> Result<(), SyscallError> {
114let addr: usize = addr.try_into().unwrap();
115let addr = ForeignPtr::<()>::from(addr).cast::<u8>();
116117let Some(prot) = ProtFlags::from_bits(prot) else {
118let unrecognized = ProtFlags::from_bits_retain(prot).difference(ProtFlags::all());
119log_once_per_value_at_level!(
120 unrecognized,
121 ProtFlags,
122 log::Level::Warn,
123 log::Level::Debug,
124"Unrecognized prot flag: {:#x}",
125 unrecognized.bits()
126 );
127return Err(Errno::EINVAL.into());
128 };
129130// delegate to the memory manager
131let mut memory_manager = ctx.objs.process.memory_borrow_mut();
132 memory_manager.handle_mprotect(ctx.objs, addr, len, prot)
133 }
134135// <https://github.com/torvalds/linux/tree/v6.3/arch/x86/kernel/sys_x86_64.c#L86>
136 // ```
137 // SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
138 // unsigned long, prot, unsigned long, flags,
139 // unsigned long, fd, unsigned long, off)
140 // ```
141log_syscall!(
142 mmap,
143/* rv */ *const std::ffi::c_void,
144/* addr */ *const std::ffi::c_void,
145/* length */ usize,
146/* prot */ linux_api::mman::ProtFlags,
147/* flags */ linux_api::mman::MapFlags,
148/* fd */ std::ffi::c_ulong,
149/* offset */ std::ffi::c_ulong,
150 );
151pub fn mmap(
152 ctx: &mut SyscallContext,
153 addr: std::ffi::c_ulong,
154 len: std::ffi::c_ulong,
155 prot: std::ffi::c_ulong,
156 flags: std::ffi::c_ulong,
157 fd: std::ffi::c_ulong,
158 offset: std::ffi::c_ulong,
159 ) -> Result<ForeignPtr<u8>, Errno> {
160log::trace!("mmap called on fd {fd} for {len} bytes");
161162let addr: usize = addr.try_into().unwrap();
163let addr = ForeignPtr::<()>::from(addr).cast::<u8>();
164165let len: usize = len.try_into().unwrap();
166167let offset = offset as i64;
168169let Some(prot) = ProtFlags::from_bits(prot) else {
170let unrecognized = ProtFlags::from_bits_retain(prot).difference(ProtFlags::all());
171log_once_per_value_at_level!(
172 unrecognized,
173 ProtFlags,
174 log::Level::Warn,
175 log::Level::Debug,
176"Unrecognized prot flag: {:#x}",
177 unrecognized.bits()
178 );
179return Err(Errno::EINVAL);
180 };
181let Some(flags) = MapFlags::from_bits(flags) else {
182let unrecognized = MapFlags::from_bits_retain(flags).difference(MapFlags::all());
183log_once_per_value_at_level!(
184 unrecognized,
185 MapFlags,
186 log::Level::Warn,
187 log::Level::Debug,
188"Unrecognized map flag: {:#x}",
189 unrecognized.bits()
190 );
191return Err(Errno::EINVAL);
192 };
193194// at least one of these values is required according to man page
195let required_flags =
196 MapFlags::MAP_PRIVATE | MapFlags::MAP_SHARED | MapFlags::MAP_SHARED_VALIDATE;
197198// need non-zero len, and at least one of the above options
199if len == 0 || !required_flags.intersects(flags) {
200log::debug!("Invalid len ({len}), prot ({prot:?}), or flags ({flags:?})");
201return Err(Errno::EINVAL);
202 }
203204// we ignore the fd on anonymous mappings, otherwise it must refer to a regular file
205 // TODO: why does this fd <= 2 exist?
206if fd <= 2 && !flags.contains(MapFlags::MAP_ANONYMOUS) {
207log::debug!("Invalid fd {fd} and MAP_ANONYMOUS is not set in flags {flags:?}");
208return Err(Errno::EBADF);
209 }
210211// we only need a file if it's not an anonymous mapping
212let file = if flags.contains(MapFlags::MAP_ANONYMOUS) {
213None
214} else {
215let file = {
216// get the descriptor, or return early if it doesn't exist
217let desc_table = ctx.objs.thread.descriptor_table_borrow(ctx.objs.host);
218let desc = Self::get_descriptor(&desc_table, fd)?;
219220let CompatFile::Legacy(file) = desc.file() else {
221// this syscall uses a regular file, which is implemented in C
222return Err(Errno::EINVAL);
223 };
224225 file.ptr()
226 };
227228assert!(!file.is_null());
229230if unsafe { c::legacyfile_getStatus(file) }.contains(FileState::CLOSED) {
231// A file that is referenced in the descriptor table should never be a closed file.
232 // File handles (fds) are handles to open files, so if we have a file handle to a
233 // closed file, then there's an error somewhere in Shadow. Shadow's TCP sockets do
234 // close themselves even if there are still file handles (see
235 // `_tcp_endOfFileSignalled`), so we can't make this a panic.
236log::warn!("File {file:p} (fd={fd}) is closed");
237return Err(Errno::EBADF);
238 }
239240if unsafe { c::legacyfile_getType(file) } != c::_LegacyFileType_DT_FILE {
241log::debug!("Descriptor exists for fd {fd}, but is not a regular file type");
242return Err(Errno::EACCES);
243 }
244245// success; we know we have a file type descriptor
246Some(file as *mut c::RegularFile)
247 };
248249// this fd exists in the plugin and not shadow; make sure to close this before returning (no
250 // RAII)
251let plugin_fd = file.map(|file| Self::open_plugin_file(ctx.objs, fd, file));
252253// the file is None for an anonymous mapping, or a non-null Some otherwise
254let Ok(plugin_fd) = plugin_fd.transpose() else {
255log::warn!("mmap on fd {fd} for {len} bytes failed");
256return Err(Errno::EACCES);
257 };
258259// delegate execution of the mmap itself to the memory manager
260let mut memory_manager = ctx.objs.process.memory_borrow_mut();
261let mmap_result = memory_manager.do_mmap(
262 ctx.objs,
263 addr,
264 len,
265 prot,
266 flags,
267 plugin_fd.unwrap_or(-1),
268 offset,
269 );
270271log::trace!(
272"Plugin-native mmap syscall at plugin addr {addr:p} with plugin fd {fd} for \
273 {len} bytes returned {mmap_result:?}"
274);
275276// close the file we asked them to open
277if let Some(plugin_fd) = plugin_fd {
278Self::close_plugin_file(ctx.objs, plugin_fd);
279 }
280281 mmap_result
282 }
283284fn open_plugin_file(
285 ctx: &ThreadContext,
286 fd: std::ffi::c_ulong,
287 file: *mut c::RegularFile,
288 ) -> Result<i32, ()> {
289assert!(!file.is_null());
290291log::trace!("Trying to open file {fd} in the plugin");
292293// Make sure we don't open special files like `/dev/urandom` in the plugin via mmap. We
294 // allow `/etc/localtime`, which should have been swapped with `/usr/share/zoneinfo/Etc/UTC`
295 // in `regularfile_openat`.
296let file_type = unsafe { c::regularfile_getType(file) };
297if file_type != c::_FileType_FILE_TYPE_REGULAR
298 && file_type != c::_FileType_FILE_TYPE_LOCALTIME
299 {
300warn_once_then_debug!("Tried to mmap a non-regular non-localtime file");
301return Err(());
302 }
303304let native_fd = unsafe { c::regularfile_getOSBackedFD(file) };
305306// the file is in the shadow process, and we want to open it in the plugin
307let Some(path) = Self::create_persistent_mmap_path(native_fd) else {
308log::trace!("RegularFile {fd} has a NULL path");
309return Err(());
310 };
311312let path_bytes = path.as_os_str().as_bytes();
313314// TODO: do we really want to continue if we need to truncate the path and we already know
315 // the truncated path will be incorrect?
316317 // we need enough mem for the string, but no more than PATH_MAX (with space for a NUL)
318let path_len = std::cmp::min(path_bytes.len(), libc::PATH_MAX as usize - 1);
319assert!(path_len > 0);
320321let path_bytes = &path_bytes[..path_len];
322323log::trace!("Opening path '{}' in plugin", path.display());
324325// get some memory in the plugin to write the path of the file to open (an extra 1 for NUL);
326 // must free this, but will panic if borrowing the memory manager
327let plugin_buffer = AllocdMem::<u8>::new(ctx, path_len + 1);
328329 {
330let mut mem = ctx.process.memory_borrow_mut();
331332// write the path to the plugin
333if let Err(e) = mem.copy_to_ptr(plugin_buffer.ptr().slice(..path_len), path_bytes) {
334log::warn!("Unable to write string to allocated buffer: {e}");
335 std::mem::drop(mem);
336 plugin_buffer.free(ctx);
337return Err(());
338 }
339340// write the NUL to the plugin
341if let Err(e) = mem.copy_to_ptr(plugin_buffer.ptr().slice(path_len..), &[0]) {
342log::warn!("Unable to write NUL to allocated buffer: {e}");
343 std::mem::drop(mem);
344 plugin_buffer.free(ctx);
345return Err(());
346 }
347 }
348349// attempt to open the file in the plugin with the same flags as what the shadow RegularFile
350 // object has
351352 // from man 2 open
353let creation_flags = OFlag::empty()
354 | OFlag::O_CLOEXEC
355 | OFlag::O_CREAT
356 | OFlag::O_DIRECTORY
357 | OFlag::O_EXCL
358 | OFlag::O_NOCTTY
359 | OFlag::O_NOFOLLOW
360 | OFlag::O_TMPFILE
361 | OFlag::O_TRUNC;
362363// the flags linux is using
364let native_flags = OFlag::from_bits_retain(unsafe {
365 libc::fcntl(c::regularfile_getOSBackedFD(file), libc::F_GETFL)
366 });
367368// get original flags that were used to open the file
369let mut flags = OFlag::from_bits_retain(unsafe { c::regularfile_getFlagsAtOpen(file) });
370// use only the file creation flags, except O_CLOEXEC
371flags &= creation_flags.difference(OFlag::O_CLOEXEC);
372// add any file access mode and file status flags that shadow doesn't implement
373flags |= native_flags.difference(OFlag::from_bits_retain(unsafe { c::SHADOW_FLAG_MASK }));
374// add any flags that shadow implements
375flags |= OFlag::from_bits_retain(unsafe { c::regularfile_getShadowFlags(file) });
376// be careful not to try re-creating or truncating it
377flags -= OFlag::O_CREAT | OFlag::O_EXCL | OFlag::O_TMPFILE | OFlag::O_TRUNC;
378// don't use O_NOFOLLOW since it will prevent the plugin from opening the
379 // /proc/<shadow-pid>/fd/<linux-fd> file, which is a symbolic link
380flags -= OFlag::O_NOFOLLOW;
381382let mode = unsafe { c::regularfile_getModeAtOpen(file) };
383384// instruct the plugin to open the file at the path we sent
385let (process_ctx, thread) = ctx.split_thread();
386let open_result = thread.native_open(
387&process_ctx,
388 plugin_buffer.ptr().ptr(),
389 flags.bits() as i32,
390 mode as i32,
391 );
392393 plugin_buffer.free(ctx);
394395let open_result = match open_result {
396Ok(x) => x,
397Err(e) => {
398log::trace!(
399"Failed to open path '{}' in plugin, error {e}",
400 path.display()
401 );
402return Err(());
403 }
404 };
405406log::trace!(
407"Successfully opened path '{}' in plugin, got plugin fd {open_result}",
408 path.display(),
409 );
410411Ok(open_result)
412 }
413414/// Instruct the plugin to close the file at the given fd.
415fn close_plugin_file(ctx: &ThreadContext, plugin_fd: i32) {
416let (ctx, thread) = ctx.split_thread();
417let result = thread.native_close(&ctx, plugin_fd);
418419if let Err(e) = result {
420log::trace!("Failed to close file at fd {plugin_fd} in plugin, error {e}");
421 } else {
422log::trace!("Successfully closed file at fd {plugin_fd} in plugin");
423 }
424 }
425426/// Get a path to a persistent file that can be mmapped in a child process, where any I/O
427 /// operations on the map will be linked to the original file. Returns a path, or `None` if we
428 /// are unable to create an accessible path.
429fn create_persistent_mmap_path(native_fd: std::ffi::c_int) -> Option<PathBuf> {
430assert!(native_fd >= 0);
431432// Return a path that is linked to the I/O operations of the file. Our current strategy is
433 // to have the plugin open and map the /proc/<shadow-pid>/fd/<linux-fd> file, which
434 // guarantees that the I/O on the Shadow file object and the new map will be linked to the
435 // linux file. TODO: using procfs in this was may or may not work if trying to mmap a
436 // device.
437 //
438 // NOTE: If we need to change this implementation, there are two tricky cases that need to
439 // be considered: files opened with O_TMPFILE (with a directory pathname), and files that
440 // were opened and then immediately unlinked (so only the anonymous fd remains). The procfs
441 // solution above handles both of these issues.
442443let pid_string = std::process::id().to_string();
444let native_fd_string = native_fd.to_string();
445446// We do not use the original file path here, because that path could have been re-linked to
447 // a different file since this file was opened.
448let path: PathBuf = ["/proc", &pid_string, "fd", &native_fd_string]
449 .iter()
450 .collect();
451452// make sure the path is accessible
453if !path.exists() {
454log::warn!(
455"Unable to produce a persistent mmap path for file (linux file {native_fd})"
456);
457return None;
458 }
459460log::trace!(
461"RegularFile (linux file {native_fd}) is persistent in procfs at {}",
462 path.display()
463 );
464465Some(path)
466 }
467}