rustix/thread/
prctl.rs

1//! Linux `prctl` wrappers.
2//!
3//! Rustix wraps variadic/dynamic-dispatch functions like `prctl` in type-safe
4//! wrappers.
5//!
6//! # Safety
7//!
8//! The inner `prctl` calls are dynamically typed and must be called correctly.
9#![allow(unsafe_code)]
10
11use core::mem::MaybeUninit;
12use core::num::NonZeroU64;
13use core::ptr;
14use core::ptr::NonNull;
15use core::sync::atomic::AtomicU8;
16
17use bitflags::bitflags;
18
19use crate::backend::c::{c_int, c_uint, c_void};
20use crate::backend::prctl::syscalls;
21use crate::ffi::CStr;
22#[cfg(feature = "alloc")]
23use crate::ffi::CString;
24use crate::io;
25use crate::pid::Pid;
26use crate::prctl::{
27    prctl_1arg, prctl_2args, prctl_3args, prctl_get_at_arg2_optional, PointerAuthenticationKeys,
28};
29use crate::utils::as_ptr;
30
31//
32// PR_GET_KEEPCAPS/PR_SET_KEEPCAPS
33//
34
35const PR_GET_KEEPCAPS: c_int = 7;
36
37/// Get the current state of the calling thread's `keep capabilities` flag.
38///
39/// # References
40///  - [`prctl(PR_GET_KEEPCAPS,…)`]
41///
42/// [`prctl(PR_GET_KEEPCAPS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
43#[inline]
44pub fn get_keep_capabilities() -> io::Result<bool> {
45    unsafe { prctl_1arg(PR_GET_KEEPCAPS) }.map(|r| r != 0)
46}
47
48const PR_SET_KEEPCAPS: c_int = 8;
49
50/// Set the state of the calling thread's `keep capabilities` flag.
51///
52/// # References
53///  - [`prctl(PR_SET_KEEPCAPS,…)`]
54///
55/// [`prctl(PR_SET_KEEPCAPS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
56#[inline]
57pub fn set_keep_capabilities(enable: bool) -> io::Result<()> {
58    unsafe { prctl_2args(PR_SET_KEEPCAPS, usize::from(enable) as *mut _) }.map(|_r| ())
59}
60
61//
62// PR_GET_NAME/PR_SET_NAME
63//
64
#[cfg(feature = "alloc")]
const PR_GET_NAME: c_int = 16;

/// Get the name of the calling thread.
///
/// The name is read into a 16-byte buffer and the bytes preceding the first
/// NUL are returned. Should the buffer contain no NUL at all, the whole
/// buffer is returned rather than an empty name.
///
/// # Errors
///
/// Returns the error reported by the underlying `prctl` call, or
/// [`io::Errno::ILSEQ`] if the bytes cannot be turned into a `CString`.
///
/// # References
///  - [`prctl(PR_GET_NAME,…)`]
///
/// [`prctl(PR_GET_NAME,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
#[inline]
#[cfg(feature = "alloc")]
pub fn name() -> io::Result<CString> {
    let mut buffer = [0_u8; 16];
    // SAFETY: `buffer` is a live, writable 16-byte array for the duration of
    // the call.
    unsafe { prctl_2args(PR_GET_NAME, buffer.as_mut_ptr().cast())? };

    // Fall back to the full buffer when no terminator is present, so that a
    // non-terminated name is never silently reported as empty.
    let len = buffer
        .iter()
        .position(|&x| x == 0_u8)
        .unwrap_or(buffer.len());
    CString::new(&buffer[..len]).map_err(|_r| io::Errno::ILSEQ)
}
83
84const PR_SET_NAME: c_int = 15;
85
86/// Set the name of the calling thread.
87///
88/// Unlike `pthread_setname_np`, this function silently truncates the name to
89/// 16 bytes, as the Linux syscall does.
90///
91/// # References
92///  - [`prctl(PR_SET_NAME,…)`]
93///
94/// [`prctl(PR_SET_NAME,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
95#[inline]
96pub fn set_name(name: &CStr) -> io::Result<()> {
97    unsafe { prctl_2args(PR_SET_NAME, name.as_ptr() as *mut _) }.map(|_r| ())
98}
99
100//
101// PR_GET_SECCOMP/PR_SET_SECCOMP
102//
103
104//const PR_GET_SECCOMP: c_int = 21;
105
// Raw `SECCOMP_MODE_*` values; they double as the discriminants of
// `SecureComputingMode` below.
const SECCOMP_MODE_DISABLED: i32 = 0;
const SECCOMP_MODE_STRICT: i32 = 1;
const SECCOMP_MODE_FILTER: i32 = 2;

/// `SECCOMP_MODE_*`.
///
/// Each variant's discriminant is the matching raw `SECCOMP_MODE_*` value,
/// so `mode as i32` yields the number passed to the kernel.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[repr(i32)]
pub enum SecureComputingMode {
    /// Secure computing is not in use.
    Disabled = SECCOMP_MODE_DISABLED,
    /// Use hard-coded filter.
    Strict = SECCOMP_MODE_STRICT,
    /// Use user-supplied filter.
    Filter = SECCOMP_MODE_FILTER,
}
121
122impl TryFrom<i32> for SecureComputingMode {
123    type Error = io::Errno;
124
125    fn try_from(value: i32) -> Result<Self, Self::Error> {
126        match value {
127            SECCOMP_MODE_DISABLED => Ok(Self::Disabled),
128            SECCOMP_MODE_STRICT => Ok(Self::Strict),
129            SECCOMP_MODE_FILTER => Ok(Self::Filter),
130            _ => Err(io::Errno::RANGE),
131        }
132    }
133}
134
135/*
136/// Get the secure computing mode of the calling thread.
137///
138/// If the caller is not in secure computing mode, this returns
139/// [`SecureComputingMode::Disabled`]. If the caller is in strict secure
140/// computing mode, then this call will cause a [`Signal::Kill`] signal to be
141/// sent to the process. If the caller is in filter mode, and this system call
142/// is allowed by the seccomp filters, it returns
143/// [`SecureComputingMode::Filter`]; otherwise, the process is killed with a
144/// [`Signal::Kill`] signal.
145///
146/// Since Linux 3.8, the Seccomp field of the `/proc/[pid]/status` file
147/// provides a method of obtaining the same information, without the risk that
148/// the process is killed; see [the `proc` manual page].
149///
150/// # References
151///  - [`prctl(PR_GET_SECCOMP,…)`]
152///
153/// [`prctl(PR_GET_SECCOMP,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
154/// [the `proc` manual page]: https://man7.org/linux/man-pages/man5/proc.5.html
155#[inline]
156pub fn secure_computing_mode() -> io::Result<SecureComputingMode> {
157    unsafe { prctl_1arg(PR_GET_SECCOMP) }.and_then(TryInto::try_into)
158}
159*/
160
161const PR_SET_SECCOMP: c_int = 22;
162
163/// Set the secure computing mode for the calling thread, to limit the
164/// available system calls.
165///
166/// # References
167///  - [`prctl(PR_SET_SECCOMP,…)`]
168///
169/// [`prctl(PR_SET_SECCOMP,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
170#[inline]
171pub fn set_secure_computing_mode(mode: SecureComputingMode) -> io::Result<()> {
172    unsafe { prctl_2args(PR_SET_SECCOMP, mode as usize as *mut _) }.map(|_r| ())
173}
174
175//
176// PR_CAPBSET_READ/PR_CAPBSET_DROP
177//
178
const PR_CAPBSET_READ: c_int = 23;

/// Linux per-thread capability.
///
/// Each variant's discriminant is the corresponding `CAP_*` constant from
/// `linux_raw_sys::general`, so `capability as u32` yields the raw number.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[repr(u32)]
pub enum Capability {
    /// In a system with the `_POSIX_CHOWN_RESTRICTED` option defined, this
    /// overrides the restriction of changing file ownership and group
    /// ownership.
    ChangeOwnership = linux_raw_sys::general::CAP_CHOWN,
    /// Override all DAC access, including ACL execute access if `_POSIX_ACL`
    /// is defined. Excluding DAC access covered by
    /// [`Capability::LinuxImmutable`].
    DACOverride = linux_raw_sys::general::CAP_DAC_OVERRIDE,
    /// Overrides all DAC restrictions regarding read and search on files and
    /// directories, including ACL restrictions if `_POSIX_ACL` is defined.
    /// Excluding DAC access covered by [`Capability::LinuxImmutable`].
    DACReadSearch = linux_raw_sys::general::CAP_DAC_READ_SEARCH,
    /// Overrides all restrictions about allowed operations on files, where
    /// file owner ID must be equal to the user ID, except where
    /// [`Capability::FileSetID`] is applicable. It doesn't override MAC
    /// and DAC restrictions.
    FileOwner = linux_raw_sys::general::CAP_FOWNER,
    /// Overrides the following restrictions that the effective user ID shall
    /// match the file owner ID when setting the `S_ISUID` and `S_ISGID`
    /// bits on that file; that the effective group ID (or one of the
    /// supplementary group IDs) shall match the file owner ID when setting the
    /// `S_ISGID` bit on that file; that the `S_ISUID` and `S_ISGID` bits are
    /// cleared on successful return from `chown` (not implemented).
    FileSetID = linux_raw_sys::general::CAP_FSETID,
    /// Overrides the restriction that the real or effective user ID of a
    /// process sending a signal must match the real or effective user ID of
    /// the process receiving the signal.
    Kill = linux_raw_sys::general::CAP_KILL,
    /// Allows `setgid` manipulation. Allows `setgroups`. Allows forged gids on
    /// socket credentials passing.
    SetGroupID = linux_raw_sys::general::CAP_SETGID,
    /// Allows `set*uid` manipulation (including fsuid). Allows forged pids on
    /// socket credentials passing.
    SetUserID = linux_raw_sys::general::CAP_SETUID,
    /// Without VFS support for capabilities:
    ///  - Transfer any capability in your permitted set to any pid.
    ///  - Remove any capability in your permitted set from any pid.
    ///
    /// With VFS support for capabilities (neither of the above, but):
    ///  - Add any capability from current's capability bounding set to the
    ///    current process' inheritable set.
    ///  - Allow taking bits out of capability bounding set.
    ///  - Allow modification of the securebits for a process.
    SetPermittedCapabilities = linux_raw_sys::general::CAP_SETPCAP,
    /// Allow modification of `S_IMMUTABLE` and `S_APPEND` file attributes.
    LinuxImmutable = linux_raw_sys::general::CAP_LINUX_IMMUTABLE,
    /// Allows binding to TCP/UDP sockets below 1024. Allows binding to ATM
    /// VCIs below 32.
    NetBindService = linux_raw_sys::general::CAP_NET_BIND_SERVICE,
    /// Allow broadcasting, listen to multicast.
    NetBroadcast = linux_raw_sys::general::CAP_NET_BROADCAST,
    /// Allow interface configuration. Allow administration of IP firewall,
    /// masquerading and accounting. Allow setting debug option on sockets.
    /// Allow modification of routing tables. Allow setting arbitrary
    /// process / process group ownership on sockets. Allow binding to any
    /// address for transparent proxying (also via [`Capability::NetRaw`]).
    /// Allow setting TOS (type of service). Allow setting promiscuous
    /// mode. Allow clearing driver statistics. Allow multicasting. Allow
    /// read/write of device-specific registers. Allow activation of ATM
    /// control sockets.
    NetAdmin = linux_raw_sys::general::CAP_NET_ADMIN,
    /// Allow use of `RAW` sockets. Allow use of `PACKET` sockets. Allow
    /// binding to any address for transparent proxying (also via
    /// [`Capability::NetAdmin`]).
    NetRaw = linux_raw_sys::general::CAP_NET_RAW,
    /// Allow locking of shared memory segments. Allow mlock and mlockall
    /// (which doesn't really have anything to do with IPC).
    IPCLock = linux_raw_sys::general::CAP_IPC_LOCK,
    /// Override IPC ownership checks.
    IPCOwner = linux_raw_sys::general::CAP_IPC_OWNER,
    /// Insert and remove kernel modules - modify kernel without limit.
    SystemModule = linux_raw_sys::general::CAP_SYS_MODULE,
    /// Allow ioperm/iopl access. Allow sending USB messages to any device via
    /// `/dev/bus/usb`.
    SystemRawIO = linux_raw_sys::general::CAP_SYS_RAWIO,
    /// Allow use of `chroot`.
    SystemChangeRoot = linux_raw_sys::general::CAP_SYS_CHROOT,
    /// Allow `ptrace` of any process.
    SystemProcessTrace = linux_raw_sys::general::CAP_SYS_PTRACE,
    /// Allow configuration of process accounting.
    SystemProcessAccounting = linux_raw_sys::general::CAP_SYS_PACCT,
    /// Allow configuration of the secure attention key. Allow administration
    /// of the random device. Allow examination and configuration of disk
    /// quotas. Allow setting the domainname. Allow setting the hostname.
    /// Allow `mount` and `umount`, setting up new smb connection.
    /// Allow some autofs root ioctls. Allow nfsservctl. Allow
    /// `VM86_REQUEST_IRQ`. Allow to read/write pci config on alpha. Allow
    /// `irix_prctl` on mips (setstacksize). Allow flushing all cache on
    /// m68k (`sys_cacheflush`). Allow removing semaphores. Used instead of
    /// [`Capability::ChangeOwnership`] to "chown" IPC message queues,
    /// semaphores and shared memory. Allow locking/unlocking of shared
    /// memory segment. Allow turning swap on/off. Allow forged pids on
    /// socket credentials passing. Allow setting readahead and
    /// flushing buffers on block devices. Allow setting geometry in floppy
    /// driver. Allow turning DMA on/off in `xd` driver. Allow
    /// administration of md devices (mostly the above, but some
    /// extra ioctls). Allow tuning the ide driver. Allow access to the nvram
    /// device. Allow administration of `apm_bios`, serial and bttv (TV)
    /// device. Allow manufacturer commands in isdn CAPI support driver.
    /// Allow reading non-standardized portions of pci configuration space.
    /// Allow DDI debug ioctl on sbpcd driver. Allow setting up serial ports.
    /// Allow sending raw qic-117 commands. Allow enabling/disabling tagged
    /// queuing on SCSI controllers and sending arbitrary SCSI commands.
    /// Allow setting encryption key on loopback filesystem. Allow setting
    /// zone reclaim policy. Allow everything under
    /// [`Capability::BerkeleyPacketFilters`] and
    /// [`Capability::PerformanceMonitoring`] for backward compatibility.
    SystemAdmin = linux_raw_sys::general::CAP_SYS_ADMIN,
    /// Allow use of `reboot`.
    SystemBoot = linux_raw_sys::general::CAP_SYS_BOOT,
    /// Allow raising priority and setting priority on other (different UID)
    /// processes. Allow use of FIFO and round-robin (realtime) scheduling
    /// on own processes and setting the scheduling algorithm used by
    /// another process. Allow setting cpu affinity on other processes.
    /// Allow setting realtime ioprio class. Allow setting ioprio class on
    /// other processes.
    SystemNice = linux_raw_sys::general::CAP_SYS_NICE,
    /// Override resource limits. Set resource limits. Override quota limits.
    /// Override reserved space on ext2 filesystem. Modify data journaling
    /// mode on ext3 filesystem (uses journaling resources). NOTE: ext2
    /// honors fsuid when checking for resource overrides, so you can
    /// override using fsuid too. Override size restrictions on IPC message
    /// queues. Allow more than 64hz interrupts from the real-time clock.
    /// Override max number of consoles on console allocation. Override max
    /// number of keymaps. Control memory reclaim behavior.
    SystemResource = linux_raw_sys::general::CAP_SYS_RESOURCE,
    /// Allow manipulation of system clock. Allow `irix_stime` on mips. Allow
    /// setting the real-time clock.
    SystemTime = linux_raw_sys::general::CAP_SYS_TIME,
    /// Allow configuration of tty devices. Allow `vhangup` of tty.
    SystemTTYConfig = linux_raw_sys::general::CAP_SYS_TTY_CONFIG,
    /// Allow the privileged aspects of `mknod`.
    MakeNode = linux_raw_sys::general::CAP_MKNOD,
    /// Allow taking of leases on files.
    Lease = linux_raw_sys::general::CAP_LEASE,
    /// Allow writing the audit log via unicast netlink socket.
    AuditWrite = linux_raw_sys::general::CAP_AUDIT_WRITE,
    /// Allow configuration of audit via unicast netlink socket.
    AuditControl = linux_raw_sys::general::CAP_AUDIT_CONTROL,
    /// Set or remove capabilities on files. Map `uid=0` into a child user
    /// namespace.
    SetFileCapabilities = linux_raw_sys::general::CAP_SETFCAP,
    /// Override MAC access. The base kernel enforces no MAC policy. An LSM may
    /// enforce a MAC policy, and if it does and it chooses to implement
    /// capability based overrides of that policy, this is the capability
    /// it should use to do so.
    MACOverride = linux_raw_sys::general::CAP_MAC_OVERRIDE,
    /// Allow MAC configuration or state changes. The base kernel requires no
    /// MAC configuration. An LSM may enforce a MAC policy, and if it does
    /// and it chooses to implement capability based
    /// checks on modifications to that policy or the data required to maintain
    /// it, this is the capability it should use to do so.
    MACAdmin = linux_raw_sys::general::CAP_MAC_ADMIN,
    /// Allow configuring the kernel's `syslog` (`printk` behaviour).
    SystemLog = linux_raw_sys::general::CAP_SYSLOG,
    /// Allow triggering something that will wake the system.
    WakeAlarm = linux_raw_sys::general::CAP_WAKE_ALARM,
    /// Allow preventing system suspends.
    BlockSuspend = linux_raw_sys::general::CAP_BLOCK_SUSPEND,
    /// Allow reading the audit log via multicast netlink socket.
    AuditRead = linux_raw_sys::general::CAP_AUDIT_READ,
    /// Allow system performance and observability privileged operations using
    /// `perf_events`, `i915_perf` and other kernel subsystems.
    PerformanceMonitoring = linux_raw_sys::general::CAP_PERFMON,
    /// This capability allows the following BPF operations:
    ///  - Creating all types of BPF maps
    ///  - Advanced verifier features
    ///     - Indirect variable access
    ///     - Bounded loops
    ///     - BPF to BPF function calls
    ///     - Scalar precision tracking
    ///     - Larger complexity limits
    ///     - Dead code elimination
    ///     - And potentially other features
    ///  - Loading BPF Type Format (BTF) data
    ///  - Retrieve `xlated` and JITed code of BPF programs
    ///  - Use `bpf_spin_lock` helper
    ///
    /// [`Capability::PerformanceMonitoring`] relaxes the verifier checks
    /// further:
    ///  - BPF progs can use pointer-to-integer conversions
    ///  - speculation attack hardening measures are bypassed
    ///  - `bpf_probe_read` to read arbitrary kernel memory is allowed
    ///  - `bpf_trace_printk` to print kernel memory is allowed
    ///
    /// [`Capability::SystemAdmin`] is required to use `bpf_probe_write_user`.
    ///
    /// [`Capability::SystemAdmin`] is required to iterate system-wide loaded
    /// programs, maps, links, and BTFs, and convert their IDs to file
    /// descriptors.
    ///
    /// [`Capability::PerformanceMonitoring`] and
    /// [`Capability::BerkeleyPacketFilters`] are required to load tracing
    /// programs. [`Capability::NetAdmin`] and
    /// [`Capability::BerkeleyPacketFilters`] are required to load
    /// networking programs.
    BerkeleyPacketFilters = linux_raw_sys::general::CAP_BPF,
    /// Allow checkpoint/restore related operations. Allow PID selection during
    /// `clone3`. Allow writing to `ns_last_pid`.
    CheckpointRestore = linux_raw_sys::general::CAP_CHECKPOINT_RESTORE,
}
385
386/// Check if the specified capability is in the calling thread's capability
387/// bounding set.
388///
389/// # References
390///  - [`prctl(PR_CAPBSET_READ,…)`]
391///
392/// [`prctl(PR_CAPBSET_READ,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
393#[inline]
394pub fn capability_is_in_bounding_set(capability: Capability) -> io::Result<bool> {
395    unsafe { prctl_2args(PR_CAPBSET_READ, capability as usize as *mut _) }.map(|r| r != 0)
396}
397
398const PR_CAPBSET_DROP: c_int = 24;
399
400/// If the calling thread has the [`Capability::SetPermittedCapabilities`]
401/// capability within its user namespace, then drop the specified capability
402/// from the thread's capability bounding set.
403///
404/// # References
405///  - [`prctl(PR_CAPBSET_DROP,…)`]
406///
407/// [`prctl(PR_CAPBSET_DROP,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
408#[inline]
409pub fn remove_capability_from_bounding_set(capability: Capability) -> io::Result<()> {
410    unsafe { prctl_2args(PR_CAPBSET_DROP, capability as usize as *mut _) }.map(|_r| ())
411}
412
413//
414// PR_GET_SECUREBITS/PR_SET_SECUREBITS
415//
416
const PR_GET_SECUREBITS: c_int = 27;

bitflags! {
    /// `SECBIT_*`.
    ///
    /// The flags come in pairs: a behaviour bit at an even position and, at
    /// the next position, a `*_LOCKED` bit that makes the behaviour bit
    /// irreversible.
    #[repr(transparent)]
    #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
    pub struct CapabilitiesSecureBits: u32 {
        /// If this bit is set, then the kernel does not grant capabilities
        /// when a `set-user-ID-root` program is executed, or when a process
        /// with an effective or real UID of 0 calls `execve`.
        const NO_ROOT = 1_u32 << 0;
        /// Set [`NO_ROOT`] irreversibly.
        ///
        /// [`NO_ROOT`]: Self::NO_ROOT
        const NO_ROOT_LOCKED = 1_u32 << 1;
        /// Setting this flag stops the kernel from adjusting the process'
        /// permitted, effective, and ambient capability sets when the thread's
        /// effective and filesystem UIDs are switched between zero and nonzero
        /// values.
        const NO_SETUID_FIXUP = 1_u32 << 2;
        /// Set [`NO_SETUID_FIXUP`] irreversibly.
        ///
        /// [`NO_SETUID_FIXUP`]: Self::NO_SETUID_FIXUP
        const NO_SETUID_FIXUP_LOCKED = 1_u32 << 3;
        /// Setting this flag allows a thread that has one or more 0 UIDs to
        /// retain capabilities in its permitted set when it switches all of
        /// its UIDs to nonzero values.
        const KEEP_CAPS = 1_u32 << 4;
        /// Set [`KEEP_CAPS`] irreversibly.
        ///
        /// [`KEEP_CAPS`]: Self::KEEP_CAPS
        const KEEP_CAPS_LOCKED = 1_u32 << 5;
        /// Setting this flag disallows raising ambient capabilities via the
        /// `prctl`'s `PR_CAP_AMBIENT_RAISE` operation.
        const NO_CAP_AMBIENT_RAISE = 1_u32 << 6;
        /// Set [`NO_CAP_AMBIENT_RAISE`] irreversibly.
        ///
        /// [`NO_CAP_AMBIENT_RAISE`]: Self::NO_CAP_AMBIENT_RAISE
        const NO_CAP_AMBIENT_RAISE_LOCKED = 1_u32 << 7;

        /// Treat every bit as known, so that bits not named above are
        /// accepted rather than rejected; see
        /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags>
        const _ = !0;
    }
}
461
462/// Get the `securebits` flags of the calling thread.
463///
464/// # References
465///  - [`prctl(PR_GET_SECUREBITS,…)`]
466///
467/// [`prctl(PR_GET_SECUREBITS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
468#[inline]
469pub fn capabilities_secure_bits() -> io::Result<CapabilitiesSecureBits> {
470    let r = unsafe { prctl_1arg(PR_GET_SECUREBITS)? } as c_uint;
471    CapabilitiesSecureBits::from_bits(r).ok_or(io::Errno::RANGE)
472}
473
474const PR_SET_SECUREBITS: c_int = 28;
475
476/// Set the `securebits` flags of the calling thread.
477///
478/// # References
479///  - [`prctl(PR_SET_SECUREBITS,…)`]
480///
481/// [`prctl(PR_SET_SECUREBITS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
482#[inline]
483pub fn set_capabilities_secure_bits(bits: CapabilitiesSecureBits) -> io::Result<()> {
484    unsafe { prctl_2args(PR_SET_SECUREBITS, bits.bits() as usize as *mut _) }.map(|_r| ())
485}
486
487//
488// PR_GET_TIMERSLACK/PR_SET_TIMERSLACK
489//
490
491const PR_GET_TIMERSLACK: c_int = 30;
492
493/// Get the `current` timer slack value of the calling thread.
494///
495/// # References
496///  - [`prctl(PR_GET_TIMERSLACK,…)`]
497///
498/// [`prctl(PR_GET_TIMERSLACK,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
499#[inline]
500pub fn current_timer_slack() -> io::Result<u64> {
501    unsafe { prctl_1arg(PR_GET_TIMERSLACK) }.map(|r| r as u64)
502}
503
504const PR_SET_TIMERSLACK: c_int = 29;
505
506/// Sets the `current` timer slack value for the calling thread.
507///
508/// # References
509///  - [`prctl(PR_SET_TIMERSLACK,…)`]
510///
511/// [`prctl(PR_SET_TIMERSLACK,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
512#[inline]
513pub fn set_current_timer_slack(value: Option<NonZeroU64>) -> io::Result<()> {
514    let value = usize::try_from(value.map_or(0, NonZeroU64::get)).map_err(|_r| io::Errno::RANGE)?;
515    unsafe { prctl_2args(PR_SET_TIMERSLACK, value as *mut _) }.map(|_r| ())
516}
517
518//
519// PR_GET_NO_NEW_PRIVS/PR_SET_NO_NEW_PRIVS
520//
521
522const PR_GET_NO_NEW_PRIVS: c_int = 39;
523
524/// Get the value of the `no_new_privs` attribute for the calling thread.
525///
526/// # References
527///  - [`prctl(PR_GET_NO_NEW_PRIVS,…)`]
528///
529/// [`prctl(PR_GET_NO_NEW_PRIVS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
530#[inline]
531pub fn no_new_privs() -> io::Result<bool> {
532    unsafe { prctl_1arg(PR_GET_NO_NEW_PRIVS) }.map(|r| r != 0)
533}
534
535const PR_SET_NO_NEW_PRIVS: c_int = 38;
536
537/// Set the calling thread's `no_new_privs` attribute.
538///
539/// # References
540///  - [`prctl(PR_SET_NO_NEW_PRIVS,…)`]
541///
542/// [`prctl(PR_SET_NO_NEW_PRIVS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
543#[inline]
544pub fn set_no_new_privs(no_new_privs: bool) -> io::Result<()> {
545    unsafe { prctl_2args(PR_SET_NO_NEW_PRIVS, usize::from(no_new_privs) as *mut _) }.map(|_r| ())
546}
547
548//
549// PR_GET_TID_ADDRESS
550//
551
552const PR_GET_TID_ADDRESS: c_int = 40;
553
554/// Get the `clear_child_tid` address set by `set_tid_address`
555/// and `clone`'s `CLONE_CHILD_CLEARTID` flag.
556///
557/// # References
558///  - [`prctl(PR_GET_TID_ADDRESS,…)`]
559///
560/// [`prctl(PR_GET_TID_ADDRESS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
561#[inline]
562pub fn get_clear_child_tid_address() -> io::Result<Option<NonNull<c_void>>> {
563    unsafe { prctl_get_at_arg2_optional::<*mut c_void>(PR_GET_TID_ADDRESS) }.map(NonNull::new)
564}
565
566//
567// PR_GET_THP_DISABLE/PR_SET_THP_DISABLE
568//
569
570const PR_GET_THP_DISABLE: c_int = 42;
571
572/// Get the current setting of the `THP disable` flag for the calling thread.
573///
574/// # References
575///  - [`prctl(PR_GET_THP_DISABLE,…)`]
576///
577/// [`prctl(PR_GET_THP_DISABLE,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
578#[inline]
579pub fn transparent_huge_pages_are_disabled() -> io::Result<bool> {
580    unsafe { prctl_1arg(PR_GET_THP_DISABLE) }.map(|r| r != 0)
581}
582
583const PR_SET_THP_DISABLE: c_int = 41;
584
585/// Set the state of the `THP disable` flag for the calling thread.
586///
587/// # References
588///  - [`prctl(PR_SET_THP_DISABLE,…)`]
589///
590/// [`prctl(PR_SET_THP_DISABLE,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
591#[inline]
592pub fn disable_transparent_huge_pages(thp_disable: bool) -> io::Result<()> {
593    unsafe { prctl_2args(PR_SET_THP_DISABLE, usize::from(thp_disable) as *mut _) }.map(|_r| ())
594}
595
596//
597// PR_CAP_AMBIENT
598//
599
600const PR_CAP_AMBIENT: c_int = 47;
601
602const PR_CAP_AMBIENT_IS_SET: usize = 1;
603
604/// Check if the specified capability is in the ambient set.
605///
606/// # References
607///  - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,…)`]
608///
609/// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
610#[inline]
611pub fn capability_is_in_ambient_set(capability: Capability) -> io::Result<bool> {
612    let cap = capability as usize as *mut _;
613    unsafe { prctl_3args(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET as *mut _, cap) }.map(|r| r != 0)
614}
615
616const PR_CAP_AMBIENT_CLEAR_ALL: usize = 4;
617
618/// Remove all capabilities from the ambient set.
619///
620/// # References
621///  - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,…)`]
622///
623/// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
624#[inline]
625pub fn clear_ambient_capability_set() -> io::Result<()> {
626    unsafe { prctl_2args(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL as *mut _) }.map(|_r| ())
627}
628
629const PR_CAP_AMBIENT_RAISE: usize = 2;
630const PR_CAP_AMBIENT_LOWER: usize = 3;
631
632/// Add or remove the specified capability to the ambient set.
633///
634/// # References
635///  - [`prctl(PR_CAP_AMBIENT,…)`]
636///
637/// [`prctl(PR_CAP_AMBIENT,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
638#[inline]
639pub fn configure_capability_in_ambient_set(capability: Capability, enable: bool) -> io::Result<()> {
640    let sub_operation = if enable {
641        PR_CAP_AMBIENT_RAISE
642    } else {
643        PR_CAP_AMBIENT_LOWER
644    };
645    let cap = capability as usize as *mut _;
646
647    unsafe { prctl_3args(PR_CAP_AMBIENT, sub_operation as *mut _, cap) }.map(|_r| ())
648}
649
650//
651// PR_SVE_GET_VL/PR_SVE_SET_VL
652//
653
const PR_SVE_GET_VL: c_int = 51;

// Layout of the SVE configuration word: the low 16 bits carry the vector
// length, and bit 17 is the inherit-across-`execve` flag.
const PR_SVE_VL_LEN_MASK: u32 = 0xffff;
const PR_SVE_VL_INHERIT: u32 = 1_u32 << 17;

/// Scalable Vector Extension vector length configuration.
///
/// This is the decoded form of the value reported by `PR_SVE_GET_VL`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct SVEVectorLengthConfig {
    /// Vector length in bytes.
    pub vector_length_in_bytes: u32,
    /// Vector length inherited across `execve`.
    pub vector_length_inherited_across_execve: bool,
}
667
668/// Get the thread's current SVE vector length configuration.
669///
670/// # References
671///  - [`prctl(PR_SVE_GET_VL,…)`]
672///
673/// [`prctl(PR_SVE_GET_VL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
674#[inline]
675pub fn sve_vector_length_configuration() -> io::Result<SVEVectorLengthConfig> {
676    let bits = unsafe { prctl_1arg(PR_SVE_GET_VL)? } as c_uint;
677    Ok(SVEVectorLengthConfig {
678        vector_length_in_bytes: bits & PR_SVE_VL_LEN_MASK,
679        vector_length_inherited_across_execve: (bits & PR_SVE_VL_INHERIT) != 0,
680    })
681}
682
683const PR_SVE_SET_VL: c_int = 50;
684
685const PR_SVE_SET_VL_ONEXEC: u32 = 1_u32 << 18;
686
687/// Configure the thread's vector length of Scalable Vector Extension.
688///
689/// # References
690///  - [`prctl(PR_SVE_SET_VL,…)`]
691///
692/// # Safety
693///
694/// Please ensure the conditions necessary to safely call this function,
695/// as detailed in the references above.
696///
697/// [`prctl(PR_SVE_SET_VL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
698#[inline]
699pub unsafe fn set_sve_vector_length_configuration(
700    vector_length_in_bytes: usize,
701    vector_length_inherited_across_execve: bool,
702    defer_change_to_next_execve: bool,
703) -> io::Result<()> {
704    let vector_length_in_bytes =
705        u32::try_from(vector_length_in_bytes).map_err(|_r| io::Errno::RANGE)?;
706
707    let mut bits = vector_length_in_bytes & PR_SVE_VL_LEN_MASK;
708
709    if vector_length_inherited_across_execve {
710        bits |= PR_SVE_VL_INHERIT;
711    }
712
713    if defer_change_to_next_execve {
714        bits |= PR_SVE_SET_VL_ONEXEC;
715    }
716
717    prctl_2args(PR_SVE_SET_VL, bits as usize as *mut _).map(|_r| ())
718}
719
720//
721// PR_PAC_RESET_KEYS
722//
723
724const PR_PAC_RESET_KEYS: c_int = 54;
725
726/// Securely reset the thread's pointer authentication keys to fresh random
727/// values generated by the kernel.
728///
729/// # References
730///  - [`prctl(PR_PAC_RESET_KEYS,…)`]
731///
732/// # Safety
733///
734/// Please ensure the conditions necessary to safely call this function,
735/// as detailed in the references above.
736///
737/// [`prctl(PR_PAC_RESET_KEYS,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
738#[inline]
739pub unsafe fn reset_pointer_authentication_keys(
740    keys: Option<PointerAuthenticationKeys>,
741) -> io::Result<()> {
742    let keys = keys.as_ref().map_or(0_u32, PointerAuthenticationKeys::bits);
743    prctl_2args(PR_PAC_RESET_KEYS, keys as usize as *mut _).map(|_r| ())
744}
745
746//
747// PR_GET_TAGGED_ADDR_CTRL/PR_SET_TAGGED_ADDR_CTRL
748//
749
const PR_GET_TAGGED_ADDR_CTRL: c_int = 56;

// The MTE tag field occupies 16 bits of the control word, starting at bit 3
// (just above the three mode bits of `TaggedAddressMode`).
const PR_MTE_TAG_SHIFT: u32 = 3;
const PR_MTE_TAG_MASK: u32 = 0xffff_u32 << PR_MTE_TAG_SHIFT;

bitflags! {
    /// Zero means addresses that are passed for the purpose of being
    /// dereferenced by the kernel must be untagged.
    #[repr(transparent)]
    #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
    pub struct TaggedAddressMode: u32 {
        /// Addresses that are passed for the purpose of being dereferenced by
        /// the kernel may be tagged.
        const ENABLED = 1_u32 << 0;
        /// Synchronous tag check fault mode.
        const TCF_SYNC = 1_u32 << 1;
        /// Asynchronous tag check fault mode.
        const TCF_ASYNC = 1_u32 << 2;

        /// Treat every bit as known, so that bits not named above are
        /// accepted rather than rejected; see
        /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags>
        const _ = !0;
    }
}
773
774/// Get the current tagged address mode for the calling thread.
775///
776/// # References
777///  - [`prctl(PR_GET_TAGGED_ADDR_CTRL,…)`]
778///
779/// [`prctl(PR_GET_TAGGED_ADDR_CTRL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
780#[inline]
781pub fn current_tagged_address_mode() -> io::Result<(Option<TaggedAddressMode>, u32)> {
782    let r = unsafe { prctl_1arg(PR_GET_TAGGED_ADDR_CTRL)? } as c_uint;
783    let mode = r & 0b111_u32;
784    let mte_tag = (r & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT;
785    Ok((TaggedAddressMode::from_bits(mode), mte_tag))
786}
787
788const PR_SET_TAGGED_ADDR_CTRL: c_int = 55;
789
790/// Controls support for passing tagged user-space addresses to the kernel.
791///
792/// # References
793///  - [`prctl(PR_SET_TAGGED_ADDR_CTRL,…)`]
794///
795/// # Safety
796///
797/// Please ensure the conditions necessary to safely call this function, as
798/// detailed in the references above.
799///
800/// [`prctl(PR_SET_TAGGED_ADDR_CTRL,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
801#[inline]
802pub unsafe fn set_current_tagged_address_mode(
803    mode: Option<TaggedAddressMode>,
804    mte_tag: u32,
805) -> io::Result<()> {
806    let config = mode.as_ref().map_or(0_u32, TaggedAddressMode::bits)
807        | ((mte_tag << PR_MTE_TAG_SHIFT) & PR_MTE_TAG_MASK);
808    prctl_2args(PR_SET_TAGGED_ADDR_CTRL, config as usize as *mut _).map(|_r| ())
809}
810
811//
812// PR_SET_SYSCALL_USER_DISPATCH
813//
814
815const PR_SET_SYSCALL_USER_DISPATCH: c_int = 59;
816
817const PR_SYS_DISPATCH_OFF: usize = 0;
818
819/// Disable Syscall User Dispatch mechanism.
820///
821/// # References
822///  - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,…)`]
823///
824/// # Safety
825///
826/// Please ensure the conditions necessary to safely call this function, as
827/// detailed in the references above.
828///
829/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
830#[inline]
831pub unsafe fn disable_syscall_user_dispatch() -> io::Result<()> {
832    prctl_2args(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_OFF as *mut _).map(|_r| ())
833}
834
835const PR_SYS_DISPATCH_ON: usize = 1;
836
837/// Allow system calls to be executed.
838const SYSCALL_DISPATCH_FILTER_ALLOW: u8 = 0;
839/// Block system calls from executing.
840const SYSCALL_DISPATCH_FILTER_BLOCK: u8 = 1;
841
842/// Value of the fast switch flag controlling system calls user dispatch
843/// mechanism without the need to issue a syscall.
844#[derive(Copy, Clone, Debug, Eq, PartialEq)]
845#[repr(u8)]
846pub enum SysCallUserDispatchFastSwitch {
847    /// System calls are allowed to execute.
848    Allow = SYSCALL_DISPATCH_FILTER_ALLOW,
849    /// System calls are blocked from executing.
850    Block = SYSCALL_DISPATCH_FILTER_BLOCK,
851}
852
853impl TryFrom<u8> for SysCallUserDispatchFastSwitch {
854    type Error = io::Errno;
855
856    fn try_from(value: u8) -> Result<Self, Self::Error> {
857        match value {
858            SYSCALL_DISPATCH_FILTER_ALLOW => Ok(Self::Allow),
859            SYSCALL_DISPATCH_FILTER_BLOCK => Ok(Self::Block),
860            _ => Err(io::Errno::RANGE),
861        }
862    }
863}
864
865/// Enable Syscall User Dispatch mechanism.
866///
867/// # References
868///  - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,…)`]
869///
870/// # Safety
871///
872/// Please ensure the conditions necessary to safely call this function, as
873/// detailed in the references above.
874///
875/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,…)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
876#[inline]
877pub unsafe fn enable_syscall_user_dispatch(
878    always_allowed_region: &[u8],
879    fast_switch_flag: &AtomicU8,
880) -> io::Result<()> {
881    syscalls::prctl(
882        PR_SET_SYSCALL_USER_DISPATCH,
883        PR_SYS_DISPATCH_ON as *mut _,
884        always_allowed_region.as_ptr() as *mut _,
885        always_allowed_region.len() as *mut _,
886        as_ptr(fast_switch_flag) as *mut _,
887    )
888    .map(|_r| ())
889}
890
891//
892// PR_SCHED_CORE
893//
894
895const PR_SCHED_CORE: c_int = 62;
896
897const PR_SCHED_CORE_GET: usize = 0;
898
899const PR_SCHED_CORE_SCOPE_THREAD: u32 = 0;
900const PR_SCHED_CORE_SCOPE_THREAD_GROUP: u32 = 1;
901const PR_SCHED_CORE_SCOPE_PROCESS_GROUP: u32 = 2;
902
903/// `PR_SCHED_CORE_SCOPE_*`.
904#[derive(Copy, Clone, Debug, Eq, PartialEq)]
905#[repr(u32)]
906pub enum CoreSchedulingScope {
907    /// Operation will be performed for the thread.
908    Thread = PR_SCHED_CORE_SCOPE_THREAD,
909    /// Operation will be performed for all tasks in the task group of the
910    /// process.
911    ThreadGroup = PR_SCHED_CORE_SCOPE_THREAD_GROUP,
912    /// Operation will be performed for all processes in the process group.
913    ProcessGroup = PR_SCHED_CORE_SCOPE_PROCESS_GROUP,
914}
915
916impl TryFrom<u32> for CoreSchedulingScope {
917    type Error = io::Errno;
918
919    fn try_from(value: u32) -> Result<Self, Self::Error> {
920        match value {
921            PR_SCHED_CORE_SCOPE_THREAD => Ok(Self::Thread),
922            PR_SCHED_CORE_SCOPE_THREAD_GROUP => Ok(Self::ThreadGroup),
923            PR_SCHED_CORE_SCOPE_PROCESS_GROUP => Ok(Self::ProcessGroup),
924            _ => Err(io::Errno::RANGE),
925        }
926    }
927}
928
929/// Get core scheduling cookie of a process.
930///
931/// # References
932///  - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,…)`]
933///
934/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,…)`]: https://www.kernel.org/doc/html/v6.10/admin-guide/hw-vuln/core-scheduling.html
935#[inline]
936pub fn core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<u64> {
937    let mut value: MaybeUninit<u64> = MaybeUninit::uninit();
938    unsafe {
939        syscalls::prctl(
940            PR_SCHED_CORE,
941            PR_SCHED_CORE_GET as *mut _,
942            pid.as_raw_nonzero().get() as usize as *mut _,
943            scope as usize as *mut _,
944            value.as_mut_ptr().cast(),
945        )?;
946        Ok(value.assume_init())
947    }
948}
949
950const PR_SCHED_CORE_CREATE: usize = 1;
951
952/// Create unique core scheduling cookie.
953///
954/// # References
955///  - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,…)`]
956///
957/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,…)`]: https://www.kernel.org/doc/html/v6.10/admin-guide/hw-vuln/core-scheduling.html
958#[inline]
959pub fn create_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> {
960    unsafe {
961        syscalls::prctl(
962            PR_SCHED_CORE,
963            PR_SCHED_CORE_CREATE as *mut _,
964            pid.as_raw_nonzero().get() as usize as *mut _,
965            scope as usize as *mut _,
966            ptr::null_mut(),
967        )
968        .map(|_r| ())
969    }
970}
971
972const PR_SCHED_CORE_SHARE_TO: usize = 2;
973
974/// Push core scheduling cookie to a process.
975///
976/// # References
977///  - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,…)`]
978///
979/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,…)`]: https://www.kernel.org/doc/html/v6.10/admin-guide/hw-vuln/core-scheduling.html
980#[inline]
981pub fn push_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> {
982    unsafe {
983        syscalls::prctl(
984            PR_SCHED_CORE,
985            PR_SCHED_CORE_SHARE_TO as *mut _,
986            pid.as_raw_nonzero().get() as usize as *mut _,
987            scope as usize as *mut _,
988            ptr::null_mut(),
989        )
990        .map(|_r| ())
991    }
992}
993
994const PR_SCHED_CORE_SHARE_FROM: usize = 3;
995
996/// Pull core scheduling cookie from a process.
997///
998/// # References
999///  - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,…)`]
1000///
1001/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,…)`]: https://www.kernel.org/doc/html/v6.10/admin-guide/hw-vuln/core-scheduling.html
1002#[inline]
1003pub fn pull_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> {
1004    unsafe {
1005        syscalls::prctl(
1006            PR_SCHED_CORE,
1007            PR_SCHED_CORE_SHARE_FROM as *mut _,
1008            pid.as_raw_nonzero().get() as usize as *mut _,
1009            scope as usize as *mut _,
1010            ptr::null_mut(),
1011        )
1012        .map(|_r| ())
1013    }
1014}