rustix/backend/linux_raw/thread/
syscalls.rs

1//! linux_raw syscalls supporting `rustix::thread`.
2//!
3//! # Safety
4//!
5//! See the `rustix::backend` module documentation for details.
6#![allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
7
8use crate::backend::c;
9use crate::backend::conv::{
10    by_mut, by_ref, c_int, c_uint, ret, ret_c_int, ret_c_int_infallible, ret_usize, slice,
11    slice_just_addr, slice_just_addr_mut, zero,
12};
13use crate::fd::BorrowedFd;
14use crate::io;
15use crate::pid::Pid;
16use crate::thread::{futex, ClockId, NanosleepRelativeResult, Timespec};
17use core::mem::MaybeUninit;
18use core::sync::atomic::AtomicU32;
19use linux_raw_sys::general::{__kernel_timespec, TIMER_ABSTIME};
20#[cfg(target_pointer_width = "32")]
21use {crate::utils::option_as_ptr, linux_raw_sys::general::timespec as __kernel_old_timespec};
22
23#[inline]
24pub(crate) fn clock_nanosleep_relative(
25    id: ClockId,
26    req: &__kernel_timespec,
27) -> NanosleepRelativeResult {
28    #[cfg(target_pointer_width = "32")]
29    unsafe {
30        let mut rem = MaybeUninit::<__kernel_timespec>::uninit();
31        match ret(syscall!(
32            __NR_clock_nanosleep_time64,
33            id,
34            c_int(0),
35            by_ref(req),
36            &mut rem
37        ))
38        .or_else(|err| {
39            // See the comments in `rustix_clock_gettime_via_syscall` about
40            // emulation.
41            if err == io::Errno::NOSYS {
42                clock_nanosleep_relative_old(id, req, &mut rem)
43            } else {
44                Err(err)
45            }
46        }) {
47            Ok(()) => NanosleepRelativeResult::Ok,
48            Err(io::Errno::INTR) => NanosleepRelativeResult::Interrupted(rem.assume_init()),
49            Err(err) => NanosleepRelativeResult::Err(err),
50        }
51    }
52    #[cfg(target_pointer_width = "64")]
53    unsafe {
54        let mut rem = MaybeUninit::<__kernel_timespec>::uninit();
55        match ret(syscall!(
56            __NR_clock_nanosleep,
57            id,
58            c_int(0),
59            by_ref(req),
60            &mut rem
61        )) {
62            Ok(()) => NanosleepRelativeResult::Ok,
63            Err(io::Errno::INTR) => NanosleepRelativeResult::Interrupted(rem.assume_init()),
64            Err(err) => NanosleepRelativeResult::Err(err),
65        }
66    }
67}
68
/// Fallback for `clock_nanosleep_relative` used when `clock_nanosleep_time64`
/// reports `ENOSYS`: narrows `req` to the old timespec layout and calls the
/// plain `clock_nanosleep` syscall.
///
/// Returns `EINVAL` if either field of `req` does not fit in the old
/// timespec. When the syscall fails with `EINTR`, the remaining time reported
/// by the kernel is widened and stored in `rem` before the error is returned,
/// so the caller may read `rem` in exactly that case. In every other case
/// `rem` is left untouched.
///
/// # Safety
///
/// This issues a raw system call; see the `rustix::backend` module
/// documentation for details.
#[cfg(target_pointer_width = "32")]
unsafe fn clock_nanosleep_relative_old(
    id: ClockId,
    req: &__kernel_timespec,
    rem: &mut MaybeUninit<__kernel_timespec>,
) -> io::Result<()> {
    let old_req = __kernel_old_timespec {
        tv_sec: req.tv_sec.try_into().map_err(|_| io::Errno::INVAL)?,
        tv_nsec: req.tv_nsec.try_into().map_err(|_| io::Errno::INVAL)?,
    };
    let mut old_rem = MaybeUninit::<__kernel_old_timespec>::uninit();
    let result = ret(syscall!(
        __NR_clock_nanosleep,
        id,
        c_int(0),
        by_ref(&old_req),
        &mut old_rem
    ));
    // The remaining time is only reported for an interrupted relative sleep,
    // and that is also the only case in which the caller reads `rem`. Convert
    // it before propagating the error; on any other outcome `old_rem` must
    // not be assumed initialized.
    if let Err(io::Errno::INTR) = result {
        let old_rem = old_rem.assume_init();
        rem.write(__kernel_timespec {
            tv_sec: old_rem.tv_sec.into(),
            tv_nsec: old_rem.tv_nsec.into(),
        });
    }
    result
}
94
95#[inline]
96pub(crate) fn clock_nanosleep_absolute(id: ClockId, req: &__kernel_timespec) -> io::Result<()> {
97    #[cfg(target_pointer_width = "32")]
98    unsafe {
99        ret(syscall_readonly!(
100            __NR_clock_nanosleep_time64,
101            id,
102            c_uint(TIMER_ABSTIME),
103            by_ref(req),
104            zero()
105        ))
106        .or_else(|err| {
107            // See the comments in `rustix_clock_gettime_via_syscall` about
108            // emulation.
109            if err == io::Errno::NOSYS {
110                clock_nanosleep_absolute_old(id, req)
111            } else {
112                Err(err)
113            }
114        })
115    }
116    #[cfg(target_pointer_width = "64")]
117    unsafe {
118        ret(syscall_readonly!(
119            __NR_clock_nanosleep,
120            id,
121            c_uint(TIMER_ABSTIME),
122            by_ref(req),
123            zero()
124        ))
125    }
126}
127
/// Fallback for `clock_nanosleep_absolute` used when
/// `clock_nanosleep_time64` reports `ENOSYS`: narrows `req` to the old
/// timespec layout and calls the plain `clock_nanosleep` syscall.
///
/// Returns `EINVAL` if either field of `req` does not fit in the old
/// timespec.
///
/// # Safety
///
/// This issues a raw system call; see the `rustix::backend` module
/// documentation for details.
#[cfg(target_pointer_width = "32")]
unsafe fn clock_nanosleep_absolute_old(id: ClockId, req: &__kernel_timespec) -> io::Result<()> {
    let old_req = __kernel_old_timespec {
        tv_sec: req.tv_sec.try_into().map_err(|_| io::Errno::INVAL)?,
        tv_nsec: req.tv_nsec.try_into().map_err(|_| io::Errno::INVAL)?,
    };
    ret(syscall_readonly!(
        __NR_clock_nanosleep,
        id,
        // `req` is an absolute deadline, so the fallback must request
        // `TIMER_ABSTIME` just like the `time64` path does; passing `0` here
        // would make the kernel treat the deadline as a relative duration.
        c_uint(TIMER_ABSTIME),
        by_ref(&old_req),
        zero()
    ))
}
142
143#[inline]
144pub(crate) fn nanosleep(req: &__kernel_timespec) -> NanosleepRelativeResult {
145    #[cfg(target_pointer_width = "32")]
146    unsafe {
147        let mut rem = MaybeUninit::<__kernel_timespec>::uninit();
148        match ret(syscall!(
149            __NR_clock_nanosleep_time64,
150            ClockId::Realtime,
151            c_int(0),
152            by_ref(req),
153            &mut rem
154        ))
155        .or_else(|err| {
156            // See the comments in `rustix_clock_gettime_via_syscall` about
157            // emulation.
158            if err == io::Errno::NOSYS {
159                nanosleep_old(req, &mut rem)
160            } else {
161                Err(err)
162            }
163        }) {
164            Ok(()) => NanosleepRelativeResult::Ok,
165            Err(io::Errno::INTR) => NanosleepRelativeResult::Interrupted(rem.assume_init()),
166            Err(err) => NanosleepRelativeResult::Err(err),
167        }
168    }
169    #[cfg(target_pointer_width = "64")]
170    unsafe {
171        let mut rem = MaybeUninit::<__kernel_timespec>::uninit();
172        match ret(syscall!(__NR_nanosleep, by_ref(req), &mut rem)) {
173            Ok(()) => NanosleepRelativeResult::Ok,
174            Err(io::Errno::INTR) => NanosleepRelativeResult::Interrupted(rem.assume_init()),
175            Err(err) => NanosleepRelativeResult::Err(err),
176        }
177    }
178}
179
/// Fallback for `nanosleep` used when `clock_nanosleep_time64` reports
/// `ENOSYS`: narrows `req` to the old timespec layout and calls the plain
/// `nanosleep` syscall.
///
/// Returns `EINVAL` if either field of `req` does not fit in the old
/// timespec. When the syscall fails with `EINTR`, the remaining time reported
/// by the kernel is widened and stored in `rem` before the error is returned,
/// so the caller may read `rem` in exactly that case. In every other case
/// `rem` is left untouched.
///
/// # Safety
///
/// This issues a raw system call; see the `rustix::backend` module
/// documentation for details.
#[cfg(target_pointer_width = "32")]
unsafe fn nanosleep_old(
    req: &__kernel_timespec,
    rem: &mut MaybeUninit<__kernel_timespec>,
) -> io::Result<()> {
    let old_req = __kernel_old_timespec {
        tv_sec: req.tv_sec.try_into().map_err(|_| io::Errno::INVAL)?,
        tv_nsec: req.tv_nsec.try_into().map_err(|_| io::Errno::INVAL)?,
    };
    let mut old_rem = MaybeUninit::<__kernel_old_timespec>::uninit();
    let result = ret(syscall!(__NR_nanosleep, by_ref(&old_req), &mut old_rem));
    // The remaining time is only reported for an interrupted sleep, and that
    // is also the only case in which the caller reads `rem`. Convert it
    // before propagating the error; on any other outcome `old_rem` must not
    // be assumed initialized.
    if let Err(io::Errno::INTR) = result {
        let old_rem = old_rem.assume_init();
        rem.write(__kernel_timespec {
            tv_sec: old_rem.tv_sec.into(),
            tv_nsec: old_rem.tv_nsec.into(),
        });
    }
    result
}
198
199#[inline]
200pub(crate) fn gettid() -> Pid {
201    unsafe {
202        let tid = ret_c_int_infallible(syscall_readonly!(__NR_gettid));
203        Pid::from_raw_unchecked(tid)
204    }
205}
206
207/// # Safety
208///
209/// The raw pointers must point to valid aligned memory.
210#[inline]
211pub(crate) unsafe fn futex_val2(
212    uaddr: *const AtomicU32,
213    op: super::futex::Operation,
214    flags: futex::Flags,
215    val: u32,
216    val2: u32,
217    uaddr2: *const AtomicU32,
218    val3: u32,
219) -> io::Result<usize> {
220    // Pass `val2` in the least-significant bytes of the `timeout` argument.
221    // [“the kernel casts the timeout value first to unsigned long, then to
222    // uint32_t”], so we perform that exact conversion in reverse to create
223    // the pointer.
224    //
225    // [“the kernel casts the timeout value first to unsigned long, then to uint32_t”]: https://man7.org/linux/man-pages/man2/futex.2.html
226    let timeout = val2 as usize as *const Timespec;
227
228    #[cfg(target_pointer_width = "32")]
229    {
230        ret_usize(syscall!(
231            __NR_futex_time64,
232            uaddr,
233            (op, flags),
234            c_uint(val),
235            timeout,
236            uaddr2,
237            c_uint(val3)
238        ))
239    }
240    #[cfg(target_pointer_width = "64")]
241    ret_usize(syscall!(
242        __NR_futex,
243        uaddr,
244        (op, flags),
245        c_uint(val),
246        timeout,
247        uaddr2,
248        c_uint(val3)
249    ))
250}
251
252/// # Safety
253///
254/// The raw pointers must point to valid aligned memory.
255#[inline]
256pub(crate) unsafe fn futex_timeout(
257    uaddr: *const AtomicU32,
258    op: super::futex::Operation,
259    flags: futex::Flags,
260    val: u32,
261    timeout: *const Timespec,
262    uaddr2: *const AtomicU32,
263    val3: u32,
264) -> io::Result<usize> {
265    #[cfg(target_pointer_width = "32")]
266    {
267        ret_usize(syscall!(
268            __NR_futex_time64,
269            uaddr,
270            (op, flags),
271            c_uint(val),
272            timeout,
273            uaddr2,
274            c_uint(val3)
275        ))
276        .or_else(|err| {
277            // See the comments in `rustix_clock_gettime_via_syscall` about
278            // emulation.
279            if err == io::Errno::NOSYS {
280                futex_old_timespec(uaddr, op, flags, val, timeout, uaddr2, val3)
281            } else {
282                Err(err)
283            }
284        })
285    }
286    #[cfg(target_pointer_width = "64")]
287    ret_usize(syscall!(
288        __NR_futex,
289        uaddr,
290        (op, flags),
291        c_uint(val),
292        timeout,
293        uaddr2,
294        c_uint(val3)
295    ))
296}
297
/// Calls the plain `futex` syscall, first narrowing a non-null `timeout` to
/// the old timespec layout.
///
/// A null `timeout` is forwarded as a null pointer. If either field of a
/// non-null `timeout` does not fit in the old timespec, `EINVAL` is returned
/// without making the syscall.
///
/// # Safety
///
/// The raw pointers must point to valid aligned memory.
#[cfg(target_pointer_width = "32")]
unsafe fn futex_old_timespec(
    uaddr: *const AtomicU32,
    op: super::futex::Operation,
    flags: futex::Flags,
    val: u32,
    timeout: *const Timespec,
    uaddr2: *const AtomicU32,
    val3: u32,
) -> io::Result<usize> {
    // `as_ref` yields `None` for a null pointer and a reference otherwise.
    let narrowed = match timeout.as_ref() {
        Some(wide) => Some(__kernel_old_timespec {
            tv_sec: wide.tv_sec.try_into().map_err(|_| io::Errno::INVAL)?,
            tv_nsec: wide.tv_nsec.try_into().map_err(|_| io::Errno::INVAL)?,
        }),
        None => None,
    };
    let raw = syscall!(
        __NR_futex,
        uaddr,
        (op, flags),
        c_uint(val),
        option_as_ptr(narrowed.as_ref()),
        uaddr2,
        c_uint(val3)
    );
    ret_usize(raw)
}
332#[inline]
333pub(crate) fn setns(fd: BorrowedFd<'_>, nstype: c::c_int) -> io::Result<c::c_int> {
334    unsafe { ret_c_int(syscall_readonly!(__NR_setns, fd, c_int(nstype))) }
335}
336
337#[inline]
338pub(crate) fn unshare(flags: crate::thread::UnshareFlags) -> io::Result<()> {
339    unsafe { ret(syscall_readonly!(__NR_unshare, flags)) }
340}
341
342#[inline]
343pub(crate) fn capget(
344    header: &mut linux_raw_sys::general::__user_cap_header_struct,
345    data: &mut [MaybeUninit<linux_raw_sys::general::__user_cap_data_struct>],
346) -> io::Result<()> {
347    unsafe {
348        ret(syscall!(
349            __NR_capget,
350            by_mut(header),
351            slice_just_addr_mut(data)
352        ))
353    }
354}
355
356#[inline]
357pub(crate) fn capset(
358    header: &mut linux_raw_sys::general::__user_cap_header_struct,
359    data: &[linux_raw_sys::general::__user_cap_data_struct],
360) -> io::Result<()> {
361    unsafe { ret(syscall!(__NR_capset, by_mut(header), slice_just_addr(data))) }
362}
363
364#[inline]
365pub(crate) fn setuid_thread(uid: crate::ugid::Uid) -> io::Result<()> {
366    unsafe { ret(syscall_readonly!(__NR_setuid, uid)) }
367}
368
369#[inline]
370pub(crate) fn setresuid_thread(
371    ruid: crate::ugid::Uid,
372    euid: crate::ugid::Uid,
373    suid: crate::ugid::Uid,
374) -> io::Result<()> {
375    #[cfg(any(target_arch = "x86", target_arch = "arm", target_arch = "sparc"))]
376    unsafe {
377        ret(syscall_readonly!(__NR_setresuid32, ruid, euid, suid))
378    }
379    #[cfg(not(any(target_arch = "x86", target_arch = "arm", target_arch = "sparc")))]
380    unsafe {
381        ret(syscall_readonly!(__NR_setresuid, ruid, euid, suid))
382    }
383}
384
385#[inline]
386pub(crate) fn setgid_thread(gid: crate::ugid::Gid) -> io::Result<()> {
387    unsafe { ret(syscall_readonly!(__NR_setgid, gid)) }
388}
389
390#[inline]
391pub(crate) fn setresgid_thread(
392    rgid: crate::ugid::Gid,
393    egid: crate::ugid::Gid,
394    sgid: crate::ugid::Gid,
395) -> io::Result<()> {
396    #[cfg(any(target_arch = "x86", target_arch = "arm", target_arch = "sparc"))]
397    unsafe {
398        ret(syscall_readonly!(__NR_setresgid32, rgid, egid, sgid))
399    }
400    #[cfg(not(any(target_arch = "x86", target_arch = "arm", target_arch = "sparc")))]
401    unsafe {
402        ret(syscall_readonly!(__NR_setresgid, rgid, egid, sgid))
403    }
404}
405
406#[inline]
407pub(crate) fn setgroups_thread(gids: &[crate::ugid::Gid]) -> io::Result<()> {
408    let (addr, len) = slice(gids);
409    unsafe { ret(syscall_readonly!(__NR_setgroups, len, addr)) }
410}