rustix/backend/linux_raw/fs/
dir.rs

1use crate::fd::{AsFd, BorrowedFd, OwnedFd};
2use crate::ffi::{CStr, CString};
3use crate::fs::{
4    fcntl_getfl, fstat, fstatfs, fstatvfs, openat, FileType, Mode, OFlags, Stat, StatFs, StatVfs,
5};
6use crate::io;
7#[cfg(feature = "process")]
8use crate::process::fchdir;
9use crate::utils::as_ptr;
10use alloc::borrow::ToOwned as _;
11use alloc::vec::Vec;
12use core::fmt;
13use core::mem::size_of;
14use linux_raw_sys::general::{linux_dirent64, SEEK_SET};
15
/// `DIR*`
///
/// A directory stream that owns a file descriptor and decodes
/// `linux_dirent64` records out of an internal byte buffer.
pub struct Dir {
    /// The `OwnedFd` that we read directory entries from.
    fd: OwnedFd,

    /// Have we seen any errors in this iteration? While set, `read` returns
    /// `None`; `rewind` and `seek` clear it again.
    any_errors: bool,

    /// Should we rewind the stream on the next iteration? Set by `rewind` and
    /// consumed by the next `read`, which seeks the descriptor back to
    /// offset 0.
    rewind: bool,

    /// The buffer for `linux_dirent64` entries, refilled by `read_more`.
    buf: Vec<u8>,

    /// Where we are in the buffer: the byte offset of the next entry to
    /// decode.
    pos: usize,
}
33
34impl Dir {
35    /// Take ownership of `fd` and construct a `Dir` that reads entries from
36    /// the given directory file descriptor.
37    #[inline]
38    pub fn new<Fd: Into<OwnedFd>>(fd: Fd) -> io::Result<Self> {
39        Self::_new(fd.into())
40    }
41
42    #[inline]
43    fn _new(fd: OwnedFd) -> io::Result<Self> {
44        Ok(Self {
45            fd,
46            any_errors: false,
47            rewind: false,
48            buf: Vec::new(),
49            pos: 0,
50        })
51    }
52
53    /// Returns the file descriptor associated with the directory stream.
54    ///
55    /// The file descriptor is used internally by the directory stream. As a result, it is useful
56    /// only for functions which do not depend or alter the file position.
57    ///
58    /// # References
59    ///
60    ///   - [POSIX]
61    ///
62    /// [POSIX]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/dirfd.html
63    #[inline]
64    #[doc(alias = "dirfd")]
65    pub fn fd<'a>(&'a self) -> io::Result<BorrowedFd<'a>> {
66        Ok(self.fd.as_fd())
67    }
68
69    /// Borrow `fd` and construct a `Dir` that reads entries from the given
70    /// directory file descriptor.
71    #[inline]
72    pub fn read_from<Fd: AsFd>(fd: Fd) -> io::Result<Self> {
73        Self::_read_from(fd.as_fd())
74    }
75
76    #[inline]
77    fn _read_from(fd: BorrowedFd<'_>) -> io::Result<Self> {
78        let flags = fcntl_getfl(fd)?;
79        let fd_for_dir = openat(fd, cstr!("."), flags | OFlags::CLOEXEC, Mode::empty())?;
80
81        Ok(Self {
82            fd: fd_for_dir,
83            any_errors: false,
84            rewind: false,
85            buf: Vec::new(),
86            pos: 0,
87        })
88    }
89
90    /// `rewinddir(self)`
91    #[inline]
92    pub fn rewind(&mut self) {
93        self.any_errors = false;
94        self.rewind = true;
95        self.pos = self.buf.len();
96    }
97
98    /// `seekdir(self, offset)`
99    ///
100    /// This function is only available on 64-bit platforms because it's
101    /// implemented using [`libc::seekdir`] which only supports offsets that
102    /// fit in a `c_long`.
103    ///
104    /// [`libc::seekdir`]: https://docs.rs/libc/*/arm-unknown-linux-gnueabihf/libc/fn.seekdir.html
105    // In the linux_raw backend here, we don't use `libc::seekdir` and don't
106    // have this limitation, but it's a goal of rustix to support the same API
107    // on both the linux_raw and libc backends.
108    #[cfg(target_pointer_width = "64")]
109    #[cfg_attr(docsrs, doc(cfg(target_pointer_width = "64")))]
110    #[doc(alias = "seekdir")]
111    #[inline]
112    pub fn seek(&mut self, offset: i64) -> io::Result<()> {
113        self.any_errors = false;
114        self.rewind = false;
115        self.pos = self.buf.len();
116        match io::retry_on_intr(|| {
117            crate::backend::fs::syscalls::_seek(self.fd.as_fd(), offset, SEEK_SET)
118        }) {
119            Ok(_) => Ok(()),
120            Err(err) => {
121                self.any_errors = true;
122                Err(err)
123            }
124        }
125    }
126
127    /// `readdir(self)`, where `None` means the end of the directory.
128    pub fn read(&mut self) -> Option<io::Result<DirEntry>> {
129        // If we've seen errors, don't continue to try to read anything
130        // further.
131        if self.any_errors {
132            return None;
133        }
134
135        // If a rewind was requested, seek to the beginning.
136        if self.rewind {
137            self.rewind = false;
138            match io::retry_on_intr(|| {
139                crate::backend::fs::syscalls::_seek(self.fd.as_fd(), 0, SEEK_SET)
140            }) {
141                Ok(_) => (),
142                Err(err) => {
143                    self.any_errors = true;
144                    return Some(Err(err));
145                }
146            }
147        }
148
149        // Compute linux_dirent64 field offsets.
150        let z = linux_dirent64 {
151            d_ino: 0_u64,
152            d_off: 0_i64,
153            d_type: 0_u8,
154            d_reclen: 0_u16,
155            d_name: Default::default(),
156        };
157        let base = as_ptr(&z) as usize;
158        let offsetof_d_reclen = (as_ptr(&z.d_reclen) as usize) - base;
159        let offsetof_d_name = (as_ptr(&z.d_name) as usize) - base;
160        let offsetof_d_ino = (as_ptr(&z.d_ino) as usize) - base;
161        let offsetof_d_off = (as_ptr(&z.d_off) as usize) - base;
162        let offsetof_d_type = (as_ptr(&z.d_type) as usize) - base;
163
164        // Test if we need more entries, and if so, read more.
165        if self.buf.len() - self.pos < size_of::<linux_dirent64>() {
166            match self.read_more()? {
167                Ok(()) => (),
168                Err(err) => return Some(Err(err)),
169            }
170        }
171
172        // We successfully read an entry. Extract the fields.
173        let pos = self.pos;
174
175        // Do an unaligned u16 load.
176        let d_reclen = u16::from_ne_bytes([
177            self.buf[pos + offsetof_d_reclen],
178            self.buf[pos + offsetof_d_reclen + 1],
179        ]);
180        assert!(self.buf.len() - pos >= d_reclen as usize);
181        self.pos += d_reclen as usize;
182
183        // Read the NUL-terminated name from the `d_name` field. Without
184        // `unsafe`, we need to scan for the NUL twice: once to obtain a size
185        // for the slice, and then once within `CStr::from_bytes_with_nul`.
186        let name_start = pos + offsetof_d_name;
187        let name_len = self.buf[name_start..]
188            .iter()
189            .position(|x| *x == b'\0')
190            .unwrap();
191        let name = CStr::from_bytes_with_nul(&self.buf[name_start..][..=name_len]).unwrap();
192        let name = name.to_owned();
193        assert!(name.as_bytes().len() <= self.buf.len() - name_start);
194
195        // Do an unaligned `u64` load for `d_ino`.
196        let d_ino = u64::from_ne_bytes([
197            self.buf[pos + offsetof_d_ino],
198            self.buf[pos + offsetof_d_ino + 1],
199            self.buf[pos + offsetof_d_ino + 2],
200            self.buf[pos + offsetof_d_ino + 3],
201            self.buf[pos + offsetof_d_ino + 4],
202            self.buf[pos + offsetof_d_ino + 5],
203            self.buf[pos + offsetof_d_ino + 6],
204            self.buf[pos + offsetof_d_ino + 7],
205        ]);
206
207        // Do an unaligned `i64` load for `d_off`.
208        let d_off = i64::from_ne_bytes([
209            self.buf[pos + offsetof_d_off],
210            self.buf[pos + offsetof_d_off + 1],
211            self.buf[pos + offsetof_d_off + 2],
212            self.buf[pos + offsetof_d_off + 3],
213            self.buf[pos + offsetof_d_off + 4],
214            self.buf[pos + offsetof_d_off + 5],
215            self.buf[pos + offsetof_d_off + 6],
216            self.buf[pos + offsetof_d_off + 7],
217        ]);
218
219        let d_type = self.buf[pos + offsetof_d_type];
220
221        // Check that our types correspond to the `linux_dirent64` types.
222        let _ = linux_dirent64 {
223            d_ino,
224            d_off,
225            d_type,
226            d_reclen,
227            d_name: Default::default(),
228        };
229
230        Some(Ok(DirEntry {
231            d_ino,
232            d_off,
233            d_type,
234            name,
235        }))
236    }
237
238    #[must_use]
239    fn read_more(&mut self) -> Option<io::Result<()>> {
240        // The first few times we're called, we allocate a relatively small
241        // buffer, because many directories are small. If we're called more,
242        // use progressively larger allocations, up to a fixed maximum.
243        //
244        // The specific sizes and policy here have not been tuned in detail yet
245        // and may need to be adjusted. In doing so, we should be careful to
246        // avoid unbounded buffer growth. This buffer only exists to share the
247        // cost of a `getdents` call over many entries, so if it gets too big,
248        // cache and heap usage will outweigh the benefit. And ultimately,
249        // directories can contain more entries than we can allocate contiguous
250        // memory for, so we'll always need to cap the size at some point.
251        if self.buf.len() < 1024 * size_of::<linux_dirent64>() {
252            self.buf.reserve(32 * size_of::<linux_dirent64>());
253        }
254        self.buf.resize(self.buf.capacity(), 0);
255        let nread = match io::retry_on_intr(|| {
256            crate::backend::fs::syscalls::getdents(self.fd.as_fd(), &mut self.buf)
257        }) {
258            Ok(nread) => nread,
259            Err(io::Errno::NOENT) => {
260                self.any_errors = true;
261                return None;
262            }
263            Err(err) => {
264                self.any_errors = true;
265                return Some(Err(err));
266            }
267        };
268        self.buf.resize(nread, 0);
269        self.pos = 0;
270        if nread == 0 {
271            None
272        } else {
273            Some(Ok(()))
274        }
275    }
276
277    /// `fstat(self)`
278    #[inline]
279    pub fn stat(&self) -> io::Result<Stat> {
280        fstat(&self.fd)
281    }
282
283    /// `fstatfs(self)`
284    #[inline]
285    pub fn statfs(&self) -> io::Result<StatFs> {
286        fstatfs(&self.fd)
287    }
288
289    /// `fstatvfs(self)`
290    #[inline]
291    pub fn statvfs(&self) -> io::Result<StatVfs> {
292        fstatvfs(&self.fd)
293    }
294
295    /// `fchdir(self)`
296    #[cfg(feature = "process")]
297    #[cfg_attr(docsrs, doc(cfg(feature = "process")))]
298    #[inline]
299    pub fn chdir(&self) -> io::Result<()> {
300        fchdir(&self.fd)
301    }
302}
303
304impl Iterator for Dir {
305    type Item = io::Result<DirEntry>;
306
307    #[inline]
308    fn next(&mut self) -> Option<Self::Item> {
309        Self::read(self)
310    }
311}
312
313impl fmt::Debug for Dir {
314    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
315        f.debug_struct("Dir").field("fd", &self.fd).finish()
316    }
317}
318
/// `struct dirent`
///
/// An owned copy of the fields decoded from one directory record.
#[derive(Debug)]
pub struct DirEntry {
    /// Inode number, returned by `ino`.
    d_ino: u64,
    /// Raw `d_type` byte; `file_type` converts it with
    /// `FileType::from_dirent_d_type`.
    d_type: u8,
    /// Opaque stream-position cookie, returned by `offset`.
    d_off: i64,
    /// The entry's file name, returned by `file_name`.
    name: CString,
}
327
328impl DirEntry {
329    /// Returns the file name of this directory entry.
330    #[inline]
331    pub fn file_name(&self) -> &CStr {
332        &self.name
333    }
334
335    /// Returns the “offset” of this directory entry. This is not a true
336    /// numerical offset but an opaque cookie that identifies a position in the
337    /// given stream.
338    #[inline]
339    pub fn offset(&self) -> i64 {
340        self.d_off
341    }
342
343    /// Returns the type of this directory entry.
344    #[inline]
345    pub fn file_type(&self) -> FileType {
346        FileType::from_dirent_d_type(self.d_type)
347    }
348
349    /// Return the inode number of this directory entry.
350    #[inline]
351    pub fn ino(&self) -> u64 {
352        self.d_ino
353    }
354}
355
#[cfg(test)]
mod tests {
    use super::*;

    /// A `getdents64` failure must surface as a single `Err` item, after
    /// which the iterator reports the end of the stream.
    #[test]
    fn dir_iterator_handles_io_errors() {
        // Open a fresh temporary directory and hold on to its descriptor.
        let tmp = tempfile::tempdir().unwrap();
        let dir_flags = crate::fs::OFlags::RDONLY | crate::fs::OFlags::CLOEXEC;
        let fd = crate::fs::openat(
            crate::fs::CWD,
            tmp.path(),
            dir_flags,
            crate::fs::Mode::empty(),
        )
        .unwrap();

        // Create a regular file whose descriptor will stand in for the
        // directory below.
        let file_path = tmp.path().join("test.txt");
        let file_flags = crate::fs::OFlags::WRONLY | crate::fs::OFlags::CREATE;
        let file_fd =
            crate::fs::openat(&fd, file_path, file_flags, crate::fs::Mode::RWXU).unwrap();

        let mut dir = Dir::read_from(&fd).unwrap();

        // Reach inside the `Dir` and replace its directory with a file, which
        // will cause the subsequent `getdents64` to fail.
        crate::io::dup2(&file_fd, &mut dir.fd).unwrap();

        let first = dir.next();
        assert!(matches!(first, Some(Err(_))));
        let second = dir.next();
        assert!(second.is_none());
    }
}