rustix/backend/linux_raw/
vdso.rs

1//! Parse the Linux vDSO.
2//!
3//! The following code is transliterated from
4//! tools/testing/selftests/vDSO/parse_vdso.c in Linux 6.12, which is licensed
5//! with Creative Commons Zero License, version 1.0,
6//! available at <https://creativecommons.org/publicdomain/zero/1.0/legalcode>
7//!
8//! # Safety
9//!
10//! Parsing the vDSO involves a lot of raw pointer manipulation. This
11//! implementation follows Linux's reference implementation, and adds several
12//! additional safety checks.
13#![allow(unsafe_code)]
14
15use super::c;
16use crate::ffi::CStr;
17use crate::utils::check_raw_pointer;
18use core::ffi::c_void;
19use core::mem::size_of;
20use core::ptr::{null, null_mut};
21use linux_raw_sys::elf::*;
22
/// The integer type of one word in the `DT_HASH` table (the `nbucket` header
/// word and every bucket/chain entry).
///
/// Entries are 32 bits wide everywhere except on s390x, where they are 64
/// bits wide.
#[cfg(not(target_arch = "s390x"))]
type ElfHashEntry = u32;
#[cfg(target_arch = "s390x")]
type ElfHashEntry = u64;
27
28pub(super) struct Vdso {
29    // Load information
30    load_addr: *const Elf_Ehdr,
31    load_end: *const c_void, // the end of the `PT_LOAD` segment
32    pv_offset: usize,        // recorded paddr - recorded vaddr
33
34    // Symbol table
35    symtab: *const Elf_Sym,
36    symstrings: *const u8,
37    bucket: *const ElfHashEntry,
38    chain: *const ElfHashEntry,
39    nbucket: ElfHashEntry,
40    //nchain: ElfHashEntry,
41
42    // Version table
43    versym: *const u16,
44    verdef: *const Elf_Verdef,
45}
46
/// Compute the ELF symbol hash of `name` (the bytes before the NUL).
///
/// This is the hash function from the ELF specification, with the slight
/// tweak the Linux reference implementation made to avoid a few clang
/// warnings, translated to Rust.
fn elf_hash(name: &CStr) -> u32 {
    name.to_bytes().iter().fold(0_u32, |acc, &byte| {
        let mixed = (acc << 4).wrapping_add(u32::from(byte));
        let top = mixed & 0xf000_0000;
        // `top >> 24` is zero whenever `top` is zero, so folding the top
        // nibble back in needs no branch; the nibble itself is then cleared.
        (mixed ^ (top >> 24)) & !top
    })
}
62
63/// Create a `Vdso` value by parsing the vDSO at the `sysinfo_ehdr` address.
64fn init_from_sysinfo_ehdr() -> Option<Vdso> {
65    // SAFETY: The auxv initialization code does extensive checks to ensure
66    // that the value we get really is an `AT_SYSINFO_EHDR` value from the
67    // kernel.
68    unsafe {
69        let hdr = super::param::auxv::sysinfo_ehdr();
70
71        // If the platform doesn't provide a `AT_SYSINFO_EHDR`, we can't locate
72        // the vDSO.
73        if hdr.is_null() {
74            return None;
75        }
76
77        let mut vdso = Vdso {
78            load_addr: hdr,
79            load_end: hdr.cast(),
80            pv_offset: 0,
81            symtab: null(),
82            symstrings: null(),
83            bucket: null(),
84            chain: null(),
85            nbucket: 0,
86            //nchain: 0,
87            versym: null(),
88            verdef: null(),
89        };
90
91        let hdr = &*hdr;
92        let pt = check_raw_pointer::<Elf_Phdr>(vdso.base_plus(hdr.e_phoff)? as *mut _)?.as_ptr();
93        let mut dyn_: *const Elf_Dyn = null();
94        let mut num_dyn = 0;
95
96        // We need two things from the segment table: the load offset
97        // and the dynamic table.
98        let mut found_vaddr = false;
99        for i in 0..hdr.e_phnum {
100            let phdr = &*pt.add(i as usize);
101            if phdr.p_type == PT_LOAD && !found_vaddr {
102                // The segment should be readable and executable, because it
103                // contains the symbol table and the function bodies.
104                if phdr.p_flags & (PF_R | PF_X) != (PF_R | PF_X) {
105                    return None;
106                }
107                found_vaddr = true;
108                vdso.load_end = vdso.base_plus(phdr.p_offset.checked_add(phdr.p_memsz)?)?;
109                vdso.pv_offset = phdr.p_offset.wrapping_sub(phdr.p_vaddr);
110            } else if phdr.p_type == PT_DYNAMIC {
111                // If `p_offset` is zero, it's more likely that we're looking
112                // at memory that has been zeroed than that the kernel has
113                // somehow aliased the `Ehdr` and the `Elf_Dyn` array.
114                if phdr.p_offset < size_of::<Elf_Ehdr>() {
115                    return None;
116                }
117
118                dyn_ = check_raw_pointer::<Elf_Dyn>(vdso.base_plus(phdr.p_offset)? as *mut _)?
119                    .as_ptr();
120                num_dyn = phdr.p_memsz / size_of::<Elf_Dyn>();
121            } else if phdr.p_type == PT_INTERP || phdr.p_type == PT_GNU_RELRO {
122                // Don't trust any ELF image that has an “interpreter” or
123                // that uses RELRO, which is likely to be a user ELF image
124                // rather and not the kernel vDSO.
125                return None;
126            }
127        }
128
129        if !found_vaddr || dyn_.is_null() {
130            return None; // Failed
131        }
132
133        // Fish out the useful bits of the dynamic table.
134        let mut hash: *const ElfHashEntry = null();
135        vdso.symstrings = null();
136        vdso.symtab = null();
137        vdso.versym = null();
138        vdso.verdef = null();
139        let mut i = 0;
140        loop {
141            if i == num_dyn {
142                return None;
143            }
144            let d = &*dyn_.add(i);
145            match d.d_tag {
146                DT_STRTAB => {
147                    vdso.symstrings =
148                        check_raw_pointer::<u8>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)?
149                            .as_ptr();
150                }
151                DT_SYMTAB => {
152                    vdso.symtab =
153                        check_raw_pointer::<Elf_Sym>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)?
154                            .as_ptr();
155                }
156                DT_HASH => {
157                    hash = check_raw_pointer::<ElfHashEntry>(
158                        vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _
159                    )?
160                    .as_ptr();
161                }
162                DT_VERSYM => {
163                    vdso.versym =
164                        check_raw_pointer::<u16>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)?
165                            .as_ptr();
166                }
167                DT_VERDEF => {
168                    vdso.verdef = check_raw_pointer::<Elf_Verdef>(
169                        vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _,
170                    )?
171                    .as_ptr();
172                }
173                DT_SYMENT => {
174                    if d.d_un.d_ptr != size_of::<Elf_Sym>() {
175                        return None; // Failed
176                    }
177                }
178                DT_NULL => break,
179                _ => {}
180            }
181            i = i.checked_add(1)?;
182        }
183        // `check_raw_pointer` will have checked these pointers for null,
184        // however they could still be null if the expected dynamic table
185        // entries are absent.
186        if vdso.symstrings.is_null() || vdso.symtab.is_null() || hash.is_null() {
187            return None; // Failed
188        }
189
190        if vdso.verdef.is_null() {
191            vdso.versym = null();
192        }
193
194        // Parse the hash table header.
195        vdso.nbucket = *hash.add(0);
196        //vdso.nchain = *hash.add(1);
197        vdso.bucket = hash.add(2);
198        vdso.chain = hash.add(vdso.nbucket as usize + 2);
199
200        // That's all we need.
201        Some(vdso)
202    }
203}
204
205impl Vdso {
206    /// Parse the vDSO.
207    ///
208    /// Returns `None` if the vDSO can't be located or if it doesn't conform to
209    /// our expectations.
210    #[inline]
211    pub(super) fn new() -> Option<Self> {
212        init_from_sysinfo_ehdr()
213    }
214
215    /// Check the version for a symbol.
216    ///
217    /// # Safety
218    ///
219    /// The raw pointers inside `self` must be valid.
220    unsafe fn match_version(&self, mut ver: u16, name: &CStr, hash: u32) -> bool {
221        // This is a helper function to check if the version indexed by
222        // ver matches name (which hashes to hash).
223        //
224        // The version definition table is a mess, and I don't know how
225        // to do this in better than linear time without allocating memory
226        // to build an index. I also don't know why the table has
227        // variable size entries in the first place.
228        //
229        // For added fun, I can't find a comprehensible specification of how
230        // to parse all the weird flags in the table.
231        //
232        // So I just parse the whole table every time.
233
234        // First step: find the version definition
235        ver &= 0x7fff; // Apparently bit 15 means "hidden"
236        let mut def = self.verdef;
237        loop {
238            if (*def).vd_version != VER_DEF_CURRENT {
239                return false; // Failed
240            }
241
242            if ((*def).vd_flags & VER_FLG_BASE) == 0 && ((*def).vd_ndx & 0x7fff) == ver {
243                break;
244            }
245
246            if (*def).vd_next == 0 {
247                return false; // No definition.
248            }
249
250            def = def
251                .cast::<u8>()
252                .add((*def).vd_next as usize)
253                .cast::<Elf_Verdef>();
254        }
255
256        // Now figure out whether it matches.
257        let aux = &*(def.cast::<u8>())
258            .add((*def).vd_aux as usize)
259            .cast::<Elf_Verdaux>();
260        (*def).vd_hash == hash
261            && (name == CStr::from_ptr(self.symstrings.add(aux.vda_name as usize).cast()))
262    }
263
264    /// Look up a symbol in the vDSO.
265    pub(super) fn sym(&self, version: &CStr, name: &CStr) -> *mut c::c_void {
266        let ver_hash = elf_hash(version);
267        let name_hash = elf_hash(name);
268
269        // SAFETY: The pointers in `self` must be valid.
270        unsafe {
271            let mut chain = *self
272                .bucket
273                .add((ElfHashEntry::from(name_hash) % self.nbucket) as usize);
274
275            while chain != ElfHashEntry::from(STN_UNDEF) {
276                let sym = &*self.symtab.add(chain as usize);
277
278                // Check for a defined global or weak function w/ right name.
279                //
280                // Accept `STT_NOTYPE` in addition to `STT_FUNC` for the symbol
281                // type, for compatibility with some versions of Linux on
282                // PowerPC64. See [this commit] in Linux for more background.
283                //
284                // [this commit]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/tools/testing/selftests/vDSO/parse_vdso.c?id=0161bd38c24312853ed5ae9a425a1c41c4ac674a
285                if (ELF_ST_TYPE(sym.st_info) != STT_FUNC &&
286                        ELF_ST_TYPE(sym.st_info) != STT_NOTYPE)
287                    || (ELF_ST_BIND(sym.st_info) != STB_GLOBAL
288                        && ELF_ST_BIND(sym.st_info) != STB_WEAK)
289                    || sym.st_shndx == SHN_UNDEF
290                    || sym.st_shndx == SHN_ABS
291                    || ELF_ST_VISIBILITY(sym.st_other) != STV_DEFAULT
292                    || (name != CStr::from_ptr(self.symstrings.add(sym.st_name as usize).cast()))
293                    // Check symbol version.
294                    || (!self.versym.is_null()
295                        && !self.match_version(*self.versym.add(chain as usize), version, ver_hash))
296                {
297                    chain = *self.chain.add(chain as usize);
298                    continue;
299                }
300
301                let sum = self.addr_from_elf(sym.st_value).unwrap();
302                assert!(
303                    sum as usize >= self.load_addr as usize
304                        && sum as usize <= self.load_end as usize
305                );
306                return sum as *mut c::c_void;
307            }
308        }
309
310        null_mut()
311    }
312
313    /// Add the given address to the vDSO base address.
314    unsafe fn base_plus(&self, offset: usize) -> Option<*const c_void> {
315        // Check for overflow.
316        let _ = (self.load_addr as usize).checked_add(offset)?;
317        // Add the offset to the base.
318        Some(self.load_addr.cast::<u8>().add(offset).cast())
319    }
320
321    /// Translate an ELF-address-space address into a usable virtual address.
322    unsafe fn addr_from_elf(&self, elf_addr: usize) -> Option<*const c_void> {
323        self.base_plus(elf_addr.wrapping_add(self.pv_offset))
324    }
325}
326
/// Parse the real vDSO of the running process and look up the architecture's
/// `clock_gettime` entry point in it.
#[cfg(linux_raw)]
#[test]
#[ignore] // Until rustix is updated to the new vDSO format.
fn test_vdso() {
    let vdso = Vdso::new().unwrap();
    assert!(!vdso.symtab.is_null());
    assert!(!vdso.symstrings.is_null());

    // Pick the version string and symbol name to look up on this
    // architecture.
    #[cfg(any(
        target_arch = "x86_64",
        target_arch = "mips64",
        target_arch = "mips64r6"
    ))]
    let (version, name) = (cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));
    #[cfg(any(
        target_arch = "arm",
        target_arch = "x86",
        target_arch = "mips",
        target_arch = "mips32r6"
    ))]
    let (version, name) = (cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
    #[cfg(target_arch = "aarch64")]
    let (version, name) = (cstr!("LINUX_2.6.39"), cstr!("__kernel_clock_gettime"));
    #[cfg(target_arch = "riscv64")]
    let (version, name) = (cstr!("LINUX_4.15"), cstr!("__vdso_clock_gettime"));
    #[cfg(target_arch = "powerpc64")]
    let (version, name) = (cstr!("LINUX_2.6.15"), cstr!("__kernel_clock_gettime"));
    #[cfg(target_arch = "s390x")]
    let (version, name) = (cstr!("LINUX_2.6.29"), cstr!("__kernel_clock_gettime"));

    let ptr = vdso.sym(version, name);
    assert!(!ptr.is_null());
}