1//! Parse the Linux vDSO.
2//!
3//! The following code is transliterated from
4//! tools/testing/selftests/vDSO/parse_vdso.c in Linux 6.12, which is licensed
5//! with Creative Commons Zero License, version 1.0,
6//! available at <https://creativecommons.org/publicdomain/zero/1.0/legalcode>
7//!
8//! # Safety
9//!
10//! Parsing the vDSO involves a lot of raw pointer manipulation. This
11//! implementation follows Linux's reference implementation, and adds several
12//! additional safety checks.
13#![allow(unsafe_code)]
1415use super::c;
16use crate::ffi::CStr;
17use crate::utils::check_raw_pointer;
18use core::ffi::c_void;
19use core::mem::size_of;
20use core::ptr::{null, null_mut};
21use linux_raw_sys::elf::*;
// Integer type of each word of the `DT_HASH` table as this file reads it
// (the bucket count, the bucket array, and the chain array): 64 bits wide
// on s390x, 32 bits wide on every other target.
#[cfg(target_arch = "s390x")]
type ElfHashEntry = u64;
#[cfg(not(target_arch = "s390x"))]
type ElfHashEntry = u32;
/// The pieces of the vDSO image needed to look up symbols, as located by
/// `init_from_sysinfo_ehdr`.
pub(super) struct Vdso {
    // Load information
    /// Address of the ELF header; the base to which file offsets are added.
    load_addr: *const Elf_Ehdr,
    /// The end of the first `PT_LOAD` segment: base + `p_offset` + `p_memsz`.
    load_end: *const c_void,
    /// `p_offset - p_vaddr` (wrapping) of the first `PT_LOAD` segment; added
    /// to an ELF address to turn it into an offset from `load_addr`.
    pv_offset: usize,

    // Symbol table
    /// Symbol table, from the `DT_SYMTAB` dynamic entry.
    symtab: *const Elf_Sym,
    /// String table, from the `DT_STRTAB` dynamic entry.
    symstrings: *const u8,
    /// Bucket array of the `DT_HASH` table (starts after the two header words).
    bucket: *const ElfHashEntry,
    /// Chain array of the `DT_HASH` table (starts after the bucket array).
    chain: *const ElfHashEntry,
    /// Number of buckets; the first header word of the `DT_HASH` table.
    nbucket: ElfHashEntry,
    //nchain: ElfHashEntry,

    // Version table
    /// Per-symbol version indices, from `DT_VERSYM`; null when there is no
    /// `DT_VERDEF` table, in which case symbol versions are not checked.
    versym: *const u16,
    /// Version definition table, from `DT_VERDEF`; may be null.
    verdef: *const Elf_Verdef,
}
/// The classic ELF symbol hash, applied to the bytes of `name` (without the
/// trailing NUL).
///
/// Each byte is shifted into the low end of the accumulator; whenever the top
/// nibble becomes non-zero it is folded back into bits 4..8 and then cleared,
/// so the result always fits in 28 bits.
fn elf_hash(name: &CStr) -> u32 {
    name.to_bytes().iter().fold(0_u32, |acc, &byte| {
        let mixed = (acc << 4).wrapping_add(u32::from(byte));
        let top_nibble = mixed & 0xf000_0000;
        // When `top_nibble` is zero both operations below are no-ops.
        (mixed ^ (top_nibble >> 24)) & !top_nibble
    })
}
6263/// Create a `Vdso` value by parsing the vDSO at the `sysinfo_ehdr` address.
64fn init_from_sysinfo_ehdr() -> Option<Vdso> {
65// SAFETY: The auxv initialization code does extensive checks to ensure
66 // that the value we get really is an `AT_SYSINFO_EHDR` value from the
67 // kernel.
68unsafe {
69let hdr = super::param::auxv::sysinfo_ehdr();
7071// If the platform doesn't provide a `AT_SYSINFO_EHDR`, we can't locate
72 // the vDSO.
73if hdr.is_null() {
74return None;
75 }
7677let mut vdso = Vdso {
78 load_addr: hdr,
79 load_end: hdr.cast(),
80 pv_offset: 0,
81 symtab: null(),
82 symstrings: null(),
83 bucket: null(),
84 chain: null(),
85 nbucket: 0,
86//nchain: 0,
87versym: null(),
88 verdef: null(),
89 };
9091let hdr = &*hdr;
92let pt = check_raw_pointer::<Elf_Phdr>(vdso.base_plus(hdr.e_phoff)? as *mut _)?.as_ptr();
93let mut dyn_: *const Elf_Dyn = null();
94let mut num_dyn = 0;
9596// We need two things from the segment table: the load offset
97 // and the dynamic table.
98let mut found_vaddr = false;
99for i in 0..hdr.e_phnum {
100let phdr = &*pt.add(i as usize);
101if phdr.p_type == PT_LOAD && !found_vaddr {
102// The segment should be readable and executable, because it
103 // contains the symbol table and the function bodies.
104if phdr.p_flags & (PF_R | PF_X) != (PF_R | PF_X) {
105return None;
106 }
107 found_vaddr = true;
108 vdso.load_end = vdso.base_plus(phdr.p_offset.checked_add(phdr.p_memsz)?)?;
109 vdso.pv_offset = phdr.p_offset.wrapping_sub(phdr.p_vaddr);
110 } else if phdr.p_type == PT_DYNAMIC {
111// If `p_offset` is zero, it's more likely that we're looking
112 // at memory that has been zeroed than that the kernel has
113 // somehow aliased the `Ehdr` and the `Elf_Dyn` array.
114if phdr.p_offset < size_of::<Elf_Ehdr>() {
115return None;
116 }
117118 dyn_ = check_raw_pointer::<Elf_Dyn>(vdso.base_plus(phdr.p_offset)? as *mut _)?
119.as_ptr();
120 num_dyn = phdr.p_memsz / size_of::<Elf_Dyn>();
121 } else if phdr.p_type == PT_INTERP || phdr.p_type == PT_GNU_RELRO {
122// Don't trust any ELF image that has an “interpreter” or
123 // that uses RELRO, which is likely to be a user ELF image
124 // rather and not the kernel vDSO.
125return None;
126 }
127 }
128129if !found_vaddr || dyn_.is_null() {
130return None; // Failed
131}
132133// Fish out the useful bits of the dynamic table.
134let mut hash: *const ElfHashEntry = null();
135 vdso.symstrings = null();
136 vdso.symtab = null();
137 vdso.versym = null();
138 vdso.verdef = null();
139let mut i = 0;
140loop {
141if i == num_dyn {
142return None;
143 }
144let d = &*dyn_.add(i);
145match d.d_tag {
146 DT_STRTAB => {
147 vdso.symstrings =
148 check_raw_pointer::<u8>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)?
149.as_ptr();
150 }
151 DT_SYMTAB => {
152 vdso.symtab =
153 check_raw_pointer::<Elf_Sym>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)?
154.as_ptr();
155 }
156 DT_HASH => {
157 hash = check_raw_pointer::<ElfHashEntry>(
158 vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _
159)?
160.as_ptr();
161 }
162 DT_VERSYM => {
163 vdso.versym =
164 check_raw_pointer::<u16>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)?
165.as_ptr();
166 }
167 DT_VERDEF => {
168 vdso.verdef = check_raw_pointer::<Elf_Verdef>(
169 vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _,
170 )?
171.as_ptr();
172 }
173 DT_SYMENT => {
174if d.d_un.d_ptr != size_of::<Elf_Sym>() {
175return None; // Failed
176}
177 }
178 DT_NULL => break,
179_ => {}
180 }
181 i = i.checked_add(1)?;
182 }
183// `check_raw_pointer` will have checked these pointers for null,
184 // however they could still be null if the expected dynamic table
185 // entries are absent.
186if vdso.symstrings.is_null() || vdso.symtab.is_null() || hash.is_null() {
187return None; // Failed
188}
189190if vdso.verdef.is_null() {
191 vdso.versym = null();
192 }
193194// Parse the hash table header.
195vdso.nbucket = *hash.add(0);
196//vdso.nchain = *hash.add(1);
197vdso.bucket = hash.add(2);
198 vdso.chain = hash.add(vdso.nbucket as usize + 2);
199200// That's all we need.
201Some(vdso)
202 }
203}
204205impl Vdso {
206/// Parse the vDSO.
207 ///
208 /// Returns `None` if the vDSO can't be located or if it doesn't conform to
209 /// our expectations.
210#[inline]
211pub(super) fn new() -> Option<Self> {
212 init_from_sysinfo_ehdr()
213 }
214215/// Check the version for a symbol.
216 ///
217 /// # Safety
218 ///
219 /// The raw pointers inside `self` must be valid.
220unsafe fn match_version(&self, mut ver: u16, name: &CStr, hash: u32) -> bool {
221// This is a helper function to check if the version indexed by
222 // ver matches name (which hashes to hash).
223 //
224 // The version definition table is a mess, and I don't know how
225 // to do this in better than linear time without allocating memory
226 // to build an index. I also don't know why the table has
227 // variable size entries in the first place.
228 //
229 // For added fun, I can't find a comprehensible specification of how
230 // to parse all the weird flags in the table.
231 //
232 // So I just parse the whole table every time.
233234 // First step: find the version definition
235ver &= 0x7fff; // Apparently bit 15 means "hidden"
236let mut def = self.verdef;
237loop {
238if (*def).vd_version != VER_DEF_CURRENT {
239return false; // Failed
240}
241242if ((*def).vd_flags & VER_FLG_BASE) == 0 && ((*def).vd_ndx & 0x7fff) == ver {
243break;
244 }
245246if (*def).vd_next == 0 {
247return false; // No definition.
248}
249250 def = def
251 .cast::<u8>()
252 .add((*def).vd_next as usize)
253 .cast::<Elf_Verdef>();
254 }
255256// Now figure out whether it matches.
257let aux = &*(def.cast::<u8>())
258 .add((*def).vd_aux as usize)
259 .cast::<Elf_Verdaux>();
260 (*def).vd_hash == hash
261 && (name == CStr::from_ptr(self.symstrings.add(aux.vda_name as usize).cast()))
262 }
263264/// Look up a symbol in the vDSO.
265pub(super) fn sym(&self, version: &CStr, name: &CStr) -> *mut c::c_void {
266let ver_hash = elf_hash(version);
267let name_hash = elf_hash(name);
268269// SAFETY: The pointers in `self` must be valid.
270unsafe {
271let mut chain = *self
272.bucket
273 .add((ElfHashEntry::from(name_hash) % self.nbucket) as usize);
274275while chain != ElfHashEntry::from(STN_UNDEF) {
276let sym = &*self.symtab.add(chain as usize);
277278// Check for a defined global or weak function w/ right name.
279 //
280 // Accept `STT_NOTYPE` in addition to `STT_FUNC` for the symbol
281 // type, for compatibility with some versions of Linux on
282 // PowerPC64. See [this commit] in Linux for more background.
283 //
284 // [this commit]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/tools/testing/selftests/vDSO/parse_vdso.c?id=0161bd38c24312853ed5ae9a425a1c41c4ac674a
285if (ELF_ST_TYPE(sym.st_info) != STT_FUNC &&
286 ELF_ST_TYPE(sym.st_info) != STT_NOTYPE)
287 || (ELF_ST_BIND(sym.st_info) != STB_GLOBAL
288 && ELF_ST_BIND(sym.st_info) != STB_WEAK)
289 || sym.st_shndx == SHN_UNDEF
290 || sym.st_shndx == SHN_ABS
291 || ELF_ST_VISIBILITY(sym.st_other) != STV_DEFAULT
292 || (name != CStr::from_ptr(self.symstrings.add(sym.st_name as usize).cast()))
293// Check symbol version.
294|| (!self.versym.is_null()
295 && !self.match_version(*self.versym.add(chain as usize), version, ver_hash))
296 {
297 chain = *self.chain.add(chain as usize);
298continue;
299 }
300301let sum = self.addr_from_elf(sym.st_value).unwrap();
302assert!(
303 sum as usize >= self.load_addr as usize
304 && sum as usize <= self.load_end as usize
305 );
306return sum as *mut c::c_void;
307 }
308 }
309310 null_mut()
311 }
312313/// Add the given address to the vDSO base address.
314unsafe fn base_plus(&self, offset: usize) -> Option<*const c_void> {
315// Check for overflow.
316let _ = (self.load_addr as usize).checked_add(offset)?;
317// Add the offset to the base.
318Some(self.load_addr.cast::<u8>().add(offset).cast())
319 }
320321/// Translate an ELF-address-space address into a usable virtual address.
322unsafe fn addr_from_elf(&self, elf_addr: usize) -> Option<*const c_void> {
323self.base_plus(elf_addr.wrapping_add(self.pv_offset))
324 }
325}
/// Parse the running process's vDSO and look up the clock-gettime entry
/// point under the version and symbol name used on the current architecture.
#[cfg(linux_raw)]
#[test]
#[ignore] // Until rustix is updated to the new vDSO format.
fn test_vdso() {
    // Parsing must succeed and must have located the symbol and string tables.
    let vdso = Vdso::new().unwrap();
    assert!(!vdso.symtab.is_null());
    assert!(!vdso.symstrings.is_null());

    // Exactly one of the following `let`s is compiled in, selected by target
    // architecture; each names that architecture's version string and symbol.
    #[cfg(target_arch = "x86_64")]
    let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));
    #[cfg(target_arch = "arm")]
    let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
    #[cfg(target_arch = "aarch64")]
    let ptr = vdso.sym(cstr!("LINUX_2.6.39"), cstr!("__kernel_clock_gettime"));
    #[cfg(target_arch = "x86")]
    let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
    #[cfg(target_arch = "riscv64")]
    let ptr = vdso.sym(cstr!("LINUX_4.15"), cstr!("__vdso_clock_gettime"));
    #[cfg(target_arch = "powerpc64")]
    let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_clock_gettime"));
    #[cfg(target_arch = "s390x")]
    let ptr = vdso.sym(cstr!("LINUX_2.6.29"), cstr!("__kernel_clock_gettime"));
    #[cfg(any(target_arch = "mips", target_arch = "mips32r6"))]
    let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
    #[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))]
    let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));

    // `sym` returns null when the symbol is not found.
    assert!(!ptr.is_null());
}