object/read/elf/
hash.rs

1use core::mem;
2
3use crate::elf;
4use crate::endian::{U32, U64};
5use crate::read::{ReadError, ReadRef, Result, SymbolIndex};
6
7use super::{FileHeader, Sym, SymbolTable, Version, VersionTable};
8
9/// A SysV symbol hash table in an ELF file.
10///
11/// Returned by [`SectionHeader::hash`](super::SectionHeader::hash).
12#[derive(Debug)]
13pub struct HashTable<'data, Elf: FileHeader> {
14    buckets: &'data [U32<Elf::Endian>],
15    chains: &'data [U32<Elf::Endian>],
16}
17
18impl<'data, Elf: FileHeader> HashTable<'data, Elf> {
19    /// Parse a SysV hash table.
20    ///
21    /// `data` should be from an [`elf::SHT_HASH`] section, or from a
22    /// segment pointed to via the [`elf::DT_HASH`] entry.
23    ///
24    /// The header is read at offset 0 in the given `data`.
25    pub fn parse(endian: Elf::Endian, data: &'data [u8]) -> Result<Self> {
26        let mut offset = 0;
27        let header = data
28            .read::<elf::HashHeader<Elf::Endian>>(&mut offset)
29            .read_error("Invalid hash header")?;
30        let buckets = data
31            .read_slice(&mut offset, header.bucket_count.get(endian) as usize)
32            .read_error("Invalid hash buckets")?;
33        let chains = data
34            .read_slice(&mut offset, header.chain_count.get(endian) as usize)
35            .read_error("Invalid hash chains")?;
36        Ok(HashTable { buckets, chains })
37    }
38
39    /// Return the symbol table length.
40    pub fn symbol_table_length(&self) -> u32 {
41        self.chains.len() as u32
42    }
43
44    fn bucket(&self, endian: Elf::Endian, hash: u32) -> SymbolIndex {
45        SymbolIndex(self.buckets[(hash as usize) % self.buckets.len()].get(endian) as usize)
46    }
47
48    fn chain(&self, endian: Elf::Endian, index: SymbolIndex) -> SymbolIndex {
49        SymbolIndex(self.chains[index.0].get(endian) as usize)
50    }
51
52    /// Use the hash table to find the symbol table entry with the given name, hash and version.
53    pub fn find<R: ReadRef<'data>>(
54        &self,
55        endian: Elf::Endian,
56        name: &[u8],
57        hash: u32,
58        version: Option<&Version<'_>>,
59        symbols: &SymbolTable<'data, Elf, R>,
60        versions: &VersionTable<'data, Elf>,
61    ) -> Option<(SymbolIndex, &'data Elf::Sym)> {
62        // Get the chain start from the bucket for this hash.
63        let mut index = self.bucket(endian, hash);
64        // Avoid infinite loop.
65        let mut i = 0;
66        let strings = symbols.strings();
67        while index != SymbolIndex(0) && i < self.chains.len() {
68            if let Ok(symbol) = symbols.symbol(index) {
69                if symbol.name(endian, strings) == Ok(name)
70                    && versions.matches(endian, index, version)
71                {
72                    return Some((index, symbol));
73                }
74            }
75            index = self.chain(endian, index);
76            i += 1;
77        }
78        None
79    }
80}
81
82/// A GNU symbol hash table in an ELF file.
83///
84/// Returned by [`SectionHeader::gnu_hash`](super::SectionHeader::gnu_hash).
85#[derive(Debug)]
86pub struct GnuHashTable<'data, Elf: FileHeader> {
87    symbol_base: u32,
88    bloom_shift: u32,
89    bloom_filters: &'data [u8],
90    buckets: &'data [U32<Elf::Endian>],
91    values: &'data [U32<Elf::Endian>],
92}
93
94impl<'data, Elf: FileHeader> GnuHashTable<'data, Elf> {
95    /// Parse a GNU hash table.
96    ///
97    /// `data` should be from an [`elf::SHT_GNU_HASH`] section, or from a
98    /// segment pointed to via the [`elf::DT_GNU_HASH`] entry.
99    ///
100    /// The header is read at offset 0 in the given `data`.
101    ///
102    /// The header does not contain a length field, and so all of `data`
103    /// will be used as the hash table values. It does not matter if this
104    /// is longer than needed, and this will often the case when accessing
105    /// the hash table via the [`elf::DT_GNU_HASH`] entry.
106    pub fn parse(endian: Elf::Endian, data: &'data [u8]) -> Result<Self> {
107        let mut offset = 0;
108        let header = data
109            .read::<elf::GnuHashHeader<Elf::Endian>>(&mut offset)
110            .read_error("Invalid GNU hash header")?;
111        let bloom_len =
112            u64::from(header.bloom_count.get(endian)) * mem::size_of::<Elf::Word>() as u64;
113        let bloom_filters = data
114            .read_bytes(&mut offset, bloom_len)
115            .read_error("Invalid GNU hash bloom filters")?;
116        let buckets = data
117            .read_slice(&mut offset, header.bucket_count.get(endian) as usize)
118            .read_error("Invalid GNU hash buckets")?;
119        let chain_count = (data.len() - offset as usize) / 4;
120        let values = data
121            .read_slice(&mut offset, chain_count)
122            .read_error("Invalid GNU hash values")?;
123        Ok(GnuHashTable {
124            symbol_base: header.symbol_base.get(endian),
125            bloom_shift: header.bloom_shift.get(endian),
126            bloom_filters,
127            buckets,
128            values,
129        })
130    }
131
132    /// Return the symbol table index of the first symbol in the hash table.
133    pub fn symbol_base(&self) -> u32 {
134        self.symbol_base
135    }
136
137    /// Determine the symbol table length by finding the last entry in the hash table.
138    ///
139    /// Returns `None` if the hash table is empty or invalid.
140    pub fn symbol_table_length(&self, endian: Elf::Endian) -> Option<u32> {
141        // Ensure we find a non-empty bucket.
142        if self.symbol_base == 0 {
143            return None;
144        }
145
146        // Find the highest chain index in a bucket.
147        let mut max_symbol = 0;
148        for bucket in self.buckets {
149            let bucket = bucket.get(endian);
150            if max_symbol < bucket {
151                max_symbol = bucket;
152            }
153        }
154
155        // Find the end of the chain.
156        for value in self
157            .values
158            .get(max_symbol.checked_sub(self.symbol_base)? as usize..)?
159        {
160            max_symbol += 1;
161            if value.get(endian) & 1 != 0 {
162                return Some(max_symbol);
163            }
164        }
165
166        None
167    }
168
169    fn bucket(&self, endian: Elf::Endian, hash: u32) -> SymbolIndex {
170        SymbolIndex(self.buckets[(hash as usize) % self.buckets.len()].get(endian) as usize)
171    }
172
173    /// Use the hash table to find the symbol table entry with the given name, hash, and version.
174    pub fn find<R: ReadRef<'data>>(
175        &self,
176        endian: Elf::Endian,
177        name: &[u8],
178        hash: u32,
179        version: Option<&Version<'_>>,
180        symbols: &SymbolTable<'data, Elf, R>,
181        versions: &VersionTable<'data, Elf>,
182    ) -> Option<(SymbolIndex, &'data Elf::Sym)> {
183        let word_bits = mem::size_of::<Elf::Word>() as u32 * 8;
184
185        // Test against bloom filter.
186        let bloom_count = self.bloom_filters.len() / mem::size_of::<Elf::Word>();
187        let offset =
188            ((hash / word_bits) & (bloom_count as u32 - 1)) * mem::size_of::<Elf::Word>() as u32;
189        let filter = if word_bits == 64 {
190            self.bloom_filters
191                .read_at::<U64<Elf::Endian>>(offset.into())
192                .ok()?
193                .get(endian)
194        } else {
195            self.bloom_filters
196                .read_at::<U32<Elf::Endian>>(offset.into())
197                .ok()?
198                .get(endian)
199                .into()
200        };
201        if filter & (1 << (hash % word_bits)) == 0 {
202            return None;
203        }
204        if filter & (1 << ((hash >> self.bloom_shift) % word_bits)) == 0 {
205            return None;
206        }
207
208        // Get the chain start from the bucket for this hash.
209        let mut index = self.bucket(endian, hash);
210        if index == SymbolIndex(0) {
211            return None;
212        }
213
214        // Test symbols in the chain.
215        let strings = symbols.strings();
216        let symbols = symbols.symbols().get(index.0..)?;
217        let values = self
218            .values
219            .get(index.0.checked_sub(self.symbol_base as usize)?..)?;
220        for (symbol, value) in symbols.iter().zip(values.iter()) {
221            let value = value.get(endian);
222            if value | 1 == hash | 1 {
223                if symbol.name(endian, strings) == Ok(name)
224                    && versions.matches(endian, index, version)
225                {
226                    return Some((index, symbol));
227                }
228            }
229            if value & 1 != 0 {
230                break;
231            }
232            index.0 += 1;
233        }
234        None
235    }
236}