object/read/macho/
symbol.rs

1use alloc::vec::Vec;
2use core::fmt::Debug;
3use core::{fmt, slice, str};
4
5use crate::endian::{self, Endianness};
6use crate::macho;
7use crate::pod::Pod;
8use crate::read::util::StringTable;
9use crate::read::{
10    self, ObjectMap, ObjectMapEntry, ObjectMapFile, ObjectSymbol, ObjectSymbolTable, ReadError,
11    ReadRef, Result, SectionIndex, SectionKind, SymbolFlags, SymbolIndex, SymbolKind, SymbolMap,
12    SymbolMapEntry, SymbolScope, SymbolSection,
13};
14
15use super::{MachHeader, MachOFile};
16
17/// A table of symbol entries in a Mach-O file.
18///
19/// Also includes the string table used for the symbol names.
20///
21/// Returned by [`macho::SymtabCommand::symbols`].
22#[derive(Debug, Clone, Copy)]
23pub struct SymbolTable<'data, Mach: MachHeader, R = &'data [u8]>
24where
25    R: ReadRef<'data>,
26{
27    symbols: &'data [Mach::Nlist],
28    strings: StringTable<'data, R>,
29}
30
31impl<'data, Mach: MachHeader, R: ReadRef<'data>> Default for SymbolTable<'data, Mach, R> {
32    fn default() -> Self {
33        SymbolTable {
34            symbols: &[],
35            strings: Default::default(),
36        }
37    }
38}
39
40impl<'data, Mach: MachHeader, R: ReadRef<'data>> SymbolTable<'data, Mach, R> {
41    #[inline]
42    pub(super) fn new(symbols: &'data [Mach::Nlist], strings: StringTable<'data, R>) -> Self {
43        SymbolTable { symbols, strings }
44    }
45
46    /// Return the string table used for the symbol names.
47    #[inline]
48    pub fn strings(&self) -> StringTable<'data, R> {
49        self.strings
50    }
51
52    /// Iterate over the symbols.
53    #[inline]
54    pub fn iter(&self) -> slice::Iter<'data, Mach::Nlist> {
55        self.symbols.iter()
56    }
57
58    /// Return true if the symbol table is empty.
59    #[inline]
60    pub fn is_empty(&self) -> bool {
61        self.symbols.is_empty()
62    }
63
64    /// The number of symbols.
65    #[inline]
66    pub fn len(&self) -> usize {
67        self.symbols.len()
68    }
69
70    /// Return the symbol at the given index.
71    pub fn symbol(&self, index: SymbolIndex) -> Result<&'data Mach::Nlist> {
72        self.symbols
73            .get(index.0)
74            .read_error("Invalid Mach-O symbol index")
75    }
76
77    /// Construct a map from addresses to a user-defined map entry.
78    pub fn map<Entry: SymbolMapEntry, F: Fn(&'data Mach::Nlist) -> Option<Entry>>(
79        &self,
80        f: F,
81    ) -> SymbolMap<Entry> {
82        let mut symbols = Vec::new();
83        for nlist in self.symbols {
84            if !nlist.is_definition() {
85                continue;
86            }
87            if let Some(entry) = f(nlist) {
88                symbols.push(entry);
89            }
90        }
91        SymbolMap::new(symbols)
92    }
93
94    /// Construct a map from addresses to symbol names and object file names.
95    pub fn object_map(&self, endian: Mach::Endian) -> ObjectMap<'data> {
96        let mut symbols = Vec::new();
97        let mut objects = Vec::new();
98        let mut object = None;
99        let mut current_function = None;
100        // Each module starts with one or two N_SO symbols (path, or directory + filename)
101        // and one N_OSO symbol. The module is terminated by an empty N_SO symbol.
102        for nlist in self.symbols {
103            let n_type = nlist.n_type();
104            if n_type & macho::N_STAB == 0 {
105                continue;
106            }
107            // TODO: includes variables too (N_GSYM, N_STSYM). These may need to get their
108            // address from regular symbols though.
109            match n_type {
110                macho::N_SO => {
111                    object = None;
112                }
113                macho::N_OSO => {
114                    object = None;
115                    if let Ok(name) = nlist.name(endian, self.strings) {
116                        if !name.is_empty() {
117                            object = Some(objects.len());
118                            // `N_OSO` symbol names can be either `/path/to/object.o`
119                            // or `/path/to/archive.a(object.o)`.
120                            let (path, member) = name
121                                .split_last()
122                                .and_then(|(last, head)| {
123                                    if *last != b')' {
124                                        return None;
125                                    }
126                                    let index = head.iter().position(|&x| x == b'(')?;
127                                    let (archive, rest) = head.split_at(index);
128                                    Some((archive, Some(&rest[1..])))
129                                })
130                                .unwrap_or((name, None));
131                            objects.push(ObjectMapFile::new(path, member));
132                        }
133                    }
134                }
135                macho::N_FUN => {
136                    if let Ok(name) = nlist.name(endian, self.strings) {
137                        if !name.is_empty() {
138                            current_function = Some((name, nlist.n_value(endian).into()))
139                        } else if let Some((name, address)) = current_function.take() {
140                            if let Some(object) = object {
141                                symbols.push(ObjectMapEntry {
142                                    address,
143                                    size: nlist.n_value(endian).into(),
144                                    name,
145                                    object,
146                                });
147                            }
148                        }
149                    }
150                }
151                _ => {}
152            }
153        }
154        ObjectMap {
155            symbols: SymbolMap::new(symbols),
156            objects,
157        }
158    }
159}
160
161/// A symbol table in a [`MachOFile32`](super::MachOFile32).
162pub type MachOSymbolTable32<'data, 'file, Endian = Endianness, R = &'data [u8]> =
163    MachOSymbolTable<'data, 'file, macho::MachHeader32<Endian>, R>;
164/// A symbol table in a [`MachOFile64`](super::MachOFile64).
165pub type MachOSymbolTable64<'data, 'file, Endian = Endianness, R = &'data [u8]> =
166    MachOSymbolTable<'data, 'file, macho::MachHeader64<Endian>, R>;
167
168/// A symbol table in a [`MachOFile`].
169#[derive(Debug, Clone, Copy)]
170pub struct MachOSymbolTable<'data, 'file, Mach, R = &'data [u8]>
171where
172    Mach: MachHeader,
173    R: ReadRef<'data>,
174{
175    pub(super) file: &'file MachOFile<'data, Mach, R>,
176}
177
178impl<'data, 'file, Mach, R> read::private::Sealed for MachOSymbolTable<'data, 'file, Mach, R>
179where
180    Mach: MachHeader,
181    R: ReadRef<'data>,
182{
183}
184
185impl<'data, 'file, Mach, R> ObjectSymbolTable<'data> for MachOSymbolTable<'data, 'file, Mach, R>
186where
187    Mach: MachHeader,
188    R: ReadRef<'data>,
189{
190    type Symbol = MachOSymbol<'data, 'file, Mach, R>;
191    type SymbolIterator = MachOSymbolIterator<'data, 'file, Mach, R>;
192
193    fn symbols(&self) -> Self::SymbolIterator {
194        MachOSymbolIterator::new(self.file)
195    }
196
197    fn symbol_by_index(&self, index: SymbolIndex) -> Result<Self::Symbol> {
198        let nlist = self.file.symbols.symbol(index)?;
199        MachOSymbol::new(self.file, index, nlist).read_error("Unsupported Mach-O symbol index")
200    }
201}
202
203/// An iterator for the symbols in a [`MachOFile32`](super::MachOFile32).
204pub type MachOSymbolIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> =
205    MachOSymbolIterator<'data, 'file, macho::MachHeader32<Endian>, R>;
206/// An iterator for the symbols in a [`MachOFile64`](super::MachOFile64).
207pub type MachOSymbolIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> =
208    MachOSymbolIterator<'data, 'file, macho::MachHeader64<Endian>, R>;
209
210/// An iterator for the symbols in a [`MachOFile`].
211pub struct MachOSymbolIterator<'data, 'file, Mach, R = &'data [u8]>
212where
213    Mach: MachHeader,
214    R: ReadRef<'data>,
215{
216    file: &'file MachOFile<'data, Mach, R>,
217    index: SymbolIndex,
218}
219
220impl<'data, 'file, Mach, R> MachOSymbolIterator<'data, 'file, Mach, R>
221where
222    Mach: MachHeader,
223    R: ReadRef<'data>,
224{
225    pub(super) fn new(file: &'file MachOFile<'data, Mach, R>) -> Self {
226        MachOSymbolIterator {
227            file,
228            index: SymbolIndex(0),
229        }
230    }
231
232    pub(super) fn empty(file: &'file MachOFile<'data, Mach, R>) -> Self {
233        MachOSymbolIterator {
234            file,
235            index: SymbolIndex(file.symbols.len()),
236        }
237    }
238}
239
240impl<'data, 'file, Mach, R> fmt::Debug for MachOSymbolIterator<'data, 'file, Mach, R>
241where
242    Mach: MachHeader,
243    R: ReadRef<'data>,
244{
245    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
246        f.debug_struct("MachOSymbolIterator").finish()
247    }
248}
249
250impl<'data, 'file, Mach, R> Iterator for MachOSymbolIterator<'data, 'file, Mach, R>
251where
252    Mach: MachHeader,
253    R: ReadRef<'data>,
254{
255    type Item = MachOSymbol<'data, 'file, Mach, R>;
256
257    fn next(&mut self) -> Option<Self::Item> {
258        loop {
259            let index = self.index;
260            let nlist = self.file.symbols.symbols.get(index.0)?;
261            self.index.0 += 1;
262            if let Some(symbol) = MachOSymbol::new(self.file, index, nlist) {
263                return Some(symbol);
264            }
265        }
266    }
267}
268
269/// A symbol in a [`MachOFile32`](super::MachOFile32).
270pub type MachOSymbol32<'data, 'file, Endian = Endianness, R = &'data [u8]> =
271    MachOSymbol<'data, 'file, macho::MachHeader32<Endian>, R>;
272/// A symbol in a [`MachOFile64`](super::MachOFile64).
273pub type MachOSymbol64<'data, 'file, Endian = Endianness, R = &'data [u8]> =
274    MachOSymbol<'data, 'file, macho::MachHeader64<Endian>, R>;
275
276/// A symbol in a [`MachOFile`].
277///
278/// Most functionality is provided by the [`ObjectSymbol`] trait implementation.
279#[derive(Debug, Clone, Copy)]
280pub struct MachOSymbol<'data, 'file, Mach, R = &'data [u8]>
281where
282    Mach: MachHeader,
283    R: ReadRef<'data>,
284{
285    file: &'file MachOFile<'data, Mach, R>,
286    index: SymbolIndex,
287    nlist: &'data Mach::Nlist,
288}
289
290impl<'data, 'file, Mach, R> MachOSymbol<'data, 'file, Mach, R>
291where
292    Mach: MachHeader,
293    R: ReadRef<'data>,
294{
295    pub(super) fn new(
296        file: &'file MachOFile<'data, Mach, R>,
297        index: SymbolIndex,
298        nlist: &'data Mach::Nlist,
299    ) -> Option<Self> {
300        if nlist.n_type() & macho::N_STAB != 0 {
301            return None;
302        }
303        Some(MachOSymbol { file, index, nlist })
304    }
305
306    /// Get the Mach-O file containing this symbol.
307    pub fn macho_file(&self) -> &'file MachOFile<'data, Mach, R> {
308        self.file
309    }
310
311    /// Get the raw Mach-O symbol structure.
312    pub fn macho_symbol(&self) -> &'data Mach::Nlist {
313        self.nlist
314    }
315}
316
317impl<'data, 'file, Mach, R> read::private::Sealed for MachOSymbol<'data, 'file, Mach, R>
318where
319    Mach: MachHeader,
320    R: ReadRef<'data>,
321{
322}
323
324impl<'data, 'file, Mach, R> ObjectSymbol<'data> for MachOSymbol<'data, 'file, Mach, R>
325where
326    Mach: MachHeader,
327    R: ReadRef<'data>,
328{
329    #[inline]
330    fn index(&self) -> SymbolIndex {
331        self.index
332    }
333
334    fn name_bytes(&self) -> Result<&'data [u8]> {
335        self.nlist.name(self.file.endian, self.file.symbols.strings)
336    }
337
338    fn name(&self) -> Result<&'data str> {
339        let name = self.name_bytes()?;
340        str::from_utf8(name)
341            .ok()
342            .read_error("Non UTF-8 Mach-O symbol name")
343    }
344
345    #[inline]
346    fn address(&self) -> u64 {
347        self.nlist.n_value(self.file.endian).into()
348    }
349
350    #[inline]
351    fn size(&self) -> u64 {
352        0
353    }
354
355    fn kind(&self) -> SymbolKind {
356        self.section()
357            .index()
358            .and_then(|index| self.file.section_internal(index).ok())
359            .map(|section| match section.kind {
360                SectionKind::Text => SymbolKind::Text,
361                SectionKind::Data
362                | SectionKind::ReadOnlyData
363                | SectionKind::ReadOnlyString
364                | SectionKind::UninitializedData
365                | SectionKind::Common => SymbolKind::Data,
366                SectionKind::Tls | SectionKind::UninitializedTls | SectionKind::TlsVariables => {
367                    SymbolKind::Tls
368                }
369                _ => SymbolKind::Unknown,
370            })
371            .unwrap_or(SymbolKind::Unknown)
372    }
373
374    fn section(&self) -> SymbolSection {
375        match self.nlist.n_type() & macho::N_TYPE {
376            macho::N_UNDF => SymbolSection::Undefined,
377            macho::N_ABS => SymbolSection::Absolute,
378            macho::N_SECT => {
379                let n_sect = self.nlist.n_sect();
380                if n_sect != 0 {
381                    SymbolSection::Section(SectionIndex(n_sect as usize))
382                } else {
383                    SymbolSection::Unknown
384                }
385            }
386            _ => SymbolSection::Unknown,
387        }
388    }
389
390    #[inline]
391    fn is_undefined(&self) -> bool {
392        self.nlist.n_type() & macho::N_TYPE == macho::N_UNDF
393    }
394
395    #[inline]
396    fn is_definition(&self) -> bool {
397        self.nlist.is_definition()
398    }
399
400    #[inline]
401    fn is_common(&self) -> bool {
402        // Mach-O common symbols are based on section, not symbol
403        false
404    }
405
406    #[inline]
407    fn is_weak(&self) -> bool {
408        self.nlist.n_desc(self.file.endian) & (macho::N_WEAK_REF | macho::N_WEAK_DEF) != 0
409    }
410
411    fn scope(&self) -> SymbolScope {
412        let n_type = self.nlist.n_type();
413        if n_type & macho::N_TYPE == macho::N_UNDF {
414            SymbolScope::Unknown
415        } else if n_type & macho::N_EXT == 0 {
416            SymbolScope::Compilation
417        } else if n_type & macho::N_PEXT != 0 {
418            SymbolScope::Linkage
419        } else {
420            SymbolScope::Dynamic
421        }
422    }
423
424    #[inline]
425    fn is_global(&self) -> bool {
426        self.scope() != SymbolScope::Compilation
427    }
428
429    #[inline]
430    fn is_local(&self) -> bool {
431        self.scope() == SymbolScope::Compilation
432    }
433
434    #[inline]
435    fn flags(&self) -> SymbolFlags<SectionIndex, SymbolIndex> {
436        let n_desc = self.nlist.n_desc(self.file.endian);
437        SymbolFlags::MachO { n_desc }
438    }
439}
440
441/// A trait for generic access to [`macho::Nlist32`] and [`macho::Nlist64`].
442#[allow(missing_docs)]
443pub trait Nlist: Debug + Pod {
444    type Word: Into<u64>;
445    type Endian: endian::Endian;
446
447    fn n_strx(&self, endian: Self::Endian) -> u32;
448    fn n_type(&self) -> u8;
449    fn n_sect(&self) -> u8;
450    fn n_desc(&self, endian: Self::Endian) -> u16;
451    fn n_value(&self, endian: Self::Endian) -> Self::Word;
452
453    fn name<'data, R: ReadRef<'data>>(
454        &self,
455        endian: Self::Endian,
456        strings: StringTable<'data, R>,
457    ) -> Result<&'data [u8]> {
458        strings
459            .get(self.n_strx(endian))
460            .read_error("Invalid Mach-O symbol name offset")
461    }
462
463    /// Return true if this is a STAB symbol.
464    ///
465    /// This determines the meaning of the `n_type` field.
466    fn is_stab(&self) -> bool {
467        self.n_type() & macho::N_STAB != 0
468    }
469
470    /// Return true if this is an undefined symbol.
471    fn is_undefined(&self) -> bool {
472        let n_type = self.n_type();
473        n_type & macho::N_STAB == 0 && n_type & macho::N_TYPE == macho::N_UNDF
474    }
475
476    /// Return true if the symbol is a definition of a function or data object.
477    fn is_definition(&self) -> bool {
478        let n_type = self.n_type();
479        n_type & macho::N_STAB == 0 && n_type & macho::N_TYPE == macho::N_SECT
480    }
481
482    /// Return the library ordinal.
483    ///
484    /// This is either a 1-based index into the dylib load commands,
485    /// or a special ordinal.
486    #[inline]
487    fn library_ordinal(&self, endian: Self::Endian) -> u8 {
488        (self.n_desc(endian) >> 8) as u8
489    }
490}
491
492impl<Endian: endian::Endian> Nlist for macho::Nlist32<Endian> {
493    type Word = u32;
494    type Endian = Endian;
495
496    fn n_strx(&self, endian: Self::Endian) -> u32 {
497        self.n_strx.get(endian)
498    }
499    fn n_type(&self) -> u8 {
500        self.n_type
501    }
502    fn n_sect(&self) -> u8 {
503        self.n_sect
504    }
505    fn n_desc(&self, endian: Self::Endian) -> u16 {
506        self.n_desc.get(endian)
507    }
508    fn n_value(&self, endian: Self::Endian) -> Self::Word {
509        self.n_value.get(endian)
510    }
511}
512
513impl<Endian: endian::Endian> Nlist for macho::Nlist64<Endian> {
514    type Word = u64;
515    type Endian = Endian;
516
517    fn n_strx(&self, endian: Self::Endian) -> u32 {
518        self.n_strx.get(endian)
519    }
520    fn n_type(&self) -> u8 {
521        self.n_type
522    }
523    fn n_sect(&self) -> u8 {
524        self.n_sect
525    }
526    fn n_desc(&self, endian: Self::Endian) -> u16 {
527        self.n_desc.get(endian)
528    }
529    fn n_value(&self, endian: Self::Endian) -> Self::Word {
530        self.n_value.get(endian)
531    }
532}