object/read/
traits.rs

1use alloc::borrow::Cow;
2use alloc::vec::Vec;
3
4use crate::endian::Endianness;
5use crate::read::{
6    self, Architecture, CodeView, ComdatKind, CompressedData, CompressedFileRange, Export,
7    FileFlags, Import, ObjectKind, ObjectMap, Relocation, RelocationMap, Result, SectionFlags,
8    SectionIndex, SectionKind, SegmentFlags, SubArchitecture, SymbolFlags, SymbolIndex, SymbolKind,
9    SymbolMap, SymbolMapName, SymbolScope, SymbolSection,
10};
11
12/// An object file.
13///
14/// This is the primary trait for the unified read API.
15pub trait Object<'data>: read::private::Sealed {
16    /// A loadable segment in the object file.
17    type Segment<'file>: ObjectSegment<'data>
18    where
19        Self: 'file,
20        'data: 'file;
21
22    /// An iterator for the loadable segments in the object file.
23    type SegmentIterator<'file>: Iterator<Item = Self::Segment<'file>>
24    where
25        Self: 'file,
26        'data: 'file;
27
28    /// A section in the object file.
29    type Section<'file>: ObjectSection<'data>
30    where
31        Self: 'file,
32        'data: 'file;
33
34    /// An iterator for the sections in the object file.
35    type SectionIterator<'file>: Iterator<Item = Self::Section<'file>>
36    where
37        Self: 'file,
38        'data: 'file;
39
40    /// A COMDAT section group in the object file.
41    type Comdat<'file>: ObjectComdat<'data>
42    where
43        Self: 'file,
44        'data: 'file;
45
46    /// An iterator for the COMDAT section groups in the object file.
47    type ComdatIterator<'file>: Iterator<Item = Self::Comdat<'file>>
48    where
49        Self: 'file,
50        'data: 'file;
51
52    /// A symbol in the object file.
53    type Symbol<'file>: ObjectSymbol<'data>
54    where
55        Self: 'file,
56        'data: 'file;
57
58    /// An iterator for symbols in the object file.
59    type SymbolIterator<'file>: Iterator<Item = Self::Symbol<'file>>
60    where
61        Self: 'file,
62        'data: 'file;
63
64    /// A symbol table in the object file.
65    type SymbolTable<'file>: ObjectSymbolTable<
66        'data,
67        Symbol = Self::Symbol<'file>,
68        SymbolIterator = Self::SymbolIterator<'file>,
69    >
70    where
71        Self: 'file,
72        'data: 'file;
73
74    /// An iterator for the dynamic relocations in the file.
75    ///
76    /// The first field in the item tuple is the address
77    /// that the relocation applies to.
78    type DynamicRelocationIterator<'file>: Iterator<Item = (u64, Relocation)>
79    where
80        Self: 'file,
81        'data: 'file;
82
83    /// Get the architecture type of the file.
84    fn architecture(&self) -> Architecture;
85
86    /// Get the sub-architecture type of the file if known.
87    ///
88    /// A value of `None` has a range of meanings: the file supports all
89    /// sub-architectures, the file does not explicitly specify a
90    /// sub-architecture, or the sub-architecture is currently unrecognized.
91    fn sub_architecture(&self) -> Option<SubArchitecture> {
92        None
93    }
94
95    /// Get the endianness of the file.
96    #[inline]
97    fn endianness(&self) -> Endianness {
98        if self.is_little_endian() {
99            Endianness::Little
100        } else {
101            Endianness::Big
102        }
103    }
104
105    /// Return true if the file is little endian, false if it is big endian.
106    fn is_little_endian(&self) -> bool;
107
108    /// Return true if the file can contain 64-bit addresses.
109    fn is_64(&self) -> bool;
110
111    /// Return the kind of this object.
112    fn kind(&self) -> ObjectKind;
113
114    /// Get an iterator for the loadable segments in the file.
115    ///
116    /// For ELF, this is program headers with type [`PT_LOAD`](crate::elf::PT_LOAD).
117    /// For Mach-O, this is load commands with type [`LC_SEGMENT`](crate::macho::LC_SEGMENT)
118    /// or [`LC_SEGMENT_64`](crate::macho::LC_SEGMENT_64).
119    /// For PE, this is all sections.
120    fn segments(&self) -> Self::SegmentIterator<'_>;
121
122    /// Get the section named `section_name`, if such a section exists.
123    ///
124    /// If `section_name` starts with a '.' then it is treated as a system
125    /// section name, and is compared using the conventions specific to the
126    /// object file format. This includes:
127    /// - if ".debug_str_offsets" is requested for a Mach-O object file, then
128    ///   the actual section name that is searched for is "__debug_str_offs".
129    /// - if ".debug_info" is requested for an ELF object file, then
130    ///   ".zdebug_info" may be returned (and similarly for other debug
131    ///   sections). Similarly, if ".debug_info" is requested for a Mach-O
132    ///   object file, then "__zdebug_info" may be returned.
133    ///
134    /// For some object files, multiple segments may contain sections with the
135    /// same name. In this case, the first matching section will be used.
136    ///
137    /// This method skips over sections with invalid names.
138    fn section_by_name(&self, section_name: &str) -> Option<Self::Section<'_>> {
139        self.section_by_name_bytes(section_name.as_bytes())
140    }
141
142    /// Like [`Self::section_by_name`], but allows names that are not UTF-8.
143    fn section_by_name_bytes<'file>(
144        &'file self,
145        section_name: &[u8],
146    ) -> Option<Self::Section<'file>>;
147
148    /// Get the section at the given index.
149    ///
150    /// The meaning of the index depends on the object file.
151    ///
152    /// For some object files, this requires iterating through all sections.
153    ///
154    /// Returns an error if the index is invalid.
155    fn section_by_index(&self, index: SectionIndex) -> Result<Self::Section<'_>>;
156
157    /// Get an iterator for the sections in the file.
158    fn sections(&self) -> Self::SectionIterator<'_>;
159
160    /// Get an iterator for the COMDAT section groups in the file.
161    fn comdats(&self) -> Self::ComdatIterator<'_>;
162
163    /// Get the debugging symbol table, if any.
164    fn symbol_table(&self) -> Option<Self::SymbolTable<'_>>;
165
166    /// Get the debugging symbol at the given index.
167    ///
168    /// The meaning of the index depends on the object file.
169    ///
170    /// Returns an error if the index is invalid.
171    fn symbol_by_index(&self, index: SymbolIndex) -> Result<Self::Symbol<'_>>;
172
173    /// Get an iterator for the debugging symbols in the file.
174    ///
175    /// This may skip over symbols that are malformed or unsupported.
176    ///
177    /// For Mach-O files, this does not include STAB entries.
178    fn symbols(&self) -> Self::SymbolIterator<'_>;
179
180    /// Get the symbol named `symbol_name`, if the symbol exists.
181    fn symbol_by_name<'file>(&'file self, symbol_name: &str) -> Option<Self::Symbol<'file>> {
182        self.symbol_by_name_bytes(symbol_name.as_bytes())
183    }
184
185    /// Like [`Self::symbol_by_name`], but allows names that are not UTF-8.
186    fn symbol_by_name_bytes<'file>(&'file self, symbol_name: &[u8]) -> Option<Self::Symbol<'file>> {
187        self.symbols()
188            .find(|sym| sym.name_bytes() == Ok(symbol_name))
189    }
190
191    /// Get the dynamic linking symbol table, if any.
192    ///
193    /// Only ELF has a separate dynamic linking symbol table.
194    /// Consider using [`Self::exports`] or [`Self::imports`] instead.
195    fn dynamic_symbol_table(&self) -> Option<Self::SymbolTable<'_>>;
196
197    /// Get an iterator for the dynamic linking symbols in the file.
198    ///
199    /// This may skip over symbols that are malformed or unsupported.
200    ///
201    /// Only ELF has dynamic linking symbols.
202    /// Other file formats will return an empty iterator.
203    /// Consider using [`Self::exports`] or [`Self::imports`] instead.
204    fn dynamic_symbols(&self) -> Self::SymbolIterator<'_>;
205
206    /// Get the dynamic relocations for this file.
207    ///
208    /// Symbol indices in these relocations refer to the dynamic symbol table.
209    ///
210    /// Only ELF has dynamic relocations.
211    fn dynamic_relocations(&self) -> Option<Self::DynamicRelocationIterator<'_>>;
212
213    /// Construct a map from addresses to symbol names.
214    ///
215    /// The map will only contain defined text and data symbols.
216    /// The dynamic symbol table will only be used if there are no debugging symbols.
217    fn symbol_map(&self) -> SymbolMap<SymbolMapName<'data>> {
218        let mut symbols = Vec::new();
219        if let Some(table) = self.symbol_table().or_else(|| self.dynamic_symbol_table()) {
220            // Sometimes symbols share addresses. Collect them all then choose the "best".
221            let mut all_symbols = Vec::new();
222            for symbol in table.symbols() {
223                // Must have an address.
224                if !symbol.is_definition() {
225                    continue;
226                }
227                // Must have a name.
228                let name = match symbol.name() {
229                    Ok(name) => name,
230                    _ => continue,
231                };
232                if name.is_empty() {
233                    continue;
234                }
235
236                // Lower is better.
237                let mut priority = 0u32;
238
239                // Prefer known kind.
240                match symbol.kind() {
241                    SymbolKind::Text | SymbolKind::Data => {}
242                    SymbolKind::Unknown => priority += 1,
243                    _ => continue,
244                }
245                priority *= 2;
246
247                // Prefer global visibility.
248                priority += match symbol.scope() {
249                    SymbolScope::Unknown => 3,
250                    SymbolScope::Compilation => 2,
251                    SymbolScope::Linkage => 1,
252                    SymbolScope::Dynamic => 0,
253                };
254                priority *= 4;
255
256                // Prefer later entries (earlier symbol is likely to be less specific).
257                let index = !0 - symbol.index().0;
258
259                // Tuple is ordered for sort.
260                all_symbols.push((symbol.address(), priority, index, name));
261            }
262            // Unstable sort is okay because tuple includes index.
263            all_symbols.sort_unstable();
264
265            let mut previous_address = !0;
266            for (address, _priority, _index, name) in all_symbols {
267                if address != previous_address {
268                    symbols.push(SymbolMapName::new(address, name));
269                    previous_address = address;
270                }
271            }
272        }
273        SymbolMap::new(symbols)
274    }
275
276    /// Construct a map from addresses to symbol names and object file names.
277    ///
278    /// This is derived from Mach-O STAB entries.
279    fn object_map(&self) -> ObjectMap<'data> {
280        ObjectMap::default()
281    }
282
283    /// Get the imported symbols.
284    fn imports(&self) -> Result<Vec<Import<'data>>>;
285
286    /// Get the exported symbols that expose both a name and an address.
287    ///
288    /// Some file formats may provide other kinds of symbols that can be retrieved using
289    /// the low level API.
290    fn exports(&self) -> Result<Vec<Export<'data>>>;
291
292    /// Return true if the file contains DWARF debug information sections, false if not.
293    fn has_debug_symbols(&self) -> bool;
294
295    /// The UUID from a Mach-O [`LC_UUID`](crate::macho::LC_UUID) load command.
296    #[inline]
297    fn mach_uuid(&self) -> Result<Option<[u8; 16]>> {
298        Ok(None)
299    }
300
301    /// The build ID from an ELF [`NT_GNU_BUILD_ID`](crate::elf::NT_GNU_BUILD_ID) note.
302    #[inline]
303    fn build_id(&self) -> Result<Option<&'data [u8]>> {
304        Ok(None)
305    }
306
307    /// The filename and CRC from a `.gnu_debuglink` section.
308    #[inline]
309    fn gnu_debuglink(&self) -> Result<Option<(&'data [u8], u32)>> {
310        Ok(None)
311    }
312
313    /// The filename and build ID from a `.gnu_debugaltlink` section.
314    #[inline]
315    fn gnu_debugaltlink(&self) -> Result<Option<(&'data [u8], &'data [u8])>> {
316        Ok(None)
317    }
318
319    /// The filename and GUID from the PE CodeView section.
320    #[inline]
321    fn pdb_info(&self) -> Result<Option<CodeView<'_>>> {
322        Ok(None)
323    }
324
325    /// Get the base address used for relative virtual addresses.
326    ///
327    /// Currently this is only non-zero for PE.
328    fn relative_address_base(&self) -> u64;
329
330    /// Get the virtual address of the entry point of the binary.
331    fn entry(&self) -> u64;
332
333    /// File flags that are specific to each file format.
334    fn flags(&self) -> FileFlags;
335}
336
/// A loadable segment in an [`Object`].
///
/// This trait is part of the unified read API.
pub trait ObjectSegment<'data>: read::private::Sealed {
    /// Returns the virtual address of the segment.
    fn address(&self) -> u64;

    /// Returns the size of the segment in memory.
    fn size(&self) -> u64;

    /// Returns the alignment of the segment in memory.
    fn align(&self) -> u64;

    /// Returns the offset and size of the segment in the file,
    /// as an `(offset, size)` pair.
    fn file_range(&self) -> (u64, u64);

    /// Returns a reference to the file contents of the segment.
    ///
    /// The length of this data may be different from the size of the
    /// segment in memory.
    fn data(&self) -> Result<&'data [u8]>;

    /// Return the segment data in the given range.
    ///
    /// The range is described by `address` and `size`.
    ///
    /// Returns `Ok(None)` if the segment does not contain the given range.
    fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>>;

    /// Returns the name of the segment as raw bytes.
    ///
    /// The name is optional, so the result may be `Ok(None)`.
    fn name_bytes(&self) -> Result<Option<&[u8]>>;

    /// Returns the name of the segment.
    ///
    /// The name is optional, so the result may be `Ok(None)`.
    ///
    /// Returns an error if the name is not UTF-8.
    fn name(&self) -> Result<Option<&str>>;

    /// Returns the flags of the segment.
    fn flags(&self) -> SegmentFlags;
}
375
376/// A section in an [`Object`].
377///
378/// This trait is part of the unified read API.
379pub trait ObjectSection<'data>: read::private::Sealed {
380    /// An iterator for the relocations for a section.
381    ///
382    /// The first field in the item tuple is the section offset
383    /// that the relocation applies to.
384    type RelocationIterator: Iterator<Item = (u64, Relocation)>;
385
386    /// Returns the section index.
387    fn index(&self) -> SectionIndex;
388
389    /// Returns the address of the section.
390    fn address(&self) -> u64;
391
392    /// Returns the size of the section in memory.
393    fn size(&self) -> u64;
394
395    /// Returns the alignment of the section in memory.
396    fn align(&self) -> u64;
397
398    /// Returns offset and size of on-disk segment (if any).
399    fn file_range(&self) -> Option<(u64, u64)>;
400
401    /// Returns the raw contents of the section.
402    ///
403    /// The length of this data may be different from the size of the
404    /// section in memory.
405    ///
406    /// This does not do any decompression.
407    fn data(&self) -> Result<&'data [u8]>;
408
409    /// Return the raw contents of the section data in the given range.
410    ///
411    /// This does not do any decompression.
412    ///
413    /// Returns `Ok(None)` if the section does not contain the given range.
414    fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>>;
415
416    /// Returns the potentially compressed file range of the section,
417    /// along with information about the compression.
418    fn compressed_file_range(&self) -> Result<CompressedFileRange>;
419
420    /// Returns the potentially compressed contents of the section,
421    /// along with information about the compression.
422    fn compressed_data(&self) -> Result<CompressedData<'data>>;
423
424    /// Returns the uncompressed contents of the section.
425    ///
426    /// The length of this data may be different from the size of the
427    /// section in memory.
428    ///
429    /// If no compression is detected, then returns the data unchanged.
430    /// Returns `Err` if decompression fails.
431    fn uncompressed_data(&self) -> Result<Cow<'data, [u8]>> {
432        self.compressed_data()?.decompress()
433    }
434
435    /// Returns the name of the section.
436    fn name_bytes(&self) -> Result<&'data [u8]>;
437
438    /// Returns the name of the section.
439    ///
440    /// Returns an error if the name is not UTF-8.
441    fn name(&self) -> Result<&'data str>;
442
443    /// Returns the name of the segment for this section.
444    fn segment_name_bytes(&self) -> Result<Option<&[u8]>>;
445
446    /// Returns the name of the segment for this section.
447    ///
448    /// Returns an error if the name is not UTF-8.
449    fn segment_name(&self) -> Result<Option<&str>>;
450
451    /// Return the kind of this section.
452    fn kind(&self) -> SectionKind;
453
454    /// Get the relocations for this section.
455    fn relocations(&self) -> Self::RelocationIterator;
456
457    /// Construct a relocation map for this section.
458    fn relocation_map(&self) -> Result<RelocationMap>;
459
460    /// Section flags that are specific to each file format.
461    fn flags(&self) -> SectionFlags;
462}
463
/// A COMDAT section group in an [`Object`].
///
/// This trait is part of the unified read API.
pub trait ObjectComdat<'data>: read::private::Sealed {
    /// An iterator for the sections in the section group.
    ///
    /// Sections are yielded by index rather than as section objects.
    type SectionIterator: Iterator<Item = SectionIndex>;

    /// Returns the COMDAT selection kind.
    fn kind(&self) -> ComdatKind;

    /// Returns the index of the symbol used for the name of the COMDAT section group.
    fn symbol(&self) -> SymbolIndex;

    /// Returns the name of the COMDAT section group as raw bytes.
    fn name_bytes(&self) -> Result<&'data [u8]>;

    /// Returns the name of the COMDAT section group.
    ///
    /// Returns an error if the name is not UTF-8.
    fn name(&self) -> Result<&'data str>;

    /// Get the sections in this section group.
    fn sections(&self) -> Self::SectionIterator;
}
488
/// A symbol table in an [`Object`].
///
/// This trait is part of the unified read API.
pub trait ObjectSymbolTable<'data>: read::private::Sealed {
    /// A symbol table entry.
    type Symbol: ObjectSymbol<'data>;

    /// An iterator for the symbols in a symbol table.
    ///
    /// It yields values of type [`Self::Symbol`].
    type SymbolIterator: Iterator<Item = Self::Symbol>;

    /// Get an iterator for the symbols in the table.
    ///
    /// This may skip over symbols that are malformed or unsupported.
    fn symbols(&self) -> Self::SymbolIterator;

    /// Get the symbol at the given index.
    ///
    /// The meaning of the index depends on the object file.
    ///
    /// Returns an error if the index is invalid.
    fn symbol_by_index(&self, index: SymbolIndex) -> Result<Self::Symbol>;
}
511
512/// A symbol table entry in an [`Object`].
513///
514/// This trait is part of the unified read API.
515pub trait ObjectSymbol<'data>: read::private::Sealed {
516    /// The index of the symbol.
517    fn index(&self) -> SymbolIndex;
518
519    /// The name of the symbol.
520    fn name_bytes(&self) -> Result<&'data [u8]>;
521
522    /// The name of the symbol.
523    ///
524    /// Returns an error if the name is not UTF-8.
525    fn name(&self) -> Result<&'data str>;
526
527    /// The address of the symbol. May be zero if the address is unknown.
528    fn address(&self) -> u64;
529
530    /// The size of the symbol. May be zero if the size is unknown.
531    fn size(&self) -> u64;
532
533    /// Return the kind of this symbol.
534    fn kind(&self) -> SymbolKind;
535
536    /// Returns the section where the symbol is defined.
537    fn section(&self) -> SymbolSection;
538
539    /// Returns the section index for the section containing this symbol.
540    ///
541    /// May return `None` if the symbol is not defined in a section.
542    fn section_index(&self) -> Option<SectionIndex> {
543        self.section().index()
544    }
545
546    /// Return true if the symbol is undefined.
547    fn is_undefined(&self) -> bool;
548
549    /// Return true if the symbol is a definition of a function or data object
550    /// that has a known address.
551    ///
552    /// This is primarily used to implement [`Object::symbol_map`].
553    fn is_definition(&self) -> bool;
554
555    /// Return true if the symbol is common data.
556    ///
557    /// Note: does not check for [`SymbolSection::Section`] with [`SectionKind::Common`].
558    fn is_common(&self) -> bool;
559
560    /// Return true if the symbol is weak.
561    fn is_weak(&self) -> bool;
562
563    /// Returns the symbol scope.
564    fn scope(&self) -> SymbolScope;
565
566    /// Return true if the symbol visible outside of the compilation unit.
567    ///
568    /// This treats [`SymbolScope::Unknown`] as global.
569    fn is_global(&self) -> bool;
570
571    /// Return true if the symbol is only visible within the compilation unit.
572    fn is_local(&self) -> bool;
573
574    /// Symbol flags that are specific to each file format.
575    fn flags(&self) -> SymbolFlags<SectionIndex, SymbolIndex>;
576}
577
/// An iterator for files that don't have dynamic relocations.
///
/// It is always empty: [`Iterator::next`] returns `None` on every call.
#[derive(Debug)]
pub struct NoDynamicRelocationIterator;

impl Iterator for NoDynamicRelocationIterator {
    // Same item type as the bound on `Object::DynamicRelocationIterator`.
    type Item = (u64, Relocation);

    // There are no relocations to report, so the iterator ends immediately.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        None
    }
}
589}