object/read/macho/
dyld_cache.rs

1use alloc::vec::Vec;
2use core::slice;
3
4use crate::endian::{Endian, Endianness};
5use crate::macho;
6use crate::read::{Architecture, Error, File, ReadError, ReadRef, Result};
7
8/// A parsed representation of the dyld shared cache.
9#[derive(Debug)]
10pub struct DyldCache<'data, E = Endianness, R = &'data [u8]>
11where
12    E: Endian,
13    R: ReadRef<'data>,
14{
15    endian: E,
16    data: R,
17    subcaches: Vec<DyldSubCache<'data, E, R>>,
18    mappings: &'data [macho::DyldCacheMappingInfo<E>],
19    images: &'data [macho::DyldCacheImageInfo<E>],
20    arch: Architecture,
21}
22
23/// Information about a subcache.
24#[derive(Debug)]
25pub struct DyldSubCache<'data, E = Endianness, R = &'data [u8]>
26where
27    E: Endian,
28    R: ReadRef<'data>,
29{
30    data: R,
31    mappings: &'data [macho::DyldCacheMappingInfo<E>],
32}
33
34/// A slice of structs describing each subcache. The struct gained
35/// an additional field (the file suffix) in dyld-1042.1 (macOS 13 / iOS 16),
36/// so this is an enum of the two possible slice types.
37#[derive(Debug, Clone, Copy)]
38#[non_exhaustive]
39pub enum DyldSubCacheSlice<'data, E: Endian> {
40    /// V1, used between dyld-940 and dyld-1042.1.
41    V1(&'data [macho::DyldSubCacheEntryV1<E>]),
42    /// V2, used since dyld-1042.1.
43    V2(&'data [macho::DyldSubCacheEntryV2<E>]),
44}
45
/// Offset of the end of the `images_across_all_subcaches_count` header field.
///
/// A header at least this large (its size is taken from `mapping_offset`)
/// carries the subcache fields.
const MIN_HEADER_SIZE_SUBCACHES_V1: u32 = 0x1c8;

/// Offset of the end of the `cacheSubType` header field.
///
/// That field directly follows `images_across_all_subcaches_count`; it is not
/// currently part of our definition of the `DyldCacheHeader` type. A header at
/// least this large uses the V2 subcache entry layout.
const MIN_HEADER_SIZE_SUBCACHES_V2: u32 = 0x1d0;
53
54impl<'data, E, R> DyldCache<'data, E, R>
55where
56    E: Endian,
57    R: ReadRef<'data>,
58{
59    /// Parse the raw dyld shared cache data.
60    ///
61    /// For shared caches from macOS 12 / iOS 15 and above, the subcache files need to be
62    /// supplied as well, in the correct order, with the `.symbols` subcache last (if present).
63    /// For example, `data` would be the data for `dyld_shared_cache_x86_64`,
64    /// and `subcache_data` would be the data for `[dyld_shared_cache_x86_64.1, dyld_shared_cache_x86_64.2, ...]`.
65    pub fn parse(data: R, subcache_data: &[R]) -> Result<Self> {
66        let header = macho::DyldCacheHeader::parse(data)?;
67        let (arch, endian) = header.parse_magic()?;
68        let mappings = header.mappings(endian, data)?;
69
70        let symbols_subcache_uuid = header.symbols_subcache_uuid(endian);
71        let subcaches_info = header.subcaches(endian, data)?;
72        let subcaches_count = match subcaches_info {
73            Some(DyldSubCacheSlice::V1(subcaches)) => subcaches.len(),
74            Some(DyldSubCacheSlice::V2(subcaches)) => subcaches.len(),
75            None => 0,
76        };
77        if subcache_data.len() != subcaches_count + symbols_subcache_uuid.is_some() as usize {
78            return Err(Error("Incorrect number of SubCaches"));
79        }
80
81        // Split out the .symbols subcache data from the other subcaches.
82        let (symbols_subcache_data_and_uuid, subcache_data) =
83            if let Some(symbols_uuid) = symbols_subcache_uuid {
84                let (sym_data, rest_data) = subcache_data.split_last().unwrap();
85                (Some((*sym_data, symbols_uuid)), rest_data)
86            } else {
87                (None, subcache_data)
88            };
89
90        // Read the regular SubCaches, if present.
91        let mut subcaches = Vec::new();
92        if let Some(subcaches_info) = subcaches_info {
93            let (v1, v2) = match subcaches_info {
94                DyldSubCacheSlice::V1(s) => (s, &[][..]),
95                DyldSubCacheSlice::V2(s) => (&[][..], s),
96            };
97            let uuids = v1.iter().map(|e| &e.uuid).chain(v2.iter().map(|e| &e.uuid));
98            for (&data, uuid) in subcache_data.iter().zip(uuids) {
99                let sc_header = macho::DyldCacheHeader::<E>::parse(data)?;
100                if &sc_header.uuid != uuid {
101                    return Err(Error("Unexpected SubCache UUID"));
102                }
103                let mappings = sc_header.mappings(endian, data)?;
104                subcaches.push(DyldSubCache { data, mappings });
105            }
106        }
107
108        // Read the .symbols SubCache, if present.
109        // Other than the UUID verification, the symbols SubCache is currently unused.
110        let _symbols_subcache = match symbols_subcache_data_and_uuid {
111            Some((data, uuid)) => {
112                let sc_header = macho::DyldCacheHeader::<E>::parse(data)?;
113                if sc_header.uuid != uuid {
114                    return Err(Error("Unexpected .symbols SubCache UUID"));
115                }
116                let mappings = sc_header.mappings(endian, data)?;
117                Some(DyldSubCache { data, mappings })
118            }
119            None => None,
120        };
121
122        let images = header.images(endian, data)?;
123        Ok(DyldCache {
124            endian,
125            data,
126            subcaches,
127            mappings,
128            images,
129            arch,
130        })
131    }
132
133    /// Get the architecture type of the file.
134    pub fn architecture(&self) -> Architecture {
135        self.arch
136    }
137
138    /// Get the endianness of the file.
139    #[inline]
140    pub fn endianness(&self) -> Endianness {
141        if self.is_little_endian() {
142            Endianness::Little
143        } else {
144            Endianness::Big
145        }
146    }
147
148    /// Return true if the file is little endian, false if it is big endian.
149    pub fn is_little_endian(&self) -> bool {
150        self.endian.is_little_endian()
151    }
152
153    /// Iterate over the images in this cache.
154    pub fn images<'cache>(&'cache self) -> DyldCacheImageIterator<'data, 'cache, E, R> {
155        DyldCacheImageIterator {
156            cache: self,
157            iter: self.images.iter(),
158        }
159    }
160
161    /// Find the address in a mapping and return the cache or subcache data it was found in,
162    /// together with the translated file offset.
163    pub fn data_and_offset_for_address(&self, address: u64) -> Option<(R, u64)> {
164        if let Some(file_offset) = address_to_file_offset(address, self.endian, self.mappings) {
165            return Some((self.data, file_offset));
166        }
167        for subcache in &self.subcaches {
168            if let Some(file_offset) =
169                address_to_file_offset(address, self.endian, subcache.mappings)
170            {
171                return Some((subcache.data, file_offset));
172            }
173        }
174        None
175    }
176}
177
178/// An iterator over all the images (dylibs) in the dyld shared cache.
179#[derive(Debug)]
180pub struct DyldCacheImageIterator<'data, 'cache, E = Endianness, R = &'data [u8]>
181where
182    E: Endian,
183    R: ReadRef<'data>,
184{
185    cache: &'cache DyldCache<'data, E, R>,
186    iter: slice::Iter<'data, macho::DyldCacheImageInfo<E>>,
187}
188
189impl<'data, 'cache, E, R> Iterator for DyldCacheImageIterator<'data, 'cache, E, R>
190where
191    E: Endian,
192    R: ReadRef<'data>,
193{
194    type Item = DyldCacheImage<'data, 'cache, E, R>;
195
196    fn next(&mut self) -> Option<DyldCacheImage<'data, 'cache, E, R>> {
197        let image_info = self.iter.next()?;
198        Some(DyldCacheImage {
199            cache: self.cache,
200            image_info,
201        })
202    }
203}
204
205/// One image (dylib) from inside the dyld shared cache.
206#[derive(Debug)]
207pub struct DyldCacheImage<'data, 'cache, E = Endianness, R = &'data [u8]>
208where
209    E: Endian,
210    R: ReadRef<'data>,
211{
212    pub(crate) cache: &'cache DyldCache<'data, E, R>,
213    image_info: &'data macho::DyldCacheImageInfo<E>,
214}
215
216impl<'data, 'cache, E, R> DyldCacheImage<'data, 'cache, E, R>
217where
218    E: Endian,
219    R: ReadRef<'data>,
220{
221    /// The file system path of this image.
222    pub fn path(&self) -> Result<&'data str> {
223        let path = self.image_info.path(self.cache.endian, self.cache.data)?;
224        // The path should always be ascii, so from_utf8 should always succeed.
225        let path = core::str::from_utf8(path).map_err(|_| Error("Path string not valid utf-8"))?;
226        Ok(path)
227    }
228
229    /// The subcache data which contains the Mach-O header for this image,
230    /// together with the file offset at which this image starts.
231    pub fn image_data_and_offset(&self) -> Result<(R, u64)> {
232        let address = self.image_info.address.get(self.cache.endian);
233        self.cache
234            .data_and_offset_for_address(address)
235            .ok_or(Error("Address not found in any mapping"))
236    }
237
238    /// Parse this image into an Object.
239    pub fn parse_object(&self) -> Result<File<'data, R>> {
240        File::parse_dyld_cache_image(self)
241    }
242}
243
244impl<E: Endian> macho::DyldCacheHeader<E> {
245    /// Read the dyld cache header.
246    pub fn parse<'data, R: ReadRef<'data>>(data: R) -> Result<&'data Self> {
247        data.read_at::<macho::DyldCacheHeader<E>>(0)
248            .read_error("Invalid dyld cache header size or alignment")
249    }
250
251    /// Returns (arch, endian) based on the magic string.
252    pub fn parse_magic(&self) -> Result<(Architecture, E)> {
253        let (arch, is_big_endian) = match &self.magic {
254            b"dyld_v1    i386\0" => (Architecture::I386, false),
255            b"dyld_v1  x86_64\0" => (Architecture::X86_64, false),
256            b"dyld_v1 x86_64h\0" => (Architecture::X86_64, false),
257            b"dyld_v1     ppc\0" => (Architecture::PowerPc, true),
258            b"dyld_v1   armv6\0" => (Architecture::Arm, false),
259            b"dyld_v1   armv7\0" => (Architecture::Arm, false),
260            b"dyld_v1  armv7f\0" => (Architecture::Arm, false),
261            b"dyld_v1  armv7s\0" => (Architecture::Arm, false),
262            b"dyld_v1  armv7k\0" => (Architecture::Arm, false),
263            b"dyld_v1   arm64\0" => (Architecture::Aarch64, false),
264            b"dyld_v1  arm64e\0" => (Architecture::Aarch64, false),
265            _ => return Err(Error("Unrecognized dyld cache magic")),
266        };
267        let endian =
268            E::from_big_endian(is_big_endian).read_error("Unsupported dyld cache endian")?;
269        Ok((arch, endian))
270    }
271
272    /// Return the mapping information table.
273    pub fn mappings<'data, R: ReadRef<'data>>(
274        &self,
275        endian: E,
276        data: R,
277    ) -> Result<&'data [macho::DyldCacheMappingInfo<E>]> {
278        data.read_slice_at::<macho::DyldCacheMappingInfo<E>>(
279            self.mapping_offset.get(endian).into(),
280            self.mapping_count.get(endian) as usize,
281        )
282        .read_error("Invalid dyld cache mapping size or alignment")
283    }
284
285    /// Return the information about subcaches, if present.
286    ///
287    /// Returns `None` for dyld caches produced before dyld-940 (macOS 12).
288    pub fn subcaches<'data, R: ReadRef<'data>>(
289        &self,
290        endian: E,
291        data: R,
292    ) -> Result<Option<DyldSubCacheSlice<'data, E>>> {
293        let header_size = self.mapping_offset.get(endian);
294        if header_size >= MIN_HEADER_SIZE_SUBCACHES_V2 {
295            let subcaches = data
296                .read_slice_at::<macho::DyldSubCacheEntryV2<E>>(
297                    self.subcaches_offset.get(endian).into(),
298                    self.subcaches_count.get(endian) as usize,
299                )
300                .read_error("Invalid dyld subcaches size or alignment")?;
301            Ok(Some(DyldSubCacheSlice::V2(subcaches)))
302        } else if header_size >= MIN_HEADER_SIZE_SUBCACHES_V1 {
303            let subcaches = data
304                .read_slice_at::<macho::DyldSubCacheEntryV1<E>>(
305                    self.subcaches_offset.get(endian).into(),
306                    self.subcaches_count.get(endian) as usize,
307                )
308                .read_error("Invalid dyld subcaches size or alignment")?;
309            Ok(Some(DyldSubCacheSlice::V1(subcaches)))
310        } else {
311            Ok(None)
312        }
313    }
314
315    /// Return the UUID for the .symbols subcache, if present.
316    pub fn symbols_subcache_uuid(&self, endian: E) -> Option<[u8; 16]> {
317        if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 {
318            let uuid = self.symbols_subcache_uuid;
319            if uuid != [0; 16] {
320                return Some(uuid);
321            }
322        }
323        None
324    }
325
326    /// Return the image information table.
327    pub fn images<'data, R: ReadRef<'data>>(
328        &self,
329        endian: E,
330        data: R,
331    ) -> Result<&'data [macho::DyldCacheImageInfo<E>]> {
332        if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES_V1 {
333            data.read_slice_at::<macho::DyldCacheImageInfo<E>>(
334                self.images_across_all_subcaches_offset.get(endian).into(),
335                self.images_across_all_subcaches_count.get(endian) as usize,
336            )
337            .read_error("Invalid dyld cache image size or alignment")
338        } else {
339            data.read_slice_at::<macho::DyldCacheImageInfo<E>>(
340                self.images_offset.get(endian).into(),
341                self.images_count.get(endian) as usize,
342            )
343            .read_error("Invalid dyld cache image size or alignment")
344        }
345    }
346}
347
348impl<E: Endian> macho::DyldCacheImageInfo<E> {
349    /// The file system path of this image.
350    pub fn path<'data, R: ReadRef<'data>>(&self, endian: E, data: R) -> Result<&'data [u8]> {
351        let r_start = self.path_file_offset.get(endian).into();
352        let r_end = data.len().read_error("Couldn't get data len()")?;
353        data.read_bytes_at_until(r_start..r_end, 0)
354            .read_error("Couldn't read dyld cache image path")
355    }
356
357    /// Find the file offset of the image by looking up its address in the mappings.
358    pub fn file_offset(
359        &self,
360        endian: E,
361        mappings: &[macho::DyldCacheMappingInfo<E>],
362    ) -> Result<u64> {
363        let address = self.address.get(endian);
364        address_to_file_offset(address, endian, mappings)
365            .read_error("Invalid dyld cache image address")
366    }
367}
368
369/// Find the file offset of the image by looking up its address in the mappings.
370pub fn address_to_file_offset<E: Endian>(
371    address: u64,
372    endian: E,
373    mappings: &[macho::DyldCacheMappingInfo<E>],
374) -> Option<u64> {
375    for mapping in mappings {
376        let mapping_address = mapping.address.get(endian);
377        if address >= mapping_address
378            && address < mapping_address.wrapping_add(mapping.size.get(endian))
379        {
380            return Some(address - mapping_address + mapping.file_offset.get(endian));
381        }
382    }
383    None
384}