lzma_rs/decode/
xz.rs

1//! Decoder for the `.xz` file format.
2
3use crate::decode::lzma2::Lzma2Decoder;
4use crate::decode::util;
5use crate::error;
6use crate::xz::crc::{CRC32, CRC64};
7use crate::xz::{footer, header, CheckMethod, StreamFlags};
8use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
9use std::io;
10use std::io::Read;
11
/// Sizes measured while decoding one block, kept so that they can later be
/// compared against the matching entry of the stream index.
#[derive(Debug)]
struct Record {
    /// Number of bytes consumed for the block, with its padding subtracted.
    unpadded_size: u64,
    /// Number of bytes the block decompressed to.
    unpacked_size: u64,
}
17
18pub fn decode_stream<R, W>(input: &mut R, output: &mut W) -> error::Result<()>
19where
20    R: io::BufRead,
21    W: io::Write,
22{
23    let header = header::StreamHeader::parse(input)?;
24
25    let mut records: Vec<Record> = vec![];
26    let index_size = loop {
27        let mut count_input = util::CountBufRead::new(input);
28        let header_size = count_input.read_u8()?;
29        lzma_info!("XZ block header_size byte: 0x{:02x}", header_size);
30
31        if header_size == 0 {
32            lzma_info!("XZ records: {:?}", records);
33            check_index(&mut count_input, &records)?;
34            let index_size = count_input.count();
35            break index_size;
36        }
37
38        read_block(
39            &mut count_input,
40            output,
41            header.stream_flags.check_method,
42            &mut records,
43            header_size,
44        )?;
45    };
46
47    let crc32 = input.read_u32::<LittleEndian>()?;
48    let mut digest = CRC32.digest();
49    {
50        let mut digested = util::CrcDigestRead::new(input, &mut digest);
51        let backward_size = digested.read_u32::<LittleEndian>()?;
52        if index_size as u32 != (backward_size + 1) << 2 {
53            return Err(error::Error::XzError(format!(
54                "Invalid index size: expected {} but got {}",
55                (backward_size + 1) << 2,
56                index_size
57            )));
58        }
59
60        let stream_flags = {
61            let field = digested.read_u16::<BigEndian>()?;
62            StreamFlags::parse(field)?
63        };
64
65        if header.stream_flags != stream_flags {
66            return Err(error::Error::XzError(format!(
67                "Flags in header ({:?}) does not match footer ({:?})",
68                header.stream_flags, stream_flags
69            )));
70        }
71    }
72
73    let digest_crc32 = digest.finalize();
74    if crc32 != digest_crc32 {
75        return Err(error::Error::XzError(format!(
76            "Invalid footer CRC32: expected 0x{:08x} but got 0x{:08x}",
77            crc32, digest_crc32
78        )));
79    }
80
81    if !util::read_tag(input, footer::XZ_MAGIC_FOOTER)? {
82        return Err(error::Error::XzError(format!(
83            "Invalid footer magic, expected {:?}",
84            footer::XZ_MAGIC_FOOTER
85        )));
86    }
87
88    if !util::is_eof(input)? {
89        return Err(error::Error::XzError(
90            "Unexpected data after last XZ block".to_string(),
91        ));
92    }
93    Ok(())
94}
95
96fn check_index<'a, R>(
97    count_input: &mut util::CountBufRead<'a, R>,
98    records: &[Record],
99) -> error::Result<()>
100where
101    R: io::BufRead,
102{
103    let mut digest = CRC32.digest();
104    let index_tag = 0u8;
105    digest.update(&[index_tag]);
106    {
107        let mut digested = util::CrcDigestRead::new(count_input, &mut digest);
108
109        let num_records = get_multibyte(&mut digested)?;
110        if num_records != records.len() as u64 {
111            return Err(error::Error::XzError(format!(
112                "Expected {} records but got {} records",
113                num_records,
114                records.len()
115            )));
116        }
117
118        for (i, record) in records.iter().enumerate() {
119            lzma_info!("XZ index checking record {}: {:?}", i, record);
120
121            let unpadded_size = get_multibyte(&mut digested)?;
122            if unpadded_size != record.unpadded_size {
123                return Err(error::Error::XzError(format!(
124                    "Invalid index for record {}: unpadded size ({}) does not match index ({})",
125                    i, record.unpadded_size, unpadded_size
126                )));
127            }
128
129            let unpacked_size = get_multibyte(&mut digested)?;
130            if unpacked_size != record.unpacked_size {
131                return Err(error::Error::XzError(format!(
132                    "Invalid index for record {}: unpacked size ({}) does not match index ({})",
133                    i, record.unpacked_size, unpacked_size
134                )));
135            }
136        }
137    };
138    // TODO: create padding parser function
139    let count = count_input.count();
140    let padding_size = ((count ^ 0x03) + 1) & 0x03;
141    lzma_info!(
142        "XZ index: {} byte(s) read, {} byte(s) of padding",
143        count,
144        padding_size
145    );
146
147    {
148        let mut digested = util::CrcDigestRead::new(count_input, &mut digest);
149        for _ in 0..padding_size {
150            let byte = digested.read_u8()?;
151            if byte != 0 {
152                return Err(error::Error::XzError(
153                    "Invalid index padding, must be null bytes".to_string(),
154                ));
155            }
156        }
157    };
158
159    let digest_crc32 = digest.finalize();
160    lzma_info!("XZ index checking digest 0x{:08x}", digest_crc32);
161
162    let crc32 = count_input.read_u32::<LittleEndian>()?;
163    if crc32 != digest_crc32 {
164        return Err(error::Error::XzError(format!(
165            "Invalid index CRC32: expected 0x{:08x} but got 0x{:08x}",
166            crc32, digest_crc32
167        )));
168    }
169
170    Ok(())
171}
172
/// Filters that this decoder is able to undo.
#[derive(Debug)]
enum FilterId {
    /// The LZMA2 filter.
    Lzma2,
}
177
178fn get_filter_id(id: u64) -> error::Result<FilterId> {
179    match id {
180        0x21 => Ok(FilterId::Lzma2),
181        _ => Err(error::Error::XzError(format!("Unknown filter id {}", id))),
182    }
183}
184
185struct Filter {
186    filter_id: FilterId,
187    props: Vec<u8>,
188}
189
190struct BlockHeader {
191    filters: Vec<Filter>,
192    packed_size: Option<u64>,
193    unpacked_size: Option<u64>,
194}
195
196fn read_block<'a, R, W>(
197    count_input: &mut util::CountBufRead<'a, R>,
198    output: &mut W,
199    check_method: CheckMethod,
200    records: &mut Vec<Record>,
201    header_size: u8,
202) -> error::Result<bool>
203where
204    R: io::BufRead,
205    W: io::Write,
206{
207    let mut digest = CRC32.digest();
208    digest.update(&[header_size]);
209    let header_size = ((header_size as u64) << 2) - 1;
210
211    let block_header = {
212        let mut taken = count_input.take(header_size);
213        let mut digested = io::BufReader::new(util::CrcDigestRead::new(&mut taken, &mut digest));
214        read_block_header(&mut digested, header_size)?
215    };
216
217    let crc32 = count_input.read_u32::<LittleEndian>()?;
218    let digest_crc32 = digest.finalize();
219    if crc32 != digest_crc32 {
220        return Err(error::Error::XzError(format!(
221            "Invalid header CRC32: expected 0x{:08x} but got 0x{:08x}",
222            crc32, digest_crc32
223        )));
224    }
225
226    let mut tmpbuf: Vec<u8> = Vec::new();
227    let filters = block_header.filters;
228    for (i, filter) in filters.iter().enumerate() {
229        if i == 0 {
230            // TODO: use SubBufRead on input if packed_size is known?
231            let packed_size = decode_filter(count_input, &mut tmpbuf, filter)?;
232            if let Some(expected_packed_size) = block_header.packed_size {
233                if (packed_size as u64) != expected_packed_size {
234                    return Err(error::Error::XzError(format!(
235                        "Invalid compressed size: expected {} but got {}",
236                        expected_packed_size, packed_size
237                    )));
238                }
239            }
240        } else {
241            let mut newbuf: Vec<u8> = Vec::new();
242            decode_filter(
243                &mut io::BufReader::new(tmpbuf.as_slice()),
244                &mut newbuf,
245                filter,
246            )?;
247            // TODO: does this move or copy?
248            tmpbuf = newbuf;
249        }
250    }
251
252    let unpacked_size = tmpbuf.len();
253    lzma_info!("XZ block decompressed to {} byte(s)", tmpbuf.len());
254
255    if let Some(expected_unpacked_size) = block_header.unpacked_size {
256        if (unpacked_size as u64) != expected_unpacked_size {
257            return Err(error::Error::XzError(format!(
258                "Invalid decompressed size: expected {} but got {}",
259                expected_unpacked_size, unpacked_size
260            )));
261        }
262    }
263
264    let count = count_input.count();
265    let padding_size = ((count ^ 0x03) + 1) & 0x03;
266    lzma_info!(
267        "XZ block: {} byte(s) read, {} byte(s) of padding, check method {:?}",
268        count,
269        padding_size,
270        check_method
271    );
272    for _ in 0..padding_size {
273        let byte = count_input.read_u8()?;
274        if byte != 0 {
275            return Err(error::Error::XzError(
276                "Invalid block padding, must be null bytes".to_string(),
277            ));
278        }
279    }
280    validate_block_check(count_input, tmpbuf.as_slice(), check_method)?;
281
282    output.write_all(tmpbuf.as_slice())?;
283    records.push(Record {
284        unpadded_size: (count_input.count() - padding_size) as u64,
285        unpacked_size: unpacked_size as u64,
286    });
287
288    let finished = false;
289    Ok(finished)
290}
291
292/// Verify block checksum against the "Block Check" field.
293///
294/// See spec section 3.4 for details.
295fn validate_block_check<R>(
296    input: &mut R,
297    buf: &[u8],
298    check_method: CheckMethod,
299) -> error::Result<()>
300where
301    R: io::BufRead,
302{
303    match check_method {
304        CheckMethod::None => (),
305        CheckMethod::Crc32 => {
306            let crc32 = input.read_u32::<LittleEndian>()?;
307            let digest_crc32 = CRC32.checksum(buf);
308            if crc32 != digest_crc32 {
309                return Err(error::Error::XzError(format!(
310                    "Invalid block CRC32, expected 0x{:08x} but got 0x{:08x}",
311                    crc32, digest_crc32
312                )));
313            }
314        }
315        CheckMethod::Crc64 => {
316            let crc64 = input.read_u64::<LittleEndian>()?;
317            let digest_crc64 = CRC64.checksum(buf);
318            if crc64 != digest_crc64 {
319                return Err(error::Error::XzError(format!(
320                    "Invalid block CRC64, expected 0x{:016x} but got 0x{:016x}",
321                    crc64, digest_crc64
322                )));
323            }
324        }
325        // TODO
326        CheckMethod::Sha256 => {
327            return Err(error::Error::XzError(
328                "Unsupported SHA-256 checksum (not yet implemented)".to_string(),
329            ));
330        }
331    }
332    Ok(())
333}
334
335fn decode_filter<R, W>(input: &mut R, output: &mut W, filter: &Filter) -> error::Result<usize>
336where
337    R: io::BufRead,
338    W: io::Write,
339{
340    let mut count_input = util::CountBufRead::new(input);
341    match filter.filter_id {
342        FilterId::Lzma2 => {
343            if filter.props.len() != 1 {
344                return Err(error::Error::XzError(format!(
345                    "Invalid properties for filter {:?}",
346                    filter.filter_id
347                )));
348            }
349            // TODO: properties??
350            Lzma2Decoder::new().decompress(&mut count_input, output)?;
351            Ok(count_input.count())
352        }
353    }
354}
355
356fn read_block_header<R>(input: &mut R, header_size: u64) -> error::Result<BlockHeader>
357where
358    R: io::BufRead,
359{
360    let flags = input.read_u8()?;
361    let num_filters = (flags & 0x03) + 1;
362    let reserved = flags & 0x3C;
363    let has_packed_size = flags & 0x40 != 0;
364    let has_unpacked_size = flags & 0x80 != 0;
365
366    lzma_info!(
367        "XZ block header: {{ header_size: {}, flags: {}, num_filters: {}, has_packed_size: {}, has_unpacked_size: {} }}",
368        header_size,
369        flags,
370        num_filters,
371        has_packed_size,
372        has_unpacked_size
373    );
374
375    if reserved != 0 {
376        return Err(error::Error::XzError(format!(
377            "Invalid block flags {}, reserved bits (mask 0x3C) must be zero",
378            flags
379        )));
380    }
381
382    let packed_size = if has_packed_size {
383        Some(get_multibyte(input)?)
384    } else {
385        None
386    };
387
388    let unpacked_size = if has_unpacked_size {
389        Some(get_multibyte(input)?)
390    } else {
391        None
392    };
393
394    lzma_info!(
395        "XZ block header: {{ packed_size: {:?}, unpacked_size: {:?} }}",
396        packed_size,
397        unpacked_size
398    );
399
400    let mut filters: Vec<Filter> = vec![];
401    for _ in 0..num_filters {
402        let filter_id = get_filter_id(get_multibyte(input)?)?;
403        let size_of_properties = get_multibyte(input)?;
404
405        lzma_info!(
406            "XZ filter: {{ filter_id: {:?}, size_of_properties: {} }}",
407            filter_id,
408            size_of_properties
409        );
410
411        // Early abort to avoid allocating a large vector
412        if size_of_properties > header_size {
413            return Err(error::Error::XzError(format!(
414                "Size of filter properties exceeds block header size ({} > {})",
415                size_of_properties, header_size
416            )));
417        }
418
419        let mut buf = vec![0; size_of_properties as usize];
420        input.read_exact(buf.as_mut_slice()).map_err(|e| {
421            error::Error::XzError(format!(
422                "Could not read filter properties of size {}: {}",
423                size_of_properties, e
424            ))
425        })?;
426
427        lzma_info!("XZ filter properties: {:?}", buf);
428
429        filters.push(Filter {
430            filter_id,
431            props: buf,
432        })
433    }
434
435    if !util::flush_zero_padding(input)? {
436        return Err(error::Error::XzError(
437            "Invalid block header padding, must be null bytes".to_string(),
438        ));
439    }
440
441    Ok(BlockHeader {
442        filters,
443        packed_size,
444        unpacked_size,
445    })
446}
447
448pub fn get_multibyte<R>(input: &mut R) -> error::Result<u64>
449where
450    R: io::Read,
451{
452    let mut result = 0;
453    for i in 0..9 {
454        let byte = input.read_u8()?;
455        result ^= ((byte & 0x7F) as u64) << (i * 7);
456        if (byte & 0x80) == 0 {
457            return Ok(result);
458        }
459    }
460
461    Err(error::Error::XzError(
462        "Invalid multi-byte encoding".to_string(),
463    ))
464}