1use crate::decode::lzma2::Lzma2Decoder;
4use crate::decode::util;
5use crate::error;
6use crate::xz::crc::{CRC32, CRC64};
7use crate::xz::{footer, header, CheckMethod, StreamFlags};
8use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
9use std::io;
10use std::io::Read;
11
/// Sizes measured while decoding one block, kept so they can be compared
/// against the entries of the stream index once it is reached.
#[derive(Debug)]
struct Record {
    /// Bytes consumed for the block, not counting its trailing padding.
    unpadded_size: u64,
    /// Number of bytes the block decompressed to.
    unpacked_size: u64,
}
17
18pub fn decode_stream<R, W>(input: &mut R, output: &mut W) -> error::Result<()>
19where
20 R: io::BufRead,
21 W: io::Write,
22{
23 let header = header::StreamHeader::parse(input)?;
24
25 let mut records: Vec<Record> = vec![];
26 let index_size = loop {
27 let mut count_input = util::CountBufRead::new(input);
28 let header_size = count_input.read_u8()?;
29 lzma_info!("XZ block header_size byte: 0x{:02x}", header_size);
30
31 if header_size == 0 {
32 lzma_info!("XZ records: {:?}", records);
33 check_index(&mut count_input, &records)?;
34 let index_size = count_input.count();
35 break index_size;
36 }
37
38 read_block(
39 &mut count_input,
40 output,
41 header.stream_flags.check_method,
42 &mut records,
43 header_size,
44 )?;
45 };
46
47 let crc32 = input.read_u32::<LittleEndian>()?;
48 let mut digest = CRC32.digest();
49 {
50 let mut digested = util::CrcDigestRead::new(input, &mut digest);
51 let backward_size = digested.read_u32::<LittleEndian>()?;
52 if index_size as u32 != (backward_size + 1) << 2 {
53 return Err(error::Error::XzError(format!(
54 "Invalid index size: expected {} but got {}",
55 (backward_size + 1) << 2,
56 index_size
57 )));
58 }
59
60 let stream_flags = {
61 let field = digested.read_u16::<BigEndian>()?;
62 StreamFlags::parse(field)?
63 };
64
65 if header.stream_flags != stream_flags {
66 return Err(error::Error::XzError(format!(
67 "Flags in header ({:?}) does not match footer ({:?})",
68 header.stream_flags, stream_flags
69 )));
70 }
71 }
72
73 let digest_crc32 = digest.finalize();
74 if crc32 != digest_crc32 {
75 return Err(error::Error::XzError(format!(
76 "Invalid footer CRC32: expected 0x{:08x} but got 0x{:08x}",
77 crc32, digest_crc32
78 )));
79 }
80
81 if !util::read_tag(input, footer::XZ_MAGIC_FOOTER)? {
82 return Err(error::Error::XzError(format!(
83 "Invalid footer magic, expected {:?}",
84 footer::XZ_MAGIC_FOOTER
85 )));
86 }
87
88 if !util::is_eof(input)? {
89 return Err(error::Error::XzError(
90 "Unexpected data after last XZ block".to_string(),
91 ));
92 }
93 Ok(())
94}
95
96fn check_index<'a, R>(
97 count_input: &mut util::CountBufRead<'a, R>,
98 records: &[Record],
99) -> error::Result<()>
100where
101 R: io::BufRead,
102{
103 let mut digest = CRC32.digest();
104 let index_tag = 0u8;
105 digest.update(&[index_tag]);
106 {
107 let mut digested = util::CrcDigestRead::new(count_input, &mut digest);
108
109 let num_records = get_multibyte(&mut digested)?;
110 if num_records != records.len() as u64 {
111 return Err(error::Error::XzError(format!(
112 "Expected {} records but got {} records",
113 num_records,
114 records.len()
115 )));
116 }
117
118 for (i, record) in records.iter().enumerate() {
119 lzma_info!("XZ index checking record {}: {:?}", i, record);
120
121 let unpadded_size = get_multibyte(&mut digested)?;
122 if unpadded_size != record.unpadded_size {
123 return Err(error::Error::XzError(format!(
124 "Invalid index for record {}: unpadded size ({}) does not match index ({})",
125 i, record.unpadded_size, unpadded_size
126 )));
127 }
128
129 let unpacked_size = get_multibyte(&mut digested)?;
130 if unpacked_size != record.unpacked_size {
131 return Err(error::Error::XzError(format!(
132 "Invalid index for record {}: unpacked size ({}) does not match index ({})",
133 i, record.unpacked_size, unpacked_size
134 )));
135 }
136 }
137 };
138 let count = count_input.count();
140 let padding_size = ((count ^ 0x03) + 1) & 0x03;
141 lzma_info!(
142 "XZ index: {} byte(s) read, {} byte(s) of padding",
143 count,
144 padding_size
145 );
146
147 {
148 let mut digested = util::CrcDigestRead::new(count_input, &mut digest);
149 for _ in 0..padding_size {
150 let byte = digested.read_u8()?;
151 if byte != 0 {
152 return Err(error::Error::XzError(
153 "Invalid index padding, must be null bytes".to_string(),
154 ));
155 }
156 }
157 };
158
159 let digest_crc32 = digest.finalize();
160 lzma_info!("XZ index checking digest 0x{:08x}", digest_crc32);
161
162 let crc32 = count_input.read_u32::<LittleEndian>()?;
163 if crc32 != digest_crc32 {
164 return Err(error::Error::XzError(format!(
165 "Invalid index CRC32: expected 0x{:08x} but got 0x{:08x}",
166 crc32, digest_crc32
167 )));
168 }
169
170 Ok(())
171}
172
/// Filters this decoder knows how to handle.
#[derive(Debug)]
enum FilterId {
    /// The LZMA2 filter.
    Lzma2,
}
177
178fn get_filter_id(id: u64) -> error::Result<FilterId> {
179 match id {
180 0x21 => Ok(FilterId::Lzma2),
181 _ => Err(error::Error::XzError(format!("Unknown filter id {}", id))),
182 }
183}
184
185struct Filter {
186 filter_id: FilterId,
187 props: Vec<u8>,
188}
189
190struct BlockHeader {
191 filters: Vec<Filter>,
192 packed_size: Option<u64>,
193 unpacked_size: Option<u64>,
194}
195
196fn read_block<'a, R, W>(
197 count_input: &mut util::CountBufRead<'a, R>,
198 output: &mut W,
199 check_method: CheckMethod,
200 records: &mut Vec<Record>,
201 header_size: u8,
202) -> error::Result<bool>
203where
204 R: io::BufRead,
205 W: io::Write,
206{
207 let mut digest = CRC32.digest();
208 digest.update(&[header_size]);
209 let header_size = ((header_size as u64) << 2) - 1;
210
211 let block_header = {
212 let mut taken = count_input.take(header_size);
213 let mut digested = io::BufReader::new(util::CrcDigestRead::new(&mut taken, &mut digest));
214 read_block_header(&mut digested, header_size)?
215 };
216
217 let crc32 = count_input.read_u32::<LittleEndian>()?;
218 let digest_crc32 = digest.finalize();
219 if crc32 != digest_crc32 {
220 return Err(error::Error::XzError(format!(
221 "Invalid header CRC32: expected 0x{:08x} but got 0x{:08x}",
222 crc32, digest_crc32
223 )));
224 }
225
226 let mut tmpbuf: Vec<u8> = Vec::new();
227 let filters = block_header.filters;
228 for (i, filter) in filters.iter().enumerate() {
229 if i == 0 {
230 let packed_size = decode_filter(count_input, &mut tmpbuf, filter)?;
232 if let Some(expected_packed_size) = block_header.packed_size {
233 if (packed_size as u64) != expected_packed_size {
234 return Err(error::Error::XzError(format!(
235 "Invalid compressed size: expected {} but got {}",
236 expected_packed_size, packed_size
237 )));
238 }
239 }
240 } else {
241 let mut newbuf: Vec<u8> = Vec::new();
242 decode_filter(
243 &mut io::BufReader::new(tmpbuf.as_slice()),
244 &mut newbuf,
245 filter,
246 )?;
247 tmpbuf = newbuf;
249 }
250 }
251
252 let unpacked_size = tmpbuf.len();
253 lzma_info!("XZ block decompressed to {} byte(s)", tmpbuf.len());
254
255 if let Some(expected_unpacked_size) = block_header.unpacked_size {
256 if (unpacked_size as u64) != expected_unpacked_size {
257 return Err(error::Error::XzError(format!(
258 "Invalid decompressed size: expected {} but got {}",
259 expected_unpacked_size, unpacked_size
260 )));
261 }
262 }
263
264 let count = count_input.count();
265 let padding_size = ((count ^ 0x03) + 1) & 0x03;
266 lzma_info!(
267 "XZ block: {} byte(s) read, {} byte(s) of padding, check method {:?}",
268 count,
269 padding_size,
270 check_method
271 );
272 for _ in 0..padding_size {
273 let byte = count_input.read_u8()?;
274 if byte != 0 {
275 return Err(error::Error::XzError(
276 "Invalid block padding, must be null bytes".to_string(),
277 ));
278 }
279 }
280 validate_block_check(count_input, tmpbuf.as_slice(), check_method)?;
281
282 output.write_all(tmpbuf.as_slice())?;
283 records.push(Record {
284 unpadded_size: (count_input.count() - padding_size) as u64,
285 unpacked_size: unpacked_size as u64,
286 });
287
288 let finished = false;
289 Ok(finished)
290}
291
292fn validate_block_check<R>(
296 input: &mut R,
297 buf: &[u8],
298 check_method: CheckMethod,
299) -> error::Result<()>
300where
301 R: io::BufRead,
302{
303 match check_method {
304 CheckMethod::None => (),
305 CheckMethod::Crc32 => {
306 let crc32 = input.read_u32::<LittleEndian>()?;
307 let digest_crc32 = CRC32.checksum(buf);
308 if crc32 != digest_crc32 {
309 return Err(error::Error::XzError(format!(
310 "Invalid block CRC32, expected 0x{:08x} but got 0x{:08x}",
311 crc32, digest_crc32
312 )));
313 }
314 }
315 CheckMethod::Crc64 => {
316 let crc64 = input.read_u64::<LittleEndian>()?;
317 let digest_crc64 = CRC64.checksum(buf);
318 if crc64 != digest_crc64 {
319 return Err(error::Error::XzError(format!(
320 "Invalid block CRC64, expected 0x{:016x} but got 0x{:016x}",
321 crc64, digest_crc64
322 )));
323 }
324 }
325 CheckMethod::Sha256 => {
327 return Err(error::Error::XzError(
328 "Unsupported SHA-256 checksum (not yet implemented)".to_string(),
329 ));
330 }
331 }
332 Ok(())
333}
334
335fn decode_filter<R, W>(input: &mut R, output: &mut W, filter: &Filter) -> error::Result<usize>
336where
337 R: io::BufRead,
338 W: io::Write,
339{
340 let mut count_input = util::CountBufRead::new(input);
341 match filter.filter_id {
342 FilterId::Lzma2 => {
343 if filter.props.len() != 1 {
344 return Err(error::Error::XzError(format!(
345 "Invalid properties for filter {:?}",
346 filter.filter_id
347 )));
348 }
349 Lzma2Decoder::new().decompress(&mut count_input, output)?;
351 Ok(count_input.count())
352 }
353 }
354}
355
356fn read_block_header<R>(input: &mut R, header_size: u64) -> error::Result<BlockHeader>
357where
358 R: io::BufRead,
359{
360 let flags = input.read_u8()?;
361 let num_filters = (flags & 0x03) + 1;
362 let reserved = flags & 0x3C;
363 let has_packed_size = flags & 0x40 != 0;
364 let has_unpacked_size = flags & 0x80 != 0;
365
366 lzma_info!(
367 "XZ block header: {{ header_size: {}, flags: {}, num_filters: {}, has_packed_size: {}, has_unpacked_size: {} }}",
368 header_size,
369 flags,
370 num_filters,
371 has_packed_size,
372 has_unpacked_size
373 );
374
375 if reserved != 0 {
376 return Err(error::Error::XzError(format!(
377 "Invalid block flags {}, reserved bits (mask 0x3C) must be zero",
378 flags
379 )));
380 }
381
382 let packed_size = if has_packed_size {
383 Some(get_multibyte(input)?)
384 } else {
385 None
386 };
387
388 let unpacked_size = if has_unpacked_size {
389 Some(get_multibyte(input)?)
390 } else {
391 None
392 };
393
394 lzma_info!(
395 "XZ block header: {{ packed_size: {:?}, unpacked_size: {:?} }}",
396 packed_size,
397 unpacked_size
398 );
399
400 let mut filters: Vec<Filter> = vec![];
401 for _ in 0..num_filters {
402 let filter_id = get_filter_id(get_multibyte(input)?)?;
403 let size_of_properties = get_multibyte(input)?;
404
405 lzma_info!(
406 "XZ filter: {{ filter_id: {:?}, size_of_properties: {} }}",
407 filter_id,
408 size_of_properties
409 );
410
411 if size_of_properties > header_size {
413 return Err(error::Error::XzError(format!(
414 "Size of filter properties exceeds block header size ({} > {})",
415 size_of_properties, header_size
416 )));
417 }
418
419 let mut buf = vec![0; size_of_properties as usize];
420 input.read_exact(buf.as_mut_slice()).map_err(|e| {
421 error::Error::XzError(format!(
422 "Could not read filter properties of size {}: {}",
423 size_of_properties, e
424 ))
425 })?;
426
427 lzma_info!("XZ filter properties: {:?}", buf);
428
429 filters.push(Filter {
430 filter_id,
431 props: buf,
432 })
433 }
434
435 if !util::flush_zero_padding(input)? {
436 return Err(error::Error::XzError(
437 "Invalid block header padding, must be null bytes".to_string(),
438 ));
439 }
440
441 Ok(BlockHeader {
442 filters,
443 packed_size,
444 unpacked_size,
445 })
446}
447
448pub fn get_multibyte<R>(input: &mut R) -> error::Result<u64>
449where
450 R: io::Read,
451{
452 let mut result = 0;
453 for i in 0..9 {
454 let byte = input.read_u8()?;
455 result ^= ((byte & 0x7F) as u64) << (i * 7);
456 if (byte & 0x80) == 0 {
457 return Ok(result);
458 }
459 }
460
461 Err(error::Error::XzError(
462 "Invalid multi-byte encoding".to_string(),
463 ))
464}