1use crate::decode::lzbuffer::{LzBuffer, LzCircularBuffer};
2use crate::decode::rangecoder::{BitTree, LenDecoder, RangeDecoder};
3use crate::decompress::{Options, UnpackedSize};
4use crate::error;
5use crate::util::vec2d::Vec2D;
6use byteorder::{LittleEndian, ReadBytesExt};
7use std::io;
8
9const MAX_REQUIRED_INPUT: usize = 20;
14
15#[derive(Debug, PartialEq)]
20enum ProcessingMode {
21 Partial,
24 Finish,
28}
29
30#[derive(Debug, PartialEq)]
34enum ProcessingStatus {
35 Continue,
36 Finished,
37}
38
39#[derive(Debug, Copy, Clone)]
40pub struct LzmaProperties {
42 pub lc: u32, pub lp: u32, pub pb: u32, }
57
58impl LzmaProperties {
59 pub(crate) fn validate(&self) {
61 assert!(self.lc <= 8);
62 assert!(self.lp <= 4);
63 assert!(self.pb <= 4);
64 }
65}
66
67#[derive(Debug, Copy, Clone)]
68pub struct LzmaParams {
70 pub(crate) properties: LzmaProperties,
72 pub(crate) dict_size: u32,
74 pub(crate) unpacked_size: Option<u64>,
76}
77
78impl LzmaParams {
79 #[cfg(feature = "raw_decoder")]
81 pub fn new(
82 properties: LzmaProperties,
83 dict_size: u32,
84 unpacked_size: Option<u64>,
85 ) -> LzmaParams {
86 Self {
87 properties,
88 dict_size,
89 unpacked_size,
90 }
91 }
92
93 pub fn read_header<R>(input: &mut R, options: &Options) -> error::Result<LzmaParams>
95 where
96 R: io::BufRead,
97 {
98 let props = input.read_u8().map_err(error::Error::HeaderTooShort)?;
100
101 let mut pb = props as u32;
102 if pb >= 225 {
103 return Err(error::Error::LzmaError(format!(
104 "LZMA header invalid properties: {} must be < 225",
105 pb
106 )));
107 }
108
109 let lc: u32 = pb % 9;
110 pb /= 9;
111 let lp: u32 = pb % 5;
112 pb /= 5;
113
114 lzma_info!("Properties {{ lc: {}, lp: {}, pb: {} }}", lc, lp, pb);
115
116 let dict_size_provided = input
118 .read_u32::<LittleEndian>()
119 .map_err(error::Error::HeaderTooShort)?;
120 let dict_size = if dict_size_provided < 0x1000 {
121 0x1000
122 } else {
123 dict_size_provided
124 };
125
126 lzma_info!("Dict size: {}", dict_size);
127
128 let unpacked_size: Option<u64> = match options.unpacked_size {
130 UnpackedSize::ReadFromHeader => {
131 let unpacked_size_provided = input
132 .read_u64::<LittleEndian>()
133 .map_err(error::Error::HeaderTooShort)?;
134 let marker_mandatory: bool = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF;
135 if marker_mandatory {
136 None
137 } else {
138 Some(unpacked_size_provided)
139 }
140 }
141 UnpackedSize::ReadHeaderButUseProvided(x) => {
142 input
143 .read_u64::<LittleEndian>()
144 .map_err(error::Error::HeaderTooShort)?;
145 x
146 }
147 UnpackedSize::UseProvided(x) => x,
148 };
149
150 lzma_info!("Unpacked size: {:?}", unpacked_size);
151
152 let params = LzmaParams {
153 properties: LzmaProperties { lc, lp, pb },
154 dict_size,
155 unpacked_size,
156 };
157
158 Ok(params)
159 }
160}
161
162#[derive(Debug)]
163pub(crate) struct DecoderState {
164 partial_input_buf: std::io::Cursor<[u8; MAX_REQUIRED_INPUT]>,
167 pub(crate) lzma_props: LzmaProperties,
168 unpacked_size: Option<u64>,
169 literal_probs: Vec2D<u16>,
170 pos_slot_decoder: [BitTree; 4],
171 align_decoder: BitTree,
172 pos_decoders: [u16; 115],
173 is_match: [u16; 192], is_rep: [u16; 12],
175 is_rep_g0: [u16; 12],
176 is_rep_g1: [u16; 12],
177 is_rep_g2: [u16; 12],
178 is_rep_0long: [u16; 192],
179 state: usize,
180 rep: [usize; 4],
181 len_decoder: LenDecoder,
182 rep_len_decoder: LenDecoder,
183}
184
185impl DecoderState {
186 pub fn new(lzma_props: LzmaProperties, unpacked_size: Option<u64>) -> Self {
187 lzma_props.validate();
188 DecoderState {
189 partial_input_buf: std::io::Cursor::new([0; MAX_REQUIRED_INPUT]),
190 lzma_props,
191 unpacked_size,
192 literal_probs: Vec2D::init(0x400, (1 << (lzma_props.lc + lzma_props.lp), 0x300)),
193 pos_slot_decoder: [
194 BitTree::new(6),
195 BitTree::new(6),
196 BitTree::new(6),
197 BitTree::new(6),
198 ],
199 align_decoder: BitTree::new(4),
200 pos_decoders: [0x400; 115],
201 is_match: [0x400; 192],
202 is_rep: [0x400; 12],
203 is_rep_g0: [0x400; 12],
204 is_rep_g1: [0x400; 12],
205 is_rep_g2: [0x400; 12],
206 is_rep_0long: [0x400; 192],
207 state: 0,
208 rep: [0; 4],
209 len_decoder: LenDecoder::new(),
210 rep_len_decoder: LenDecoder::new(),
211 }
212 }
213
214 pub fn reset_state(&mut self, new_props: LzmaProperties) {
215 new_props.validate();
216 if self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp {
217 self.literal_probs.fill(0x400);
219 } else {
220 self.literal_probs = Vec2D::init(0x400, (1 << (new_props.lc + new_props.lp), 0x300));
222 }
223
224 self.lzma_props = new_props;
225 self.pos_slot_decoder.iter_mut().for_each(|t| t.reset());
226 self.align_decoder.reset();
227 self.pos_decoders = [0x400; 115];
231 self.is_match = [0x400; 192];
232 self.is_rep = [0x400; 12];
233 self.is_rep_g0 = [0x400; 12];
234 self.is_rep_g1 = [0x400; 12];
235 self.is_rep_g2 = [0x400; 12];
236 self.is_rep_0long = [0x400; 192];
237 self.state = 0;
238 self.rep = [0; 4];
239 self.len_decoder.reset();
240 self.rep_len_decoder.reset();
241 }
242
243 pub fn set_unpacked_size(&mut self, unpacked_size: Option<u64>) {
244 self.unpacked_size = unpacked_size;
245 }
246
247 pub fn process<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
248 &mut self,
249 output: &mut LZB,
250 rangecoder: &mut RangeDecoder<'a, R>,
251 ) -> error::Result<()> {
252 self.process_mode(output, rangecoder, ProcessingMode::Finish)
253 }
254
255 #[cfg(feature = "stream")]
256 pub fn process_stream<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
257 &mut self,
258 output: &mut LZB,
259 rangecoder: &mut RangeDecoder<'a, R>,
260 ) -> error::Result<()> {
261 self.process_mode(output, rangecoder, ProcessingMode::Partial)
262 }
263
264 fn process_next_inner<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
271 &mut self,
272 output: &mut LZB,
273 rangecoder: &mut RangeDecoder<'a, R>,
274 update: bool,
275 ) -> error::Result<ProcessingStatus> {
276 let pos_state = output.len() & ((1 << self.lzma_props.pb) - 1);
277
278 if !rangecoder.decode_bit(
280 &mut self.is_match[(self.state << 4) + pos_state],
282 update,
283 )? {
284 let byte: u8 = self.decode_literal(output, rangecoder, update)?;
285
286 if update {
287 lzma_debug!("Literal: {}", byte);
288 output.append_literal(byte)?;
289
290 self.state = if self.state < 4 {
291 0
292 } else if self.state < 10 {
293 self.state - 3
294 } else {
295 self.state - 6
296 };
297 }
298 return Ok(ProcessingStatus::Continue);
299 }
300
301 let mut len: usize;
303 if rangecoder.decode_bit(&mut self.is_rep[self.state], update)? {
305 if !rangecoder.decode_bit(&mut self.is_rep_g0[self.state], update)? {
307 if !rangecoder.decode_bit(
309 &mut self.is_rep_0long[(self.state << 4) + pos_state],
310 update,
311 )? {
312 if update {
314 self.state = if self.state < 7 { 9 } else { 11 };
315 let dist = self.rep[0] + 1;
316 output.append_lz(1, dist)?;
317 }
318 return Ok(ProcessingStatus::Continue);
319 }
320 } else {
322 let idx: usize;
323 if !rangecoder.decode_bit(&mut self.is_rep_g1[self.state], update)? {
324 idx = 1;
325 } else if !rangecoder.decode_bit(&mut self.is_rep_g2[self.state], update)? {
326 idx = 2;
327 } else {
328 idx = 3;
329 }
330 if update {
331 let dist = self.rep[idx];
333 for i in (0..idx).rev() {
334 self.rep[i + 1] = self.rep[i];
335 }
336 self.rep[0] = dist
337 }
338 }
339
340 len = self.rep_len_decoder.decode(rangecoder, pos_state, update)?;
341
342 if update {
343 self.state = if self.state < 7 { 8 } else { 11 };
345 }
346 } else {
348 if update {
349 self.rep[3] = self.rep[2];
351 self.rep[2] = self.rep[1];
352 self.rep[1] = self.rep[0];
353 }
354
355 len = self.len_decoder.decode(rangecoder, pos_state, update)?;
356
357 if update {
358 self.state = if self.state < 7 { 7 } else { 10 };
360 }
361
362 let rep_0 = self.decode_distance(rangecoder, len, update)?;
363
364 if update {
365 self.rep[0] = rep_0;
366 if self.rep[0] == 0xFFFF_FFFF {
367 if rangecoder.is_finished_ok()? {
368 return Ok(ProcessingStatus::Finished);
369 }
370 return Err(error::Error::LzmaError(String::from(
371 "Found end-of-stream marker but more bytes are available",
372 )));
373 }
374 }
375 }
376
377 if update {
378 len += 2;
379
380 let dist = self.rep[0] + 1;
381 output.append_lz(len, dist)?;
382 }
383
384 Ok(ProcessingStatus::Continue)
385 }
386
387 fn process_next<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
388 &mut self,
389 output: &mut LZB,
390 rangecoder: &mut RangeDecoder<'a, R>,
391 ) -> error::Result<ProcessingStatus> {
392 self.process_next_inner(output, rangecoder, true)
393 }
394
395 fn try_process_next<W: io::Write, LZB: LzBuffer<W>>(
401 &mut self,
402 output: &mut LZB,
403 buf: &[u8],
404 range: u32,
405 code: u32,
406 ) -> error::Result<()> {
407 let mut temp = std::io::Cursor::new(buf);
408 let mut rangecoder = RangeDecoder::from_parts(&mut temp, range, code);
409 let _ = self.process_next_inner(output, &mut rangecoder, false)?;
410 Ok(())
411 }
412
413 fn read_partial_input_buf<'a, R: io::BufRead>(
415 &mut self,
416 rangecoder: &mut RangeDecoder<'a, R>,
417 ) -> error::Result<()> {
418 let start = self.partial_input_buf.position() as usize;
420 let bytes_read =
421 rangecoder.read_into(&mut self.partial_input_buf.get_mut()[start..])? as u64;
422 self.partial_input_buf
423 .set_position(self.partial_input_buf.position() + bytes_read);
424 Ok(())
425 }
426
427 fn process_mode<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
428 &mut self,
429 output: &mut LZB,
430 rangecoder: &mut RangeDecoder<'a, R>,
431 mode: ProcessingMode,
432 ) -> error::Result<()> {
433 loop {
434 if let Some(unpacked_size) = self.unpacked_size {
435 if output.len() as u64 >= unpacked_size {
436 break;
437 }
438 } else if match mode {
439 ProcessingMode::Partial => {
440 rangecoder.is_eof()? && self.partial_input_buf.position() as usize == 0
441 }
442 ProcessingMode::Finish => {
443 rangecoder.is_finished_ok()? && self.partial_input_buf.position() as usize == 0
444 }
445 } {
446 break;
447 }
448
449 if self.partial_input_buf.position() as usize > 0 {
450 self.read_partial_input_buf(rangecoder)?;
451 let tmp = *self.partial_input_buf.get_ref();
452
453 if mode == ProcessingMode::Partial
455 && (self.partial_input_buf.position() as usize) < MAX_REQUIRED_INPUT
456 && self
457 .try_process_next(
458 output,
459 &tmp[..self.partial_input_buf.position() as usize],
460 rangecoder.range,
461 rangecoder.code,
462 )
463 .is_err()
464 {
465 return Ok(());
466 }
467
468 let mut tmp_reader =
470 io::Cursor::new(&tmp[..self.partial_input_buf.position() as usize]);
471 let mut tmp_rangecoder =
472 RangeDecoder::from_parts(&mut tmp_reader, rangecoder.range, rangecoder.code);
473 let res = self.process_next(output, &mut tmp_rangecoder)?;
474
475 rangecoder.set(tmp_rangecoder.range, tmp_rangecoder.code);
477
478 let end = self.partial_input_buf.position();
480 let new_len = end - tmp_reader.position();
481 self.partial_input_buf.get_mut()[..new_len as usize]
482 .copy_from_slice(&tmp[tmp_reader.position() as usize..end as usize]);
483 self.partial_input_buf.set_position(new_len);
484
485 if res == ProcessingStatus::Finished {
486 break;
487 };
488 } else {
489 let buf: &[u8] = rangecoder.stream.fill_buf()?;
490 if mode == ProcessingMode::Partial
491 && buf.len() < MAX_REQUIRED_INPUT
492 && self
493 .try_process_next(output, buf, rangecoder.range, rangecoder.code)
494 .is_err()
495 {
496 return self.read_partial_input_buf(rangecoder);
497 }
498
499 if self.process_next(output, rangecoder)? == ProcessingStatus::Finished {
500 break;
501 };
502 }
503 }
504
505 if let Some(len) = self.unpacked_size {
506 if mode == ProcessingMode::Finish && len != output.len() as u64 {
507 return Err(error::Error::LzmaError(format!(
508 "Expected unpacked size of {} but decompressed to {}",
509 len,
510 output.len()
511 )));
512 }
513 }
514
515 Ok(())
516 }
517
518 fn decode_literal<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
519 &mut self,
520 output: &mut LZB,
521 rangecoder: &mut RangeDecoder<'a, R>,
522 update: bool,
523 ) -> error::Result<u8> {
524 let def_prev_byte = 0u8;
525 let prev_byte = output.last_or(def_prev_byte) as usize;
526
527 let mut result: usize = 1;
528 let lit_state = ((output.len() & ((1 << self.lzma_props.lp) - 1)) << self.lzma_props.lc)
529 + (prev_byte >> (8 - self.lzma_props.lc));
530 let probs = &mut self.literal_probs[lit_state];
531
532 if self.state >= 7 {
533 let mut match_byte = output.last_n(self.rep[0] + 1)? as usize;
534
535 while result < 0x100 {
536 let match_bit = (match_byte >> 7) & 1;
537 match_byte <<= 1;
538 let bit = rangecoder
539 .decode_bit(&mut probs[((1 + match_bit) << 8) + result], update)?
540 as usize;
541 result = (result << 1) ^ bit;
542 if match_bit != bit {
543 break;
544 }
545 }
546 }
547
548 while result < 0x100 {
549 result = (result << 1) ^ (rangecoder.decode_bit(&mut probs[result], update)? as usize);
550 }
551
552 Ok((result - 0x100) as u8)
553 }
554
555 fn decode_distance<'a, R: io::BufRead>(
556 &mut self,
557 rangecoder: &mut RangeDecoder<'a, R>,
558 length: usize,
559 update: bool,
560 ) -> error::Result<usize> {
561 let len_state = if length > 3 { 3 } else { length };
562
563 let pos_slot = self.pos_slot_decoder[len_state].parse(rangecoder, update)? as usize;
564 if pos_slot < 4 {
565 return Ok(pos_slot);
566 }
567
568 let num_direct_bits = (pos_slot >> 1) - 1;
569 let mut result = (2 ^ (pos_slot & 1)) << num_direct_bits;
570
571 if pos_slot < 14 {
572 result += rangecoder.parse_reverse_bit_tree(
573 num_direct_bits,
574 &mut self.pos_decoders,
575 result - pos_slot,
576 update,
577 )? as usize;
578 } else {
579 result += (rangecoder.get(num_direct_bits - 4)? as usize) << 4;
580 result += self.align_decoder.parse_reverse(rangecoder, update)? as usize;
581 }
582
583 Ok(result)
584 }
585}
586
587#[derive(Debug)]
588pub struct LzmaDecoder {
590 params: LzmaParams,
591 memlimit: usize,
592 state: DecoderState,
593}
594
595impl LzmaDecoder {
596 pub fn new(params: LzmaParams, memlimit: Option<usize>) -> error::Result<LzmaDecoder> {
599 Ok(Self {
600 params,
601 memlimit: memlimit.unwrap_or(usize::MAX),
602 state: DecoderState::new(params.properties, params.unpacked_size),
603 })
604 }
605
606 #[cfg(feature = "raw_decoder")]
613 pub fn reset(&mut self, unpacked_size: Option<Option<u64>>) {
614 self.state.reset_state(self.params.properties);
615
616 if let Some(unpacked_size) = unpacked_size {
617 self.state.set_unpacked_size(unpacked_size);
618 }
619 }
620
621 pub fn decompress<W: io::Write, R: io::BufRead>(
623 &mut self,
624 input: &mut R,
625 output: &mut W,
626 ) -> error::Result<()> {
627 let mut output =
628 LzCircularBuffer::from_stream(output, self.params.dict_size as usize, self.memlimit);
629
630 let mut rangecoder = RangeDecoder::new(input)
631 .map_err(|e| error::Error::LzmaError(format!("LZMA stream too short: {}", e)))?;
632 self.state.process(&mut output, &mut rangecoder)?;
633 output.finish()?;
634 Ok(())
635 }
636}