core: fix or silence clippy warnings
[nihav.git] / nihav-core / src / compr / deflate.rs
1 //! Deflate format (RFC 1951) support.
2 //!
3 //! This module provides functionality for decompressing raw deflated streams via [`Inflate`] and gzip files (RFC 1952) via [`gzip_decode`].
4 //!
5 //! [`Inflate`]: ./struct.Inflate.html
6 //! [`gzip_decode`]: ./fn.gzip_decode.html
7 //!
8 //! # Examples
9 //!
10 //! Decompressing full input buffer into sufficiently large output buffer:
11 //! ```
12 //! # use nihav_core::compr::DecompressError;
13 //! use nihav_core::compr::deflate::Inflate;
14 //!
15 //! # fn decompress(input: &[u8]) -> Result<(), DecompressError> {
16 //! # let mut output_buffer = [0u8; 16];
17 //! let output_length = Inflate::uncompress(input, &mut output_buffer)?;
18 //! # Ok(())
19 //! # }
20 //! ```
21 //!
22 //! Decompressing input chunks into portions of output:
23 //! ```
24 //! use nihav_core::compr::DecompressError;
25 //! use nihav_core::compr::deflate::Inflate;
26 //!
27 //! # fn decompress(input_data: &[u8]) -> Result<(), DecompressError> {
28 //! let mut inflate = Inflate::new();
29 //! let mut dst_buf: Vec<u8> = Vec::new();
30 //! let mut output_chunk = [0u8; 1024];
31 //! for src in input_data.chunks(512) {
32 //! let mut repeat = false;
33 //! loop {
34 //! let ret = inflate.decompress_data(src, &mut output_chunk, repeat);
35 //! match ret {
36 //! Ok(len) => { // we got a buffer decoded successfully to the end
37 //! dst_buf.extend_from_slice(&output_chunk[..len]);
38 //! break;
39 //! },
40 //! Err(DecompressError::ShortData) => { // this block of data was fully read
41 //! break;
42 //! },
43 //! Err(DecompressError::OutputFull) => {
44 //! // the output buffer is full, flush it and continue decoding the same block
45 //! repeat = true;
46 //! dst_buf.extend_from_slice(&output_chunk);
47 //! },
48 //! Err(err) => {
49 //! return Err(err);
50 //! },
51 //! }
52 //! }
53 //! }
54 //! # Ok(())
55 //! # }
56 //! ```
57
58 use crate::io::byteio::*;
59 use crate::io::bitreader::*;
60 use crate::io::codebook::*;
61 use super::*;
62
63 const NUM_LITERALS: usize = 287;
64 const NUM_DISTS: usize = 32;
65
66 struct FixedLenCodeReader {}
67
68 impl CodebookDescReader<u16> for FixedLenCodeReader {
69 fn bits(&mut self, idx: usize) -> u8 {
70 if idx < 144 { 8 }
71 else if idx < 256 { 9 }
72 else if idx < 280 { 7 }
73 else { 8 }
74 }
75 #[allow(clippy::identity_op)]
76 fn code(&mut self, idx: usize) -> u32 {
77 let base = idx as u32;
78 let bits = self.bits(idx);
79 if idx < 144 { reverse_bits(base + 0x30, bits) }
80 else if idx < 256 { reverse_bits(base + 0x190 - 144, bits) }
81 else if idx < 280 { reverse_bits(base + 0x000 - 256, bits) }
82 else { reverse_bits(base + 0xC0 - 280, bits) }
83 }
84 fn sym (&mut self, idx: usize) -> u16 { idx as u16 }
85 fn len(&mut self) -> usize { NUM_LITERALS + 1 }
86 }
87
88 #[derive(Clone,Copy,Default)]
89 struct BitReaderState {
90 pos: usize,
91 bitbuf: u32,
92 bits: u8,
93 }
94
95 struct CurrentSource<'a> {
96 src: &'a [u8],
97 br: BitReaderState,
98 }
99
100 impl<'a> CurrentSource<'a> {
101 fn new(src: &'a [u8], br: BitReaderState) -> Self {
102 let mut newsrc = Self { src, br };
103 newsrc.br.pos = 0;
104 newsrc.refill();
105 newsrc
106 }
107 fn reinit(src: &'a [u8], br: BitReaderState) -> Self {
108 let mut newsrc = Self { src, br };
109 newsrc.refill();
110 newsrc
111 }
112 fn refill(&mut self) {
113 while (self.br.bits <= 24) && (self.br.pos < self.src.len()) {
114 self.br.bitbuf |= u32::from(self.src[self.br.pos]) << self.br.bits;
115 self.br.bits += 8;
116 self.br.pos += 1;
117 }
118 }
119 fn skip_cache(&mut self, nbits: u8) {
120 self.br.bitbuf >>= nbits;
121 self.br.bits -= nbits;
122 }
123 fn read(&mut self, nbits: u8) -> BitReaderResult<u32> {
124 if nbits == 0 { return Ok(0); }
125 if nbits > 16 { return Err(BitReaderError::TooManyBitsRequested); }
126 if self.br.bits < nbits {
127 self.refill();
128 if self.br.bits < nbits { return Err(BitReaderError::BitstreamEnd); }
129 }
130 let ret = self.br.bitbuf & ((1 << nbits) - 1);
131 self.skip_cache(nbits);
132 Ok(ret)
133 }
134 fn read_bool(&mut self) -> BitReaderResult<bool> {
135 if self.br.bits == 0 {
136 self.refill();
137 if self.br.bits == 0 { return Err(BitReaderError::BitstreamEnd); }
138 }
139 let ret = (self.br.bitbuf & 1) != 0;
140 self.skip_cache(1);
141 Ok(ret)
142 }
143 fn peek(&mut self, nbits: u8) -> u32 {
144 if nbits == 0 || nbits > 16 { return 0; }
145 if self.br.bits < nbits {
146 self.refill();
147 }
148 self.br.bitbuf & ((1 << nbits) - 1)
149 }
150 fn skip(&mut self, nbits: u32) -> BitReaderResult<()> {
151 if u32::from(self.br.bits) >= nbits {
152 self.skip_cache(nbits as u8);
153 } else {
154 unreachable!();
155 }
156 Ok(())
157 }
158 fn align(&mut self) {
159 let b = self.br.bits & 7;
160 if b != 0 {
161 self.skip_cache(8 - (b as u8));
162 }
163 }
164 fn left(&self) -> isize {
165 ((self.src.len() as isize) - (self.br.pos as isize)) * 8 + (self.br.bits as isize)
166 }
167 }
168
169 impl<'a, S: Copy> CodebookReader<S> for CurrentSource<'a> {
170 fn read_cb(&mut self, cb: &Codebook<S>) -> CodebookResult<S> {
171 let mut esc = true;
172 let mut idx = 0;
173 let mut lut_bits = cb.lut_bits;
174 let orig_br = self.br;
175 while esc {
176 let lut_idx = (self.peek(lut_bits) as usize) + (idx as usize);
177 if cb.table[lut_idx] == TABLE_FILL_VALUE { return Err(CodebookError::InvalidCode); }
178 let bits = cb.table[lut_idx] & 0x7F;
179 esc = (cb.table[lut_idx] & 0x80) != 0;
180 idx = (cb.table[lut_idx] >> 8) as usize;
181 let skip_bits = if esc { u32::from(lut_bits) } else { bits };
182 if (skip_bits as isize) > self.left() {
183 self.br = orig_br;
184 self.refill();
185 return Err(CodebookError::MemoryError);
186 }
187 self.skip(skip_bits as u32).unwrap();
188 lut_bits = bits as u8;
189 }
190 Ok(cb.syms[idx])
191 }
192 }
193
194 enum InflateState {
195 Start,
196 BlockStart,
197 BlockMode,
198 StaticBlockLen,
199 StaticBlockInvLen(u32),
200 StaticBlockCopy(usize),
201 FixedBlock,
202 FixedBlockLengthExt(usize, u8),
203 FixedBlockDist(usize),
204 FixedBlockDistExt(usize, usize, u8),
205 FixedBlockCopy(usize, usize),
206 FixedBlockLiteral(u8),
207 DynBlockHlit,
208 DynBlockHdist,
209 DynBlockHclen,
210 DynLengths(usize),
211 DynCodeLengths,
212 DynCodeLengthsAdd(usize),
213 DynBlock,
214 DynBlockLengthExt(usize, u8),
215 DynBlockDist(usize),
216 DynBlockDistExt(usize, usize, u8),
217 DynCopy(usize, usize),
218 DynBlockLiteral(u8),
219 End,
220 }
221
222 ///! The decompressor for deflated streams (RFC 1951).
223 pub struct Inflate {
224 br: BitReaderState,
225 fix_len_cb: Codebook<u16>,
226
227 buf: [u8; 65536],
228 bpos: usize,
229 output_idx: usize,
230
231 state: InflateState,
232 final_block: bool,
233 hlit: usize,
234 hdist: usize,
235 dyn_len_cb: Option<Codebook<u32>>,
236 dyn_lit_cb: Option<Codebook<u32>>,
237 dyn_dist_cb: Option<Codebook<u32>>,
238 len_lengths: [u8; 19],
239 all_lengths: [u8; NUM_LITERALS + NUM_DISTS],
240 cur_len_idx: usize,
241 }
242
243 const LENGTH_ADD_BITS: [u8; 29] = [
244 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
245 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
246 4, 4, 4, 4, 5, 5, 5, 5, 0
247 ];
248 const LENGTH_BASE: [u16; 29] = [
249 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
250 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
251 67, 83, 99, 115, 131, 163, 195, 227, 258
252 ];
253 const DIST_ADD_BITS: [u8; 30] = [
254 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
255 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
256 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
257 ];
258 const DIST_BASE: [u16; 30] = [
259 1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
260 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
261 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
262 ];
263 const LEN_RECODE: [usize; 19] = [
264 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
265 ];
266 const REPEAT_BITS: [u8; 3] = [ 2, 3, 7 ];
267 const REPEAT_BASE: [u8; 3] = [ 3, 3, 11 ];
268
269 macro_rules! read_bits {
270 ($self: expr, $csrc: expr, $bits: expr) => ({
271 if $csrc.left() < $bits as isize {
272 $self.br = $csrc.br;
273 return Err(DecompressError::ShortData);
274 }
275 $csrc.read($bits).unwrap()
276 })
277 }
278
279 macro_rules! read_cb {
280 ($self: expr, $csrc: expr, $cb: expr) => ({
281 let ret = $csrc.read_cb($cb);
282 if let Err(CodebookError::MemoryError) = ret {
283 $self.br = $csrc.br;
284 return Err(DecompressError::ShortData);
285 }
286 match ret {
287 Ok(val) => val,
288 Err(_) => {
289 $self.state = InflateState::End;
290 return Err(DecompressError::InvalidData);
291 },
292 }
293 })
294 }
295
296 impl Inflate {
297 ///! Creates a new instance of `Inflate` struct.
298 pub fn new() -> Self {
299 let mut cr = FixedLenCodeReader {};
300 let fix_len_cb = Codebook::new(&mut cr, CodebookMode::LSB).unwrap();
301 Self {
302 br: BitReaderState::default(),
303 fix_len_cb,
304
305 buf: [0; 65536],
306 bpos: 0,
307 output_idx: 0,
308
309 state: InflateState::Start,
310 final_block: false,
311 dyn_len_cb: None,
312 dyn_lit_cb: None,
313 dyn_dist_cb: None,
314 hlit: 0,
315 hdist: 0,
316 len_lengths: [0; 19],
317 all_lengths: [0; NUM_LITERALS + NUM_DISTS],
318 cur_len_idx: 0,
319 }
320 }
321 fn put_literal(&mut self, val: u8) {
322 self.buf[self.bpos] = val;
323 self.bpos += 1;
324 }
325 fn lz_copy(&mut self, offset: usize, len: usize, dst: &mut [u8]) -> DecompressResult<()> {
326 let mask = self.buf.len() - 1;
327 if self.bpos < offset {
328 return Err(DecompressError::InvalidData);
329 }
330 let cstart = (self.bpos - offset) & mask;
331 for i in 0..len {
332 self.buf[(self.bpos + i) & mask] = self.buf[(cstart + i) & mask];
333 dst[i] = self.buf[(cstart + i) & mask];
334 }
335 self.bpos += len;
336 Ok(())
337 }
338 ///! Reports whether decoder has finished decoding the input.
339 pub fn is_finished(&self) -> bool {
340 match self.state {
341 InflateState::End => true,
342 _ => false,
343 }
344 }
345 ///! Reports the current amount of bytes output into the destination buffer after the last run.
346 pub fn get_current_output_size(&self) -> usize { self.output_idx }
347 ///! Reports the total amount of bytes decoded so far.
348 pub fn get_total_output_size(&self) -> usize { self.bpos }
349 ///! Tries to decompress input data and write it to the output buffer.
350 ///!
351 ///! Since the decompressor can work with arbitrary input and output chunks its return value may have several meanings:
352 ///! * `Ok(len)` means the stream has been fully decoded and then number of bytes output into the destination buffer is returned.
353 ///! * [`DecompressError::ShortData`] means the input stream has been fully read but more data is needed.
354 ///! * [`DecompressError::OutputFull`] means the output buffer is full and should be flushed. Then decoding should continue on the same input block with `continue_block` parameter set to `true`.
355 ///!
356 ///! [`DecompressError::ShortData`]: ../enum.DecompressError.html#variant.ShortData
357 ///! [`DecompressError::OutputFull`]: ../enum.DecompressError.html#variant.OutputFull
358 pub fn decompress_data(&mut self, src: &[u8], dst: &mut [u8], continue_block: bool) -> DecompressResult<usize> {
359 if src.is_empty() || dst.is_empty() {
360 return Err(DecompressError::InvalidArgument);
361 }
362 let mut csrc = if !continue_block {
363 CurrentSource::new(src, self.br)
364 } else {
365 self.output_idx = 0;
366 CurrentSource::reinit(src, self.br)
367 };
368 'main: loop {
369 match self.state {
370 InflateState::Start | InflateState::BlockStart => {
371 if csrc.left() == 0 {
372 self.br = csrc.br;
373 return Err(DecompressError::ShortData);
374 }
375 self.final_block = csrc.read_bool().unwrap();
376 self.state = InflateState::BlockMode;
377 },
378 InflateState::BlockMode => {
379 let bmode = read_bits!(self, csrc, 2);
380 match bmode {
381 0 => {
382 csrc.align();
383 self.state = InflateState::StaticBlockLen;
384 },
385 1 => { self.state = InflateState::FixedBlock; },
386 2 => { self.state = InflateState::DynBlockHlit; },
387 _ => {
388 self.state = InflateState::End;
389 return Err(DecompressError::InvalidHeader);
390 },
391 };
392 },
393 InflateState::StaticBlockLen => {
394 let len = read_bits!(self, csrc, 16);
395 self.state = InflateState::StaticBlockInvLen(len);
396 },
397 InflateState::StaticBlockInvLen(len) => {
398 let inv_len = read_bits!(self, csrc, 16);
399 if len != !inv_len {
400 self.state = InflateState::End;
401 return Err(DecompressError::InvalidHeader);
402 }
403 self.state = InflateState::StaticBlockCopy(len as usize);
404 },
405 InflateState::StaticBlockCopy(len) => {
406 for i in 0..len {
407 if csrc.left() < 8 {
408 self.br = csrc.br;
409 self.state = InflateState::StaticBlockCopy(len - i);
410 return Err(DecompressError::ShortData);
411 }
412 let val = csrc.read(8).unwrap() as u8;
413 self.put_literal(val);
414 }
415 self.state = InflateState::BlockStart;
416 }
417 InflateState::FixedBlock => {
418 let val = read_cb!(self, csrc, &self.fix_len_cb);
419 if val < 256 {
420 if self.output_idx >= dst.len() {
421 self.br = csrc.br;
422 self.state = InflateState::FixedBlockLiteral(val as u8);
423 return Err(DecompressError::OutputFull);
424 }
425 self.put_literal(val as u8);
426 dst[self.output_idx] = val as u8;
427 self.output_idx += 1;
428 } else if val == 256 {
429 if self.final_block {
430 self.state = InflateState::End;
431 return Ok(self.output_idx);
432 } else {
433 self.state = InflateState::BlockStart;
434 }
435 } else {
436 let len_idx = (val - 257) as usize;
437 if len_idx >= LENGTH_BASE.len() {
438 self.state = InflateState::End;
439 return Err(DecompressError::InvalidData);
440 }
441 let len_bits = LENGTH_ADD_BITS[len_idx];
442 let add_base = LENGTH_BASE[len_idx] as usize;
443 if len_bits > 0 {
444 self.state = InflateState::FixedBlockLengthExt(add_base, len_bits);
445 } else {
446 self.state = InflateState::FixedBlockDist(add_base);
447 }
448 }
449 },
450 InflateState::FixedBlockLiteral(sym) => {
451 if self.output_idx >= dst.len() {
452 self.br = csrc.br;
453 return Err(DecompressError::OutputFull);
454 }
455 self.put_literal(sym);
456 dst[self.output_idx] = sym;
457 self.output_idx += 1;
458 self.state = InflateState::FixedBlock;
459 },
460 InflateState::FixedBlockLengthExt(base, bits) => {
461 let add = read_bits!(self, csrc, bits) as usize;
462 self.state = InflateState::FixedBlockDist(base + add);
463 },
464 InflateState::FixedBlockDist(length) => {
465 let dist_idx = reverse_bits(read_bits!(self, csrc, 5), 5) as usize;
466 if dist_idx >= DIST_BASE.len() {
467 self.state = InflateState::End;
468 return Err(DecompressError::InvalidData);
469 }
470 let dist_bits = DIST_ADD_BITS[dist_idx];
471 let dist_base = DIST_BASE[dist_idx] as usize;
472 if dist_bits == 0 {
473 self.state = InflateState::FixedBlockCopy(length, dist_base);
474 } else {
475 self.state = InflateState::FixedBlockDistExt(length, dist_base, dist_bits);
476 }
477 },
478 InflateState::FixedBlockDistExt(length, base, bits) => {
479 let add = read_bits!(self, csrc, bits) as usize;
480 self.state = InflateState::FixedBlockCopy(length, base + add);
481 },
482 InflateState::FixedBlockCopy(length, dist) => {
483 if self.output_idx + length > dst.len() {
484 let copy_size = dst.len() - self.output_idx;
485 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
486 if ret.is_err() {
487 self.state = InflateState::End;
488 return Err(DecompressError::InvalidData);
489 }
490 self.output_idx += copy_size;
491 self.br = csrc.br;
492 self.state = InflateState::FixedBlockCopy(length - copy_size, dist);
493 return Err(DecompressError::OutputFull);
494 }
495 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
496 if ret.is_err() {
497 self.state = InflateState::End;
498 return Err(DecompressError::InvalidData);
499 }
500 self.output_idx += length;
501 self.state = InflateState::FixedBlock;
502 }
503 InflateState::DynBlockHlit => {
504 self.hlit = (read_bits!(self, csrc, 5) as usize) + 257;
505 if self.hlit >= 287 {
506 self.state = InflateState::End;
507 return Err(DecompressError::InvalidHeader);
508 }
509 self.state = InflateState::DynBlockHdist;
510 }
511 InflateState::DynBlockHdist => {
512 self.hdist = (read_bits!(self, csrc, 5) as usize) + 1;
513 self.state = InflateState::DynBlockHclen;
514 },
515 InflateState::DynBlockHclen => {
516 let hclen = (read_bits!(self, csrc, 4) as usize) + 4;
517 self.cur_len_idx = 0;
518 self.len_lengths = [0; 19];
519 self.all_lengths = [0; NUM_LITERALS + NUM_DISTS];
520 self.state = InflateState::DynLengths(hclen);
521 },
522 InflateState::DynLengths(len) => {
523 for i in 0..len {
524 if csrc.left() < 3 {
525 self.br = csrc.br;
526 self.state = InflateState::DynLengths(len - i);
527 return Err(DecompressError::ShortData);
528 }
529 self.len_lengths[LEN_RECODE[self.cur_len_idx]] = csrc.read(3).unwrap() as u8;
530 self.cur_len_idx += 1;
531 }
532 let mut len_codes = [ShortCodebookDesc { code: 0, bits: 0 }; 19];
533 lengths_to_codes(&self.len_lengths, &mut len_codes)?;
534 let mut cr = ShortCodebookDescReader::new(len_codes.to_vec());
535 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
536 if ret.is_err() {
537 self.state = InflateState::End;
538 return Err(DecompressError::InvalidHeader);
539 }
540 self.dyn_len_cb = Some(ret.unwrap());
541 self.cur_len_idx = 0;
542 self.state = InflateState::DynCodeLengths;
543 },
544 InflateState::DynCodeLengths => {
545 if let Some(ref len_cb) = self.dyn_len_cb {
546 while self.cur_len_idx < self.hlit + self.hdist {
547 let ret = csrc.read_cb(len_cb);
548 let val = match ret {
549 Ok(val) => val,
550 Err(CodebookError::MemoryError) => {
551 self.br = csrc.br;
552 return Err(DecompressError::ShortData);
553 },
554 Err(_) => {
555 self.state = InflateState::End;
556 return Err(DecompressError::InvalidHeader);
557 },
558 };
559 if val < 16 {
560 self.all_lengths[self.cur_len_idx] = val as u8;
561 self.cur_len_idx += 1;
562 } else {
563 let idx = (val as usize) - 16;
564 if idx > 2 {
565 self.state = InflateState::End;
566 return Err(DecompressError::InvalidHeader);
567 }
568 self.state = InflateState::DynCodeLengthsAdd(idx);
569 continue 'main;
570 }
571 }
572 let (lit_lengths, dist_lengths) = self.all_lengths.split_at(self.hlit);
573
574 let mut lit_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_LITERALS];
575 lengths_to_codes(&lit_lengths, &mut lit_codes)?;
576 let mut cr = ShortCodebookDescReader::new(lit_codes.to_vec());
577 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
578 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
579 self.dyn_lit_cb = Some(ret.unwrap());
580
581 let mut dist_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_DISTS];
582 lengths_to_codes(&dist_lengths[..self.hdist], &mut dist_codes)?;
583 let mut cr = ShortCodebookDescReader::new(dist_codes.to_vec());
584 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
585 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
586 self.dyn_dist_cb = Some(ret.unwrap());
587
588 self.state = InflateState::DynBlock;
589 } else {
590 unreachable!();
591 }
592 },
593 InflateState::DynCodeLengthsAdd(mode) => {
594 let base = REPEAT_BASE[mode] as usize;
595 let bits = REPEAT_BITS[mode];
596 let len = base + read_bits!(self, csrc, bits) as usize;
597 if self.cur_len_idx + len > self.hlit + self.hdist {
598 self.state = InflateState::End;
599 return Err(DecompressError::InvalidHeader);
600 }
601 let rpt = if mode == 0 {
602 if self.cur_len_idx == 0 {
603 self.state = InflateState::End;
604 return Err(DecompressError::InvalidHeader);
605 }
606 self.all_lengths[self.cur_len_idx - 1]
607 } else {
608 0
609 };
610 for _ in 0..len {
611 self.all_lengths[self.cur_len_idx] = rpt;
612 self.cur_len_idx += 1;
613 }
614 self.state = InflateState::DynCodeLengths;
615 },
616 InflateState::DynBlock => {
617 if let Some(ref lit_cb) = self.dyn_lit_cb {
618 let val = read_cb!(self, csrc, lit_cb);
619 if val < 256 {
620 if self.output_idx >= dst.len() {
621 self.br = csrc.br;
622 self.state = InflateState::DynBlockLiteral(val as u8);
623 return Err(DecompressError::OutputFull);
624 }
625 self.put_literal(val as u8);
626 dst[self.output_idx] = val as u8;
627 self.output_idx += 1;
628 } else if val == 256 {
629 if self.final_block {
630 self.state = InflateState::End;
631 return Ok(self.output_idx);
632 } else {
633 self.state = InflateState::BlockStart;
634 }
635 } else {
636 let len_idx = (val - 257) as usize;
637 if len_idx >= LENGTH_BASE.len() {
638 self.state = InflateState::End;
639 return Err(DecompressError::InvalidData);
640 }
641 let len_bits = LENGTH_ADD_BITS[len_idx];
642 let add_base = LENGTH_BASE[len_idx] as usize;
643 if len_bits > 0 {
644 self.state = InflateState::DynBlockLengthExt(add_base, len_bits);
645 } else {
646 self.state = InflateState::DynBlockDist(add_base);
647 }
648 }
649 } else {
650 unreachable!();
651 }
652 },
653 InflateState::DynBlockLiteral(sym) => {
654 if self.output_idx >= dst.len() {
655 self.br = csrc.br;
656 return Err(DecompressError::OutputFull);
657 }
658 self.put_literal(sym);
659 dst[self.output_idx] = sym;
660 self.output_idx += 1;
661 self.state = InflateState::DynBlock;
662 },
663 InflateState::DynBlockLengthExt(base, bits) => {
664 let add = read_bits!(self, csrc, bits) as usize;
665 self.state = InflateState::DynBlockDist(base + add);
666 },
667 InflateState::DynBlockDist(length) => {
668 if let Some(ref dist_cb) = self.dyn_dist_cb {
669 let dist_idx = read_cb!(self, csrc, dist_cb) as usize;
670 if dist_idx >= DIST_BASE.len() {
671 self.state = InflateState::End;
672 return Err(DecompressError::InvalidData);
673 }
674 let dist_bits = DIST_ADD_BITS[dist_idx];
675 let dist_base = DIST_BASE[dist_idx] as usize;
676 if dist_bits == 0 {
677 self.state = InflateState::DynCopy(length, dist_base);
678 } else {
679 self.state = InflateState::DynBlockDistExt(length, dist_base, dist_bits);
680 }
681 } else {
682 unreachable!();
683 }
684 },
685 InflateState::DynBlockDistExt(length, base, bits) => {
686 let add = read_bits!(self, csrc, bits) as usize;
687 self.state = InflateState::DynCopy(length, base + add);
688 },
689 InflateState::DynCopy(length, dist) => {
690 if self.output_idx + length > dst.len() {
691 let copy_size = dst.len() - self.output_idx;
692 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
693 if ret.is_err() {
694 self.state = InflateState::End;
695 return Err(DecompressError::InvalidData);
696 }
697 self.output_idx += copy_size;
698 self.br = csrc.br;
699 self.state = InflateState::DynCopy(length - copy_size, dist);
700 return Err(DecompressError::OutputFull);
701 }
702 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
703 if ret.is_err() {
704 self.state = InflateState::End;
705 return Err(DecompressError::InvalidData);
706 }
707 self.output_idx += length;
708 self.state = InflateState::DynBlock;
709 }
710 InflateState::End => {
711 return Ok(0);
712 },
713 }
714 }
715 }
716 ///! Decompresses input data into output returning the uncompressed data length.
717 pub fn uncompress(src: &[u8], dst: &mut [u8]) -> DecompressResult<usize> {
718 let mut inflate = Self::new();
719 inflate.decompress_data(src, dst, false)
720 }
721 }
722
723 impl Default for Inflate {
724 fn default() -> Self {
725 Self::new()
726 }
727 }
728
729 fn lengths_to_codes(lens: &[u8], codes: &mut [ShortCodebookDesc]) -> DecompressResult<()> {
730 let mut bits = [0u32; 32];
731 let mut pfx = [0u32; 33];
732 for len in lens.iter() {
733 let len = *len as usize;
734 if len >= bits.len() {
735 return Err(DecompressError::InvalidHeader);
736 }
737 bits[len] += 1;
738 }
739 bits[0] = 0;
740 let mut code = 0;
741 for i in 0..bits.len() {
742 code = (code + bits[i]) << 1;
743 pfx[i + 1] = code;
744 }
745
746 for (len, codes) in lens.iter().zip(codes.iter_mut()) {
747 let len = *len as usize;
748 if len != 0 {
749 let bits = len as u8;
750 *codes = ShortCodebookDesc { code: reverse_bits(pfx[len], bits), bits };
751 pfx[len] += 1;
752 } else {
753 *codes = ShortCodebookDesc { code: 0, bits: 0 };
754 }
755 }
756
757 Ok(())
758 }
759
760 struct GzipCRC32 {
761 tab: [u32; 256],
762 crc: u32,
763 }
764
765 impl GzipCRC32 {
766 #[allow(clippy::unreadable_literal)]
767 fn new() -> Self {
768 let mut tab = [0u32; 256];
769 for i in 0..256 {
770 let mut c = i as u32;
771 for _ in 0..8 {
772 if (c & 1) != 0 {
773 c = 0xEDB88320 ^ (c >> 1);
774 } else {
775 c >>= 1;
776 }
777 }
778 tab[i] = c;
779 }
780 Self { tab, crc: 0 }
781 }
782 fn update_crc(&mut self, src: &[u8]) {
783 let mut c = !self.crc;
784 for el in src.iter() {
785 c = self.tab[((c ^ u32::from(*el)) & 0xFF) as usize] ^ (c >> 8);
786 }
787 self.crc = !c;
788 }
789 }
790
791 ///! Decodes input data in gzip file format (RFC 1952) returning a vector containing decoded data.
792 pub fn gzip_decode(br: &mut ByteReader, skip_crc: bool) -> DecompressResult<Vec<u8>> {
793 const FLAG_HCRC: u8 = 0x02;
794 const FLAG_EXTRA: u8 = 0x04;
795 const FLAG_NAME: u8 = 0x08;
796 const FLAG_COMMENT: u8 = 0x10;
797
798 let id1 = br.read_byte()?;
799 let id2 = br.read_byte()?;
800 let cm = br.read_byte()?;
801 let flg = br.read_byte()?;
802 let _mtime = br.read_u32le()?;
803 let _xfl = br.read_byte()?;
804 let _os = br.read_byte()?;
805 if id1 != 0x1F || id2 != 0x8B || cm != 8 {
806 return Err(DecompressError::InvalidHeader);
807 }
808
809 if (flg & FLAG_EXTRA) != 0 {
810 let xlen = br.read_u16le()? as usize;
811 br.read_skip(xlen)?;
812 }
813 if (flg & FLAG_NAME) != 0 {
814 loop {
815 let b = br.read_byte()?;
816 if b == 0 {
817 break;
818 }
819 }
820 }
821 if (flg & FLAG_COMMENT) != 0 {
822 loop {
823 let b = br.read_byte()?;
824 if b == 0 {
825 break;
826 }
827 }
828 }
829 let _hcrc = if (flg & FLAG_HCRC) != 0 {
830 br.read_u16le()?
831 } else {
832 0
833 };
834 if (flg & 0xE0) != 0 {
835 return Err(DecompressError::Unsupported);
836 }
837
838 let mut output: Vec<u8> = Vec::new();
839 let mut tail = [0u8; 8];
840 let mut inblk = [0u8; 1024];
841 let mut oblk = [0u8; 4096];
842 let mut inflate = Inflate::new();
843 let mut checker = GzipCRC32::new();
844
845 loop {
846 let ret = br.read_buf_some(&mut inblk);
847 if let Err(ByteIOError::EOF) = ret {
848 break;
849 }
850 let inlen = match ret {
851 Ok(val) => val,
852 Err(_) => return Err(DecompressError::IOError),
853 };
854 let mut repeat = false;
855 loop {
856 let ret = inflate.decompress_data(&inblk[..inlen], &mut oblk, repeat);
857 match ret {
858 Ok(outlen) => {
859 checker.update_crc(&oblk[..outlen]);
860 output.extend_from_slice(&oblk[..outlen]);
861 break;
862 },
863 Err(DecompressError::ShortData) => {
864 break;
865 },
866 Err(DecompressError::OutputFull) => {
867 repeat = true;
868 checker.update_crc(&oblk);
869 output.extend_from_slice(&oblk);
870 },
871 Err(err) => {
872 return Err(err);
873 },
874 }
875 }
876 // Save last 8 bytes for CRC and size.
877 if inlen >= 8 {
878 tail.copy_from_slice(&inblk[inlen - 8..][..8]);
879 } else {
880 let shift_len = 8 - inlen;
881 for i in 0..shift_len {
882 tail[i] = tail[i + inlen];
883 }
884 for i in shift_len..8 {
885 tail[i] = inblk[i - shift_len];
886 }
887 }
888 }
889 if !skip_crc {
890 if !inflate.is_finished() { println!("???"); }
891 let crc = read_u32le(&tail[0..4])?;
892 let size = read_u32le(&tail[4..8])?;
893 if size != (output.len() as u32) {
894 return Err(DecompressError::CRCError);
895 }
896 if crc != checker.crc {
897 return Err(DecompressError::CRCError);
898 }
899 }
900
901 Ok(output)
902 }
903
904 #[cfg(test)]
905 mod test {
906 use super::*;
907
908 #[test]
909 fn test_inflate1() {
910 const TEST_DATA: &[u8] = &[
911 0xF3, 0x48, 0xCD, 0xC9, 0xC9, 0xD7, 0x51, 0x28,
912 0xCF, 0x2F, 0xCA, 0x49, 0x51, 0x04, 0x00 ];
913 const TEST_REF: &[u8] = b"Hello, world!";
914 let mut dst_buf = [0u8; 13];
915 let len = Inflate::uncompress(TEST_DATA, &mut dst_buf).unwrap();
916 assert_eq!(len, 13);
917 for i in 0..len {
918 assert_eq!(dst_buf[i], TEST_REF[i]);
919 }
920 }
921 #[test]
922 fn test_inflate2() {
923 const TEST_DATA3: &[u8] = &[ 0x4B, 0x4C, 0x44, 0x80, 0x24, 0x54, 0x80, 0x2C, 0x06, 0x00 ];
924 const TEST_REF3: &[u8] = b"aaaaaaaaaaaabbbbbbbbbbbbbbbaaaaabbbbbbb";
925 let mut dst_buf = [0u8; 39];
926
927 let mut inflate = Inflate::new();
928 let mut output_chunk = [0u8; 7];
929 let mut output_pos = 0;
930 for input in TEST_DATA3.chunks(3) {
931 let mut repeat = false;
932 loop {
933 let ret = inflate.decompress_data(input, &mut output_chunk, repeat);
934 match ret {
935 Ok(len) => {
936 for i in 0..len {
937 dst_buf[output_pos + i] = output_chunk[i];
938 }
939 output_pos += len;
940 break;
941 },
942 Err(DecompressError::ShortData) => {
943 break;
944 },
945 Err(DecompressError::OutputFull) => {
946 repeat = true;
947 for i in 0..output_chunk.len() {
948 dst_buf[output_pos + i] = output_chunk[i];
949 }
950 output_pos += output_chunk.len();
951 },
952 _ => {
953 panic!("decompress error {:?}", ret.err().unwrap());
954 },
955 }
956 }
957 }
958
959 assert_eq!(output_pos, dst_buf.len());
960 for i in 0..output_pos {
961 assert_eq!(dst_buf[i], TEST_REF3[i]);
962 }
963 }
964 #[test]
965 fn test_inflate3() {
966 const TEST_DATA: &[u8] = &[
967 0x1F, 0x8B, 0x08, 0x08, 0xF6, 0x7B, 0x90, 0x5E, 0x02, 0x03, 0x31, 0x2E, 0x74, 0x78, 0x74, 0x00,
968 0xE5, 0x95, 0x4B, 0x4E, 0xC3, 0x30, 0x10, 0x40, 0xF7, 0x39, 0xC5, 0x1C, 0x00, 0x16, 0x70, 0x83,
969 0x0A, 0xB5, 0x3B, 0xE8, 0x82, 0x5E, 0x60, 0x1A, 0x4F, 0xE2, 0x11, 0xFE, 0x44, 0x1E, 0xA7, 0x69,
970 0x6E, 0xCF, 0x38, 0xDD, 0xB0, 0x40, 0xA2, 0x46, 0x2D, 0x20, 0x2A, 0xE5, 0xAB, 0xCC, 0xE7, 0xBD,
971 0x49, 0xAC, 0x6C, 0x03, 0x64, 0x4B, 0xD0, 0x71, 0x92, 0x0C, 0x06, 0x67, 0x88, 0x1D, 0x3C, 0xD9,
972 0xC4, 0x92, 0x3D, 0x4A, 0xF3, 0x3C, 0x43, 0x4E, 0x23, 0x81, 0x8B, 0x07, 0x82, 0x1E, 0xF5, 0x90,
973 0x23, 0x78, 0x6A, 0x56, 0x30, 0x60, 0xCA, 0x89, 0x4D, 0x4F, 0xC0, 0x01, 0x10, 0x06, 0xC2, 0xA4,
974 0xA1, 0x44, 0xCD, 0xF6, 0x54, 0x50, 0xA8, 0x8D, 0xC1, 0x9C, 0x5F, 0x71, 0x37, 0x45, 0xC8, 0x63,
975 0xCA, 0x8E, 0xC0, 0xE8, 0x23, 0x69, 0x56, 0x9A, 0x8D, 0x5F, 0xB6, 0xC9, 0x96, 0x53, 0x4D, 0x17,
976 0xAB, 0xB9, 0xB0, 0x49, 0x14, 0x5A, 0x0B, 0x96, 0x82, 0x7C, 0xB7, 0x6F, 0x17, 0x35, 0xC7, 0x9E,
977 0xDF, 0x78, 0xA3, 0xF1, 0xD0, 0xA2, 0x73, 0x1C, 0x7A, 0xD8, 0x2B, 0xB3, 0x5C, 0x90, 0x85, 0xBB,
978 0x2A, 0x14, 0x2E, 0xF7, 0xD1, 0x19, 0x48, 0x0A, 0x23, 0x57, 0x45, 0x13, 0x3E, 0xD6, 0xA0, 0xBD,
979 0xF2, 0x11, 0x7A, 0x22, 0x21, 0xAD, 0xE5, 0x70, 0x56, 0xA0, 0x9F, 0xA5, 0xA5, 0x03, 0x85, 0x2A,
980 0xDE, 0x92, 0x00, 0x32, 0x61, 0x10, 0xAD, 0x27, 0x13, 0x7B, 0x5F, 0x98, 0x7F, 0x59, 0x83, 0xB8,
981 0xB7, 0x35, 0x16, 0xEB, 0x12, 0x0F, 0x1E, 0xD9, 0x14, 0x0B, 0xCF, 0xEE, 0x6D, 0x91, 0xF8, 0x93,
982 0x6E, 0x81, 0x3F, 0x7F, 0x41, 0xA4, 0x22, 0x1F, 0xB7, 0xE6, 0x85, 0x83, 0x9A, 0xA2, 0x61, 0x12,
983 0x0D, 0x0F, 0x6D, 0x01, 0xBD, 0xB0, 0xE8, 0x1D, 0xEC, 0xD1, 0xA0, 0xBF, 0x1F, 0x4E, 0xFB, 0x55,
984 0xBD, 0x73, 0xDD, 0x87, 0xB9, 0x53, 0x23, 0x17, 0xD3, 0xE2, 0xE9, 0x08, 0x87, 0x42, 0xFF, 0xCF,
985 0x26, 0x42, 0xAE, 0x76, 0xB5, 0xAE, 0x97, 0x0C, 0x18, 0x78, 0xA0, 0x24, 0xE5, 0x54, 0x0C, 0x6E,
986 0x60, 0x52, 0x79, 0x22, 0x57, 0xF5, 0x87, 0x78, 0x78, 0x04, 0x93, 0x46, 0xEF, 0xCB, 0x98, 0x96,
987 0x8B, 0x65, 0x00, 0xB7, 0x36, 0xBD, 0x77, 0xA8, 0xBD, 0x5A, 0xAA, 0x1A, 0x09, 0x00, 0x00
988 ];
989
990 let mut mr = MemoryReader::new_read(TEST_DATA);
991 let mut br = ByteReader::new(&mut mr);
992 let _dst_buf = gzip_decode(&mut br, false).unwrap();
993
994 // println!("{}", String::from_utf8_lossy(_dst_buf.as_slice()));
995 }
996 }