deflate: fix zlib stream magic detection
[nihav.git] / nihav-core / src / compr / deflate.rs
1 //! Deflate format (RFC 1951) support.
2 //!
3 //! This module provides functionality for decompressing raw deflated streams via [`Inflate`] and gzip files (RFC 1952) via [`gzip_decode`].
4 //!
5 //! [`Inflate`]: ./struct.Inflate.html
6 //! [`gzip_decode`]: ./fn.gzip_decode.html
7 //!
8 //! # Examples
9 //!
10 //! Decompressing full input buffer into sufficiently large output buffer:
11 //! ```
12 //! # use nihav_core::compr::DecompressError;
13 //! use nihav_core::compr::deflate::Inflate;
14 //!
15 //! # fn decompress(input: &[u8]) -> Result<(), DecompressError> {
16 //! # let mut output_buffer = [0u8; 16];
17 //! let output_length = Inflate::uncompress(input, &mut output_buffer)?;
18 //! # Ok(())
19 //! # }
20 //! ```
21 //!
22 //! Decompressing input chunks into portions of output:
23 //! ```
24 //! use nihav_core::compr::DecompressError;
25 //! use nihav_core::compr::deflate::Inflate;
26 //!
27 //! # fn decompress(input_data: &[u8]) -> Result<(), DecompressError> {
28 //! let mut inflate = Inflate::new();
29 //! let mut dst_buf: Vec<u8> = Vec::new();
30 //! let mut output_chunk = [0u8; 1024];
31 //! for src in input_data.chunks(512) {
32 //! let mut repeat = false;
33 //! loop {
34 //! let ret = inflate.decompress_data(src, &mut output_chunk, repeat);
35 //! match ret {
36 //! Ok(len) => { // we got a buffer decoded successfully to the end
37 //! dst_buf.extend_from_slice(&output_chunk[..len]);
38 //! break;
39 //! },
40 //! Err(DecompressError::ShortData) => { // this block of data was fully read
41 //! break;
42 //! },
43 //! Err(DecompressError::OutputFull) => {
44 //! // the output buffer is full, flush it and continue decoding the same block
45 //! repeat = true;
46 //! dst_buf.extend_from_slice(&output_chunk);
47 //! },
48 //! Err(err) => {
49 //! return Err(err);
50 //! },
51 //! }
52 //! }
53 //! }
54 //! # Ok(())
55 //! # }
56 //! ```
57
58 use crate::io::byteio::*;
59 use crate::io::bitreader::*;
60 use crate::io::codebook::*;
61 use super::*;
62
/// Size of the literal/length part of the code length table.
const NUM_LITERALS: usize = 287;
/// Size of the distance part of the code length table.
const NUM_DISTS:    usize = 32;
65
66 struct FixedLenCodeReader {}
67
68 impl CodebookDescReader<u16> for FixedLenCodeReader {
69 fn bits(&mut self, idx: usize) -> u8 {
70 if idx < 144 { 8 }
71 else if idx < 256 { 9 }
72 else if idx < 280 { 7 }
73 else { 8 }
74 }
75 #[allow(clippy::identity_op)]
76 fn code(&mut self, idx: usize) -> u32 {
77 let base = idx as u32;
78 let bits = self.bits(idx);
79 if idx < 144 { reverse_bits(base + 0x30, bits) }
80 else if idx < 256 { reverse_bits(base + 0x190 - 144, bits) }
81 else if idx < 280 { reverse_bits(base + 0x000 - 256, bits) }
82 else { reverse_bits(base + 0xC0 - 280, bits) }
83 }
84 fn sym (&mut self, idx: usize) -> u16 { idx as u16 }
85 fn len(&mut self) -> usize { NUM_LITERALS + 1 }
86 }
87
88 #[derive(Clone,Copy,Default)]
89 struct BitReaderState {
90 pos: usize,
91 bitbuf: u32,
92 bits: u8,
93 }
94
95 struct CurrentSource<'a> {
96 src: &'a [u8],
97 br: BitReaderState,
98 }
99
100 impl<'a> CurrentSource<'a> {
101 fn new(src: &'a [u8], br: BitReaderState) -> Self {
102 let mut newsrc = Self { src, br };
103 newsrc.br.pos = 0;
104 newsrc.refill();
105 newsrc
106 }
107 fn reinit(src: &'a [u8], br: BitReaderState) -> Self {
108 let mut newsrc = Self { src, br };
109 newsrc.refill();
110 newsrc
111 }
112 fn refill(&mut self) {
113 while (self.br.bits <= 24) && (self.br.pos < self.src.len()) {
114 self.br.bitbuf |= u32::from(self.src[self.br.pos]) << self.br.bits;
115 self.br.bits += 8;
116 self.br.pos += 1;
117 }
118 }
119 fn skip_cache(&mut self, nbits: u8) {
120 self.br.bitbuf >>= nbits;
121 self.br.bits -= nbits;
122 }
123 fn read(&mut self, nbits: u8) -> BitReaderResult<u32> {
124 if nbits == 0 { return Ok(0); }
125 if nbits > 16 { return Err(BitReaderError::TooManyBitsRequested); }
126 if self.br.bits < nbits {
127 self.refill();
128 if self.br.bits < nbits { return Err(BitReaderError::BitstreamEnd); }
129 }
130 let ret = self.br.bitbuf & ((1 << nbits) - 1);
131 self.skip_cache(nbits);
132 Ok(ret)
133 }
134 fn read_bool(&mut self) -> BitReaderResult<bool> {
135 if self.br.bits == 0 {
136 self.refill();
137 if self.br.bits == 0 { return Err(BitReaderError::BitstreamEnd); }
138 }
139 let ret = (self.br.bitbuf & 1) != 0;
140 self.skip_cache(1);
141 Ok(ret)
142 }
143 fn peek(&mut self, nbits: u8) -> u32 {
144 if nbits == 0 || nbits > 16 { return 0; }
145 if self.br.bits < nbits {
146 self.refill();
147 }
148 self.br.bitbuf & ((1 << nbits) - 1)
149 }
150 fn skip(&mut self, nbits: u32) -> BitReaderResult<()> {
151 if u32::from(self.br.bits) >= nbits {
152 self.skip_cache(nbits as u8);
153 } else {
154 unreachable!();
155 }
156 Ok(())
157 }
158 fn align(&mut self) {
159 let b = self.br.bits & 7;
160 if b != 0 {
161 self.skip_cache(8 - (b as u8));
162 }
163 }
164 fn left(&self) -> isize {
165 ((self.src.len() as isize) - (self.br.pos as isize)) * 8 + (self.br.bits as isize)
166 }
167 }
168
169 impl<'a, S: Copy> CodebookReader<S> for CurrentSource<'a> {
170 fn read_cb(&mut self, cb: &Codebook<S>) -> CodebookResult<S> {
171 let mut esc = true;
172 let mut idx = 0;
173 let mut lut_bits = cb.lut_bits;
174 let orig_br = self.br;
175 while esc {
176 let lut_idx = (self.peek(lut_bits) as usize) + (idx as usize);
177 if cb.table[lut_idx] == TABLE_FILL_VALUE { return Err(CodebookError::InvalidCode); }
178 let bits = cb.table[lut_idx] & 0x7F;
179 esc = (cb.table[lut_idx] & 0x80) != 0;
180 idx = (cb.table[lut_idx] >> 8) as usize;
181 let skip_bits = if esc { u32::from(lut_bits) } else { bits };
182 if (skip_bits as isize) > self.left() {
183 self.br = orig_br;
184 self.refill();
185 return Err(CodebookError::MemoryError);
186 }
187 self.skip(skip_bits as u32).unwrap();
188 lut_bits = bits as u8;
189 }
190 Ok(cb.syms[idx])
191 }
192 }
193
/// Decoder state machine position; the payload keeps what was already
/// parsed so that decoding can resume after running out of input or output.
enum InflateState {
    /// Nothing has been decoded yet.
    Start,
    /// Expecting the final block flag.
    BlockStart,
    /// Expecting the two-bit block type.
    BlockMode,
    /// Stored block: expecting the 16-bit length.
    StaticBlockLen,
    /// Stored block: length is known, expecting its complement.
    StaticBlockInvLen(u32),
    /// Stored block: the number of bytes left to copy.
    StaticBlockCopy(usize),
    /// Fixed codes block: expecting the next literal/length symbol.
    FixedBlock,
    /// Fixed codes block: length base and the number of extra bits to read.
    FixedBlockLengthExt(usize, u8),
    /// Fixed codes block: copy length is known, expecting the distance code.
    FixedBlockDist(usize),
    /// Fixed codes block: copy length, distance base and the number of extra bits to read.
    FixedBlockDistExt(usize, usize, u8),
    /// Fixed codes block: pending copy with the remaining length and the distance.
    FixedBlockCopy(usize, usize),
    /// Fixed codes block: decoded literal waiting for room in the output.
    FixedBlockLiteral(u8),
    /// Dynamic codes block: expecting the number of literal/length codes.
    DynBlockHlit,
    /// Dynamic codes block: expecting the number of distance codes.
    DynBlockHdist,
    /// Dynamic codes block: expecting the number of code length codes.
    DynBlockHclen,
    /// Dynamic codes block: the number of code length code lengths left to read.
    DynLengths(usize),
    /// Dynamic codes block: reading literal/length and distance code lengths.
    DynCodeLengths,
    /// Dynamic codes block: expecting the repeat count for the given repeat mode.
    DynCodeLengthsAdd(usize),
    /// Dynamic codes block: expecting the next literal/length symbol.
    DynBlock,
    /// Dynamic codes block: length base and the number of extra bits to read.
    DynBlockLengthExt(usize, u8),
    /// Dynamic codes block: copy length is known, expecting the distance code.
    DynBlockDist(usize),
    /// Dynamic codes block: copy length, distance base and the number of extra bits to read.
    DynBlockDistExt(usize, usize, u8),
    /// Dynamic codes block: pending copy with the remaining length and the distance.
    DynCopy(usize, usize),
    /// Dynamic codes block: decoded literal waiting for room in the output.
    DynBlockLiteral(u8),
    /// Decoding has finished (successfully or because of an unrecoverable error).
    End,
}
221
222 ///! The decompressor for deflated streams (RFC 1951).
223 pub struct Inflate {
224 br: BitReaderState,
225 fix_len_cb: Codebook<u16>,
226
227 buf: [u8; 65536],
228 bpos: usize,
229 output_idx: usize,
230 full_pos: usize,
231
232 state: InflateState,
233 final_block: bool,
234 hlit: usize,
235 hdist: usize,
236 dyn_len_cb: Option<Codebook<u32>>,
237 dyn_lit_cb: Option<Codebook<u32>>,
238 dyn_dist_cb: Option<Codebook<u32>>,
239 len_lengths: [u8; 19],
240 all_lengths: [u8; NUM_LITERALS + NUM_DISTS],
241 cur_len_idx: usize,
242 }
243
/// Number of extra bits following each length code (indexed by symbol - 257).
const LENGTH_ADD_BITS: [u8; 29] = [
    0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1,
    2, 2, 2, 2,
    3, 3, 3, 3,
    4, 4, 4, 4,
    5, 5, 5, 5,
    0
];
/// Base copy length for each length code (indexed by symbol - 257).
const LENGTH_BASE: [u16; 29] = [
      3,   4,   5,   6,   7,   8,   9,  10,
     11,  13,  15,  17,
     19,  23,  27,  31,
     35,  43,  51,  59,
     67,  83,  99, 115,
    131, 163, 195, 227,
    258
];
/// Number of extra bits following each distance code.
const DIST_ADD_BITS: [u8; 30] = [
     0,  0,  0,  0,
     1,  1,  2,  2,  3,  3,  4,  4,  5,  5,
     6,  6,  7,  7,  8,  8,  9,  9, 10, 10,
    11, 11, 12, 12, 13, 13
];
/// Base distance for each distance code.
const DIST_BASE: [u16; 30] = [
        1,     2,     3,     4,
        5,     7,     9,    13,    17,    25,    33,    49,    65,    97,
      129,   193,   257,   385,   513,   769,  1025,  1537,  2049,  3073,
     4097,  6145,  8193, 12289, 16385, 24577
];
/// Order in which the lengths of the code length codes are transmitted.
const LEN_RECODE: [usize; 19] = [
    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
];
/// Number of repeat count bits for code length symbols 16, 17 and 18.
const REPEAT_BITS: [u8; 3] = [ 2, 3, 7 ];
/// Minimum repeat count for code length symbols 16, 17 and 18.
const REPEAT_BASE: [u8; 3] = [ 3, 3, 11 ];
269
/// Reads `$bits` bits from `$csrc`.
///
/// When the source does not have enough bits left, the reader state is saved
/// into `$self` and the enclosing function returns `DecompressError::ShortData`.
macro_rules! read_bits {
    ($self: expr, $csrc: expr, $bits: expr) => {{
        if $csrc.left() < $bits as isize {
            $self.br = $csrc.br;
            return Err(DecompressError::ShortData);
        }
        // cannot fail: the amount of available bits was checked above
        $csrc.read($bits).unwrap()
    }};
}
279
/// Reads one codeword from `$csrc` using codebook `$cb`.
///
/// When the input ends inside a codeword, the reader state is saved into
/// `$self` and the enclosing function returns `DecompressError::ShortData`.
/// Any other codebook error ends decoding with `DecompressError::InvalidData`.
macro_rules! read_cb {
    ($self: expr, $csrc: expr, $cb: expr) => {{
        match $csrc.read_cb($cb) {
            Ok(val) => val,
            Err(CodebookError::MemoryError) => {
                $self.br = $csrc.br;
                return Err(DecompressError::ShortData);
            },
            Err(_) => {
                $self.state = InflateState::End;
                return Err(DecompressError::InvalidData);
            },
        }
    }};
}
296
297 impl Inflate {
298 ///! Creates a new instance of `Inflate` struct.
299 pub fn new() -> Self {
300 let mut cr = FixedLenCodeReader {};
301 let fix_len_cb = Codebook::new(&mut cr, CodebookMode::LSB).unwrap();
302 Self {
303 br: BitReaderState::default(),
304 fix_len_cb,
305
306 buf: [0; 65536],
307 bpos: 0,
308 output_idx: 0,
309 full_pos: 0,
310
311 state: InflateState::Start,
312 final_block: false,
313 dyn_len_cb: None,
314 dyn_lit_cb: None,
315 dyn_dist_cb: None,
316 hlit: 0,
317 hdist: 0,
318 len_lengths: [0; 19],
319 all_lengths: [0; NUM_LITERALS + NUM_DISTS],
320 cur_len_idx: 0,
321 }
322 }
323 fn put_literal(&mut self, val: u8) {
324 self.buf[self.bpos] = val;
325 self.bpos = (self.bpos + 1) & (self.buf.len() - 1);
326 self.full_pos += 1;
327 }
328 fn lz_copy(&mut self, offset: usize, len: usize, dst: &mut [u8]) -> DecompressResult<()> {
329 let mask = self.buf.len() - 1;
330 if offset > self.full_pos {
331 return Err(DecompressError::InvalidData);
332 }
333 let cstart = (self.bpos.wrapping_sub(offset)) & mask;
334 for i in 0..len {
335 self.buf[(self.bpos + i) & mask] = self.buf[(cstart + i) & mask];
336 dst[i] = self.buf[(cstart + i) & mask];
337 }
338 self.bpos = (self.bpos + len) & mask;
339 self.full_pos += len;
340 Ok(())
341 }
342 ///! Reports whether decoder has finished decoding the input.
343 pub fn is_finished(&self) -> bool {
344 match self.state {
345 InflateState::End => true,
346 _ => false,
347 }
348 }
349 ///! Reports the current amount of bytes output into the destination buffer after the last run.
350 pub fn get_current_output_size(&self) -> usize { self.output_idx }
351 ///! Reports the total amount of bytes decoded so far.
352 pub fn get_total_output_size(&self) -> usize { self.bpos }
353 ///! Tries to decompress input data and write it to the output buffer.
354 ///!
355 ///! Since the decompressor can work with arbitrary input and output chunks its return value may have several meanings:
356 ///! * `Ok(len)` means the stream has been fully decoded and then number of bytes output into the destination buffer is returned.
357 ///! * [`DecompressError::ShortData`] means the input stream has been fully read but more data is needed.
358 ///! * [`DecompressError::OutputFull`] means the output buffer is full and should be flushed. Then decoding should continue on the same input block with `continue_block` parameter set to `true`.
359 ///!
360 ///! [`DecompressError::ShortData`]: ../enum.DecompressError.html#variant.ShortData
361 ///! [`DecompressError::OutputFull`]: ../enum.DecompressError.html#variant.OutputFull
362 #[allow(clippy::comparison_chain)]
363 pub fn decompress_data(&mut self, src: &[u8], dst: &mut [u8], continue_block: bool) -> DecompressResult<usize> {
364 if src.is_empty() || dst.is_empty() {
365 return Err(DecompressError::InvalidArgument);
366 }
367 let mut csrc = if !continue_block {
368 CurrentSource::new(src, self.br)
369 } else {
370 self.output_idx = 0;
371 CurrentSource::reinit(src, self.br)
372 };
373 'main: loop {
374 match self.state {
375 InflateState::Start | InflateState::BlockStart => {
376 if csrc.left() == 0 {
377 self.br = csrc.br;
378 return Err(DecompressError::ShortData);
379 }
380 self.final_block = csrc.read_bool().unwrap();
381 self.state = InflateState::BlockMode;
382 },
383 InflateState::BlockMode => {
384 let bmode = read_bits!(self, csrc, 2);
385 match bmode {
386 0 => {
387 csrc.align();
388 self.state = InflateState::StaticBlockLen;
389 },
390 1 => { self.state = InflateState::FixedBlock; },
391 2 => { self.state = InflateState::DynBlockHlit; },
392 _ => {
393 self.state = InflateState::End;
394 return Err(DecompressError::InvalidHeader);
395 },
396 };
397 },
398 InflateState::StaticBlockLen => {
399 let len = read_bits!(self, csrc, 16);
400 self.state = InflateState::StaticBlockInvLen(len);
401 },
402 InflateState::StaticBlockInvLen(len) => {
403 let inv_len = read_bits!(self, csrc, 16);
404 if len != !inv_len {
405 self.state = InflateState::End;
406 return Err(DecompressError::InvalidHeader);
407 }
408 self.state = InflateState::StaticBlockCopy(len as usize);
409 },
410 InflateState::StaticBlockCopy(len) => {
411 for i in 0..len {
412 if csrc.left() < 8 {
413 self.br = csrc.br;
414 self.state = InflateState::StaticBlockCopy(len - i);
415 return Err(DecompressError::ShortData);
416 }
417 let val = csrc.read(8).unwrap() as u8;
418 self.put_literal(val);
419 }
420 self.state = InflateState::BlockStart;
421 }
422 InflateState::FixedBlock => {
423 let val = read_cb!(self, csrc, &self.fix_len_cb);
424 if val < 256 {
425 if self.output_idx >= dst.len() {
426 self.br = csrc.br;
427 self.state = InflateState::FixedBlockLiteral(val as u8);
428 return Err(DecompressError::OutputFull);
429 }
430 self.put_literal(val as u8);
431 dst[self.output_idx] = val as u8;
432 self.output_idx += 1;
433 } else if val == 256 {
434 if self.final_block {
435 self.state = InflateState::End;
436 return Ok(self.output_idx);
437 } else {
438 self.state = InflateState::BlockStart;
439 }
440 } else {
441 let len_idx = (val - 257) as usize;
442 if len_idx >= LENGTH_BASE.len() {
443 self.state = InflateState::End;
444 return Err(DecompressError::InvalidData);
445 }
446 let len_bits = LENGTH_ADD_BITS[len_idx];
447 let add_base = LENGTH_BASE[len_idx] as usize;
448 if len_bits > 0 {
449 self.state = InflateState::FixedBlockLengthExt(add_base, len_bits);
450 } else {
451 self.state = InflateState::FixedBlockDist(add_base);
452 }
453 }
454 },
455 InflateState::FixedBlockLiteral(sym) => {
456 if self.output_idx >= dst.len() {
457 self.br = csrc.br;
458 return Err(DecompressError::OutputFull);
459 }
460 self.put_literal(sym);
461 dst[self.output_idx] = sym;
462 self.output_idx += 1;
463 self.state = InflateState::FixedBlock;
464 },
465 InflateState::FixedBlockLengthExt(base, bits) => {
466 let add = read_bits!(self, csrc, bits) as usize;
467 self.state = InflateState::FixedBlockDist(base + add);
468 },
469 InflateState::FixedBlockDist(length) => {
470 let dist_idx = reverse_bits(read_bits!(self, csrc, 5), 5) as usize;
471 if dist_idx >= DIST_BASE.len() {
472 self.state = InflateState::End;
473 return Err(DecompressError::InvalidData);
474 }
475 let dist_bits = DIST_ADD_BITS[dist_idx];
476 let dist_base = DIST_BASE[dist_idx] as usize;
477 if dist_bits == 0 {
478 self.state = InflateState::FixedBlockCopy(length, dist_base);
479 } else {
480 self.state = InflateState::FixedBlockDistExt(length, dist_base, dist_bits);
481 }
482 },
483 InflateState::FixedBlockDistExt(length, base, bits) => {
484 let add = read_bits!(self, csrc, bits) as usize;
485 self.state = InflateState::FixedBlockCopy(length, base + add);
486 },
487 InflateState::FixedBlockCopy(length, dist) => {
488 if self.output_idx + length > dst.len() {
489 let copy_size = dst.len() - self.output_idx;
490 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
491 if ret.is_err() {
492 self.state = InflateState::End;
493 return Err(DecompressError::InvalidData);
494 }
495 self.output_idx += copy_size;
496 self.br = csrc.br;
497 self.state = InflateState::FixedBlockCopy(length - copy_size, dist);
498 return Err(DecompressError::OutputFull);
499 }
500 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
501 if ret.is_err() {
502 self.state = InflateState::End;
503 return Err(DecompressError::InvalidData);
504 }
505 self.output_idx += length;
506 self.state = InflateState::FixedBlock;
507 }
508 InflateState::DynBlockHlit => {
509 self.hlit = (read_bits!(self, csrc, 5) as usize) + 257;
510 if self.hlit >= 287 {
511 self.state = InflateState::End;
512 return Err(DecompressError::InvalidHeader);
513 }
514 self.state = InflateState::DynBlockHdist;
515 }
516 InflateState::DynBlockHdist => {
517 self.hdist = (read_bits!(self, csrc, 5) as usize) + 1;
518 self.state = InflateState::DynBlockHclen;
519 },
520 InflateState::DynBlockHclen => {
521 let hclen = (read_bits!(self, csrc, 4) as usize) + 4;
522 self.cur_len_idx = 0;
523 self.len_lengths = [0; 19];
524 self.all_lengths = [0; NUM_LITERALS + NUM_DISTS];
525 self.state = InflateState::DynLengths(hclen);
526 },
527 InflateState::DynLengths(len) => {
528 for i in 0..len {
529 if csrc.left() < 3 {
530 self.br = csrc.br;
531 self.state = InflateState::DynLengths(len - i);
532 return Err(DecompressError::ShortData);
533 }
534 self.len_lengths[LEN_RECODE[self.cur_len_idx]] = csrc.read(3).unwrap() as u8;
535 self.cur_len_idx += 1;
536 }
537 let mut len_codes = [ShortCodebookDesc { code: 0, bits: 0 }; 19];
538 lengths_to_codes(&self.len_lengths, &mut len_codes)?;
539 let mut cr = ShortCodebookDescReader::new(len_codes.to_vec());
540 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
541 if ret.is_err() {
542 self.state = InflateState::End;
543 return Err(DecompressError::InvalidHeader);
544 }
545 self.dyn_len_cb = Some(ret.unwrap());
546 self.cur_len_idx = 0;
547 self.state = InflateState::DynCodeLengths;
548 },
549 InflateState::DynCodeLengths => {
550 if let Some(ref len_cb) = self.dyn_len_cb {
551 while self.cur_len_idx < self.hlit + self.hdist {
552 let ret = csrc.read_cb(len_cb);
553 let val = match ret {
554 Ok(val) => val,
555 Err(CodebookError::MemoryError) => {
556 self.br = csrc.br;
557 return Err(DecompressError::ShortData);
558 },
559 Err(_) => {
560 self.state = InflateState::End;
561 return Err(DecompressError::InvalidHeader);
562 },
563 };
564 if val < 16 {
565 self.all_lengths[self.cur_len_idx] = val as u8;
566 self.cur_len_idx += 1;
567 } else {
568 let idx = (val as usize) - 16;
569 if idx > 2 {
570 self.state = InflateState::End;
571 return Err(DecompressError::InvalidHeader);
572 }
573 self.state = InflateState::DynCodeLengthsAdd(idx);
574 continue 'main;
575 }
576 }
577 let (lit_lengths, dist_lengths) = self.all_lengths.split_at(self.hlit);
578
579 let mut lit_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_LITERALS];
580 lengths_to_codes(&lit_lengths, &mut lit_codes)?;
581 let mut cr = ShortCodebookDescReader::new(lit_codes.to_vec());
582 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
583 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
584 self.dyn_lit_cb = Some(ret.unwrap());
585
586 let mut dist_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_DISTS];
587 lengths_to_codes(&dist_lengths[..self.hdist], &mut dist_codes)?;
588 let mut cr = ShortCodebookDescReader::new(dist_codes.to_vec());
589 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
590 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
591 self.dyn_dist_cb = Some(ret.unwrap());
592
593 self.state = InflateState::DynBlock;
594 } else {
595 unreachable!();
596 }
597 },
598 InflateState::DynCodeLengthsAdd(mode) => {
599 let base = REPEAT_BASE[mode] as usize;
600 let bits = REPEAT_BITS[mode];
601 let len = base + read_bits!(self, csrc, bits) as usize;
602 if self.cur_len_idx + len > self.hlit + self.hdist {
603 self.state = InflateState::End;
604 return Err(DecompressError::InvalidHeader);
605 }
606 let rpt = if mode == 0 {
607 if self.cur_len_idx == 0 {
608 self.state = InflateState::End;
609 return Err(DecompressError::InvalidHeader);
610 }
611 self.all_lengths[self.cur_len_idx - 1]
612 } else {
613 0
614 };
615 for _ in 0..len {
616 self.all_lengths[self.cur_len_idx] = rpt;
617 self.cur_len_idx += 1;
618 }
619 self.state = InflateState::DynCodeLengths;
620 },
621 InflateState::DynBlock => {
622 if let Some(ref lit_cb) = self.dyn_lit_cb {
623 let val = read_cb!(self, csrc, lit_cb);
624 if val < 256 {
625 if self.output_idx >= dst.len() {
626 self.br = csrc.br;
627 self.state = InflateState::DynBlockLiteral(val as u8);
628 return Err(DecompressError::OutputFull);
629 }
630 self.put_literal(val as u8);
631 dst[self.output_idx] = val as u8;
632 self.output_idx += 1;
633 } else if val == 256 {
634 if self.final_block {
635 self.state = InflateState::End;
636 return Ok(self.output_idx);
637 } else {
638 self.state = InflateState::BlockStart;
639 }
640 } else {
641 let len_idx = (val - 257) as usize;
642 if len_idx >= LENGTH_BASE.len() {
643 self.state = InflateState::End;
644 return Err(DecompressError::InvalidData);
645 }
646 let len_bits = LENGTH_ADD_BITS[len_idx];
647 let add_base = LENGTH_BASE[len_idx] as usize;
648 if len_bits > 0 {
649 self.state = InflateState::DynBlockLengthExt(add_base, len_bits);
650 } else {
651 self.state = InflateState::DynBlockDist(add_base);
652 }
653 }
654 } else {
655 unreachable!();
656 }
657 },
658 InflateState::DynBlockLiteral(sym) => {
659 if self.output_idx >= dst.len() {
660 self.br = csrc.br;
661 return Err(DecompressError::OutputFull);
662 }
663 self.put_literal(sym);
664 dst[self.output_idx] = sym;
665 self.output_idx += 1;
666 self.state = InflateState::DynBlock;
667 },
668 InflateState::DynBlockLengthExt(base, bits) => {
669 let add = read_bits!(self, csrc, bits) as usize;
670 self.state = InflateState::DynBlockDist(base + add);
671 },
672 InflateState::DynBlockDist(length) => {
673 if let Some(ref dist_cb) = self.dyn_dist_cb {
674 let dist_idx = read_cb!(self, csrc, dist_cb) as usize;
675 if dist_idx >= DIST_BASE.len() {
676 self.state = InflateState::End;
677 return Err(DecompressError::InvalidData);
678 }
679 let dist_bits = DIST_ADD_BITS[dist_idx];
680 let dist_base = DIST_BASE[dist_idx] as usize;
681 if dist_bits == 0 {
682 self.state = InflateState::DynCopy(length, dist_base);
683 } else {
684 self.state = InflateState::DynBlockDistExt(length, dist_base, dist_bits);
685 }
686 } else {
687 unreachable!();
688 }
689 },
690 InflateState::DynBlockDistExt(length, base, bits) => {
691 let add = read_bits!(self, csrc, bits) as usize;
692 self.state = InflateState::DynCopy(length, base + add);
693 },
694 InflateState::DynCopy(length, dist) => {
695 if self.output_idx + length > dst.len() {
696 let copy_size = dst.len() - self.output_idx;
697 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
698 if ret.is_err() {
699 self.state = InflateState::End;
700 return Err(DecompressError::InvalidData);
701 }
702 self.output_idx += copy_size;
703 self.br = csrc.br;
704 self.state = InflateState::DynCopy(length - copy_size, dist);
705 return Err(DecompressError::OutputFull);
706 }
707 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
708 if ret.is_err() {
709 self.state = InflateState::End;
710 return Err(DecompressError::InvalidData);
711 }
712 self.output_idx += length;
713 self.state = InflateState::DynBlock;
714 }
715 InflateState::End => {
716 return Ok(0);
717 },
718 }
719 }
720 }
721 ///! Decompresses input data into output returning the uncompressed data length.
722 pub fn uncompress(src: &[u8], dst: &mut [u8]) -> DecompressResult<usize> {
723 let mut inflate = Self::new();
724 let off = if src.len() > 2 && src[0] == 0x78 && (src[1] != 0 && ((src[1] - 1) % 31) == 0) { 2 } else { 0 };
725 inflate.decompress_data(&src[off..], dst, false)
726 }
727 }
728
729 impl Default for Inflate {
730 fn default() -> Self {
731 Self::new()
732 }
733 }
734
735 fn lengths_to_codes(lens: &[u8], codes: &mut [ShortCodebookDesc]) -> DecompressResult<()> {
736 let mut bits = [0u32; 32];
737 let mut pfx = [0u32; 33];
738 for len in lens.iter() {
739 let len = *len as usize;
740 if len >= bits.len() {
741 return Err(DecompressError::InvalidHeader);
742 }
743 bits[len] += 1;
744 }
745 bits[0] = 0;
746 let mut code = 0;
747 for i in 0..bits.len() {
748 code = (code + bits[i]) << 1;
749 pfx[i + 1] = code;
750 }
751
752 for (len, codes) in lens.iter().zip(codes.iter_mut()) {
753 let len = *len as usize;
754 if len != 0 {
755 let bits = len as u8;
756 *codes = ShortCodebookDesc { code: reverse_bits(pfx[len], bits), bits };
757 pfx[len] += 1;
758 } else {
759 *codes = ShortCodebookDesc { code: 0, bits: 0 };
760 }
761 }
762
763 Ok(())
764 }
765
/// Table-driven CRC-32 calculator using the reflected polynomial `0xEDB88320`.
struct GzipCRC32 {
    /// CRC of every possible byte value.
    tab: [u32; 256],
    /// Checksum of all data processed so far.
    crc: u32,
}

impl GzipCRC32 {
    /// Builds the lookup table and starts with a zero checksum.
    #[allow(clippy::unreadable_literal)]
    fn new() -> Self {
        let mut tab = [0u32; 256];
        for (idx, entry) in tab.iter_mut().enumerate() {
            *entry = (0..8).fold(idx as u32, |c, _| {
                    if (c & 1) != 0 {
                        0xEDB88320 ^ (c >> 1)
                    } else {
                        c >> 1
                    }
                });
        }
        Self { tab, crc: 0 }
    }
    /// Updates the running checksum with the bytes from `src`.
    fn update_crc(&mut self, src: &[u8]) {
        let tab = &self.tab;
        // the checksum is kept in its final (inverted) form between the calls
        let state = src.iter().fold(!self.crc, |c, &byte| {
                tab[((c ^ u32::from(byte)) & 0xFF) as usize] ^ (c >> 8)
            });
        self.crc = !state;
    }
}
796
797 ///! Decodes input data in gzip file format (RFC 1952) returning a vector containing decoded data.
798 pub fn gzip_decode(br: &mut ByteReader, skip_crc: bool) -> DecompressResult<Vec<u8>> {
799 const FLAG_HCRC: u8 = 0x02;
800 const FLAG_EXTRA: u8 = 0x04;
801 const FLAG_NAME: u8 = 0x08;
802 const FLAG_COMMENT: u8 = 0x10;
803
804 let id1 = br.read_byte()?;
805 let id2 = br.read_byte()?;
806 let cm = br.read_byte()?;
807 let flg = br.read_byte()?;
808 let _mtime = br.read_u32le()?;
809 let _xfl = br.read_byte()?;
810 let _os = br.read_byte()?;
811 if id1 != 0x1F || id2 != 0x8B || cm != 8 {
812 return Err(DecompressError::InvalidHeader);
813 }
814
815 if (flg & FLAG_EXTRA) != 0 {
816 let xlen = br.read_u16le()? as usize;
817 br.read_skip(xlen)?;
818 }
819 if (flg & FLAG_NAME) != 0 {
820 loop {
821 let b = br.read_byte()?;
822 if b == 0 {
823 break;
824 }
825 }
826 }
827 if (flg & FLAG_COMMENT) != 0 {
828 loop {
829 let b = br.read_byte()?;
830 if b == 0 {
831 break;
832 }
833 }
834 }
835 let _hcrc = if (flg & FLAG_HCRC) != 0 {
836 br.read_u16le()?
837 } else {
838 0
839 };
840 if (flg & 0xE0) != 0 {
841 return Err(DecompressError::Unsupported);
842 }
843
844 let mut output: Vec<u8> = Vec::new();
845 let mut tail = [0u8; 8];
846 let mut inblk = [0u8; 1024];
847 let mut oblk = [0u8; 4096];
848 let mut inflate = Inflate::new();
849 let mut checker = GzipCRC32::new();
850
851 loop {
852 let ret = br.read_buf_some(&mut inblk);
853 if let Err(ByteIOError::EOF) = ret {
854 break;
855 }
856 let inlen = match ret {
857 Ok(val) => val,
858 Err(_) => return Err(DecompressError::IOError),
859 };
860 let mut repeat = false;
861 loop {
862 let ret = inflate.decompress_data(&inblk[..inlen], &mut oblk, repeat);
863 match ret {
864 Ok(outlen) => {
865 checker.update_crc(&oblk[..outlen]);
866 output.extend_from_slice(&oblk[..outlen]);
867 break;
868 },
869 Err(DecompressError::ShortData) => {
870 break;
871 },
872 Err(DecompressError::OutputFull) => {
873 repeat = true;
874 checker.update_crc(&oblk);
875 output.extend_from_slice(&oblk);
876 },
877 Err(err) => {
878 return Err(err);
879 },
880 }
881 }
882 // Save last 8 bytes for CRC and size.
883 if inlen >= 8 {
884 tail.copy_from_slice(&inblk[inlen - 8..][..8]);
885 } else {
886 let shift_len = 8 - inlen;
887 for i in 0..shift_len {
888 tail[i] = tail[i + inlen];
889 }
890 for i in shift_len..8 {
891 tail[i] = inblk[i - shift_len];
892 }
893 }
894 }
895 if !skip_crc {
896 if !inflate.is_finished() { println!("???"); }
897 let crc = read_u32le(&tail[0..4])?;
898 let size = read_u32le(&tail[4..8])?;
899 if size != (output.len() as u32) {
900 return Err(DecompressError::CRCError);
901 }
902 if crc != checker.crc {
903 return Err(DecompressError::CRCError);
904 }
905 }
906
907 Ok(output)
908 }
909
#[cfg(test)]
mod test {
    use super::*;

    /// One-shot decoding of a small block compressed with fixed codes.
    #[test]
    fn test_inflate1() {
        const TEST_DATA: &[u8] = &[
                0xF3, 0x48, 0xCD, 0xC9, 0xC9, 0xD7, 0x51, 0x28,
                0xCF, 0x2F, 0xCA, 0x49, 0x51, 0x04, 0x00 ];
        const TEST_REF: &[u8] = b"Hello, world!";
        let mut dst_buf = [0u8; 13];
        let len = Inflate::uncompress(TEST_DATA, &mut dst_buf).unwrap();
        assert_eq!(len, 13);
        assert_eq!(&dst_buf[..len], &TEST_REF[..len]);
    }
    /// Chunked decoding with input and output buffers smaller than the data.
    #[test]
    fn test_inflate2() {
        const TEST_DATA3: &[u8] = &[ 0x4B, 0x4C, 0x44, 0x80, 0x24, 0x54, 0x80, 0x2C, 0x06, 0x00 ];
        const TEST_REF3: &[u8] = b"aaaaaaaaaaaabbbbbbbbbbbbbbbaaaaabbbbbbb";
        let mut dst_buf = [0u8; 39];

        let mut inflate = Inflate::new();
        let mut output_chunk = [0u8; 7];
        let mut output_pos = 0;
        for input in TEST_DATA3.chunks(3) {
            let mut repeat = false;
            loop {
                match inflate.decompress_data(input, &mut output_chunk, repeat) {
                    Ok(len) => {
                        dst_buf[output_pos..][..len].copy_from_slice(&output_chunk[..len]);
                        output_pos += len;
                        break;
                    },
                    Err(DecompressError::ShortData) => break,
                    Err(DecompressError::OutputFull) => {
                        repeat = true;
                        dst_buf[output_pos..][..output_chunk.len()].copy_from_slice(&output_chunk);
                        output_pos += output_chunk.len();
                    },
                    Err(err) => {
                        panic!("decompress error {:?}", err);
                    },
                }
            }
        }

        assert_eq!(output_pos, dst_buf.len());
        assert_eq!(&dst_buf[..output_pos], &TEST_REF3[..output_pos]);
    }
    /// Decoding of a complete gzip file including size and CRC verification.
    #[test]
    fn test_inflate3() {
        const TEST_DATA: &[u8] = &[
    0x1F, 0x8B, 0x08, 0x08, 0xF6, 0x7B, 0x90, 0x5E, 0x02, 0x03, 0x31, 0x2E, 0x74, 0x78, 0x74, 0x00,
    0xE5, 0x95, 0x4B, 0x4E, 0xC3, 0x30, 0x10, 0x40, 0xF7, 0x39, 0xC5, 0x1C, 0x00, 0x16, 0x70, 0x83,
    0x0A, 0xB5, 0x3B, 0xE8, 0x82, 0x5E, 0x60, 0x1A, 0x4F, 0xE2, 0x11, 0xFE, 0x44, 0x1E, 0xA7, 0x69,
    0x6E, 0xCF, 0x38, 0xDD, 0xB0, 0x40, 0xA2, 0x46, 0x2D, 0x20, 0x2A, 0xE5, 0xAB, 0xCC, 0xE7, 0xBD,
    0x49, 0xAC, 0x6C, 0x03, 0x64, 0x4B, 0xD0, 0x71, 0x92, 0x0C, 0x06, 0x67, 0x88, 0x1D, 0x3C, 0xD9,
    0xC4, 0x92, 0x3D, 0x4A, 0xF3, 0x3C, 0x43, 0x4E, 0x23, 0x81, 0x8B, 0x07, 0x82, 0x1E, 0xF5, 0x90,
    0x23, 0x78, 0x6A, 0x56, 0x30, 0x60, 0xCA, 0x89, 0x4D, 0x4F, 0xC0, 0x01, 0x10, 0x06, 0xC2, 0xA4,
    0xA1, 0x44, 0xCD, 0xF6, 0x54, 0x50, 0xA8, 0x8D, 0xC1, 0x9C, 0x5F, 0x71, 0x37, 0x45, 0xC8, 0x63,
    0xCA, 0x8E, 0xC0, 0xE8, 0x23, 0x69, 0x56, 0x9A, 0x8D, 0x5F, 0xB6, 0xC9, 0x96, 0x53, 0x4D, 0x17,
    0xAB, 0xB9, 0xB0, 0x49, 0x14, 0x5A, 0x0B, 0x96, 0x82, 0x7C, 0xB7, 0x6F, 0x17, 0x35, 0xC7, 0x9E,
    0xDF, 0x78, 0xA3, 0xF1, 0xD0, 0xA2, 0x73, 0x1C, 0x7A, 0xD8, 0x2B, 0xB3, 0x5C, 0x90, 0x85, 0xBB,
    0x2A, 0x14, 0x2E, 0xF7, 0xD1, 0x19, 0x48, 0x0A, 0x23, 0x57, 0x45, 0x13, 0x3E, 0xD6, 0xA0, 0xBD,
    0xF2, 0x11, 0x7A, 0x22, 0x21, 0xAD, 0xE5, 0x70, 0x56, 0xA0, 0x9F, 0xA5, 0xA5, 0x03, 0x85, 0x2A,
    0xDE, 0x92, 0x00, 0x32, 0x61, 0x10, 0xAD, 0x27, 0x13, 0x7B, 0x5F, 0x98, 0x7F, 0x59, 0x83, 0xB8,
    0xB7, 0x35, 0x16, 0xEB, 0x12, 0x0F, 0x1E, 0xD9, 0x14, 0x0B, 0xCF, 0xEE, 0x6D, 0x91, 0xF8, 0x93,
    0x6E, 0x81, 0x3F, 0x7F, 0x41, 0xA4, 0x22, 0x1F, 0xB7, 0xE6, 0x85, 0x83, 0x9A, 0xA2, 0x61, 0x12,
    0x0D, 0x0F, 0x6D, 0x01, 0xBD, 0xB0, 0xE8, 0x1D, 0xEC, 0xD1, 0xA0, 0xBF, 0x1F, 0x4E, 0xFB, 0x55,
    0xBD, 0x73, 0xDD, 0x87, 0xB9, 0x53, 0x23, 0x17, 0xD3, 0xE2, 0xE9, 0x08, 0x87, 0x42, 0xFF, 0xCF,
    0x26, 0x42, 0xAE, 0x76, 0xB5, 0xAE, 0x97, 0x0C, 0x18, 0x78, 0xA0, 0x24, 0xE5, 0x54, 0x0C, 0x6E,
    0x60, 0x52, 0x79, 0x22, 0x57, 0xF5, 0x87, 0x78, 0x78, 0x04, 0x93, 0x46, 0xEF, 0xCB, 0x98, 0x96,
    0x8B, 0x65, 0x00, 0xB7, 0x36, 0xBD, 0x77, 0xA8, 0xBD, 0x5A, 0xAA, 0x1A, 0x09, 0x00, 0x00
        ];

        let mut mr = MemoryReader::new_read(TEST_DATA);
        let mut br = ByteReader::new(&mut mr);
        // decoding must succeed, which includes the size and CRC checks
        let _decoded = gzip_decode(&mut br, false).unwrap();
    }
}