nihav_core/deflate: fix handling large buffers
[nihav.git] / nihav-core / src / compr / deflate.rs
1 //! Deflate format (RFC 1951) support.
2 //!
3 //! This module provides functionality for decompressing raw deflated streams via [`Inflate`] and gzip files (RFC 1952) via [`gzip_decode`].
4 //!
5 //! [`Inflate`]: ./struct.Inflate.html
6 //! [`gzip_decode`]: ./fn.gzip_decode.html
7 //!
8 //! # Examples
9 //!
10 //! Decompressing full input buffer into sufficiently large output buffer:
11 //! ```
12 //! # use nihav_core::compr::DecompressError;
13 //! use nihav_core::compr::deflate::Inflate;
14 //!
15 //! # fn decompress(input: &[u8]) -> Result<(), DecompressError> {
16 //! # let mut output_buffer = [0u8; 16];
17 //! let output_length = Inflate::uncompress(input, &mut output_buffer)?;
18 //! # Ok(())
19 //! # }
20 //! ```
21 //!
22 //! Decompressing input chunks into portions of output:
23 //! ```
24 //! use nihav_core::compr::DecompressError;
25 //! use nihav_core::compr::deflate::Inflate;
26 //!
27 //! # fn decompress(input_data: &[u8]) -> Result<(), DecompressError> {
28 //! let mut inflate = Inflate::new();
29 //! let mut dst_buf: Vec<u8> = Vec::new();
30 //! let mut output_chunk = [0u8; 1024];
31 //! for src in input_data.chunks(512) {
32 //! let mut repeat = false;
33 //! loop {
34 //! let ret = inflate.decompress_data(src, &mut output_chunk, repeat);
35 //! match ret {
36 //! Ok(len) => { // we got a buffer decoded successfully to the end
37 //! dst_buf.extend_from_slice(&output_chunk[..len]);
38 //! break;
39 //! },
40 //! Err(DecompressError::ShortData) => { // this block of data was fully read
41 //! break;
42 //! },
43 //! Err(DecompressError::OutputFull) => {
44 //! // the output buffer is full, flush it and continue decoding the same block
45 //! repeat = true;
46 //! dst_buf.extend_from_slice(&output_chunk);
47 //! },
48 //! Err(err) => {
49 //! return Err(err);
50 //! },
51 //! }
52 //! }
53 //! }
54 //! # Ok(())
55 //! # }
56 //! ```
57
58 use crate::io::byteio::*;
59 use crate::io::bitreader::*;
60 use crate::io::codebook::*;
61 use super::*;
62
/// Number of table entries reserved for literal/length codes.
const NUM_LITERALS: usize = 287;
/// Number of table entries reserved for distance codes.
const NUM_DISTS:    usize = 32;
65
66 struct FixedLenCodeReader {}
67
68 impl CodebookDescReader<u16> for FixedLenCodeReader {
69 fn bits(&mut self, idx: usize) -> u8 {
70 if idx < 144 { 8 }
71 else if idx < 256 { 9 }
72 else if idx < 280 { 7 }
73 else { 8 }
74 }
75 #[allow(clippy::identity_op)]
76 fn code(&mut self, idx: usize) -> u32 {
77 let base = idx as u32;
78 let bits = self.bits(idx);
79 if idx < 144 { reverse_bits(base + 0x30, bits) }
80 else if idx < 256 { reverse_bits(base + 0x190 - 144, bits) }
81 else if idx < 280 { reverse_bits(base + 0x000 - 256, bits) }
82 else { reverse_bits(base + 0xC0 - 280, bits) }
83 }
84 fn sym (&mut self, idx: usize) -> u16 { idx as u16 }
85 fn len(&mut self) -> usize { NUM_LITERALS + 1 }
86 }
87
/// Bit reader position that can be saved and restored between decoding calls.
#[derive(Clone, Copy, Default)]
struct BitReaderState {
    /// Index of the next source byte to be loaded into the bit cache.
    pos:    usize,
    /// Bit cache; the next unread bit is the least significant one.
    bitbuf: u32,
    /// Number of valid bits currently held in `bitbuf`.
    bits:   u8,
}
94
95 struct CurrentSource<'a> {
96 src: &'a [u8],
97 br: BitReaderState,
98 }
99
100 impl<'a> CurrentSource<'a> {
101 fn new(src: &'a [u8], br: BitReaderState) -> Self {
102 let mut newsrc = Self { src, br };
103 newsrc.br.pos = 0;
104 newsrc.refill();
105 newsrc
106 }
107 fn reinit(src: &'a [u8], br: BitReaderState) -> Self {
108 let mut newsrc = Self { src, br };
109 newsrc.refill();
110 newsrc
111 }
112 fn refill(&mut self) {
113 while (self.br.bits <= 24) && (self.br.pos < self.src.len()) {
114 self.br.bitbuf |= u32::from(self.src[self.br.pos]) << self.br.bits;
115 self.br.bits += 8;
116 self.br.pos += 1;
117 }
118 }
119 fn skip_cache(&mut self, nbits: u8) {
120 self.br.bitbuf >>= nbits;
121 self.br.bits -= nbits;
122 }
123 fn read(&mut self, nbits: u8) -> BitReaderResult<u32> {
124 if nbits == 0 { return Ok(0); }
125 if nbits > 16 { return Err(BitReaderError::TooManyBitsRequested); }
126 if self.br.bits < nbits {
127 self.refill();
128 if self.br.bits < nbits { return Err(BitReaderError::BitstreamEnd); }
129 }
130 let ret = self.br.bitbuf & ((1 << nbits) - 1);
131 self.skip_cache(nbits);
132 Ok(ret)
133 }
134 fn read_bool(&mut self) -> BitReaderResult<bool> {
135 if self.br.bits == 0 {
136 self.refill();
137 if self.br.bits == 0 { return Err(BitReaderError::BitstreamEnd); }
138 }
139 let ret = (self.br.bitbuf & 1) != 0;
140 self.skip_cache(1);
141 Ok(ret)
142 }
143 fn peek(&mut self, nbits: u8) -> u32 {
144 if nbits == 0 || nbits > 16 { return 0; }
145 if self.br.bits < nbits {
146 self.refill();
147 }
148 self.br.bitbuf & ((1 << nbits) - 1)
149 }
150 fn skip(&mut self, nbits: u32) -> BitReaderResult<()> {
151 if u32::from(self.br.bits) >= nbits {
152 self.skip_cache(nbits as u8);
153 } else {
154 unreachable!();
155 }
156 Ok(())
157 }
158 fn align(&mut self) {
159 let b = self.br.bits & 7;
160 if b != 0 {
161 self.skip_cache(8 - (b as u8));
162 }
163 }
164 fn left(&self) -> isize {
165 ((self.src.len() as isize) - (self.br.pos as isize)) * 8 + (self.br.bits as isize)
166 }
167 }
168
169 impl<'a, S: Copy> CodebookReader<S> for CurrentSource<'a> {
170 fn read_cb(&mut self, cb: &Codebook<S>) -> CodebookResult<S> {
171 let mut esc = true;
172 let mut idx = 0;
173 let mut lut_bits = cb.lut_bits;
174 let orig_br = self.br;
175 while esc {
176 let lut_idx = (self.peek(lut_bits) as usize) + (idx as usize);
177 if cb.table[lut_idx] == TABLE_FILL_VALUE { return Err(CodebookError::InvalidCode); }
178 let bits = cb.table[lut_idx] & 0x7F;
179 esc = (cb.table[lut_idx] & 0x80) != 0;
180 idx = (cb.table[lut_idx] >> 8) as usize;
181 let skip_bits = if esc { u32::from(lut_bits) } else { bits };
182 if (skip_bits as isize) > self.left() {
183 self.br = orig_br;
184 self.refill();
185 return Err(CodebookError::MemoryError);
186 }
187 self.skip(skip_bits as u32).unwrap();
188 lut_bits = bits as u8;
189 }
190 Ok(cb.syms[idx])
191 }
192 }
193
/// Decoder state machine position; every variant is a point where decoding
/// can be suspended and resumed later.
enum InflateState {
    /// Nothing has been decoded yet (handled like `BlockStart`).
    Start,
    /// Expecting the final block flag of the next block.
    BlockStart,
    /// Expecting the two-bit block type.
    BlockMode,
    /// Stored block: expecting the 16-bit length.
    StaticBlockLen,
    /// Stored block: expecting the 16-bit inverted length; holds the length read.
    StaticBlockInvLen(u32),
    /// Stored block: holds the number of bytes still to be read.
    StaticBlockCopy(usize),
    /// Fixed code block: expecting a literal/length symbol.
    FixedBlock,
    /// Fixed code block: expecting length extra bits (length base, bit count).
    FixedBlockLengthExt(usize, u8),
    /// Fixed code block: expecting a distance code (copy length).
    FixedBlockDist(usize),
    /// Fixed code block: expecting distance extra bits (copy length, distance base, bit count).
    FixedBlockDistExt(usize, usize, u8),
    /// Fixed code block: copy in progress (remaining length, distance).
    FixedBlockCopy(usize, usize),
    /// Fixed code block: literal decoded while the output was full.
    FixedBlockLiteral(u8),
    /// Dynamic code block: expecting the literal/length code count.
    DynBlockHlit,
    /// Dynamic code block: expecting the distance code count.
    DynBlockHdist,
    /// Dynamic code block: expecting the count of code length codes.
    DynBlockHclen,
    /// Dynamic code block: holds the number of code length code lengths still to be read.
    DynLengths(usize),
    /// Dynamic code block: reading the literal/length and distance code lengths.
    DynCodeLengths,
    /// Dynamic code block: expecting a repeat count (repeat mode index).
    DynCodeLengthsAdd(usize),
    /// Dynamic code block: expecting a literal/length symbol.
    DynBlock,
    /// Dynamic code block: expecting length extra bits (length base, bit count).
    DynBlockLengthExt(usize, u8),
    /// Dynamic code block: expecting a distance code (copy length).
    DynBlockDist(usize),
    /// Dynamic code block: expecting distance extra bits (copy length, distance base, bit count).
    DynBlockDistExt(usize, usize, u8),
    /// Dynamic code block: copy in progress (remaining length, distance).
    DynCopy(usize, usize),
    /// Dynamic code block: literal decoded while the output was full.
    DynBlockLiteral(u8),
    /// Decoding is over, either at the end of the stream or after an error.
    End,
}
221
222 ///! The decompressor for deflated streams (RFC 1951).
223 pub struct Inflate {
224 br: BitReaderState,
225 fix_len_cb: Codebook<u16>,
226
227 buf: [u8; 65536],
228 bpos: usize,
229 output_idx: usize,
230
231 state: InflateState,
232 final_block: bool,
233 hlit: usize,
234 hdist: usize,
235 dyn_len_cb: Option<Codebook<u32>>,
236 dyn_lit_cb: Option<Codebook<u32>>,
237 dyn_dist_cb: Option<Codebook<u32>>,
238 len_lengths: [u8; 19],
239 all_lengths: [u8; NUM_LITERALS + NUM_DISTS],
240 cur_len_idx: usize,
241 }
242
/// Number of extra bits following each length symbol (257 and up).
const LENGTH_ADD_BITS: [u8; 29] = [
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
    1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
    4, 4, 4, 4, 5, 5, 5, 5, 0,
];
/// Smallest copy length encoded by each length symbol (257 and up).
const LENGTH_BASE: [u16; 29] = [
      3,   4,   5,   6,   7,   8,   9,  10,  11,  13,
     15,  17,  19,  23,  27,  31,  35,  43,  51,  59,
     67,  83,  99, 115, 131, 163, 195, 227, 258,
];
/// Number of extra bits following each distance code.
const DIST_ADD_BITS: [u8; 30] = [
    0,  0,  0,  0,  1,  1,  2,  2,  3,  3,
    4,  4,  5,  5,  6,  6,  7,  7,  8,  8,
    9,  9, 10, 10, 11, 11, 12, 12, 13, 13,
];
/// Smallest distance encoded by each distance code.
const DIST_BASE: [u16; 30] = [
       1,    2,    3,    4,    5,    7,    9,    13,    17,    25,
      33,   49,   65,   97,  129,  193,  257,   385,   513,   769,
    1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577,
];
/// Order in which the lengths of the code length codes are transmitted.
const LEN_RECODE: [usize; 19] = [
    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
];
/// Number of repeat count bits for the code length symbols 16, 17 and 18.
const REPEAT_BITS: [u8; 3] = [ 2, 3, 7 ];
/// Smallest repeat count for the code length symbols 16, 17 and 18.
const REPEAT_BASE: [u8; 3] = [ 3, 3, 11 ];
268
// Reads `$bits` bits from `$csrc`; when the input is too short the reader
// state is saved into `$self` and the enclosing function returns `ShortData`.
macro_rules! read_bits {
    ($self: expr, $csrc: expr, $bits: expr) => ({
        if ($bits as isize) > $csrc.left() {
            $self.br = $csrc.br;
            return Err(DecompressError::ShortData);
        }
        $csrc.read($bits).unwrap()
    })
}
278
// Decodes one symbol from `$csrc` with codebook `$cb`.
// When the input ends inside a code the reader state is saved into `$self`
// and the enclosing function returns `ShortData`; any other failure stops
// the decoder and makes the enclosing function return `InvalidData`.
macro_rules! read_cb {
    ($self: expr, $csrc: expr, $cb: expr) => ({
        match $csrc.read_cb($cb) {
            Ok(val) => val,
            Err(CodebookError::MemoryError) => {
                $self.br = $csrc.br;
                return Err(DecompressError::ShortData);
            },
            Err(_) => {
                $self.state = InflateState::End;
                return Err(DecompressError::InvalidData);
            },
        }
    })
}
295
296 impl Inflate {
297 ///! Creates a new instance of `Inflate` struct.
298 pub fn new() -> Self {
299 let mut cr = FixedLenCodeReader {};
300 let fix_len_cb = Codebook::new(&mut cr, CodebookMode::LSB).unwrap();
301 Self {
302 br: BitReaderState::default(),
303 fix_len_cb,
304
305 buf: [0; 65536],
306 bpos: 0,
307 output_idx: 0,
308
309 state: InflateState::Start,
310 final_block: false,
311 dyn_len_cb: None,
312 dyn_lit_cb: None,
313 dyn_dist_cb: None,
314 hlit: 0,
315 hdist: 0,
316 len_lengths: [0; 19],
317 all_lengths: [0; NUM_LITERALS + NUM_DISTS],
318 cur_len_idx: 0,
319 }
320 }
321 fn put_literal(&mut self, val: u8) {
322 self.buf[self.bpos] = val;
323 self.bpos = (self.bpos + 1) & (self.buf.len() - 1);
324 }
325 fn lz_copy(&mut self, offset: usize, len: usize, dst: &mut [u8]) -> DecompressResult<()> {
326 let mask = self.buf.len() - 1;
327 if offset > self.output_idx {
328 return Err(DecompressError::InvalidData);
329 }
330 let cstart = (self.bpos.wrapping_sub(offset)) & mask;
331 for i in 0..len {
332 self.buf[(self.bpos + i) & mask] = self.buf[(cstart + i) & mask];
333 dst[i] = self.buf[(cstart + i) & mask];
334 }
335 self.bpos = (self.bpos + len) & mask;
336 Ok(())
337 }
338 ///! Reports whether decoder has finished decoding the input.
339 pub fn is_finished(&self) -> bool {
340 match self.state {
341 InflateState::End => true,
342 _ => false,
343 }
344 }
345 ///! Reports the current amount of bytes output into the destination buffer after the last run.
346 pub fn get_current_output_size(&self) -> usize { self.output_idx }
347 ///! Reports the total amount of bytes decoded so far.
348 pub fn get_total_output_size(&self) -> usize { self.bpos }
349 ///! Tries to decompress input data and write it to the output buffer.
350 ///!
351 ///! Since the decompressor can work with arbitrary input and output chunks its return value may have several meanings:
352 ///! * `Ok(len)` means the stream has been fully decoded and then number of bytes output into the destination buffer is returned.
353 ///! * [`DecompressError::ShortData`] means the input stream has been fully read but more data is needed.
354 ///! * [`DecompressError::OutputFull`] means the output buffer is full and should be flushed. Then decoding should continue on the same input block with `continue_block` parameter set to `true`.
355 ///!
356 ///! [`DecompressError::ShortData`]: ../enum.DecompressError.html#variant.ShortData
357 ///! [`DecompressError::OutputFull`]: ../enum.DecompressError.html#variant.OutputFull
358 pub fn decompress_data(&mut self, src: &[u8], dst: &mut [u8], continue_block: bool) -> DecompressResult<usize> {
359 if src.is_empty() || dst.is_empty() {
360 return Err(DecompressError::InvalidArgument);
361 }
362 let mut csrc = if !continue_block {
363 CurrentSource::new(src, self.br)
364 } else {
365 self.output_idx = 0;
366 CurrentSource::reinit(src, self.br)
367 };
368 'main: loop {
369 match self.state {
370 InflateState::Start | InflateState::BlockStart => {
371 if csrc.left() == 0 {
372 self.br = csrc.br;
373 return Err(DecompressError::ShortData);
374 }
375 self.final_block = csrc.read_bool().unwrap();
376 self.state = InflateState::BlockMode;
377 },
378 InflateState::BlockMode => {
379 let bmode = read_bits!(self, csrc, 2);
380 match bmode {
381 0 => {
382 csrc.align();
383 self.state = InflateState::StaticBlockLen;
384 },
385 1 => { self.state = InflateState::FixedBlock; },
386 2 => { self.state = InflateState::DynBlockHlit; },
387 _ => {
388 self.state = InflateState::End;
389 return Err(DecompressError::InvalidHeader);
390 },
391 };
392 },
393 InflateState::StaticBlockLen => {
394 let len = read_bits!(self, csrc, 16);
395 self.state = InflateState::StaticBlockInvLen(len);
396 },
397 InflateState::StaticBlockInvLen(len) => {
398 let inv_len = read_bits!(self, csrc, 16);
399 if len != !inv_len {
400 self.state = InflateState::End;
401 return Err(DecompressError::InvalidHeader);
402 }
403 self.state = InflateState::StaticBlockCopy(len as usize);
404 },
405 InflateState::StaticBlockCopy(len) => {
406 for i in 0..len {
407 if csrc.left() < 8 {
408 self.br = csrc.br;
409 self.state = InflateState::StaticBlockCopy(len - i);
410 return Err(DecompressError::ShortData);
411 }
412 let val = csrc.read(8).unwrap() as u8;
413 self.put_literal(val);
414 }
415 self.state = InflateState::BlockStart;
416 }
417 InflateState::FixedBlock => {
418 let val = read_cb!(self, csrc, &self.fix_len_cb);
419 if val < 256 {
420 if self.output_idx >= dst.len() {
421 self.br = csrc.br;
422 self.state = InflateState::FixedBlockLiteral(val as u8);
423 return Err(DecompressError::OutputFull);
424 }
425 self.put_literal(val as u8);
426 dst[self.output_idx] = val as u8;
427 self.output_idx += 1;
428 } else if val == 256 {
429 if self.final_block {
430 self.state = InflateState::End;
431 return Ok(self.output_idx);
432 } else {
433 self.state = InflateState::BlockStart;
434 }
435 } else {
436 let len_idx = (val - 257) as usize;
437 if len_idx >= LENGTH_BASE.len() {
438 self.state = InflateState::End;
439 return Err(DecompressError::InvalidData);
440 }
441 let len_bits = LENGTH_ADD_BITS[len_idx];
442 let add_base = LENGTH_BASE[len_idx] as usize;
443 if len_bits > 0 {
444 self.state = InflateState::FixedBlockLengthExt(add_base, len_bits);
445 } else {
446 self.state = InflateState::FixedBlockDist(add_base);
447 }
448 }
449 },
450 InflateState::FixedBlockLiteral(sym) => {
451 if self.output_idx >= dst.len() {
452 self.br = csrc.br;
453 return Err(DecompressError::OutputFull);
454 }
455 self.put_literal(sym);
456 dst[self.output_idx] = sym;
457 self.output_idx += 1;
458 self.state = InflateState::FixedBlock;
459 },
460 InflateState::FixedBlockLengthExt(base, bits) => {
461 let add = read_bits!(self, csrc, bits) as usize;
462 self.state = InflateState::FixedBlockDist(base + add);
463 },
464 InflateState::FixedBlockDist(length) => {
465 let dist_idx = reverse_bits(read_bits!(self, csrc, 5), 5) as usize;
466 if dist_idx >= DIST_BASE.len() {
467 self.state = InflateState::End;
468 return Err(DecompressError::InvalidData);
469 }
470 let dist_bits = DIST_ADD_BITS[dist_idx];
471 let dist_base = DIST_BASE[dist_idx] as usize;
472 if dist_bits == 0 {
473 self.state = InflateState::FixedBlockCopy(length, dist_base);
474 } else {
475 self.state = InflateState::FixedBlockDistExt(length, dist_base, dist_bits);
476 }
477 },
478 InflateState::FixedBlockDistExt(length, base, bits) => {
479 let add = read_bits!(self, csrc, bits) as usize;
480 self.state = InflateState::FixedBlockCopy(length, base + add);
481 },
482 InflateState::FixedBlockCopy(length, dist) => {
483 if self.output_idx + length > dst.len() {
484 let copy_size = dst.len() - self.output_idx;
485 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
486 if ret.is_err() {
487 self.state = InflateState::End;
488 return Err(DecompressError::InvalidData);
489 }
490 self.output_idx += copy_size;
491 self.br = csrc.br;
492 self.state = InflateState::FixedBlockCopy(length - copy_size, dist);
493 return Err(DecompressError::OutputFull);
494 }
495 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
496 if ret.is_err() {
497 self.state = InflateState::End;
498 return Err(DecompressError::InvalidData);
499 }
500 self.output_idx += length;
501 self.state = InflateState::FixedBlock;
502 }
503 InflateState::DynBlockHlit => {
504 self.hlit = (read_bits!(self, csrc, 5) as usize) + 257;
505 if self.hlit >= 287 {
506 self.state = InflateState::End;
507 return Err(DecompressError::InvalidHeader);
508 }
509 self.state = InflateState::DynBlockHdist;
510 }
511 InflateState::DynBlockHdist => {
512 self.hdist = (read_bits!(self, csrc, 5) as usize) + 1;
513 self.state = InflateState::DynBlockHclen;
514 },
515 InflateState::DynBlockHclen => {
516 let hclen = (read_bits!(self, csrc, 4) as usize) + 4;
517 self.cur_len_idx = 0;
518 self.len_lengths = [0; 19];
519 self.all_lengths = [0; NUM_LITERALS + NUM_DISTS];
520 self.state = InflateState::DynLengths(hclen);
521 },
522 InflateState::DynLengths(len) => {
523 for i in 0..len {
524 if csrc.left() < 3 {
525 self.br = csrc.br;
526 self.state = InflateState::DynLengths(len - i);
527 return Err(DecompressError::ShortData);
528 }
529 self.len_lengths[LEN_RECODE[self.cur_len_idx]] = csrc.read(3).unwrap() as u8;
530 self.cur_len_idx += 1;
531 }
532 let mut len_codes = [ShortCodebookDesc { code: 0, bits: 0 }; 19];
533 lengths_to_codes(&self.len_lengths, &mut len_codes)?;
534 let mut cr = ShortCodebookDescReader::new(len_codes.to_vec());
535 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
536 if ret.is_err() {
537 self.state = InflateState::End;
538 return Err(DecompressError::InvalidHeader);
539 }
540 self.dyn_len_cb = Some(ret.unwrap());
541 self.cur_len_idx = 0;
542 self.state = InflateState::DynCodeLengths;
543 },
544 InflateState::DynCodeLengths => {
545 if let Some(ref len_cb) = self.dyn_len_cb {
546 while self.cur_len_idx < self.hlit + self.hdist {
547 let ret = csrc.read_cb(len_cb);
548 let val = match ret {
549 Ok(val) => val,
550 Err(CodebookError::MemoryError) => {
551 self.br = csrc.br;
552 return Err(DecompressError::ShortData);
553 },
554 Err(_) => {
555 self.state = InflateState::End;
556 return Err(DecompressError::InvalidHeader);
557 },
558 };
559 if val < 16 {
560 self.all_lengths[self.cur_len_idx] = val as u8;
561 self.cur_len_idx += 1;
562 } else {
563 let idx = (val as usize) - 16;
564 if idx > 2 {
565 self.state = InflateState::End;
566 return Err(DecompressError::InvalidHeader);
567 }
568 self.state = InflateState::DynCodeLengthsAdd(idx);
569 continue 'main;
570 }
571 }
572 let (lit_lengths, dist_lengths) = self.all_lengths.split_at(self.hlit);
573
574 let mut lit_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_LITERALS];
575 lengths_to_codes(&lit_lengths, &mut lit_codes)?;
576 let mut cr = ShortCodebookDescReader::new(lit_codes.to_vec());
577 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
578 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
579 self.dyn_lit_cb = Some(ret.unwrap());
580
581 let mut dist_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_DISTS];
582 lengths_to_codes(&dist_lengths[..self.hdist], &mut dist_codes)?;
583 let mut cr = ShortCodebookDescReader::new(dist_codes.to_vec());
584 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
585 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
586 self.dyn_dist_cb = Some(ret.unwrap());
587
588 self.state = InflateState::DynBlock;
589 } else {
590 unreachable!();
591 }
592 },
593 InflateState::DynCodeLengthsAdd(mode) => {
594 let base = REPEAT_BASE[mode] as usize;
595 let bits = REPEAT_BITS[mode];
596 let len = base + read_bits!(self, csrc, bits) as usize;
597 if self.cur_len_idx + len > self.hlit + self.hdist {
598 self.state = InflateState::End;
599 return Err(DecompressError::InvalidHeader);
600 }
601 let rpt = if mode == 0 {
602 if self.cur_len_idx == 0 {
603 self.state = InflateState::End;
604 return Err(DecompressError::InvalidHeader);
605 }
606 self.all_lengths[self.cur_len_idx - 1]
607 } else {
608 0
609 };
610 for _ in 0..len {
611 self.all_lengths[self.cur_len_idx] = rpt;
612 self.cur_len_idx += 1;
613 }
614 self.state = InflateState::DynCodeLengths;
615 },
616 InflateState::DynBlock => {
617 if let Some(ref lit_cb) = self.dyn_lit_cb {
618 let val = read_cb!(self, csrc, lit_cb);
619 if val < 256 {
620 if self.output_idx >= dst.len() {
621 self.br = csrc.br;
622 self.state = InflateState::DynBlockLiteral(val as u8);
623 return Err(DecompressError::OutputFull);
624 }
625 self.put_literal(val as u8);
626 dst[self.output_idx] = val as u8;
627 self.output_idx += 1;
628 } else if val == 256 {
629 if self.final_block {
630 self.state = InflateState::End;
631 return Ok(self.output_idx);
632 } else {
633 self.state = InflateState::BlockStart;
634 }
635 } else {
636 let len_idx = (val - 257) as usize;
637 if len_idx >= LENGTH_BASE.len() {
638 self.state = InflateState::End;
639 return Err(DecompressError::InvalidData);
640 }
641 let len_bits = LENGTH_ADD_BITS[len_idx];
642 let add_base = LENGTH_BASE[len_idx] as usize;
643 if len_bits > 0 {
644 self.state = InflateState::DynBlockLengthExt(add_base, len_bits);
645 } else {
646 self.state = InflateState::DynBlockDist(add_base);
647 }
648 }
649 } else {
650 unreachable!();
651 }
652 },
653 InflateState::DynBlockLiteral(sym) => {
654 if self.output_idx >= dst.len() {
655 self.br = csrc.br;
656 return Err(DecompressError::OutputFull);
657 }
658 self.put_literal(sym);
659 dst[self.output_idx] = sym;
660 self.output_idx += 1;
661 self.state = InflateState::DynBlock;
662 },
663 InflateState::DynBlockLengthExt(base, bits) => {
664 let add = read_bits!(self, csrc, bits) as usize;
665 self.state = InflateState::DynBlockDist(base + add);
666 },
667 InflateState::DynBlockDist(length) => {
668 if let Some(ref dist_cb) = self.dyn_dist_cb {
669 let dist_idx = read_cb!(self, csrc, dist_cb) as usize;
670 if dist_idx >= DIST_BASE.len() {
671 self.state = InflateState::End;
672 return Err(DecompressError::InvalidData);
673 }
674 let dist_bits = DIST_ADD_BITS[dist_idx];
675 let dist_base = DIST_BASE[dist_idx] as usize;
676 if dist_bits == 0 {
677 self.state = InflateState::DynCopy(length, dist_base);
678 } else {
679 self.state = InflateState::DynBlockDistExt(length, dist_base, dist_bits);
680 }
681 } else {
682 unreachable!();
683 }
684 },
685 InflateState::DynBlockDistExt(length, base, bits) => {
686 let add = read_bits!(self, csrc, bits) as usize;
687 self.state = InflateState::DynCopy(length, base + add);
688 },
689 InflateState::DynCopy(length, dist) => {
690 if self.output_idx + length > dst.len() {
691 let copy_size = dst.len() - self.output_idx;
692 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
693 if ret.is_err() {
694 self.state = InflateState::End;
695 return Err(DecompressError::InvalidData);
696 }
697 self.output_idx += copy_size;
698 self.br = csrc.br;
699 self.state = InflateState::DynCopy(length - copy_size, dist);
700 return Err(DecompressError::OutputFull);
701 }
702 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
703 if ret.is_err() {
704 self.state = InflateState::End;
705 return Err(DecompressError::InvalidData);
706 }
707 self.output_idx += length;
708 self.state = InflateState::DynBlock;
709 }
710 InflateState::End => {
711 return Ok(0);
712 },
713 }
714 }
715 }
716 ///! Decompresses input data into output returning the uncompressed data length.
717 pub fn uncompress(src: &[u8], dst: &mut [u8]) -> DecompressResult<usize> {
718 let mut inflate = Self::new();
719 let off = if src.len() > 2 && src[0] == 0x78 && src[1] == 0x9C { 2 } else { 0 };
720 inflate.decompress_data(&src[off..], dst, false)
721 }
722 }
723
724 impl Default for Inflate {
725 fn default() -> Self {
726 Self::new()
727 }
728 }
729
730 fn lengths_to_codes(lens: &[u8], codes: &mut [ShortCodebookDesc]) -> DecompressResult<()> {
731 let mut bits = [0u32; 32];
732 let mut pfx = [0u32; 33];
733 for len in lens.iter() {
734 let len = *len as usize;
735 if len >= bits.len() {
736 return Err(DecompressError::InvalidHeader);
737 }
738 bits[len] += 1;
739 }
740 bits[0] = 0;
741 let mut code = 0;
742 for i in 0..bits.len() {
743 code = (code + bits[i]) << 1;
744 pfx[i + 1] = code;
745 }
746
747 for (len, codes) in lens.iter().zip(codes.iter_mut()) {
748 let len = *len as usize;
749 if len != 0 {
750 let bits = len as u8;
751 *codes = ShortCodebookDesc { code: reverse_bits(pfx[len], bits), bits };
752 pfx[len] += 1;
753 } else {
754 *codes = ShortCodebookDesc { code: 0, bits: 0 };
755 }
756 }
757
758 Ok(())
759 }
760
/// Table-driven CRC-32 calculator used for verifying gzip data.
struct GzipCRC32 {
    /// Lookup table with the CRC contribution of every byte value.
    tab: [u32; 256],
    /// Checksum of all data fed so far.
    crc: u32,
}

impl GzipCRC32 {
    /// Creates a calculator with a zero checksum and a freshly built table.
    fn new() -> Self {
        let mut tab = [0u32; 256];
        for (idx, entry) in tab.iter_mut().enumerate() {
            *entry = (0..8).fold(idx as u32, |c, _| {
                if (c & 1) != 0 {
                    0xEDB8_8320 ^ (c >> 1)
                } else {
                    c >> 1
                }
            });
        }
        Self { tab, crc: 0 }
    }
    /// Updates the checksum with the bytes from `src`.
    fn update_crc(&mut self, src: &[u8]) {
        let tab = &self.tab;
        let folded = src.iter().fold(!self.crc, |c, &el| {
            tab[((c ^ u32::from(el)) & 0xFF) as usize] ^ (c >> 8)
        });
        self.crc = !folded;
    }
}
791
792 ///! Decodes input data in gzip file format (RFC 1952) returning a vector containing decoded data.
793 pub fn gzip_decode(br: &mut ByteReader, skip_crc: bool) -> DecompressResult<Vec<u8>> {
794 const FLAG_HCRC: u8 = 0x02;
795 const FLAG_EXTRA: u8 = 0x04;
796 const FLAG_NAME: u8 = 0x08;
797 const FLAG_COMMENT: u8 = 0x10;
798
799 let id1 = br.read_byte()?;
800 let id2 = br.read_byte()?;
801 let cm = br.read_byte()?;
802 let flg = br.read_byte()?;
803 let _mtime = br.read_u32le()?;
804 let _xfl = br.read_byte()?;
805 let _os = br.read_byte()?;
806 if id1 != 0x1F || id2 != 0x8B || cm != 8 {
807 return Err(DecompressError::InvalidHeader);
808 }
809
810 if (flg & FLAG_EXTRA) != 0 {
811 let xlen = br.read_u16le()? as usize;
812 br.read_skip(xlen)?;
813 }
814 if (flg & FLAG_NAME) != 0 {
815 loop {
816 let b = br.read_byte()?;
817 if b == 0 {
818 break;
819 }
820 }
821 }
822 if (flg & FLAG_COMMENT) != 0 {
823 loop {
824 let b = br.read_byte()?;
825 if b == 0 {
826 break;
827 }
828 }
829 }
830 let _hcrc = if (flg & FLAG_HCRC) != 0 {
831 br.read_u16le()?
832 } else {
833 0
834 };
835 if (flg & 0xE0) != 0 {
836 return Err(DecompressError::Unsupported);
837 }
838
839 let mut output: Vec<u8> = Vec::new();
840 let mut tail = [0u8; 8];
841 let mut inblk = [0u8; 1024];
842 let mut oblk = [0u8; 4096];
843 let mut inflate = Inflate::new();
844 let mut checker = GzipCRC32::new();
845
846 loop {
847 let ret = br.read_buf_some(&mut inblk);
848 if let Err(ByteIOError::EOF) = ret {
849 break;
850 }
851 let inlen = match ret {
852 Ok(val) => val,
853 Err(_) => return Err(DecompressError::IOError),
854 };
855 let mut repeat = false;
856 loop {
857 let ret = inflate.decompress_data(&inblk[..inlen], &mut oblk, repeat);
858 match ret {
859 Ok(outlen) => {
860 checker.update_crc(&oblk[..outlen]);
861 output.extend_from_slice(&oblk[..outlen]);
862 break;
863 },
864 Err(DecompressError::ShortData) => {
865 break;
866 },
867 Err(DecompressError::OutputFull) => {
868 repeat = true;
869 checker.update_crc(&oblk);
870 output.extend_from_slice(&oblk);
871 },
872 Err(err) => {
873 return Err(err);
874 },
875 }
876 }
877 // Save last 8 bytes for CRC and size.
878 if inlen >= 8 {
879 tail.copy_from_slice(&inblk[inlen - 8..][..8]);
880 } else {
881 let shift_len = 8 - inlen;
882 for i in 0..shift_len {
883 tail[i] = tail[i + inlen];
884 }
885 for i in shift_len..8 {
886 tail[i] = inblk[i - shift_len];
887 }
888 }
889 }
890 if !skip_crc {
891 if !inflate.is_finished() { println!("???"); }
892 let crc = read_u32le(&tail[0..4])?;
893 let size = read_u32le(&tail[4..8])?;
894 if size != (output.len() as u32) {
895 return Err(DecompressError::CRCError);
896 }
897 if crc != checker.crc {
898 return Err(DecompressError::CRCError);
899 }
900 }
901
902 Ok(output)
903 }
904
#[cfg(test)]
mod test {
    use super::*;

    // Single-call decoding of a short stream.
    #[test]
    fn test_inflate1() {
        const TEST_DATA: &[u8] = &[
                0xF3, 0x48, 0xCD, 0xC9, 0xC9, 0xD7, 0x51, 0x28,
                0xCF, 0x2F, 0xCA, 0x49, 0x51, 0x04, 0x00 ];
        const TEST_REF: &[u8] = b"Hello, world!";
        let mut dst_buf = [0u8; 13];
        let len = Inflate::uncompress(TEST_DATA, &mut dst_buf).unwrap();
        assert_eq!(len, 13);
        assert_eq!(&dst_buf[..len], &TEST_REF[..len]);
    }
    // Decoding with both input and output split into small chunks.
    #[test]
    fn test_inflate2() {
        const TEST_DATA3: &[u8] = &[ 0x4B, 0x4C, 0x44, 0x80, 0x24, 0x54, 0x80, 0x2C, 0x06, 0x00 ];
        const TEST_REF3: &[u8] = b"aaaaaaaaaaaabbbbbbbbbbbbbbbaaaaabbbbbbb";
        let mut dst_buf = [0u8; 39];

        let mut inflate = Inflate::new();
        let mut output_chunk = [0u8; 7];
        let mut output_pos = 0;
        for input in TEST_DATA3.chunks(3) {
            let mut repeat = false;
            loop {
                let ret = inflate.decompress_data(input, &mut output_chunk, repeat);
                match ret {
                    Ok(len) => {
                        dst_buf[output_pos..][..len].copy_from_slice(&output_chunk[..len]);
                        output_pos += len;
                        break;
                    },
                    Err(DecompressError::ShortData) => {
                        break;
                    },
                    Err(DecompressError::OutputFull) => {
                        repeat = true;
                        let len = output_chunk.len();
                        dst_buf[output_pos..][..len].copy_from_slice(&output_chunk);
                        output_pos += len;
                    },
                    _ => {
                        panic!("decompress error {:?}", ret.err().unwrap());
                    },
                }
            }
        }

        assert_eq!(output_pos, dst_buf.len());
        assert_eq!(&dst_buf[..output_pos], &TEST_REF3[..output_pos]);
    }
    // Decoding a gzip file including size and checksum verification.
    #[test]
    fn test_inflate3() {
        const TEST_DATA: &[u8] = &[
    0x1F, 0x8B, 0x08, 0x08, 0xF6, 0x7B, 0x90, 0x5E, 0x02, 0x03, 0x31, 0x2E, 0x74, 0x78, 0x74, 0x00,
    0xE5, 0x95, 0x4B, 0x4E, 0xC3, 0x30, 0x10, 0x40, 0xF7, 0x39, 0xC5, 0x1C, 0x00, 0x16, 0x70, 0x83,
    0x0A, 0xB5, 0x3B, 0xE8, 0x82, 0x5E, 0x60, 0x1A, 0x4F, 0xE2, 0x11, 0xFE, 0x44, 0x1E, 0xA7, 0x69,
    0x6E, 0xCF, 0x38, 0xDD, 0xB0, 0x40, 0xA2, 0x46, 0x2D, 0x20, 0x2A, 0xE5, 0xAB, 0xCC, 0xE7, 0xBD,
    0x49, 0xAC, 0x6C, 0x03, 0x64, 0x4B, 0xD0, 0x71, 0x92, 0x0C, 0x06, 0x67, 0x88, 0x1D, 0x3C, 0xD9,
    0xC4, 0x92, 0x3D, 0x4A, 0xF3, 0x3C, 0x43, 0x4E, 0x23, 0x81, 0x8B, 0x07, 0x82, 0x1E, 0xF5, 0x90,
    0x23, 0x78, 0x6A, 0x56, 0x30, 0x60, 0xCA, 0x89, 0x4D, 0x4F, 0xC0, 0x01, 0x10, 0x06, 0xC2, 0xA4,
    0xA1, 0x44, 0xCD, 0xF6, 0x54, 0x50, 0xA8, 0x8D, 0xC1, 0x9C, 0x5F, 0x71, 0x37, 0x45, 0xC8, 0x63,
    0xCA, 0x8E, 0xC0, 0xE8, 0x23, 0x69, 0x56, 0x9A, 0x8D, 0x5F, 0xB6, 0xC9, 0x96, 0x53, 0x4D, 0x17,
    0xAB, 0xB9, 0xB0, 0x49, 0x14, 0x5A, 0x0B, 0x96, 0x82, 0x7C, 0xB7, 0x6F, 0x17, 0x35, 0xC7, 0x9E,
    0xDF, 0x78, 0xA3, 0xF1, 0xD0, 0xA2, 0x73, 0x1C, 0x7A, 0xD8, 0x2B, 0xB3, 0x5C, 0x90, 0x85, 0xBB,
    0x2A, 0x14, 0x2E, 0xF7, 0xD1, 0x19, 0x48, 0x0A, 0x23, 0x57, 0x45, 0x13, 0x3E, 0xD6, 0xA0, 0xBD,
    0xF2, 0x11, 0x7A, 0x22, 0x21, 0xAD, 0xE5, 0x70, 0x56, 0xA0, 0x9F, 0xA5, 0xA5, 0x03, 0x85, 0x2A,
    0xDE, 0x92, 0x00, 0x32, 0x61, 0x10, 0xAD, 0x27, 0x13, 0x7B, 0x5F, 0x98, 0x7F, 0x59, 0x83, 0xB8,
    0xB7, 0x35, 0x16, 0xEB, 0x12, 0x0F, 0x1E, 0xD9, 0x14, 0x0B, 0xCF, 0xEE, 0x6D, 0x91, 0xF8, 0x93,
    0x6E, 0x81, 0x3F, 0x7F, 0x41, 0xA4, 0x22, 0x1F, 0xB7, 0xE6, 0x85, 0x83, 0x9A, 0xA2, 0x61, 0x12,
    0x0D, 0x0F, 0x6D, 0x01, 0xBD, 0xB0, 0xE8, 0x1D, 0xEC, 0xD1, 0xA0, 0xBF, 0x1F, 0x4E, 0xFB, 0x55,
    0xBD, 0x73, 0xDD, 0x87, 0xB9, 0x53, 0x23, 0x17, 0xD3, 0xE2, 0xE9, 0x08, 0x87, 0x42, 0xFF, 0xCF,
    0x26, 0x42, 0xAE, 0x76, 0xB5, 0xAE, 0x97, 0x0C, 0x18, 0x78, 0xA0, 0x24, 0xE5, 0x54, 0x0C, 0x6E,
    0x60, 0x52, 0x79, 0x22, 0x57, 0xF5, 0x87, 0x78, 0x78, 0x04, 0x93, 0x46, 0xEF, 0xCB, 0x98, 0x96,
    0x8B, 0x65, 0x00, 0xB7, 0x36, 0xBD, 0x77, 0xA8, 0xBD, 0x5A, 0xAA, 0x1A, 0x09, 0x00, 0x00
        ];

        let mut mr = MemoryReader::new_read(TEST_DATA);
        let mut br = ByteReader::new(&mut mr);
        let _dst_buf = gzip_decode(&mut br, false).unwrap();

//        println!("{}", String::from_utf8_lossy(_dst_buf.as_slice()));
    }
}