deflate: fix output indexing
[nihav.git] / nihav-core / src / compr / deflate.rs
1 //! Deflate format (RFC 1951) support.
2 //!
3 //! This module provides functionality for decompressing raw deflated streams via [`Inflate`] and gzip files (RFC 1952) via [`gzip_decode`].
4 //!
5 //! [`Inflate`]: ./struct.Inflate.html
6 //! [`gzip_decode`]: ./fn.gzip_decode.html
7 //!
8 //! # Examples
9 //!
10 //! Decompressing full input buffer into sufficiently large output buffer:
11 //! ```
12 //! # use nihav_core::compr::DecompressError;
13 //! use nihav_core::compr::deflate::Inflate;
14 //!
15 //! # fn decompress(input: &[u8]) -> Result<(), DecompressError> {
16 //! # let mut output_buffer = [0u8; 16];
17 //! let output_length = Inflate::uncompress(input, &mut output_buffer)?;
18 //! # Ok(())
19 //! # }
20 //! ```
21 //!
22 //! Decompressing input chunks into portions of output:
23 //! ```
24 //! use nihav_core::compr::DecompressError;
25 //! use nihav_core::compr::deflate::Inflate;
26 //!
27 //! # fn decompress(input_data: &[u8]) -> Result<(), DecompressError> {
28 //! let mut inflate = Inflate::new();
29 //! let mut dst_buf: Vec<u8> = Vec::new();
30 //! let mut output_chunk = [0u8; 1024];
31 //! for src in input_data.chunks(512) {
32 //! let mut repeat = false;
33 //! loop {
34 //! let ret = inflate.decompress_data(src, &mut output_chunk, repeat);
35 //! match ret {
36 //! Ok(len) => { // we got a buffer decoded successfully to the end
37 //! dst_buf.extend_from_slice(&output_chunk[..len]);
38 //! break;
39 //! },
40 //! Err(DecompressError::ShortData) => { // this block of data was fully read
41 //! break;
42 //! },
43 //! Err(DecompressError::OutputFull) => {
44 //! // the output buffer is full, flush it and continue decoding the same block
45 //! repeat = true;
46 //! dst_buf.extend_from_slice(&output_chunk);
47 //! },
48 //! Err(err) => {
49 //! return Err(err);
50 //! },
51 //! }
52 //! }
53 //! }
54 //! # Ok(())
55 //! # }
56 //! ```
57
58 use crate::io::byteio::*;
59 use crate::io::bitreader::*;
60 use crate::io::codebook::*;
61 use super::*;
62
/// Highest symbol index of the literal/length alphabet; the fixed code table
/// exposes `NUM_LITERALS + 1` entries.
const NUM_LITERALS: usize = 287;
/// Number of slots reserved for distance code lengths.
const NUM_DISTS: usize = 32;
65
66 struct FixedLenCodeReader {}
67
68 impl CodebookDescReader<u16> for FixedLenCodeReader {
69 fn bits(&mut self, idx: usize) -> u8 {
70 if idx < 144 { 8 }
71 else if idx < 256 { 9 }
72 else if idx < 280 { 7 }
73 else { 8 }
74 }
75 #[allow(clippy::identity_op)]
76 fn code(&mut self, idx: usize) -> u32 {
77 let base = idx as u32;
78 let bits = self.bits(idx);
79 if idx < 144 { reverse_bits(base + 0x30, bits) }
80 else if idx < 256 { reverse_bits(base + 0x190 - 144, bits) }
81 else if idx < 280 { reverse_bits(base + 0x000 - 256, bits) }
82 else { reverse_bits(base + 0xC0 - 280, bits) }
83 }
84 fn sym (&mut self, idx: usize) -> u16 { idx as u16 }
85 fn len(&mut self) -> usize { NUM_LITERALS + 1 }
86 }
87
88 #[derive(Clone,Copy,Default)]
89 struct BitReaderState {
90 pos: usize,
91 bitbuf: u32,
92 bits: u8,
93 }
94
95 struct CurrentSource<'a> {
96 src: &'a [u8],
97 br: BitReaderState,
98 }
99
100 impl<'a> CurrentSource<'a> {
101 fn new(src: &'a [u8], br: BitReaderState) -> Self {
102 let mut newsrc = Self { src, br };
103 newsrc.br.pos = 0;
104 newsrc.refill();
105 newsrc
106 }
107 fn reinit(src: &'a [u8], br: BitReaderState) -> Self {
108 let mut newsrc = Self { src, br };
109 newsrc.refill();
110 newsrc
111 }
112 fn refill(&mut self) {
113 while (self.br.bits <= 24) && (self.br.pos < self.src.len()) {
114 self.br.bitbuf |= u32::from(self.src[self.br.pos]) << self.br.bits;
115 self.br.bits += 8;
116 self.br.pos += 1;
117 }
118 }
119 fn skip_cache(&mut self, nbits: u8) {
120 self.br.bitbuf >>= nbits;
121 self.br.bits -= nbits;
122 }
123 fn read(&mut self, nbits: u8) -> BitReaderResult<u32> {
124 if nbits == 0 { return Ok(0); }
125 if nbits > 16 { return Err(BitReaderError::TooManyBitsRequested); }
126 if self.br.bits < nbits {
127 self.refill();
128 if self.br.bits < nbits { return Err(BitReaderError::BitstreamEnd); }
129 }
130 let ret = self.br.bitbuf & ((1 << nbits) - 1);
131 self.skip_cache(nbits);
132 Ok(ret)
133 }
134 fn read_bool(&mut self) -> BitReaderResult<bool> {
135 if self.br.bits == 0 {
136 self.refill();
137 if self.br.bits == 0 { return Err(BitReaderError::BitstreamEnd); }
138 }
139 let ret = (self.br.bitbuf & 1) != 0;
140 self.skip_cache(1);
141 Ok(ret)
142 }
143 fn peek(&mut self, nbits: u8) -> u32 {
144 if nbits == 0 || nbits > 16 { return 0; }
145 if self.br.bits < nbits {
146 self.refill();
147 }
148 self.br.bitbuf & ((1 << nbits) - 1)
149 }
150 fn skip(&mut self, nbits: u32) -> BitReaderResult<()> {
151 if u32::from(self.br.bits) >= nbits {
152 self.skip_cache(nbits as u8);
153 } else {
154 unreachable!();
155 }
156 Ok(())
157 }
158 fn align(&mut self) {
159 let b = self.br.bits & 7;
160 if b != 0 {
161 self.skip_cache(8 - (b as u8));
162 }
163 }
164 fn left(&self) -> isize {
165 ((self.src.len() as isize) - (self.br.pos as isize)) * 8 + (self.br.bits as isize)
166 }
167 }
168
169 impl<'a, S: Copy> CodebookReader<S> for CurrentSource<'a> {
170 fn read_cb(&mut self, cb: &Codebook<S>) -> CodebookResult<S> {
171 let mut esc = true;
172 let mut idx = 0;
173 let mut lut_bits = cb.lut_bits;
174 let orig_br = self.br;
175 while esc {
176 let lut_idx = (self.peek(lut_bits) as usize) + (idx as usize);
177 if cb.table[lut_idx] == TABLE_FILL_VALUE { return Err(CodebookError::InvalidCode); }
178 let bits = cb.table[lut_idx] & 0x7F;
179 esc = (cb.table[lut_idx] & 0x80) != 0;
180 idx = (cb.table[lut_idx] >> 8) as usize;
181 let skip_bits = if esc { u32::from(lut_bits) } else { bits };
182 if (skip_bits as isize) > self.left() {
183 self.br = orig_br;
184 self.refill();
185 return Err(CodebookError::MemoryError);
186 }
187 self.skip(skip_bits as u32).unwrap();
188 lut_bits = bits as u8;
189 }
190 Ok(cb.syms[idx])
191 }
192 }
193
/// Decoder state; payloads carry whatever has to survive until the next call.
enum InflateState {
    /// Nothing has been read yet.
    Start,
    /// The final-block flag of the next block is expected.
    BlockStart,
    /// The two-bit block mode is expected.
    BlockMode,
    /// The 16-bit length of a stored block is expected.
    StaticBlockLen,
    /// The 16-bit check value for the given stored block length is expected.
    StaticBlockInvLen(u32),
    /// The given number of stored bytes still has to be copied.
    StaticBlockCopy(usize),
    /// The next symbol of a fixed-code block is expected.
    FixedBlock,
    /// Extra length bits are expected: (base length, number of extra bits).
    FixedBlockLengthExt(usize, u8),
    /// The distance code for a match of the given length is expected.
    FixedBlockDist(usize),
    /// Extra distance bits are expected: (length, base distance, number of extra bits).
    FixedBlockDistExt(usize, usize, u8),
    /// A match is being copied: (remaining length, distance).
    FixedBlockCopy(usize, usize),
    /// A decoded literal is waiting for room in the output buffer.
    FixedBlockLiteral(u8),
    /// The literal/length code count of a dynamic block is expected.
    DynBlockHlit,
    /// The distance code count of a dynamic block is expected.
    DynBlockHdist,
    /// The count of code length code lengths is expected.
    DynBlockHclen,
    /// The given number of 3-bit code length code lengths is still to be read.
    DynLengths(usize),
    /// Literal/length and distance code lengths are being decoded.
    DynCodeLengths,
    /// A repeat count for the given repeat mode (0..=2) is expected.
    DynCodeLengthsAdd(usize),
    /// The next symbol of a dynamic-code block is expected.
    DynBlock,
    /// Extra length bits are expected: (base length, number of extra bits).
    DynBlockLengthExt(usize, u8),
    /// The distance code for a match of the given length is expected.
    DynBlockDist(usize),
    /// Extra distance bits are expected: (length, base distance, number of extra bits).
    DynBlockDistExt(usize, usize, u8),
    /// A match is being copied: (remaining length, distance).
    DynCopy(usize, usize),
    /// A decoded literal is waiting for room in the output buffer.
    DynBlockLiteral(u8),
    /// Decoding has finished or failed irrecoverably.
    End,
}
221
222 ///! The decompressor for deflated streams (RFC 1951).
223 pub struct Inflate {
224 br: BitReaderState,
225 fix_len_cb: Codebook<u16>,
226
227 buf: [u8; 65536],
228 bpos: usize,
229 output_idx: usize,
230 full_pos: usize,
231
232 state: InflateState,
233 final_block: bool,
234 hlit: usize,
235 hdist: usize,
236 dyn_len_cb: Option<Codebook<u32>>,
237 dyn_lit_cb: Option<Codebook<u32>>,
238 dyn_dist_cb: Option<Codebook<u32>>,
239 len_lengths: [u8; 19],
240 all_lengths: [u8; NUM_LITERALS + NUM_DISTS],
241 cur_len_idx: usize,
242 }
243
/// Number of extra bits following each length code (indexed by symbol - 257).
const LENGTH_ADD_BITS: [u8; 29] = [
    0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1,
    2, 2, 2, 2,
    3, 3, 3, 3,
    4, 4, 4, 4,
    5, 5, 5, 5,
    0
];
/// Base match length for each length code (indexed by symbol - 257).
const LENGTH_BASE: [u16; 29] = [
      3,   4,   5,   6,   7,   8,   9,  10,
     11,  13,  15,  17,
     19,  23,  27,  31,
     35,  43,  51,  59,
     67,  83,  99, 115,
    131, 163, 195, 227,
    258
];
/// Number of extra bits following each distance code.
const DIST_ADD_BITS: [u8; 30] = [
     0,  0,  0,  0,
     1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,
     8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 13
];
/// Base distance for each distance code.
const DIST_BASE: [u16; 30] = [
        1,     2,     3,     4,
        5,     7,     9,    13,    17,    25,    33,    49,    65,    97,
      129,   193,   257,   385,   513,   769,  1025,  1537,  2049,  3073,
     4097,  6145,  8193, 12289, 16385, 24577
];
/// Order in which the code length code lengths are stored in a dynamic block header.
const LEN_RECODE: [usize; 19] = [
    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
];
/// Number of repeat-count bits for code length symbols 16, 17 and 18.
const REPEAT_BITS: [u8; 3] = [ 2, 3, 7 ];
/// Smallest repeat count for code length symbols 16, 17 and 18.
const REPEAT_BASE: [u8; 3] = [ 3, 3, 11 ];
269
// Reads `$bits` bits from `$csrc`; when the chunk does not hold that many,
// stores the reader state in `$self` and leaves the enclosing function with
// `DecompressError::ShortData`.
macro_rules! read_bits {
    ($self: expr, $csrc: expr, $bits: expr) => ({
        if $csrc.left() >= $bits as isize {
            $csrc.read($bits).unwrap()
        } else {
            $self.br = $csrc.br;
            return Err(DecompressError::ShortData);
        }
    })
}
279
// Decodes one symbol from `$csrc` using codebook `$cb`.
// Running out of input stores the reader state in `$self` and leaves the
// enclosing function with `ShortData`; any other codebook error ends decoding
// with `InvalidData`.
macro_rules! read_cb {
    ($self: expr, $csrc: expr, $cb: expr) => ({
        let outcome = $csrc.read_cb($cb);
        match outcome {
            Ok(sym) => sym,
            Err(CodebookError::MemoryError) => {
                $self.br = $csrc.br;
                return Err(DecompressError::ShortData);
            },
            Err(_) => {
                $self.state = InflateState::End;
                return Err(DecompressError::InvalidData);
            },
        }
    })
}
296
297 impl Inflate {
298 ///! Creates a new instance of `Inflate` struct.
299 pub fn new() -> Self {
300 let mut cr = FixedLenCodeReader {};
301 let fix_len_cb = Codebook::new(&mut cr, CodebookMode::LSB).unwrap();
302 Self {
303 br: BitReaderState::default(),
304 fix_len_cb,
305
306 buf: [0; 65536],
307 bpos: 0,
308 output_idx: 0,
309 full_pos: 0,
310
311 state: InflateState::Start,
312 final_block: false,
313 dyn_len_cb: None,
314 dyn_lit_cb: None,
315 dyn_dist_cb: None,
316 hlit: 0,
317 hdist: 0,
318 len_lengths: [0; 19],
319 all_lengths: [0; NUM_LITERALS + NUM_DISTS],
320 cur_len_idx: 0,
321 }
322 }
323 fn put_literal(&mut self, val: u8) {
324 self.buf[self.bpos] = val;
325 self.bpos = (self.bpos + 1) & (self.buf.len() - 1);
326 self.full_pos += 1;
327 }
328 fn lz_copy(&mut self, offset: usize, len: usize, dst: &mut [u8]) -> DecompressResult<()> {
329 let mask = self.buf.len() - 1;
330 if offset > self.full_pos {
331 return Err(DecompressError::InvalidData);
332 }
333 let cstart = (self.bpos.wrapping_sub(offset)) & mask;
334 for i in 0..len {
335 self.buf[(self.bpos + i) & mask] = self.buf[(cstart + i) & mask];
336 dst[i] = self.buf[(cstart + i) & mask];
337 }
338 self.bpos = (self.bpos + len) & mask;
339 self.full_pos += len;
340 Ok(())
341 }
342 ///! Reports whether decoder has finished decoding the input.
343 pub fn is_finished(&self) -> bool {
344 match self.state {
345 InflateState::End => true,
346 _ => false,
347 }
348 }
349 ///! Reports the current amount of bytes output into the destination buffer after the last run.
350 pub fn get_current_output_size(&self) -> usize { self.output_idx }
351 ///! Reports the total amount of bytes decoded so far.
352 pub fn get_total_output_size(&self) -> usize { self.bpos }
353 ///! Tries to decompress input data and write it to the output buffer.
354 ///!
355 ///! Since the decompressor can work with arbitrary input and output chunks its return value may have several meanings:
356 ///! * `Ok(len)` means the stream has been fully decoded and then number of bytes output into the destination buffer is returned.
357 ///! * [`DecompressError::ShortData`] means the input stream has been fully read but more data is needed.
358 ///! * [`DecompressError::OutputFull`] means the output buffer is full and should be flushed. Then decoding should continue on the same input block with `continue_block` parameter set to `true`.
359 ///!
360 ///! [`DecompressError::ShortData`]: ../enum.DecompressError.html#variant.ShortData
361 ///! [`DecompressError::OutputFull`]: ../enum.DecompressError.html#variant.OutputFull
362 pub fn decompress_data(&mut self, src: &[u8], dst: &mut [u8], continue_block: bool) -> DecompressResult<usize> {
363 if src.is_empty() || dst.is_empty() {
364 return Err(DecompressError::InvalidArgument);
365 }
366 let mut csrc = if !continue_block {
367 CurrentSource::new(src, self.br)
368 } else {
369 self.output_idx = 0;
370 CurrentSource::reinit(src, self.br)
371 };
372 'main: loop {
373 match self.state {
374 InflateState::Start | InflateState::BlockStart => {
375 if csrc.left() == 0 {
376 self.br = csrc.br;
377 return Err(DecompressError::ShortData);
378 }
379 self.final_block = csrc.read_bool().unwrap();
380 self.state = InflateState::BlockMode;
381 },
382 InflateState::BlockMode => {
383 let bmode = read_bits!(self, csrc, 2);
384 match bmode {
385 0 => {
386 csrc.align();
387 self.state = InflateState::StaticBlockLen;
388 },
389 1 => { self.state = InflateState::FixedBlock; },
390 2 => { self.state = InflateState::DynBlockHlit; },
391 _ => {
392 self.state = InflateState::End;
393 return Err(DecompressError::InvalidHeader);
394 },
395 };
396 },
397 InflateState::StaticBlockLen => {
398 let len = read_bits!(self, csrc, 16);
399 self.state = InflateState::StaticBlockInvLen(len);
400 },
401 InflateState::StaticBlockInvLen(len) => {
402 let inv_len = read_bits!(self, csrc, 16);
403 if len != !inv_len {
404 self.state = InflateState::End;
405 return Err(DecompressError::InvalidHeader);
406 }
407 self.state = InflateState::StaticBlockCopy(len as usize);
408 },
409 InflateState::StaticBlockCopy(len) => {
410 for i in 0..len {
411 if csrc.left() < 8 {
412 self.br = csrc.br;
413 self.state = InflateState::StaticBlockCopy(len - i);
414 return Err(DecompressError::ShortData);
415 }
416 let val = csrc.read(8).unwrap() as u8;
417 self.put_literal(val);
418 }
419 self.state = InflateState::BlockStart;
420 }
421 InflateState::FixedBlock => {
422 let val = read_cb!(self, csrc, &self.fix_len_cb);
423 if val < 256 {
424 if self.output_idx >= dst.len() {
425 self.br = csrc.br;
426 self.state = InflateState::FixedBlockLiteral(val as u8);
427 return Err(DecompressError::OutputFull);
428 }
429 self.put_literal(val as u8);
430 dst[self.output_idx] = val as u8;
431 self.output_idx += 1;
432 } else if val == 256 {
433 if self.final_block {
434 self.state = InflateState::End;
435 return Ok(self.output_idx);
436 } else {
437 self.state = InflateState::BlockStart;
438 }
439 } else {
440 let len_idx = (val - 257) as usize;
441 if len_idx >= LENGTH_BASE.len() {
442 self.state = InflateState::End;
443 return Err(DecompressError::InvalidData);
444 }
445 let len_bits = LENGTH_ADD_BITS[len_idx];
446 let add_base = LENGTH_BASE[len_idx] as usize;
447 if len_bits > 0 {
448 self.state = InflateState::FixedBlockLengthExt(add_base, len_bits);
449 } else {
450 self.state = InflateState::FixedBlockDist(add_base);
451 }
452 }
453 },
454 InflateState::FixedBlockLiteral(sym) => {
455 if self.output_idx >= dst.len() {
456 self.br = csrc.br;
457 return Err(DecompressError::OutputFull);
458 }
459 self.put_literal(sym);
460 dst[self.output_idx] = sym;
461 self.output_idx += 1;
462 self.state = InflateState::FixedBlock;
463 },
464 InflateState::FixedBlockLengthExt(base, bits) => {
465 let add = read_bits!(self, csrc, bits) as usize;
466 self.state = InflateState::FixedBlockDist(base + add);
467 },
468 InflateState::FixedBlockDist(length) => {
469 let dist_idx = reverse_bits(read_bits!(self, csrc, 5), 5) as usize;
470 if dist_idx >= DIST_BASE.len() {
471 self.state = InflateState::End;
472 return Err(DecompressError::InvalidData);
473 }
474 let dist_bits = DIST_ADD_BITS[dist_idx];
475 let dist_base = DIST_BASE[dist_idx] as usize;
476 if dist_bits == 0 {
477 self.state = InflateState::FixedBlockCopy(length, dist_base);
478 } else {
479 self.state = InflateState::FixedBlockDistExt(length, dist_base, dist_bits);
480 }
481 },
482 InflateState::FixedBlockDistExt(length, base, bits) => {
483 let add = read_bits!(self, csrc, bits) as usize;
484 self.state = InflateState::FixedBlockCopy(length, base + add);
485 },
486 InflateState::FixedBlockCopy(length, dist) => {
487 if self.output_idx + length > dst.len() {
488 let copy_size = dst.len() - self.output_idx;
489 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
490 if ret.is_err() {
491 self.state = InflateState::End;
492 return Err(DecompressError::InvalidData);
493 }
494 self.output_idx += copy_size;
495 self.br = csrc.br;
496 self.state = InflateState::FixedBlockCopy(length - copy_size, dist);
497 return Err(DecompressError::OutputFull);
498 }
499 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
500 if ret.is_err() {
501 self.state = InflateState::End;
502 return Err(DecompressError::InvalidData);
503 }
504 self.output_idx += length;
505 self.state = InflateState::FixedBlock;
506 }
507 InflateState::DynBlockHlit => {
508 self.hlit = (read_bits!(self, csrc, 5) as usize) + 257;
509 if self.hlit >= 287 {
510 self.state = InflateState::End;
511 return Err(DecompressError::InvalidHeader);
512 }
513 self.state = InflateState::DynBlockHdist;
514 }
515 InflateState::DynBlockHdist => {
516 self.hdist = (read_bits!(self, csrc, 5) as usize) + 1;
517 self.state = InflateState::DynBlockHclen;
518 },
519 InflateState::DynBlockHclen => {
520 let hclen = (read_bits!(self, csrc, 4) as usize) + 4;
521 self.cur_len_idx = 0;
522 self.len_lengths = [0; 19];
523 self.all_lengths = [0; NUM_LITERALS + NUM_DISTS];
524 self.state = InflateState::DynLengths(hclen);
525 },
526 InflateState::DynLengths(len) => {
527 for i in 0..len {
528 if csrc.left() < 3 {
529 self.br = csrc.br;
530 self.state = InflateState::DynLengths(len - i);
531 return Err(DecompressError::ShortData);
532 }
533 self.len_lengths[LEN_RECODE[self.cur_len_idx]] = csrc.read(3).unwrap() as u8;
534 self.cur_len_idx += 1;
535 }
536 let mut len_codes = [ShortCodebookDesc { code: 0, bits: 0 }; 19];
537 lengths_to_codes(&self.len_lengths, &mut len_codes)?;
538 let mut cr = ShortCodebookDescReader::new(len_codes.to_vec());
539 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
540 if ret.is_err() {
541 self.state = InflateState::End;
542 return Err(DecompressError::InvalidHeader);
543 }
544 self.dyn_len_cb = Some(ret.unwrap());
545 self.cur_len_idx = 0;
546 self.state = InflateState::DynCodeLengths;
547 },
548 InflateState::DynCodeLengths => {
549 if let Some(ref len_cb) = self.dyn_len_cb {
550 while self.cur_len_idx < self.hlit + self.hdist {
551 let ret = csrc.read_cb(len_cb);
552 let val = match ret {
553 Ok(val) => val,
554 Err(CodebookError::MemoryError) => {
555 self.br = csrc.br;
556 return Err(DecompressError::ShortData);
557 },
558 Err(_) => {
559 self.state = InflateState::End;
560 return Err(DecompressError::InvalidHeader);
561 },
562 };
563 if val < 16 {
564 self.all_lengths[self.cur_len_idx] = val as u8;
565 self.cur_len_idx += 1;
566 } else {
567 let idx = (val as usize) - 16;
568 if idx > 2 {
569 self.state = InflateState::End;
570 return Err(DecompressError::InvalidHeader);
571 }
572 self.state = InflateState::DynCodeLengthsAdd(idx);
573 continue 'main;
574 }
575 }
576 let (lit_lengths, dist_lengths) = self.all_lengths.split_at(self.hlit);
577
578 let mut lit_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_LITERALS];
579 lengths_to_codes(&lit_lengths, &mut lit_codes)?;
580 let mut cr = ShortCodebookDescReader::new(lit_codes.to_vec());
581 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
582 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
583 self.dyn_lit_cb = Some(ret.unwrap());
584
585 let mut dist_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_DISTS];
586 lengths_to_codes(&dist_lengths[..self.hdist], &mut dist_codes)?;
587 let mut cr = ShortCodebookDescReader::new(dist_codes.to_vec());
588 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
589 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
590 self.dyn_dist_cb = Some(ret.unwrap());
591
592 self.state = InflateState::DynBlock;
593 } else {
594 unreachable!();
595 }
596 },
597 InflateState::DynCodeLengthsAdd(mode) => {
598 let base = REPEAT_BASE[mode] as usize;
599 let bits = REPEAT_BITS[mode];
600 let len = base + read_bits!(self, csrc, bits) as usize;
601 if self.cur_len_idx + len > self.hlit + self.hdist {
602 self.state = InflateState::End;
603 return Err(DecompressError::InvalidHeader);
604 }
605 let rpt = if mode == 0 {
606 if self.cur_len_idx == 0 {
607 self.state = InflateState::End;
608 return Err(DecompressError::InvalidHeader);
609 }
610 self.all_lengths[self.cur_len_idx - 1]
611 } else {
612 0
613 };
614 for _ in 0..len {
615 self.all_lengths[self.cur_len_idx] = rpt;
616 self.cur_len_idx += 1;
617 }
618 self.state = InflateState::DynCodeLengths;
619 },
620 InflateState::DynBlock => {
621 if let Some(ref lit_cb) = self.dyn_lit_cb {
622 let val = read_cb!(self, csrc, lit_cb);
623 if val < 256 {
624 if self.output_idx >= dst.len() {
625 self.br = csrc.br;
626 self.state = InflateState::DynBlockLiteral(val as u8);
627 return Err(DecompressError::OutputFull);
628 }
629 self.put_literal(val as u8);
630 dst[self.output_idx] = val as u8;
631 self.output_idx += 1;
632 } else if val == 256 {
633 if self.final_block {
634 self.state = InflateState::End;
635 return Ok(self.output_idx);
636 } else {
637 self.state = InflateState::BlockStart;
638 }
639 } else {
640 let len_idx = (val - 257) as usize;
641 if len_idx >= LENGTH_BASE.len() {
642 self.state = InflateState::End;
643 return Err(DecompressError::InvalidData);
644 }
645 let len_bits = LENGTH_ADD_BITS[len_idx];
646 let add_base = LENGTH_BASE[len_idx] as usize;
647 if len_bits > 0 {
648 self.state = InflateState::DynBlockLengthExt(add_base, len_bits);
649 } else {
650 self.state = InflateState::DynBlockDist(add_base);
651 }
652 }
653 } else {
654 unreachable!();
655 }
656 },
657 InflateState::DynBlockLiteral(sym) => {
658 if self.output_idx >= dst.len() {
659 self.br = csrc.br;
660 return Err(DecompressError::OutputFull);
661 }
662 self.put_literal(sym);
663 dst[self.output_idx] = sym;
664 self.output_idx += 1;
665 self.state = InflateState::DynBlock;
666 },
667 InflateState::DynBlockLengthExt(base, bits) => {
668 let add = read_bits!(self, csrc, bits) as usize;
669 self.state = InflateState::DynBlockDist(base + add);
670 },
671 InflateState::DynBlockDist(length) => {
672 if let Some(ref dist_cb) = self.dyn_dist_cb {
673 let dist_idx = read_cb!(self, csrc, dist_cb) as usize;
674 if dist_idx >= DIST_BASE.len() {
675 self.state = InflateState::End;
676 return Err(DecompressError::InvalidData);
677 }
678 let dist_bits = DIST_ADD_BITS[dist_idx];
679 let dist_base = DIST_BASE[dist_idx] as usize;
680 if dist_bits == 0 {
681 self.state = InflateState::DynCopy(length, dist_base);
682 } else {
683 self.state = InflateState::DynBlockDistExt(length, dist_base, dist_bits);
684 }
685 } else {
686 unreachable!();
687 }
688 },
689 InflateState::DynBlockDistExt(length, base, bits) => {
690 let add = read_bits!(self, csrc, bits) as usize;
691 self.state = InflateState::DynCopy(length, base + add);
692 },
693 InflateState::DynCopy(length, dist) => {
694 if self.output_idx + length > dst.len() {
695 let copy_size = dst.len() - self.output_idx;
696 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
697 if ret.is_err() {
698 self.state = InflateState::End;
699 return Err(DecompressError::InvalidData);
700 }
701 self.output_idx += copy_size;
702 self.br = csrc.br;
703 self.state = InflateState::DynCopy(length - copy_size, dist);
704 return Err(DecompressError::OutputFull);
705 }
706 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
707 if ret.is_err() {
708 self.state = InflateState::End;
709 return Err(DecompressError::InvalidData);
710 }
711 self.output_idx += length;
712 self.state = InflateState::DynBlock;
713 }
714 InflateState::End => {
715 return Ok(0);
716 },
717 }
718 }
719 }
720 ///! Decompresses input data into output returning the uncompressed data length.
721 pub fn uncompress(src: &[u8], dst: &mut [u8]) -> DecompressResult<usize> {
722 let mut inflate = Self::new();
723 let off = if src.len() > 2 && src[0] == 0x78 && src[1] == 0x9C { 2 } else { 0 };
724 inflate.decompress_data(&src[off..], dst, false)
725 }
726 }
727
728 impl Default for Inflate {
729 fn default() -> Self {
730 Self::new()
731 }
732 }
733
734 fn lengths_to_codes(lens: &[u8], codes: &mut [ShortCodebookDesc]) -> DecompressResult<()> {
735 let mut bits = [0u32; 32];
736 let mut pfx = [0u32; 33];
737 for len in lens.iter() {
738 let len = *len as usize;
739 if len >= bits.len() {
740 return Err(DecompressError::InvalidHeader);
741 }
742 bits[len] += 1;
743 }
744 bits[0] = 0;
745 let mut code = 0;
746 for i in 0..bits.len() {
747 code = (code + bits[i]) << 1;
748 pfx[i + 1] = code;
749 }
750
751 for (len, codes) in lens.iter().zip(codes.iter_mut()) {
752 let len = *len as usize;
753 if len != 0 {
754 let bits = len as u8;
755 *codes = ShortCodebookDesc { code: reverse_bits(pfx[len], bits), bits };
756 pfx[len] += 1;
757 } else {
758 *codes = ShortCodebookDesc { code: 0, bits: 0 };
759 }
760 }
761
762 Ok(())
763 }
764
/// Table-driven CRC-32 accumulator used to verify gzip payloads.
struct GzipCRC32 {
    /// Per-byte lookup table built in `new()`.
    tab: [u32; 256],
    /// Checksum of all data fed so far.
    crc: u32,
}

impl GzipCRC32 {
    /// Builds the lookup table and starts with a zero checksum.
    #[allow(clippy::unreadable_literal)]
    fn new() -> Self {
        const POLY: u32 = 0xEDB88320;
        let mut tab = [0u32; 256];
        for (idx, entry) in tab.iter_mut().enumerate() {
            // Eight shift/xor rounds, one per bit of the index byte.
            *entry = (0..8).fold(idx as u32, |c, _| {
                if (c & 1) == 0 { c >> 1 } else { POLY ^ (c >> 1) }
            });
        }
        Self { tab, crc: 0 }
    }
    /// Folds `src` into the running checksum.
    fn update_crc(&mut self, src: &[u8]) {
        let tab = &self.tab;
        // The checksum is kept inverted while bytes are being folded in.
        let folded = src.iter().fold(!self.crc, |c, &byte| {
            tab[((c ^ u32::from(byte)) & 0xFF) as usize] ^ (c >> 8)
        });
        self.crc = !folded;
    }
}
795
796 ///! Decodes input data in gzip file format (RFC 1952) returning a vector containing decoded data.
797 pub fn gzip_decode(br: &mut ByteReader, skip_crc: bool) -> DecompressResult<Vec<u8>> {
798 const FLAG_HCRC: u8 = 0x02;
799 const FLAG_EXTRA: u8 = 0x04;
800 const FLAG_NAME: u8 = 0x08;
801 const FLAG_COMMENT: u8 = 0x10;
802
803 let id1 = br.read_byte()?;
804 let id2 = br.read_byte()?;
805 let cm = br.read_byte()?;
806 let flg = br.read_byte()?;
807 let _mtime = br.read_u32le()?;
808 let _xfl = br.read_byte()?;
809 let _os = br.read_byte()?;
810 if id1 != 0x1F || id2 != 0x8B || cm != 8 {
811 return Err(DecompressError::InvalidHeader);
812 }
813
814 if (flg & FLAG_EXTRA) != 0 {
815 let xlen = br.read_u16le()? as usize;
816 br.read_skip(xlen)?;
817 }
818 if (flg & FLAG_NAME) != 0 {
819 loop {
820 let b = br.read_byte()?;
821 if b == 0 {
822 break;
823 }
824 }
825 }
826 if (flg & FLAG_COMMENT) != 0 {
827 loop {
828 let b = br.read_byte()?;
829 if b == 0 {
830 break;
831 }
832 }
833 }
834 let _hcrc = if (flg & FLAG_HCRC) != 0 {
835 br.read_u16le()?
836 } else {
837 0
838 };
839 if (flg & 0xE0) != 0 {
840 return Err(DecompressError::Unsupported);
841 }
842
843 let mut output: Vec<u8> = Vec::new();
844 let mut tail = [0u8; 8];
845 let mut inblk = [0u8; 1024];
846 let mut oblk = [0u8; 4096];
847 let mut inflate = Inflate::new();
848 let mut checker = GzipCRC32::new();
849
850 loop {
851 let ret = br.read_buf_some(&mut inblk);
852 if let Err(ByteIOError::EOF) = ret {
853 break;
854 }
855 let inlen = match ret {
856 Ok(val) => val,
857 Err(_) => return Err(DecompressError::IOError),
858 };
859 let mut repeat = false;
860 loop {
861 let ret = inflate.decompress_data(&inblk[..inlen], &mut oblk, repeat);
862 match ret {
863 Ok(outlen) => {
864 checker.update_crc(&oblk[..outlen]);
865 output.extend_from_slice(&oblk[..outlen]);
866 break;
867 },
868 Err(DecompressError::ShortData) => {
869 break;
870 },
871 Err(DecompressError::OutputFull) => {
872 repeat = true;
873 checker.update_crc(&oblk);
874 output.extend_from_slice(&oblk);
875 },
876 Err(err) => {
877 return Err(err);
878 },
879 }
880 }
881 // Save last 8 bytes for CRC and size.
882 if inlen >= 8 {
883 tail.copy_from_slice(&inblk[inlen - 8..][..8]);
884 } else {
885 let shift_len = 8 - inlen;
886 for i in 0..shift_len {
887 tail[i] = tail[i + inlen];
888 }
889 for i in shift_len..8 {
890 tail[i] = inblk[i - shift_len];
891 }
892 }
893 }
894 if !skip_crc {
895 if !inflate.is_finished() { println!("???"); }
896 let crc = read_u32le(&tail[0..4])?;
897 let size = read_u32le(&tail[4..8])?;
898 if size != (output.len() as u32) {
899 return Err(DecompressError::CRCError);
900 }
901 if crc != checker.crc {
902 return Err(DecompressError::CRCError);
903 }
904 }
905
906 Ok(output)
907 }
908
#[cfg(test)]
mod test {
    use super::*;

    /// One-shot decoding of a small fixed-code stream.
    #[test]
    fn test_inflate1() {
        const TEST_DATA: &[u8] = &[
                0xF3, 0x48, 0xCD, 0xC9, 0xC9, 0xD7, 0x51, 0x28,
                0xCF, 0x2F, 0xCA, 0x49, 0x51, 0x04, 0x00 ];
        const TEST_REF: &[u8] = b"Hello, world!";
        let mut dst_buf = [0u8; 13];
        let len = Inflate::uncompress(TEST_DATA, &mut dst_buf).unwrap();
        assert_eq!(len, 13);
        assert_eq!(&dst_buf[..len], &TEST_REF[..len]);
    }
    /// Chunked decoding with tiny input and output buffers.
    #[test]
    fn test_inflate2() {
        const TEST_DATA3: &[u8] = &[ 0x4B, 0x4C, 0x44, 0x80, 0x24, 0x54, 0x80, 0x2C, 0x06, 0x00 ];
        const TEST_REF3: &[u8] = b"aaaaaaaaaaaabbbbbbbbbbbbbbbaaaaabbbbbbb";
        let mut dst_buf = [0u8; 39];

        let mut inflate = Inflate::new();
        let mut output_chunk = [0u8; 7];
        let mut output_pos = 0;
        for input in TEST_DATA3.chunks(3) {
            let mut repeat = false;
            loop {
                match inflate.decompress_data(input, &mut output_chunk, repeat) {
                    Ok(len) => {
                        dst_buf[output_pos..][..len].copy_from_slice(&output_chunk[..len]);
                        output_pos += len;
                        break;
                    },
                    Err(DecompressError::ShortData) => {
                        break;
                    },
                    Err(DecompressError::OutputFull) => {
                        repeat = true;
                        let chunk_len = output_chunk.len();
                        dst_buf[output_pos..][..chunk_len].copy_from_slice(&output_chunk);
                        output_pos += chunk_len;
                    },
                    Err(err) => {
                        panic!("decompress error {:?}", err);
                    },
                }
            }
        }

        assert_eq!(output_pos, dst_buf.len());
        assert_eq!(&dst_buf[..output_pos], &TEST_REF3[..output_pos]);
    }
    /// Decoding of a complete gzip file including the checksum verification.
    #[test]
    fn test_inflate3() {
        const TEST_DATA: &[u8] = &[
    0x1F, 0x8B, 0x08, 0x08, 0xF6, 0x7B, 0x90, 0x5E, 0x02, 0x03, 0x31, 0x2E, 0x74, 0x78, 0x74, 0x00,
    0xE5, 0x95, 0x4B, 0x4E, 0xC3, 0x30, 0x10, 0x40, 0xF7, 0x39, 0xC5, 0x1C, 0x00, 0x16, 0x70, 0x83,
    0x0A, 0xB5, 0x3B, 0xE8, 0x82, 0x5E, 0x60, 0x1A, 0x4F, 0xE2, 0x11, 0xFE, 0x44, 0x1E, 0xA7, 0x69,
    0x6E, 0xCF, 0x38, 0xDD, 0xB0, 0x40, 0xA2, 0x46, 0x2D, 0x20, 0x2A, 0xE5, 0xAB, 0xCC, 0xE7, 0xBD,
    0x49, 0xAC, 0x6C, 0x03, 0x64, 0x4B, 0xD0, 0x71, 0x92, 0x0C, 0x06, 0x67, 0x88, 0x1D, 0x3C, 0xD9,
    0xC4, 0x92, 0x3D, 0x4A, 0xF3, 0x3C, 0x43, 0x4E, 0x23, 0x81, 0x8B, 0x07, 0x82, 0x1E, 0xF5, 0x90,
    0x23, 0x78, 0x6A, 0x56, 0x30, 0x60, 0xCA, 0x89, 0x4D, 0x4F, 0xC0, 0x01, 0x10, 0x06, 0xC2, 0xA4,
    0xA1, 0x44, 0xCD, 0xF6, 0x54, 0x50, 0xA8, 0x8D, 0xC1, 0x9C, 0x5F, 0x71, 0x37, 0x45, 0xC8, 0x63,
    0xCA, 0x8E, 0xC0, 0xE8, 0x23, 0x69, 0x56, 0x9A, 0x8D, 0x5F, 0xB6, 0xC9, 0x96, 0x53, 0x4D, 0x17,
    0xAB, 0xB9, 0xB0, 0x49, 0x14, 0x5A, 0x0B, 0x96, 0x82, 0x7C, 0xB7, 0x6F, 0x17, 0x35, 0xC7, 0x9E,
    0xDF, 0x78, 0xA3, 0xF1, 0xD0, 0xA2, 0x73, 0x1C, 0x7A, 0xD8, 0x2B, 0xB3, 0x5C, 0x90, 0x85, 0xBB,
    0x2A, 0x14, 0x2E, 0xF7, 0xD1, 0x19, 0x48, 0x0A, 0x23, 0x57, 0x45, 0x13, 0x3E, 0xD6, 0xA0, 0xBD,
    0xF2, 0x11, 0x7A, 0x22, 0x21, 0xAD, 0xE5, 0x70, 0x56, 0xA0, 0x9F, 0xA5, 0xA5, 0x03, 0x85, 0x2A,
    0xDE, 0x92, 0x00, 0x32, 0x61, 0x10, 0xAD, 0x27, 0x13, 0x7B, 0x5F, 0x98, 0x7F, 0x59, 0x83, 0xB8,
    0xB7, 0x35, 0x16, 0xEB, 0x12, 0x0F, 0x1E, 0xD9, 0x14, 0x0B, 0xCF, 0xEE, 0x6D, 0x91, 0xF8, 0x93,
    0x6E, 0x81, 0x3F, 0x7F, 0x41, 0xA4, 0x22, 0x1F, 0xB7, 0xE6, 0x85, 0x83, 0x9A, 0xA2, 0x61, 0x12,
    0x0D, 0x0F, 0x6D, 0x01, 0xBD, 0xB0, 0xE8, 0x1D, 0xEC, 0xD1, 0xA0, 0xBF, 0x1F, 0x4E, 0xFB, 0x55,
    0xBD, 0x73, 0xDD, 0x87, 0xB9, 0x53, 0x23, 0x17, 0xD3, 0xE2, 0xE9, 0x08, 0x87, 0x42, 0xFF, 0xCF,
    0x26, 0x42, 0xAE, 0x76, 0xB5, 0xAE, 0x97, 0x0C, 0x18, 0x78, 0xA0, 0x24, 0xE5, 0x54, 0x0C, 0x6E,
    0x60, 0x52, 0x79, 0x22, 0x57, 0xF5, 0x87, 0x78, 0x78, 0x04, 0x93, 0x46, 0xEF, 0xCB, 0x98, 0x96,
    0x8B, 0x65, 0x00, 0xB7, 0x36, 0xBD, 0x77, 0xA8, 0xBD, 0x5A, 0xAA, 0x1A, 0x09, 0x00, 0x00
        ];

        let mut mr = MemoryReader::new_read(TEST_DATA);
        let mut br = ByteReader::new(&mut mr);
        // Only successful decoding (including the CRC and size check) is verified here.
        let _decoded = gzip_decode(&mut br, false).unwrap();
    }
}