core: add a new (de)compression module
[nihav.git] / nihav-core / src / compr / deflate.rs
1 //! Deflate format (RFC 1951) support.
2 //!
3 //! This module provides functionality for decompressing raw deflated streams via [`Inflate`] and gzip files (RFC 1952) via [`gzip_decode`].
4 //!
5 //! [`Inflate`]: ./struct.Inflate.html
6 //! [`gzip_decode`]: ./fn.gzip_decode.html
7 //!
8 //! # Examples
9 //!
10 //! Decompressing full input buffer into sufficiently large output buffer:
11 //! ```
12 //! # use nihav_core::compr::DecompressError;
13 //! use nihav_core::compr::deflate::Inflate;
14 //!
15 //! # fn decompress(input: &[u8]) -> Result<(), DecompressError> {
16 //! # let mut output_buffer = [0u8; 16];
17 //! let output_length = Inflate::uncompress(input, &mut output_buffer)?;
18 //! # Ok(())
19 //! # }
20 //! ```
21 //!
22 //! Decompressing input chunks into portions of output:
23 //! ```
24 //! use nihav_core::compr::DecompressError;
25 //! use nihav_core::compr::deflate::Inflate;
26 //!
27 //! # fn decompress(input_data: &[u8]) -> Result<(), DecompressError> {
28 //! let mut inflate = Inflate::new();
29 //! let mut dst_buf: Vec<u8> = Vec::new();
30 //! let mut output_chunk = [0u8; 1024];
31 //! for src in input_data.chunks(512) {
32 //! let mut repeat = false;
33 //! loop {
34 //! let ret = inflate.decompress_data(src, &mut output_chunk, repeat);
35 //! match ret {
36 //! Ok(len) => { // we got a buffer decoded successfully to the end
37 //! dst_buf.extend_from_slice(&output_chunk[..len]);
38 //! break;
39 //! },
40 //! Err(DecompressError::ShortData) => { // this block of data was fully read
41 //! break;
42 //! },
43 //! Err(DecompressError::OutputFull) => {
44 //! // the output buffer is full, flush it and continue decoding the same block
45 //! repeat = true;
46 //! dst_buf.extend_from_slice(&output_chunk);
47 //! },
48 //! Err(err) => {
49 //! return Err(err);
50 //! },
51 //! }
52 //! }
53 //! }
54 //! # Ok(())
55 //! # }
56 //! ```
57
58 use crate::io::byteio::*;
59 use crate::io::bitreader::*;
60 use crate::io::codebook::*;
61 use super::*;
62
/// Number of entries reserved for literal/length code lengths in the dynamic block tables.
///
/// The predefined (fixed) codebook describes one more symbol than this.
const NUM_LITERALS: usize = 287;
/// Number of entries reserved for distance code lengths in the dynamic block tables.
const NUM_DISTS: usize = 32;
65
66 struct FixedLenCodeReader {}
67
68 impl CodebookDescReader<u16> for FixedLenCodeReader {
69 fn bits(&mut self, idx: usize) -> u8 {
70 if idx < 144 { 8 }
71 else if idx < 256 { 9 }
72 else if idx < 280 { 7 }
73 else { 8 }
74 }
75 fn code(&mut self, idx: usize) -> u32 {
76 let base = idx as u32;
77 let bits = self.bits(idx);
78 if idx < 144 { reverse_bits(base + 0x30, bits) }
79 else if idx < 256 { reverse_bits(base + 0x190 - 144, bits) }
80 else if idx < 280 { reverse_bits(base + 0x000 - 256, bits) }
81 else { reverse_bits(base + 0xC0 - 280, bits) }
82 }
83 fn sym (&mut self, idx: usize) -> u16 { idx as u16 }
84 fn len(&mut self) -> usize { NUM_LITERALS + 1 }
85 }
86
/// Bit reader state that is saved between decoding calls so reading can be resumed later.
#[derive(Clone, Copy, Default)]
struct BitReaderState {
    /// Index of the next byte to fetch from the current source slice.
    pos: usize,
    /// Cache of fetched bits; the next bit to be read is the least significant one.
    bitbuf: u32,
    /// Number of valid bits held in `bitbuf`.
    bits: u8,
}
93
94 struct CurrentSource<'a> {
95 src: &'a [u8],
96 br: BitReaderState,
97 }
98
99 impl<'a> CurrentSource<'a> {
100 fn new(src: &'a [u8], br: BitReaderState) -> Self {
101 let mut newsrc = Self { src, br };
102 newsrc.br.pos = 0;
103 newsrc.refill();
104 newsrc
105 }
106 fn reinit(src: &'a [u8], br: BitReaderState) -> Self {
107 let mut newsrc = Self { src, br };
108 newsrc.refill();
109 newsrc
110 }
111 fn refill(&mut self) {
112 while (self.br.bits <= 24) && (self.br.pos < self.src.len()) {
113 self.br.bitbuf |= u32::from(self.src[self.br.pos]) << self.br.bits;
114 self.br.bits += 8;
115 self.br.pos += 1;
116 }
117 }
118 fn skip_cache(&mut self, nbits: u8) {
119 self.br.bitbuf >>= nbits;
120 self.br.bits -= nbits;
121 }
122 fn read(&mut self, nbits: u8) -> BitReaderResult<u32> {
123 if nbits == 0 { return Ok(0); }
124 if nbits > 16 { return Err(BitReaderError::TooManyBitsRequested); }
125 if self.br.bits < nbits {
126 self.refill();
127 if self.br.bits < nbits { return Err(BitReaderError::BitstreamEnd); }
128 }
129 let ret = self.br.bitbuf & ((1 << nbits) - 1);
130 self.skip_cache(nbits);
131 Ok(ret)
132 }
133 fn read_bool(&mut self) -> BitReaderResult<bool> {
134 if self.br.bits == 0 {
135 self.refill();
136 if self.br.bits == 0 { return Err(BitReaderError::BitstreamEnd); }
137 }
138 let ret = (self.br.bitbuf & 1) != 0;
139 self.skip_cache(1);
140 Ok(ret)
141 }
142 fn peek(&mut self, nbits: u8) -> u32 {
143 if nbits == 0 || nbits > 16 { return 0; }
144 if self.br.bits < nbits {
145 self.refill();
146 }
147 self.br.bitbuf & ((1 << nbits) - 1)
148 }
149 fn skip(&mut self, nbits: u32) -> BitReaderResult<()> {
150 if u32::from(self.br.bits) >= nbits {
151 self.skip_cache(nbits as u8);
152 } else {
153 unreachable!();
154 }
155 Ok(())
156 }
157 fn align(&mut self) {
158 let b = self.br.bits & 7;
159 if b != 0 {
160 self.skip_cache(8 - (b as u8));
161 }
162 }
163 fn left(&self) -> isize {
164 ((self.src.len() as isize) - (self.br.pos as isize)) * 8 + (self.br.bits as isize)
165 }
166 }
167
168 impl<'a, S: Copy> CodebookReader<S> for CurrentSource<'a> {
169 fn read_cb(&mut self, cb: &Codebook<S>) -> CodebookResult<S> {
170 let mut esc = true;
171 let mut idx = 0;
172 let mut lut_bits = cb.lut_bits;
173 let orig_br = self.br;
174 while esc {
175 let lut_idx = (self.peek(lut_bits) as usize) + (idx as usize);
176 if cb.table[lut_idx] == TABLE_FILL_VALUE { return Err(CodebookError::InvalidCode); }
177 let bits = cb.table[lut_idx] & 0x7F;
178 esc = (cb.table[lut_idx] & 0x80) != 0;
179 idx = (cb.table[lut_idx] >> 8) as usize;
180 let skip_bits = if esc { u32::from(lut_bits) } else { bits };
181 if (skip_bits as isize) > self.left() {
182 self.br = orig_br;
183 self.refill();
184 return Err(CodebookError::MemoryError);
185 }
186 self.skip(skip_bits as u32).unwrap();
187 lut_bits = bits as u8;
188 }
189 Ok(cb.syms[idx])
190 }
191 }
192
/// Decoder state; payloads carry the values needed to resume after an interruption.
enum InflateState {
    /// Nothing has been decoded yet.
    Start,
    /// The final-block flag of the next block is expected.
    BlockStart,
    /// The two-bit block mode is expected.
    BlockMode,
    /// Stored block: the 16-bit length is expected.
    StaticBlockLen,
    /// Stored block: the 16-bit length check value is expected (payload: length read so far).
    StaticBlockInvLen(u32),
    /// Stored block: raw bytes are being copied (payload: bytes left).
    StaticBlockCopy(usize),
    /// Fixed-code block: the next literal/length symbol is expected.
    FixedBlock,
    /// Fixed-code block: extra length bits are expected (payload: base length, bit count).
    FixedBlockLengthExt(usize, u8),
    /// Fixed-code block: the distance code is expected (payload: copy length).
    FixedBlockDist(usize),
    /// Fixed-code block: extra distance bits are expected (payload: copy length, base distance, bit count).
    FixedBlockDistExt(usize, usize, u8),
    /// Fixed-code block: a copy is pending (payload: length left, distance).
    FixedBlockCopy(usize, usize),
    /// Fixed-code block: a decoded literal waits for room in the output buffer.
    FixedBlockLiteral(u8),
    /// Dynamic block header: literal/length code count is expected.
    DynBlockHlit,
    /// Dynamic block header: distance code count is expected.
    DynBlockHdist,
    /// Dynamic block header: code-length code count is expected.
    DynBlockHclen,
    /// Dynamic block header: code-length code lengths are being read (payload: entries left).
    DynLengths(usize),
    /// Dynamic block header: literal/length and distance code lengths are being read.
    DynCodeLengths,
    /// Dynamic block header: a repeat count is expected (payload: repeat mode index).
    DynCodeLengthsAdd(usize),
    /// Dynamic block: the next literal/length symbol is expected.
    DynBlock,
    /// Dynamic block: extra length bits are expected (payload: base length, bit count).
    DynBlockLengthExt(usize, u8),
    /// Dynamic block: the distance code is expected (payload: copy length).
    DynBlockDist(usize),
    /// Dynamic block: extra distance bits are expected (payload: copy length, base distance, bit count).
    DynBlockDistExt(usize, usize, u8),
    /// Dynamic block: a copy is pending (payload: length left, distance).
    DynCopy(usize, usize),
    /// Dynamic block: a decoded literal waits for room in the output buffer.
    DynBlockLiteral(u8),
    /// Decoding has finished (or was aborted on an error).
    End,
}
220
221 ///! The decompressor for deflated streams (RFC 1951).
222 pub struct Inflate {
223 br: BitReaderState,
224 fix_len_cb: Codebook<u16>,
225
226 buf: [u8; 65536],
227 bpos: usize,
228 output_idx: usize,
229
230 state: InflateState,
231 final_block: bool,
232 hlit: usize,
233 hdist: usize,
234 dyn_len_cb: Option<Codebook<u32>>,
235 dyn_lit_cb: Option<Codebook<u32>>,
236 dyn_dist_cb: Option<Codebook<u32>>,
237 len_lengths: [u8; 19],
238 all_lengths: [u8; NUM_LITERALS + NUM_DISTS],
239 cur_len_idx: usize,
240 }
241
/// Number of extra bits following each length symbol (indexed by symbol minus 257).
const LENGTH_ADD_BITS: [u8; 29] = [
    0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1,
    2, 2, 2, 2,
    3, 3, 3, 3,
    4, 4, 4, 4,
    5, 5, 5, 5,
    0
];
/// Base copy length for each length symbol (indexed by symbol minus 257).
const LENGTH_BASE: [u16; 29] = [
      3,   4,   5,   6,   7,   8,   9,  10,
     11,  13,  15,  17,
     19,  23,  27,  31,
     35,  43,  51,  59,
     67,  83,  99, 115,
    131, 163, 195, 227,
    258
];
/// Number of extra bits following each distance symbol.
const DIST_ADD_BITS: [u8; 30] = [
     0,  0,  0,  0,
     1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,
     8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 13
];
/// Base copy distance for each distance symbol.
const DIST_BASE: [u16; 30] = [
        1,     2,     3,     4,
        5,     7,     9,    13,    17,    25,    33,    49,    65,    97,
      129,   193,   257,   385,   513,   769,  1025,  1537,  2049,  3073,
     4097,  6145,  8193, 12289, 16385, 24577
];
/// Order in which the lengths of the code-length codes are stored in a dynamic block header.
const LEN_RECODE: [usize; 19] = [
    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
];
/// Number of bits holding the repeat count for code-length symbols 16, 17 and 18.
const REPEAT_BITS: [u8; 3] = [ 2, 3, 7 ];
/// Minimum repeat count for code-length symbols 16, 17 and 18.
const REPEAT_BASE: [u8; 3] = [ 3, 3, 11 ];
267
/// Reads `$bits` bits from `$csrc`.
///
/// When the source cannot supply that many bits, the reader state is stored into `$self.br`
/// and the enclosing function returns `DecompressError::ShortData`.
macro_rules! read_bits {
    ($self: expr, $csrc: expr, $bits: expr) => ({
        if ($bits as isize) > $csrc.left() {
            $self.br = $csrc.br;
            return Err(DecompressError::ShortData);
        }
        $csrc.read($bits).unwrap()
    })
}
277
/// Decodes one symbol from `$csrc` with codebook `$cb`.
///
/// On `CodebookError::MemoryError` the reader state is stored into `$self.br` and the
/// enclosing function returns `DecompressError::ShortData`; on any other error decoding is
/// terminated and `DecompressError::InvalidData` is returned.
macro_rules! read_cb {
    ($self: expr, $csrc: expr, $cb: expr) => ({
        let decoded = $csrc.read_cb($cb);
        match decoded {
            Ok(val) => val,
            Err(CodebookError::MemoryError) => {
                $self.br = $csrc.br;
                return Err(DecompressError::ShortData);
            },
            Err(_) => {
                $self.state = InflateState::End;
                return Err(DecompressError::InvalidData);
            },
        }
    })
}
294
295 impl Inflate {
296 ///! Creates a new instance of `Inflate` struct.
297 pub fn new() -> Self {
298 let mut cr = FixedLenCodeReader {};
299 let fix_len_cb = Codebook::new(&mut cr, CodebookMode::LSB).unwrap();
300 Self {
301 br: BitReaderState::default(),
302 fix_len_cb,
303
304 buf: [0; 65536],
305 bpos: 0,
306 output_idx: 0,
307
308 state: InflateState::Start,
309 final_block: false,
310 dyn_len_cb: None,
311 dyn_lit_cb: None,
312 dyn_dist_cb: None,
313 hlit: 0,
314 hdist: 0,
315 len_lengths: [0; 19],
316 all_lengths: [0; NUM_LITERALS + NUM_DISTS],
317 cur_len_idx: 0,
318 }
319 }
320 fn put_literal(&mut self, val: u8) {
321 self.buf[self.bpos] = val;
322 self.bpos += 1;
323 }
324 fn lz_copy(&mut self, offset: usize, len: usize, dst: &mut [u8]) -> DecompressResult<()> {
325 let mask = self.buf.len() - 1;
326 if self.bpos < offset {
327 return Err(DecompressError::InvalidData);
328 }
329 let cstart = (self.bpos - offset) & mask;
330 for i in 0..len {
331 self.buf[(self.bpos + i) & mask] = self.buf[(cstart + i) & mask];
332 dst[i] = self.buf[(cstart + i) & mask];
333 }
334 self.bpos += len;
335 Ok(())
336 }
337 ///! Reports whether decoder has finished decoding the input.
338 pub fn is_finished(&self) -> bool {
339 match self.state {
340 InflateState::End => true,
341 _ => false,
342 }
343 }
344 ///! Reports the current amount of bytes output into the destination buffer after the last run.
345 pub fn get_current_output_size(&self) -> usize { self.output_idx }
346 ///! Reports the total amount of bytes decoded so far.
347 pub fn get_total_output_size(&self) -> usize { self.bpos }
348 ///! Tries to decompress input data and write it to the output buffer.
349 ///!
350 ///! Since the decompressor can work with arbitrary input and output chunks its return value may have several meanings:
351 ///! * `Ok(len)` means the stream has been fully decoded and then number of bytes output into the destination buffer is returned.
352 ///! * [`DecompressError::ShortData`] means the input stream has been fully read but more data is needed.
353 ///! * [`DecompressError::OutputFull`] means the output buffer is full and should be flushed. Then decoding should continue on the same input block with `continue_block` parameter set to `true`.
354 ///!
355 ///! [`DecompressError::ShortData`]: ../enum.DecompressError.html#variant.ShortData
356 ///! [`DecompressError::OutputFull`]: ../enum.DecompressError.html#variant.OutputFull
357 pub fn decompress_data(&mut self, src: &[u8], dst: &mut [u8], continue_block: bool) -> DecompressResult<usize> {
358 if src.len() == 0 || dst.len() == 0 {
359 return Err(DecompressError::InvalidArgument);
360 }
361 let mut csrc = if !continue_block {
362 CurrentSource::new(src, self.br)
363 } else {
364 self.output_idx = 0;
365 CurrentSource::reinit(src, self.br)
366 };
367 'main: loop {
368 match self.state {
369 InflateState::Start | InflateState::BlockStart => {
370 if csrc.left() == 0 {
371 self.br = csrc.br;
372 return Err(DecompressError::ShortData);
373 }
374 self.final_block = csrc.read_bool().unwrap();
375 self.state = InflateState::BlockMode;
376 },
377 InflateState::BlockMode => {
378 let bmode = read_bits!(self, csrc, 2);
379 match bmode {
380 0 => {
381 csrc.align();
382 self.state = InflateState::StaticBlockLen;
383 },
384 1 => { self.state = InflateState::FixedBlock; },
385 2 => { self.state = InflateState::DynBlockHlit; },
386 _ => {
387 self.state = InflateState::End;
388 return Err(DecompressError::InvalidHeader);
389 },
390 };
391 },
392 InflateState::StaticBlockLen => {
393 let len = read_bits!(self, csrc, 16);
394 self.state = InflateState::StaticBlockInvLen(len);
395 },
396 InflateState::StaticBlockInvLen(len) => {
397 let inv_len = read_bits!(self, csrc, 16);
398 if len != !inv_len {
399 self.state = InflateState::End;
400 return Err(DecompressError::InvalidHeader);
401 }
402 self.state = InflateState::StaticBlockCopy(len as usize);
403 },
404 InflateState::StaticBlockCopy(len) => {
405 for i in 0..len {
406 if csrc.left() < 8 {
407 self.br = csrc.br;
408 self.state = InflateState::StaticBlockCopy(len - i);
409 return Err(DecompressError::ShortData);
410 }
411 let val = csrc.read(8).unwrap() as u8;
412 self.put_literal(val);
413 }
414 self.state = InflateState::BlockStart;
415 }
416 InflateState::FixedBlock => {
417 let val = read_cb!(self, csrc, &self.fix_len_cb);
418 if val < 256 {
419 if self.output_idx >= dst.len() {
420 self.br = csrc.br;
421 self.state = InflateState::FixedBlockLiteral(val as u8);
422 return Err(DecompressError::OutputFull);
423 }
424 self.put_literal(val as u8);
425 dst[self.output_idx] = val as u8;
426 self.output_idx += 1;
427 } else if val == 256 {
428 if self.final_block {
429 self.state = InflateState::End;
430 return Ok(self.output_idx);
431 } else {
432 self.state = InflateState::BlockStart;
433 }
434 } else {
435 let len_idx = (val - 257) as usize;
436 if len_idx >= LENGTH_BASE.len() {
437 self.state = InflateState::End;
438 return Err(DecompressError::InvalidData);
439 }
440 let len_bits = LENGTH_ADD_BITS[len_idx];
441 let add_base = LENGTH_BASE[len_idx] as usize;
442 if len_bits > 0 {
443 self.state = InflateState::FixedBlockLengthExt(add_base, len_bits);
444 } else {
445 self.state = InflateState::FixedBlockDist(add_base);
446 }
447 }
448 },
449 InflateState::FixedBlockLiteral(sym) => {
450 if self.output_idx >= dst.len() {
451 self.br = csrc.br;
452 return Err(DecompressError::OutputFull);
453 }
454 self.put_literal(sym);
455 dst[self.output_idx] = sym;
456 self.output_idx += 1;
457 self.state = InflateState::FixedBlock;
458 },
459 InflateState::FixedBlockLengthExt(base, bits) => {
460 let add = read_bits!(self, csrc, bits) as usize;
461 self.state = InflateState::FixedBlockDist(base + add);
462 },
463 InflateState::FixedBlockDist(length) => {
464 let dist_idx = reverse_bits(read_bits!(self, csrc, 5), 5) as usize;
465 if dist_idx >= DIST_BASE.len() {
466 self.state = InflateState::End;
467 return Err(DecompressError::InvalidData);
468 }
469 let dist_bits = DIST_ADD_BITS[dist_idx];
470 let dist_base = DIST_BASE[dist_idx] as usize;
471 if dist_bits == 0 {
472 self.state = InflateState::FixedBlockCopy(length, dist_base);
473 } else {
474 self.state = InflateState::FixedBlockDistExt(length, dist_base, dist_bits);
475 }
476 },
477 InflateState::FixedBlockDistExt(length, base, bits) => {
478 let add = read_bits!(self, csrc, bits) as usize;
479 self.state = InflateState::FixedBlockCopy(length, base + add);
480 },
481 InflateState::FixedBlockCopy(length, dist) => {
482 if self.output_idx + length > dst.len() {
483 let copy_size = dst.len() - self.output_idx;
484 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
485 if ret.is_err() {
486 self.state = InflateState::End;
487 return Err(DecompressError::InvalidData);
488 }
489 self.output_idx += copy_size;
490 self.br = csrc.br;
491 self.state = InflateState::FixedBlockCopy(length - copy_size, dist);
492 return Err(DecompressError::OutputFull);
493 }
494 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
495 if ret.is_err() {
496 self.state = InflateState::End;
497 return Err(DecompressError::InvalidData);
498 }
499 self.output_idx += length;
500 self.state = InflateState::FixedBlock;
501 }
502 InflateState::DynBlockHlit => {
503 self.hlit = (read_bits!(self, csrc, 5) as usize) + 257;
504 if self.hlit >= 287 {
505 self.state = InflateState::End;
506 return Err(DecompressError::InvalidHeader);
507 }
508 self.state = InflateState::DynBlockHdist;
509 }
510 InflateState::DynBlockHdist => {
511 self.hdist = (read_bits!(self, csrc, 5) as usize) + 1;
512 self.state = InflateState::DynBlockHclen;
513 },
514 InflateState::DynBlockHclen => {
515 let hclen = (read_bits!(self, csrc, 4) as usize) + 4;
516 self.cur_len_idx = 0;
517 self.len_lengths = [0; 19];
518 self.all_lengths = [0; NUM_LITERALS + NUM_DISTS];
519 self.state = InflateState::DynLengths(hclen);
520 },
521 InflateState::DynLengths(len) => {
522 for i in 0..len {
523 if csrc.left() < 3 {
524 self.br = csrc.br;
525 self.state = InflateState::DynLengths(len - i);
526 return Err(DecompressError::ShortData);
527 }
528 self.len_lengths[LEN_RECODE[self.cur_len_idx]] = csrc.read(3).unwrap() as u8;
529 self.cur_len_idx += 1;
530 }
531 let mut len_codes = [ShortCodebookDesc { code: 0, bits: 0 }; 19];
532 lengths_to_codes(&self.len_lengths, &mut len_codes)?;
533 let mut cr = ShortCodebookDescReader::new(len_codes.to_vec());
534 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
535 if ret.is_err() {
536 self.state = InflateState::End;
537 return Err(DecompressError::InvalidHeader);
538 }
539 self.dyn_len_cb = Some(ret.unwrap());
540 self.cur_len_idx = 0;
541 self.state = InflateState::DynCodeLengths;
542 },
543 InflateState::DynCodeLengths => {
544 if let Some(ref len_cb) = self.dyn_len_cb {
545 while self.cur_len_idx < self.hlit + self.hdist {
546 let ret = csrc.read_cb(len_cb);
547 let val = match ret {
548 Ok(val) => val,
549 Err(CodebookError::MemoryError) => {
550 self.br = csrc.br;
551 return Err(DecompressError::ShortData);
552 },
553 Err(_) => {
554 self.state = InflateState::End;
555 return Err(DecompressError::InvalidHeader);
556 },
557 };
558 if val < 16 {
559 self.all_lengths[self.cur_len_idx] = val as u8;
560 self.cur_len_idx += 1;
561 } else {
562 let idx = (val as usize) - 16;
563 if idx > 2 {
564 self.state = InflateState::End;
565 return Err(DecompressError::InvalidHeader);
566 }
567 self.state = InflateState::DynCodeLengthsAdd(idx);
568 continue 'main;
569 }
570 }
571 let (lit_lengths, dist_lengths) = self.all_lengths.split_at(self.hlit);
572
573 let mut lit_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_LITERALS];
574 lengths_to_codes(&lit_lengths, &mut lit_codes)?;
575 let mut cr = ShortCodebookDescReader::new(lit_codes.to_vec());
576 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
577 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
578 self.dyn_lit_cb = Some(ret.unwrap());
579
580 let mut dist_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_DISTS];
581 lengths_to_codes(&dist_lengths[..self.hdist], &mut dist_codes)?;
582 let mut cr = ShortCodebookDescReader::new(dist_codes.to_vec());
583 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
584 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
585 self.dyn_dist_cb = Some(ret.unwrap());
586
587 self.state = InflateState::DynBlock;
588 } else {
589 unreachable!();
590 }
591 },
592 InflateState::DynCodeLengthsAdd(mode) => {
593 let base = REPEAT_BASE[mode] as usize;
594 let bits = REPEAT_BITS[mode];
595 let len = base + read_bits!(self, csrc, bits) as usize;
596 if self.cur_len_idx + len > self.hlit + self.hdist {
597 self.state = InflateState::End;
598 return Err(DecompressError::InvalidHeader);
599 }
600 let rpt;
601 if mode == 0 {
602 if self.cur_len_idx == 0 {
603 self.state = InflateState::End;
604 return Err(DecompressError::InvalidHeader);
605 }
606 rpt = self.all_lengths[self.cur_len_idx - 1];
607 } else {
608 rpt = 0;
609 }
610 for _ in 0..len {
611 self.all_lengths[self.cur_len_idx] = rpt;
612 self.cur_len_idx += 1;
613 }
614 self.state = InflateState::DynCodeLengths;
615 },
616 InflateState::DynBlock => {
617 if let Some(ref lit_cb) = self.dyn_lit_cb {
618 let val = read_cb!(self, csrc, lit_cb);
619 if val < 256 {
620 if self.output_idx >= dst.len() {
621 self.br = csrc.br;
622 self.state = InflateState::DynBlockLiteral(val as u8);
623 return Err(DecompressError::OutputFull);
624 }
625 self.put_literal(val as u8);
626 dst[self.output_idx] = val as u8;
627 self.output_idx += 1;
628 } else if val == 256 {
629 if self.final_block {
630 self.state = InflateState::End;
631 return Ok(self.output_idx);
632 } else {
633 self.state = InflateState::BlockStart;
634 }
635 } else {
636 let len_idx = (val - 257) as usize;
637 if len_idx >= LENGTH_BASE.len() {
638 self.state = InflateState::End;
639 return Err(DecompressError::InvalidData);
640 }
641 let len_bits = LENGTH_ADD_BITS[len_idx];
642 let add_base = LENGTH_BASE[len_idx] as usize;
643 if len_bits > 0 {
644 self.state = InflateState::DynBlockLengthExt(add_base, len_bits);
645 } else {
646 self.state = InflateState::DynBlockDist(add_base);
647 }
648 }
649 } else {
650 unreachable!();
651 }
652 },
653 InflateState::DynBlockLiteral(sym) => {
654 if self.output_idx >= dst.len() {
655 self.br = csrc.br;
656 return Err(DecompressError::OutputFull);
657 }
658 self.put_literal(sym);
659 dst[self.output_idx] = sym;
660 self.output_idx += 1;
661 self.state = InflateState::DynBlock;
662 },
663 InflateState::DynBlockLengthExt(base, bits) => {
664 let add = read_bits!(self, csrc, bits) as usize;
665 self.state = InflateState::DynBlockDist(base + add);
666 },
667 InflateState::DynBlockDist(length) => {
668 if let Some(ref dist_cb) = self.dyn_dist_cb {
669 let dist_idx = read_cb!(self, csrc, dist_cb) as usize;
670 if dist_idx >= DIST_BASE.len() {
671 self.state = InflateState::End;
672 return Err(DecompressError::InvalidData);
673 }
674 let dist_bits = DIST_ADD_BITS[dist_idx];
675 let dist_base = DIST_BASE[dist_idx] as usize;
676 if dist_bits == 0 {
677 self.state = InflateState::DynCopy(length, dist_base);
678 } else {
679 self.state = InflateState::DynBlockDistExt(length, dist_base, dist_bits);
680 }
681 } else {
682 unreachable!();
683 }
684 },
685 InflateState::DynBlockDistExt(length, base, bits) => {
686 let add = read_bits!(self, csrc, bits) as usize;
687 self.state = InflateState::DynCopy(length, base + add);
688 },
689 InflateState::DynCopy(length, dist) => {
690 if self.output_idx + length > dst.len() {
691 let copy_size = dst.len() - self.output_idx;
692 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
693 if ret.is_err() {
694 self.state = InflateState::End;
695 return Err(DecompressError::InvalidData);
696 }
697 self.output_idx += copy_size;
698 self.br = csrc.br;
699 self.state = InflateState::DynCopy(length - copy_size, dist);
700 return Err(DecompressError::OutputFull);
701 }
702 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
703 if ret.is_err() {
704 self.state = InflateState::End;
705 return Err(DecompressError::InvalidData);
706 }
707 self.output_idx += length;
708 self.state = InflateState::DynBlock;
709 }
710 InflateState::End => {
711 return Ok(0);
712 },
713 }
714 }
715 }
716 ///! Decompresses input data into output returning the uncompressed data length.
717 pub fn uncompress(src: &[u8], dst: &mut [u8]) -> DecompressResult<usize> {
718 let mut inflate = Self::new();
719 inflate.decompress_data(src, dst, false)
720 }
721 }
722
723 fn lengths_to_codes(lens: &[u8], codes: &mut [ShortCodebookDesc]) -> DecompressResult<()> {
724 let mut bits = [0u32; 32];
725 let mut pfx = [0u32; 33];
726 for len in lens.iter() {
727 let len = *len as usize;
728 if len >= bits.len() {
729 return Err(DecompressError::InvalidHeader);
730 }
731 bits[len] += 1;
732 }
733 bits[0] = 0;
734 let mut code = 0;
735 for i in 0..bits.len() {
736 code = (code + bits[i]) << 1;
737 pfx[i + 1] = code;
738 }
739
740 for (len, codes) in lens.iter().zip(codes.iter_mut()) {
741 let len = *len as usize;
742 if len != 0 {
743 let bits = len as u8;
744 *codes = ShortCodebookDesc { code: reverse_bits(pfx[len], bits), bits };
745 pfx[len] += 1;
746 } else {
747 *codes = ShortCodebookDesc { code: 0, bits: 0 };
748 }
749 }
750
751 Ok(())
752 }
753
/// Table-driven CRC-32 calculator used to verify gzip payloads.
struct GzipCRC32 {
    /// Lookup table with one entry per byte value.
    tab: [u32; 256],
    /// Checksum of all data fed so far.
    crc: u32,
}

impl GzipCRC32 {
    /// Builds the lookup table and starts with a zero checksum.
    fn new() -> Self {
        let mut tab = [0u32; 256];
        for (i, entry) in tab.iter_mut().enumerate() {
            *entry = (0..8).fold(i as u32, |c, _| {
                if (c & 1) != 0 { 0xEDB88320 ^ (c >> 1) } else { c >> 1 }
            });
        }
        Self { tab, crc: 0 }
    }
    /// Updates the running checksum with the bytes of `src`.
    fn update_crc(&mut self, src: &[u8]) {
        // The checksum is stored in its final (inverted) form between calls.
        let folded = src.iter().fold(!self.crc, |c, &byte| {
            self.tab[((c ^ u32::from(byte)) & 0xFF) as usize] ^ (c >> 8)
        });
        self.crc = !folded;
    }
}
783
784 ///! Decodes input data in gzip file format (RFC 1952) returning a vector containing decoded data.
785 pub fn gzip_decode(br: &mut ByteReader, skip_crc: bool) -> DecompressResult<Vec<u8>> {
786 const FLAG_HCRC: u8 = 0x02;
787 const FLAG_EXTRA: u8 = 0x04;
788 const FLAG_NAME: u8 = 0x08;
789 const FLAG_COMMENT: u8 = 0x10;
790
791 let id1 = br.read_byte()?;
792 let id2 = br.read_byte()?;
793 let cm = br.read_byte()?;
794 let flg = br.read_byte()?;
795 let _mtime = br.read_u32le()?;
796 let _xfl = br.read_byte()?;
797 let _os = br.read_byte()?;
798 if id1 != 0x1F || id2 != 0x8B || cm != 8 {
799 return Err(DecompressError::InvalidHeader);
800 }
801
802 if (flg & FLAG_EXTRA) != 0 {
803 let xlen = br.read_u16le()? as usize;
804 br.read_skip(xlen)?;
805 }
806 if (flg & FLAG_NAME) != 0 {
807 loop {
808 let b = br.read_byte()?;
809 if b == 0 {
810 break;
811 }
812 }
813 }
814 if (flg & FLAG_COMMENT) != 0 {
815 loop {
816 let b = br.read_byte()?;
817 if b == 0 {
818 break;
819 }
820 }
821 }
822 let _hcrc = if (flg & FLAG_HCRC) != 0 {
823 br.read_u16le()?
824 } else {
825 0
826 };
827 if (flg & 0xE0) != 0 {
828 return Err(DecompressError::Unsupported);
829 }
830
831 let mut output: Vec<u8> = Vec::new();
832 let mut tail = [0u8; 8];
833 let mut inblk = [0u8; 1024];
834 let mut oblk = [0u8; 4096];
835 let mut inflate = Inflate::new();
836 let mut checker = GzipCRC32::new();
837
838 loop {
839 let ret = br.read_buf_some(&mut inblk);
840 if let Err(ByteIOError::EOF) = ret {
841 break;
842 }
843 let inlen = match ret {
844 Ok(val) => val,
845 Err(_) => return Err(DecompressError::IOError),
846 };
847 let mut repeat = false;
848 loop {
849 let ret = inflate.decompress_data(&inblk[..inlen], &mut oblk, repeat);
850 match ret {
851 Ok(outlen) => {
852 checker.update_crc(&oblk[..outlen]);
853 output.extend_from_slice(&oblk[..outlen]);
854 break;
855 },
856 Err(DecompressError::ShortData) => {
857 break;
858 },
859 Err(DecompressError::OutputFull) => {
860 repeat = true;
861 checker.update_crc(&oblk);
862 output.extend_from_slice(&oblk);
863 },
864 Err(err) => {
865 return Err(err);
866 },
867 }
868 }
869 // Save last 8 bytes for CRC and size.
870 if inlen >= 8 {
871 tail.copy_from_slice(&inblk[inlen - 8..][..8]);
872 } else {
873 let shift_len = 8 - inlen;
874 for i in 0..shift_len {
875 tail[i] = tail[i + inlen];
876 }
877 for i in shift_len..8 {
878 tail[i] = inblk[i - shift_len];
879 }
880 }
881 }
882 if !skip_crc {
883 if !inflate.is_finished() { println!("???"); }
884 let crc = read_u32le(&tail[0..4])?;
885 let size = read_u32le(&tail[4..8])?;
886 if size != (output.len() as u32) {
887 return Err(DecompressError::CRCError);
888 }
889 if crc != checker.crc {
890 return Err(DecompressError::CRCError);
891 }
892 }
893
894 Ok(output)
895 }
896
#[cfg(test)]
mod test {
    use super::*;

    /// One-shot decoding of a small fixed-code stream.
    #[test]
    fn test_inflate1() {
        const TEST_DATA: &[u8] = &[
                0xF3, 0x48, 0xCD, 0xC9, 0xC9, 0xD7, 0x51, 0x28,
                0xCF, 0x2F, 0xCA, 0x49, 0x51, 0x04, 0x00 ];
        const TEST_REF: &[u8] = b"Hello, world!";
        let mut dst_buf = [0u8; 13];
        let len = Inflate::uncompress(TEST_DATA, &mut dst_buf).unwrap();
        assert_eq!(len, 13);
        assert_eq!(&dst_buf[..len], &TEST_REF[..len]);
    }
    /// Chunked decoding with tiny input and output buffers.
    #[test]
    fn test_inflate2() {
        const TEST_DATA3: &[u8] = &[ 0x4B, 0x4C, 0x44, 0x80, 0x24, 0x54, 0x80, 0x2C, 0x06, 0x00 ];
        const TEST_REF3: &[u8] = b"aaaaaaaaaaaabbbbbbbbbbbbbbbaaaaabbbbbbb";
        let mut dst_buf = [0u8; 39];

        let mut inflate = Inflate::new();
        let mut output_chunk = [0u8; 7];
        let mut output_pos = 0;
        for input in TEST_DATA3.chunks(3) {
            let mut repeat = false;
            loop {
                match inflate.decompress_data(input, &mut output_chunk, repeat) {
                    Ok(len) => {
                        dst_buf[output_pos..][..len].copy_from_slice(&output_chunk[..len]);
                        output_pos += len;
                        break;
                    },
                    Err(DecompressError::ShortData) => {
                        break;
                    },
                    Err(DecompressError::OutputFull) => {
                        repeat = true;
                        let chunk_len = output_chunk.len();
                        dst_buf[output_pos..][..chunk_len].copy_from_slice(&output_chunk);
                        output_pos += chunk_len;
                    },
                    Err(err) => {
                        panic!("decompress error {:?}", err);
                    },
                }
            }
        }

        assert_eq!(output_pos, dst_buf.len());
        assert_eq!(&dst_buf[..output_pos], &TEST_REF3[..output_pos]);
    }
    /// Decoding of a complete gzip file including size and checksum verification.
    #[test]
    fn test_inflate3() {
        const TEST_DATA: &[u8] = &[
    0x1F, 0x8B, 0x08, 0x08, 0xF6, 0x7B, 0x90, 0x5E, 0x02, 0x03, 0x31, 0x2E, 0x74, 0x78, 0x74, 0x00,
    0xE5, 0x95, 0x4B, 0x4E, 0xC3, 0x30, 0x10, 0x40, 0xF7, 0x39, 0xC5, 0x1C, 0x00, 0x16, 0x70, 0x83,
    0x0A, 0xB5, 0x3B, 0xE8, 0x82, 0x5E, 0x60, 0x1A, 0x4F, 0xE2, 0x11, 0xFE, 0x44, 0x1E, 0xA7, 0x69,
    0x6E, 0xCF, 0x38, 0xDD, 0xB0, 0x40, 0xA2, 0x46, 0x2D, 0x20, 0x2A, 0xE5, 0xAB, 0xCC, 0xE7, 0xBD,
    0x49, 0xAC, 0x6C, 0x03, 0x64, 0x4B, 0xD0, 0x71, 0x92, 0x0C, 0x06, 0x67, 0x88, 0x1D, 0x3C, 0xD9,
    0xC4, 0x92, 0x3D, 0x4A, 0xF3, 0x3C, 0x43, 0x4E, 0x23, 0x81, 0x8B, 0x07, 0x82, 0x1E, 0xF5, 0x90,
    0x23, 0x78, 0x6A, 0x56, 0x30, 0x60, 0xCA, 0x89, 0x4D, 0x4F, 0xC0, 0x01, 0x10, 0x06, 0xC2, 0xA4,
    0xA1, 0x44, 0xCD, 0xF6, 0x54, 0x50, 0xA8, 0x8D, 0xC1, 0x9C, 0x5F, 0x71, 0x37, 0x45, 0xC8, 0x63,
    0xCA, 0x8E, 0xC0, 0xE8, 0x23, 0x69, 0x56, 0x9A, 0x8D, 0x5F, 0xB6, 0xC9, 0x96, 0x53, 0x4D, 0x17,
    0xAB, 0xB9, 0xB0, 0x49, 0x14, 0x5A, 0x0B, 0x96, 0x82, 0x7C, 0xB7, 0x6F, 0x17, 0x35, 0xC7, 0x9E,
    0xDF, 0x78, 0xA3, 0xF1, 0xD0, 0xA2, 0x73, 0x1C, 0x7A, 0xD8, 0x2B, 0xB3, 0x5C, 0x90, 0x85, 0xBB,
    0x2A, 0x14, 0x2E, 0xF7, 0xD1, 0x19, 0x48, 0x0A, 0x23, 0x57, 0x45, 0x13, 0x3E, 0xD6, 0xA0, 0xBD,
    0xF2, 0x11, 0x7A, 0x22, 0x21, 0xAD, 0xE5, 0x70, 0x56, 0xA0, 0x9F, 0xA5, 0xA5, 0x03, 0x85, 0x2A,
    0xDE, 0x92, 0x00, 0x32, 0x61, 0x10, 0xAD, 0x27, 0x13, 0x7B, 0x5F, 0x98, 0x7F, 0x59, 0x83, 0xB8,
    0xB7, 0x35, 0x16, 0xEB, 0x12, 0x0F, 0x1E, 0xD9, 0x14, 0x0B, 0xCF, 0xEE, 0x6D, 0x91, 0xF8, 0x93,
    0x6E, 0x81, 0x3F, 0x7F, 0x41, 0xA4, 0x22, 0x1F, 0xB7, 0xE6, 0x85, 0x83, 0x9A, 0xA2, 0x61, 0x12,
    0x0D, 0x0F, 0x6D, 0x01, 0xBD, 0xB0, 0xE8, 0x1D, 0xEC, 0xD1, 0xA0, 0xBF, 0x1F, 0x4E, 0xFB, 0x55,
    0xBD, 0x73, 0xDD, 0x87, 0xB9, 0x53, 0x23, 0x17, 0xD3, 0xE2, 0xE9, 0x08, 0x87, 0x42, 0xFF, 0xCF,
    0x26, 0x42, 0xAE, 0x76, 0xB5, 0xAE, 0x97, 0x0C, 0x18, 0x78, 0xA0, 0x24, 0xE5, 0x54, 0x0C, 0x6E,
    0x60, 0x52, 0x79, 0x22, 0x57, 0xF5, 0x87, 0x78, 0x78, 0x04, 0x93, 0x46, 0xEF, 0xCB, 0x98, 0x96,
    0x8B, 0x65, 0x00, 0xB7, 0x36, 0xBD, 0x77, 0xA8, 0xBD, 0x5A, 0xAA, 0x1A, 0x09, 0x00, 0x00
        ];

        let mut mr = MemoryReader::new_read(TEST_DATA);
        let mut br = ByteReader::new(&mut mr);
        // Decoding has to succeed; the size and checksum are verified by `gzip_decode` itself.
        let _dst_buf = gzip_decode(&mut br, false).unwrap();
    }
}