core/compr: make a faster version of Inflate::uncompress()
[nihav.git] / nihav-core / src / compr / deflate.rs
CommitLineData
0443d0c5
KS
1//! Deflate format (RFC 1951) support.
2//!
f8cdb949 3//! This module provides functionality for decompressing raw deflated streams via [`Inflate`] and gzip files (RFC 1952) via [`gzip_decode`] and compressing raw or zlib streams via [`Deflate`].
0443d0c5 4//!
f8cdb949 5//! [`Deflate`]: ./struct.Deflate.html
0443d0c5
KS
6//! [`Inflate`]: ./struct.Inflate.html
7//! [`gzip_decode`]: ./fn.gzip_decode.html
8//!
9//! # Examples
10//!
11//! Decompressing full input buffer into sufficiently large output buffer:
12//! ```
13//! # use nihav_core::compr::DecompressError;
14//! use nihav_core::compr::deflate::Inflate;
15//!
16//! # fn decompress(input: &[u8]) -> Result<(), DecompressError> {
17//! # let mut output_buffer = [0u8; 16];
18//! let output_length = Inflate::uncompress(input, &mut output_buffer)?;
19//! # Ok(())
20//! # }
21//! ```
22//!
23//! Decompressing input chunks into portions of output:
24//! ```
25//! use nihav_core::compr::DecompressError;
26//! use nihav_core::compr::deflate::Inflate;
27//!
28//! # fn decompress(input_data: &[u8]) -> Result<(), DecompressError> {
29//! let mut inflate = Inflate::new();
30//! let mut dst_buf: Vec<u8> = Vec::new();
31//! let mut output_chunk = [0u8; 1024];
32//! for src in input_data.chunks(512) {
33//! let mut repeat = false;
34//! loop {
35//! let ret = inflate.decompress_data(src, &mut output_chunk, repeat);
36//! match ret {
37//! Ok(len) => { // we got a buffer decoded successfully to the end
38//! dst_buf.extend_from_slice(&output_chunk[..len]);
39//! break;
40//! },
41//! Err(DecompressError::ShortData) => { // this block of data was fully read
42//! break;
43//! },
44//! Err(DecompressError::OutputFull) => {
45//! // the output buffer is full, flush it and continue decoding the same block
46//! repeat = true;
47//! dst_buf.extend_from_slice(&output_chunk);
48//! },
49//! Err(err) => {
50//! return Err(err);
51//! },
52//! }
53//! }
54//! }
55//! # Ok(())
56//! # }
57//! ```
f8cdb949
KS
58//!
59//! Compressing input buffer into zlib stream:
60//! ```
61//! use nihav_core::compr::deflate::{Deflate, DeflateMode, DeflateWriter};
62//!
63//! # fn compress(input: &[u8]) {
64//! let output = Vec::with_capacity(input.len() * 65540 / 65535 + 6);
65//! let mut writer = DeflateWriter::new(output);
66//! let mut compr = Deflate::new(DeflateMode::Fast);
67//! compr.write_zlib_header(&mut writer);
68//! compr.compress(input, &mut writer);
69//! compr.compress_end(&mut writer);
70//! let output = writer.end();
71//! # }
72//! ```
0443d0c5 73
8570a0b3 74use crate::options::NAOptionDefinitionType;
0443d0c5
KS
75use crate::io::byteio::*;
76use crate::io::bitreader::*;
77use crate::io::codebook::*;
78use super::*;
79
80const NUM_LITERALS: usize = 287;
81const NUM_DISTS: usize = 32;
82
83struct FixedLenCodeReader {}
84
85impl CodebookDescReader<u16> for FixedLenCodeReader {
86 fn bits(&mut self, idx: usize) -> u8 {
87 if idx < 144 { 8 }
88 else if idx < 256 { 9 }
89 else if idx < 280 { 7 }
90 else { 8 }
91 }
b36f412c 92 #[allow(clippy::identity_op)]
0443d0c5
KS
93 fn code(&mut self, idx: usize) -> u32 {
94 let base = idx as u32;
95 let bits = self.bits(idx);
96 if idx < 144 { reverse_bits(base + 0x30, bits) }
97 else if idx < 256 { reverse_bits(base + 0x190 - 144, bits) }
98 else if idx < 280 { reverse_bits(base + 0x000 - 256, bits) }
99 else { reverse_bits(base + 0xC0 - 280, bits) }
100 }
101 fn sym (&mut self, idx: usize) -> u16 { idx as u16 }
102 fn len(&mut self) -> usize { NUM_LITERALS + 1 }
103}
104
105#[derive(Clone,Copy,Default)]
106struct BitReaderState {
107 pos: usize,
108 bitbuf: u32,
109 bits: u8,
110}
111
112struct CurrentSource<'a> {
113 src: &'a [u8],
114 br: BitReaderState,
115}
116
117impl<'a> CurrentSource<'a> {
118 fn new(src: &'a [u8], br: BitReaderState) -> Self {
119 let mut newsrc = Self { src, br };
120 newsrc.br.pos = 0;
121 newsrc.refill();
122 newsrc
123 }
124 fn reinit(src: &'a [u8], br: BitReaderState) -> Self {
125 let mut newsrc = Self { src, br };
126 newsrc.refill();
127 newsrc
128 }
129 fn refill(&mut self) {
130 while (self.br.bits <= 24) && (self.br.pos < self.src.len()) {
131 self.br.bitbuf |= u32::from(self.src[self.br.pos]) << self.br.bits;
132 self.br.bits += 8;
133 self.br.pos += 1;
134 }
135 }
136 fn skip_cache(&mut self, nbits: u8) {
137 self.br.bitbuf >>= nbits;
138 self.br.bits -= nbits;
139 }
140 fn read(&mut self, nbits: u8) -> BitReaderResult<u32> {
141 if nbits == 0 { return Ok(0); }
142 if nbits > 16 { return Err(BitReaderError::TooManyBitsRequested); }
143 if self.br.bits < nbits {
144 self.refill();
145 if self.br.bits < nbits { return Err(BitReaderError::BitstreamEnd); }
146 }
147 let ret = self.br.bitbuf & ((1 << nbits) - 1);
148 self.skip_cache(nbits);
149 Ok(ret)
150 }
151 fn read_bool(&mut self) -> BitReaderResult<bool> {
152 if self.br.bits == 0 {
153 self.refill();
154 if self.br.bits == 0 { return Err(BitReaderError::BitstreamEnd); }
155 }
156 let ret = (self.br.bitbuf & 1) != 0;
157 self.skip_cache(1);
158 Ok(ret)
159 }
160 fn peek(&mut self, nbits: u8) -> u32 {
161 if nbits == 0 || nbits > 16 { return 0; }
162 if self.br.bits < nbits {
163 self.refill();
164 }
165 self.br.bitbuf & ((1 << nbits) - 1)
166 }
167 fn skip(&mut self, nbits: u32) -> BitReaderResult<()> {
168 if u32::from(self.br.bits) >= nbits {
169 self.skip_cache(nbits as u8);
170 } else {
171 unreachable!();
172 }
173 Ok(())
174 }
7bf82b33
KS
175 fn skip_bytes(&mut self, nbytes: usize) -> BitReaderResult<()> {
176 self.align();
177 let cached = usize::from(self.br.bits / 8);
178 if nbytes <= cached {
179 self.skip((nbytes as u32) * 8)?;
180 } else {
181 self.skip((cached as u32) * 8)?;
182 self.br.bits = 0;
183 self.br.bitbuf = 0;
184 self.br.pos += nbytes - cached;
185 if self.br.pos > self.src.len() {
186 return Err(BitReaderError::BitstreamEnd);
187 }
188 self.refill();
189 }
190 Ok(())
191 }
0443d0c5
KS
192 fn align(&mut self) {
193 let b = self.br.bits & 7;
194 if b != 0 {
11f1d51e 195 self.skip_cache(b);
0443d0c5
KS
196 }
197 }
198 fn left(&self) -> isize {
199 ((self.src.len() as isize) - (self.br.pos as isize)) * 8 + (self.br.bits as isize)
200 }
7bf82b33
KS
201 fn tell(&self) -> usize {
202 self.br.pos - usize::from(self.br.bits / 8)
203 }
0443d0c5
KS
204}
205
206impl<'a, S: Copy> CodebookReader<S> for CurrentSource<'a> {
207 fn read_cb(&mut self, cb: &Codebook<S>) -> CodebookResult<S> {
208 let mut esc = true;
209 let mut idx = 0;
210 let mut lut_bits = cb.lut_bits;
211 let orig_br = self.br;
212 while esc {
213 let lut_idx = (self.peek(lut_bits) as usize) + (idx as usize);
214 if cb.table[lut_idx] == TABLE_FILL_VALUE { return Err(CodebookError::InvalidCode); }
215 let bits = cb.table[lut_idx] & 0x7F;
216 esc = (cb.table[lut_idx] & 0x80) != 0;
217 idx = (cb.table[lut_idx] >> 8) as usize;
218 let skip_bits = if esc { u32::from(lut_bits) } else { bits };
219 if (skip_bits as isize) > self.left() {
220 self.br = orig_br;
221 self.refill();
222 return Err(CodebookError::MemoryError);
223 }
224 self.skip(skip_bits as u32).unwrap();
225 lut_bits = bits as u8;
226 }
227 Ok(cb.syms[idx])
228 }
229}
230
231enum InflateState {
232 Start,
233 BlockStart,
234 BlockMode,
235 StaticBlockLen,
236 StaticBlockInvLen(u32),
237 StaticBlockCopy(usize),
238 FixedBlock,
239 FixedBlockLengthExt(usize, u8),
240 FixedBlockDist(usize),
241 FixedBlockDistExt(usize, usize, u8),
242 FixedBlockCopy(usize, usize),
243 FixedBlockLiteral(u8),
244 DynBlockHlit,
245 DynBlockHdist,
246 DynBlockHclen,
247 DynLengths(usize),
248 DynCodeLengths,
249 DynCodeLengthsAdd(usize),
250 DynBlock,
251 DynBlockLengthExt(usize, u8),
252 DynBlockDist(usize),
253 DynBlockDistExt(usize, usize, u8),
254 DynCopy(usize, usize),
255 DynBlockLiteral(u8),
256 End,
257}
258
259///! The decompressor for deflated streams (RFC 1951).
260pub struct Inflate {
261 br: BitReaderState,
262 fix_len_cb: Codebook<u16>,
263
264 buf: [u8; 65536],
265 bpos: usize,
266 output_idx: usize,
37d96dbb 267 full_pos: usize,
0443d0c5
KS
268
269 state: InflateState,
270 final_block: bool,
271 hlit: usize,
272 hdist: usize,
273 dyn_len_cb: Option<Codebook<u32>>,
274 dyn_lit_cb: Option<Codebook<u32>>,
275 dyn_dist_cb: Option<Codebook<u32>>,
276 len_lengths: [u8; 19],
277 all_lengths: [u8; NUM_LITERALS + NUM_DISTS],
278 cur_len_idx: usize,
279}
280
281const LENGTH_ADD_BITS: [u8; 29] = [
282 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
283 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
284 4, 4, 4, 4, 5, 5, 5, 5, 0
285];
286const LENGTH_BASE: [u16; 29] = [
287 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
288 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
289 67, 83, 99, 115, 131, 163, 195, 227, 258
290];
291const DIST_ADD_BITS: [u8; 30] = [
292 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
293 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
294 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
295];
296const DIST_BASE: [u16; 30] = [
297 1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
298 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
299 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
300];
301const LEN_RECODE: [usize; 19] = [
302 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
303];
304const REPEAT_BITS: [u8; 3] = [ 2, 3, 7 ];
305const REPEAT_BASE: [u8; 3] = [ 3, 3, 11 ];
306
307macro_rules! read_bits {
308 ($self: expr, $csrc: expr, $bits: expr) => ({
309 if $csrc.left() < $bits as isize {
310 $self.br = $csrc.br;
311 return Err(DecompressError::ShortData);
312 }
313 $csrc.read($bits).unwrap()
314 })
315}
316
317macro_rules! read_cb {
318 ($self: expr, $csrc: expr, $cb: expr) => ({
319 let ret = $csrc.read_cb($cb);
320 if let Err(CodebookError::MemoryError) = ret {
321 $self.br = $csrc.br;
322 return Err(DecompressError::ShortData);
323 }
324 match ret {
325 Ok(val) => val,
326 Err(_) => {
327 $self.state = InflateState::End;
328 return Err(DecompressError::InvalidData);
329 },
330 }
331 })
332}
333
334impl Inflate {
335 ///! Creates a new instance of `Inflate` struct.
336 pub fn new() -> Self {
337 let mut cr = FixedLenCodeReader {};
338 let fix_len_cb = Codebook::new(&mut cr, CodebookMode::LSB).unwrap();
339 Self {
340 br: BitReaderState::default(),
341 fix_len_cb,
342
343 buf: [0; 65536],
344 bpos: 0,
345 output_idx: 0,
37d96dbb 346 full_pos: 0,
0443d0c5
KS
347
348 state: InflateState::Start,
349 final_block: false,
350 dyn_len_cb: None,
351 dyn_lit_cb: None,
352 dyn_dist_cb: None,
353 hlit: 0,
354 hdist: 0,
355 len_lengths: [0; 19],
356 all_lengths: [0; NUM_LITERALS + NUM_DISTS],
357 cur_len_idx: 0,
358 }
359 }
360 fn put_literal(&mut self, val: u8) {
361 self.buf[self.bpos] = val;
c96ad969 362 self.bpos = (self.bpos + 1) & (self.buf.len() - 1);
37d96dbb 363 self.full_pos += 1;
0443d0c5
KS
364 }
365 fn lz_copy(&mut self, offset: usize, len: usize, dst: &mut [u8]) -> DecompressResult<()> {
366 let mask = self.buf.len() - 1;
37d96dbb 367 if offset > self.full_pos {
0443d0c5
KS
368 return Err(DecompressError::InvalidData);
369 }
c96ad969 370 let cstart = (self.bpos.wrapping_sub(offset)) & mask;
0443d0c5
KS
371 for i in 0..len {
372 self.buf[(self.bpos + i) & mask] = self.buf[(cstart + i) & mask];
373 dst[i] = self.buf[(cstart + i) & mask];
374 }
c96ad969 375 self.bpos = (self.bpos + len) & mask;
37d96dbb 376 self.full_pos += len;
0443d0c5
KS
377 Ok(())
378 }
bc23de6b
KS
379 ///! Sets custom history for decoding an update for already decoded data.
380 pub fn set_dict(&mut self, dict: &[u8]) {
381 let len = dict.len().min(self.buf.len());
382 let start = dict.len() - len;
383 self.buf[..len].copy_from_slice(&dict[start..]);
384 self.bpos = len;
385 self.full_pos = len;
386 }
0443d0c5
KS
387 ///! Reports whether decoder has finished decoding the input.
388 pub fn is_finished(&self) -> bool {
6f263099 389 matches!(self.state, InflateState::End)
0443d0c5
KS
390 }
391 ///! Reports the current amount of bytes output into the destination buffer after the last run.
392 pub fn get_current_output_size(&self) -> usize { self.output_idx }
393 ///! Reports the total amount of bytes decoded so far.
394 pub fn get_total_output_size(&self) -> usize { self.bpos }
395 ///! Tries to decompress input data and write it to the output buffer.
396 ///!
397 ///! Since the decompressor can work with arbitrary input and output chunks its return value may have several meanings:
398 ///! * `Ok(len)` means the stream has been fully decoded and then number of bytes output into the destination buffer is returned.
399 ///! * [`DecompressError::ShortData`] means the input stream has been fully read but more data is needed.
400 ///! * [`DecompressError::OutputFull`] means the output buffer is full and should be flushed. Then decoding should continue on the same input block with `continue_block` parameter set to `true`.
401 ///!
402 ///! [`DecompressError::ShortData`]: ../enum.DecompressError.html#variant.ShortData
403 ///! [`DecompressError::OutputFull`]: ../enum.DecompressError.html#variant.OutputFull
404 pub fn decompress_data(&mut self, src: &[u8], dst: &mut [u8], continue_block: bool) -> DecompressResult<usize> {
de161d26
KS
405 self.decompress_data_internal(src, dst, continue_block, false)
406 }
407 ///! Tries to decompress whole input chunk to the output buffer.
408 pub fn decompress_block(&mut self, src: &[u8], dst: &mut [u8]) -> DecompressResult<usize> {
409 self.decompress_data_internal(src, dst, false, true)
410 }
411 #[allow(clippy::comparison_chain)]
412 fn decompress_data_internal(&mut self, src: &[u8], dst: &mut [u8], continue_block: bool, do_one_block: bool) -> DecompressResult<usize> {
b36f412c 413 if src.is_empty() || dst.is_empty() {
0443d0c5
KS
414 return Err(DecompressError::InvalidArgument);
415 }
416 let mut csrc = if !continue_block {
417 CurrentSource::new(src, self.br)
418 } else {
419 self.output_idx = 0;
420 CurrentSource::reinit(src, self.br)
421 };
de161d26
KS
422 if do_one_block {
423 self.output_idx = 0;
424 }
0c9b706a
KS
425 // check for zlib stream header
426 if let (&InflateState::Start, true) = (&self.state, src.len() > 2) {
427 let cm = src[0] & 0xF;
428 let cinfo = src[0] >> 4;
429 let hdr = (u16::from(src[0]) << 8) | u16::from(src[1]);
430 if cm == 8 && cinfo <= 7 && (hdr % 31) == 0 {
431 csrc.skip(16).unwrap();
432 }
433 }
0443d0c5
KS
434 'main: loop {
435 match self.state {
436 InflateState::Start | InflateState::BlockStart => {
437 if csrc.left() == 0 {
de161d26
KS
438 if do_one_block {
439 return Ok(self.output_idx);
440 }
0443d0c5
KS
441 self.br = csrc.br;
442 return Err(DecompressError::ShortData);
443 }
444 self.final_block = csrc.read_bool().unwrap();
445 self.state = InflateState::BlockMode;
446 },
447 InflateState::BlockMode => {
448 let bmode = read_bits!(self, csrc, 2);
449 match bmode {
450 0 => {
451 csrc.align();
452 self.state = InflateState::StaticBlockLen;
453 },
454 1 => { self.state = InflateState::FixedBlock; },
455 2 => { self.state = InflateState::DynBlockHlit; },
456 _ => {
457 self.state = InflateState::End;
458 return Err(DecompressError::InvalidHeader);
459 },
460 };
461 },
462 InflateState::StaticBlockLen => {
463 let len = read_bits!(self, csrc, 16);
464 self.state = InflateState::StaticBlockInvLen(len);
465 },
466 InflateState::StaticBlockInvLen(len) => {
467 let inv_len = read_bits!(self, csrc, 16);
6130c973 468 if (len ^ inv_len) != 0xFFFF {
0443d0c5
KS
469 self.state = InflateState::End;
470 return Err(DecompressError::InvalidHeader);
471 }
472 self.state = InflateState::StaticBlockCopy(len as usize);
473 },
474 InflateState::StaticBlockCopy(len) => {
475 for i in 0..len {
476 if csrc.left() < 8 {
477 self.br = csrc.br;
478 self.state = InflateState::StaticBlockCopy(len - i);
479 return Err(DecompressError::ShortData);
480 }
481 let val = csrc.read(8).unwrap() as u8;
482 self.put_literal(val);
483 }
484 self.state = InflateState::BlockStart;
485 }
486 InflateState::FixedBlock => {
487 let val = read_cb!(self, csrc, &self.fix_len_cb);
488 if val < 256 {
489 if self.output_idx >= dst.len() {
490 self.br = csrc.br;
491 self.state = InflateState::FixedBlockLiteral(val as u8);
492 return Err(DecompressError::OutputFull);
493 }
494 self.put_literal(val as u8);
495 dst[self.output_idx] = val as u8;
496 self.output_idx += 1;
497 } else if val == 256 {
498 if self.final_block {
499 self.state = InflateState::End;
500 return Ok(self.output_idx);
501 } else {
502 self.state = InflateState::BlockStart;
503 }
504 } else {
505 let len_idx = (val - 257) as usize;
506 if len_idx >= LENGTH_BASE.len() {
507 self.state = InflateState::End;
508 return Err(DecompressError::InvalidData);
509 }
510 let len_bits = LENGTH_ADD_BITS[len_idx];
511 let add_base = LENGTH_BASE[len_idx] as usize;
512 if len_bits > 0 {
513 self.state = InflateState::FixedBlockLengthExt(add_base, len_bits);
514 } else {
515 self.state = InflateState::FixedBlockDist(add_base);
516 }
517 }
518 },
519 InflateState::FixedBlockLiteral(sym) => {
520 if self.output_idx >= dst.len() {
521 self.br = csrc.br;
522 return Err(DecompressError::OutputFull);
523 }
524 self.put_literal(sym);
525 dst[self.output_idx] = sym;
526 self.output_idx += 1;
527 self.state = InflateState::FixedBlock;
528 },
529 InflateState::FixedBlockLengthExt(base, bits) => {
530 let add = read_bits!(self, csrc, bits) as usize;
531 self.state = InflateState::FixedBlockDist(base + add);
532 },
533 InflateState::FixedBlockDist(length) => {
534 let dist_idx = reverse_bits(read_bits!(self, csrc, 5), 5) as usize;
535 if dist_idx >= DIST_BASE.len() {
536 self.state = InflateState::End;
537 return Err(DecompressError::InvalidData);
538 }
539 let dist_bits = DIST_ADD_BITS[dist_idx];
540 let dist_base = DIST_BASE[dist_idx] as usize;
541 if dist_bits == 0 {
542 self.state = InflateState::FixedBlockCopy(length, dist_base);
543 } else {
544 self.state = InflateState::FixedBlockDistExt(length, dist_base, dist_bits);
545 }
546 },
547 InflateState::FixedBlockDistExt(length, base, bits) => {
548 let add = read_bits!(self, csrc, bits) as usize;
549 self.state = InflateState::FixedBlockCopy(length, base + add);
550 },
551 InflateState::FixedBlockCopy(length, dist) => {
552 if self.output_idx + length > dst.len() {
553 let copy_size = dst.len() - self.output_idx;
554 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
555 if ret.is_err() {
556 self.state = InflateState::End;
557 return Err(DecompressError::InvalidData);
558 }
559 self.output_idx += copy_size;
560 self.br = csrc.br;
561 self.state = InflateState::FixedBlockCopy(length - copy_size, dist);
562 return Err(DecompressError::OutputFull);
563 }
564 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
565 if ret.is_err() {
566 self.state = InflateState::End;
567 return Err(DecompressError::InvalidData);
568 }
569 self.output_idx += length;
570 self.state = InflateState::FixedBlock;
571 }
572 InflateState::DynBlockHlit => {
573 self.hlit = (read_bits!(self, csrc, 5) as usize) + 257;
574 if self.hlit >= 287 {
575 self.state = InflateState::End;
576 return Err(DecompressError::InvalidHeader);
577 }
578 self.state = InflateState::DynBlockHdist;
579 }
580 InflateState::DynBlockHdist => {
581 self.hdist = (read_bits!(self, csrc, 5) as usize) + 1;
582 self.state = InflateState::DynBlockHclen;
583 },
584 InflateState::DynBlockHclen => {
585 let hclen = (read_bits!(self, csrc, 4) as usize) + 4;
586 self.cur_len_idx = 0;
587 self.len_lengths = [0; 19];
588 self.all_lengths = [0; NUM_LITERALS + NUM_DISTS];
589 self.state = InflateState::DynLengths(hclen);
590 },
591 InflateState::DynLengths(len) => {
592 for i in 0..len {
593 if csrc.left() < 3 {
594 self.br = csrc.br;
595 self.state = InflateState::DynLengths(len - i);
596 return Err(DecompressError::ShortData);
597 }
598 self.len_lengths[LEN_RECODE[self.cur_len_idx]] = csrc.read(3).unwrap() as u8;
599 self.cur_len_idx += 1;
600 }
601 let mut len_codes = [ShortCodebookDesc { code: 0, bits: 0 }; 19];
602 lengths_to_codes(&self.len_lengths, &mut len_codes)?;
603 let mut cr = ShortCodebookDescReader::new(len_codes.to_vec());
604 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
605 if ret.is_err() {
606 self.state = InflateState::End;
607 return Err(DecompressError::InvalidHeader);
608 }
609 self.dyn_len_cb = Some(ret.unwrap());
610 self.cur_len_idx = 0;
611 self.state = InflateState::DynCodeLengths;
612 },
613 InflateState::DynCodeLengths => {
614 if let Some(ref len_cb) = self.dyn_len_cb {
615 while self.cur_len_idx < self.hlit + self.hdist {
616 let ret = csrc.read_cb(len_cb);
617 let val = match ret {
618 Ok(val) => val,
619 Err(CodebookError::MemoryError) => {
620 self.br = csrc.br;
621 return Err(DecompressError::ShortData);
622 },
623 Err(_) => {
624 self.state = InflateState::End;
625 return Err(DecompressError::InvalidHeader);
626 },
627 };
628 if val < 16 {
629 self.all_lengths[self.cur_len_idx] = val as u8;
630 self.cur_len_idx += 1;
631 } else {
632 let idx = (val as usize) - 16;
633 if idx > 2 {
634 self.state = InflateState::End;
635 return Err(DecompressError::InvalidHeader);
636 }
637 self.state = InflateState::DynCodeLengthsAdd(idx);
638 continue 'main;
639 }
640 }
641 let (lit_lengths, dist_lengths) = self.all_lengths.split_at(self.hlit);
642
643 let mut lit_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_LITERALS];
6f263099 644 lengths_to_codes(lit_lengths, &mut lit_codes)?;
0443d0c5
KS
645 let mut cr = ShortCodebookDescReader::new(lit_codes.to_vec());
646 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
647 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
648 self.dyn_lit_cb = Some(ret.unwrap());
649
650 let mut dist_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_DISTS];
651 lengths_to_codes(&dist_lengths[..self.hdist], &mut dist_codes)?;
652 let mut cr = ShortCodebookDescReader::new(dist_codes.to_vec());
653 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
654 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
655 self.dyn_dist_cb = Some(ret.unwrap());
656
657 self.state = InflateState::DynBlock;
658 } else {
659 unreachable!();
660 }
661 },
662 InflateState::DynCodeLengthsAdd(mode) => {
663 let base = REPEAT_BASE[mode] as usize;
664 let bits = REPEAT_BITS[mode];
665 let len = base + read_bits!(self, csrc, bits) as usize;
666 if self.cur_len_idx + len > self.hlit + self.hdist {
667 self.state = InflateState::End;
668 return Err(DecompressError::InvalidHeader);
669 }
b36f412c
KS
670 let rpt = if mode == 0 {
671 if self.cur_len_idx == 0 {
672 self.state = InflateState::End;
673 return Err(DecompressError::InvalidHeader);
674 }
675 self.all_lengths[self.cur_len_idx - 1]
676 } else {
677 0
678 };
0443d0c5
KS
679 for _ in 0..len {
680 self.all_lengths[self.cur_len_idx] = rpt;
681 self.cur_len_idx += 1;
682 }
683 self.state = InflateState::DynCodeLengths;
684 },
685 InflateState::DynBlock => {
686 if let Some(ref lit_cb) = self.dyn_lit_cb {
687 let val = read_cb!(self, csrc, lit_cb);
688 if val < 256 {
689 if self.output_idx >= dst.len() {
690 self.br = csrc.br;
691 self.state = InflateState::DynBlockLiteral(val as u8);
692 return Err(DecompressError::OutputFull);
693 }
694 self.put_literal(val as u8);
695 dst[self.output_idx] = val as u8;
696 self.output_idx += 1;
697 } else if val == 256 {
698 if self.final_block {
699 self.state = InflateState::End;
700 return Ok(self.output_idx);
701 } else {
702 self.state = InflateState::BlockStart;
703 }
704 } else {
705 let len_idx = (val - 257) as usize;
706 if len_idx >= LENGTH_BASE.len() {
707 self.state = InflateState::End;
708 return Err(DecompressError::InvalidData);
709 }
710 let len_bits = LENGTH_ADD_BITS[len_idx];
711 let add_base = LENGTH_BASE[len_idx] as usize;
712 if len_bits > 0 {
713 self.state = InflateState::DynBlockLengthExt(add_base, len_bits);
714 } else {
715 self.state = InflateState::DynBlockDist(add_base);
716 }
717 }
718 } else {
719 unreachable!();
720 }
721 },
722 InflateState::DynBlockLiteral(sym) => {
723 if self.output_idx >= dst.len() {
724 self.br = csrc.br;
725 return Err(DecompressError::OutputFull);
726 }
727 self.put_literal(sym);
728 dst[self.output_idx] = sym;
729 self.output_idx += 1;
730 self.state = InflateState::DynBlock;
731 },
732 InflateState::DynBlockLengthExt(base, bits) => {
733 let add = read_bits!(self, csrc, bits) as usize;
734 self.state = InflateState::DynBlockDist(base + add);
735 },
736 InflateState::DynBlockDist(length) => {
737 if let Some(ref dist_cb) = self.dyn_dist_cb {
738 let dist_idx = read_cb!(self, csrc, dist_cb) as usize;
739 if dist_idx >= DIST_BASE.len() {
740 self.state = InflateState::End;
741 return Err(DecompressError::InvalidData);
742 }
743 let dist_bits = DIST_ADD_BITS[dist_idx];
744 let dist_base = DIST_BASE[dist_idx] as usize;
745 if dist_bits == 0 {
746 self.state = InflateState::DynCopy(length, dist_base);
747 } else {
748 self.state = InflateState::DynBlockDistExt(length, dist_base, dist_bits);
749 }
750 } else {
751 unreachable!();
752 }
753 },
754 InflateState::DynBlockDistExt(length, base, bits) => {
755 let add = read_bits!(self, csrc, bits) as usize;
756 self.state = InflateState::DynCopy(length, base + add);
757 },
758 InflateState::DynCopy(length, dist) => {
759 if self.output_idx + length > dst.len() {
760 let copy_size = dst.len() - self.output_idx;
761 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
762 if ret.is_err() {
763 self.state = InflateState::End;
764 return Err(DecompressError::InvalidData);
765 }
766 self.output_idx += copy_size;
767 self.br = csrc.br;
768 self.state = InflateState::DynCopy(length - copy_size, dist);
769 return Err(DecompressError::OutputFull);
770 }
771 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
772 if ret.is_err() {
773 self.state = InflateState::End;
774 return Err(DecompressError::InvalidData);
775 }
776 self.output_idx += length;
777 self.state = InflateState::DynBlock;
778 }
779 InflateState::End => {
780 return Ok(0);
781 },
782 }
783 }
784 }
de161d26
KS
785 ///! Resets decoder state.
786 pub fn reset(&mut self) {
787 self.bpos = 0;
788 self.output_idx = 0;
789 self.full_pos = 0;
790 self.state = InflateState::Start;
791 }
792
0443d0c5
KS
793 ///! Decompresses input data into output returning the uncompressed data length.
794 pub fn uncompress(src: &[u8], dst: &mut [u8]) -> DecompressResult<usize> {
7bf82b33
KS
795 let mut csrc = CurrentSource::new(src, BitReaderState::default());
796 if src.len() > 2 {
797 let cm = src[0] & 0xF;
798 let cinfo = src[0] >> 4;
799 let hdr = (u16::from(src[0]) << 8) | u16::from(src[1]);
800 if cm == 8 && cinfo <= 7 && (hdr % 31) == 0 {
801 csrc.skip(16).unwrap();
802 }
803 }
804
805 let mut fix_len_cb = None;
806
807 let mut dst_idx = 0;
808 let mut final_block = false;
809 while !final_block {
810 final_block = csrc.read_bool()?;
811
812 let bmode = csrc.read(2)?;
813 match bmode {
814 0 => {
815 csrc.align();
816 let len = csrc.read(16)? as usize;
817 let inv_len = csrc.read(16)? as usize;
818 if (len ^ inv_len) != 0xFFFF {
819 return Err(DecompressError::InvalidHeader);
820 }
821 let src_pos = csrc.tell();
822 if src_pos + len > src.len() {
823 return Err(DecompressError::ShortData);
824 }
825 if dst_idx + len > dst.len() {
826 return Err(DecompressError::OutputFull);
827 }
828 dst[dst_idx..][..len].copy_from_slice(&src[src_pos..][..len]);
829 dst_idx += len;
830 csrc.skip_bytes(len)?;
831 },
832 1 => {
833 if fix_len_cb.is_none() {
834 let mut cr = FixedLenCodeReader {};
835 fix_len_cb = Some(Codebook::new(&mut cr, CodebookMode::LSB).unwrap());
836 }
837 if let Some(ref len_cb) = &fix_len_cb {
838 loop {
839 let val = csrc.read_cb(len_cb)?;
840 if val < 256 {
841 if dst_idx >= dst.len() {
842 return Err(DecompressError::OutputFull);
843 }
844 dst[dst_idx] = val as u8;
845 dst_idx += 1;
846 } else if val == 256 {
847 break;
848 } else {
849 let len_idx = (val - 257) as usize;
850 if len_idx >= LENGTH_BASE.len() {
851 return Err(DecompressError::InvalidData);
852 }
853 let len_bits = LENGTH_ADD_BITS[len_idx];
854 let mut length = LENGTH_BASE[len_idx] as usize;
855 if len_bits > 0 {
856 length += csrc.read(len_bits)? as usize;
857 }
858 let dist_idx = reverse_bits(csrc.read(5)?, 5) as usize;
859 if dist_idx >= DIST_BASE.len() {
860 return Err(DecompressError::InvalidData);
861 }
862 let dist_bits = DIST_ADD_BITS[dist_idx];
863 let mut dist = DIST_BASE[dist_idx] as usize;
864 if dist_bits > 0 {
865 dist += csrc.read(dist_bits)? as usize;
866 }
867
868 if dst_idx + length > dst.len() {
869 return Err(DecompressError::OutputFull);
870 }
871 if dist > dst_idx {
872 return Err(DecompressError::InvalidData);
873 }
874 lz_copy(dst, dst_idx, dist, length);
875 dst_idx += length;
876 }
877 }
878 } else {
879 unreachable!();
880 }
881 },
882 2 => {
883 let hlit = csrc.read(5)? as usize + 257;
884 if hlit >= 287 {
885 return Err(DecompressError::InvalidHeader);
886 }
887 let hdist = csrc.read(5)? as usize + 1;
888 let hclen = csrc.read(4)? as usize + 4;
889 let mut cur_len_idx = 0;
890 let mut len_lengths = [0; 19];
891 let mut all_lengths = [0; NUM_LITERALS + NUM_DISTS];
892
893 for _ in 0..hclen {
894 len_lengths[LEN_RECODE[cur_len_idx]] = csrc.read(3)? as u8;
895 cur_len_idx += 1;
896 }
897 let mut len_codes = [ShortCodebookDesc { code: 0, bits: 0 }; 19];
898 lengths_to_codes(&len_lengths, &mut len_codes)?;
899 let mut cr = ShortCodebookDescReader::new(len_codes.to_vec());
900 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
901 if ret.is_err() {
902 return Err(DecompressError::InvalidHeader);
903 }
904 let dyn_len_cb = ret.unwrap();
905
906 let mut cur_len_idx = 0;
907 while cur_len_idx < hlit + hdist {
908 let val = csrc.read_cb(&dyn_len_cb)?;
909 if val < 16 {
910 all_lengths[cur_len_idx] = val as u8;
911 cur_len_idx += 1;
912 } else {
913 let mode = (val as usize) - 16;
914 if mode > 2 {
915 return Err(DecompressError::InvalidHeader);
916 }
917 let base = REPEAT_BASE[mode] as usize;
918 let bits = REPEAT_BITS[mode];
919 let len = base + (csrc.read(bits)? as usize);
920 if cur_len_idx + len > hlit + hdist {
921 return Err(DecompressError::InvalidHeader);
922 }
923 let rpt = if mode == 0 {
924 if cur_len_idx == 0 {
925 return Err(DecompressError::InvalidHeader);
926 }
927 all_lengths[cur_len_idx - 1]
928 } else {
929 0
930 };
931 for _ in 0..len {
932 all_lengths[cur_len_idx] = rpt;
933 cur_len_idx += 1;
934 }
935 }
936 }
937 let (lit_lengths, dist_lengths) = all_lengths.split_at(hlit);
938
939 let mut lit_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_LITERALS];
940 lengths_to_codes(lit_lengths, &mut lit_codes)?;
941 let mut cr = ShortCodebookDescReader::new(lit_codes.to_vec());
942 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
943 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
944 let dyn_lit_cb = ret.unwrap();
945
946 let mut dist_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_DISTS];
947 lengths_to_codes(&dist_lengths[..hdist], &mut dist_codes)?;
948 let mut cr = ShortCodebookDescReader::new(dist_codes.to_vec());
949 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
950 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
951 let dyn_dist_cb = ret.unwrap();
952
953 loop {
954 let val = csrc.read_cb(&dyn_lit_cb)?;
955 if val < 256 {
956 if dst_idx >= dst.len() {
957 return Err(DecompressError::OutputFull);
958 }
959 dst[dst_idx] = val as u8;
960 dst_idx += 1;
961 } else if val == 256 {
962 break;
963 } else {
964 let len_idx = (val - 257) as usize;
965 if len_idx >= LENGTH_BASE.len() {
966 return Err(DecompressError::InvalidData);
967 }
968 let len_bits = LENGTH_ADD_BITS[len_idx];
969 let mut length = LENGTH_BASE[len_idx] as usize;
970 if len_bits > 0 {
971 length += csrc.read(len_bits)? as usize;
972 }
973
974 let dist_idx = csrc.read_cb(&dyn_dist_cb)? as usize;
975 if dist_idx >= DIST_BASE.len() {
976 return Err(DecompressError::InvalidData);
977 }
978 let dist_bits = DIST_ADD_BITS[dist_idx];
979 let mut dist = DIST_BASE[dist_idx] as usize;
980 if dist_bits > 0 {
981 dist += csrc.read(dist_bits)? as usize;
982 }
983
984 if dst_idx + length > dst.len() {
985 return Err(DecompressError::OutputFull);
986 }
987 if dist > dst_idx {
988 return Err(DecompressError::InvalidData);
989 }
990 lz_copy(dst, dst_idx, dist, length);
991 dst_idx += length;
992 }
993 }
994 },
995 _ => return Err(DecompressError::InvalidHeader),
996 };
997 }
998 Ok(dst_idx)
0443d0c5
KS
999 }
1000}
1001
b36f412c
KS
1002impl Default for Inflate {
1003 fn default() -> Self {
1004 Self::new()
1005 }
1006}
1007
0443d0c5
KS
1008fn lengths_to_codes(lens: &[u8], codes: &mut [ShortCodebookDesc]) -> DecompressResult<()> {
1009 let mut bits = [0u32; 32];
1010 let mut pfx = [0u32; 33];
1011 for len in lens.iter() {
1012 let len = *len as usize;
1013 if len >= bits.len() {
1014 return Err(DecompressError::InvalidHeader);
1015 }
1016 bits[len] += 1;
1017 }
1018 bits[0] = 0;
1019 let mut code = 0;
1020 for i in 0..bits.len() {
1021 code = (code + bits[i]) << 1;
1022 pfx[i + 1] = code;
1023 }
1024
1025 for (len, codes) in lens.iter().zip(codes.iter_mut()) {
1026 let len = *len as usize;
1027 if len != 0 {
1028 let bits = len as u8;
1029 *codes = ShortCodebookDesc { code: reverse_bits(pfx[len], bits), bits };
1030 pfx[len] += 1;
1031 } else {
1032 *codes = ShortCodebookDesc { code: 0, bits: 0 };
1033 }
1034 }
e65c0040 1035
0443d0c5
KS
1036 Ok(())
1037}
1038
1039struct GzipCRC32 {
1040 tab: [u32; 256],
1041 crc: u32,
1042}
1043
1044impl GzipCRC32 {
b36f412c 1045 #[allow(clippy::unreadable_literal)]
0443d0c5
KS
1046 fn new() -> Self {
1047 let mut tab = [0u32; 256];
1048 for i in 0..256 {
1049 let mut c = i as u32;
1050 for _ in 0..8 {
1051 if (c & 1) != 0 {
1052 c = 0xEDB88320 ^ (c >> 1);
1053 } else {
1054 c >>= 1;
1055 }
1056 }
1057 tab[i] = c;
1058 }
1059 Self { tab, crc: 0 }
1060 }
1061 fn update_crc(&mut self, src: &[u8]) {
1062 let mut c = !self.crc;
1063 for el in src.iter() {
1064 c = self.tab[((c ^ u32::from(*el)) & 0xFF) as usize] ^ (c >> 8);
1065 }
1066 self.crc = !c;
1067 }
1068}
1069
1070///! Decodes input data in gzip file format (RFC 1952) returning a vector containing decoded data.
1071pub fn gzip_decode(br: &mut ByteReader, skip_crc: bool) -> DecompressResult<Vec<u8>> {
1072 const FLAG_HCRC: u8 = 0x02;
1073 const FLAG_EXTRA: u8 = 0x04;
1074 const FLAG_NAME: u8 = 0x08;
1075 const FLAG_COMMENT: u8 = 0x10;
1076
1077 let id1 = br.read_byte()?;
1078 let id2 = br.read_byte()?;
1079 let cm = br.read_byte()?;
1080 let flg = br.read_byte()?;
1081 let _mtime = br.read_u32le()?;
1082 let _xfl = br.read_byte()?;
1083 let _os = br.read_byte()?;
1084 if id1 != 0x1F || id2 != 0x8B || cm != 8 {
1085 return Err(DecompressError::InvalidHeader);
1086 }
1087
1088 if (flg & FLAG_EXTRA) != 0 {
1089 let xlen = br.read_u16le()? as usize;
1090 br.read_skip(xlen)?;
1091 }
1092 if (flg & FLAG_NAME) != 0 {
1093 loop {
1094 let b = br.read_byte()?;
1095 if b == 0 {
1096 break;
1097 }
1098 }
1099 }
1100 if (flg & FLAG_COMMENT) != 0 {
1101 loop {
1102 let b = br.read_byte()?;
1103 if b == 0 {
1104 break;
1105 }
1106 }
1107 }
1108 let _hcrc = if (flg & FLAG_HCRC) != 0 {
1109 br.read_u16le()?
1110 } else {
1111 0
1112 };
1113 if (flg & 0xE0) != 0 {
1114 return Err(DecompressError::Unsupported);
1115 }
1116
1117 let mut output: Vec<u8> = Vec::new();
1118 let mut tail = [0u8; 8];
1119 let mut inblk = [0u8; 1024];
1120 let mut oblk = [0u8; 4096];
1121 let mut inflate = Inflate::new();
1122 let mut checker = GzipCRC32::new();
1123
1124 loop {
1125 let ret = br.read_buf_some(&mut inblk);
1126 if let Err(ByteIOError::EOF) = ret {
1127 break;
1128 }
1129 let inlen = match ret {
1130 Ok(val) => val,
1131 Err(_) => return Err(DecompressError::IOError),
1132 };
1133 let mut repeat = false;
1134 loop {
1135 let ret = inflate.decompress_data(&inblk[..inlen], &mut oblk, repeat);
1136 match ret {
1137 Ok(outlen) => {
1138 checker.update_crc(&oblk[..outlen]);
1139 output.extend_from_slice(&oblk[..outlen]);
1140 break;
1141 },
1142 Err(DecompressError::ShortData) => {
1143 break;
1144 },
1145 Err(DecompressError::OutputFull) => {
1146 repeat = true;
1147 checker.update_crc(&oblk);
1148 output.extend_from_slice(&oblk);
1149 },
1150 Err(err) => {
1151 return Err(err);
1152 },
1153 }
1154 }
1155 // Save last 8 bytes for CRC and size.
1156 if inlen >= 8 {
1157 tail.copy_from_slice(&inblk[inlen - 8..][..8]);
1158 } else {
1159 let shift_len = 8 - inlen;
1160 for i in 0..shift_len {
1161 tail[i] = tail[i + inlen];
1162 }
1163 for i in shift_len..8 {
1164 tail[i] = inblk[i - shift_len];
1165 }
1166 }
1167 }
1168 if !skip_crc {
1169 if !inflate.is_finished() { println!("???"); }
1170 let crc = read_u32le(&tail[0..4])?;
1171 let size = read_u32le(&tail[4..8])?;
1172 if size != (output.len() as u32) {
1173 return Err(DecompressError::CRCError);
1174 }
1175 if crc != checker.crc {
1176 return Err(DecompressError::CRCError);
1177 }
1178 }
1179
1180 Ok(output)
1181}
1182
f8cdb949
KS
1183#[derive(Clone,Copy,Default)]
1184struct Token {
1185 sym: u16,
1186 distsym: u8,
1187 len: u16,
1188 dist: u16,
1189}
1190
1191const TOKEN_EOB: Token = Token { sym: 256, distsym: 0, len: 0, dist: 0 };
1192
1193impl Token {
1194 fn from_literal(sym: u8) -> Self {
1195 Self {
1196 sym: u16::from(sym),
1197 distsym: 0,
1198 dist: 0,
1199 len: 0,
1200 }
1201 }
1202 fn from_match(dist: u16, len: u16) -> Self {
1203 let sym = match len {
1204 3..= 10 => 257 + len - 3,
1205 11..= 18 => 265 + (len - 11) / 2,
1206 19..= 34 => 269 + (len - 19) / 4,
1207 35..= 66 => 273 + (len - 35) / 8,
1208 67..=130 => 277 + (len - 67) / 16,
1209 131..=257 => 281 + (len - 131) / 32,
1210 _ => 285,
1211 };
1212 let distsym = if dist <= 4 {
1213 (dist - 1) as u8
1214 } else {
1215 let bits = 16 - (dist - 1).leading_zeros();
1216 (bits as u8) * 2 - 2 + if ((dist - 1) & (1 << (bits - 2))) != 0 { 1 } else { 0 }
1217 };
1218 Self {
1219 sym, distsym, len, dist
1220 }
1221 }
1222}
1223
1224fn add_codes(lens: &[u8], stats: &mut [u32], toks: &mut Vec<(u8, u8)>) {
1225 let mut last = 42;
1226 let mut lcount = 0;
1227
1228 for &len in lens.iter() {
1229 if len == last {
1230 lcount += 1;
1231 } else {
1232 if last == 0 {
1233 while lcount > 10 {
1234 let run = lcount.min(138);
1235 stats[18] += 1;
1236 toks.push((18, run - 11));
1237 lcount -= run;
1238 }
1239 if lcount >= 3 {
1240 stats[17] += 1;
1241 toks.push((17, lcount - 3));
1242 lcount = 0;
1243 }
1244 for _ in 0..lcount {
1245 stats[0] += 1;
1246 toks.push((0, 0));
1247 }
1248 } else {
1249 while lcount >= 3 {
1250 let run = lcount.min(6);
1251 stats[16] += 1;
1252 toks.push((16, run - 3));
1253 lcount -= run;
1254 }
1255 for _ in 0..lcount {
1256 stats[last as usize] += 1;
1257 toks.push((last, 0));
1258 }
1259 }
1260 stats[len as usize] += 1;
1261 toks.push((len, 0));
1262 last = len;
1263 lcount = 0;
1264 }
1265 }
1266 if lcount > 0 {
1267 if last == 0 {
1268 while lcount > 10 {
1269 let run = lcount.min(138);
1270 stats[18] += 1;
1271 toks.push((18, run - 11));
1272 lcount -= run;
1273 }
1274 if lcount >= 3 {
1275 stats[17] += 1;
1276 toks.push((17, lcount - 3));
1277 lcount = 0;
1278 }
1279 for _ in 0..lcount {
1280 stats[0] += 1;
1281 toks.push((0, 0));
1282 }
1283 } else {
1284 while lcount >= 3 {
1285 let run = lcount.min(6);
1286 stats[16] += 1;
1287 toks.push((16, run - 3));
1288 lcount -= run;
1289 }
1290 for _ in 0..lcount {
1291 stats[last as usize] += 1;
1292 toks.push((last, 0));
1293 }
1294 }
1295 }
1296}
1297
1298///! Deflate stream writer.
1299pub struct DeflateWriter {
1300 dst: Vec<u8>,
1301 bits: u8,
1302 bbuf: u32,
1303}
1304
1305impl DeflateWriter {
1306 ///! Creates a new instance of `DeflateWriter` for a provided output.
1307 pub fn new(dst: Vec<u8>) -> Self {
1308 Self {
1309 dst,
1310 bits: 0,
1311 bbuf: 0,
1312 }
1313 }
1314 fn align(&mut self) {
1315 if (self.bits & 7) != 0 {
1316 self.bits += 8 - (self.bits & 7);
1317 }
1318 }
1319 fn flush(&mut self) {
1320 while self.bits >= 8 {
1321 self.dst.push(self.bbuf as u8);
1322 self.bbuf >>= 8;
1323 self.bits -= 8;
1324 }
1325 }
1326 fn write(&mut self, val: u16, len: u8) {
1327 self.flush();
1328 self.bbuf |= u32::from(val) << self.bits;
1329 self.bits += len;
1330 }
1331 ///! Finishes writing the stream and returns the output vector.
1332 pub fn end(mut self) -> Vec<u8> {
1333 self.flush();
1334 if self.bits > 0 {
1335 self.dst.push(self.bbuf as u8);
1336 }
1337 self.dst
1338 }
1339
1340 fn write_codes(&mut self, codes: &CodeHuff, dists: &DistHuff) {
1341 let mut stats = [0u32; 19];
1342 let mut toks = Vec::with_capacity(NUM_LITERALS + NUM_DISTS);
1343 let mut cw = [0u16; 19];
1344 let mut cl = [0u8; 19];
1345 let mut nc = 0;
1346
1347 add_codes(&codes.lens[..codes.num_codes], &mut stats, &mut toks);
1348 add_codes(&dists.lens[..dists.num_codes], &mut stats, &mut toks);
1349
1350 gen_tree(&mut cw, &mut cl, &mut nc, &mut stats, 7);
1351
1352 nc = cw.len();
1353 for &idx in LEN_RECODE.iter().rev() {
1354 if cl[idx] == 0 {
1355 nc -= 1;
1356 } else {
1357 break;
1358 }
1359 }
1360 if nc < 4 {
1361 nc = 4;
1362 }
1363 self.write((nc - 4) as u16, 4);
1364 for &idx in LEN_RECODE.iter().take(nc) {
1365 self.write(u16::from(cl[idx]), 3);
1366 }
1367 for &(sym, add) in toks.iter() {
1368 self.write(cw[sym as usize], cl[sym as usize]);
1369 match sym {
1370 16 => self.write(u16::from(add), 2),
1371 17 => self.write(u16::from(add), 3),
1372 18 => self.write(u16::from(add), 7),
1373 _ => {},
1374 };
1375 }
1376 }
1377 fn write_tokens(&mut self, src: &[Token], codes: &CodeHuff, dists: &DistHuff) {
1378 for &tok in src.iter() {
1379 self.write(codes.codes[tok.sym as usize], codes.lens[tok.sym as usize]);
1380 if tok.sym > 256 {
1381 self.write_len_bits(tok.len);
1382 self.write(dists.codes[tok.distsym as usize], dists.lens[tok.distsym as usize]);
1383 self.write_dist_bits(tok.dist);
1384 }
1385 }
1386 }
1387 fn write_len_bits(&mut self, len: u16) {
1388 let llen = len - 3;
1389 if llen >= 8 && llen < 255 {
1390 let bits = (16 - llen.leading_zeros() - 3) as u8;
1391 self.write(llen & ((1 << bits) - 1), bits);
1392 }
1393 }
1394 fn write_dist_bits(&mut self, dist: u16) {
1395 let ddist = dist - 1;
1396 if dist >= 4 {
1397 let bits = (16 - ddist.leading_zeros() - 2) as u8;
1398 self.write(ddist & ((1 << bits) - 1), bits);
1399 }
1400 }
1401}
1402
1403struct CodeHuff {
1404 is_fixed: bool,
1405 stats: [u32; NUM_LITERALS],
1406 codes: [u16; NUM_LITERALS],
1407 lens: [u8; NUM_LITERALS],
1408 num_codes: usize,
1409}
1410
1411impl CodeHuff {
1412 fn new(is_fixed: bool) -> Self {
1413 Self {
1414 is_fixed,
1415 stats: [0; NUM_LITERALS],
1416 codes: [0; NUM_LITERALS],
1417 lens: [0; NUM_LITERALS],
1418 num_codes: NUM_LITERALS,
1419 }
1420 }
1421 fn make_codes(&mut self, src: &[Token]) {
1422 if self.is_fixed {
1423 for i in 0..=143 {
1424 self.codes[i] = reverse_bits((i + 0x30) as u32, 8) as u16;
1425 self.lens[i] = 8;
1426 }
1427 for i in 144..=255 {
1428 self.codes[i] = reverse_bits((i + 0x100) as u32, 9) as u16;
1429 self.lens[i] = 9;
1430 }
1431 for i in 256..=279 {
1432 self.codes[i] = reverse_bits((i & 0x1F) as u32, 7) as u16;
1433 self.lens[i] = 7;
1434 }
1435 for i in 280..NUM_LITERALS {
1436 self.codes[i] = reverse_bits((i - 280 + 0xC0) as u32, 8) as u16;
1437 self.lens[i] = 8;
1438 }
1439 } else {
1440 for &tok in src.iter() {
1441 self.stats[tok.sym as usize] += 1;
1442 }
1443 gen_tree(&mut self.codes, &mut self.lens, &mut self.num_codes, &mut self.stats, 15);
1444 if self.num_codes < 257 {
1445 self.num_codes = 257;
1446 }
1447 }
1448 }
1449}
1450
1451struct DistHuff {
1452 is_fixed: bool,
1453 stats: [u32; NUM_DISTS],
1454 codes: [u16; NUM_DISTS],
1455 lens: [u8; NUM_DISTS],
1456 num_codes: usize,
1457}
1458
1459impl DistHuff {
1460 fn new(is_fixed: bool) -> Self {
1461 Self {
1462 is_fixed,
1463 stats: [0; NUM_DISTS],
1464 codes: [0; NUM_DISTS],
1465 lens: [0; NUM_DISTS],
1466 num_codes: NUM_DISTS,
1467 }
1468 }
1469 fn make_codes(&mut self, src: &[Token]) {
1470 if self.is_fixed {
1471 for i in 0..NUM_DISTS {
1472 self.codes[i] = reverse_bits(i as u32, 5) as u16;
1473 self.lens[i] = 5;
1474 }
1475 } else {
1476 for &tok in src.iter() {
1477 if tok.sym > 256 {
1478 self.stats[tok.distsym as usize] += 1;
1479 }
1480 }
1481 gen_tree(&mut self.codes, &mut self.lens, &mut self.num_codes, &mut self.stats, 15);
1482 if self.num_codes < 1 {
1483 self.num_codes = 1;
1484 }
1485 }
1486 }
1487}
1488
1489#[derive(Clone,Copy,Default)]
1490struct Node {
1491 sym: u16,
1492 w: u16,
1493 idx0: u16,
1494 idx1: u16,
1495}
1496
1497const NODE_SYM: u16 = 65500;
1498
1499struct Tree {
1500 nodes: [Node; NUM_LITERALS * 2],
1501 nnodes: usize,
1502}
1503
1504impl Tree {
1505 fn new() -> Self {
1506 Self {
1507 nodes: [Node::default(); NUM_LITERALS * 2],
1508 nnodes: 0,
1509 }
1510 }
1511 fn insert(&mut self, val: Node) {
1512 let mut idx = self.nnodes;
1513 for (i, nn) in self.nodes[..self.nnodes].iter().enumerate() {
1514 if nn.w > val.w {
1515 idx = i;
1516 break;
1517 }
1518 }
1519 if idx < self.nnodes {
1520 for i in (idx..self.nnodes).rev() {
1521 self.nodes[i + 1] = self.nodes[i];
1522 }
1523 }
1524 self.nodes[idx] = val;
1525 self.nnodes += 1;
1526 }
1527 fn trim(&mut self) {
1528 let mut start = self.nnodes;
1529 for (i, n) in self.nodes[..self.nnodes].iter().enumerate() {
1530 if n.w != 0 {
1531 start = i;
1532 break;
1533 }
1534 }
1535 if start != 0 {
1536 for i in 0..self.nnodes - start {
1537 self.nodes[i] = self.nodes[i + start];
1538 }
1539 self.nnodes -= start;
1540 }
1541 }
1542 fn build(&mut self) {
1543 if self.nnodes == 1 {
1544 self.nodes[0].w = 1;
1545 return;
1546 }
1547 let mut start = 0;
1548 while start + 1 < self.nnodes {
1549 let nn1 = self.nodes[start];
1550 let nn2 = self.nodes[start + 1];
1551 let n = Node {
1552 sym: NODE_SYM,
1553 w: nn1.w + nn2.w,
1554 idx0: start as u16,
1555 idx1: (start + 1) as u16,
1556 };
1557 self.nodes[start].w = 0;
1558 self.nodes[start + 1].w = 0;
1559 start += 2;
1560 self.insert(n);
1561 }
1562 if self.nnodes > 1 {
1563 self.assign_len(self.nnodes - 1, 0);
1564 }
1565 }
1566 fn assign_len(&mut self, idx: usize, len: u16) {
1567 if self.nodes[idx].sym == NODE_SYM {
1568 self.assign_len(self.nodes[idx].idx0 as usize, len + 1);
1569 self.assign_len(self.nodes[idx].idx1 as usize, len + 1);
1570 } else {
1571 self.nodes[idx].w = len;
1572 }
1573 }
1574}
1575
1576fn gen_tree(codes: &mut [u16], lens: &mut [u8], num_codes: &mut usize, stats: &mut [u32], max_bits: u8) {
1577 let mut tot_w = 0;
1578 for &w in stats.iter() {
1579 tot_w += w;
1580 }
1581 if tot_w == 0 {
1582 codes[0] = 0;
1583 lens[0] = 0;
1584 *num_codes = 0;
1585 return;
1586 }
1587 while tot_w > (1 << max_bits) {
1588 for w in stats.iter_mut() {
1589 *w = (*w + 1) >> 1;
1590 }
1591 tot_w = 0;
1592 for &w in stats.iter() {
1593 tot_w += w;
1594 }
1595 }
1596 let mut tree = Tree::new();
1597 for (sym, &w) in stats.iter().enumerate() {
1598 tree.insert(Node{ sym: sym as u16, w: w as u16, idx0: 64000, idx1: 64000 });
1599 }
1600 tree.trim();
1601 tree.build();
1602
1603 for n in tree.nodes[..tree.nnodes].iter() {
1604 if n.sym != NODE_SYM {
1605 lens[n.sym as usize] = n.w as u8;
1606 }
1607 }
1608 lengths_to_codes16(lens, codes);
1609 let mut sz = codes.len();
1610 for &len in lens.iter().rev() {
1611 if len != 0 {
1612 break;
1613 }
1614 sz -= 1;
1615 }
1616 *num_codes = sz;
1617}
1618
1619fn lengths_to_codes16(lens: &[u8], codes: &mut [u16]) {
1620 let mut bits = [0u32; 32];
1621 let mut pfx = [0u32; 33];
1622 for len in lens.iter() {
1623 let len = *len as usize;
1624 bits[len] += 1;
1625 }
1626 bits[0] = 0;
1627 let mut code = 0;
1628 for i in 0..bits.len() {
1629 code = (code + bits[i]) << 1;
1630 pfx[i + 1] = code;
1631 }
1632
1633 for (len, codes) in lens.iter().zip(codes.iter_mut()) {
1634 let len = *len as usize;
1635 if len != 0 {
1636 let bits = len as u8;
1637 *codes = reverse_bits(pfx[len], bits) as u16;
1638 pfx[len] += 1;
1639 } else {
1640 *codes = 0;
1641 }
1642 }
1643}
1644
1645trait LZParse {
1646 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>);
1647}
1648
1649struct NoParser {}
1650impl LZParse for NoParser {
1651 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>) {
1652 dst.reserve(src.len());
1653 for &b in src.iter() {
1654 dst.push(Token::from_literal(b));
1655 }
1656 dst.push(TOKEN_EOB);
1657 }
1658}
1659
1660fn check_match(src1: &[u8], src2: &[u8]) -> u16 {
1661 let mut len = 0;
1662 for (&a, &b) in src1.iter().zip(src2.iter()) {
1663 if a != b {
1664 break;
1665 }
1666 len += 1;
1667 }
1668 len
1669}
1670
1671const HASH_SIZE: usize = 4096;
1672const MAX_MATCH_LEN: usize = 258;
1673const WINDOW_SIZE: usize = 32768 - MAX_MATCH_LEN;
1674const NONEXT: usize = WINDOW_SIZE * 2;
1675
1676struct MatchFinder<'a> {
1677 src: &'a [u8],
1678 pos: usize,
1679 hstart: [usize; HASH_SIZE],
1680 hend: [usize; HASH_SIZE],
1681 hnext: [usize; WINDOW_SIZE * 3],
1682}
1683
1684impl<'a> MatchFinder<'a> {
1685 fn new(src: &'a [u8]) -> Self {
1686 let mut obj = Self {
1687 src,
1688 pos: 0,
1689 hstart: [0; HASH_SIZE],
1690 hend: [0; HASH_SIZE],
1691 hnext: [0; WINDOW_SIZE * 3],
1692 };
1693 obj.build_hash();
1694 obj
1695 }
1696 fn hash(src: &[u8]) -> usize {
1697 let _ = src[2];
1698 (((u16::from(src[0]) << 10) ^ (u16::from(src[1]) << 5) ^ u16::from(src[2])) & ((HASH_SIZE as u16) - 1)) as usize
1699 }
1700 fn build_hash(&mut self) {
1701 for el in self.hstart.iter_mut() { *el = NONEXT; }
1702 for el in self.hend.iter_mut() { *el = NONEXT; }
1703 for el in self.hnext.iter_mut() { *el = NONEXT; }
1704 if self.pos + 3 >= self.src.len() {
1705 return;
1706 }
1707 let end = (self.src.len() - 3).min(self.pos + NONEXT);
1708 for i in (self.pos .. end).rev() {
1709 let key = Self::hash(&self.src[i..]);
1710 if self.hstart[key] == NONEXT {
1711 self.hstart[key] = i;
1712 self.hend[key] = i;
1713 self.hnext[key] = NONEXT;
1714 } else {
1715 self.hnext[self.hend[key]] = i;
1716 self.hend[key] = i;
1717 }
1718 }
1719 }
1720 fn find_match(&mut self) -> (u16, u16) {
1721 if self.pos == 0 || self.pos + 3 > self.src.len() {
1722 return (0, 0);
1723 }
1724 let key = Self::hash(&self.src[self.pos..]) as usize;
1725
1726 let mut best_pos = 0;
1727 let mut best_len = 0;
1728 let mut idx = self.hstart[key];
1729 while idx != NONEXT && idx + WINDOW_SIZE > self.pos {
1730 if idx < self.pos {
1731 let cur_len = check_match(&self.src[self.pos..], &self.src[idx..]);
1732 if cur_len > best_len {
1733 best_len = cur_len;
1734 best_pos = self.pos - idx;
1735 if best_len >= (MAX_MATCH_LEN as u16) {
1736 return (best_pos as u16, MAX_MATCH_LEN as u16);
1737 }
1738 }
1739 }
1740 idx = self.hnext[idx];
1741 }
1742 (best_pos as u16, best_len)
1743 }
1744 fn find_all_matches(&mut self, dists: &mut [u16; MAX_MATCH_LEN + 1]) {
1745 if self.pos == 0 || self.pos + 3 > self.src.len() {
1746 return;
1747 }
1748 let key = Self::hash(&self.src[self.pos..]) as usize;
1749 let mut idx = self.hstart[key];
1750 while idx != NONEXT && idx + WINDOW_SIZE > self.pos {
1751 if idx < self.pos {
1752 let cur_len = (check_match(&self.src[self.pos..], &self.src[idx..]) as usize).min(MAX_MATCH_LEN);
1753 if cur_len > 0 && dists[cur_len] == 0 {
1754 dists[cur_len] = (self.pos - idx) as u16;
1755 }
1756 }
1757 idx = self.hnext[idx];
1758 }
1759 }
1760 fn advance(&mut self, num: usize) {
1761 self.pos += num;
1762
1763 if self.pos >= NONEXT {
1764 let (_, tail) = self.src.split_at(self.pos - WINDOW_SIZE);
1765 self.src = tail;
1766 self.pos = WINDOW_SIZE;
1767 self.build_hash();
1768 } else {
1769 for (start, end) in self.hstart.iter_mut().zip(self.hend.iter_mut()) {
1770 let mut idx = *start;
1771 while idx != NONEXT && idx + WINDOW_SIZE < self.pos {
1772 idx = self.hnext[idx];
1773 *start = idx;
1774 }
1775 if idx == NONEXT {
1776 *end = NONEXT;
1777 }
1778 }
1779 }
1780 }
1781 fn get_sym(&self) -> u8 { self.src[self.pos] }
1782 fn is_end(&self) -> bool { self.pos >= self.src.len() }
1783}
1784
1785struct GreedyParser {}
1786impl LZParse for GreedyParser {
1787 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>) {
1788 dst.reserve(src.len());
1789
1790 let mut matcher = MatchFinder::new(src);
1791 while !matcher.is_end() {
1792 let (best_pos, best_len) = matcher.find_match();
1793
1794 if best_len >= 3 {
1795 dst.push(Token::from_match(best_pos, best_len));
1796 matcher.advance(best_len as usize);
1797 } else {
1798 dst.push(Token::from_literal(matcher.get_sym()));
1799 matcher.advance(1);
1800 }
1801 }
1802 dst.push(TOKEN_EOB);
1803 }
1804}
1805
1806struct LazyParser {}
1807impl LZParse for LazyParser {
1808 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>) {
1809 dst.reserve(src.len());
1810
1811 let mut matcher = MatchFinder::new(src);
1812 while !matcher.is_end() {
1813 let (best_pos, best_len) = matcher.find_match();
1814 if best_len >= 3 {
1815 let last_sym = matcher.get_sym();
1816 matcher.advance(1);
1817 if !matcher.is_end() {
1818 let (best_pos1, best_len1) = matcher.find_match();
1819 if best_len1 > best_len + 1 {
1820 dst.push(Token::from_literal(last_sym));
1821 dst.push(Token::from_match(best_pos1, best_len1));
1822 matcher.advance(best_len1 as usize);
1823 continue;
1824 }
1825 }
1826 dst.push(Token::from_match(best_pos, best_len));
1827 matcher.advance((best_len - 1) as usize);
1828 } else {
1829 dst.push(Token::from_literal(matcher.get_sym()));
1830 matcher.advance(1);
1831 }
1832 }
1833 dst.push(TOKEN_EOB);
1834 }
1835}
1836
1837#[derive(Clone,Copy)]
1838struct TNode {
1839 price: u32,
1840 dist: u16,
1841 link: usize,
1842}
1843
1844impl Default for TNode {
1845 fn default() -> Self {
1846 Self {
1847 price: std::u32::MAX,
1848 dist: 0,
1849 link: 0,
1850 }
1851 }
1852}
1853
1854struct OptimalParser {
1855 trellis: Vec<TNode>,
1856}
1857impl OptimalParser {
1858 fn new() -> Self { Self::default() }
1859 fn sym_price(_sym: u8) -> u32 { 9 }
1860 fn match_price(dist: u16, _len: u16) -> u32 {
1861 if dist <= 4 {
1862 9 + 5
1863 } else {
1864 let bits = 16 - (dist - 1).leading_zeros();
1865 9 + 3 + bits
1866 }
1867 }
1868}
1869impl Default for OptimalParser {
1870 fn default() -> Self {
1871 Self {
1872 trellis: Vec::with_capacity(WINDOW_SIZE),
1873 }
1874 }
1875}
1876impl LZParse for OptimalParser {
1877 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>) {
1878 if src.is_empty() {
1879 dst.push(TOKEN_EOB);
1880 return;
1881 }
1882 dst.reserve(src.len());
1883
b191eef3 1884 self.trellis.clear();
f8cdb949
KS
1885 self.trellis.reserve(src.len() + 1);
1886 for _ in 0..=src.len() {
1887 self.trellis.push(TNode::default());
1888 }
1889 self.trellis[0].price = 0;
1890
1891 let mut matcher = MatchFinder::new(src);
1892 for i in 0..self.trellis.len() - 1 {
1893 let mut dists = [0; MAX_MATCH_LEN + 1];
1894 matcher.find_all_matches(&mut dists);
1895
1896 let sym = matcher.get_sym();
1897 let lprice = Self::sym_price(sym) + self.trellis[i].price;
1898 if self.trellis[i + 1].price > lprice {
1899 self.trellis[i + 1].price = lprice;
1900 self.trellis[i + 1].link = i;
1901 }
1902 for (len, &dist) in dists.iter().enumerate() {
1903 if dist != 0 {
1904 let mprice = Self::match_price(dist, len as u16) + self.trellis[i].price;
1905 if self.trellis[i + len].price > mprice {
1906 self.trellis[i + len].price = mprice;
1907 self.trellis[i + len].link = i;
1908 self.trellis[i].dist = dist;
1909 }
1910 }
1911 }
1912
1913 matcher.advance(1);
1914 }
1915 let mut idx = self.trellis.len() - 1;
1916 let mut nidx = self.trellis[idx].link;
1917 while idx > 0 {
1918 let oidx = idx;
1919 idx = nidx;
1920 nidx = self.trellis[idx].link;
1921 self.trellis[idx].link = oidx;
1922 }
1923
1924 let mut idx = 0;
1925 while idx < self.trellis.len() - 1 {
1926 let len = self.trellis[idx].link - idx;
1927 if len == 1 {
1928 dst.push(Token::from_literal(src[idx]));
1929 } else {
1930 dst.push(Token::from_match(self.trellis[idx].dist, len as u16));
1931 }
1932 idx = self.trellis[idx].link;
1933 }
1934
1935 dst.push(TOKEN_EOB);
1936 }
1937}
1938
1939///! Deflate compression mode.
1940#[derive(Clone,Copy,Debug,PartialEq)]
1941pub enum DeflateMode {
1942 ///! No compression.
1943 NoCompr,
1944 ///! Fast compression.
1945 Fast,
1946 ///! Still fast but better compression.
1947 Better,
1948 ///! Slow but the best compression.
1949 Best,
1950}
1951
8570a0b3
KS
1952impl Default for DeflateMode {
1953 fn default() -> Self { DeflateMode::Better }
1954}
1955
1956pub const DEFLATE_MODE_DESCRIPTION: &str = "Deflate compression level.";
1957///! Deflate option for no compression.
1958pub const DEFLATE_MODE_NONE: &str = "none";
1959///! Deflate option for fast compression.
1960pub const DEFLATE_MODE_FAST: &str = "fast";
1961///! Deflate option for better compression.
1962pub const DEFLATE_MODE_BETTER: &str = "better";
1963///! Deflate option for best compression.
1964pub const DEFLATE_MODE_BEST: &str = "best";
1965
1966///! All possible option values for deflate compression.
1967pub const DEFLATE_OPTION_VALUES: NAOptionDefinitionType = NAOptionDefinitionType::String(Some(&[DEFLATE_MODE_NONE, DEFLATE_MODE_FAST, DEFLATE_MODE_BETTER, DEFLATE_MODE_BEST]));
1968
1969impl std::str::FromStr for DeflateMode {
1970 type Err = ();
1971
1972 fn from_str(s: &str) -> Result<Self, Self::Err> {
1973 match s {
1974 DEFLATE_MODE_NONE => Ok(DeflateMode::NoCompr),
1975 DEFLATE_MODE_FAST => Ok(DeflateMode::Fast),
1976 DEFLATE_MODE_BETTER => Ok(DeflateMode::Better),
1977 DEFLATE_MODE_BEST => Ok(DeflateMode::Best),
1978 _ => Err(()),
1979 }
1980 }
1981}
1982
1983impl ToString for DeflateMode {
1984 fn to_string(&self) -> String {
1985 match *self {
1986 DeflateMode::NoCompr => DEFLATE_MODE_NONE.to_string(),
1987 DeflateMode::Fast => DEFLATE_MODE_FAST.to_string(),
1988 DeflateMode::Better => DEFLATE_MODE_BETTER.to_string(),
1989 DeflateMode::Best => DEFLATE_MODE_BEST.to_string(),
1990 }
1991 }
1992}
1993
f8cdb949
KS
1994#[derive(Clone,Copy,Debug,PartialEq)]
1995enum Mode {
1996 Copy,
1997 Fixed,
1998 Dynamic,
1999}
2000
2001const MAX_BLOCK_SIZE: usize = 65535;
2002
2003///! Deflate stream compressor.
2004pub struct Deflate {
2005 mode: Mode,
2006 tokens: Vec<Token>,
2007 srcbuf: [u8; MAX_BLOCK_SIZE],
2008 ssize: usize,
2009 sum1: u32,
2010 sum2: u32,
2011 zlib_mode: bool,
9f838842 2012 parser: Box<dyn LZParse + Send>,
f8cdb949
KS
2013}
2014
2015impl Deflate {
2016 ///! Creates a new instance of `Deflate`.
2017 pub fn new(mode: DeflateMode) -> Self {
2018 let (mode, parser) = match mode {
9f838842
KS
2019 DeflateMode::NoCompr => (Mode::Copy, Box::new(NoParser{}) as Box<dyn LZParse + Send>),
2020 DeflateMode::Fast => (Mode::Fixed, Box::new(GreedyParser{}) as Box<dyn LZParse + Send>),
2021 DeflateMode::Better => (Mode::Dynamic, Box::new(LazyParser{}) as Box<dyn LZParse + Send>),
2022 DeflateMode::Best => (Mode::Dynamic, Box::new(OptimalParser::new()) as Box<dyn LZParse + Send>),
f8cdb949
KS
2023 };
2024 Self {
2025 mode, parser,
2026 tokens: Vec::with_capacity(MAX_BLOCK_SIZE),
2027 srcbuf: [0; MAX_BLOCK_SIZE],
2028 ssize: 0,
2029 sum1: 1,
2030 sum2: 0,
2031 zlib_mode: false,
2032 }
2033 }
2034 ///! Writes zlib stream header.
2035 pub fn write_zlib_header(&mut self, wr: &mut DeflateWriter) {
2036 wr.write(8, 4);
2037 wr.write(7, 4);
2038 let level = match self.mode {
2039 Mode::Copy => 0x01,
2040 Mode::Fixed => 0x5E,
2041 Mode::Dynamic => 0x9C,
2042 // 0xDA for the strongest one
2043 };
2044 wr.write(level, 8);
2045 self.zlib_mode = true;
2046 }
2047 fn write_zlib_footer(&self, wr: &mut DeflateWriter) {
2048 wr.align();
2049 wr.write((self.sum2 >> 8) as u16, 8);
2050 wr.write((self.sum2 & 0xFF) as u16, 8);
2051 wr.write((self.sum1 >> 8) as u16, 8);
2052 wr.write((self.sum1 & 0xFF) as u16, 8);
2053 }
2054 ///! Queues data for compression.
2055 ///!
2056 ///! The data might be not actually compressed until [`compress_end`] is called.
2057 ///!
2058 ///! [`compress_end`]: ./struct.Deflate.html#method.compress_end
2059 pub fn compress(&mut self, src: &[u8], wr: &mut DeflateWriter) {
2060 let mut src = src;
2061 while !src.is_empty() {
2062 let clen = src.len().min(MAX_BLOCK_SIZE - self.ssize);
2063 let (head, tail) = src.split_at(clen);
2064 src = tail;
2065 self.srcbuf[self.ssize..][..clen].copy_from_slice(head);
2066 self.ssize += clen;
2067 if self.ssize == MAX_BLOCK_SIZE {
2068 self.do_block(wr, false);
2069 }
2070 }
2071 }
2072 ///! Tells the encoder to finish data compression.
2073 ///!
2074 ///! Complete data will be output after this call.
2075 pub fn compress_end(&mut self, wr: &mut DeflateWriter) {
2076 if self.ssize > 0 {
2077 self.do_block(wr, true);
2078 } else {
2079 wr.write(1, 1);
2080 wr.write(1, 2);
2081 wr.write(0, 7); //static EOF sym
2082 }
2083 if self.zlib_mode {
2084 self.write_zlib_footer(wr);
2085 }
2086 }
80da4ff8
KS
2087 ///! Tells the encoder to compress the data it received and flush it.
2088 pub fn compress_flush(&mut self, wr: &mut DeflateWriter) {
2089 if self.ssize > 0 {
2090 self.do_block(wr, false);
2091 }
2092 if (wr.bits & 7) != 0 {
2093 // write zero-length copy block for byte-alignment
2094 wr.write(0, 1);
2095 wr.write(0, 2);
2096 wr.align();
2097 wr.write(0, 16);
2098 wr.write(0xFFFF, 16);
2099 }
2100 }
f8cdb949
KS
2101 fn do_block(&mut self, wr: &mut DeflateWriter, final_block: bool) {
2102 const CRC_BASE: u32 = 65521;
2103 for &b in self.srcbuf[..self.ssize].iter() {
2104 self.sum1 += u32::from(b);
2105 if self.sum1 >= CRC_BASE {
2106 self.sum1 -= CRC_BASE;
2107 }
2108 self.sum2 += self.sum1;
2109 if self.sum2 >= CRC_BASE {
2110 self.sum2 -= CRC_BASE;
2111 }
2112 }
2113 match self.mode {
2114 Mode::Copy => {
2115 wr.write(final_block as u16, 1);
2116 wr.write(0, 2);
2117 wr.align();
2118 wr.write(self.ssize as u16, 16);
2119 wr.write(!self.ssize as u16, 16);
2120 for &b in self.srcbuf[..self.ssize].iter() {
2121 wr.write(u16::from(b), 8);
2122 }
2123 },
2124 Mode::Fixed => {
2125 wr.write(final_block as u16, 1);
2126 wr.write(1, 2);
b191eef3 2127 self.tokens.clear();
f8cdb949
KS
2128 self.parser.parse(&self.srcbuf[..self.ssize], &mut self.tokens);
2129 let mut codes = CodeHuff::new(true);
2130 codes.make_codes(&self.tokens);
2131 let mut dists = DistHuff::new(true);
2132 dists.make_codes(&self.tokens);
2133 wr.write_tokens(&self.tokens, &codes, &dists);
2134 },
2135 Mode::Dynamic => {
2136 wr.write(final_block as u16, 1);
2137 wr.write(2, 2);
b191eef3 2138 self.tokens.clear();
f8cdb949
KS
2139 self.parser.parse(&self.srcbuf[..self.ssize], &mut self.tokens);
2140 let mut codes = CodeHuff::new(false);
2141 codes.make_codes(&self.tokens);
2142 let mut dists = DistHuff::new(false);
2143 dists.make_codes(&self.tokens);
2144 wr.write((codes.num_codes - 257) as u16, 5);
2145 wr.write((dists.num_codes - 1) as u16, 5);
2146 wr.write_codes(&codes, &dists);
2147 wr.write_tokens(&self.tokens, &codes, &dists);
2148 },
2149 }
2150 self.ssize = 0;
2151 }
2152}
2153
0443d0c5
KS
2154#[cfg(test)]
2155mod test {
2156 use super::*;
2157
2158 #[test]
2159 fn test_inflate1() {
2160 const TEST_DATA: &[u8] = &[
2161 0xF3, 0x48, 0xCD, 0xC9, 0xC9, 0xD7, 0x51, 0x28,
2162 0xCF, 0x2F, 0xCA, 0x49, 0x51, 0x04, 0x00 ];
2163 const TEST_REF: &[u8] = b"Hello, world!";
2164 let mut dst_buf = [0u8; 13];
2165 let len = Inflate::uncompress(TEST_DATA, &mut dst_buf).unwrap();
2166 assert_eq!(len, 13);
2167 for i in 0..len {
2168 assert_eq!(dst_buf[i], TEST_REF[i]);
2169 }
2170 }
2171 #[test]
2172 fn test_inflate2() {
2173 const TEST_DATA3: &[u8] = &[ 0x4B, 0x4C, 0x44, 0x80, 0x24, 0x54, 0x80, 0x2C, 0x06, 0x00 ];
2174 const TEST_REF3: &[u8] = b"aaaaaaaaaaaabbbbbbbbbbbbbbbaaaaabbbbbbb";
2175 let mut dst_buf = [0u8; 39];
2176
2177 let mut inflate = Inflate::new();
2178 let mut output_chunk = [0u8; 7];
2179 let mut output_pos = 0;
2180 for input in TEST_DATA3.chunks(3) {
2181 let mut repeat = false;
2182 loop {
2183 let ret = inflate.decompress_data(input, &mut output_chunk, repeat);
2184 match ret {
2185 Ok(len) => {
2186 for i in 0..len {
2187 dst_buf[output_pos + i] = output_chunk[i];
2188 }
2189 output_pos += len;
2190 break;
2191 },
2192 Err(DecompressError::ShortData) => {
2193 break;
2194 },
2195 Err(DecompressError::OutputFull) => {
2196 repeat = true;
2197 for i in 0..output_chunk.len() {
2198 dst_buf[output_pos + i] = output_chunk[i];
2199 }
2200 output_pos += output_chunk.len();
2201 },
2202 _ => {
2203 panic!("decompress error {:?}", ret.err().unwrap());
2204 },
2205 }
2206 }
2207 }
2208
2209 assert_eq!(output_pos, dst_buf.len());
2210 for i in 0..output_pos {
2211 assert_eq!(dst_buf[i], TEST_REF3[i]);
2212 }
2213 }
2214 #[test]
2215 fn test_inflate3() {
2216 const TEST_DATA: &[u8] = &[
2217 0x1F, 0x8B, 0x08, 0x08, 0xF6, 0x7B, 0x90, 0x5E, 0x02, 0x03, 0x31, 0x2E, 0x74, 0x78, 0x74, 0x00,
2218 0xE5, 0x95, 0x4B, 0x4E, 0xC3, 0x30, 0x10, 0x40, 0xF7, 0x39, 0xC5, 0x1C, 0x00, 0x16, 0x70, 0x83,
2219 0x0A, 0xB5, 0x3B, 0xE8, 0x82, 0x5E, 0x60, 0x1A, 0x4F, 0xE2, 0x11, 0xFE, 0x44, 0x1E, 0xA7, 0x69,
2220 0x6E, 0xCF, 0x38, 0xDD, 0xB0, 0x40, 0xA2, 0x46, 0x2D, 0x20, 0x2A, 0xE5, 0xAB, 0xCC, 0xE7, 0xBD,
2221 0x49, 0xAC, 0x6C, 0x03, 0x64, 0x4B, 0xD0, 0x71, 0x92, 0x0C, 0x06, 0x67, 0x88, 0x1D, 0x3C, 0xD9,
2222 0xC4, 0x92, 0x3D, 0x4A, 0xF3, 0x3C, 0x43, 0x4E, 0x23, 0x81, 0x8B, 0x07, 0x82, 0x1E, 0xF5, 0x90,
2223 0x23, 0x78, 0x6A, 0x56, 0x30, 0x60, 0xCA, 0x89, 0x4D, 0x4F, 0xC0, 0x01, 0x10, 0x06, 0xC2, 0xA4,
2224 0xA1, 0x44, 0xCD, 0xF6, 0x54, 0x50, 0xA8, 0x8D, 0xC1, 0x9C, 0x5F, 0x71, 0x37, 0x45, 0xC8, 0x63,
2225 0xCA, 0x8E, 0xC0, 0xE8, 0x23, 0x69, 0x56, 0x9A, 0x8D, 0x5F, 0xB6, 0xC9, 0x96, 0x53, 0x4D, 0x17,
2226 0xAB, 0xB9, 0xB0, 0x49, 0x14, 0x5A, 0x0B, 0x96, 0x82, 0x7C, 0xB7, 0x6F, 0x17, 0x35, 0xC7, 0x9E,
2227 0xDF, 0x78, 0xA3, 0xF1, 0xD0, 0xA2, 0x73, 0x1C, 0x7A, 0xD8, 0x2B, 0xB3, 0x5C, 0x90, 0x85, 0xBB,
2228 0x2A, 0x14, 0x2E, 0xF7, 0xD1, 0x19, 0x48, 0x0A, 0x23, 0x57, 0x45, 0x13, 0x3E, 0xD6, 0xA0, 0xBD,
2229 0xF2, 0x11, 0x7A, 0x22, 0x21, 0xAD, 0xE5, 0x70, 0x56, 0xA0, 0x9F, 0xA5, 0xA5, 0x03, 0x85, 0x2A,
2230 0xDE, 0x92, 0x00, 0x32, 0x61, 0x10, 0xAD, 0x27, 0x13, 0x7B, 0x5F, 0x98, 0x7F, 0x59, 0x83, 0xB8,
2231 0xB7, 0x35, 0x16, 0xEB, 0x12, 0x0F, 0x1E, 0xD9, 0x14, 0x0B, 0xCF, 0xEE, 0x6D, 0x91, 0xF8, 0x93,
2232 0x6E, 0x81, 0x3F, 0x7F, 0x41, 0xA4, 0x22, 0x1F, 0xB7, 0xE6, 0x85, 0x83, 0x9A, 0xA2, 0x61, 0x12,
2233 0x0D, 0x0F, 0x6D, 0x01, 0xBD, 0xB0, 0xE8, 0x1D, 0xEC, 0xD1, 0xA0, 0xBF, 0x1F, 0x4E, 0xFB, 0x55,
2234 0xBD, 0x73, 0xDD, 0x87, 0xB9, 0x53, 0x23, 0x17, 0xD3, 0xE2, 0xE9, 0x08, 0x87, 0x42, 0xFF, 0xCF,
2235 0x26, 0x42, 0xAE, 0x76, 0xB5, 0xAE, 0x97, 0x0C, 0x18, 0x78, 0xA0, 0x24, 0xE5, 0x54, 0x0C, 0x6E,
2236 0x60, 0x52, 0x79, 0x22, 0x57, 0xF5, 0x87, 0x78, 0x78, 0x04, 0x93, 0x46, 0xEF, 0xCB, 0x98, 0x96,
2237 0x8B, 0x65, 0x00, 0xB7, 0x36, 0xBD, 0x77, 0xA8, 0xBD, 0x5A, 0xAA, 0x1A, 0x09, 0x00, 0x00
2238 ];
2239
2240 let mut mr = MemoryReader::new_read(TEST_DATA);
2241 let mut br = ByteReader::new(&mut mr);
2242 let _dst_buf = gzip_decode(&mut br, false).unwrap();
2243
2244// println!("{}", String::from_utf8_lossy(_dst_buf.as_slice()));
2245 }
f8cdb949
KS
2246 #[test]
2247 fn test_deflate_crc() {
2248 let output = Vec::with_capacity(20);
2249 let mut writer = DeflateWriter::new(output);
2250 let mut compr = Deflate::new(DeflateMode::NoCompr);
2251 compr.write_zlib_header(&mut writer);
2252 compr.compress(b"Hello, world!", &mut writer);
2253 compr.compress_end(&mut writer);
2254 let output = writer.end();
2255 assert_eq!(output.as_slice(), b"\x78\x01\x01\x0D\x00\xF2\xFFHello, world!\x20\x5E\x04\x8A");
2256 }
2257 fn deflate_test(mode: DeflateMode) {
2258 const SRC: &[u8] =
2259b"The first day of Christmas,
2260My true love sent to me
2261A partridge in a pear tree.
2262
2263The second day of Christmas,
2264My true love sent to me
2265Two turtle doves, and
2266A partridge in a pear tree.
2267
2268The third day of Christmas,
2269My true love sent to me
2270Three French hens,
2271Two turtle doves, and
2272A partridge in a pear tree.
2273
2274The fourth day of Christmas,
2275My true love sent to me
2276Four colly birds,
2277Three French hens,
2278Two turtle doves, and
2279A partridge in a pear tree.
2280
2281The fifth day of Christmas,
2282My true love sent to me
2283Five gold rings,
2284Four colly birds,
2285Three French hens,
2286Two turtle doves, and
2287A partridge in a pear tree.";
2288 let output = Vec::with_capacity(SRC.len() + 16);
2289 let mut writer = DeflateWriter::new(output);
2290 let mut compr = Deflate::new(mode);
2291 compr.write_zlib_header(&mut writer);
2292 compr.compress(SRC, &mut writer);
2293 compr.compress_end(&mut writer);
2294 let output = writer.end();
2295 let mut uncompr = vec![0u8; SRC.len()];
2296 Inflate::uncompress(&output, &mut uncompr).unwrap();
2297 assert_eq!(SRC, uncompr.as_slice());
2298 }
2299 #[test]
2300 fn test_deflate_fast() {
2301 deflate_test(DeflateMode::Fast);
2302 }
2303 #[test]
2304 fn test_deflate_better() {
2305 deflate_test(DeflateMode::Better);
2306 }
2307 #[test]
2308 fn test_deflate_best() {
2309 deflate_test(DeflateMode::Best);
2310 }
0443d0c5 2311}