]> git.nihav.org Git - nihav.git/blame - nihav-core/src/compr/deflate.rs
avimux: do not record palette change chunks in OpenDML index
[nihav.git] / nihav-core / src / compr / deflate.rs
CommitLineData
0443d0c5
KS
1//! Deflate format (RFC 1951) support.
2//!
f8cdb949 3//! This module provides functionality for decompressing raw deflated streams via [`Inflate`] and gzip files (RFC 1952) via [`gzip_decode`] and compressing raw or zlib streams via [`Deflate`].
0443d0c5 4//!
f8cdb949 5//! [`Deflate`]: ./struct.Deflate.html
0443d0c5
KS
6//! [`Inflate`]: ./struct.Inflate.html
7//! [`gzip_decode`]: ./fn.gzip_decode.html
8//!
9//! # Examples
10//!
11//! Decompressing full input buffer into sufficiently large output buffer:
12//! ```
13//! # use nihav_core::compr::DecompressError;
14//! use nihav_core::compr::deflate::Inflate;
15//!
16//! # fn decompress(input: &[u8]) -> Result<(), DecompressError> {
17//! # let mut output_buffer = [0u8; 16];
18//! let output_length = Inflate::uncompress(input, &mut output_buffer)?;
19//! # Ok(())
20//! # }
21//! ```
22//!
23//! Decompressing input chunks into portions of output:
24//! ```
25//! use nihav_core::compr::DecompressError;
26//! use nihav_core::compr::deflate::Inflate;
27//!
28//! # fn decompress(input_data: &[u8]) -> Result<(), DecompressError> {
29//! let mut inflate = Inflate::new();
30//! let mut dst_buf: Vec<u8> = Vec::new();
31//! let mut output_chunk = [0u8; 1024];
32//! for src in input_data.chunks(512) {
33//! let mut repeat = false;
34//! loop {
35//! let ret = inflate.decompress_data(src, &mut output_chunk, repeat);
36//! match ret {
37//! Ok(len) => { // we got a buffer decoded successfully to the end
38//! dst_buf.extend_from_slice(&output_chunk[..len]);
39//! break;
40//! },
41//! Err(DecompressError::ShortData) => { // this block of data was fully read
42//! break;
43//! },
44//! Err(DecompressError::OutputFull) => {
45//! // the output buffer is full, flush it and continue decoding the same block
46//! repeat = true;
47//! dst_buf.extend_from_slice(&output_chunk);
48//! },
49//! Err(err) => {
50//! return Err(err);
51//! },
52//! }
53//! }
54//! }
55//! # Ok(())
56//! # }
57//! ```
f8cdb949
KS
58//!
59//! Compressing input buffer into zlib stream:
60//! ```
61//! use nihav_core::compr::deflate::{Deflate, DeflateMode, DeflateWriter};
62//!
63//! # fn compress(input: &[u8]) {
64//! let output = Vec::with_capacity(input.len() * 65540 / 65535 + 6);
65//! let mut writer = DeflateWriter::new(output);
66//! let mut compr = Deflate::new(DeflateMode::Fast);
67//! compr.write_zlib_header(&mut writer);
68//! compr.compress(input, &mut writer);
69//! compr.compress_end(&mut writer);
70//! let output = writer.end();
71//! # }
72//! ```
0443d0c5 73
8570a0b3 74use crate::options::NAOptionDefinitionType;
0443d0c5
KS
75use crate::io::byteio::*;
76use crate::io::bitreader::*;
77use crate::io::codebook::*;
78use super::*;
79
80const NUM_LITERALS: usize = 287;
81const NUM_DISTS: usize = 32;
82
83struct FixedLenCodeReader {}
84
85impl CodebookDescReader<u16> for FixedLenCodeReader {
86 fn bits(&mut self, idx: usize) -> u8 {
87 if idx < 144 { 8 }
88 else if idx < 256 { 9 }
89 else if idx < 280 { 7 }
90 else { 8 }
91 }
b36f412c 92 #[allow(clippy::identity_op)]
0443d0c5
KS
93 fn code(&mut self, idx: usize) -> u32 {
94 let base = idx as u32;
95 let bits = self.bits(idx);
96 if idx < 144 { reverse_bits(base + 0x30, bits) }
97 else if idx < 256 { reverse_bits(base + 0x190 - 144, bits) }
98 else if idx < 280 { reverse_bits(base + 0x000 - 256, bits) }
99 else { reverse_bits(base + 0xC0 - 280, bits) }
100 }
101 fn sym (&mut self, idx: usize) -> u16 { idx as u16 }
102 fn len(&mut self) -> usize { NUM_LITERALS + 1 }
103}
104
105#[derive(Clone,Copy,Default)]
106struct BitReaderState {
107 pos: usize,
108 bitbuf: u32,
109 bits: u8,
110}
111
112struct CurrentSource<'a> {
113 src: &'a [u8],
114 br: BitReaderState,
115}
116
117impl<'a> CurrentSource<'a> {
118 fn new(src: &'a [u8], br: BitReaderState) -> Self {
119 let mut newsrc = Self { src, br };
120 newsrc.br.pos = 0;
121 newsrc.refill();
122 newsrc
123 }
124 fn reinit(src: &'a [u8], br: BitReaderState) -> Self {
125 let mut newsrc = Self { src, br };
126 newsrc.refill();
127 newsrc
128 }
129 fn refill(&mut self) {
130 while (self.br.bits <= 24) && (self.br.pos < self.src.len()) {
131 self.br.bitbuf |= u32::from(self.src[self.br.pos]) << self.br.bits;
132 self.br.bits += 8;
133 self.br.pos += 1;
134 }
135 }
136 fn skip_cache(&mut self, nbits: u8) {
137 self.br.bitbuf >>= nbits;
138 self.br.bits -= nbits;
139 }
140 fn read(&mut self, nbits: u8) -> BitReaderResult<u32> {
141 if nbits == 0 { return Ok(0); }
142 if nbits > 16 { return Err(BitReaderError::TooManyBitsRequested); }
143 if self.br.bits < nbits {
144 self.refill();
145 if self.br.bits < nbits { return Err(BitReaderError::BitstreamEnd); }
146 }
147 let ret = self.br.bitbuf & ((1 << nbits) - 1);
148 self.skip_cache(nbits);
149 Ok(ret)
150 }
151 fn read_bool(&mut self) -> BitReaderResult<bool> {
152 if self.br.bits == 0 {
153 self.refill();
154 if self.br.bits == 0 { return Err(BitReaderError::BitstreamEnd); }
155 }
156 let ret = (self.br.bitbuf & 1) != 0;
157 self.skip_cache(1);
158 Ok(ret)
159 }
160 fn peek(&mut self, nbits: u8) -> u32 {
161 if nbits == 0 || nbits > 16 { return 0; }
162 if self.br.bits < nbits {
163 self.refill();
164 }
165 self.br.bitbuf & ((1 << nbits) - 1)
166 }
167 fn skip(&mut self, nbits: u32) -> BitReaderResult<()> {
168 if u32::from(self.br.bits) >= nbits {
169 self.skip_cache(nbits as u8);
170 } else {
171 unreachable!();
172 }
173 Ok(())
174 }
7bf82b33
KS
175 fn skip_bytes(&mut self, nbytes: usize) -> BitReaderResult<()> {
176 self.align();
177 let cached = usize::from(self.br.bits / 8);
178 if nbytes <= cached {
179 self.skip((nbytes as u32) * 8)?;
180 } else {
181 self.skip((cached as u32) * 8)?;
182 self.br.bits = 0;
183 self.br.bitbuf = 0;
184 self.br.pos += nbytes - cached;
185 if self.br.pos > self.src.len() {
186 return Err(BitReaderError::BitstreamEnd);
187 }
188 self.refill();
189 }
190 Ok(())
191 }
0443d0c5
KS
192 fn align(&mut self) {
193 let b = self.br.bits & 7;
194 if b != 0 {
11f1d51e 195 self.skip_cache(b);
0443d0c5
KS
196 }
197 }
198 fn left(&self) -> isize {
199 ((self.src.len() as isize) - (self.br.pos as isize)) * 8 + (self.br.bits as isize)
200 }
7bf82b33
KS
201 fn tell(&self) -> usize {
202 self.br.pos - usize::from(self.br.bits / 8)
203 }
0443d0c5
KS
204}
205
206impl<'a, S: Copy> CodebookReader<S> for CurrentSource<'a> {
207 fn read_cb(&mut self, cb: &Codebook<S>) -> CodebookResult<S> {
208 let mut esc = true;
209 let mut idx = 0;
210 let mut lut_bits = cb.lut_bits;
211 let orig_br = self.br;
212 while esc {
e6aaad5c 213 let lut_idx = (self.peek(lut_bits) as usize) + idx;
0443d0c5
KS
214 if cb.table[lut_idx] == TABLE_FILL_VALUE { return Err(CodebookError::InvalidCode); }
215 let bits = cb.table[lut_idx] & 0x7F;
216 esc = (cb.table[lut_idx] & 0x80) != 0;
217 idx = (cb.table[lut_idx] >> 8) as usize;
218 let skip_bits = if esc { u32::from(lut_bits) } else { bits };
219 if (skip_bits as isize) > self.left() {
220 self.br = orig_br;
221 self.refill();
222 return Err(CodebookError::MemoryError);
223 }
e6aaad5c 224 self.skip(skip_bits).unwrap();
0443d0c5
KS
225 lut_bits = bits as u8;
226 }
227 Ok(cb.syms[idx])
228 }
229}
230
231enum InflateState {
232 Start,
233 BlockStart,
234 BlockMode,
235 StaticBlockLen,
236 StaticBlockInvLen(u32),
237 StaticBlockCopy(usize),
238 FixedBlock,
239 FixedBlockLengthExt(usize, u8),
240 FixedBlockDist(usize),
241 FixedBlockDistExt(usize, usize, u8),
242 FixedBlockCopy(usize, usize),
243 FixedBlockLiteral(u8),
244 DynBlockHlit,
245 DynBlockHdist,
246 DynBlockHclen,
247 DynLengths(usize),
248 DynCodeLengths,
249 DynCodeLengthsAdd(usize),
250 DynBlock,
251 DynBlockLengthExt(usize, u8),
252 DynBlockDist(usize),
253 DynBlockDistExt(usize, usize, u8),
254 DynCopy(usize, usize),
255 DynBlockLiteral(u8),
256 End,
257}
258
79fa5fbf 259/// The decompressor for deflated streams (RFC 1951).
0443d0c5
KS
260pub struct Inflate {
261 br: BitReaderState,
262 fix_len_cb: Codebook<u16>,
263
264 buf: [u8; 65536],
265 bpos: usize,
266 output_idx: usize,
37d96dbb 267 full_pos: usize,
0443d0c5
KS
268
269 state: InflateState,
270 final_block: bool,
271 hlit: usize,
272 hdist: usize,
273 dyn_len_cb: Option<Codebook<u32>>,
274 dyn_lit_cb: Option<Codebook<u32>>,
275 dyn_dist_cb: Option<Codebook<u32>>,
276 len_lengths: [u8; 19],
277 all_lengths: [u8; NUM_LITERALS + NUM_DISTS],
278 cur_len_idx: usize,
279}
280
281const LENGTH_ADD_BITS: [u8; 29] = [
282 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
283 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
284 4, 4, 4, 4, 5, 5, 5, 5, 0
285];
286const LENGTH_BASE: [u16; 29] = [
287 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
288 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
289 67, 83, 99, 115, 131, 163, 195, 227, 258
290];
291const DIST_ADD_BITS: [u8; 30] = [
292 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
293 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
294 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
295];
296const DIST_BASE: [u16; 30] = [
297 1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
298 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
299 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
300];
301const LEN_RECODE: [usize; 19] = [
302 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
303];
304const REPEAT_BITS: [u8; 3] = [ 2, 3, 7 ];
305const REPEAT_BASE: [u8; 3] = [ 3, 3, 11 ];
306
307macro_rules! read_bits {
308 ($self: expr, $csrc: expr, $bits: expr) => ({
309 if $csrc.left() < $bits as isize {
310 $self.br = $csrc.br;
311 return Err(DecompressError::ShortData);
312 }
313 $csrc.read($bits).unwrap()
314 })
315}
316
317macro_rules! read_cb {
318 ($self: expr, $csrc: expr, $cb: expr) => ({
319 let ret = $csrc.read_cb($cb);
320 if let Err(CodebookError::MemoryError) = ret {
321 $self.br = $csrc.br;
322 return Err(DecompressError::ShortData);
323 }
324 match ret {
325 Ok(val) => val,
326 Err(_) => {
327 $self.state = InflateState::End;
328 return Err(DecompressError::InvalidData);
329 },
330 }
331 })
332}
333
334impl Inflate {
79fa5fbf 335 /// Creates a new instance of `Inflate` struct.
0443d0c5
KS
336 pub fn new() -> Self {
337 let mut cr = FixedLenCodeReader {};
338 let fix_len_cb = Codebook::new(&mut cr, CodebookMode::LSB).unwrap();
339 Self {
340 br: BitReaderState::default(),
341 fix_len_cb,
342
343 buf: [0; 65536],
344 bpos: 0,
345 output_idx: 0,
37d96dbb 346 full_pos: 0,
0443d0c5
KS
347
348 state: InflateState::Start,
349 final_block: false,
350 dyn_len_cb: None,
351 dyn_lit_cb: None,
352 dyn_dist_cb: None,
353 hlit: 0,
354 hdist: 0,
355 len_lengths: [0; 19],
356 all_lengths: [0; NUM_LITERALS + NUM_DISTS],
357 cur_len_idx: 0,
358 }
359 }
360 fn put_literal(&mut self, val: u8) {
361 self.buf[self.bpos] = val;
c96ad969 362 self.bpos = (self.bpos + 1) & (self.buf.len() - 1);
37d96dbb 363 self.full_pos += 1;
0443d0c5
KS
364 }
365 fn lz_copy(&mut self, offset: usize, len: usize, dst: &mut [u8]) -> DecompressResult<()> {
366 let mask = self.buf.len() - 1;
37d96dbb 367 if offset > self.full_pos {
0443d0c5
KS
368 return Err(DecompressError::InvalidData);
369 }
c96ad969 370 let cstart = (self.bpos.wrapping_sub(offset)) & mask;
1ff7036b 371 for (i, out) in dst[..len].iter_mut().enumerate() {
0443d0c5 372 self.buf[(self.bpos + i) & mask] = self.buf[(cstart + i) & mask];
1ff7036b 373 *out = self.buf[(cstart + i) & mask];
0443d0c5 374 }
c96ad969 375 self.bpos = (self.bpos + len) & mask;
37d96dbb 376 self.full_pos += len;
0443d0c5
KS
377 Ok(())
378 }
79fa5fbf 379 /// Sets custom history for decoding an update for already decoded data.
bc23de6b
KS
380 pub fn set_dict(&mut self, dict: &[u8]) {
381 let len = dict.len().min(self.buf.len());
382 let start = dict.len() - len;
383 self.buf[..len].copy_from_slice(&dict[start..]);
384 self.bpos = len;
385 self.full_pos = len;
386 }
79fa5fbf 387 /// Reports whether decoder has finished decoding the input.
0443d0c5 388 pub fn is_finished(&self) -> bool {
6f263099 389 matches!(self.state, InflateState::End)
0443d0c5 390 }
79fa5fbf 391 /// Reports the current amount of bytes output into the destination buffer after the last run.
0443d0c5 392 pub fn get_current_output_size(&self) -> usize { self.output_idx }
79fa5fbf 393 /// Reports the total amount of bytes decoded so far.
0443d0c5 394 pub fn get_total_output_size(&self) -> usize { self.bpos }
79fa5fbf
KS
395 /// Tries to decompress input data and write it to the output buffer.
396 ///
397 /// Since the decompressor can work with arbitrary input and output chunks its return value may have several meanings:
398 /// * `Ok(len)` means the stream has been fully decoded and then number of bytes output into the destination buffer is returned.
399 /// * [`DecompressError::ShortData`] means the input stream has been fully read but more data is needed.
400 /// * [`DecompressError::OutputFull`] means the output buffer is full and should be flushed. Then decoding should continue on the same input block with `continue_block` parameter set to `true`.
401 ///
402 /// [`DecompressError::ShortData`]: ../enum.DecompressError.html#variant.ShortData
403 /// [`DecompressError::OutputFull`]: ../enum.DecompressError.html#variant.OutputFull
0443d0c5 404 pub fn decompress_data(&mut self, src: &[u8], dst: &mut [u8], continue_block: bool) -> DecompressResult<usize> {
de161d26
KS
405 self.decompress_data_internal(src, dst, continue_block, false)
406 }
79fa5fbf 407 /// Tries to decompress whole input chunk to the output buffer.
de161d26
KS
408 pub fn decompress_block(&mut self, src: &[u8], dst: &mut [u8]) -> DecompressResult<usize> {
409 self.decompress_data_internal(src, dst, false, true)
410 }
411 #[allow(clippy::comparison_chain)]
412 fn decompress_data_internal(&mut self, src: &[u8], dst: &mut [u8], continue_block: bool, do_one_block: bool) -> DecompressResult<usize> {
b36f412c 413 if src.is_empty() || dst.is_empty() {
0443d0c5
KS
414 return Err(DecompressError::InvalidArgument);
415 }
416 let mut csrc = if !continue_block {
417 CurrentSource::new(src, self.br)
418 } else {
419 self.output_idx = 0;
420 CurrentSource::reinit(src, self.br)
421 };
de161d26
KS
422 if do_one_block {
423 self.output_idx = 0;
424 }
0c9b706a
KS
425 // check for zlib stream header
426 if let (&InflateState::Start, true) = (&self.state, src.len() > 2) {
427 let cm = src[0] & 0xF;
428 let cinfo = src[0] >> 4;
429 let hdr = (u16::from(src[0]) << 8) | u16::from(src[1]);
430 if cm == 8 && cinfo <= 7 && (hdr % 31) == 0 {
431 csrc.skip(16).unwrap();
432 }
433 }
0443d0c5
KS
434 'main: loop {
435 match self.state {
436 InflateState::Start | InflateState::BlockStart => {
437 if csrc.left() == 0 {
de161d26
KS
438 if do_one_block {
439 return Ok(self.output_idx);
440 }
0443d0c5
KS
441 self.br = csrc.br;
442 return Err(DecompressError::ShortData);
443 }
444 self.final_block = csrc.read_bool().unwrap();
445 self.state = InflateState::BlockMode;
446 },
447 InflateState::BlockMode => {
448 let bmode = read_bits!(self, csrc, 2);
449 match bmode {
450 0 => {
451 csrc.align();
452 self.state = InflateState::StaticBlockLen;
453 },
454 1 => { self.state = InflateState::FixedBlock; },
455 2 => { self.state = InflateState::DynBlockHlit; },
456 _ => {
457 self.state = InflateState::End;
458 return Err(DecompressError::InvalidHeader);
459 },
460 };
461 },
462 InflateState::StaticBlockLen => {
463 let len = read_bits!(self, csrc, 16);
464 self.state = InflateState::StaticBlockInvLen(len);
465 },
466 InflateState::StaticBlockInvLen(len) => {
467 let inv_len = read_bits!(self, csrc, 16);
6130c973 468 if (len ^ inv_len) != 0xFFFF {
0443d0c5
KS
469 self.state = InflateState::End;
470 return Err(DecompressError::InvalidHeader);
471 }
472 self.state = InflateState::StaticBlockCopy(len as usize);
473 },
474 InflateState::StaticBlockCopy(len) => {
475 for i in 0..len {
476 if csrc.left() < 8 {
477 self.br = csrc.br;
478 self.state = InflateState::StaticBlockCopy(len - i);
479 return Err(DecompressError::ShortData);
480 }
481 let val = csrc.read(8).unwrap() as u8;
482 self.put_literal(val);
483 }
484 self.state = InflateState::BlockStart;
485 }
486 InflateState::FixedBlock => {
487 let val = read_cb!(self, csrc, &self.fix_len_cb);
488 if val < 256 {
489 if self.output_idx >= dst.len() {
490 self.br = csrc.br;
491 self.state = InflateState::FixedBlockLiteral(val as u8);
492 return Err(DecompressError::OutputFull);
493 }
494 self.put_literal(val as u8);
495 dst[self.output_idx] = val as u8;
496 self.output_idx += 1;
497 } else if val == 256 {
498 if self.final_block {
499 self.state = InflateState::End;
500 return Ok(self.output_idx);
501 } else {
502 self.state = InflateState::BlockStart;
503 }
504 } else {
505 let len_idx = (val - 257) as usize;
506 if len_idx >= LENGTH_BASE.len() {
507 self.state = InflateState::End;
508 return Err(DecompressError::InvalidData);
509 }
510 let len_bits = LENGTH_ADD_BITS[len_idx];
511 let add_base = LENGTH_BASE[len_idx] as usize;
512 if len_bits > 0 {
513 self.state = InflateState::FixedBlockLengthExt(add_base, len_bits);
514 } else {
515 self.state = InflateState::FixedBlockDist(add_base);
516 }
517 }
518 },
519 InflateState::FixedBlockLiteral(sym) => {
520 if self.output_idx >= dst.len() {
521 self.br = csrc.br;
522 return Err(DecompressError::OutputFull);
523 }
524 self.put_literal(sym);
525 dst[self.output_idx] = sym;
526 self.output_idx += 1;
527 self.state = InflateState::FixedBlock;
528 },
529 InflateState::FixedBlockLengthExt(base, bits) => {
530 let add = read_bits!(self, csrc, bits) as usize;
531 self.state = InflateState::FixedBlockDist(base + add);
532 },
533 InflateState::FixedBlockDist(length) => {
534 let dist_idx = reverse_bits(read_bits!(self, csrc, 5), 5) as usize;
535 if dist_idx >= DIST_BASE.len() {
536 self.state = InflateState::End;
537 return Err(DecompressError::InvalidData);
538 }
539 let dist_bits = DIST_ADD_BITS[dist_idx];
540 let dist_base = DIST_BASE[dist_idx] as usize;
541 if dist_bits == 0 {
542 self.state = InflateState::FixedBlockCopy(length, dist_base);
543 } else {
544 self.state = InflateState::FixedBlockDistExt(length, dist_base, dist_bits);
545 }
546 },
547 InflateState::FixedBlockDistExt(length, base, bits) => {
548 let add = read_bits!(self, csrc, bits) as usize;
549 self.state = InflateState::FixedBlockCopy(length, base + add);
550 },
551 InflateState::FixedBlockCopy(length, dist) => {
552 if self.output_idx + length > dst.len() {
553 let copy_size = dst.len() - self.output_idx;
554 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
555 if ret.is_err() {
556 self.state = InflateState::End;
557 return Err(DecompressError::InvalidData);
558 }
559 self.output_idx += copy_size;
560 self.br = csrc.br;
561 self.state = InflateState::FixedBlockCopy(length - copy_size, dist);
562 return Err(DecompressError::OutputFull);
563 }
564 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
565 if ret.is_err() {
566 self.state = InflateState::End;
567 return Err(DecompressError::InvalidData);
568 }
569 self.output_idx += length;
570 self.state = InflateState::FixedBlock;
571 }
572 InflateState::DynBlockHlit => {
573 self.hlit = (read_bits!(self, csrc, 5) as usize) + 257;
574 if self.hlit >= 287 {
575 self.state = InflateState::End;
576 return Err(DecompressError::InvalidHeader);
577 }
578 self.state = InflateState::DynBlockHdist;
579 }
580 InflateState::DynBlockHdist => {
581 self.hdist = (read_bits!(self, csrc, 5) as usize) + 1;
582 self.state = InflateState::DynBlockHclen;
583 },
584 InflateState::DynBlockHclen => {
585 let hclen = (read_bits!(self, csrc, 4) as usize) + 4;
586 self.cur_len_idx = 0;
587 self.len_lengths = [0; 19];
588 self.all_lengths = [0; NUM_LITERALS + NUM_DISTS];
589 self.state = InflateState::DynLengths(hclen);
590 },
591 InflateState::DynLengths(len) => {
592 for i in 0..len {
593 if csrc.left() < 3 {
594 self.br = csrc.br;
595 self.state = InflateState::DynLengths(len - i);
596 return Err(DecompressError::ShortData);
597 }
598 self.len_lengths[LEN_RECODE[self.cur_len_idx]] = csrc.read(3).unwrap() as u8;
599 self.cur_len_idx += 1;
600 }
601 let mut len_codes = [ShortCodebookDesc { code: 0, bits: 0 }; 19];
602 lengths_to_codes(&self.len_lengths, &mut len_codes)?;
78da9b7f 603 let mut cr = ShortCodebookDescReader::new(&len_codes);
0443d0c5
KS
604 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
605 if ret.is_err() {
606 self.state = InflateState::End;
607 return Err(DecompressError::InvalidHeader);
608 }
609 self.dyn_len_cb = Some(ret.unwrap());
610 self.cur_len_idx = 0;
611 self.state = InflateState::DynCodeLengths;
612 },
613 InflateState::DynCodeLengths => {
614 if let Some(ref len_cb) = self.dyn_len_cb {
615 while self.cur_len_idx < self.hlit + self.hdist {
616 let ret = csrc.read_cb(len_cb);
617 let val = match ret {
618 Ok(val) => val,
619 Err(CodebookError::MemoryError) => {
620 self.br = csrc.br;
621 return Err(DecompressError::ShortData);
622 },
623 Err(_) => {
624 self.state = InflateState::End;
625 return Err(DecompressError::InvalidHeader);
626 },
627 };
628 if val < 16 {
629 self.all_lengths[self.cur_len_idx] = val as u8;
630 self.cur_len_idx += 1;
631 } else {
632 let idx = (val as usize) - 16;
633 if idx > 2 {
634 self.state = InflateState::End;
635 return Err(DecompressError::InvalidHeader);
636 }
637 self.state = InflateState::DynCodeLengthsAdd(idx);
638 continue 'main;
639 }
640 }
641 let (lit_lengths, dist_lengths) = self.all_lengths.split_at(self.hlit);
642
643 let mut lit_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_LITERALS];
6f263099 644 lengths_to_codes(lit_lengths, &mut lit_codes)?;
78da9b7f 645 let mut cr = ShortCodebookDescReader::new(&lit_codes);
0443d0c5
KS
646 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
647 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
648 self.dyn_lit_cb = Some(ret.unwrap());
649
650 let mut dist_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_DISTS];
651 lengths_to_codes(&dist_lengths[..self.hdist], &mut dist_codes)?;
78da9b7f 652 let mut cr = ShortCodebookDescReader::new(&dist_codes);
0443d0c5
KS
653 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
654 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
655 self.dyn_dist_cb = Some(ret.unwrap());
656
657 self.state = InflateState::DynBlock;
658 } else {
659 unreachable!();
660 }
661 },
662 InflateState::DynCodeLengthsAdd(mode) => {
663 let base = REPEAT_BASE[mode] as usize;
664 let bits = REPEAT_BITS[mode];
665 let len = base + read_bits!(self, csrc, bits) as usize;
666 if self.cur_len_idx + len > self.hlit + self.hdist {
667 self.state = InflateState::End;
668 return Err(DecompressError::InvalidHeader);
669 }
b36f412c
KS
670 let rpt = if mode == 0 {
671 if self.cur_len_idx == 0 {
672 self.state = InflateState::End;
673 return Err(DecompressError::InvalidHeader);
674 }
675 self.all_lengths[self.cur_len_idx - 1]
676 } else {
677 0
678 };
0443d0c5
KS
679 for _ in 0..len {
680 self.all_lengths[self.cur_len_idx] = rpt;
681 self.cur_len_idx += 1;
682 }
683 self.state = InflateState::DynCodeLengths;
684 },
685 InflateState::DynBlock => {
686 if let Some(ref lit_cb) = self.dyn_lit_cb {
687 let val = read_cb!(self, csrc, lit_cb);
688 if val < 256 {
689 if self.output_idx >= dst.len() {
690 self.br = csrc.br;
691 self.state = InflateState::DynBlockLiteral(val as u8);
692 return Err(DecompressError::OutputFull);
693 }
694 self.put_literal(val as u8);
695 dst[self.output_idx] = val as u8;
696 self.output_idx += 1;
697 } else if val == 256 {
698 if self.final_block {
699 self.state = InflateState::End;
700 return Ok(self.output_idx);
701 } else {
702 self.state = InflateState::BlockStart;
703 }
704 } else {
705 let len_idx = (val - 257) as usize;
706 if len_idx >= LENGTH_BASE.len() {
707 self.state = InflateState::End;
708 return Err(DecompressError::InvalidData);
709 }
710 let len_bits = LENGTH_ADD_BITS[len_idx];
711 let add_base = LENGTH_BASE[len_idx] as usize;
712 if len_bits > 0 {
713 self.state = InflateState::DynBlockLengthExt(add_base, len_bits);
714 } else {
715 self.state = InflateState::DynBlockDist(add_base);
716 }
717 }
718 } else {
719 unreachable!();
720 }
721 },
722 InflateState::DynBlockLiteral(sym) => {
723 if self.output_idx >= dst.len() {
724 self.br = csrc.br;
725 return Err(DecompressError::OutputFull);
726 }
727 self.put_literal(sym);
728 dst[self.output_idx] = sym;
729 self.output_idx += 1;
730 self.state = InflateState::DynBlock;
731 },
732 InflateState::DynBlockLengthExt(base, bits) => {
733 let add = read_bits!(self, csrc, bits) as usize;
734 self.state = InflateState::DynBlockDist(base + add);
735 },
736 InflateState::DynBlockDist(length) => {
737 if let Some(ref dist_cb) = self.dyn_dist_cb {
738 let dist_idx = read_cb!(self, csrc, dist_cb) as usize;
739 if dist_idx >= DIST_BASE.len() {
740 self.state = InflateState::End;
741 return Err(DecompressError::InvalidData);
742 }
743 let dist_bits = DIST_ADD_BITS[dist_idx];
744 let dist_base = DIST_BASE[dist_idx] as usize;
745 if dist_bits == 0 {
746 self.state = InflateState::DynCopy(length, dist_base);
747 } else {
748 self.state = InflateState::DynBlockDistExt(length, dist_base, dist_bits);
749 }
750 } else {
751 unreachable!();
752 }
753 },
754 InflateState::DynBlockDistExt(length, base, bits) => {
755 let add = read_bits!(self, csrc, bits) as usize;
756 self.state = InflateState::DynCopy(length, base + add);
757 },
758 InflateState::DynCopy(length, dist) => {
759 if self.output_idx + length > dst.len() {
760 let copy_size = dst.len() - self.output_idx;
761 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
762 if ret.is_err() {
763 self.state = InflateState::End;
764 return Err(DecompressError::InvalidData);
765 }
766 self.output_idx += copy_size;
767 self.br = csrc.br;
768 self.state = InflateState::DynCopy(length - copy_size, dist);
769 return Err(DecompressError::OutputFull);
770 }
771 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
772 if ret.is_err() {
773 self.state = InflateState::End;
774 return Err(DecompressError::InvalidData);
775 }
776 self.output_idx += length;
777 self.state = InflateState::DynBlock;
778 }
779 InflateState::End => {
780 return Ok(0);
781 },
782 }
783 }
784 }
79fa5fbf 785 /// Resets decoder state.
de161d26
KS
786 pub fn reset(&mut self) {
787 self.bpos = 0;
788 self.output_idx = 0;
789 self.full_pos = 0;
790 self.state = InflateState::Start;
791 }
792
820e8a4a 793 #[allow(clippy::comparison_chain)]
79fa5fbf 794 /// Decompresses input data into output returning the uncompressed data length.
0443d0c5 795 pub fn uncompress(src: &[u8], dst: &mut [u8]) -> DecompressResult<usize> {
7bf82b33
KS
796 let mut csrc = CurrentSource::new(src, BitReaderState::default());
797 if src.len() > 2 {
798 let cm = src[0] & 0xF;
799 let cinfo = src[0] >> 4;
800 let hdr = (u16::from(src[0]) << 8) | u16::from(src[1]);
801 if cm == 8 && cinfo <= 7 && (hdr % 31) == 0 {
802 csrc.skip(16).unwrap();
803 }
804 }
805
806 let mut fix_len_cb = None;
807
808 let mut dst_idx = 0;
809 let mut final_block = false;
810 while !final_block {
811 final_block = csrc.read_bool()?;
812
813 let bmode = csrc.read(2)?;
814 match bmode {
815 0 => {
816 csrc.align();
817 let len = csrc.read(16)? as usize;
818 let inv_len = csrc.read(16)? as usize;
819 if (len ^ inv_len) != 0xFFFF {
820 return Err(DecompressError::InvalidHeader);
821 }
822 let src_pos = csrc.tell();
823 if src_pos + len > src.len() {
824 return Err(DecompressError::ShortData);
825 }
826 if dst_idx + len > dst.len() {
827 return Err(DecompressError::OutputFull);
828 }
829 dst[dst_idx..][..len].copy_from_slice(&src[src_pos..][..len]);
830 dst_idx += len;
831 csrc.skip_bytes(len)?;
832 },
833 1 => {
834 if fix_len_cb.is_none() {
835 let mut cr = FixedLenCodeReader {};
836 fix_len_cb = Some(Codebook::new(&mut cr, CodebookMode::LSB).unwrap());
837 }
838 if let Some(ref len_cb) = &fix_len_cb {
839 loop {
840 let val = csrc.read_cb(len_cb)?;
841 if val < 256 {
842 if dst_idx >= dst.len() {
843 return Err(DecompressError::OutputFull);
844 }
845 dst[dst_idx] = val as u8;
846 dst_idx += 1;
847 } else if val == 256 {
848 break;
849 } else {
850 let len_idx = (val - 257) as usize;
851 if len_idx >= LENGTH_BASE.len() {
852 return Err(DecompressError::InvalidData);
853 }
854 let len_bits = LENGTH_ADD_BITS[len_idx];
855 let mut length = LENGTH_BASE[len_idx] as usize;
856 if len_bits > 0 {
857 length += csrc.read(len_bits)? as usize;
858 }
859 let dist_idx = reverse_bits(csrc.read(5)?, 5) as usize;
860 if dist_idx >= DIST_BASE.len() {
861 return Err(DecompressError::InvalidData);
862 }
863 let dist_bits = DIST_ADD_BITS[dist_idx];
864 let mut dist = DIST_BASE[dist_idx] as usize;
865 if dist_bits > 0 {
866 dist += csrc.read(dist_bits)? as usize;
867 }
868
869 if dst_idx + length > dst.len() {
870 return Err(DecompressError::OutputFull);
871 }
872 if dist > dst_idx {
873 return Err(DecompressError::InvalidData);
874 }
875 lz_copy(dst, dst_idx, dist, length);
876 dst_idx += length;
877 }
878 }
879 } else {
880 unreachable!();
881 }
882 },
883 2 => {
884 let hlit = csrc.read(5)? as usize + 257;
885 if hlit >= 287 {
886 return Err(DecompressError::InvalidHeader);
887 }
888 let hdist = csrc.read(5)? as usize + 1;
889 let hclen = csrc.read(4)? as usize + 4;
7bf82b33
KS
890 let mut len_lengths = [0; 19];
891 let mut all_lengths = [0; NUM_LITERALS + NUM_DISTS];
892
820e8a4a 893 for cur_len_idx in 0..hclen {
7bf82b33 894 len_lengths[LEN_RECODE[cur_len_idx]] = csrc.read(3)? as u8;
7bf82b33
KS
895 }
896 let mut len_codes = [ShortCodebookDesc { code: 0, bits: 0 }; 19];
897 lengths_to_codes(&len_lengths, &mut len_codes)?;
78da9b7f 898 let mut cr = ShortCodebookDescReader::new(&len_codes);
7bf82b33
KS
899 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
900 if ret.is_err() {
901 return Err(DecompressError::InvalidHeader);
902 }
903 let dyn_len_cb = ret.unwrap();
904
905 let mut cur_len_idx = 0;
906 while cur_len_idx < hlit + hdist {
907 let val = csrc.read_cb(&dyn_len_cb)?;
908 if val < 16 {
909 all_lengths[cur_len_idx] = val as u8;
910 cur_len_idx += 1;
911 } else {
912 let mode = (val as usize) - 16;
913 if mode > 2 {
914 return Err(DecompressError::InvalidHeader);
915 }
916 let base = REPEAT_BASE[mode] as usize;
917 let bits = REPEAT_BITS[mode];
918 let len = base + (csrc.read(bits)? as usize);
919 if cur_len_idx + len > hlit + hdist {
920 return Err(DecompressError::InvalidHeader);
921 }
922 let rpt = if mode == 0 {
923 if cur_len_idx == 0 {
924 return Err(DecompressError::InvalidHeader);
925 }
926 all_lengths[cur_len_idx - 1]
927 } else {
928 0
929 };
930 for _ in 0..len {
931 all_lengths[cur_len_idx] = rpt;
932 cur_len_idx += 1;
933 }
934 }
935 }
936 let (lit_lengths, dist_lengths) = all_lengths.split_at(hlit);
937
938 let mut lit_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_LITERALS];
939 lengths_to_codes(lit_lengths, &mut lit_codes)?;
78da9b7f 940 let mut cr = ShortCodebookDescReader::new(&lit_codes);
7bf82b33
KS
941 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
942 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
943 let dyn_lit_cb = ret.unwrap();
944
945 let mut dist_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_DISTS];
946 lengths_to_codes(&dist_lengths[..hdist], &mut dist_codes)?;
78da9b7f 947 let mut cr = ShortCodebookDescReader::new(&dist_codes);
7bf82b33
KS
948 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
949 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
950 let dyn_dist_cb = ret.unwrap();
951
952 loop {
953 let val = csrc.read_cb(&dyn_lit_cb)?;
954 if val < 256 {
955 if dst_idx >= dst.len() {
956 return Err(DecompressError::OutputFull);
957 }
958 dst[dst_idx] = val as u8;
959 dst_idx += 1;
960 } else if val == 256 {
961 break;
962 } else {
963 let len_idx = (val - 257) as usize;
964 if len_idx >= LENGTH_BASE.len() {
965 return Err(DecompressError::InvalidData);
966 }
967 let len_bits = LENGTH_ADD_BITS[len_idx];
968 let mut length = LENGTH_BASE[len_idx] as usize;
969 if len_bits > 0 {
970 length += csrc.read(len_bits)? as usize;
971 }
972
973 let dist_idx = csrc.read_cb(&dyn_dist_cb)? as usize;
974 if dist_idx >= DIST_BASE.len() {
975 return Err(DecompressError::InvalidData);
976 }
977 let dist_bits = DIST_ADD_BITS[dist_idx];
978 let mut dist = DIST_BASE[dist_idx] as usize;
979 if dist_bits > 0 {
980 dist += csrc.read(dist_bits)? as usize;
981 }
982
983 if dst_idx + length > dst.len() {
984 return Err(DecompressError::OutputFull);
985 }
986 if dist > dst_idx {
987 return Err(DecompressError::InvalidData);
988 }
989 lz_copy(dst, dst_idx, dist, length);
990 dst_idx += length;
991 }
992 }
993 },
994 _ => return Err(DecompressError::InvalidHeader),
995 };
996 }
997 Ok(dst_idx)
0443d0c5
KS
998 }
999}
1000
b36f412c
KS
1001impl Default for Inflate {
1002 fn default() -> Self {
1003 Self::new()
1004 }
1005}
1006
0443d0c5
KS
1007fn lengths_to_codes(lens: &[u8], codes: &mut [ShortCodebookDesc]) -> DecompressResult<()> {
1008 let mut bits = [0u32; 32];
1009 let mut pfx = [0u32; 33];
1010 for len in lens.iter() {
1011 let len = *len as usize;
1012 if len >= bits.len() {
1013 return Err(DecompressError::InvalidHeader);
1014 }
1015 bits[len] += 1;
1016 }
1017 bits[0] = 0;
1018 let mut code = 0;
1019 for i in 0..bits.len() {
1020 code = (code + bits[i]) << 1;
1021 pfx[i + 1] = code;
1022 }
1023
1024 for (len, codes) in lens.iter().zip(codes.iter_mut()) {
1025 let len = *len as usize;
1026 if len != 0 {
1027 let bits = len as u8;
1028 *codes = ShortCodebookDesc { code: reverse_bits(pfx[len], bits), bits };
1029 pfx[len] += 1;
1030 } else {
1031 *codes = ShortCodebookDesc { code: 0, bits: 0 };
1032 }
1033 }
e65c0040 1034
0443d0c5
KS
1035 Ok(())
1036}
1037
1038struct GzipCRC32 {
1039 tab: [u32; 256],
1040 crc: u32,
1041}
1042
1043impl GzipCRC32 {
1044 fn new() -> Self {
1045 let mut tab = [0u32; 256];
1ff7036b 1046 for (i, el) in tab.iter_mut().enumerate() {
0443d0c5
KS
1047 let mut c = i as u32;
1048 for _ in 0..8 {
1049 if (c & 1) != 0 {
1050 c = 0xEDB88320 ^ (c >> 1);
1051 } else {
1052 c >>= 1;
1053 }
1054 }
1ff7036b 1055 *el = c;
0443d0c5
KS
1056 }
1057 Self { tab, crc: 0 }
1058 }
1059 fn update_crc(&mut self, src: &[u8]) {
1060 let mut c = !self.crc;
1061 for el in src.iter() {
1062 c = self.tab[((c ^ u32::from(*el)) & 0xFF) as usize] ^ (c >> 8);
1063 }
1064 self.crc = !c;
1065 }
1066}
1067
79fa5fbf 1068/// Decodes input data in gzip file format (RFC 1952) returning a vector containing decoded data.
0443d0c5
KS
1069pub fn gzip_decode(br: &mut ByteReader, skip_crc: bool) -> DecompressResult<Vec<u8>> {
1070 const FLAG_HCRC: u8 = 0x02;
1071 const FLAG_EXTRA: u8 = 0x04;
1072 const FLAG_NAME: u8 = 0x08;
1073 const FLAG_COMMENT: u8 = 0x10;
1074
1075 let id1 = br.read_byte()?;
1076 let id2 = br.read_byte()?;
1077 let cm = br.read_byte()?;
1078 let flg = br.read_byte()?;
1079 let _mtime = br.read_u32le()?;
1080 let _xfl = br.read_byte()?;
1081 let _os = br.read_byte()?;
1082 if id1 != 0x1F || id2 != 0x8B || cm != 8 {
1083 return Err(DecompressError::InvalidHeader);
1084 }
1085
1086 if (flg & FLAG_EXTRA) != 0 {
1087 let xlen = br.read_u16le()? as usize;
1088 br.read_skip(xlen)?;
1089 }
1090 if (flg & FLAG_NAME) != 0 {
1091 loop {
1092 let b = br.read_byte()?;
1093 if b == 0 {
1094 break;
1095 }
1096 }
1097 }
1098 if (flg & FLAG_COMMENT) != 0 {
1099 loop {
1100 let b = br.read_byte()?;
1101 if b == 0 {
1102 break;
1103 }
1104 }
1105 }
1106 let _hcrc = if (flg & FLAG_HCRC) != 0 {
1107 br.read_u16le()?
1108 } else {
1109 0
1110 };
1111 if (flg & 0xE0) != 0 {
1112 return Err(DecompressError::Unsupported);
1113 }
1114
1115 let mut output: Vec<u8> = Vec::new();
1116 let mut tail = [0u8; 8];
1117 let mut inblk = [0u8; 1024];
1118 let mut oblk = [0u8; 4096];
1119 let mut inflate = Inflate::new();
1120 let mut checker = GzipCRC32::new();
1121
1122 loop {
1123 let ret = br.read_buf_some(&mut inblk);
1124 if let Err(ByteIOError::EOF) = ret {
1125 break;
1126 }
1127 let inlen = match ret {
1128 Ok(val) => val,
1129 Err(_) => return Err(DecompressError::IOError),
1130 };
1131 let mut repeat = false;
1132 loop {
1133 let ret = inflate.decompress_data(&inblk[..inlen], &mut oblk, repeat);
1134 match ret {
1135 Ok(outlen) => {
1136 checker.update_crc(&oblk[..outlen]);
1137 output.extend_from_slice(&oblk[..outlen]);
1138 break;
1139 },
1140 Err(DecompressError::ShortData) => {
1141 break;
1142 },
1143 Err(DecompressError::OutputFull) => {
1144 repeat = true;
1145 checker.update_crc(&oblk);
1146 output.extend_from_slice(&oblk);
1147 },
1148 Err(err) => {
1149 return Err(err);
1150 },
1151 }
1152 }
1153 // Save last 8 bytes for CRC and size.
1154 if inlen >= 8 {
1155 tail.copy_from_slice(&inblk[inlen - 8..][..8]);
1156 } else {
1157 let shift_len = 8 - inlen;
1158 for i in 0..shift_len {
1159 tail[i] = tail[i + inlen];
1160 }
1ff7036b 1161 tail[shift_len..8].copy_from_slice(&inblk[..8 - shift_len]);
0443d0c5
KS
1162 }
1163 }
1164 if !skip_crc {
1165 if !inflate.is_finished() { println!("???"); }
1166 let crc = read_u32le(&tail[0..4])?;
1167 let size = read_u32le(&tail[4..8])?;
1168 if size != (output.len() as u32) {
1169 return Err(DecompressError::CRCError);
1170 }
1171 if crc != checker.crc {
1172 return Err(DecompressError::CRCError);
1173 }
1174 }
1175
1176 Ok(output)
1177}
1178
f8cdb949
KS
1179#[derive(Clone,Copy,Default)]
1180struct Token {
1181 sym: u16,
1182 distsym: u8,
1183 len: u16,
1184 dist: u16,
1185}
1186
1187const TOKEN_EOB: Token = Token { sym: 256, distsym: 0, len: 0, dist: 0 };
1188
1189impl Token {
1190 fn from_literal(sym: u8) -> Self {
1191 Self {
1192 sym: u16::from(sym),
1193 distsym: 0,
1194 dist: 0,
1195 len: 0,
1196 }
1197 }
1198 fn from_match(dist: u16, len: u16) -> Self {
1199 let sym = match len {
1200 3..= 10 => 257 + len - 3,
1201 11..= 18 => 265 + (len - 11) / 2,
1202 19..= 34 => 269 + (len - 19) / 4,
1203 35..= 66 => 273 + (len - 35) / 8,
1204 67..=130 => 277 + (len - 67) / 16,
1205 131..=257 => 281 + (len - 131) / 32,
1206 _ => 285,
1207 };
1208 let distsym = if dist <= 4 {
1209 (dist - 1) as u8
1210 } else {
1211 let bits = 16 - (dist - 1).leading_zeros();
1212 (bits as u8) * 2 - 2 + if ((dist - 1) & (1 << (bits - 2))) != 0 { 1 } else { 0 }
1213 };
1214 Self {
1215 sym, distsym, len, dist
1216 }
1217 }
1218}
1219
1220fn add_codes(lens: &[u8], stats: &mut [u32], toks: &mut Vec<(u8, u8)>) {
1221 let mut last = 42;
1222 let mut lcount = 0;
1223
1224 for &len in lens.iter() {
1225 if len == last {
1226 lcount += 1;
1227 } else {
1228 if last == 0 {
1229 while lcount > 10 {
1230 let run = lcount.min(138);
1231 stats[18] += 1;
1232 toks.push((18, run - 11));
1233 lcount -= run;
1234 }
1235 if lcount >= 3 {
1236 stats[17] += 1;
1237 toks.push((17, lcount - 3));
1238 lcount = 0;
1239 }
1240 for _ in 0..lcount {
1241 stats[0] += 1;
1242 toks.push((0, 0));
1243 }
1244 } else {
1245 while lcount >= 3 {
1246 let run = lcount.min(6);
1247 stats[16] += 1;
1248 toks.push((16, run - 3));
1249 lcount -= run;
1250 }
1251 for _ in 0..lcount {
1252 stats[last as usize] += 1;
1253 toks.push((last, 0));
1254 }
1255 }
1256 stats[len as usize] += 1;
1257 toks.push((len, 0));
1258 last = len;
1259 lcount = 0;
1260 }
1261 }
1262 if lcount > 0 {
1263 if last == 0 {
1264 while lcount > 10 {
1265 let run = lcount.min(138);
1266 stats[18] += 1;
1267 toks.push((18, run - 11));
1268 lcount -= run;
1269 }
1270 if lcount >= 3 {
1271 stats[17] += 1;
1272 toks.push((17, lcount - 3));
1273 lcount = 0;
1274 }
1275 for _ in 0..lcount {
1276 stats[0] += 1;
1277 toks.push((0, 0));
1278 }
1279 } else {
1280 while lcount >= 3 {
1281 let run = lcount.min(6);
1282 stats[16] += 1;
1283 toks.push((16, run - 3));
1284 lcount -= run;
1285 }
1286 for _ in 0..lcount {
1287 stats[last as usize] += 1;
1288 toks.push((last, 0));
1289 }
1290 }
1291 }
1292}
1293
79fa5fbf 1294/// Deflate stream writer.
f8cdb949
KS
1295pub struct DeflateWriter {
1296 dst: Vec<u8>,
1297 bits: u8,
1298 bbuf: u32,
1299}
1300
1301impl DeflateWriter {
79fa5fbf 1302 /// Creates a new instance of `DeflateWriter` for a provided output.
f8cdb949
KS
1303 pub fn new(dst: Vec<u8>) -> Self {
1304 Self {
1305 dst,
1306 bits: 0,
1307 bbuf: 0,
1308 }
1309 }
1310 fn align(&mut self) {
1311 if (self.bits & 7) != 0 {
1312 self.bits += 8 - (self.bits & 7);
1313 }
1314 }
1315 fn flush(&mut self) {
1316 while self.bits >= 8 {
1317 self.dst.push(self.bbuf as u8);
1318 self.bbuf >>= 8;
1319 self.bits -= 8;
1320 }
1321 }
1322 fn write(&mut self, val: u16, len: u8) {
1323 self.flush();
1324 self.bbuf |= u32::from(val) << self.bits;
1325 self.bits += len;
1326 }
79fa5fbf 1327 /// Finishes writing the stream and returns the output vector.
f8cdb949
KS
1328 pub fn end(mut self) -> Vec<u8> {
1329 self.flush();
1330 if self.bits > 0 {
1331 self.dst.push(self.bbuf as u8);
1332 }
1333 self.dst
1334 }
1335
1336 fn write_codes(&mut self, codes: &CodeHuff, dists: &DistHuff) {
1337 let mut stats = [0u32; 19];
1338 let mut toks = Vec::with_capacity(NUM_LITERALS + NUM_DISTS);
1339 let mut cw = [0u16; 19];
1340 let mut cl = [0u8; 19];
1341 let mut nc = 0;
1342
1343 add_codes(&codes.lens[..codes.num_codes], &mut stats, &mut toks);
1344 add_codes(&dists.lens[..dists.num_codes], &mut stats, &mut toks);
1345
1346 gen_tree(&mut cw, &mut cl, &mut nc, &mut stats, 7);
1347
1348 nc = cw.len();
1349 for &idx in LEN_RECODE.iter().rev() {
1350 if cl[idx] == 0 {
1351 nc -= 1;
1352 } else {
1353 break;
1354 }
1355 }
1356 if nc < 4 {
1357 nc = 4;
1358 }
1359 self.write((nc - 4) as u16, 4);
1360 for &idx in LEN_RECODE.iter().take(nc) {
1361 self.write(u16::from(cl[idx]), 3);
1362 }
1363 for &(sym, add) in toks.iter() {
1364 self.write(cw[sym as usize], cl[sym as usize]);
1365 match sym {
1366 16 => self.write(u16::from(add), 2),
1367 17 => self.write(u16::from(add), 3),
1368 18 => self.write(u16::from(add), 7),
1369 _ => {},
1370 };
1371 }
1372 }
1373 fn write_tokens(&mut self, src: &[Token], codes: &CodeHuff, dists: &DistHuff) {
1374 for &tok in src.iter() {
1375 self.write(codes.codes[tok.sym as usize], codes.lens[tok.sym as usize]);
1376 if tok.sym > 256 {
1377 self.write_len_bits(tok.len);
1378 self.write(dists.codes[tok.distsym as usize], dists.lens[tok.distsym as usize]);
1379 self.write_dist_bits(tok.dist);
1380 }
1381 }
1382 }
1383 fn write_len_bits(&mut self, len: u16) {
1384 let llen = len - 3;
1ff7036b 1385 if (8..255).contains(&llen) {
f8cdb949
KS
1386 let bits = (16 - llen.leading_zeros() - 3) as u8;
1387 self.write(llen & ((1 << bits) - 1), bits);
1388 }
1389 }
1390 fn write_dist_bits(&mut self, dist: u16) {
1391 let ddist = dist - 1;
1392 if dist >= 4 {
1393 let bits = (16 - ddist.leading_zeros() - 2) as u8;
1394 self.write(ddist & ((1 << bits) - 1), bits);
1395 }
1396 }
1397}
1398
1399struct CodeHuff {
1400 is_fixed: bool,
1401 stats: [u32; NUM_LITERALS],
1402 codes: [u16; NUM_LITERALS],
1403 lens: [u8; NUM_LITERALS],
1404 num_codes: usize,
1405}
1406
1407impl CodeHuff {
1408 fn new(is_fixed: bool) -> Self {
1409 Self {
1410 is_fixed,
1411 stats: [0; NUM_LITERALS],
1412 codes: [0; NUM_LITERALS],
1413 lens: [0; NUM_LITERALS],
1414 num_codes: NUM_LITERALS,
1415 }
1416 }
1417 fn make_codes(&mut self, src: &[Token]) {
1418 if self.is_fixed {
1419 for i in 0..=143 {
1420 self.codes[i] = reverse_bits((i + 0x30) as u32, 8) as u16;
1421 self.lens[i] = 8;
1422 }
1423 for i in 144..=255 {
1424 self.codes[i] = reverse_bits((i + 0x100) as u32, 9) as u16;
1425 self.lens[i] = 9;
1426 }
1427 for i in 256..=279 {
1428 self.codes[i] = reverse_bits((i & 0x1F) as u32, 7) as u16;
1429 self.lens[i] = 7;
1430 }
1431 for i in 280..NUM_LITERALS {
1432 self.codes[i] = reverse_bits((i - 280 + 0xC0) as u32, 8) as u16;
1433 self.lens[i] = 8;
1434 }
1435 } else {
1436 for &tok in src.iter() {
1437 self.stats[tok.sym as usize] += 1;
1438 }
1439 gen_tree(&mut self.codes, &mut self.lens, &mut self.num_codes, &mut self.stats, 15);
1440 if self.num_codes < 257 {
1441 self.num_codes = 257;
1442 }
1443 }
1444 }
1445}
1446
1447struct DistHuff {
1448 is_fixed: bool,
1449 stats: [u32; NUM_DISTS],
1450 codes: [u16; NUM_DISTS],
1451 lens: [u8; NUM_DISTS],
1452 num_codes: usize,
1453}
1454
1455impl DistHuff {
1456 fn new(is_fixed: bool) -> Self {
1457 Self {
1458 is_fixed,
1459 stats: [0; NUM_DISTS],
1460 codes: [0; NUM_DISTS],
1461 lens: [0; NUM_DISTS],
1462 num_codes: NUM_DISTS,
1463 }
1464 }
1465 fn make_codes(&mut self, src: &[Token]) {
1466 if self.is_fixed {
1467 for i in 0..NUM_DISTS {
1468 self.codes[i] = reverse_bits(i as u32, 5) as u16;
1469 self.lens[i] = 5;
1470 }
1471 } else {
1472 for &tok in src.iter() {
1473 if tok.sym > 256 {
1474 self.stats[tok.distsym as usize] += 1;
1475 }
1476 }
1477 gen_tree(&mut self.codes, &mut self.lens, &mut self.num_codes, &mut self.stats, 15);
1478 if self.num_codes < 1 {
1479 self.num_codes = 1;
1480 }
1481 }
1482 }
1483}
1484
1485#[derive(Clone,Copy,Default)]
1486struct Node {
1487 sym: u16,
1488 w: u16,
1489 idx0: u16,
1490 idx1: u16,
1491}
1492
1493const NODE_SYM: u16 = 65500;
1494
1495struct Tree {
1496 nodes: [Node; NUM_LITERALS * 2],
1497 nnodes: usize,
1498}
1499
1500impl Tree {
1501 fn new() -> Self {
1502 Self {
1503 nodes: [Node::default(); NUM_LITERALS * 2],
1504 nnodes: 0,
1505 }
1506 }
1507 fn insert(&mut self, val: Node) {
1508 let mut idx = self.nnodes;
1509 for (i, nn) in self.nodes[..self.nnodes].iter().enumerate() {
1510 if nn.w > val.w {
1511 idx = i;
1512 break;
1513 }
1514 }
1515 if idx < self.nnodes {
1516 for i in (idx..self.nnodes).rev() {
1517 self.nodes[i + 1] = self.nodes[i];
1518 }
1519 }
1520 self.nodes[idx] = val;
1521 self.nnodes += 1;
1522 }
1523 fn trim(&mut self) {
1524 let mut start = self.nnodes;
1525 for (i, n) in self.nodes[..self.nnodes].iter().enumerate() {
1526 if n.w != 0 {
1527 start = i;
1528 break;
1529 }
1530 }
1531 if start != 0 {
1532 for i in 0..self.nnodes - start {
1533 self.nodes[i] = self.nodes[i + start];
1534 }
1535 self.nnodes -= start;
1536 }
1537 }
1538 fn build(&mut self) {
1539 if self.nnodes == 1 {
1540 self.nodes[0].w = 1;
1541 return;
1542 }
1543 let mut start = 0;
1544 while start + 1 < self.nnodes {
1545 let nn1 = self.nodes[start];
1546 let nn2 = self.nodes[start + 1];
1547 let n = Node {
1548 sym: NODE_SYM,
1549 w: nn1.w + nn2.w,
1550 idx0: start as u16,
1551 idx1: (start + 1) as u16,
1552 };
1553 self.nodes[start].w = 0;
1554 self.nodes[start + 1].w = 0;
1555 start += 2;
1556 self.insert(n);
1557 }
1558 if self.nnodes > 1 {
1559 self.assign_len(self.nnodes - 1, 0);
1560 }
1561 }
1562 fn assign_len(&mut self, idx: usize, len: u16) {
1563 if self.nodes[idx].sym == NODE_SYM {
1564 self.assign_len(self.nodes[idx].idx0 as usize, len + 1);
1565 self.assign_len(self.nodes[idx].idx1 as usize, len + 1);
1566 } else {
1567 self.nodes[idx].w = len;
1568 }
1569 }
1570}
1571
1572fn gen_tree(codes: &mut [u16], lens: &mut [u8], num_codes: &mut usize, stats: &mut [u32], max_bits: u8) {
1573 let mut tot_w = 0;
1574 for &w in stats.iter() {
1575 tot_w += w;
1576 }
1577 if tot_w == 0 {
1578 codes[0] = 0;
1579 lens[0] = 0;
1580 *num_codes = 0;
1581 return;
1582 }
b6b2edf6
KS
1583 loop {
1584 let mut tree = Tree::new();
1585 for (sym, &w) in stats.iter().enumerate() {
1586 tree.insert(Node{ sym: sym as u16, w: w as u16, idx0: 64000, idx1: 64000 });
f8cdb949 1587 }
b6b2edf6
KS
1588 tree.trim();
1589 tree.build();
f8cdb949 1590
b6b2edf6
KS
1591 for n in tree.nodes[..tree.nnodes].iter() {
1592 if n.sym != NODE_SYM {
1593 lens[n.sym as usize] = n.w as u8;
1594 }
1595 }
1596 if !lens.iter().any(|&x| x > max_bits) {
1597 break;
1598 } else {
1599 for w in stats.iter_mut() {
1600 *w = (*w + 1) >> 1;
1601 }
f8cdb949
KS
1602 }
1603 }
1604 lengths_to_codes16(lens, codes);
1605 let mut sz = codes.len();
1606 for &len in lens.iter().rev() {
1607 if len != 0 {
1608 break;
1609 }
1610 sz -= 1;
1611 }
1612 *num_codes = sz;
1613}
1614
1615fn lengths_to_codes16(lens: &[u8], codes: &mut [u16]) {
1616 let mut bits = [0u32; 32];
1617 let mut pfx = [0u32; 33];
1618 for len in lens.iter() {
1619 let len = *len as usize;
1620 bits[len] += 1;
1621 }
1622 bits[0] = 0;
1623 let mut code = 0;
1624 for i in 0..bits.len() {
1625 code = (code + bits[i]) << 1;
1626 pfx[i + 1] = code;
1627 }
1628
1629 for (len, codes) in lens.iter().zip(codes.iter_mut()) {
1630 let len = *len as usize;
1631 if len != 0 {
1632 let bits = len as u8;
1633 *codes = reverse_bits(pfx[len], bits) as u16;
1634 pfx[len] += 1;
1635 } else {
1636 *codes = 0;
1637 }
1638 }
1639}
1640
1641trait LZParse {
1642 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>);
1643}
1644
1645struct NoParser {}
1646impl LZParse for NoParser {
1647 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>) {
1648 dst.reserve(src.len());
1649 for &b in src.iter() {
1650 dst.push(Token::from_literal(b));
1651 }
1652 dst.push(TOKEN_EOB);
1653 }
1654}
1655
1656fn check_match(src1: &[u8], src2: &[u8]) -> u16 {
1657 let mut len = 0;
1658 for (&a, &b) in src1.iter().zip(src2.iter()) {
1659 if a != b {
1660 break;
1661 }
1662 len += 1;
1663 }
1664 len
1665}
1666
1667const HASH_SIZE: usize = 4096;
1668const MAX_MATCH_LEN: usize = 258;
1669const WINDOW_SIZE: usize = 32768 - MAX_MATCH_LEN;
497aa09d 1670const WINDOW_MASK: usize = 32767;
f8cdb949
KS
1671
1672struct MatchFinder<'a> {
1673 src: &'a [u8],
1674 pos: usize,
1675 hstart: [usize; HASH_SIZE],
497aa09d 1676 hnext: [usize; WINDOW_MASK + 1],
f8cdb949
KS
1677}
1678
1679impl<'a> MatchFinder<'a> {
1680 fn new(src: &'a [u8]) -> Self {
497aa09d 1681 Self {
f8cdb949
KS
1682 src,
1683 pos: 0,
497aa09d
KS
1684 hstart: [src.len(); HASH_SIZE],
1685 hnext: [src.len(); WINDOW_MASK + 1],
1686 }
f8cdb949
KS
1687 }
1688 fn hash(src: &[u8]) -> usize {
1689 let _ = src[2];
1690 (((u16::from(src[0]) << 10) ^ (u16::from(src[1]) << 5) ^ u16::from(src[2])) & ((HASH_SIZE as u16) - 1)) as usize
1691 }
497aa09d
KS
1692 fn add_hash(&mut self, hash: usize) {
1693 self.hnext[self.pos & WINDOW_MASK] = self.hstart[hash];
1694 self.hstart[hash] = self.pos;
1695 self.hnext[(self.pos + 1) & WINDOW_MASK] = self.src.len();
f8cdb949
KS
1696 }
1697 fn find_match(&mut self) -> (u16, u16) {
1698 if self.pos == 0 || self.pos + 3 > self.src.len() {
1699 return (0, 0);
1700 }
497aa09d 1701 let key = Self::hash(&self.src[self.pos..]);
f8cdb949
KS
1702
1703 let mut best_pos = 0;
1704 let mut best_len = 0;
1705 let mut idx = self.hstart[key];
497aa09d
KS
1706 let search_end = self.pos.saturating_sub(WINDOW_SIZE);
1707 let mut tries = 0;
1708 while idx >= search_end && idx < self.pos {
1709 let cur_len = check_match(&self.src[self.pos..], &self.src[idx..]);
1710 if cur_len > best_len {
1711 best_len = cur_len;
1712 best_pos = self.pos - idx;
1713 if best_len >= (MAX_MATCH_LEN as u16) {
1714 return (best_pos as u16, MAX_MATCH_LEN as u16);
f8cdb949
KS
1715 }
1716 }
497aa09d
KS
1717 tries += 1;
1718 if tries > 16 {
1719 break;
1720 }
1721 idx = self.hnext[idx & WINDOW_MASK];
f8cdb949 1722 }
497aa09d 1723 self.add_hash(key);
f8cdb949
KS
1724 (best_pos as u16, best_len)
1725 }
1726 fn find_all_matches(&mut self, dists: &mut [u16; MAX_MATCH_LEN + 1]) {
1727 if self.pos == 0 || self.pos + 3 > self.src.len() {
1728 return;
1729 }
497aa09d 1730 let key = Self::hash(&self.src[self.pos..]);
f8cdb949 1731 let mut idx = self.hstart[key];
497aa09d
KS
1732 let search_end = self.pos.saturating_sub(WINDOW_SIZE);
1733 let mut tries = 0;
1734 while idx >= search_end && idx < self.pos {
1735 let cur_len = (check_match(&self.src[self.pos..], &self.src[idx..]) as usize).min(MAX_MATCH_LEN);
1736 if cur_len > 0 && dists[cur_len] == 0 {
1737 dists[cur_len] = (self.pos - idx) as u16;
1738 }
1739 idx = self.hnext[idx & WINDOW_MASK];
1740 tries += 1;
1741 if tries > 128 {
1742 break;
f8cdb949 1743 }
f8cdb949 1744 }
497aa09d 1745 self.add_hash(key);
f8cdb949
KS
1746 }
1747 fn advance(&mut self, num: usize) {
497aa09d
KS
1748 self.pos += 1;
1749 if num > 1 {
1750 for _ in 1..num {
1751 if self.pos + 3 <= self.src.len() {
1752 let key = Self::hash(&self.src[self.pos..]);
1753 self.add_hash(key);
f8cdb949 1754 }
497aa09d 1755 self.pos += 1;
f8cdb949
KS
1756 }
1757 }
1758 }
1759 fn get_sym(&self) -> u8 { self.src[self.pos] }
1760 fn is_end(&self) -> bool { self.pos >= self.src.len() }
1761}
1762
1763struct GreedyParser {}
1764impl LZParse for GreedyParser {
1765 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>) {
1766 dst.reserve(src.len());
1767
1768 let mut matcher = MatchFinder::new(src);
1769 while !matcher.is_end() {
1770 let (best_pos, best_len) = matcher.find_match();
1771
1772 if best_len >= 3 {
1773 dst.push(Token::from_match(best_pos, best_len));
1774 matcher.advance(best_len as usize);
1775 } else {
1776 dst.push(Token::from_literal(matcher.get_sym()));
1777 matcher.advance(1);
1778 }
1779 }
1780 dst.push(TOKEN_EOB);
1781 }
1782}
1783
1784struct LazyParser {}
1785impl LZParse for LazyParser {
1786 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>) {
1787 dst.reserve(src.len());
1788
1789 let mut matcher = MatchFinder::new(src);
1790 while !matcher.is_end() {
1791 let (best_pos, best_len) = matcher.find_match();
1792 if best_len >= 3 {
1793 let last_sym = matcher.get_sym();
1794 matcher.advance(1);
1795 if !matcher.is_end() {
1796 let (best_pos1, best_len1) = matcher.find_match();
1797 if best_len1 > best_len + 1 {
1798 dst.push(Token::from_literal(last_sym));
1799 dst.push(Token::from_match(best_pos1, best_len1));
1800 matcher.advance(best_len1 as usize);
1801 continue;
1802 }
1803 }
1804 dst.push(Token::from_match(best_pos, best_len));
1805 matcher.advance((best_len - 1) as usize);
1806 } else {
1807 dst.push(Token::from_literal(matcher.get_sym()));
1808 matcher.advance(1);
1809 }
1810 }
1811 dst.push(TOKEN_EOB);
1812 }
1813}
1814
1815#[derive(Clone,Copy)]
1816struct TNode {
1817 price: u32,
1818 dist: u16,
1819 link: usize,
1820}
1821
1822impl Default for TNode {
1823 fn default() -> Self {
1824 Self {
08e0a8bf 1825 price: u32::MAX,
f8cdb949
KS
1826 dist: 0,
1827 link: 0,
1828 }
1829 }
1830}
1831
1832struct OptimalParser {
1833 trellis: Vec<TNode>,
1834}
1835impl OptimalParser {
1836 fn new() -> Self { Self::default() }
1837 fn sym_price(_sym: u8) -> u32 { 9 }
1838 fn match_price(dist: u16, _len: u16) -> u32 {
1839 if dist <= 4 {
1840 9 + 5
1841 } else {
1842 let bits = 16 - (dist - 1).leading_zeros();
1843 9 + 3 + bits
1844 }
1845 }
1846}
1847impl Default for OptimalParser {
1848 fn default() -> Self {
1849 Self {
1850 trellis: Vec::with_capacity(WINDOW_SIZE),
1851 }
1852 }
1853}
1854impl LZParse for OptimalParser {
1855 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>) {
1856 if src.is_empty() {
1857 dst.push(TOKEN_EOB);
1858 return;
1859 }
1860 dst.reserve(src.len());
1861
b191eef3 1862 self.trellis.clear();
f8cdb949
KS
1863 self.trellis.reserve(src.len() + 1);
1864 for _ in 0..=src.len() {
1865 self.trellis.push(TNode::default());
1866 }
1867 self.trellis[0].price = 0;
1868
1869 let mut matcher = MatchFinder::new(src);
1870 for i in 0..self.trellis.len() - 1 {
1871 let mut dists = [0; MAX_MATCH_LEN + 1];
1872 matcher.find_all_matches(&mut dists);
1873
1874 let sym = matcher.get_sym();
1875 let lprice = Self::sym_price(sym) + self.trellis[i].price;
1876 if self.trellis[i + 1].price > lprice {
1877 self.trellis[i + 1].price = lprice;
1878 self.trellis[i + 1].link = i;
1879 }
1880 for (len, &dist) in dists.iter().enumerate() {
1881 if dist != 0 {
1882 let mprice = Self::match_price(dist, len as u16) + self.trellis[i].price;
1883 if self.trellis[i + len].price > mprice {
1884 self.trellis[i + len].price = mprice;
1885 self.trellis[i + len].link = i;
1886 self.trellis[i].dist = dist;
1887 }
1888 }
1889 }
1890
1891 matcher.advance(1);
1892 }
1893 let mut idx = self.trellis.len() - 1;
1894 let mut nidx = self.trellis[idx].link;
1895 while idx > 0 {
1896 let oidx = idx;
1897 idx = nidx;
1898 nidx = self.trellis[idx].link;
1899 self.trellis[idx].link = oidx;
1900 }
1901
1902 let mut idx = 0;
1903 while idx < self.trellis.len() - 1 {
1904 let len = self.trellis[idx].link - idx;
1905 if len == 1 {
1906 dst.push(Token::from_literal(src[idx]));
1907 } else {
1908 dst.push(Token::from_match(self.trellis[idx].dist, len as u16));
1909 }
1910 idx = self.trellis[idx].link;
1911 }
1912
1913 dst.push(TOKEN_EOB);
1914 }
1915}
1916
79fa5fbf 1917/// Deflate compression mode.
e6aaad5c 1918#[derive(Clone,Copy,Debug,PartialEq,Default)]
f8cdb949 1919pub enum DeflateMode {
79fa5fbf 1920 /// No compression.
f8cdb949 1921 NoCompr,
79fa5fbf 1922 /// Fast compression.
f8cdb949 1923 Fast,
79fa5fbf 1924 /// Still fast but better compression.
e6aaad5c 1925 #[default]
f8cdb949 1926 Better,
79fa5fbf 1927 /// Slow but the best compression.
f8cdb949
KS
1928 Best,
1929}
1930
8570a0b3 1931pub const DEFLATE_MODE_DESCRIPTION: &str = "Deflate compression level.";
79fa5fbf 1932/// Deflate option for no compression.
8570a0b3 1933pub const DEFLATE_MODE_NONE: &str = "none";
79fa5fbf 1934/// Deflate option for fast compression.
8570a0b3 1935pub const DEFLATE_MODE_FAST: &str = "fast";
79fa5fbf 1936/// Deflate option for better compression.
8570a0b3 1937pub const DEFLATE_MODE_BETTER: &str = "better";
79fa5fbf 1938/// Deflate option for best compression.
8570a0b3
KS
1939pub const DEFLATE_MODE_BEST: &str = "best";
1940
79fa5fbf 1941/// All possible option values for deflate compression.
8570a0b3
KS
1942pub const DEFLATE_OPTION_VALUES: NAOptionDefinitionType = NAOptionDefinitionType::String(Some(&[DEFLATE_MODE_NONE, DEFLATE_MODE_FAST, DEFLATE_MODE_BETTER, DEFLATE_MODE_BEST]));
1943
1944impl std::str::FromStr for DeflateMode {
1945 type Err = ();
1946
1947 fn from_str(s: &str) -> Result<Self, Self::Err> {
1948 match s {
1949 DEFLATE_MODE_NONE => Ok(DeflateMode::NoCompr),
1950 DEFLATE_MODE_FAST => Ok(DeflateMode::Fast),
1951 DEFLATE_MODE_BETTER => Ok(DeflateMode::Better),
1952 DEFLATE_MODE_BEST => Ok(DeflateMode::Best),
1953 _ => Err(()),
1954 }
1955 }
1956}
1957
1958impl ToString for DeflateMode {
1959 fn to_string(&self) -> String {
1960 match *self {
1961 DeflateMode::NoCompr => DEFLATE_MODE_NONE.to_string(),
1962 DeflateMode::Fast => DEFLATE_MODE_FAST.to_string(),
1963 DeflateMode::Better => DEFLATE_MODE_BETTER.to_string(),
1964 DeflateMode::Best => DEFLATE_MODE_BEST.to_string(),
1965 }
1966 }
1967}
1968
f8cdb949
KS
1969#[derive(Clone,Copy,Debug,PartialEq)]
1970enum Mode {
1971 Copy,
1972 Fixed,
1973 Dynamic,
1974}
1975
1976const MAX_BLOCK_SIZE: usize = 65535;
1977
79fa5fbf 1978/// Deflate stream compressor.
f8cdb949
KS
1979pub struct Deflate {
1980 mode: Mode,
1981 tokens: Vec<Token>,
1982 srcbuf: [u8; MAX_BLOCK_SIZE],
1983 ssize: usize,
1984 sum1: u32,
1985 sum2: u32,
1986 zlib_mode: bool,
9f838842 1987 parser: Box<dyn LZParse + Send>,
f8cdb949
KS
1988}
1989
1990impl Deflate {
79fa5fbf 1991 /// Creates a new instance of `Deflate`.
f8cdb949
KS
1992 pub fn new(mode: DeflateMode) -> Self {
1993 let (mode, parser) = match mode {
9f838842
KS
1994 DeflateMode::NoCompr => (Mode::Copy, Box::new(NoParser{}) as Box<dyn LZParse + Send>),
1995 DeflateMode::Fast => (Mode::Fixed, Box::new(GreedyParser{}) as Box<dyn LZParse + Send>),
1996 DeflateMode::Better => (Mode::Dynamic, Box::new(LazyParser{}) as Box<dyn LZParse + Send>),
1997 DeflateMode::Best => (Mode::Dynamic, Box::new(OptimalParser::new()) as Box<dyn LZParse + Send>),
f8cdb949
KS
1998 };
1999 Self {
2000 mode, parser,
2001 tokens: Vec::with_capacity(MAX_BLOCK_SIZE),
2002 srcbuf: [0; MAX_BLOCK_SIZE],
2003 ssize: 0,
2004 sum1: 1,
2005 sum2: 0,
2006 zlib_mode: false,
2007 }
2008 }
79fa5fbf 2009 /// Writes zlib stream header.
f8cdb949
KS
2010 pub fn write_zlib_header(&mut self, wr: &mut DeflateWriter) {
2011 wr.write(8, 4);
2012 wr.write(7, 4);
2013 let level = match self.mode {
2014 Mode::Copy => 0x01,
2015 Mode::Fixed => 0x5E,
2016 Mode::Dynamic => 0x9C,
2017 // 0xDA for the strongest one
2018 };
2019 wr.write(level, 8);
2020 self.zlib_mode = true;
2021 }
2022 fn write_zlib_footer(&self, wr: &mut DeflateWriter) {
2023 wr.align();
2024 wr.write((self.sum2 >> 8) as u16, 8);
2025 wr.write((self.sum2 & 0xFF) as u16, 8);
2026 wr.write((self.sum1 >> 8) as u16, 8);
2027 wr.write((self.sum1 & 0xFF) as u16, 8);
2028 }
79fa5fbf
KS
2029 /// Queues data for compression.
2030 ///
2031 /// The data might be not actually compressed until [`compress_end`] is called.
2032 ///
2033 /// [`compress_end`]: ./struct.Deflate.html#method.compress_end
f8cdb949
KS
2034 pub fn compress(&mut self, src: &[u8], wr: &mut DeflateWriter) {
2035 let mut src = src;
2036 while !src.is_empty() {
2037 let clen = src.len().min(MAX_BLOCK_SIZE - self.ssize);
2038 let (head, tail) = src.split_at(clen);
2039 src = tail;
2040 self.srcbuf[self.ssize..][..clen].copy_from_slice(head);
2041 self.ssize += clen;
2042 if self.ssize == MAX_BLOCK_SIZE {
2043 self.do_block(wr, false);
2044 }
2045 }
2046 }
79fa5fbf
KS
2047 /// Tells the encoder to finish data compression.
2048 ///
2049 /// Complete data will be output after this call.
f8cdb949
KS
2050 pub fn compress_end(&mut self, wr: &mut DeflateWriter) {
2051 if self.ssize > 0 {
2052 self.do_block(wr, true);
2053 } else {
2054 wr.write(1, 1);
2055 wr.write(1, 2);
2056 wr.write(0, 7); //static EOF sym
2057 }
2058 if self.zlib_mode {
2059 self.write_zlib_footer(wr);
2060 }
2061 }
79fa5fbf 2062 /// Tells the encoder to compress the data it received and flush it.
80da4ff8
KS
2063 pub fn compress_flush(&mut self, wr: &mut DeflateWriter) {
2064 if self.ssize > 0 {
2065 self.do_block(wr, false);
2066 }
2067 if (wr.bits & 7) != 0 {
2068 // write zero-length copy block for byte-alignment
2069 wr.write(0, 1);
2070 wr.write(0, 2);
2071 wr.align();
2072 wr.write(0, 16);
2073 wr.write(0xFFFF, 16);
2074 }
2075 }
f8cdb949
KS
2076 fn do_block(&mut self, wr: &mut DeflateWriter, final_block: bool) {
2077 const CRC_BASE: u32 = 65521;
2078 for &b in self.srcbuf[..self.ssize].iter() {
2079 self.sum1 += u32::from(b);
2080 if self.sum1 >= CRC_BASE {
2081 self.sum1 -= CRC_BASE;
2082 }
2083 self.sum2 += self.sum1;
2084 if self.sum2 >= CRC_BASE {
2085 self.sum2 -= CRC_BASE;
2086 }
2087 }
2088 match self.mode {
2089 Mode::Copy => {
2090 wr.write(final_block as u16, 1);
2091 wr.write(0, 2);
2092 wr.align();
2093 wr.write(self.ssize as u16, 16);
2094 wr.write(!self.ssize as u16, 16);
2095 for &b in self.srcbuf[..self.ssize].iter() {
2096 wr.write(u16::from(b), 8);
2097 }
2098 },
2099 Mode::Fixed => {
2100 wr.write(final_block as u16, 1);
2101 wr.write(1, 2);
b191eef3 2102 self.tokens.clear();
f8cdb949
KS
2103 self.parser.parse(&self.srcbuf[..self.ssize], &mut self.tokens);
2104 let mut codes = CodeHuff::new(true);
2105 codes.make_codes(&self.tokens);
2106 let mut dists = DistHuff::new(true);
2107 dists.make_codes(&self.tokens);
2108 wr.write_tokens(&self.tokens, &codes, &dists);
2109 },
2110 Mode::Dynamic => {
2111 wr.write(final_block as u16, 1);
2112 wr.write(2, 2);
b191eef3 2113 self.tokens.clear();
f8cdb949
KS
2114 self.parser.parse(&self.srcbuf[..self.ssize], &mut self.tokens);
2115 let mut codes = CodeHuff::new(false);
2116 codes.make_codes(&self.tokens);
2117 let mut dists = DistHuff::new(false);
2118 dists.make_codes(&self.tokens);
2119 wr.write((codes.num_codes - 257) as u16, 5);
2120 wr.write((dists.num_codes - 1) as u16, 5);
2121 wr.write_codes(&codes, &dists);
2122 wr.write_tokens(&self.tokens, &codes, &dists);
2123 },
2124 }
2125 self.ssize = 0;
2126 }
2127}
2128
0443d0c5
KS
2129#[cfg(test)]
2130mod test {
2131 use super::*;
2132
2133 #[test]
2134 fn test_inflate1() {
2135 const TEST_DATA: &[u8] = &[
2136 0xF3, 0x48, 0xCD, 0xC9, 0xC9, 0xD7, 0x51, 0x28,
2137 0xCF, 0x2F, 0xCA, 0x49, 0x51, 0x04, 0x00 ];
2138 const TEST_REF: &[u8] = b"Hello, world!";
2139 let mut dst_buf = [0u8; 13];
2140 let len = Inflate::uncompress(TEST_DATA, &mut dst_buf).unwrap();
2141 assert_eq!(len, 13);
2142 for i in 0..len {
2143 assert_eq!(dst_buf[i], TEST_REF[i]);
2144 }
2145 }
2146 #[test]
2147 fn test_inflate2() {
2148 const TEST_DATA3: &[u8] = &[ 0x4B, 0x4C, 0x44, 0x80, 0x24, 0x54, 0x80, 0x2C, 0x06, 0x00 ];
2149 const TEST_REF3: &[u8] = b"aaaaaaaaaaaabbbbbbbbbbbbbbbaaaaabbbbbbb";
2150 let mut dst_buf = [0u8; 39];
2151
2152 let mut inflate = Inflate::new();
2153 let mut output_chunk = [0u8; 7];
2154 let mut output_pos = 0;
2155 for input in TEST_DATA3.chunks(3) {
2156 let mut repeat = false;
2157 loop {
2158 let ret = inflate.decompress_data(input, &mut output_chunk, repeat);
2159 match ret {
2160 Ok(len) => {
2161 for i in 0..len {
2162 dst_buf[output_pos + i] = output_chunk[i];
2163 }
2164 output_pos += len;
2165 break;
2166 },
2167 Err(DecompressError::ShortData) => {
2168 break;
2169 },
2170 Err(DecompressError::OutputFull) => {
2171 repeat = true;
2172 for i in 0..output_chunk.len() {
2173 dst_buf[output_pos + i] = output_chunk[i];
2174 }
2175 output_pos += output_chunk.len();
2176 },
2177 _ => {
2178 panic!("decompress error {:?}", ret.err().unwrap());
2179 },
2180 }
2181 }
2182 }
2183
2184 assert_eq!(output_pos, dst_buf.len());
2185 for i in 0..output_pos {
2186 assert_eq!(dst_buf[i], TEST_REF3[i]);
2187 }
2188 }
2189 #[test]
2190 fn test_inflate3() {
2191 const TEST_DATA: &[u8] = &[
2192 0x1F, 0x8B, 0x08, 0x08, 0xF6, 0x7B, 0x90, 0x5E, 0x02, 0x03, 0x31, 0x2E, 0x74, 0x78, 0x74, 0x00,
2193 0xE5, 0x95, 0x4B, 0x4E, 0xC3, 0x30, 0x10, 0x40, 0xF7, 0x39, 0xC5, 0x1C, 0x00, 0x16, 0x70, 0x83,
2194 0x0A, 0xB5, 0x3B, 0xE8, 0x82, 0x5E, 0x60, 0x1A, 0x4F, 0xE2, 0x11, 0xFE, 0x44, 0x1E, 0xA7, 0x69,
2195 0x6E, 0xCF, 0x38, 0xDD, 0xB0, 0x40, 0xA2, 0x46, 0x2D, 0x20, 0x2A, 0xE5, 0xAB, 0xCC, 0xE7, 0xBD,
2196 0x49, 0xAC, 0x6C, 0x03, 0x64, 0x4B, 0xD0, 0x71, 0x92, 0x0C, 0x06, 0x67, 0x88, 0x1D, 0x3C, 0xD9,
2197 0xC4, 0x92, 0x3D, 0x4A, 0xF3, 0x3C, 0x43, 0x4E, 0x23, 0x81, 0x8B, 0x07, 0x82, 0x1E, 0xF5, 0x90,
2198 0x23, 0x78, 0x6A, 0x56, 0x30, 0x60, 0xCA, 0x89, 0x4D, 0x4F, 0xC0, 0x01, 0x10, 0x06, 0xC2, 0xA4,
2199 0xA1, 0x44, 0xCD, 0xF6, 0x54, 0x50, 0xA8, 0x8D, 0xC1, 0x9C, 0x5F, 0x71, 0x37, 0x45, 0xC8, 0x63,
2200 0xCA, 0x8E, 0xC0, 0xE8, 0x23, 0x69, 0x56, 0x9A, 0x8D, 0x5F, 0xB6, 0xC9, 0x96, 0x53, 0x4D, 0x17,
2201 0xAB, 0xB9, 0xB0, 0x49, 0x14, 0x5A, 0x0B, 0x96, 0x82, 0x7C, 0xB7, 0x6F, 0x17, 0x35, 0xC7, 0x9E,
2202 0xDF, 0x78, 0xA3, 0xF1, 0xD0, 0xA2, 0x73, 0x1C, 0x7A, 0xD8, 0x2B, 0xB3, 0x5C, 0x90, 0x85, 0xBB,
2203 0x2A, 0x14, 0x2E, 0xF7, 0xD1, 0x19, 0x48, 0x0A, 0x23, 0x57, 0x45, 0x13, 0x3E, 0xD6, 0xA0, 0xBD,
2204 0xF2, 0x11, 0x7A, 0x22, 0x21, 0xAD, 0xE5, 0x70, 0x56, 0xA0, 0x9F, 0xA5, 0xA5, 0x03, 0x85, 0x2A,
2205 0xDE, 0x92, 0x00, 0x32, 0x61, 0x10, 0xAD, 0x27, 0x13, 0x7B, 0x5F, 0x98, 0x7F, 0x59, 0x83, 0xB8,
2206 0xB7, 0x35, 0x16, 0xEB, 0x12, 0x0F, 0x1E, 0xD9, 0x14, 0x0B, 0xCF, 0xEE, 0x6D, 0x91, 0xF8, 0x93,
2207 0x6E, 0x81, 0x3F, 0x7F, 0x41, 0xA4, 0x22, 0x1F, 0xB7, 0xE6, 0x85, 0x83, 0x9A, 0xA2, 0x61, 0x12,
2208 0x0D, 0x0F, 0x6D, 0x01, 0xBD, 0xB0, 0xE8, 0x1D, 0xEC, 0xD1, 0xA0, 0xBF, 0x1F, 0x4E, 0xFB, 0x55,
2209 0xBD, 0x73, 0xDD, 0x87, 0xB9, 0x53, 0x23, 0x17, 0xD3, 0xE2, 0xE9, 0x08, 0x87, 0x42, 0xFF, 0xCF,
2210 0x26, 0x42, 0xAE, 0x76, 0xB5, 0xAE, 0x97, 0x0C, 0x18, 0x78, 0xA0, 0x24, 0xE5, 0x54, 0x0C, 0x6E,
2211 0x60, 0x52, 0x79, 0x22, 0x57, 0xF5, 0x87, 0x78, 0x78, 0x04, 0x93, 0x46, 0xEF, 0xCB, 0x98, 0x96,
2212 0x8B, 0x65, 0x00, 0xB7, 0x36, 0xBD, 0x77, 0xA8, 0xBD, 0x5A, 0xAA, 0x1A, 0x09, 0x00, 0x00
2213 ];
2214
2215 let mut mr = MemoryReader::new_read(TEST_DATA);
2216 let mut br = ByteReader::new(&mut mr);
2217 let _dst_buf = gzip_decode(&mut br, false).unwrap();
2218
2219// println!("{}", String::from_utf8_lossy(_dst_buf.as_slice()));
2220 }
f8cdb949
KS
2221 #[test]
2222 fn test_deflate_crc() {
2223 let output = Vec::with_capacity(20);
2224 let mut writer = DeflateWriter::new(output);
2225 let mut compr = Deflate::new(DeflateMode::NoCompr);
2226 compr.write_zlib_header(&mut writer);
2227 compr.compress(b"Hello, world!", &mut writer);
2228 compr.compress_end(&mut writer);
2229 let output = writer.end();
2230 assert_eq!(output.as_slice(), b"\x78\x01\x01\x0D\x00\xF2\xFFHello, world!\x20\x5E\x04\x8A");
2231 }
2232 fn deflate_test(mode: DeflateMode) {
2233 const SRC: &[u8] =
2234b"The first day of Christmas,
2235My true love sent to me
2236A partridge in a pear tree.
2237
2238The second day of Christmas,
2239My true love sent to me
2240Two turtle doves, and
2241A partridge in a pear tree.
2242
2243The third day of Christmas,
2244My true love sent to me
2245Three French hens,
2246Two turtle doves, and
2247A partridge in a pear tree.
2248
2249The fourth day of Christmas,
2250My true love sent to me
2251Four colly birds,
2252Three French hens,
2253Two turtle doves, and
2254A partridge in a pear tree.
2255
2256The fifth day of Christmas,
2257My true love sent to me
2258Five gold rings,
2259Four colly birds,
2260Three French hens,
2261Two turtle doves, and
2262A partridge in a pear tree.";
2263 let output = Vec::with_capacity(SRC.len() + 16);
2264 let mut writer = DeflateWriter::new(output);
2265 let mut compr = Deflate::new(mode);
2266 compr.write_zlib_header(&mut writer);
2267 compr.compress(SRC, &mut writer);
2268 compr.compress_end(&mut writer);
2269 let output = writer.end();
2270 let mut uncompr = vec![0u8; SRC.len()];
2271 Inflate::uncompress(&output, &mut uncompr).unwrap();
2272 assert_eq!(SRC, uncompr.as_slice());
2273 }
2274 #[test]
2275 fn test_deflate_fast() {
2276 deflate_test(DeflateMode::Fast);
2277 }
2278 #[test]
2279 fn test_deflate_better() {
2280 deflate_test(DeflateMode::Better);
2281 }
2282 #[test]
2283 fn test_deflate_best() {
2284 deflate_test(DeflateMode::Best);
2285 }
0443d0c5 2286}