add Acorn Moving Blocks HQ decoder
[nihav.git] / nihav-core / src / compr / deflate.rs
CommitLineData
0443d0c5
KS
1//! Deflate format (RFC 1951) support.
2//!
f8cdb949 3//! This module provides functionality for decompressing raw deflated streams via [`Inflate`] and gzip files (RFC 1952) via [`gzip_decode`] and compressing raw or zlib streams via [`Deflate`].
0443d0c5 4//!
f8cdb949 5//! [`Deflate`]: ./struct.Deflate.html
0443d0c5
KS
6//! [`Inflate`]: ./struct.Inflate.html
7//! [`gzip_decode`]: ./fn.gzip_decode.html
8//!
9//! # Examples
10//!
11//! Decompressing full input buffer into sufficiently large output buffer:
12//! ```
13//! # use nihav_core::compr::DecompressError;
14//! use nihav_core::compr::deflate::Inflate;
15//!
16//! # fn decompress(input: &[u8]) -> Result<(), DecompressError> {
17//! # let mut output_buffer = [0u8; 16];
18//! let output_length = Inflate::uncompress(input, &mut output_buffer)?;
19//! # Ok(())
20//! # }
21//! ```
22//!
23//! Decompressing input chunks into portions of output:
24//! ```
25//! use nihav_core::compr::DecompressError;
26//! use nihav_core::compr::deflate::Inflate;
27//!
28//! # fn decompress(input_data: &[u8]) -> Result<(), DecompressError> {
29//! let mut inflate = Inflate::new();
30//! let mut dst_buf: Vec<u8> = Vec::new();
31//! let mut output_chunk = [0u8; 1024];
32//! for src in input_data.chunks(512) {
33//! let mut repeat = false;
34//! loop {
35//! let ret = inflate.decompress_data(src, &mut output_chunk, repeat);
36//! match ret {
37//! Ok(len) => { // we got a buffer decoded successfully to the end
38//! dst_buf.extend_from_slice(&output_chunk[..len]);
39//! break;
40//! },
41//! Err(DecompressError::ShortData) => { // this block of data was fully read
42//! break;
43//! },
44//! Err(DecompressError::OutputFull) => {
45//! // the output buffer is full, flush it and continue decoding the same block
46//! repeat = true;
47//! dst_buf.extend_from_slice(&output_chunk);
48//! },
49//! Err(err) => {
50//! return Err(err);
51//! },
52//! }
53//! }
54//! }
55//! # Ok(())
56//! # }
57//! ```
f8cdb949
KS
58//!
59//! Compressing input buffer into zlib stream:
60//! ```
61//! use nihav_core::compr::deflate::{Deflate, DeflateMode, DeflateWriter};
62//!
63//! # fn compress(input: &[u8]) {
64//! let output = Vec::with_capacity(input.len() * 65540 / 65535 + 6);
65//! let mut writer = DeflateWriter::new(output);
66//! let mut compr = Deflate::new(DeflateMode::Fast);
67//! compr.write_zlib_header(&mut writer);
68//! compr.compress(input, &mut writer);
69//! compr.compress_end(&mut writer);
70//! let output = writer.end();
71//! # }
72//! ```
0443d0c5 73
8570a0b3 74use crate::options::NAOptionDefinitionType;
0443d0c5
KS
75use crate::io::byteio::*;
76use crate::io::bitreader::*;
77use crate::io::codebook::*;
78use super::*;
79
80const NUM_LITERALS: usize = 287;
81const NUM_DISTS: usize = 32;
82
83struct FixedLenCodeReader {}
84
85impl CodebookDescReader<u16> for FixedLenCodeReader {
86 fn bits(&mut self, idx: usize) -> u8 {
87 if idx < 144 { 8 }
88 else if idx < 256 { 9 }
89 else if idx < 280 { 7 }
90 else { 8 }
91 }
b36f412c 92 #[allow(clippy::identity_op)]
0443d0c5
KS
93 fn code(&mut self, idx: usize) -> u32 {
94 let base = idx as u32;
95 let bits = self.bits(idx);
96 if idx < 144 { reverse_bits(base + 0x30, bits) }
97 else if idx < 256 { reverse_bits(base + 0x190 - 144, bits) }
98 else if idx < 280 { reverse_bits(base + 0x000 - 256, bits) }
99 else { reverse_bits(base + 0xC0 - 280, bits) }
100 }
101 fn sym (&mut self, idx: usize) -> u16 { idx as u16 }
102 fn len(&mut self) -> usize { NUM_LITERALS + 1 }
103}
104
105#[derive(Clone,Copy,Default)]
106struct BitReaderState {
107 pos: usize,
108 bitbuf: u32,
109 bits: u8,
110}
111
112struct CurrentSource<'a> {
113 src: &'a [u8],
114 br: BitReaderState,
115}
116
117impl<'a> CurrentSource<'a> {
118 fn new(src: &'a [u8], br: BitReaderState) -> Self {
119 let mut newsrc = Self { src, br };
120 newsrc.br.pos = 0;
121 newsrc.refill();
122 newsrc
123 }
124 fn reinit(src: &'a [u8], br: BitReaderState) -> Self {
125 let mut newsrc = Self { src, br };
126 newsrc.refill();
127 newsrc
128 }
129 fn refill(&mut self) {
130 while (self.br.bits <= 24) && (self.br.pos < self.src.len()) {
131 self.br.bitbuf |= u32::from(self.src[self.br.pos]) << self.br.bits;
132 self.br.bits += 8;
133 self.br.pos += 1;
134 }
135 }
136 fn skip_cache(&mut self, nbits: u8) {
137 self.br.bitbuf >>= nbits;
138 self.br.bits -= nbits;
139 }
140 fn read(&mut self, nbits: u8) -> BitReaderResult<u32> {
141 if nbits == 0 { return Ok(0); }
142 if nbits > 16 { return Err(BitReaderError::TooManyBitsRequested); }
143 if self.br.bits < nbits {
144 self.refill();
145 if self.br.bits < nbits { return Err(BitReaderError::BitstreamEnd); }
146 }
147 let ret = self.br.bitbuf & ((1 << nbits) - 1);
148 self.skip_cache(nbits);
149 Ok(ret)
150 }
151 fn read_bool(&mut self) -> BitReaderResult<bool> {
152 if self.br.bits == 0 {
153 self.refill();
154 if self.br.bits == 0 { return Err(BitReaderError::BitstreamEnd); }
155 }
156 let ret = (self.br.bitbuf & 1) != 0;
157 self.skip_cache(1);
158 Ok(ret)
159 }
160 fn peek(&mut self, nbits: u8) -> u32 {
161 if nbits == 0 || nbits > 16 { return 0; }
162 if self.br.bits < nbits {
163 self.refill();
164 }
165 self.br.bitbuf & ((1 << nbits) - 1)
166 }
167 fn skip(&mut self, nbits: u32) -> BitReaderResult<()> {
168 if u32::from(self.br.bits) >= nbits {
169 self.skip_cache(nbits as u8);
170 } else {
171 unreachable!();
172 }
173 Ok(())
174 }
7bf82b33
KS
175 fn skip_bytes(&mut self, nbytes: usize) -> BitReaderResult<()> {
176 self.align();
177 let cached = usize::from(self.br.bits / 8);
178 if nbytes <= cached {
179 self.skip((nbytes as u32) * 8)?;
180 } else {
181 self.skip((cached as u32) * 8)?;
182 self.br.bits = 0;
183 self.br.bitbuf = 0;
184 self.br.pos += nbytes - cached;
185 if self.br.pos > self.src.len() {
186 return Err(BitReaderError::BitstreamEnd);
187 }
188 self.refill();
189 }
190 Ok(())
191 }
0443d0c5
KS
192 fn align(&mut self) {
193 let b = self.br.bits & 7;
194 if b != 0 {
11f1d51e 195 self.skip_cache(b);
0443d0c5
KS
196 }
197 }
198 fn left(&self) -> isize {
199 ((self.src.len() as isize) - (self.br.pos as isize)) * 8 + (self.br.bits as isize)
200 }
7bf82b33
KS
201 fn tell(&self) -> usize {
202 self.br.pos - usize::from(self.br.bits / 8)
203 }
0443d0c5
KS
204}
205
206impl<'a, S: Copy> CodebookReader<S> for CurrentSource<'a> {
207 fn read_cb(&mut self, cb: &Codebook<S>) -> CodebookResult<S> {
208 let mut esc = true;
209 let mut idx = 0;
210 let mut lut_bits = cb.lut_bits;
211 let orig_br = self.br;
212 while esc {
e6aaad5c 213 let lut_idx = (self.peek(lut_bits) as usize) + idx;
0443d0c5
KS
214 if cb.table[lut_idx] == TABLE_FILL_VALUE { return Err(CodebookError::InvalidCode); }
215 let bits = cb.table[lut_idx] & 0x7F;
216 esc = (cb.table[lut_idx] & 0x80) != 0;
217 idx = (cb.table[lut_idx] >> 8) as usize;
218 let skip_bits = if esc { u32::from(lut_bits) } else { bits };
219 if (skip_bits as isize) > self.left() {
220 self.br = orig_br;
221 self.refill();
222 return Err(CodebookError::MemoryError);
223 }
e6aaad5c 224 self.skip(skip_bits).unwrap();
0443d0c5
KS
225 lut_bits = bits as u8;
226 }
227 Ok(cb.syms[idx])
228 }
229}
230
231enum InflateState {
232 Start,
233 BlockStart,
234 BlockMode,
235 StaticBlockLen,
236 StaticBlockInvLen(u32),
237 StaticBlockCopy(usize),
238 FixedBlock,
239 FixedBlockLengthExt(usize, u8),
240 FixedBlockDist(usize),
241 FixedBlockDistExt(usize, usize, u8),
242 FixedBlockCopy(usize, usize),
243 FixedBlockLiteral(u8),
244 DynBlockHlit,
245 DynBlockHdist,
246 DynBlockHclen,
247 DynLengths(usize),
248 DynCodeLengths,
249 DynCodeLengthsAdd(usize),
250 DynBlock,
251 DynBlockLengthExt(usize, u8),
252 DynBlockDist(usize),
253 DynBlockDistExt(usize, usize, u8),
254 DynCopy(usize, usize),
255 DynBlockLiteral(u8),
256 End,
257}
258
79fa5fbf 259/// The decompressor for deflated streams (RFC 1951).
0443d0c5
KS
260pub struct Inflate {
261 br: BitReaderState,
262 fix_len_cb: Codebook<u16>,
263
264 buf: [u8; 65536],
265 bpos: usize,
266 output_idx: usize,
37d96dbb 267 full_pos: usize,
0443d0c5
KS
268
269 state: InflateState,
270 final_block: bool,
271 hlit: usize,
272 hdist: usize,
273 dyn_len_cb: Option<Codebook<u32>>,
274 dyn_lit_cb: Option<Codebook<u32>>,
275 dyn_dist_cb: Option<Codebook<u32>>,
276 len_lengths: [u8; 19],
277 all_lengths: [u8; NUM_LITERALS + NUM_DISTS],
278 cur_len_idx: usize,
279}
280
281const LENGTH_ADD_BITS: [u8; 29] = [
282 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
283 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
284 4, 4, 4, 4, 5, 5, 5, 5, 0
285];
286const LENGTH_BASE: [u16; 29] = [
287 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
288 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
289 67, 83, 99, 115, 131, 163, 195, 227, 258
290];
291const DIST_ADD_BITS: [u8; 30] = [
292 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
293 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
294 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
295];
296const DIST_BASE: [u16; 30] = [
297 1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
298 33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
299 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
300];
301const LEN_RECODE: [usize; 19] = [
302 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
303];
304const REPEAT_BITS: [u8; 3] = [ 2, 3, 7 ];
305const REPEAT_BASE: [u8; 3] = [ 3, 3, 11 ];
306
307macro_rules! read_bits {
308 ($self: expr, $csrc: expr, $bits: expr) => ({
309 if $csrc.left() < $bits as isize {
310 $self.br = $csrc.br;
311 return Err(DecompressError::ShortData);
312 }
313 $csrc.read($bits).unwrap()
314 })
315}
316
317macro_rules! read_cb {
318 ($self: expr, $csrc: expr, $cb: expr) => ({
319 let ret = $csrc.read_cb($cb);
320 if let Err(CodebookError::MemoryError) = ret {
321 $self.br = $csrc.br;
322 return Err(DecompressError::ShortData);
323 }
324 match ret {
325 Ok(val) => val,
326 Err(_) => {
327 $self.state = InflateState::End;
328 return Err(DecompressError::InvalidData);
329 },
330 }
331 })
332}
333
334impl Inflate {
79fa5fbf 335 /// Creates a new instance of `Inflate` struct.
0443d0c5
KS
336 pub fn new() -> Self {
337 let mut cr = FixedLenCodeReader {};
338 let fix_len_cb = Codebook::new(&mut cr, CodebookMode::LSB).unwrap();
339 Self {
340 br: BitReaderState::default(),
341 fix_len_cb,
342
343 buf: [0; 65536],
344 bpos: 0,
345 output_idx: 0,
37d96dbb 346 full_pos: 0,
0443d0c5
KS
347
348 state: InflateState::Start,
349 final_block: false,
350 dyn_len_cb: None,
351 dyn_lit_cb: None,
352 dyn_dist_cb: None,
353 hlit: 0,
354 hdist: 0,
355 len_lengths: [0; 19],
356 all_lengths: [0; NUM_LITERALS + NUM_DISTS],
357 cur_len_idx: 0,
358 }
359 }
360 fn put_literal(&mut self, val: u8) {
361 self.buf[self.bpos] = val;
c96ad969 362 self.bpos = (self.bpos + 1) & (self.buf.len() - 1);
37d96dbb 363 self.full_pos += 1;
0443d0c5
KS
364 }
365 fn lz_copy(&mut self, offset: usize, len: usize, dst: &mut [u8]) -> DecompressResult<()> {
366 let mask = self.buf.len() - 1;
37d96dbb 367 if offset > self.full_pos {
0443d0c5
KS
368 return Err(DecompressError::InvalidData);
369 }
c96ad969 370 let cstart = (self.bpos.wrapping_sub(offset)) & mask;
0443d0c5
KS
371 for i in 0..len {
372 self.buf[(self.bpos + i) & mask] = self.buf[(cstart + i) & mask];
373 dst[i] = self.buf[(cstart + i) & mask];
374 }
c96ad969 375 self.bpos = (self.bpos + len) & mask;
37d96dbb 376 self.full_pos += len;
0443d0c5
KS
377 Ok(())
378 }
79fa5fbf 379 /// Sets custom history for decoding an update for already decoded data.
bc23de6b
KS
380 pub fn set_dict(&mut self, dict: &[u8]) {
381 let len = dict.len().min(self.buf.len());
382 let start = dict.len() - len;
383 self.buf[..len].copy_from_slice(&dict[start..]);
384 self.bpos = len;
385 self.full_pos = len;
386 }
79fa5fbf 387 /// Reports whether decoder has finished decoding the input.
0443d0c5 388 pub fn is_finished(&self) -> bool {
6f263099 389 matches!(self.state, InflateState::End)
0443d0c5 390 }
79fa5fbf 391 /// Reports the current amount of bytes output into the destination buffer after the last run.
0443d0c5 392 pub fn get_current_output_size(&self) -> usize { self.output_idx }
79fa5fbf 393 /// Reports the total amount of bytes decoded so far.
0443d0c5 394 pub fn get_total_output_size(&self) -> usize { self.bpos }
79fa5fbf
KS
395 /// Tries to decompress input data and write it to the output buffer.
396 ///
397 /// Since the decompressor can work with arbitrary input and output chunks its return value may have several meanings:
398 /// * `Ok(len)` means the stream has been fully decoded and then number of bytes output into the destination buffer is returned.
399 /// * [`DecompressError::ShortData`] means the input stream has been fully read but more data is needed.
400 /// * [`DecompressError::OutputFull`] means the output buffer is full and should be flushed. Then decoding should continue on the same input block with `continue_block` parameter set to `true`.
401 ///
402 /// [`DecompressError::ShortData`]: ../enum.DecompressError.html#variant.ShortData
403 /// [`DecompressError::OutputFull`]: ../enum.DecompressError.html#variant.OutputFull
0443d0c5 404 pub fn decompress_data(&mut self, src: &[u8], dst: &mut [u8], continue_block: bool) -> DecompressResult<usize> {
de161d26
KS
405 self.decompress_data_internal(src, dst, continue_block, false)
406 }
79fa5fbf 407 /// Tries to decompress whole input chunk to the output buffer.
de161d26
KS
408 pub fn decompress_block(&mut self, src: &[u8], dst: &mut [u8]) -> DecompressResult<usize> {
409 self.decompress_data_internal(src, dst, false, true)
410 }
411 #[allow(clippy::comparison_chain)]
412 fn decompress_data_internal(&mut self, src: &[u8], dst: &mut [u8], continue_block: bool, do_one_block: bool) -> DecompressResult<usize> {
b36f412c 413 if src.is_empty() || dst.is_empty() {
0443d0c5
KS
414 return Err(DecompressError::InvalidArgument);
415 }
416 let mut csrc = if !continue_block {
417 CurrentSource::new(src, self.br)
418 } else {
419 self.output_idx = 0;
420 CurrentSource::reinit(src, self.br)
421 };
de161d26
KS
422 if do_one_block {
423 self.output_idx = 0;
424 }
0c9b706a
KS
425 // check for zlib stream header
426 if let (&InflateState::Start, true) = (&self.state, src.len() > 2) {
427 let cm = src[0] & 0xF;
428 let cinfo = src[0] >> 4;
429 let hdr = (u16::from(src[0]) << 8) | u16::from(src[1]);
430 if cm == 8 && cinfo <= 7 && (hdr % 31) == 0 {
431 csrc.skip(16).unwrap();
432 }
433 }
0443d0c5
KS
434 'main: loop {
435 match self.state {
436 InflateState::Start | InflateState::BlockStart => {
437 if csrc.left() == 0 {
de161d26
KS
438 if do_one_block {
439 return Ok(self.output_idx);
440 }
0443d0c5
KS
441 self.br = csrc.br;
442 return Err(DecompressError::ShortData);
443 }
444 self.final_block = csrc.read_bool().unwrap();
445 self.state = InflateState::BlockMode;
446 },
447 InflateState::BlockMode => {
448 let bmode = read_bits!(self, csrc, 2);
449 match bmode {
450 0 => {
451 csrc.align();
452 self.state = InflateState::StaticBlockLen;
453 },
454 1 => { self.state = InflateState::FixedBlock; },
455 2 => { self.state = InflateState::DynBlockHlit; },
456 _ => {
457 self.state = InflateState::End;
458 return Err(DecompressError::InvalidHeader);
459 },
460 };
461 },
462 InflateState::StaticBlockLen => {
463 let len = read_bits!(self, csrc, 16);
464 self.state = InflateState::StaticBlockInvLen(len);
465 },
466 InflateState::StaticBlockInvLen(len) => {
467 let inv_len = read_bits!(self, csrc, 16);
6130c973 468 if (len ^ inv_len) != 0xFFFF {
0443d0c5
KS
469 self.state = InflateState::End;
470 return Err(DecompressError::InvalidHeader);
471 }
472 self.state = InflateState::StaticBlockCopy(len as usize);
473 },
474 InflateState::StaticBlockCopy(len) => {
475 for i in 0..len {
476 if csrc.left() < 8 {
477 self.br = csrc.br;
478 self.state = InflateState::StaticBlockCopy(len - i);
479 return Err(DecompressError::ShortData);
480 }
481 let val = csrc.read(8).unwrap() as u8;
482 self.put_literal(val);
483 }
484 self.state = InflateState::BlockStart;
485 }
486 InflateState::FixedBlock => {
487 let val = read_cb!(self, csrc, &self.fix_len_cb);
488 if val < 256 {
489 if self.output_idx >= dst.len() {
490 self.br = csrc.br;
491 self.state = InflateState::FixedBlockLiteral(val as u8);
492 return Err(DecompressError::OutputFull);
493 }
494 self.put_literal(val as u8);
495 dst[self.output_idx] = val as u8;
496 self.output_idx += 1;
497 } else if val == 256 {
498 if self.final_block {
499 self.state = InflateState::End;
500 return Ok(self.output_idx);
501 } else {
502 self.state = InflateState::BlockStart;
503 }
504 } else {
505 let len_idx = (val - 257) as usize;
506 if len_idx >= LENGTH_BASE.len() {
507 self.state = InflateState::End;
508 return Err(DecompressError::InvalidData);
509 }
510 let len_bits = LENGTH_ADD_BITS[len_idx];
511 let add_base = LENGTH_BASE[len_idx] as usize;
512 if len_bits > 0 {
513 self.state = InflateState::FixedBlockLengthExt(add_base, len_bits);
514 } else {
515 self.state = InflateState::FixedBlockDist(add_base);
516 }
517 }
518 },
519 InflateState::FixedBlockLiteral(sym) => {
520 if self.output_idx >= dst.len() {
521 self.br = csrc.br;
522 return Err(DecompressError::OutputFull);
523 }
524 self.put_literal(sym);
525 dst[self.output_idx] = sym;
526 self.output_idx += 1;
527 self.state = InflateState::FixedBlock;
528 },
529 InflateState::FixedBlockLengthExt(base, bits) => {
530 let add = read_bits!(self, csrc, bits) as usize;
531 self.state = InflateState::FixedBlockDist(base + add);
532 },
533 InflateState::FixedBlockDist(length) => {
534 let dist_idx = reverse_bits(read_bits!(self, csrc, 5), 5) as usize;
535 if dist_idx >= DIST_BASE.len() {
536 self.state = InflateState::End;
537 return Err(DecompressError::InvalidData);
538 }
539 let dist_bits = DIST_ADD_BITS[dist_idx];
540 let dist_base = DIST_BASE[dist_idx] as usize;
541 if dist_bits == 0 {
542 self.state = InflateState::FixedBlockCopy(length, dist_base);
543 } else {
544 self.state = InflateState::FixedBlockDistExt(length, dist_base, dist_bits);
545 }
546 },
547 InflateState::FixedBlockDistExt(length, base, bits) => {
548 let add = read_bits!(self, csrc, bits) as usize;
549 self.state = InflateState::FixedBlockCopy(length, base + add);
550 },
551 InflateState::FixedBlockCopy(length, dist) => {
552 if self.output_idx + length > dst.len() {
553 let copy_size = dst.len() - self.output_idx;
554 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
555 if ret.is_err() {
556 self.state = InflateState::End;
557 return Err(DecompressError::InvalidData);
558 }
559 self.output_idx += copy_size;
560 self.br = csrc.br;
561 self.state = InflateState::FixedBlockCopy(length - copy_size, dist);
562 return Err(DecompressError::OutputFull);
563 }
564 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
565 if ret.is_err() {
566 self.state = InflateState::End;
567 return Err(DecompressError::InvalidData);
568 }
569 self.output_idx += length;
570 self.state = InflateState::FixedBlock;
571 }
572 InflateState::DynBlockHlit => {
573 self.hlit = (read_bits!(self, csrc, 5) as usize) + 257;
574 if self.hlit >= 287 {
575 self.state = InflateState::End;
576 return Err(DecompressError::InvalidHeader);
577 }
578 self.state = InflateState::DynBlockHdist;
579 }
580 InflateState::DynBlockHdist => {
581 self.hdist = (read_bits!(self, csrc, 5) as usize) + 1;
582 self.state = InflateState::DynBlockHclen;
583 },
584 InflateState::DynBlockHclen => {
585 let hclen = (read_bits!(self, csrc, 4) as usize) + 4;
586 self.cur_len_idx = 0;
587 self.len_lengths = [0; 19];
588 self.all_lengths = [0; NUM_LITERALS + NUM_DISTS];
589 self.state = InflateState::DynLengths(hclen);
590 },
591 InflateState::DynLengths(len) => {
592 for i in 0..len {
593 if csrc.left() < 3 {
594 self.br = csrc.br;
595 self.state = InflateState::DynLengths(len - i);
596 return Err(DecompressError::ShortData);
597 }
598 self.len_lengths[LEN_RECODE[self.cur_len_idx]] = csrc.read(3).unwrap() as u8;
599 self.cur_len_idx += 1;
600 }
601 let mut len_codes = [ShortCodebookDesc { code: 0, bits: 0 }; 19];
602 lengths_to_codes(&self.len_lengths, &mut len_codes)?;
603 let mut cr = ShortCodebookDescReader::new(len_codes.to_vec());
604 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
605 if ret.is_err() {
606 self.state = InflateState::End;
607 return Err(DecompressError::InvalidHeader);
608 }
609 self.dyn_len_cb = Some(ret.unwrap());
610 self.cur_len_idx = 0;
611 self.state = InflateState::DynCodeLengths;
612 },
613 InflateState::DynCodeLengths => {
614 if let Some(ref len_cb) = self.dyn_len_cb {
615 while self.cur_len_idx < self.hlit + self.hdist {
616 let ret = csrc.read_cb(len_cb);
617 let val = match ret {
618 Ok(val) => val,
619 Err(CodebookError::MemoryError) => {
620 self.br = csrc.br;
621 return Err(DecompressError::ShortData);
622 },
623 Err(_) => {
624 self.state = InflateState::End;
625 return Err(DecompressError::InvalidHeader);
626 },
627 };
628 if val < 16 {
629 self.all_lengths[self.cur_len_idx] = val as u8;
630 self.cur_len_idx += 1;
631 } else {
632 let idx = (val as usize) - 16;
633 if idx > 2 {
634 self.state = InflateState::End;
635 return Err(DecompressError::InvalidHeader);
636 }
637 self.state = InflateState::DynCodeLengthsAdd(idx);
638 continue 'main;
639 }
640 }
641 let (lit_lengths, dist_lengths) = self.all_lengths.split_at(self.hlit);
642
643 let mut lit_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_LITERALS];
6f263099 644 lengths_to_codes(lit_lengths, &mut lit_codes)?;
0443d0c5
KS
645 let mut cr = ShortCodebookDescReader::new(lit_codes.to_vec());
646 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
647 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
648 self.dyn_lit_cb = Some(ret.unwrap());
649
650 let mut dist_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_DISTS];
651 lengths_to_codes(&dist_lengths[..self.hdist], &mut dist_codes)?;
652 let mut cr = ShortCodebookDescReader::new(dist_codes.to_vec());
653 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
654 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
655 self.dyn_dist_cb = Some(ret.unwrap());
656
657 self.state = InflateState::DynBlock;
658 } else {
659 unreachable!();
660 }
661 },
662 InflateState::DynCodeLengthsAdd(mode) => {
663 let base = REPEAT_BASE[mode] as usize;
664 let bits = REPEAT_BITS[mode];
665 let len = base + read_bits!(self, csrc, bits) as usize;
666 if self.cur_len_idx + len > self.hlit + self.hdist {
667 self.state = InflateState::End;
668 return Err(DecompressError::InvalidHeader);
669 }
b36f412c
KS
670 let rpt = if mode == 0 {
671 if self.cur_len_idx == 0 {
672 self.state = InflateState::End;
673 return Err(DecompressError::InvalidHeader);
674 }
675 self.all_lengths[self.cur_len_idx - 1]
676 } else {
677 0
678 };
0443d0c5
KS
679 for _ in 0..len {
680 self.all_lengths[self.cur_len_idx] = rpt;
681 self.cur_len_idx += 1;
682 }
683 self.state = InflateState::DynCodeLengths;
684 },
685 InflateState::DynBlock => {
686 if let Some(ref lit_cb) = self.dyn_lit_cb {
687 let val = read_cb!(self, csrc, lit_cb);
688 if val < 256 {
689 if self.output_idx >= dst.len() {
690 self.br = csrc.br;
691 self.state = InflateState::DynBlockLiteral(val as u8);
692 return Err(DecompressError::OutputFull);
693 }
694 self.put_literal(val as u8);
695 dst[self.output_idx] = val as u8;
696 self.output_idx += 1;
697 } else if val == 256 {
698 if self.final_block {
699 self.state = InflateState::End;
700 return Ok(self.output_idx);
701 } else {
702 self.state = InflateState::BlockStart;
703 }
704 } else {
705 let len_idx = (val - 257) as usize;
706 if len_idx >= LENGTH_BASE.len() {
707 self.state = InflateState::End;
708 return Err(DecompressError::InvalidData);
709 }
710 let len_bits = LENGTH_ADD_BITS[len_idx];
711 let add_base = LENGTH_BASE[len_idx] as usize;
712 if len_bits > 0 {
713 self.state = InflateState::DynBlockLengthExt(add_base, len_bits);
714 } else {
715 self.state = InflateState::DynBlockDist(add_base);
716 }
717 }
718 } else {
719 unreachable!();
720 }
721 },
722 InflateState::DynBlockLiteral(sym) => {
723 if self.output_idx >= dst.len() {
724 self.br = csrc.br;
725 return Err(DecompressError::OutputFull);
726 }
727 self.put_literal(sym);
728 dst[self.output_idx] = sym;
729 self.output_idx += 1;
730 self.state = InflateState::DynBlock;
731 },
732 InflateState::DynBlockLengthExt(base, bits) => {
733 let add = read_bits!(self, csrc, bits) as usize;
734 self.state = InflateState::DynBlockDist(base + add);
735 },
736 InflateState::DynBlockDist(length) => {
737 if let Some(ref dist_cb) = self.dyn_dist_cb {
738 let dist_idx = read_cb!(self, csrc, dist_cb) as usize;
739 if dist_idx >= DIST_BASE.len() {
740 self.state = InflateState::End;
741 return Err(DecompressError::InvalidData);
742 }
743 let dist_bits = DIST_ADD_BITS[dist_idx];
744 let dist_base = DIST_BASE[dist_idx] as usize;
745 if dist_bits == 0 {
746 self.state = InflateState::DynCopy(length, dist_base);
747 } else {
748 self.state = InflateState::DynBlockDistExt(length, dist_base, dist_bits);
749 }
750 } else {
751 unreachable!();
752 }
753 },
754 InflateState::DynBlockDistExt(length, base, bits) => {
755 let add = read_bits!(self, csrc, bits) as usize;
756 self.state = InflateState::DynCopy(length, base + add);
757 },
758 InflateState::DynCopy(length, dist) => {
759 if self.output_idx + length > dst.len() {
760 let copy_size = dst.len() - self.output_idx;
761 let ret = self.lz_copy(dist, copy_size, &mut dst[self.output_idx..]);
762 if ret.is_err() {
763 self.state = InflateState::End;
764 return Err(DecompressError::InvalidData);
765 }
766 self.output_idx += copy_size;
767 self.br = csrc.br;
768 self.state = InflateState::DynCopy(length - copy_size, dist);
769 return Err(DecompressError::OutputFull);
770 }
771 let ret = self.lz_copy(dist, length, &mut dst[self.output_idx..]);
772 if ret.is_err() {
773 self.state = InflateState::End;
774 return Err(DecompressError::InvalidData);
775 }
776 self.output_idx += length;
777 self.state = InflateState::DynBlock;
778 }
779 InflateState::End => {
780 return Ok(0);
781 },
782 }
783 }
784 }
79fa5fbf 785 /// Resets decoder state.
de161d26
KS
786 pub fn reset(&mut self) {
787 self.bpos = 0;
788 self.output_idx = 0;
789 self.full_pos = 0;
790 self.state = InflateState::Start;
791 }
792
820e8a4a 793 #[allow(clippy::comparison_chain)]
79fa5fbf 794 /// Decompresses input data into output returning the uncompressed data length.
0443d0c5 795 pub fn uncompress(src: &[u8], dst: &mut [u8]) -> DecompressResult<usize> {
7bf82b33
KS
796 let mut csrc = CurrentSource::new(src, BitReaderState::default());
797 if src.len() > 2 {
798 let cm = src[0] & 0xF;
799 let cinfo = src[0] >> 4;
800 let hdr = (u16::from(src[0]) << 8) | u16::from(src[1]);
801 if cm == 8 && cinfo <= 7 && (hdr % 31) == 0 {
802 csrc.skip(16).unwrap();
803 }
804 }
805
806 let mut fix_len_cb = None;
807
808 let mut dst_idx = 0;
809 let mut final_block = false;
810 while !final_block {
811 final_block = csrc.read_bool()?;
812
813 let bmode = csrc.read(2)?;
814 match bmode {
815 0 => {
816 csrc.align();
817 let len = csrc.read(16)? as usize;
818 let inv_len = csrc.read(16)? as usize;
819 if (len ^ inv_len) != 0xFFFF {
820 return Err(DecompressError::InvalidHeader);
821 }
822 let src_pos = csrc.tell();
823 if src_pos + len > src.len() {
824 return Err(DecompressError::ShortData);
825 }
826 if dst_idx + len > dst.len() {
827 return Err(DecompressError::OutputFull);
828 }
829 dst[dst_idx..][..len].copy_from_slice(&src[src_pos..][..len]);
830 dst_idx += len;
831 csrc.skip_bytes(len)?;
832 },
833 1 => {
834 if fix_len_cb.is_none() {
835 let mut cr = FixedLenCodeReader {};
836 fix_len_cb = Some(Codebook::new(&mut cr, CodebookMode::LSB).unwrap());
837 }
838 if let Some(ref len_cb) = &fix_len_cb {
839 loop {
840 let val = csrc.read_cb(len_cb)?;
841 if val < 256 {
842 if dst_idx >= dst.len() {
843 return Err(DecompressError::OutputFull);
844 }
845 dst[dst_idx] = val as u8;
846 dst_idx += 1;
847 } else if val == 256 {
848 break;
849 } else {
850 let len_idx = (val - 257) as usize;
851 if len_idx >= LENGTH_BASE.len() {
852 return Err(DecompressError::InvalidData);
853 }
854 let len_bits = LENGTH_ADD_BITS[len_idx];
855 let mut length = LENGTH_BASE[len_idx] as usize;
856 if len_bits > 0 {
857 length += csrc.read(len_bits)? as usize;
858 }
859 let dist_idx = reverse_bits(csrc.read(5)?, 5) as usize;
860 if dist_idx >= DIST_BASE.len() {
861 return Err(DecompressError::InvalidData);
862 }
863 let dist_bits = DIST_ADD_BITS[dist_idx];
864 let mut dist = DIST_BASE[dist_idx] as usize;
865 if dist_bits > 0 {
866 dist += csrc.read(dist_bits)? as usize;
867 }
868
869 if dst_idx + length > dst.len() {
870 return Err(DecompressError::OutputFull);
871 }
872 if dist > dst_idx {
873 return Err(DecompressError::InvalidData);
874 }
875 lz_copy(dst, dst_idx, dist, length);
876 dst_idx += length;
877 }
878 }
879 } else {
880 unreachable!();
881 }
882 },
883 2 => {
884 let hlit = csrc.read(5)? as usize + 257;
885 if hlit >= 287 {
886 return Err(DecompressError::InvalidHeader);
887 }
888 let hdist = csrc.read(5)? as usize + 1;
889 let hclen = csrc.read(4)? as usize + 4;
7bf82b33
KS
890 let mut len_lengths = [0; 19];
891 let mut all_lengths = [0; NUM_LITERALS + NUM_DISTS];
892
820e8a4a 893 for cur_len_idx in 0..hclen {
7bf82b33 894 len_lengths[LEN_RECODE[cur_len_idx]] = csrc.read(3)? as u8;
7bf82b33
KS
895 }
896 let mut len_codes = [ShortCodebookDesc { code: 0, bits: 0 }; 19];
897 lengths_to_codes(&len_lengths, &mut len_codes)?;
898 let mut cr = ShortCodebookDescReader::new(len_codes.to_vec());
899 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
900 if ret.is_err() {
901 return Err(DecompressError::InvalidHeader);
902 }
903 let dyn_len_cb = ret.unwrap();
904
905 let mut cur_len_idx = 0;
906 while cur_len_idx < hlit + hdist {
907 let val = csrc.read_cb(&dyn_len_cb)?;
908 if val < 16 {
909 all_lengths[cur_len_idx] = val as u8;
910 cur_len_idx += 1;
911 } else {
912 let mode = (val as usize) - 16;
913 if mode > 2 {
914 return Err(DecompressError::InvalidHeader);
915 }
916 let base = REPEAT_BASE[mode] as usize;
917 let bits = REPEAT_BITS[mode];
918 let len = base + (csrc.read(bits)? as usize);
919 if cur_len_idx + len > hlit + hdist {
920 return Err(DecompressError::InvalidHeader);
921 }
922 let rpt = if mode == 0 {
923 if cur_len_idx == 0 {
924 return Err(DecompressError::InvalidHeader);
925 }
926 all_lengths[cur_len_idx - 1]
927 } else {
928 0
929 };
930 for _ in 0..len {
931 all_lengths[cur_len_idx] = rpt;
932 cur_len_idx += 1;
933 }
934 }
935 }
936 let (lit_lengths, dist_lengths) = all_lengths.split_at(hlit);
937
938 let mut lit_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_LITERALS];
939 lengths_to_codes(lit_lengths, &mut lit_codes)?;
940 let mut cr = ShortCodebookDescReader::new(lit_codes.to_vec());
941 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
942 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
943 let dyn_lit_cb = ret.unwrap();
944
945 let mut dist_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_DISTS];
946 lengths_to_codes(&dist_lengths[..hdist], &mut dist_codes)?;
947 let mut cr = ShortCodebookDescReader::new(dist_codes.to_vec());
948 let ret = Codebook::new(&mut cr, CodebookMode::LSB);
949 if ret.is_err() { return Err(DecompressError::InvalidHeader); }
950 let dyn_dist_cb = ret.unwrap();
951
952 loop {
953 let val = csrc.read_cb(&dyn_lit_cb)?;
954 if val < 256 {
955 if dst_idx >= dst.len() {
956 return Err(DecompressError::OutputFull);
957 }
958 dst[dst_idx] = val as u8;
959 dst_idx += 1;
960 } else if val == 256 {
961 break;
962 } else {
963 let len_idx = (val - 257) as usize;
964 if len_idx >= LENGTH_BASE.len() {
965 return Err(DecompressError::InvalidData);
966 }
967 let len_bits = LENGTH_ADD_BITS[len_idx];
968 let mut length = LENGTH_BASE[len_idx] as usize;
969 if len_bits > 0 {
970 length += csrc.read(len_bits)? as usize;
971 }
972
973 let dist_idx = csrc.read_cb(&dyn_dist_cb)? as usize;
974 if dist_idx >= DIST_BASE.len() {
975 return Err(DecompressError::InvalidData);
976 }
977 let dist_bits = DIST_ADD_BITS[dist_idx];
978 let mut dist = DIST_BASE[dist_idx] as usize;
979 if dist_bits > 0 {
980 dist += csrc.read(dist_bits)? as usize;
981 }
982
983 if dst_idx + length > dst.len() {
984 return Err(DecompressError::OutputFull);
985 }
986 if dist > dst_idx {
987 return Err(DecompressError::InvalidData);
988 }
989 lz_copy(dst, dst_idx, dist, length);
990 dst_idx += length;
991 }
992 }
993 },
994 _ => return Err(DecompressError::InvalidHeader),
995 };
996 }
997 Ok(dst_idx)
0443d0c5
KS
998 }
999}
1000
b36f412c
KS
1001impl Default for Inflate {
1002 fn default() -> Self {
1003 Self::new()
1004 }
1005}
1006
0443d0c5
KS
1007fn lengths_to_codes(lens: &[u8], codes: &mut [ShortCodebookDesc]) -> DecompressResult<()> {
1008 let mut bits = [0u32; 32];
1009 let mut pfx = [0u32; 33];
1010 for len in lens.iter() {
1011 let len = *len as usize;
1012 if len >= bits.len() {
1013 return Err(DecompressError::InvalidHeader);
1014 }
1015 bits[len] += 1;
1016 }
1017 bits[0] = 0;
1018 let mut code = 0;
1019 for i in 0..bits.len() {
1020 code = (code + bits[i]) << 1;
1021 pfx[i + 1] = code;
1022 }
1023
1024 for (len, codes) in lens.iter().zip(codes.iter_mut()) {
1025 let len = *len as usize;
1026 if len != 0 {
1027 let bits = len as u8;
1028 *codes = ShortCodebookDesc { code: reverse_bits(pfx[len], bits), bits };
1029 pfx[len] += 1;
1030 } else {
1031 *codes = ShortCodebookDesc { code: 0, bits: 0 };
1032 }
1033 }
e65c0040 1034
0443d0c5
KS
1035 Ok(())
1036}
1037
1038struct GzipCRC32 {
1039 tab: [u32; 256],
1040 crc: u32,
1041}
1042
1043impl GzipCRC32 {
b36f412c 1044 #[allow(clippy::unreadable_literal)]
0443d0c5
KS
1045 fn new() -> Self {
1046 let mut tab = [0u32; 256];
1047 for i in 0..256 {
1048 let mut c = i as u32;
1049 for _ in 0..8 {
1050 if (c & 1) != 0 {
1051 c = 0xEDB88320 ^ (c >> 1);
1052 } else {
1053 c >>= 1;
1054 }
1055 }
1056 tab[i] = c;
1057 }
1058 Self { tab, crc: 0 }
1059 }
1060 fn update_crc(&mut self, src: &[u8]) {
1061 let mut c = !self.crc;
1062 for el in src.iter() {
1063 c = self.tab[((c ^ u32::from(*el)) & 0xFF) as usize] ^ (c >> 8);
1064 }
1065 self.crc = !c;
1066 }
1067}
1068
79fa5fbf 1069/// Decodes input data in gzip file format (RFC 1952) returning a vector containing decoded data.
0443d0c5
KS
1070pub fn gzip_decode(br: &mut ByteReader, skip_crc: bool) -> DecompressResult<Vec<u8>> {
1071 const FLAG_HCRC: u8 = 0x02;
1072 const FLAG_EXTRA: u8 = 0x04;
1073 const FLAG_NAME: u8 = 0x08;
1074 const FLAG_COMMENT: u8 = 0x10;
1075
1076 let id1 = br.read_byte()?;
1077 let id2 = br.read_byte()?;
1078 let cm = br.read_byte()?;
1079 let flg = br.read_byte()?;
1080 let _mtime = br.read_u32le()?;
1081 let _xfl = br.read_byte()?;
1082 let _os = br.read_byte()?;
1083 if id1 != 0x1F || id2 != 0x8B || cm != 8 {
1084 return Err(DecompressError::InvalidHeader);
1085 }
1086
1087 if (flg & FLAG_EXTRA) != 0 {
1088 let xlen = br.read_u16le()? as usize;
1089 br.read_skip(xlen)?;
1090 }
1091 if (flg & FLAG_NAME) != 0 {
1092 loop {
1093 let b = br.read_byte()?;
1094 if b == 0 {
1095 break;
1096 }
1097 }
1098 }
1099 if (flg & FLAG_COMMENT) != 0 {
1100 loop {
1101 let b = br.read_byte()?;
1102 if b == 0 {
1103 break;
1104 }
1105 }
1106 }
1107 let _hcrc = if (flg & FLAG_HCRC) != 0 {
1108 br.read_u16le()?
1109 } else {
1110 0
1111 };
1112 if (flg & 0xE0) != 0 {
1113 return Err(DecompressError::Unsupported);
1114 }
1115
1116 let mut output: Vec<u8> = Vec::new();
1117 let mut tail = [0u8; 8];
1118 let mut inblk = [0u8; 1024];
1119 let mut oblk = [0u8; 4096];
1120 let mut inflate = Inflate::new();
1121 let mut checker = GzipCRC32::new();
1122
1123 loop {
1124 let ret = br.read_buf_some(&mut inblk);
1125 if let Err(ByteIOError::EOF) = ret {
1126 break;
1127 }
1128 let inlen = match ret {
1129 Ok(val) => val,
1130 Err(_) => return Err(DecompressError::IOError),
1131 };
1132 let mut repeat = false;
1133 loop {
1134 let ret = inflate.decompress_data(&inblk[..inlen], &mut oblk, repeat);
1135 match ret {
1136 Ok(outlen) => {
1137 checker.update_crc(&oblk[..outlen]);
1138 output.extend_from_slice(&oblk[..outlen]);
1139 break;
1140 },
1141 Err(DecompressError::ShortData) => {
1142 break;
1143 },
1144 Err(DecompressError::OutputFull) => {
1145 repeat = true;
1146 checker.update_crc(&oblk);
1147 output.extend_from_slice(&oblk);
1148 },
1149 Err(err) => {
1150 return Err(err);
1151 },
1152 }
1153 }
1154 // Save last 8 bytes for CRC and size.
1155 if inlen >= 8 {
1156 tail.copy_from_slice(&inblk[inlen - 8..][..8]);
1157 } else {
1158 let shift_len = 8 - inlen;
1159 for i in 0..shift_len {
1160 tail[i] = tail[i + inlen];
1161 }
1162 for i in shift_len..8 {
1163 tail[i] = inblk[i - shift_len];
1164 }
1165 }
1166 }
1167 if !skip_crc {
1168 if !inflate.is_finished() { println!("???"); }
1169 let crc = read_u32le(&tail[0..4])?;
1170 let size = read_u32le(&tail[4..8])?;
1171 if size != (output.len() as u32) {
1172 return Err(DecompressError::CRCError);
1173 }
1174 if crc != checker.crc {
1175 return Err(DecompressError::CRCError);
1176 }
1177 }
1178
1179 Ok(output)
1180}
1181
f8cdb949
KS
1182#[derive(Clone,Copy,Default)]
1183struct Token {
1184 sym: u16,
1185 distsym: u8,
1186 len: u16,
1187 dist: u16,
1188}
1189
1190const TOKEN_EOB: Token = Token { sym: 256, distsym: 0, len: 0, dist: 0 };
1191
1192impl Token {
1193 fn from_literal(sym: u8) -> Self {
1194 Self {
1195 sym: u16::from(sym),
1196 distsym: 0,
1197 dist: 0,
1198 len: 0,
1199 }
1200 }
1201 fn from_match(dist: u16, len: u16) -> Self {
1202 let sym = match len {
1203 3..= 10 => 257 + len - 3,
1204 11..= 18 => 265 + (len - 11) / 2,
1205 19..= 34 => 269 + (len - 19) / 4,
1206 35..= 66 => 273 + (len - 35) / 8,
1207 67..=130 => 277 + (len - 67) / 16,
1208 131..=257 => 281 + (len - 131) / 32,
1209 _ => 285,
1210 };
1211 let distsym = if dist <= 4 {
1212 (dist - 1) as u8
1213 } else {
1214 let bits = 16 - (dist - 1).leading_zeros();
1215 (bits as u8) * 2 - 2 + if ((dist - 1) & (1 << (bits - 2))) != 0 { 1 } else { 0 }
1216 };
1217 Self {
1218 sym, distsym, len, dist
1219 }
1220 }
1221}
1222
1223fn add_codes(lens: &[u8], stats: &mut [u32], toks: &mut Vec<(u8, u8)>) {
1224 let mut last = 42;
1225 let mut lcount = 0;
1226
1227 for &len in lens.iter() {
1228 if len == last {
1229 lcount += 1;
1230 } else {
1231 if last == 0 {
1232 while lcount > 10 {
1233 let run = lcount.min(138);
1234 stats[18] += 1;
1235 toks.push((18, run - 11));
1236 lcount -= run;
1237 }
1238 if lcount >= 3 {
1239 stats[17] += 1;
1240 toks.push((17, lcount - 3));
1241 lcount = 0;
1242 }
1243 for _ in 0..lcount {
1244 stats[0] += 1;
1245 toks.push((0, 0));
1246 }
1247 } else {
1248 while lcount >= 3 {
1249 let run = lcount.min(6);
1250 stats[16] += 1;
1251 toks.push((16, run - 3));
1252 lcount -= run;
1253 }
1254 for _ in 0..lcount {
1255 stats[last as usize] += 1;
1256 toks.push((last, 0));
1257 }
1258 }
1259 stats[len as usize] += 1;
1260 toks.push((len, 0));
1261 last = len;
1262 lcount = 0;
1263 }
1264 }
1265 if lcount > 0 {
1266 if last == 0 {
1267 while lcount > 10 {
1268 let run = lcount.min(138);
1269 stats[18] += 1;
1270 toks.push((18, run - 11));
1271 lcount -= run;
1272 }
1273 if lcount >= 3 {
1274 stats[17] += 1;
1275 toks.push((17, lcount - 3));
1276 lcount = 0;
1277 }
1278 for _ in 0..lcount {
1279 stats[0] += 1;
1280 toks.push((0, 0));
1281 }
1282 } else {
1283 while lcount >= 3 {
1284 let run = lcount.min(6);
1285 stats[16] += 1;
1286 toks.push((16, run - 3));
1287 lcount -= run;
1288 }
1289 for _ in 0..lcount {
1290 stats[last as usize] += 1;
1291 toks.push((last, 0));
1292 }
1293 }
1294 }
1295}
1296
79fa5fbf 1297/// Deflate stream writer.
f8cdb949
KS
1298pub struct DeflateWriter {
1299 dst: Vec<u8>,
1300 bits: u8,
1301 bbuf: u32,
1302}
1303
1304impl DeflateWriter {
79fa5fbf 1305 /// Creates a new instance of `DeflateWriter` for a provided output.
f8cdb949
KS
1306 pub fn new(dst: Vec<u8>) -> Self {
1307 Self {
1308 dst,
1309 bits: 0,
1310 bbuf: 0,
1311 }
1312 }
1313 fn align(&mut self) {
1314 if (self.bits & 7) != 0 {
1315 self.bits += 8 - (self.bits & 7);
1316 }
1317 }
1318 fn flush(&mut self) {
1319 while self.bits >= 8 {
1320 self.dst.push(self.bbuf as u8);
1321 self.bbuf >>= 8;
1322 self.bits -= 8;
1323 }
1324 }
1325 fn write(&mut self, val: u16, len: u8) {
1326 self.flush();
1327 self.bbuf |= u32::from(val) << self.bits;
1328 self.bits += len;
1329 }
79fa5fbf 1330 /// Finishes writing the stream and returns the output vector.
f8cdb949
KS
1331 pub fn end(mut self) -> Vec<u8> {
1332 self.flush();
1333 if self.bits > 0 {
1334 self.dst.push(self.bbuf as u8);
1335 }
1336 self.dst
1337 }
1338
1339 fn write_codes(&mut self, codes: &CodeHuff, dists: &DistHuff) {
1340 let mut stats = [0u32; 19];
1341 let mut toks = Vec::with_capacity(NUM_LITERALS + NUM_DISTS);
1342 let mut cw = [0u16; 19];
1343 let mut cl = [0u8; 19];
1344 let mut nc = 0;
1345
1346 add_codes(&codes.lens[..codes.num_codes], &mut stats, &mut toks);
1347 add_codes(&dists.lens[..dists.num_codes], &mut stats, &mut toks);
1348
1349 gen_tree(&mut cw, &mut cl, &mut nc, &mut stats, 7);
1350
1351 nc = cw.len();
1352 for &idx in LEN_RECODE.iter().rev() {
1353 if cl[idx] == 0 {
1354 nc -= 1;
1355 } else {
1356 break;
1357 }
1358 }
1359 if nc < 4 {
1360 nc = 4;
1361 }
1362 self.write((nc - 4) as u16, 4);
1363 for &idx in LEN_RECODE.iter().take(nc) {
1364 self.write(u16::from(cl[idx]), 3);
1365 }
1366 for &(sym, add) in toks.iter() {
1367 self.write(cw[sym as usize], cl[sym as usize]);
1368 match sym {
1369 16 => self.write(u16::from(add), 2),
1370 17 => self.write(u16::from(add), 3),
1371 18 => self.write(u16::from(add), 7),
1372 _ => {},
1373 };
1374 }
1375 }
1376 fn write_tokens(&mut self, src: &[Token], codes: &CodeHuff, dists: &DistHuff) {
1377 for &tok in src.iter() {
1378 self.write(codes.codes[tok.sym as usize], codes.lens[tok.sym as usize]);
1379 if tok.sym > 256 {
1380 self.write_len_bits(tok.len);
1381 self.write(dists.codes[tok.distsym as usize], dists.lens[tok.distsym as usize]);
1382 self.write_dist_bits(tok.dist);
1383 }
1384 }
1385 }
1386 fn write_len_bits(&mut self, len: u16) {
1387 let llen = len - 3;
1388 if llen >= 8 && llen < 255 {
1389 let bits = (16 - llen.leading_zeros() - 3) as u8;
1390 self.write(llen & ((1 << bits) - 1), bits);
1391 }
1392 }
1393 fn write_dist_bits(&mut self, dist: u16) {
1394 let ddist = dist - 1;
1395 if dist >= 4 {
1396 let bits = (16 - ddist.leading_zeros() - 2) as u8;
1397 self.write(ddist & ((1 << bits) - 1), bits);
1398 }
1399 }
1400}
1401
1402struct CodeHuff {
1403 is_fixed: bool,
1404 stats: [u32; NUM_LITERALS],
1405 codes: [u16; NUM_LITERALS],
1406 lens: [u8; NUM_LITERALS],
1407 num_codes: usize,
1408}
1409
1410impl CodeHuff {
1411 fn new(is_fixed: bool) -> Self {
1412 Self {
1413 is_fixed,
1414 stats: [0; NUM_LITERALS],
1415 codes: [0; NUM_LITERALS],
1416 lens: [0; NUM_LITERALS],
1417 num_codes: NUM_LITERALS,
1418 }
1419 }
1420 fn make_codes(&mut self, src: &[Token]) {
1421 if self.is_fixed {
1422 for i in 0..=143 {
1423 self.codes[i] = reverse_bits((i + 0x30) as u32, 8) as u16;
1424 self.lens[i] = 8;
1425 }
1426 for i in 144..=255 {
1427 self.codes[i] = reverse_bits((i + 0x100) as u32, 9) as u16;
1428 self.lens[i] = 9;
1429 }
1430 for i in 256..=279 {
1431 self.codes[i] = reverse_bits((i & 0x1F) as u32, 7) as u16;
1432 self.lens[i] = 7;
1433 }
1434 for i in 280..NUM_LITERALS {
1435 self.codes[i] = reverse_bits((i - 280 + 0xC0) as u32, 8) as u16;
1436 self.lens[i] = 8;
1437 }
1438 } else {
1439 for &tok in src.iter() {
1440 self.stats[tok.sym as usize] += 1;
1441 }
1442 gen_tree(&mut self.codes, &mut self.lens, &mut self.num_codes, &mut self.stats, 15);
1443 if self.num_codes < 257 {
1444 self.num_codes = 257;
1445 }
1446 }
1447 }
1448}
1449
1450struct DistHuff {
1451 is_fixed: bool,
1452 stats: [u32; NUM_DISTS],
1453 codes: [u16; NUM_DISTS],
1454 lens: [u8; NUM_DISTS],
1455 num_codes: usize,
1456}
1457
1458impl DistHuff {
1459 fn new(is_fixed: bool) -> Self {
1460 Self {
1461 is_fixed,
1462 stats: [0; NUM_DISTS],
1463 codes: [0; NUM_DISTS],
1464 lens: [0; NUM_DISTS],
1465 num_codes: NUM_DISTS,
1466 }
1467 }
1468 fn make_codes(&mut self, src: &[Token]) {
1469 if self.is_fixed {
1470 for i in 0..NUM_DISTS {
1471 self.codes[i] = reverse_bits(i as u32, 5) as u16;
1472 self.lens[i] = 5;
1473 }
1474 } else {
1475 for &tok in src.iter() {
1476 if tok.sym > 256 {
1477 self.stats[tok.distsym as usize] += 1;
1478 }
1479 }
1480 gen_tree(&mut self.codes, &mut self.lens, &mut self.num_codes, &mut self.stats, 15);
1481 if self.num_codes < 1 {
1482 self.num_codes = 1;
1483 }
1484 }
1485 }
1486}
1487
1488#[derive(Clone,Copy,Default)]
1489struct Node {
1490 sym: u16,
1491 w: u16,
1492 idx0: u16,
1493 idx1: u16,
1494}
1495
1496const NODE_SYM: u16 = 65500;
1497
1498struct Tree {
1499 nodes: [Node; NUM_LITERALS * 2],
1500 nnodes: usize,
1501}
1502
1503impl Tree {
1504 fn new() -> Self {
1505 Self {
1506 nodes: [Node::default(); NUM_LITERALS * 2],
1507 nnodes: 0,
1508 }
1509 }
1510 fn insert(&mut self, val: Node) {
1511 let mut idx = self.nnodes;
1512 for (i, nn) in self.nodes[..self.nnodes].iter().enumerate() {
1513 if nn.w > val.w {
1514 idx = i;
1515 break;
1516 }
1517 }
1518 if idx < self.nnodes {
1519 for i in (idx..self.nnodes).rev() {
1520 self.nodes[i + 1] = self.nodes[i];
1521 }
1522 }
1523 self.nodes[idx] = val;
1524 self.nnodes += 1;
1525 }
1526 fn trim(&mut self) {
1527 let mut start = self.nnodes;
1528 for (i, n) in self.nodes[..self.nnodes].iter().enumerate() {
1529 if n.w != 0 {
1530 start = i;
1531 break;
1532 }
1533 }
1534 if start != 0 {
1535 for i in 0..self.nnodes - start {
1536 self.nodes[i] = self.nodes[i + start];
1537 }
1538 self.nnodes -= start;
1539 }
1540 }
1541 fn build(&mut self) {
1542 if self.nnodes == 1 {
1543 self.nodes[0].w = 1;
1544 return;
1545 }
1546 let mut start = 0;
1547 while start + 1 < self.nnodes {
1548 let nn1 = self.nodes[start];
1549 let nn2 = self.nodes[start + 1];
1550 let n = Node {
1551 sym: NODE_SYM,
1552 w: nn1.w + nn2.w,
1553 idx0: start as u16,
1554 idx1: (start + 1) as u16,
1555 };
1556 self.nodes[start].w = 0;
1557 self.nodes[start + 1].w = 0;
1558 start += 2;
1559 self.insert(n);
1560 }
1561 if self.nnodes > 1 {
1562 self.assign_len(self.nnodes - 1, 0);
1563 }
1564 }
1565 fn assign_len(&mut self, idx: usize, len: u16) {
1566 if self.nodes[idx].sym == NODE_SYM {
1567 self.assign_len(self.nodes[idx].idx0 as usize, len + 1);
1568 self.assign_len(self.nodes[idx].idx1 as usize, len + 1);
1569 } else {
1570 self.nodes[idx].w = len;
1571 }
1572 }
1573}
1574
1575fn gen_tree(codes: &mut [u16], lens: &mut [u8], num_codes: &mut usize, stats: &mut [u32], max_bits: u8) {
1576 let mut tot_w = 0;
1577 for &w in stats.iter() {
1578 tot_w += w;
1579 }
1580 if tot_w == 0 {
1581 codes[0] = 0;
1582 lens[0] = 0;
1583 *num_codes = 0;
1584 return;
1585 }
b6b2edf6
KS
1586 loop {
1587 let mut tree = Tree::new();
1588 for (sym, &w) in stats.iter().enumerate() {
1589 tree.insert(Node{ sym: sym as u16, w: w as u16, idx0: 64000, idx1: 64000 });
f8cdb949 1590 }
b6b2edf6
KS
1591 tree.trim();
1592 tree.build();
f8cdb949 1593
b6b2edf6
KS
1594 for n in tree.nodes[..tree.nnodes].iter() {
1595 if n.sym != NODE_SYM {
1596 lens[n.sym as usize] = n.w as u8;
1597 }
1598 }
1599 if !lens.iter().any(|&x| x > max_bits) {
1600 break;
1601 } else {
1602 for w in stats.iter_mut() {
1603 *w = (*w + 1) >> 1;
1604 }
f8cdb949
KS
1605 }
1606 }
1607 lengths_to_codes16(lens, codes);
1608 let mut sz = codes.len();
1609 for &len in lens.iter().rev() {
1610 if len != 0 {
1611 break;
1612 }
1613 sz -= 1;
1614 }
1615 *num_codes = sz;
1616}
1617
1618fn lengths_to_codes16(lens: &[u8], codes: &mut [u16]) {
1619 let mut bits = [0u32; 32];
1620 let mut pfx = [0u32; 33];
1621 for len in lens.iter() {
1622 let len = *len as usize;
1623 bits[len] += 1;
1624 }
1625 bits[0] = 0;
1626 let mut code = 0;
1627 for i in 0..bits.len() {
1628 code = (code + bits[i]) << 1;
1629 pfx[i + 1] = code;
1630 }
1631
1632 for (len, codes) in lens.iter().zip(codes.iter_mut()) {
1633 let len = *len as usize;
1634 if len != 0 {
1635 let bits = len as u8;
1636 *codes = reverse_bits(pfx[len], bits) as u16;
1637 pfx[len] += 1;
1638 } else {
1639 *codes = 0;
1640 }
1641 }
1642}
1643
1644trait LZParse {
1645 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>);
1646}
1647
1648struct NoParser {}
1649impl LZParse for NoParser {
1650 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>) {
1651 dst.reserve(src.len());
1652 for &b in src.iter() {
1653 dst.push(Token::from_literal(b));
1654 }
1655 dst.push(TOKEN_EOB);
1656 }
1657}
1658
1659fn check_match(src1: &[u8], src2: &[u8]) -> u16 {
1660 let mut len = 0;
1661 for (&a, &b) in src1.iter().zip(src2.iter()) {
1662 if a != b {
1663 break;
1664 }
1665 len += 1;
1666 }
1667 len
1668}
1669
1670const HASH_SIZE: usize = 4096;
1671const MAX_MATCH_LEN: usize = 258;
1672const WINDOW_SIZE: usize = 32768 - MAX_MATCH_LEN;
497aa09d 1673const WINDOW_MASK: usize = 32767;
f8cdb949
KS
1674
1675struct MatchFinder<'a> {
1676 src: &'a [u8],
1677 pos: usize,
1678 hstart: [usize; HASH_SIZE],
497aa09d 1679 hnext: [usize; WINDOW_MASK + 1],
f8cdb949
KS
1680}
1681
1682impl<'a> MatchFinder<'a> {
1683 fn new(src: &'a [u8]) -> Self {
497aa09d 1684 Self {
f8cdb949
KS
1685 src,
1686 pos: 0,
497aa09d
KS
1687 hstart: [src.len(); HASH_SIZE],
1688 hnext: [src.len(); WINDOW_MASK + 1],
1689 }
f8cdb949
KS
1690 }
1691 fn hash(src: &[u8]) -> usize {
1692 let _ = src[2];
1693 (((u16::from(src[0]) << 10) ^ (u16::from(src[1]) << 5) ^ u16::from(src[2])) & ((HASH_SIZE as u16) - 1)) as usize
1694 }
497aa09d
KS
1695 fn add_hash(&mut self, hash: usize) {
1696 self.hnext[self.pos & WINDOW_MASK] = self.hstart[hash];
1697 self.hstart[hash] = self.pos;
1698 self.hnext[(self.pos + 1) & WINDOW_MASK] = self.src.len();
f8cdb949
KS
1699 }
1700 fn find_match(&mut self) -> (u16, u16) {
1701 if self.pos == 0 || self.pos + 3 > self.src.len() {
1702 return (0, 0);
1703 }
497aa09d 1704 let key = Self::hash(&self.src[self.pos..]);
f8cdb949
KS
1705
1706 let mut best_pos = 0;
1707 let mut best_len = 0;
1708 let mut idx = self.hstart[key];
497aa09d
KS
1709 let search_end = self.pos.saturating_sub(WINDOW_SIZE);
1710 let mut tries = 0;
1711 while idx >= search_end && idx < self.pos {
1712 let cur_len = check_match(&self.src[self.pos..], &self.src[idx..]);
1713 if cur_len > best_len {
1714 best_len = cur_len;
1715 best_pos = self.pos - idx;
1716 if best_len >= (MAX_MATCH_LEN as u16) {
1717 return (best_pos as u16, MAX_MATCH_LEN as u16);
f8cdb949
KS
1718 }
1719 }
497aa09d
KS
1720 tries += 1;
1721 if tries > 16 {
1722 break;
1723 }
1724 idx = self.hnext[idx & WINDOW_MASK];
f8cdb949 1725 }
497aa09d 1726 self.add_hash(key);
f8cdb949
KS
1727 (best_pos as u16, best_len)
1728 }
1729 fn find_all_matches(&mut self, dists: &mut [u16; MAX_MATCH_LEN + 1]) {
1730 if self.pos == 0 || self.pos + 3 > self.src.len() {
1731 return;
1732 }
497aa09d 1733 let key = Self::hash(&self.src[self.pos..]);
f8cdb949 1734 let mut idx = self.hstart[key];
497aa09d
KS
1735 let search_end = self.pos.saturating_sub(WINDOW_SIZE);
1736 let mut tries = 0;
1737 while idx >= search_end && idx < self.pos {
1738 let cur_len = (check_match(&self.src[self.pos..], &self.src[idx..]) as usize).min(MAX_MATCH_LEN);
1739 if cur_len > 0 && dists[cur_len] == 0 {
1740 dists[cur_len] = (self.pos - idx) as u16;
1741 }
1742 idx = self.hnext[idx & WINDOW_MASK];
1743 tries += 1;
1744 if tries > 128 {
1745 break;
f8cdb949 1746 }
f8cdb949 1747 }
497aa09d 1748 self.add_hash(key);
f8cdb949
KS
1749 }
1750 fn advance(&mut self, num: usize) {
497aa09d
KS
1751 self.pos += 1;
1752 if num > 1 {
1753 for _ in 1..num {
1754 if self.pos + 3 <= self.src.len() {
1755 let key = Self::hash(&self.src[self.pos..]);
1756 self.add_hash(key);
f8cdb949 1757 }
497aa09d 1758 self.pos += 1;
f8cdb949
KS
1759 }
1760 }
1761 }
1762 fn get_sym(&self) -> u8 { self.src[self.pos] }
1763 fn is_end(&self) -> bool { self.pos >= self.src.len() }
1764}
1765
1766struct GreedyParser {}
1767impl LZParse for GreedyParser {
1768 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>) {
1769 dst.reserve(src.len());
1770
1771 let mut matcher = MatchFinder::new(src);
1772 while !matcher.is_end() {
1773 let (best_pos, best_len) = matcher.find_match();
1774
1775 if best_len >= 3 {
1776 dst.push(Token::from_match(best_pos, best_len));
1777 matcher.advance(best_len as usize);
1778 } else {
1779 dst.push(Token::from_literal(matcher.get_sym()));
1780 matcher.advance(1);
1781 }
1782 }
1783 dst.push(TOKEN_EOB);
1784 }
1785}
1786
1787struct LazyParser {}
1788impl LZParse for LazyParser {
1789 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>) {
1790 dst.reserve(src.len());
1791
1792 let mut matcher = MatchFinder::new(src);
1793 while !matcher.is_end() {
1794 let (best_pos, best_len) = matcher.find_match();
1795 if best_len >= 3 {
1796 let last_sym = matcher.get_sym();
1797 matcher.advance(1);
1798 if !matcher.is_end() {
1799 let (best_pos1, best_len1) = matcher.find_match();
1800 if best_len1 > best_len + 1 {
1801 dst.push(Token::from_literal(last_sym));
1802 dst.push(Token::from_match(best_pos1, best_len1));
1803 matcher.advance(best_len1 as usize);
1804 continue;
1805 }
1806 }
1807 dst.push(Token::from_match(best_pos, best_len));
1808 matcher.advance((best_len - 1) as usize);
1809 } else {
1810 dst.push(Token::from_literal(matcher.get_sym()));
1811 matcher.advance(1);
1812 }
1813 }
1814 dst.push(TOKEN_EOB);
1815 }
1816}
1817
1818#[derive(Clone,Copy)]
1819struct TNode {
1820 price: u32,
1821 dist: u16,
1822 link: usize,
1823}
1824
1825impl Default for TNode {
1826 fn default() -> Self {
1827 Self {
1828 price: std::u32::MAX,
1829 dist: 0,
1830 link: 0,
1831 }
1832 }
1833}
1834
1835struct OptimalParser {
1836 trellis: Vec<TNode>,
1837}
1838impl OptimalParser {
1839 fn new() -> Self { Self::default() }
1840 fn sym_price(_sym: u8) -> u32 { 9 }
1841 fn match_price(dist: u16, _len: u16) -> u32 {
1842 if dist <= 4 {
1843 9 + 5
1844 } else {
1845 let bits = 16 - (dist - 1).leading_zeros();
1846 9 + 3 + bits
1847 }
1848 }
1849}
1850impl Default for OptimalParser {
1851 fn default() -> Self {
1852 Self {
1853 trellis: Vec::with_capacity(WINDOW_SIZE),
1854 }
1855 }
1856}
1857impl LZParse for OptimalParser {
1858 fn parse(&mut self, src: &[u8], dst: &mut Vec<Token>) {
1859 if src.is_empty() {
1860 dst.push(TOKEN_EOB);
1861 return;
1862 }
1863 dst.reserve(src.len());
1864
b191eef3 1865 self.trellis.clear();
f8cdb949
KS
1866 self.trellis.reserve(src.len() + 1);
1867 for _ in 0..=src.len() {
1868 self.trellis.push(TNode::default());
1869 }
1870 self.trellis[0].price = 0;
1871
1872 let mut matcher = MatchFinder::new(src);
1873 for i in 0..self.trellis.len() - 1 {
1874 let mut dists = [0; MAX_MATCH_LEN + 1];
1875 matcher.find_all_matches(&mut dists);
1876
1877 let sym = matcher.get_sym();
1878 let lprice = Self::sym_price(sym) + self.trellis[i].price;
1879 if self.trellis[i + 1].price > lprice {
1880 self.trellis[i + 1].price = lprice;
1881 self.trellis[i + 1].link = i;
1882 }
1883 for (len, &dist) in dists.iter().enumerate() {
1884 if dist != 0 {
1885 let mprice = Self::match_price(dist, len as u16) + self.trellis[i].price;
1886 if self.trellis[i + len].price > mprice {
1887 self.trellis[i + len].price = mprice;
1888 self.trellis[i + len].link = i;
1889 self.trellis[i].dist = dist;
1890 }
1891 }
1892 }
1893
1894 matcher.advance(1);
1895 }
1896 let mut idx = self.trellis.len() - 1;
1897 let mut nidx = self.trellis[idx].link;
1898 while idx > 0 {
1899 let oidx = idx;
1900 idx = nidx;
1901 nidx = self.trellis[idx].link;
1902 self.trellis[idx].link = oidx;
1903 }
1904
1905 let mut idx = 0;
1906 while idx < self.trellis.len() - 1 {
1907 let len = self.trellis[idx].link - idx;
1908 if len == 1 {
1909 dst.push(Token::from_literal(src[idx]));
1910 } else {
1911 dst.push(Token::from_match(self.trellis[idx].dist, len as u16));
1912 }
1913 idx = self.trellis[idx].link;
1914 }
1915
1916 dst.push(TOKEN_EOB);
1917 }
1918}
1919
79fa5fbf 1920/// Deflate compression mode.
e6aaad5c 1921#[derive(Clone,Copy,Debug,PartialEq,Default)]
f8cdb949 1922pub enum DeflateMode {
79fa5fbf 1923 /// No compression.
f8cdb949 1924 NoCompr,
79fa5fbf 1925 /// Fast compression.
f8cdb949 1926 Fast,
79fa5fbf 1927 /// Still fast but better compression.
e6aaad5c 1928 #[default]
f8cdb949 1929 Better,
79fa5fbf 1930 /// Slow but the best compression.
f8cdb949
KS
1931 Best,
1932}
1933
8570a0b3 1934pub const DEFLATE_MODE_DESCRIPTION: &str = "Deflate compression level.";
79fa5fbf 1935/// Deflate option for no compression.
8570a0b3 1936pub const DEFLATE_MODE_NONE: &str = "none";
79fa5fbf 1937/// Deflate option for fast compression.
8570a0b3 1938pub const DEFLATE_MODE_FAST: &str = "fast";
79fa5fbf 1939/// Deflate option for better compression.
8570a0b3 1940pub const DEFLATE_MODE_BETTER: &str = "better";
79fa5fbf 1941/// Deflate option for best compression.
8570a0b3
KS
1942pub const DEFLATE_MODE_BEST: &str = "best";
1943
79fa5fbf 1944/// All possible option values for deflate compression.
8570a0b3
KS
1945pub const DEFLATE_OPTION_VALUES: NAOptionDefinitionType = NAOptionDefinitionType::String(Some(&[DEFLATE_MODE_NONE, DEFLATE_MODE_FAST, DEFLATE_MODE_BETTER, DEFLATE_MODE_BEST]));
1946
1947impl std::str::FromStr for DeflateMode {
1948 type Err = ();
1949
1950 fn from_str(s: &str) -> Result<Self, Self::Err> {
1951 match s {
1952 DEFLATE_MODE_NONE => Ok(DeflateMode::NoCompr),
1953 DEFLATE_MODE_FAST => Ok(DeflateMode::Fast),
1954 DEFLATE_MODE_BETTER => Ok(DeflateMode::Better),
1955 DEFLATE_MODE_BEST => Ok(DeflateMode::Best),
1956 _ => Err(()),
1957 }
1958 }
1959}
1960
1961impl ToString for DeflateMode {
1962 fn to_string(&self) -> String {
1963 match *self {
1964 DeflateMode::NoCompr => DEFLATE_MODE_NONE.to_string(),
1965 DeflateMode::Fast => DEFLATE_MODE_FAST.to_string(),
1966 DeflateMode::Better => DEFLATE_MODE_BETTER.to_string(),
1967 DeflateMode::Best => DEFLATE_MODE_BEST.to_string(),
1968 }
1969 }
1970}
1971
f8cdb949
KS
1972#[derive(Clone,Copy,Debug,PartialEq)]
1973enum Mode {
1974 Copy,
1975 Fixed,
1976 Dynamic,
1977}
1978
1979const MAX_BLOCK_SIZE: usize = 65535;
1980
79fa5fbf 1981/// Deflate stream compressor.
f8cdb949
KS
1982pub struct Deflate {
1983 mode: Mode,
1984 tokens: Vec<Token>,
1985 srcbuf: [u8; MAX_BLOCK_SIZE],
1986 ssize: usize,
1987 sum1: u32,
1988 sum2: u32,
1989 zlib_mode: bool,
9f838842 1990 parser: Box<dyn LZParse + Send>,
f8cdb949
KS
1991}
1992
1993impl Deflate {
79fa5fbf 1994 /// Creates a new instance of `Deflate`.
f8cdb949
KS
1995 pub fn new(mode: DeflateMode) -> Self {
1996 let (mode, parser) = match mode {
9f838842
KS
1997 DeflateMode::NoCompr => (Mode::Copy, Box::new(NoParser{}) as Box<dyn LZParse + Send>),
1998 DeflateMode::Fast => (Mode::Fixed, Box::new(GreedyParser{}) as Box<dyn LZParse + Send>),
1999 DeflateMode::Better => (Mode::Dynamic, Box::new(LazyParser{}) as Box<dyn LZParse + Send>),
2000 DeflateMode::Best => (Mode::Dynamic, Box::new(OptimalParser::new()) as Box<dyn LZParse + Send>),
f8cdb949
KS
2001 };
2002 Self {
2003 mode, parser,
2004 tokens: Vec::with_capacity(MAX_BLOCK_SIZE),
2005 srcbuf: [0; MAX_BLOCK_SIZE],
2006 ssize: 0,
2007 sum1: 1,
2008 sum2: 0,
2009 zlib_mode: false,
2010 }
2011 }
79fa5fbf 2012 /// Writes zlib stream header.
f8cdb949
KS
2013 pub fn write_zlib_header(&mut self, wr: &mut DeflateWriter) {
2014 wr.write(8, 4);
2015 wr.write(7, 4);
2016 let level = match self.mode {
2017 Mode::Copy => 0x01,
2018 Mode::Fixed => 0x5E,
2019 Mode::Dynamic => 0x9C,
2020 // 0xDA for the strongest one
2021 };
2022 wr.write(level, 8);
2023 self.zlib_mode = true;
2024 }
2025 fn write_zlib_footer(&self, wr: &mut DeflateWriter) {
2026 wr.align();
2027 wr.write((self.sum2 >> 8) as u16, 8);
2028 wr.write((self.sum2 & 0xFF) as u16, 8);
2029 wr.write((self.sum1 >> 8) as u16, 8);
2030 wr.write((self.sum1 & 0xFF) as u16, 8);
2031 }
79fa5fbf
KS
2032 /// Queues data for compression.
2033 ///
2034 /// The data might be not actually compressed until [`compress_end`] is called.
2035 ///
2036 /// [`compress_end`]: ./struct.Deflate.html#method.compress_end
f8cdb949
KS
2037 pub fn compress(&mut self, src: &[u8], wr: &mut DeflateWriter) {
2038 let mut src = src;
2039 while !src.is_empty() {
2040 let clen = src.len().min(MAX_BLOCK_SIZE - self.ssize);
2041 let (head, tail) = src.split_at(clen);
2042 src = tail;
2043 self.srcbuf[self.ssize..][..clen].copy_from_slice(head);
2044 self.ssize += clen;
2045 if self.ssize == MAX_BLOCK_SIZE {
2046 self.do_block(wr, false);
2047 }
2048 }
2049 }
79fa5fbf
KS
2050 /// Tells the encoder to finish data compression.
2051 ///
2052 /// Complete data will be output after this call.
f8cdb949
KS
2053 pub fn compress_end(&mut self, wr: &mut DeflateWriter) {
2054 if self.ssize > 0 {
2055 self.do_block(wr, true);
2056 } else {
2057 wr.write(1, 1);
2058 wr.write(1, 2);
2059 wr.write(0, 7); //static EOF sym
2060 }
2061 if self.zlib_mode {
2062 self.write_zlib_footer(wr);
2063 }
2064 }
79fa5fbf 2065 /// Tells the encoder to compress the data it received and flush it.
80da4ff8
KS
2066 pub fn compress_flush(&mut self, wr: &mut DeflateWriter) {
2067 if self.ssize > 0 {
2068 self.do_block(wr, false);
2069 }
2070 if (wr.bits & 7) != 0 {
2071 // write zero-length copy block for byte-alignment
2072 wr.write(0, 1);
2073 wr.write(0, 2);
2074 wr.align();
2075 wr.write(0, 16);
2076 wr.write(0xFFFF, 16);
2077 }
2078 }
f8cdb949
KS
2079 fn do_block(&mut self, wr: &mut DeflateWriter, final_block: bool) {
2080 const CRC_BASE: u32 = 65521;
2081 for &b in self.srcbuf[..self.ssize].iter() {
2082 self.sum1 += u32::from(b);
2083 if self.sum1 >= CRC_BASE {
2084 self.sum1 -= CRC_BASE;
2085 }
2086 self.sum2 += self.sum1;
2087 if self.sum2 >= CRC_BASE {
2088 self.sum2 -= CRC_BASE;
2089 }
2090 }
2091 match self.mode {
2092 Mode::Copy => {
2093 wr.write(final_block as u16, 1);
2094 wr.write(0, 2);
2095 wr.align();
2096 wr.write(self.ssize as u16, 16);
2097 wr.write(!self.ssize as u16, 16);
2098 for &b in self.srcbuf[..self.ssize].iter() {
2099 wr.write(u16::from(b), 8);
2100 }
2101 },
2102 Mode::Fixed => {
2103 wr.write(final_block as u16, 1);
2104 wr.write(1, 2);
b191eef3 2105 self.tokens.clear();
f8cdb949
KS
2106 self.parser.parse(&self.srcbuf[..self.ssize], &mut self.tokens);
2107 let mut codes = CodeHuff::new(true);
2108 codes.make_codes(&self.tokens);
2109 let mut dists = DistHuff::new(true);
2110 dists.make_codes(&self.tokens);
2111 wr.write_tokens(&self.tokens, &codes, &dists);
2112 },
2113 Mode::Dynamic => {
2114 wr.write(final_block as u16, 1);
2115 wr.write(2, 2);
b191eef3 2116 self.tokens.clear();
f8cdb949
KS
2117 self.parser.parse(&self.srcbuf[..self.ssize], &mut self.tokens);
2118 let mut codes = CodeHuff::new(false);
2119 codes.make_codes(&self.tokens);
2120 let mut dists = DistHuff::new(false);
2121 dists.make_codes(&self.tokens);
2122 wr.write((codes.num_codes - 257) as u16, 5);
2123 wr.write((dists.num_codes - 1) as u16, 5);
2124 wr.write_codes(&codes, &dists);
2125 wr.write_tokens(&self.tokens, &codes, &dists);
2126 },
2127 }
2128 self.ssize = 0;
2129 }
2130}
2131
0443d0c5
KS
2132#[cfg(test)]
2133mod test {
2134 use super::*;
2135
2136 #[test]
2137 fn test_inflate1() {
2138 const TEST_DATA: &[u8] = &[
2139 0xF3, 0x48, 0xCD, 0xC9, 0xC9, 0xD7, 0x51, 0x28,
2140 0xCF, 0x2F, 0xCA, 0x49, 0x51, 0x04, 0x00 ];
2141 const TEST_REF: &[u8] = b"Hello, world!";
2142 let mut dst_buf = [0u8; 13];
2143 let len = Inflate::uncompress(TEST_DATA, &mut dst_buf).unwrap();
2144 assert_eq!(len, 13);
2145 for i in 0..len {
2146 assert_eq!(dst_buf[i], TEST_REF[i]);
2147 }
2148 }
2149 #[test]
2150 fn test_inflate2() {
2151 const TEST_DATA3: &[u8] = &[ 0x4B, 0x4C, 0x44, 0x80, 0x24, 0x54, 0x80, 0x2C, 0x06, 0x00 ];
2152 const TEST_REF3: &[u8] = b"aaaaaaaaaaaabbbbbbbbbbbbbbbaaaaabbbbbbb";
2153 let mut dst_buf = [0u8; 39];
2154
2155 let mut inflate = Inflate::new();
2156 let mut output_chunk = [0u8; 7];
2157 let mut output_pos = 0;
2158 for input in TEST_DATA3.chunks(3) {
2159 let mut repeat = false;
2160 loop {
2161 let ret = inflate.decompress_data(input, &mut output_chunk, repeat);
2162 match ret {
2163 Ok(len) => {
2164 for i in 0..len {
2165 dst_buf[output_pos + i] = output_chunk[i];
2166 }
2167 output_pos += len;
2168 break;
2169 },
2170 Err(DecompressError::ShortData) => {
2171 break;
2172 },
2173 Err(DecompressError::OutputFull) => {
2174 repeat = true;
2175 for i in 0..output_chunk.len() {
2176 dst_buf[output_pos + i] = output_chunk[i];
2177 }
2178 output_pos += output_chunk.len();
2179 },
2180 _ => {
2181 panic!("decompress error {:?}", ret.err().unwrap());
2182 },
2183 }
2184 }
2185 }
2186
2187 assert_eq!(output_pos, dst_buf.len());
2188 for i in 0..output_pos {
2189 assert_eq!(dst_buf[i], TEST_REF3[i]);
2190 }
2191 }
2192 #[test]
2193 fn test_inflate3() {
2194 const TEST_DATA: &[u8] = &[
2195 0x1F, 0x8B, 0x08, 0x08, 0xF6, 0x7B, 0x90, 0x5E, 0x02, 0x03, 0x31, 0x2E, 0x74, 0x78, 0x74, 0x00,
2196 0xE5, 0x95, 0x4B, 0x4E, 0xC3, 0x30, 0x10, 0x40, 0xF7, 0x39, 0xC5, 0x1C, 0x00, 0x16, 0x70, 0x83,
2197 0x0A, 0xB5, 0x3B, 0xE8, 0x82, 0x5E, 0x60, 0x1A, 0x4F, 0xE2, 0x11, 0xFE, 0x44, 0x1E, 0xA7, 0x69,
2198 0x6E, 0xCF, 0x38, 0xDD, 0xB0, 0x40, 0xA2, 0x46, 0x2D, 0x20, 0x2A, 0xE5, 0xAB, 0xCC, 0xE7, 0xBD,
2199 0x49, 0xAC, 0x6C, 0x03, 0x64, 0x4B, 0xD0, 0x71, 0x92, 0x0C, 0x06, 0x67, 0x88, 0x1D, 0x3C, 0xD9,
2200 0xC4, 0x92, 0x3D, 0x4A, 0xF3, 0x3C, 0x43, 0x4E, 0x23, 0x81, 0x8B, 0x07, 0x82, 0x1E, 0xF5, 0x90,
2201 0x23, 0x78, 0x6A, 0x56, 0x30, 0x60, 0xCA, 0x89, 0x4D, 0x4F, 0xC0, 0x01, 0x10, 0x06, 0xC2, 0xA4,
2202 0xA1, 0x44, 0xCD, 0xF6, 0x54, 0x50, 0xA8, 0x8D, 0xC1, 0x9C, 0x5F, 0x71, 0x37, 0x45, 0xC8, 0x63,
2203 0xCA, 0x8E, 0xC0, 0xE8, 0x23, 0x69, 0x56, 0x9A, 0x8D, 0x5F, 0xB6, 0xC9, 0x96, 0x53, 0x4D, 0x17,
2204 0xAB, 0xB9, 0xB0, 0x49, 0x14, 0x5A, 0x0B, 0x96, 0x82, 0x7C, 0xB7, 0x6F, 0x17, 0x35, 0xC7, 0x9E,
2205 0xDF, 0x78, 0xA3, 0xF1, 0xD0, 0xA2, 0x73, 0x1C, 0x7A, 0xD8, 0x2B, 0xB3, 0x5C, 0x90, 0x85, 0xBB,
2206 0x2A, 0x14, 0x2E, 0xF7, 0xD1, 0x19, 0x48, 0x0A, 0x23, 0x57, 0x45, 0x13, 0x3E, 0xD6, 0xA0, 0xBD,
2207 0xF2, 0x11, 0x7A, 0x22, 0x21, 0xAD, 0xE5, 0x70, 0x56, 0xA0, 0x9F, 0xA5, 0xA5, 0x03, 0x85, 0x2A,
2208 0xDE, 0x92, 0x00, 0x32, 0x61, 0x10, 0xAD, 0x27, 0x13, 0x7B, 0x5F, 0x98, 0x7F, 0x59, 0x83, 0xB8,
2209 0xB7, 0x35, 0x16, 0xEB, 0x12, 0x0F, 0x1E, 0xD9, 0x14, 0x0B, 0xCF, 0xEE, 0x6D, 0x91, 0xF8, 0x93,
2210 0x6E, 0x81, 0x3F, 0x7F, 0x41, 0xA4, 0x22, 0x1F, 0xB7, 0xE6, 0x85, 0x83, 0x9A, 0xA2, 0x61, 0x12,
2211 0x0D, 0x0F, 0x6D, 0x01, 0xBD, 0xB0, 0xE8, 0x1D, 0xEC, 0xD1, 0xA0, 0xBF, 0x1F, 0x4E, 0xFB, 0x55,
2212 0xBD, 0x73, 0xDD, 0x87, 0xB9, 0x53, 0x23, 0x17, 0xD3, 0xE2, 0xE9, 0x08, 0x87, 0x42, 0xFF, 0xCF,
2213 0x26, 0x42, 0xAE, 0x76, 0xB5, 0xAE, 0x97, 0x0C, 0x18, 0x78, 0xA0, 0x24, 0xE5, 0x54, 0x0C, 0x6E,
2214 0x60, 0x52, 0x79, 0x22, 0x57, 0xF5, 0x87, 0x78, 0x78, 0x04, 0x93, 0x46, 0xEF, 0xCB, 0x98, 0x96,
2215 0x8B, 0x65, 0x00, 0xB7, 0x36, 0xBD, 0x77, 0xA8, 0xBD, 0x5A, 0xAA, 0x1A, 0x09, 0x00, 0x00
2216 ];
2217
2218 let mut mr = MemoryReader::new_read(TEST_DATA);
2219 let mut br = ByteReader::new(&mut mr);
2220 let _dst_buf = gzip_decode(&mut br, false).unwrap();
2221
2222// println!("{}", String::from_utf8_lossy(_dst_buf.as_slice()));
2223 }
f8cdb949
KS
2224 #[test]
2225 fn test_deflate_crc() {
2226 let output = Vec::with_capacity(20);
2227 let mut writer = DeflateWriter::new(output);
2228 let mut compr = Deflate::new(DeflateMode::NoCompr);
2229 compr.write_zlib_header(&mut writer);
2230 compr.compress(b"Hello, world!", &mut writer);
2231 compr.compress_end(&mut writer);
2232 let output = writer.end();
2233 assert_eq!(output.as_slice(), b"\x78\x01\x01\x0D\x00\xF2\xFFHello, world!\x20\x5E\x04\x8A");
2234 }
2235 fn deflate_test(mode: DeflateMode) {
2236 const SRC: &[u8] =
2237b"The first day of Christmas,
2238My true love sent to me
2239A partridge in a pear tree.
2240
2241The second day of Christmas,
2242My true love sent to me
2243Two turtle doves, and
2244A partridge in a pear tree.
2245
2246The third day of Christmas,
2247My true love sent to me
2248Three French hens,
2249Two turtle doves, and
2250A partridge in a pear tree.
2251
2252The fourth day of Christmas,
2253My true love sent to me
2254Four colly birds,
2255Three French hens,
2256Two turtle doves, and
2257A partridge in a pear tree.
2258
2259The fifth day of Christmas,
2260My true love sent to me
2261Five gold rings,
2262Four colly birds,
2263Three French hens,
2264Two turtle doves, and
2265A partridge in a pear tree.";
2266 let output = Vec::with_capacity(SRC.len() + 16);
2267 let mut writer = DeflateWriter::new(output);
2268 let mut compr = Deflate::new(mode);
2269 compr.write_zlib_header(&mut writer);
2270 compr.compress(SRC, &mut writer);
2271 compr.compress_end(&mut writer);
2272 let output = writer.end();
2273 let mut uncompr = vec![0u8; SRC.len()];
2274 Inflate::uncompress(&output, &mut uncompr).unwrap();
2275 assert_eq!(SRC, uncompr.as_slice());
2276 }
2277 #[test]
2278 fn test_deflate_fast() {
2279 deflate_test(DeflateMode::Fast);
2280 }
2281 #[test]
2282 fn test_deflate_better() {
2283 deflate_test(DeflateMode::Better);
2284 }
2285 #[test]
2286 fn test_deflate_best() {
2287 deflate_test(DeflateMode::Best);
2288 }
0443d0c5 2289}