X-Git-Url: https://git.nihav.org/?a=blobdiff_plain;f=nihav-core%2Fsrc%2Fcompr%2Fdeflate.rs;h=9277731d2301b02fc212137b637f4c53aa095441;hb=e6aaad5c5273cd814b5748b7faf3751835a37217;hp=71e0dfed331d8292ac9675d74e06a1c77c8a67ef;hpb=6f2630992fe340ad1a122ec10c649f756e478185;p=nihav.git diff --git a/nihav-core/src/compr/deflate.rs b/nihav-core/src/compr/deflate.rs index 71e0dfe..9277731 100644 --- a/nihav-core/src/compr/deflate.rs +++ b/nihav-core/src/compr/deflate.rs @@ -172,6 +172,23 @@ impl<'a> CurrentSource<'a> { } Ok(()) } + fn skip_bytes(&mut self, nbytes: usize) -> BitReaderResult<()> { + self.align(); + let cached = usize::from(self.br.bits / 8); + if nbytes <= cached { + self.skip((nbytes as u32) * 8)?; + } else { + self.skip((cached as u32) * 8)?; + self.br.bits = 0; + self.br.bitbuf = 0; + self.br.pos += nbytes - cached; + if self.br.pos > self.src.len() { + return Err(BitReaderError::BitstreamEnd); + } + self.refill(); + } + Ok(()) + } fn align(&mut self) { let b = self.br.bits & 7; if b != 0 { @@ -181,6 +198,9 @@ impl<'a> CurrentSource<'a> { fn left(&self) -> isize { ((self.src.len() as isize) - (self.br.pos as isize)) * 8 + (self.br.bits as isize) } + fn tell(&self) -> usize { + self.br.pos - usize::from(self.br.bits / 8) + } } impl<'a, S: Copy> CodebookReader for CurrentSource<'a> { @@ -190,7 +210,7 @@ impl<'a, S: Copy> CodebookReader for CurrentSource<'a> { let mut lut_bits = cb.lut_bits; let orig_br = self.br; while esc { - let lut_idx = (self.peek(lut_bits) as usize) + (idx as usize); + let lut_idx = (self.peek(lut_bits) as usize) + idx; if cb.table[lut_idx] == TABLE_FILL_VALUE { return Err(CodebookError::InvalidCode); } let bits = cb.table[lut_idx] & 0x7F; esc = (cb.table[lut_idx] & 0x80) != 0; @@ -201,7 +221,7 @@ impl<'a, S: Copy> CodebookReader for CurrentSource<'a> { self.refill(); return Err(CodebookError::MemoryError); } - self.skip(skip_bits as u32).unwrap(); + self.skip(skip_bits).unwrap(); lut_bits = bits as u8; } Ok(cb.syms[idx]) @@ -770,10 +790,211 @@ impl Inflate { self.state = InflateState::Start; } + #[allow(clippy::comparison_chain)] ///! Decompresses input data into output returning the uncompressed data length. pub fn uncompress(src: &[u8], dst: &mut [u8]) -> DecompressResult { - let mut inflate = Self::new(); - inflate.decompress_data(src, dst, false) + let mut csrc = CurrentSource::new(src, BitReaderState::default()); + if src.len() > 2 { + let cm = src[0] & 0xF; + let cinfo = src[0] >> 4; + let hdr = (u16::from(src[0]) << 8) | u16::from(src[1]); + if cm == 8 && cinfo <= 7 && (hdr % 31) == 0 { + csrc.skip(16).unwrap(); + } + } + + let mut fix_len_cb = None; + + let mut dst_idx = 0; + let mut final_block = false; + while !final_block { + final_block = csrc.read_bool()?; + + let bmode = csrc.read(2)?; + match bmode { + 0 => { + csrc.align(); + let len = csrc.read(16)? as usize; + let inv_len = csrc.read(16)? as usize; + if (len ^ inv_len) != 0xFFFF { + return Err(DecompressError::InvalidHeader); + } + let src_pos = csrc.tell(); + if src_pos + len > src.len() { + return Err(DecompressError::ShortData); + } + if dst_idx + len > dst.len() { + return Err(DecompressError::OutputFull); + } + dst[dst_idx..][..len].copy_from_slice(&src[src_pos..][..len]); + dst_idx += len; + csrc.skip_bytes(len)?; + }, + 1 => { + if fix_len_cb.is_none() { + let mut cr = FixedLenCodeReader {}; + fix_len_cb = Some(Codebook::new(&mut cr, CodebookMode::LSB).unwrap()); + } + if let Some(ref len_cb) = &fix_len_cb { + loop { + let val = csrc.read_cb(len_cb)?; + if val < 256 { + if dst_idx >= dst.len() { + return Err(DecompressError::OutputFull); + } + dst[dst_idx] = val as u8; + dst_idx += 1; + } else if val == 256 { + break; + } else { + let len_idx = (val - 257) as usize; + if len_idx >= LENGTH_BASE.len() { + return Err(DecompressError::InvalidData); + } + let len_bits = LENGTH_ADD_BITS[len_idx]; + let mut length = LENGTH_BASE[len_idx] as usize; + if len_bits > 0 { + length += csrc.read(len_bits)? as usize; + } + let dist_idx = reverse_bits(csrc.read(5)?, 5) as usize; + if dist_idx >= DIST_BASE.len() { + return Err(DecompressError::InvalidData); + } + let dist_bits = DIST_ADD_BITS[dist_idx]; + let mut dist = DIST_BASE[dist_idx] as usize; + if dist_bits > 0 { + dist += csrc.read(dist_bits)? as usize; + } + + if dst_idx + length > dst.len() { + return Err(DecompressError::OutputFull); + } + if dist > dst_idx { + return Err(DecompressError::InvalidData); + } + lz_copy(dst, dst_idx, dist, length); + dst_idx += length; + } + } + } else { + unreachable!(); + } + }, + 2 => { + let hlit = csrc.read(5)? as usize + 257; + if hlit >= 287 { + return Err(DecompressError::InvalidHeader); + } + let hdist = csrc.read(5)? as usize + 1; + let hclen = csrc.read(4)? as usize + 4; + let mut len_lengths = [0; 19]; + let mut all_lengths = [0; NUM_LITERALS + NUM_DISTS]; + + for cur_len_idx in 0..hclen { + len_lengths[LEN_RECODE[cur_len_idx]] = csrc.read(3)? as u8; + } + let mut len_codes = [ShortCodebookDesc { code: 0, bits: 0 }; 19]; + lengths_to_codes(&len_lengths, &mut len_codes)?; + let mut cr = ShortCodebookDescReader::new(len_codes.to_vec()); + let ret = Codebook::new(&mut cr, CodebookMode::LSB); + if ret.is_err() { + return Err(DecompressError::InvalidHeader); + } + let dyn_len_cb = ret.unwrap(); + + let mut cur_len_idx = 0; + while cur_len_idx < hlit + hdist { + let val = csrc.read_cb(&dyn_len_cb)?; + if val < 16 { + all_lengths[cur_len_idx] = val as u8; + cur_len_idx += 1; + } else { + let mode = (val as usize) - 16; + if mode > 2 { + return Err(DecompressError::InvalidHeader); + } + let base = REPEAT_BASE[mode] as usize; + let bits = REPEAT_BITS[mode]; + let len = base + (csrc.read(bits)? as usize); + if cur_len_idx + len > hlit + hdist { + return Err(DecompressError::InvalidHeader); + } + let rpt = if mode == 0 { + if cur_len_idx == 0 { + return Err(DecompressError::InvalidHeader); + } + all_lengths[cur_len_idx - 1] + } else { + 0 + }; + for _ in 0..len { + all_lengths[cur_len_idx] = rpt; + cur_len_idx += 1; + } + } + } + let (lit_lengths, dist_lengths) = all_lengths.split_at(hlit); + + let mut lit_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_LITERALS]; + lengths_to_codes(lit_lengths, &mut lit_codes)?; + let mut cr = ShortCodebookDescReader::new(lit_codes.to_vec()); + let ret = Codebook::new(&mut cr, CodebookMode::LSB); + if ret.is_err() { return Err(DecompressError::InvalidHeader); } + let dyn_lit_cb = ret.unwrap(); + + let mut dist_codes = [ShortCodebookDesc { code: 0, bits: 0 }; NUM_DISTS]; + lengths_to_codes(&dist_lengths[..hdist], &mut dist_codes)?; + let mut cr = ShortCodebookDescReader::new(dist_codes.to_vec()); + let ret = Codebook::new(&mut cr, CodebookMode::LSB); + if ret.is_err() { return Err(DecompressError::InvalidHeader); } + let dyn_dist_cb = ret.unwrap(); + + loop { + let val = csrc.read_cb(&dyn_lit_cb)?; + if val < 256 { + if dst_idx >= dst.len() { + return Err(DecompressError::OutputFull); + } + dst[dst_idx] = val as u8; + dst_idx += 1; + } else if val == 256 { + break; + } else { + let len_idx = (val - 257) as usize; + if len_idx >= LENGTH_BASE.len() { + return Err(DecompressError::InvalidData); + } + let len_bits = LENGTH_ADD_BITS[len_idx]; + let mut length = LENGTH_BASE[len_idx] as usize; + if len_bits > 0 { + length += csrc.read(len_bits)? as usize; + } + + let dist_idx = csrc.read_cb(&dyn_dist_cb)? as usize; + if dist_idx >= DIST_BASE.len() { + return Err(DecompressError::InvalidData); + } + let dist_bits = DIST_ADD_BITS[dist_idx]; + let mut dist = DIST_BASE[dist_idx] as usize; + if dist_bits > 0 { + dist += csrc.read(dist_bits)? as usize; + } + + if dst_idx + length > dst.len() { + return Err(DecompressError::OutputFull); + } + if dist > dst_idx { + return Err(DecompressError::InvalidData); + } + lz_copy(dst, dst_idx, dist, length); + dst_idx += length; + } + } + }, + _ => return Err(DecompressError::InvalidHeader), + }; + } + Ok(dst_idx) } } @@ -1362,25 +1583,25 @@ fn gen_tree(codes: &mut [u16], lens: &mut [u8], num_codes: &mut usize, stats: &m *num_codes = 0; return; } - while tot_w > (1 << max_bits) { - for w in stats.iter_mut() { - *w = (*w + 1) >> 1; - } - tot_w = 0; - for &w in stats.iter() { - tot_w += w; + loop { + let mut tree = Tree::new(); + for (sym, &w) in stats.iter().enumerate() { + tree.insert(Node{ sym: sym as u16, w: w as u16, idx0: 64000, idx1: 64000 }); } - } - let mut tree = Tree::new(); - for (sym, &w) in stats.iter().enumerate() { - tree.insert(Node{ sym: sym as u16, w: w as u16, idx0: 64000, idx1: 64000 }); - } - tree.trim(); - tree.build(); + tree.trim(); + tree.build(); - for n in tree.nodes[..tree.nnodes].iter() { - if n.sym != NODE_SYM { - lens[n.sym as usize] = n.w as u8; + for n in tree.nodes[..tree.nnodes].iter() { + if n.sym != NODE_SYM { + lens[n.sym as usize] = n.w as u8; + } + } + if !lens.iter().any(|&x| x > max_bits) { + break; + } else { + for w in stats.iter_mut() { + *w = (*w + 1) >> 1; + } } } lengths_to_codes16(lens, codes); @@ -1449,110 +1670,92 @@ fn check_match(src1: &[u8], src2: &[u8]) -> u16 { const HASH_SIZE: usize = 4096; const MAX_MATCH_LEN: usize = 258; const WINDOW_SIZE: usize = 32768 - MAX_MATCH_LEN; -const NONEXT: usize = WINDOW_SIZE * 2; +const WINDOW_MASK: usize = 32767; struct MatchFinder<'a> { src: &'a [u8], pos: usize, hstart: [usize; HASH_SIZE], - hend: [usize; HASH_SIZE], - hnext: [usize; WINDOW_SIZE * 3], + hnext: [usize; WINDOW_MASK + 1], } impl<'a> MatchFinder<'a> { fn new(src: &'a [u8]) -> Self { - let mut obj = Self { + Self { src, pos: 0, - hstart: [0; HASH_SIZE], - hend: [0; HASH_SIZE], - hnext: [0; WINDOW_SIZE * 3], - }; - obj.build_hash(); - obj + hstart: [src.len(); HASH_SIZE], + hnext: [src.len(); WINDOW_MASK + 1], + } } fn hash(src: &[u8]) -> usize { let _ = src[2]; (((u16::from(src[0]) << 10) ^ (u16::from(src[1]) << 5) ^ u16::from(src[2])) & ((HASH_SIZE as u16) - 1)) as usize } - fn build_hash(&mut self) { - for el in self.hstart.iter_mut() { *el = NONEXT; } - for el in self.hend.iter_mut() { *el = NONEXT; } - for el in self.hnext.iter_mut() { *el = NONEXT; } - if self.pos + 3 >= self.src.len() { - return; - } - let end = (self.src.len() - 3).min(self.pos + NONEXT); - for i in (self.pos .. end).rev() { - let key = Self::hash(&self.src[i..]); - if self.hstart[key] == NONEXT { - self.hstart[key] = i; - self.hend[key] = i; - self.hnext[key] = NONEXT; - } else { - self.hnext[self.hend[key]] = i; - self.hend[key] = i; - } - } + fn add_hash(&mut self, hash: usize) { + self.hnext[self.pos & WINDOW_MASK] = self.hstart[hash]; + self.hstart[hash] = self.pos; + self.hnext[(self.pos + 1) & WINDOW_MASK] = self.src.len(); } fn find_match(&mut self) -> (u16, u16) { if self.pos == 0 || self.pos + 3 > self.src.len() { return (0, 0); } - let key = Self::hash(&self.src[self.pos..]) as usize; + let key = Self::hash(&self.src[self.pos..]); let mut best_pos = 0; let mut best_len = 0; let mut idx = self.hstart[key]; - while idx != NONEXT && idx + WINDOW_SIZE > self.pos { - if idx < self.pos { - let cur_len = check_match(&self.src[self.pos..], &self.src[idx..]); - if cur_len > best_len { - best_len = cur_len; - best_pos = self.pos - idx; - if best_len >= (MAX_MATCH_LEN as u16) { - return (best_pos as u16, MAX_MATCH_LEN as u16); - } + let search_end = self.pos.saturating_sub(WINDOW_SIZE); + let mut tries = 0; + while idx >= search_end && idx < self.pos { + let cur_len = check_match(&self.src[self.pos..], &self.src[idx..]); + if cur_len > best_len { + best_len = cur_len; + best_pos = self.pos - idx; + if best_len >= (MAX_MATCH_LEN as u16) { + return (best_pos as u16, MAX_MATCH_LEN as u16); } } - idx = self.hnext[idx]; + tries += 1; + if tries > 16 { + break; + } + idx = self.hnext[idx & WINDOW_MASK]; } + self.add_hash(key); (best_pos as u16, best_len) } fn find_all_matches(&mut self, dists: &mut [u16; MAX_MATCH_LEN + 1]) { if self.pos == 0 || self.pos + 3 > self.src.len() { return; } - let key = Self::hash(&self.src[self.pos..]) as usize; + let key = Self::hash(&self.src[self.pos..]); let mut idx = self.hstart[key]; - while idx != NONEXT && idx + WINDOW_SIZE > self.pos { - if idx < self.pos { - let cur_len = (check_match(&self.src[self.pos..], &self.src[idx..]) as usize).min(MAX_MATCH_LEN); - if cur_len > 0 && dists[cur_len] == 0 { - dists[cur_len] = (self.pos - idx) as u16; - } + let search_end = self.pos.saturating_sub(WINDOW_SIZE); + let mut tries = 0; + while idx >= search_end && idx < self.pos { + let cur_len = (check_match(&self.src[self.pos..], &self.src[idx..]) as usize).min(MAX_MATCH_LEN); + if cur_len > 0 && dists[cur_len] == 0 { + dists[cur_len] = (self.pos - idx) as u16; + } + idx = self.hnext[idx & WINDOW_MASK]; + tries += 1; + if tries > 128 { + break; } - idx = self.hnext[idx]; } + self.add_hash(key); } fn advance(&mut self, num: usize) { - self.pos += num; - - if self.pos >= NONEXT { - let (_, tail) = self.src.split_at(self.pos - WINDOW_SIZE); - self.src = tail; - self.pos = WINDOW_SIZE; - self.build_hash(); - } else { - for (start, end) in self.hstart.iter_mut().zip(self.hend.iter_mut()) { - let mut idx = *start; - while idx != NONEXT && idx + WINDOW_SIZE < self.pos { - idx = self.hnext[idx]; - *start = idx; - } - if idx == NONEXT { - *end = NONEXT; + self.pos += 1; + if num > 1 { + for _ in 1..num { + if self.pos + 3 <= self.src.len() { + let key = Self::hash(&self.src[self.pos..]); + self.add_hash(key); } + self.pos += 1; } } } @@ -1715,22 +1918,19 @@ impl LZParse for OptimalParser { } ///! Deflate compression mode. -#[derive(Clone,Copy,Debug,PartialEq)] +#[derive(Clone,Copy,Debug,PartialEq,Default)] pub enum DeflateMode { ///! No compression. NoCompr, ///! Fast compression. Fast, ///! Still fast but better compression. + #[default] Better, ///! Slow but the best compression. Best, } -impl Default for DeflateMode { - fn default() -> Self { DeflateMode::Better } -} - pub const DEFLATE_MODE_DESCRIPTION: &str = "Deflate compression level."; ///! Deflate option for no compression. pub const DEFLATE_MODE_NONE: &str = "none";