From 217de10bc01258d3a479c2784d6c5542c30b0f63 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Sat, 30 Sep 2023 11:40:32 +0200 Subject: [PATCH] Bink Video encoder (only 'b' version for now) --- nihav-rad/Cargo.toml | 12 +- nihav-rad/src/codecs/binkvidenc.rs | 930 ++++++++++++++++++++++ nihav-rad/src/codecs/binkvidenc/bundle.rs | 233 ++++++ nihav-rad/src/codecs/binkvidenc/dsp.rs | 598 ++++++++++++++ nihav-rad/src/codecs/binkvidenc/mc.rs | 79 ++ nihav-rad/src/codecs/binkvidenc/rc.rs | 112 +++ nihav-rad/src/codecs/mod.rs | 19 +- nihav-rad/src/lib.rs | 2 + 8 files changed, 1982 insertions(+), 3 deletions(-) create mode 100644 nihav-rad/src/codecs/binkvidenc.rs create mode 100644 nihav-rad/src/codecs/binkvidenc/bundle.rs create mode 100644 nihav-rad/src/codecs/binkvidenc/dsp.rs create mode 100644 nihav-rad/src/codecs/binkvidenc/mc.rs create mode 100644 nihav-rad/src/codecs/binkvidenc/rc.rs diff --git a/nihav-rad/Cargo.toml b/nihav-rad/Cargo.toml index d155427..23687dc 100644 --- a/nihav-rad/Cargo.toml +++ b/nihav-rad/Cargo.toml @@ -11,8 +11,11 @@ path = "../nihav-core" path = "../nihav-codec-support" features = ["dct", "fft"] +[dev-dependencies] +nihav_commonfmt = { path = "../nihav-commonfmt", default-features=false, features = ["demuxer_y4m", "decoder_rawvideo"] } + [features] -default = ["all_decoders", "all_demuxers"] +default = ["all_decoders", "all_demuxers", "all_encoders"] demuxers = [] all_demuxers = ["demuxer_smk", "demuxer_bink"] demuxer_smk = ["demuxers"] @@ -28,4 +31,9 @@ decoder_bink2 = ["decoders"] all_audio_decoders = ["decoder_smkaud", "decoder_binkaud"] decoder_binkaud = ["decoders"] -decoder_smkaud = ["decoders"] \ No newline at end of file +decoder_smkaud = ["decoders"] + +encoders = [] +all_encoders = ["encoder_binkvid", "encoder_binkaud"] +encoder_binkaud = ["encoders"] +encoder_binkvid = ["encoders"] \ No newline at end of file diff --git a/nihav-rad/src/codecs/binkvidenc.rs b/nihav-rad/src/codecs/binkvidenc.rs new file mode 100644 index 
0000000..c84b115 --- /dev/null +++ b/nihav-rad/src/codecs/binkvidenc.rs @@ -0,0 +1,930 @@ +use nihav_core::codecs::*; +use nihav_core::io::bitwriter::*; + +use super::binkviddata::*; + +mod bundle; +use bundle::*; +mod dsp; +use dsp::*; +mod mc; +use mc::*; +mod rc; +use rc::*; + +const THRESHOLD: u32 = 64; +const MAX_DIST: u32 = std::u32::MAX; + +#[derive(Clone,Copy,Default,PartialEq)] +enum DoublingMode { + #[default] + None, + Height2X, + HeightIlace, + Scale2X, + Width2X, + Width2XIlace +} + +impl std::string::ToString for DoublingMode { + fn to_string(&self) -> String { + match *self { + DoublingMode::None => "none".to_string(), + DoublingMode::Height2X => "height2x".to_string(), + DoublingMode::HeightIlace => "height_il".to_string(), + DoublingMode::Scale2X => "scale2x".to_string(), + DoublingMode::Width2X => "width2x".to_string(), + DoublingMode::Width2XIlace => "width2x_il".to_string(), + } + } +} + +#[allow(dead_code)] +#[derive(Clone,Copy,Debug,PartialEq)] +enum BlockMode { + Skip, + Scaled, + Run, + Intra, + Residue, + Inter, + Fill, + Pattern, + Motion, + Raw, +} + +const BLOCK_MODE_NAMES: &[&str] = &[ + "skip", "run", "intra", "residue", "inter", "fill", "pattern", "motion", "raw", "scaled" +]; + +impl From for usize { + fn from(bmode: BlockMode) -> usize { + match bmode { + BlockMode::Skip => 0, + BlockMode::Run => 1, + BlockMode::Intra => 2, + BlockMode::Residue => 3, + BlockMode::Inter => 4, + BlockMode::Fill => 5, + BlockMode::Pattern => 6, + BlockMode::Motion => 7, + BlockMode::Raw => 8, + BlockMode::Scaled => 9, + } + } +} + +impl std::str::FromStr for BlockMode { + type Err = (); + fn from_str(s: &str) -> Result { + match s { + "skip" => Ok(BlockMode::Skip), + "run" => Ok(BlockMode::Run), + "intra" => Ok(BlockMode::Intra), + "residue" => Ok(BlockMode::Residue), + "inter" => Ok(BlockMode::Inter), + "fill" => Ok(BlockMode::Fill), + "pattern" => Ok(BlockMode::Pattern), + "motion" => Ok(BlockMode::Motion), + "raw" => Ok(BlockMode::Raw), + "scaled" 
=> Ok(BlockMode::Scaled), + _ => Err(()), + } + } +} + +impl std::string::ToString for BlockMode { + fn to_string(&self) -> String { BLOCK_MODE_NAMES[usize::from(*self)].to_string() } +} + +impl BlockMode { + fn to_code_b(self) -> u8 { + match self { + BlockMode::Skip => 0, + BlockMode::Run => 1, + BlockMode::Intra => 2, + BlockMode::Residue => 3, + BlockMode::Inter => 4, + BlockMode::Fill => 5, + BlockMode::Pattern => 6, + BlockMode::Motion => 7, + BlockMode::Raw => 8, + _ => unreachable!(), + } + } +} + +fn close_enough(base: u8, thr: u8, pix: u8) -> bool { + pix >= base.saturating_sub(thr) && pix <= base.saturating_add(thr) +} + +fn write_run(tokens: &mut BlockTokens, pos: usize, len: usize, is_run: bool, clrs: &[u8; 64], is_b: bool) { + tokens.other.push((is_run as u16, 1)); + if is_b { + tokens.other.push(((len - 1) as u16, BINKB_RUN_BITS[pos])); + } else { + unimplemented!(); + } + if is_run { + tokens.colors.push(clrs[pos]); + } else { + tokens.colors.extend_from_slice(&clrs[pos..][..len]); + } +} + +fn find_run_pattern(cur_blk: &[u8; 64], tokens: &mut BlockTokens, tmp_tok: &mut BlockTokens, is_b: bool, thr: u8) -> u32 { + tokens.clear(); + let mut best_diff = MAX_DIST; + let mut clrs = [0; 64]; + for (id, pattern) in BINK_PATTERNS.iter().enumerate() { + tmp_tok.clear(); + tmp_tok.other.push((id as u16, 4)); + + for (dst, &idx) in clrs.iter_mut().zip(pattern.iter()) { + *dst = cur_blk[usize::from(idx)]; + } + + let mut cur_diff = 0; + let mut last_val = 0; + let mut last_pos = 0; + let mut len = 0; + let mut is_run = false; + + for (i, &val) in clrs.iter().enumerate() { + if len > 0 && close_enough(last_val, thr, val) { + if is_run || len == 1 { + is_run = true; + len += 1; + } else { + write_run(tmp_tok, last_pos, len - 1, is_run, &clrs, is_b); + last_pos = i - 1; + last_val = clrs[last_pos]; + len = 2; + } + cur_diff += pix_dist(last_val, val); + } else { + if len > 0 { + write_run(tmp_tok, last_pos, len, is_run, &clrs, is_b); + } + last_pos = i; + 
last_val = val; + len = 1; + is_run = false; + } + } + match len { + 0 => {}, + 1 => tmp_tok.colors.push(last_val), + _ => write_run(tmp_tok, last_pos, len, is_run, &clrs, is_b), + }; + + if cur_diff < best_diff { + best_diff = cur_diff; + std::mem::swap(tokens, tmp_tok); + } + } + best_diff +} + +fn find_pattern(cur_blk: &[u8; 64], tokens: &mut BlockTokens) -> u32 { + let sum = cur_blk.iter().fold(0u16, + |acc, &a| acc + u16::from(a)); + let avg = ((sum + 32) >> 6) as u8; + let mut sum_ba = 0u16; + let mut sum_aa = 0u16; + let mut cnt_ba = 0u16; + let mut cnt_aa = 0u16; + for &pix in cur_blk.iter() { + if pix < avg { + sum_ba += u16::from(pix); + cnt_ba += 1; + } else { + sum_aa += u16::from(pix); + cnt_aa += 1; + } + } + if cnt_ba > 0 { // not flat + let clr_ba = ((sum_ba + cnt_ba / 2) / cnt_ba) as u8; + let clr_aa = ((sum_aa + cnt_aa / 2) / cnt_aa) as u8; + tokens.clear(); + tokens.colors.push(clr_ba); + tokens.colors.push(clr_aa); + let mut diff = 0; + for row in cur_blk.chunks_exact(8) { + let mut pat = 0; + let mut mask = 1; + for &p in row.iter() { + if p < avg { + diff += pix_dist(p, clr_ba); + } else { + pat |= mask; + diff += pix_dist(p, clr_aa); + } + mask <<= 1; + } + tokens.pattern.push(pat); + } + diff + } else { + MAX_DIST + } +} + +struct BinkEncoder { + stream: Option, + pkt: Option, + cur_frm: NAVideoBufferRef, + last_frm: NAVideoBufferRef, + scale_mode: DoublingMode, + version: char, + bundles: Bundles, + nframes: u64, + frame_no: u64, + + bst_tokens: BlockTokens, + tmp_tokens: BlockTokens, + tmp_tok2: BlockTokens, + + dsp: DSP, + rc: RateControl, + + forbidden: [bool; 12], + + print_stats: bool, + blk_stats: [usize; 12], + tot_size: usize, + iq_stats: [usize; 16], + pq_stats: [usize; 16], +} + +impl BinkEncoder { + fn new() -> Self { + let frm = alloc_video_buffer(NAVideoInfo::new(4, 4, false, YUV420_FORMAT), 0).unwrap(); + let cur_frm = frm.get_vbuf().unwrap(); + let last_frm = cur_frm.clone(); + Self { + stream: None, + pkt: None, + cur_frm, 
last_frm, + scale_mode: DoublingMode::default(), + version: 'b', + bundles: Bundles::default(), + nframes: 0, + frame_no: 0, + + bst_tokens: BlockTokens::new(), + tmp_tokens: BlockTokens::new(), + tmp_tok2: BlockTokens::new(), + + dsp: DSP::new(), + rc: RateControl::new(), + + forbidden: [false; 12], + + print_stats: false, + blk_stats: [0; 12], + tot_size: 0, + iq_stats: [0; 16], + pq_stats: [0; 16], + } + } + fn encode_frame(&mut self, bw: &mut BitWriter, srcbuf: &NAVideoBuffer) -> EncoderResult { + let frm = NASimpleVideoFrame::from_video_buf(&mut self.cur_frm).unwrap(); + let src = srcbuf.get_data(); + let last = self.last_frm.get_data(); + + let is_b = self.version == 'b'; + let first_frame = self.frame_no == 1; + let pat_run_thr = self.rc.pattern_run_threshold(); + + let mut cur_blk = [0; 64]; + let mut mv_blk = [0; 64]; + let mut is_intra = true; + let mut cur_forbidden = self.forbidden; + self.rc.modify_forbidden_btypes(&mut cur_forbidden); + self.dsp.set_quant_ranges(self.rc.get_quant_ranges()); + for plane in 0..3 { + let loff = self.last_frm.get_offset(plane); + let soff = srcbuf.get_offset(plane); + let doff = frm.offset[plane]; + let lstride = self.last_frm.get_stride(plane); + let sstride = srcbuf.get_stride(plane); + let dstride = frm.stride[plane]; + + let cur_w = (frm.width[plane] + 7) & !7; + let cur_h = (frm.height[plane] + 7) & !7; + let dst = &mut frm.data[doff..]; + let src = &src[soff..]; + let last = &last[loff..]; + + if is_b { + // copy last frame as motion search is performed on partially updated frame + for (dline, sline) in dst.chunks_mut(dstride).zip(last.chunks(lstride)).take(cur_h) { + dline[..cur_w].copy_from_slice(&sline[..cur_w]); + } + } + + self.bundles.reset(); + for (row, stripe) in src.chunks(sstride * 8).take(cur_h / 8).enumerate() { + self.bundles.new_row(row); + let y = row * 8; + for x in (0..cur_w).step_by(8) { + for (dst, src) in cur_blk.chunks_exact_mut(8).zip(stripe[x..].chunks(sstride)) { + 
dst.copy_from_slice(&src[..8]); + } + + let (skip_dist, skip_diff) = if !first_frame && !cur_forbidden[usize::from(BlockMode::Skip)] { + let diff = calc_diff(&cur_blk, 8, &last[x + y * lstride..], lstride); + (self.rc.metric(diff, 0), diff) + } else { // no skip blocks for the first frame + (MAX_DIST, MAX_DIST) + }; + let mut block_mode = BlockMode::Skip; + let mut best_dist = skip_dist; + self.bst_tokens.clear(); + + if best_dist > THRESHOLD && !first_frame && !cur_forbidden[usize::from(BlockMode::Motion)] { + let (mv, diff) = if is_b { + mv_search(dst, dstride, cur_w, cur_h, x, y, skip_diff, &cur_blk, &mut mv_blk) + } else { + mv_search(last, lstride, cur_w, cur_h, x, y, skip_diff, &cur_blk, &mut mv_blk) + }; + if is_b { + get_block(dst, dstride, x, y, mv, &mut mv_blk); + } else { + get_block(last, lstride, x, y, mv, &mut mv_blk); + } + self.tmp_tokens.clear(); + self.tmp_tokens.xoff.push(mv.x as i8); + self.tmp_tokens.yoff.push(mv.y as i8); + let mv_dist = self.rc.metric(diff, self.tmp_tokens.bits(is_b)); + if mv_dist < best_dist { + block_mode = BlockMode::Motion; + best_dist = mv_dist; + std::mem::swap(&mut self.bst_tokens, &mut self.tmp_tokens); + } + self.dsp.get_diff(&mv_blk, &cur_blk); + if best_dist > THRESHOLD && !cur_forbidden[usize::from(BlockMode::Residue)] { + self.tmp_tokens.clear(); + self.tmp_tokens.xoff.push(mv.x as i8); + self.tmp_tokens.yoff.push(mv.y as i8); + let diff = self.dsp.try_residue(&mut self.tmp_tokens); + let res_dist = self.rc.metric(diff, self.tmp_tokens.bits(is_b)); + if res_dist < best_dist { + best_dist = res_dist; + block_mode = BlockMode::Residue; + std::mem::swap(&mut self.bst_tokens, &mut self.tmp_tokens); + } + } + if best_dist > THRESHOLD && !cur_forbidden[usize::from(BlockMode::Inter)] { + self.tmp_tokens.clear(); + self.tmp_tokens.xoff.push(mv.x as i8); + self.tmp_tokens.yoff.push(mv.y as i8); + let dct_p_dist = self.dsp.try_dct_inter(&mv_blk, &cur_blk, &mut self.tmp_tokens, &mut self.tmp_tok2, is_b, &self.rc, 
best_dist); + if dct_p_dist < best_dist { + best_dist = dct_p_dist; + block_mode = BlockMode::Inter; + std::mem::swap(&mut self.bst_tokens, &mut self.tmp_tokens); + } + } + } + if best_dist > THRESHOLD && !cur_forbidden[usize::from(BlockMode::Fill)] { + let sum = cur_blk.iter().fold(0u16, + |acc, &a| acc + u16::from(a)); + let avg = ((sum + 32) >> 6) as u8; + self.tmp_tokens.clear(); + self.tmp_tokens.colors.push(avg); + let diff = cur_blk.iter().fold(0u32, + |acc, &a| acc + pix_dist(a, avg)); + let fill_dist = self.rc.metric(diff, self.tmp_tokens.bits(is_b)); + if fill_dist < best_dist { + block_mode = BlockMode::Fill; + best_dist = fill_dist; + std::mem::swap(&mut self.bst_tokens, &mut self.tmp_tokens); + } + } + if best_dist > THRESHOLD && !cur_forbidden[usize::from(BlockMode::Pattern)] { + let diff = find_pattern(&cur_blk, &mut self.tmp_tokens); + let pat_dist = self.rc.metric(diff, self.tmp_tokens.bits(is_b)); + if pat_dist < best_dist { + best_dist = pat_dist; + block_mode = BlockMode::Pattern; + std::mem::swap(&mut self.bst_tokens, &mut self.tmp_tokens); + } + } + if best_dist > THRESHOLD && !cur_forbidden[usize::from(BlockMode::Run)] { + let diff = find_run_pattern(&cur_blk, &mut self.tmp_tokens, &mut self.tmp_tok2, is_b, pat_run_thr); + let run_dist = self.rc.metric(diff, self.tmp_tokens.bits(is_b)); + if run_dist < best_dist { + best_dist = run_dist; + block_mode = BlockMode::Run; + std::mem::swap(&mut self.bst_tokens, &mut self.tmp_tokens); + } + } + if best_dist > THRESHOLD && !cur_forbidden[usize::from(BlockMode::Intra)] { + let dct_i_dist = self.dsp.try_dct_intra(&cur_blk, &mut self.tmp_tokens, &mut self.tmp_tok2, is_b, &self.rc, best_dist); + if dct_i_dist < best_dist { + best_dist = dct_i_dist; + block_mode = BlockMode::Intra; + std::mem::swap(&mut self.bst_tokens, &mut self.tmp_tokens); + } + } + if best_dist > THRESHOLD && !cur_forbidden[usize::from(BlockMode::Raw)] + && (!is_b || self.bundles.can_fit_raw_block()) { + self.tmp_tokens.clear(); + 
self.tmp_tokens.colors.extend_from_slice(&cur_blk); + let raw_dist = self.rc.metric(0, self.tmp_tokens.bits(is_b)); + if raw_dist < best_dist { + best_dist = raw_dist; + block_mode = BlockMode::Raw; + std::mem::swap(&mut self.bst_tokens, &mut self.tmp_tokens); + } + } + let _ = best_dist; // silence a warning + + let bmode = if is_b { block_mode.to_code_b() } else { unimplemented!() }; + self.bundles.add_block_type(bmode); + self.bundles.add_tokens(&self.bst_tokens); + + self.blk_stats[usize::from(block_mode)] += 1; + + match block_mode { + BlockMode::Skip if is_b => {}, + BlockMode::Skip => { + for (dline, sline) in dst[x + y * dstride..].chunks_mut(dstride) + .zip(last[x + y * lstride..].chunks(lstride)).take(8) { + dline[..8].copy_from_slice(&sline[..8]); + } + is_intra = false; + } + BlockMode::Fill => { + let avg = self.bst_tokens.colors[0]; + for dline in dst[x + y * dstride..].chunks_mut(dstride).take(8) { + for el in dline[..8].iter_mut() { + *el = avg; + } + } + }, + BlockMode::Pattern => { + for (&pat, dline) in self.bst_tokens.pattern.iter().zip(dst[x + y * dstride..].chunks_mut(dstride)) { + let mut pattern = pat as usize; + for el in dline[..8].iter_mut() { + *el = self.bst_tokens.colors[pattern & 1]; + pattern >>= 1; + } + } + }, + BlockMode::Run => { + let mut clrs = self.bst_tokens.colors.iter(); + let mut data = self.bst_tokens.other.iter(); + let &(idx, _) = data.next().unwrap(); + let pattern = BINK_PATTERNS[usize::from(idx)]; + let mut len = 0; + let mut is_run = false; + let mut run_val = 0; + for (i, &idx) in pattern.iter().enumerate() { + let dst_idx = (idx & 7) as usize + ((idx >> 3) as usize) * dstride; + if len == 0 { + if i < 63 { + let &(flag, _nbits) = data.next().unwrap(); +assert_eq!(_nbits, 1); + is_run = flag != 0; + let &(len1, _) = data.next().unwrap(); + len = usize::from(len1) + 1; + if is_run { + run_val = *clrs.next().unwrap(); + } + } else { + len = 1; + is_run = false; + } + } + dst[x + y * dstride + dst_idx] = if is_run { + 
run_val + } else { + *clrs.next().unwrap() + }; + len -= 1; + } + }, + BlockMode::Raw => { + put_block(&mut dst[x + y * dstride..], dstride, &cur_blk); + }, + BlockMode::Motion => { + put_block(&mut dst[x + y * dstride..], dstride, &mv_blk); + is_intra = false; + }, + BlockMode::Residue => { + self.dsp.recon_residue(&mut dst[x + y * dstride..], dstride, &mv_blk); + is_intra = false; + }, + BlockMode::Inter => { + self.dsp.recon_dct_p(&mut dst[x + y * dstride..], dstride); + let q = if is_b { + usize::from(self.bst_tokens.interq[0]) + } else { + let (qval, _) = self.bst_tokens.other[self.bst_tokens.other.len() - 1]; + usize::from(qval) + }; + self.pq_stats[q] += 1; + is_intra = false; + }, + BlockMode::Intra => { + self.dsp.recon_dct_i(&mut dst[x + y * dstride..], dstride); + let q = if is_b { + usize::from(self.bst_tokens.intraq[0]) + } else { + let (qval, _) = self.bst_tokens.other[self.bst_tokens.other.len() - 1]; + usize::from(qval) + }; + self.iq_stats[q] += 1; + }, + _ => unimplemented!(), + }; + } + self.bundles.end_row(); + } + for row in 0..(cur_h / 8) { + self.bundles.write(bw, row); + } + while (bw.tell() & 0x1F) != 0 { + bw.write0(); + } + } + Ok(is_intra) + } + fn encode_skip(&mut self, bw: &mut BitWriter) -> EncoderResult<()> { + let src = self.last_frm.get_data(); + let dst = self.cur_frm.get_data_mut().unwrap(); + dst.copy_from_slice(src); + + for plane in 0..3 { + let (width, height) = self.cur_frm.get_dimensions(plane); + let tiles_w = (width + 7) >> 3; + let tiles_h = (height + 7) >> 3; + self.bundles.reset(); + for row in 0..tiles_h { + self.bundles.new_row(row); + for _ in 0..tiles_w { + self.bundles.add_block_type(0); // skip always has code 0 + } + self.bundles.end_row(); + } + for row in 0..tiles_h { + self.bundles.write(bw, row); + } + } + + Ok(()) + } +} + +impl NAEncoder for BinkEncoder { + fn negotiate_format(&self, encinfo: &EncodeParameters) -> EncoderResult { + match encinfo.format { + NACodecTypeInfo::None => { + Ok(EncodeParameters { 
+ format: NACodecTypeInfo::Video(NAVideoInfo::new(0, 0, true, YUV420_FORMAT)), + ..Default::default() + }) + }, + NACodecTypeInfo::Audio(_) => Err(EncoderError::FormatError), + NACodecTypeInfo::Video(vinfo) => { + let outinfo = NAVideoInfo::new(vinfo.width, vinfo.height, false, YUV420_FORMAT); + let mut ofmt = *encinfo; + ofmt.format = NACodecTypeInfo::Video(outinfo); + Ok(ofmt) + } + } + } + fn get_capabilities(&self) -> u64 { ENC_CAPS_SKIPFRAME } + fn init(&mut self, stream_id: u32, encinfo: EncodeParameters) -> EncoderResult { + match encinfo.format { + NACodecTypeInfo::None => Err(EncoderError::FormatError), + NACodecTypeInfo::Audio(_) => Err(EncoderError::FormatError), + NACodecTypeInfo::Video(vinfo) => { + if vinfo.format != YUV420_FORMAT { + return Err(EncoderError::FormatError); + } + + let mut edata = vec![b'B', b'I', b'K', self.version as u8, 0, 0, 0, 0]; + match self.scale_mode { + DoublingMode::None => {}, + DoublingMode::Height2X => { + edata[7] |= 0x10; + }, + DoublingMode::HeightIlace => { + edata[7] |= 0x20; + }, + DoublingMode::Width2X => { + edata[7] |= 0x30; + }, + DoublingMode::Scale2X => { + edata[7] |= 0x40; + }, + DoublingMode::Width2XIlace => { + edata[7] |= 0x50; + }, + }; + + if self.nframes == 0 { + println!("Bink should set the number of frames in the stream"); + return Err(EncoderError::FormatError); + } + + let out_info = NAVideoInfo::new(vinfo.width, vinfo.height, false, YUV420_FORMAT); + let info = NACodecInfo::new("bink-video", NACodecTypeInfo::Video(out_info), Some(edata)); + let mut stream = NAStream::new(StreamType::Video, stream_id, info, encinfo.tb_num, encinfo.tb_den, self.nframes); + stream.set_num(stream_id as usize); + let stream = stream.into_ref(); + + self.stream = Some(stream.clone()); + + let frm = alloc_video_buffer(out_info, 4)?; + self.cur_frm = frm.get_vbuf().unwrap(); + let frm = alloc_video_buffer(out_info, 4)?; + self.last_frm = frm.get_vbuf().unwrap(); + + let cdata = self.cur_frm.get_data_mut().unwrap(); + for 
el in cdata.iter_mut() { + *el = 0x00; + } + let cdata = self.last_frm.get_data_mut().unwrap(); + for el in cdata.iter_mut() { + *el = 0x00; + } + + self.rc.init(encinfo.tb_num, encinfo.tb_den, encinfo.bitrate, encinfo.quality); + + Ok(stream) + }, + } + } + fn encode(&mut self, frm: &NAFrame) -> EncoderResult<()> { + if self.frame_no >= self.nframes { + return Ok(()); + } + self.frame_no += 1; + let mut bw = BitWriter::new(Vec::with_capacity(42), BitWriterMode::LE); + + let is_intra = match frm.get_buffer() { + NABufferType::Video(ref buf) => { + self.encode_frame(&mut bw, buf)? + }, + NABufferType::None => { + self.encode_skip(&mut bw)?; + false + }, + _ => return Err(EncoderError::InvalidParameters), + }; + let dbuf = bw.end(); + self.tot_size += dbuf.len(); + self.rc.update_size(dbuf.len()); + self.pkt = Some(NAPacket::new(self.stream.clone().unwrap(), frm.ts, is_intra, dbuf)); + + std::mem::swap(&mut self.cur_frm, &mut self.last_frm); + Ok(()) + } + fn get_packet(&mut self) -> EncoderResult> { + let mut npkt = None; + std::mem::swap(&mut self.pkt, &mut npkt); + Ok(npkt) + } + fn flush(&mut self) -> EncoderResult<()> { + Ok(()) + } +} + +impl Drop for BinkEncoder { + fn drop(&mut self) { + if self.print_stats { + println!("encoded {} frame(s)", self.frame_no); + println!("block statistics:"); + for (name, &count) in BLOCK_MODE_NAMES.iter().zip(self.blk_stats.iter()) { + if count != 0 { + println!(" {:8}: {:8}", name, count); + } + } + if self.blk_stats[usize::from(BlockMode::Intra)] != 0 { + print!("intra block quants:"); + for &count in self.iq_stats.iter() { + print!(" {}", count); + } + println!(); + } + if self.blk_stats[usize::from(BlockMode::Inter)] != 0 { + print!("inter block quants:"); + for &count in self.pq_stats.iter() { + print!(" {}", count); + } + println!(); + } + if self.frame_no > 0 { + println!("average frame size {} byte(s)", self.tot_size / (self.frame_no as usize)); + if let Some(ref stream) = self.stream { + let bitrate = (self.tot_size 
as u64) * 8 * u64::from(stream.tb_den) / u64::from(stream.tb_num) / self.frame_no; + let br_fmt = if bitrate >= 10_000_000 { + format!("{}mbps", bitrate / 1000000) + } else if bitrate >= 10_000 { + format!("{}kbps", bitrate / 1000) + } else { + format!("{}bps", bitrate) + }; + println!("average bitrate {}", br_fmt); + } + } + } + } +} + +const ENCODER_OPTS: &[NAOptionDefinition] = &[ + NAOptionDefinition { + name: "nframes", description: "duration in frames", + opt_type: NAOptionDefinitionType::Int(Some(0), None) }, + NAOptionDefinition { + name: "version", description: "codec version", + opt_type: NAOptionDefinitionType::String(Some(&["b", "f", "g", "h", "i", "k"])) }, + NAOptionDefinition { + name: "scale_mode", description: "output scaling mode", + opt_type: NAOptionDefinitionType::String(Some(&["none", "height2x", "height_il", + "width2x", "width2x_il", "scale2x"])) }, + NAOptionDefinition { + name: "forbidden", description: "block coding modes to omit (e.g. inter+residue+run)", + opt_type: NAOptionDefinitionType::String(None) }, + NAOptionDefinition { + name: "print_stats", description: "print internal encoding statistics at the end", + opt_type: NAOptionDefinitionType::Bool }, +]; + +impl NAOptionHandler for BinkEncoder { + fn get_supported_options(&self) -> &[NAOptionDefinition] { ENCODER_OPTS } + fn set_options(&mut self, options: &[NAOption]) { + for option in options.iter() { + for opt_def in ENCODER_OPTS.iter() { + if opt_def.check(option).is_ok() { + match option.name { + "version" => { + if let NAValue::String(ref strval) = option.value { + match strval.as_str() { + "b" => self.version = 'b', + _ => { + println!("versions beside 'b' are not supported"); + }, + }; + } + }, + "scale_mode" => { + if let NAValue::String(ref strval) = option.value { + match strval.as_str() { + "none" => self.scale_mode = DoublingMode::None, + "height2x" => self.scale_mode = DoublingMode::Height2X, + "height_il" => self.scale_mode = DoublingMode::HeightIlace, + "scale2x" => 
self.scale_mode = DoublingMode::Scale2X, + "width2x" => self.scale_mode = DoublingMode::Width2X, + "width2x_il" => self.scale_mode = DoublingMode::Width2XIlace, + _ => {}, + }; + } + }, + "nframes" => { + if let NAValue::Int(ival) = option.value { + self.nframes = ival as u64; + } + }, + "forbidden" => { + if let NAValue::String(ref strval) = option.value { + for el in self.forbidden.iter_mut() { + *el = false; + } + for name in strval.split('+') { + if let Ok(bmode) = name.parse::() { + self.forbidden[usize::from(bmode)] = true; + } + } + } + }, + "print_stats" => { + if let NAValue::Bool(bval) = option.value { + self.print_stats = bval; + } + }, + _ => {}, + }; + } + } + } + } + fn query_option_value(&self, name: &str) -> Option { + match name { + "version" => Some(NAValue::String(self.version.to_string())), + "scale_mode" => Some(NAValue::String(self.scale_mode.to_string())), + "nframes" => Some(NAValue::Int(self.nframes as i64)), + "forbidden" => { + let mut result = String::new(); + for (name, &flag) in BLOCK_MODE_NAMES.iter().zip(self.forbidden.iter()) { + if flag { + if !result.is_empty() { + result.push('+'); + } + result += name; + } + } + Some(NAValue::String(result)) + }, + "print_stats" => Some(NAValue::Bool(self.print_stats)), + _ => None, + } + } +} + +pub fn get_encoder() -> Box { + Box::new(BinkEncoder::new()) +} + +#[cfg(test)] +mod test { + use nihav_core::codecs::*; + use nihav_core::demuxers::*; + use nihav_core::muxers::*; + use crate::*; + use nihav_codec_support::test::enc_video::*; + use nihav_commonfmt::*; + + fn test_bink_encoder(out_name: &'static str, enc_options: &[NAOption], br: u32, quality: u8, hash: &[u32; 4]) { + let mut dmx_reg = RegisteredDemuxers::new(); + generic_register_all_demuxers(&mut dmx_reg); + let mut dec_reg = RegisteredDecoders::new(); + generic_register_all_decoders(&mut dec_reg); + let mut mux_reg = RegisteredMuxers::new(); + rad_register_all_muxers(&mut mux_reg); + let mut enc_reg = RegisteredEncoders::new(); + 
rad_register_all_encoders(&mut enc_reg); + + // sample from private collection + let dec_config = DecoderTestParams { + demuxer: "yuv4mpeg", + in_name: "assets/day3b.y4m", + stream_type: StreamType::Video, + limit: None, + dmx_reg, dec_reg, + }; + let enc_config = EncoderTestParams { + muxer: "bink", + enc_name: "bink-video", + out_name, + mux_reg, enc_reg, + }; + let dst_vinfo = NAVideoInfo { + width: 0, + height: 0, + format: YUV420_FORMAT, + flipped: false, + bits: 8, + }; + let enc_params = EncodeParameters { + format: NACodecTypeInfo::Video(dst_vinfo), + quality, + bitrate: br * 1000, + tb_num: 0, + tb_den: 0, + flags: 0, + }; + let _ = hash; + //test_encoding_to_file(&dec_config, &enc_config, enc_params, enc_options); + test_encoding_md5(&dec_config, &enc_config, enc_params, enc_options, hash); + } + #[test] + fn test_binkb_quality() { + let enc_options = &[ + NAOption { name: "nframes", value: NAValue::Int(7) }, + //NAOption { name: "print_stats", value: NAValue::Bool(true) }, + ]; + test_bink_encoder("bink-b-q50.bik", enc_options, 0, 50, &[0xd83936aa, 0xec3f55d4, 0x25e5c1fb, 0x0f3454ce]); + let enc_options = &[ + NAOption { name: "nframes", value: NAValue::Int(7) }, + //NAOption { name: "print_stats", value: NAValue::Bool(true) }, + ]; + test_bink_encoder("bink-b-q75.bik", enc_options, 0, 75, &[0x45ccd3d4, 0xf09bd106, 0xc88751db, 0xca5294d7]); + let enc_options = &[ + NAOption { name: "nframes", value: NAValue::Int(7) }, + //NAOption { name: "print_stats", value: NAValue::Bool(true) }, + ]; + test_bink_encoder("bink-b-q99.bik", enc_options, 0, 99, &[0xb516554e, 0xca025167, 0xd6c3dc06, 0x00e6ba25]); + } + #[test] + fn test_binkb_features() { + let enc_options = &[ + NAOption { name: "nframes", value: NAValue::Int(7) }, + NAOption { name: "forbidden", value: NAValue::String("intra+inter".to_string()) }, + //NAOption { name: "print_stats", value: NAValue::Bool(true) }, + ]; + test_bink_encoder("bink-b-nodct.bik", enc_options, 0, 0, &[0x5e098760, 0x31c8982a, 
0x90ce8441, 0x859d3cc6]); + let enc_options = &[ + NAOption { name: "nframes", value: NAValue::Int(7) }, + NAOption { name: "forbidden", value: NAValue::String("skip+fill+run+pattern+residue+raw".to_string()) }, + //NAOption { name: "print_stats", value: NAValue::Bool(true) }, + ]; + test_bink_encoder("bink-b-dct.bik", enc_options, 0, 0, &[0xed2fc7d2, 0x8a7a05ef, 0xd0b4ae2c, 0x622a4ef0]); + } +} diff --git a/nihav-rad/src/codecs/binkvidenc/bundle.rs b/nihav-rad/src/codecs/binkvidenc/bundle.rs new file mode 100644 index 0000000..5853169 --- /dev/null +++ b/nihav-rad/src/codecs/binkvidenc/bundle.rs @@ -0,0 +1,233 @@ +use std::collections::VecDeque; +use nihav_core::io::bitwriter::*; + +const BUNDLE_LEN_BITS: u8 = 13; +const MAX_BUNDLE_LEN: usize = 1 << BUNDLE_LEN_BITS; + +#[derive(Default)] +struct Bundle { + bits: u8, + data: VecDeque<(usize, Vec)>, + row: usize, + tmp: Vec, + end: bool, + last_w: usize, +} + +impl Bundle { + fn reset(&mut self, bits: u8) { + self.bits = bits; + self.end = false; + self.last_w = 0; + } + fn new_row(&mut self, row: usize) { + self.row = row; + } + fn push(&mut self, val: T) { + self.tmp.push(val); + } + fn push_all(&mut self, slc: &[T]) { + self.tmp.extend_from_slice(slc); + } + fn end_row(&mut self) { + if !self.tmp.is_empty() { + let mut tmp = Vec::new(); + std::mem::swap(&mut tmp, &mut self.tmp); + self.data.push_back((self.row, tmp)); + } + } +} + +trait IntoU32 { + fn into_u32(self) -> u32; +} + +impl IntoU32 for u8 { + fn into_u32(self) -> u32 { u32::from(self) } +} + +impl IntoU32 for i8 { + fn into_u32(self) -> u32 { (self + 16) as u32 } +} + +impl IntoU32 for u16 { + fn into_u32(self) -> u32 { u32::from(self) } +} + +impl IntoU32 for i16 { + fn into_u32(self) -> u32 { (self + 1024) as u32 } +} + +impl Bundle<(u16, u8)> { + fn write(&mut self, bw: &mut BitWriter, cur_row: usize) { + if !self.data.is_empty() && self.data[0].0 == cur_row { + let (_, row_data) = self.data.pop_front().unwrap(); + for &(bits, len) in 
row_data.iter() {
+                bw.write(u32::from(bits), len);
+            }
+        }
+    }
+}
+
+impl Bundle { // NOTE(review): generic parameters look stripped from this copy of the patch - confirm against the real bundle.rs
+    fn write(&mut self, bw: &mut BitWriter, cur_row: usize) { // emits queued rows, but only on the row where this bundle is next due
+        if !self.end && cur_row == self.last_w {
+            let mut num_out = 0; // number of whole queued rows that will be emitted
+            let mut len_out = 0; // total element count of those rows
+            for (_, row) in self.data.iter() {
+                if len_out + row.len() < MAX_BUNDLE_LEN { // rows are taken whole; stop at the first one that does not fit
+                    len_out += row.len();
+                    num_out += 1;
+                } else {
+                    break;
+                }
+            }
+
+            bw.write(len_out as u32, BUNDLE_LEN_BITS); // the element count is written before the elements
+            if len_out == 0 { // nothing selected (empty queue or first row too long): mark the bundle as ended
+                self.end = true;
+                return;
+            }
+            for _ in 0..num_out {
+                let (row_no, row_data) = self.data.pop_front().unwrap(); // cannot fail: num_out rows were counted in the loop above
+                self.last_w = row_no + 1; // the next write is due on the row after the last one emitted
+                for &el in row_data.iter() {
+                    bw.write(el.into_u32(), self.bits); // every element uses the bundle's fixed bit width
+                }
+            }
+        }
+    }
+}
+
+#[derive(Default)]
+pub struct Bundles { // one bundle per token stream; NOTE(review): element types of the Bundle fields are missing in this copy
+    btype: Bundle,
+    colors: Bundle,
+    pattern: Bundle,
+    xoff: Bundle,
+    yoff: Bundle,
+    intradc: Bundle,
+    interdc: Bundle,
+    intraq: Bundle,
+    interq: Bundle,
+    nresidues: Bundle,
+    other: Bundle<(u16, u8)>, // pairs whose second member is a bit count (see BlockTokens::bits)
+}
+
+macro_rules! whole_bundle { // invokes the same method on all eleven bundles, always in this order
+    ($self:expr, $func:ident) => { // form for methods without arguments
+        $self.btype.$func();
+        $self.colors.$func();
+        $self.pattern.$func();
+        $self.xoff.$func();
+        $self.yoff.$func();
+        $self.intradc.$func();
+        $self.interdc.$func();
+        $self.intraq.$func();
+        $self.interq.$func();
+        $self.nresidues.$func();
+        $self.other.$func();
+    };
+    ($self:expr, $func:ident, $($args:expr),*) => { // form that forwards the same argument list to every call
+        $self.btype.$func($($args),*);
+        $self.colors.$func($($args),*);
+        $self.pattern.$func($($args),*);
+        $self.xoff.$func($($args),*);
+        $self.yoff.$func($($args),*);
+        $self.intradc.$func($($args),*);
+        $self.interdc.$func($($args),*);
+        $self.intraq.$func($($args),*);
+        $self.interq.$func($($args),*);
+        $self.nresidues.$func($($args),*);
+        $self.other.$func($($args),*);
+    }
+}
+
+impl Bundles {
+    pub fn reset(&mut self) { // presumably passes each bundle's element width in bits (values match BlockTokens::bits); `other` is not reset here
+        self.btype.reset(4);
+        self.colors.reset(8);
+        self.pattern.reset(8);
+        self.xoff.reset(5);
+        self.yoff.reset(5);
+        self.intradc.reset(11);
+        self.interdc.reset(11);
+        self.intraq.reset(4);
+        self.interq.reset(4);
+        self.nresidues.reset(7);
+    }
+    pub fn add_block_type(&mut self, btype: u8) { // block types are queued separately from the per-block tokens
+        self.btype.push(btype);
+    }
+    pub fn write(&mut self, bw: &mut BitWriter, row: usize) { // lets every bundle emit whatever is due at `row`
+        whole_bundle!(self, write, bw, row);
+    }
+    pub fn new_row(&mut self, row: usize) {
+        whole_bundle!(self, new_row, row);
+    }
+    pub fn end_row(&mut self) {
+        whole_bundle!(self, end_row);
+    }
+
+    pub fn can_fit_raw_block(&self) -> bool { // NOTE(review): Bundle::write iterates `data` as (row, elements) pairs, so this looks like a queued-row count rather than an element count - confirm
+        self.colors.data.len() < MAX_BUNDLE_LEN - 1 - 64
+    }
+    pub fn add_tokens(&mut self, tokens: &BlockTokens) { // appends one block's tokens to the matching bundles; block type goes through add_block_type
+        self.colors.push_all(&tokens.colors);
+        self.pattern.push_all(&tokens.pattern);
+        self.xoff.push_all(&tokens.xoff);
+        self.yoff.push_all(&tokens.yoff);
+        self.intradc.push_all(&tokens.intradc);
+        self.interdc.push_all(&tokens.interdc);
+        self.intraq.push_all(&tokens.intraq);
+        self.interq.push_all(&tokens.interq);
+        self.nresidues.push_all(&tokens.nresidues);
+        self.other.push_all(&tokens.other);
+    }
+}
+
+#[derive(Default)]
+pub struct BlockTokens { // tokens of a single candidate block coding; NOTE(review): element types of the Vec fields are missing in this copy
+    pub colors: Vec,
+    pub pattern: Vec,
+    pub xoff: Vec,
+    pub yoff: Vec,
+    pub intradc: Vec,
+    pub interdc: Vec,
+    pub intraq: Vec,
+    pub interq: Vec,
+    pub nresidues: Vec,
+    pub other: Vec<(u16, u8)>, // second member is the number of bits the entry occupies
+}
+
+impl BlockTokens {
+    pub fn new() -> Self { Self::default() }
+    pub fn clear(&mut self) { // empties every list so the instance can be reused for the next candidate
+        self.colors.clear();
+        self.pattern.clear();
+        self.xoff.clear();
+        self.yoff.clear();
+        self.intradc.clear();
+        self.interdc.clear();
+        self.intraq.clear();
+        self.interq.clear();
+        self.nresidues.clear();
+        self.other.clear();
+    }
+    pub fn bits(&self, is_b: bool) -> usize { // total bit cost of the stored tokens; only the `is_b` case is implemented
+        if is_b {
+            self.colors.len() * 8 + // fixed cost per element for each list
+            self.pattern.len() * 8 +
+            self.xoff.len() * 5 +
+            self.yoff.len() * 5 +
+            self.intradc.len() * 11 +
+            self.interdc.len() * 11 +
+            self.intraq.len() * 4 +
+            self.interq.len() * 4 +
+            self.nresidues.len() * 7 +
+            self.other.iter().fold(0usize, |acc, &(_, len)| acc + usize::from(len)) // `other` entries carry their own length
+        } else {
+unimplemented!()
+        }
+    }
+}
diff --git a/nihav-rad/src/codecs/binkvidenc/dsp.rs b/nihav-rad/src/codecs/binkvidenc/dsp.rs
new file mode 100644
index 
0000000..e780909
--- /dev/null
+++ b/nihav-rad/src/codecs/binkvidenc/dsp.rs
@@ -0,0 +1,598 @@
+use super::{BlockTokens, MAX_DIST, RateControl, calc_diff};
+use super::super::binkviddata::*;
+
+const BINK_INV_SCAN: [usize; 64] = [ // maps a raster-order position in the 8x8 block to its slot in the flattened order used by the coders below
+    0, 1, 4, 5, 8, 9, 12, 13,
+    2, 3, 6, 7, 10, 11, 14, 15,
+    24, 25, 44, 45, 16, 17, 20, 21,
+    26, 27, 46, 47, 18, 19, 22, 23,
+    28, 29, 32, 33, 48, 49, 52, 53,
+    30, 31, 34, 35, 50, 51, 54, 55,
+    36, 37, 40, 41, 56, 57, 60, 61,
+    38, 39, 42, 43, 58, 59, 62, 63
+];
+
+trait WriteBit { // small bit-level helpers layered on BlockTokens::other
+    fn write_bit(&mut self, val: u16);
+    fn write_coef(&mut self, aval: u32, sign: bool, bits: u8);
+}
+
+impl WriteBit for BlockTokens {
+    fn write_bit(&mut self, val: u16) { // queues `val` as a one-bit entry
+        self.other.push((val, 1));
+    }
+    fn write_coef(&mut self, aval: u32, sign: bool, bits: u8) { // queues the low `bits - 1` bits of the magnitude (the top bit is not sent), then the sign
+        if bits > 1 {
+            self.other.push((aval as u16 & ((1 << (bits - 1)) - 1), bits - 1));
+        }
+        self.write_bit(sign as u16);
+    }
+}
+
+pub struct DSP {
+    diff: [i16; 64], // current block minus its prediction, filled by get_diff()
+    dct_i: [u8; 64], // reconstruction of the best candidate found by try_dct_intra()
+    dct_p: [u8; 64], // reconstruction of the best candidate found by try_dct_inter()
+    qmats: QuantMats,
+    i_start: usize, // first quantiser index tried for intra blocks
+    i_len: usize, // number of quantiser indices tried for intra blocks
+    p_start: usize, // first quantiser index tried for inter blocks
+    p_len: usize, // number of quantiser indices tried for inter blocks
+}
+
+impl DSP {
+    pub fn new() -> Self { // starts with zeroed buffers and quantiser ranges of 16 indices from 0
+        let mut qmats = QuantMats::default();
+        qmats.calc_binkb_quants();
+        Self {
+            diff: [0; 64],
+            dct_i: [0; 64],
+            dct_p: [0; 64],
+            qmats,
+            i_start: 0,
+            i_len: 16,
+            p_start: 0,
+            p_len: 16,
+        }
+    }
+    pub fn get_diff(&mut self, mc_blk: &[u8; 64], cur_blk: &[u8; 64]) { // stores cur_blk - mc_blk per pixel in self.diff
+        for (dst, (&prev, &cur)) in self.diff.iter_mut()
+                .zip(mc_blk.iter().zip(cur_blk.iter())) {
+            *dst = i16::from(cur) - i16::from(prev);
+        }
+    }
+    pub fn recon_residue(&self, dst: &mut [u8], dstride: usize, mc_blk: &[u8; 64]) { // writes mc_blk + self.diff into dst, one row per dstride
+        for (dline, (prow, drow)) in dst.chunks_mut(dstride)
+                .zip(mc_blk.chunks_exact(8).zip(self.diff.chunks_exact(8))) {
+            for (dst, (&prev, &diff)) in dline.iter_mut().zip(prow.iter().zip(drow.iter())) {
+                *dst = (i16::from(prev) + diff) as u8; // the cast truncates instead of clamping
+            }
+        }
+    }
+    pub fn recon_dct_i(&self, dst: &mut [u8], dstride: usize) { // copies the stored intra reconstruction into dst
+        for (dline, srow) in dst.chunks_mut(dstride).zip(self.dct_i.chunks_exact(8)) {
+            dline[..8].copy_from_slice(srow);
+        }
+    }
+    pub fn recon_dct_p(&self, dst: &mut [u8], dstride: usize) { // copies the stored inter reconstruction into dst
+        for (dline, srow) in dst.chunks_mut(dstride).zip(self.dct_p.chunks_exact(8)) {
+            dline[..8].copy_from_slice(srow);
+        }
+    }
+
+    pub fn try_residue(&self, tokens: &mut BlockTokens) -> u32 { // codes self.diff bit plane by bit plane; returns 0, or MAX_DIST when the residue exceeds the limits checked below
+        let mut tree = Tree::new(true);
+        let mut flat = [0; 64]; // residue values in flattened (BINK_INV_SCAN) order
+        let mut blen = [0; 64]; // index of the highest set bit of each magnitude (0 for magnitudes 0 and 1)
+
+        for (&idx, &val) in BINK_INV_SCAN.iter().zip(self.diff.iter()) {
+            flat[idx] = val;
+            let aval = val.unsigned_abs();
+            let mut b = 0u8;
+            while (1 << b) <= aval {
+                b += 1;
+            }
+            blen[idx] = b.saturating_sub(1);
+        }
+
+        let mut max_val = 0; // NOTE(review): computed below but not read afterwards in this function
+        let mut max_bits = 0;
+        let mut bits = 0; // total number of set bits over all magnitudes
+        let mut avals = [0; 64];
+        let mut signs = [false; 64];
+        for ((aval, sign), (&val, &vlen)) in avals.iter_mut().zip(signs.iter_mut())
+                .zip(flat.iter().zip(blen.iter())) {
+            *aval = val.unsigned_abs();
+            *sign = val < 0;
+            max_val = max_val.max(*aval);
+            max_bits = max_bits.max(vlen);
+            bits += aval.count_ones();
+        }
+
+        if max_bits > 7 || bits > 127 { // max_bits is sent in 3 bits below; the set-bit count goes to the nresidues list
+            return MAX_DIST;
+        }
+
+        tokens.nresidues.push(bits as u8);
+        tokens.other.push((max_bits as u16, 3));
+
+        let mut nz_cand = Vec::with_capacity(64); // coefficients already signalled as non-zero; they get one refinement bit per lower plane
+        let mut masks_left = bits + 1; // decremented once per emitted set bit; the loops stop if it ever reaches zero
+        'tree_loop: for cur_bits in (0..=max_bits).rev() {
+            let mask = 1 << cur_bits;
+            for &idx in nz_cand.iter() {
+                tokens.write_bit(((avals[idx] & mask) != 0) as u16);
+                if (avals[idx] & mask) != 0 {
+                    masks_left -= 1;
+                    if masks_left == 0 {
+                        break 'tree_loop;
+                    }
+                }
+            }
+
+            let mut pos = tree.start;
+            while pos < tree.end {
+                if tree.state[pos] == TreeState::None {
+                    pos += 1;
+                    continue;
+                }
+                if let TreeState::Candidate(idx) = tree.state[pos] {
+                    let idx = usize::from(idx);
+                    if blen[idx] == cur_bits && flat[idx] != 0 { // coefficient becomes significant in this plane: flag, then sign
+                        tree.state[pos] = TreeState::None;
+                        tokens.write_bit(1);
+                        tokens.write_bit(signs[idx] as u16);
+                        nz_cand.push(idx);
+                        masks_left -= 1;
+                        if masks_left == 0 {
+                            break 'tree_loop;
+                        }
+                    } else {
+                        tokens.write_bit(0);
+                        pos += 1;
+                    }
+                    continue;
+                }
+                let range = tree.state[pos].get_range();
+                let cur_max_bits = blen[range].iter().fold(0u8, |acc, &a| acc.max(a));
+                if cur_max_bits == cur_bits { // the group holds something for this plane: split it
+                    tokens.write_bit(1);
+                    match tree.state[pos] {
+                        TreeState::Twenty(val) => { // first four entries are handled one by one, the other sixteen stay in this slot
+                            tree.state[pos] = TreeState::Sixteen(val + 4);
+                            for i in 0..4 {
+                                let idx = usize::from(val) + i;
+                                if blen[idx] == cur_bits && flat[idx] != 0 {
+                                    tokens.write_bit(0);
+                                    tokens.write_bit(signs[idx] as u16);
+                                    nz_cand.push(idx);
+                                    masks_left -= 1;
+                                    if masks_left == 0 {
+                                        break 'tree_loop;
+                                    }
+                                } else {
+                                    tokens.write_bit(1);
+                                    tree.add_to_head(TreeState::Candidate(idx as u8));
+                                }
+                            }
+                        },
+                        TreeState::Sixteen(val) => { // becomes four groups of four: one in place, three appended
+                            tree.state[pos] = TreeState::Four(val);
+                            for i in 1u8..4 {
+                                tree.add_to_tail(TreeState::Four(val + i * 4));
+                            }
+                        },
+                        TreeState::Four(val) => {
+                            tree.state[pos] = TreeState::None;
+                            for i in 0..4 {
+                                let idx = usize::from(val) + i;
+                                if blen[idx] == cur_bits && flat[idx] != 0 {
+                                    tokens.write_bit(0);
+                                    tokens.write_bit(signs[idx] as u16);
+                                    nz_cand.push(idx);
+                                    masks_left -= 1;
+                                    if masks_left == 0 {
+                                        break 'tree_loop;
+                                    }
+                                } else {
+                                    tokens.write_bit(1);
+                                    tree.add_to_head(TreeState::Candidate(idx as u8));
+                                }
+                            }
+                        },
+                        _ => unreachable!(), // None and Candidate were handled before the range lookup
+                    };
+                } else {
+                    tokens.write_bit(0);
+                    pos += 1;
+                }
+            }
+        }
+
+        0
+    }
+
+    pub fn set_quant_ranges(&mut self, ranges: [u8; 4]) { // order: intra start, intra count, inter start, inter count
+        self.i_start = usize::from(ranges[0]);
+        self.i_len = usize::from(ranges[1]);
+        self.p_start = usize::from(ranges[2]);
+        self.p_len = usize::from(ranges[3]);
+    }
+    pub fn try_dct_intra(&mut self, blk: &[u8; 64], tokens: &mut BlockTokens, tmp_tok: &mut BlockTokens, is_b: bool, rc: &RateControl, mut best_dist: u32) -> u32 { // tries every configured intra quantiser; on improvement leaves tokens and self.dct_i set, returns the best cost
+        tokens.clear();
+        if self.i_len == 0 { // no quantiser allowed: intra DCT is unavailable
+            return MAX_DIST;
+        }
+
+        let mut ref_coeffs = [0i32; 64];
+        for (dst, &src) in ref_coeffs.iter_mut().zip(blk.iter()) {
+            *dst = i32::from(src);
+        }
+        dct(&mut ref_coeffs);
+
+        let mut dct_out = [0u8; 64];
+        let qmats = if is_b { &self.qmats.intra_qmat } else { BINK_INTRA_QUANT };
+        for (qidx, qmat) in qmats.iter().enumerate().skip(self.i_start).take(self.i_len) {
+            let mut coeffs = ref_coeffs;
+            for (idx, el) in coeffs.iter_mut().enumerate() {
+                *el /= qmat[BINK_INV_SCAN[idx]];
+            }
+
+            if coeffs[0] >= 2048 { // presumably because the intra DC token is 11 bits wide - confirm
+                continue;
+            }
+
+            tmp_tok.clear();
+            tmp_tok.intradc.push(coeffs[0] as u16);
+            Self::code_dct_coeffs(&coeffs, tmp_tok);
+            if is_b {
+                tmp_tok.intraq.push(qidx as u8);
+            } else {
+                tmp_tok.other.push((qidx as u16, 4));
+            }
+            let bits = tmp_tok.bits(is_b);
+            if rc.metric(0, bits) >= best_dist { // the bit cost alone already loses: skip the reconstruction
+                continue;
+            }
+
+            for (idx, el) in coeffs.iter_mut().enumerate() { // dequantise before the inverse transform
+                if *el != 0 {
+                    *el = (*el * qmat[BINK_INV_SCAN[idx]]) >> 11;
+                }
+            }
+            idct(&mut coeffs);
+            for (dst, &src) in dct_out.iter_mut().zip(coeffs.iter()) {
+                *dst = src as u8; // the cast truncates instead of clamping
+            }
+            let diff = calc_diff(&dct_out, 8, blk, 8);
+            let dist = rc.metric(diff, bits);
+            if dist < best_dist {
+                best_dist = dist;
+                std::mem::swap(tokens, tmp_tok);
+                self.dct_i.copy_from_slice(&dct_out);
+            }
+        }
+
+        best_dist
+    }
+    pub fn try_dct_inter(&mut self, ref_blk: &[u8; 64], cur_blk: &[u8; 64], tokens: &mut BlockTokens, tmp_tok: &mut BlockTokens, is_b: bool, rc: &RateControl, mut best_dist: u32) -> u32 { // transform-codes self.diff on top of ref_blk; tokens must already hold the motion vector
+        let mv_x = tokens.xoff[0]; // indexing panics when the caller has not stored a vector
+        let mv_y = tokens.yoff[0];
+
+        let mut ref_coeffs = [0i32; 64];
+        for (dst, &src) in ref_coeffs.iter_mut().zip(self.diff.iter()) {
+            *dst = i32::from(src);
+        }
+        dct(&mut ref_coeffs);
+
+        let mut dct_out = [0u8; 64];
+        let qmats = if is_b { &self.qmats.inter_qmat } else { BINK_INTER_QUANT };
+        for (qidx, qmat) in qmats.iter().enumerate().skip(self.p_start).take(self.p_len) {
+            let mut coeffs = ref_coeffs;
+
+            for (idx, el) in coeffs.iter_mut().enumerate() {
+                *el /= qmat[BINK_INV_SCAN[idx]];
+            }
+
+            if coeffs[0].unsigned_abs() >= 1024 { // presumably the signed range of the 11-bit inter DC token - confirm
+                continue;
+            }
+
+            tmp_tok.clear();
+            tmp_tok.interdc.push(coeffs[0] as i16);
+            tmp_tok.xoff.push(mv_x); // the candidate token set repeats the motion vector
+            tmp_tok.yoff.push(mv_y);
+            Self::code_dct_coeffs(&coeffs, tmp_tok);
+            if is_b {
+                tmp_tok.interq.push(qidx as u8);
+            } else {
+                tmp_tok.other.push((qidx as u16, 4));
+            }
+            let bits = tmp_tok.bits(is_b);
+            if rc.metric(0, bits) >= best_dist { // the bit cost alone already loses: skip the reconstruction
+                continue;
+            }
+
+            for (idx, el) in coeffs.iter_mut().enumerate() {
+                if *el != 0 {
+                    *el = (*el * qmat[BINK_INV_SCAN[idx]]) >> 11;
+                }
+            }
+            idct(&mut coeffs);
+            for (dst, (&prev, &diff)) in dct_out.iter_mut().zip(ref_blk.iter().zip(coeffs.iter())) {
+                *dst = (i32::from(prev) + diff) as u8; // the cast truncates instead of clamping
+            }
+            let diff = calc_diff(&dct_out, 8, cur_blk, 8);
+            let dist = rc.metric(diff, bits);
+            if dist < best_dist {
+                best_dist = dist;
+                std::mem::swap(tokens, tmp_tok);
+                self.dct_p.copy_from_slice(&dct_out);
+            }
+        }
+
+        best_dist
+    }
+
+    fn code_dct_coeffs(coeffs: &[i32; 64], tokens: &mut BlockTokens) { // queues the 63 non-DC coefficients; callers store the DC themselves
+        let mut tree = Tree::new(false);
+        let mut flat = [0; 64];
+        let mut blen = [0; 64]; // bit length of each magnitude (0 for a zero coefficient)
+
+        for (&idx, &val) in BINK_INV_SCAN.iter().zip(coeffs.iter()).skip(1) { // skip(1) leaves out the DC coefficient
+            flat[idx] = val;
+            let aval = val.unsigned_abs();
+            let mut b = 0u8;
+            while (1 << b) <= aval {
+                b += 1;
+            }
+            blen[idx] = b;
+        }
+
+        let mut max_val = 0; // NOTE(review): computed below but not read afterwards in this function
+        let mut max_bits = 0;
+        let mut avals = [0; 64];
+        let mut signs = [false; 64];
+        for ((aval, sign), (&val, &vlen)) in avals.iter_mut().zip(signs.iter_mut())
+                .zip(flat.iter().zip(blen.iter())) {
+            *aval = val.unsigned_abs();
+            *sign = val < 0;
+            max_val = max_val.max(*aval);
+            max_bits = max_bits.max(vlen);
+        }
+
+        tokens.other.push((u16::from(max_bits), 4));
+        for cur_bits in (1..=max_bits).rev() { // one pass per bit length, longest first
+            let mut pos = tree.start;
+            while pos < tree.end {
+                if tree.state[pos] == TreeState::None {
+                    pos += 1;
+                    continue;
+                }
+                if let TreeState::Candidate(idx) = tree.state[pos] {
+                    let idx = usize::from(idx);
+                    if blen[idx] == cur_bits && flat[idx] != 0 {
+                        tree.state[pos] = TreeState::None;
+                        tokens.write_bit(1);
+                        tokens.write_coef(avals[idx], signs[idx], cur_bits);
+                    } else {
+                        tokens.write_bit(0);
+                        pos += 1;
+                    }
+                    continue;
+                }
+                let range = tree.state[pos].get_range();
+                let cur_max_bits = blen[range].iter().fold(0u8, |acc, &a| acc.max(a));
+                if cur_max_bits == cur_bits {
+                    tokens.write_bit(1);
+                    match tree.state[pos] {
+                        TreeState::Twenty(val) => {
+                            tree.state[pos] = TreeState::Sixteen(val + 4);
+                            for i in 0..4 {
+                                let idx = usize::from(val) + i;
+                                if blen[idx] == cur_bits && flat[idx] != 0 {
+                                    tokens.write_bit(0);
+                                    tokens.write_coef(avals[idx], signs[idx], cur_bits);
+                                } else {
+                                    tokens.write_bit(1);
+                                    tree.add_to_head(TreeState::Candidate(idx as u8));
+                                }
+                            }
+                        },
+                        TreeState::Sixteen(val) => {
+                            tree.state[pos] = TreeState::Four(val);
+                            for i in 1u8..4 {
+                                tree.add_to_tail(TreeState::Four(val + i * 4));
+                            }
+                        },
+                        TreeState::Four(val) => {
+                            tree.state[pos] = TreeState::None;
+                            for i in 0..4 {
+                                let idx = usize::from(val) + i;
+                                if blen[idx] == cur_bits && flat[idx] != 0 {
+                                    tokens.write_bit(0);
+                                    tokens.write_coef(avals[idx], signs[idx], cur_bits);
+                                } else {
+                                    tokens.write_bit(1);
+                                    tree.add_to_head(TreeState::Candidate(idx as u8));
+                                }
+                            }
+                        },
+                        _ => unreachable!(), // None and Candidate were handled before the range lookup
+                    };
+                } else {
+                    tokens.write_bit(0);
+                    pos += 1;
+                }
+            }
+        }
+    }
+}
+
+#[derive(Clone,Copy,Debug,PartialEq)]
+enum TreeState { // one slot of the coding list; the payload is the first flattened index covered
+    None,
+    Twenty(u8),
+    Sixteen(u8),
+    Four(u8),
+    Candidate(u8),
+}
+
+impl TreeState {
+    fn get_range(self) -> std::ops::Range<usize> { // flattened-index range covered by this slot (empty for None)
+        let (base, len) = match self {
+            TreeState::None => (0, 0),
+            TreeState::Twenty(val) => (val, 20),
+            TreeState::Sixteen(val) => (val, 16),
+            TreeState::Four(val) => (val, 4),
+            TreeState::Candidate(val) => (val, 1),
+        };
+        usize::from(base)..usize::from(base + len)
+    }
+}
+
+struct Tree { // slot list that grows in both directions inside a fixed array
+    state: [TreeState; 128],
+    start: usize, // index of the first live slot; add_to_head moves it down
+    end: usize, // one past the last live slot; add_to_tail moves it up
+}
+
+impl Tree {
+    fn new(is_res: bool) -> Self { // both variants start with three groups of twenty; they differ in how indices 0..4 are seeded
+        let mut state = [TreeState::None; 128];
+        let start = 64;
+        let mut end = start;
+
+        state[end] = TreeState::Twenty(4);
+        end += 1;
+        state[end] = TreeState::Twenty(24);
+        end += 1;
+        state[end] = TreeState::Twenty(44);
+        end += 1;
+        if is_res {
+            state[end] = TreeState::Four(0); // residue coding covers index 0 as part of a group of four
+            end += 1;
+        } else {
+            for i in 1..4 { // coefficient coding leaves out index 0 (the DC) and seeds 1..=3 individually
+                state[end] = TreeState::Candidate(i);
+                end += 1;
+            }
+        }
+        Self { state, start, end }
+    }
+    fn add_to_tail(&mut self, ts: TreeState) {
+        self.state[self.end] = ts;
+        self.end += 1;
+    }
+    fn add_to_head(&mut self, ts: TreeState) {
+        self.start -= 1;
+        self.state[self.start] = ts;
+    }
+}
+
+const A1: i32 = 2896; // fixed-point multipliers for idct!; products are shifted right by 11
+const A2: i32 = 2217;
+const A3: i32 = 3784;
+const A4: i32 = -5352;
+
+macro_rules! idct { // one 8-point inverse pass: reads $src at $off + k * $sstep, writes ($value + $bias) >> $shift to $dst
+    ($src: expr, $sstep: expr, $dst: expr, $dstep: expr, $off: expr, $bias: expr, $shift: expr) => {
+        let a0 = $src[$off + 0 * $sstep] + $src[$off + 4 * $sstep];
+        let a1 = $src[$off + 0 * $sstep] - $src[$off + 4 * $sstep];
+        let a2 = $src[$off + 2 * $sstep] + $src[$off + 6 * $sstep];
+        let a3 = A1.wrapping_mul($src[$off + 2 * $sstep] - $src[$off + 6 * $sstep]) >> 11;
+        let a4 = $src[$off + 5 * $sstep] + $src[$off + 3 * $sstep];
+        let a5 = $src[$off + 5 * $sstep] - $src[$off + 3 * $sstep];
+        let a6 = $src[$off + 1 * $sstep] + $src[$off + 7 * $sstep];
+        let a7 = $src[$off + 1 * $sstep] - $src[$off + 7 * $sstep];
+        let b0 = a4 + a6;
+        let b1 = A3.wrapping_mul(a5 + a7) >> 11;
+        let b2 = (A4.wrapping_mul(a5) >> 11) - b0 + b1;
+        let b3 = (A1.wrapping_mul(a6 - a4) >> 11) - b2;
+        let b4 = (A2.wrapping_mul(a7) >> 11) + b3 - b1;
+        let c0 = a0 + a2;
+        let c1 = a0 - a2;
+        let c2 = a1 + (a3 - a2);
+        let c3 = a1 - (a3 - a2);
+
+        $dst[$off + 0 * $dstep] = (c0 + b0 + $bias) >> $shift;
+        $dst[$off + 1 * $dstep] = (c2 + b2 + $bias) >> $shift;
+        $dst[$off + 2 * $dstep] = (c3 + b3 + $bias) >> $shift;
+        $dst[$off + 3 * $dstep] = (c1 - b4 + $bias) >> $shift;
+        $dst[$off + 4 * $dstep] = (c1 + b4 + $bias) >> $shift;
+        $dst[$off + 5 * $dstep] = (c3 - b3 + $bias) >> $shift;
+        $dst[$off + 6 * $dstep] = (c2 - b2 + $bias) >> $shift;
+        $dst[$off + 7 * $dstep] = (c0 - b0 + $bias) >> $shift;
+    };
+}
+
+fn idct(coeffs: &mut [i32; 64]) { // in-place 8x8 inverse transform: columns into tmp, then rows back into coeffs
+    let mut tmp: [i32; 64] = [0; 64];
+    let mut row: [i32; 8] = [0; 8];
+    for i in 0..8 {
+        idct!(coeffs, 8, tmp, 8, i, 0, 0);
+    }
+    for (drow, srow) in coeffs.chunks_exact_mut(8).zip(tmp.chunks_exact(8)) {
+        idct!(srow, 1, row, 1, 0, 0x7F, 8); // the row pass adds 0x7F and shifts right by 8
+        drow.copy_from_slice(&row);
+    }
+}
+
+const B1: i32 = 2896; // fixed-point multipliers for dct!; products are shifted right by 5
+const B2: i32 = 3789;
+const B3: i32 = 1569;
+const B4: i32 = 4464;
+const B5: i32 = 6679;
+const B6: i32 = 1327;
+const B7: i32 = 888;
+macro_rules! dct { // one 8-point forward pass with the same addressing, bias and shift parameters as idct!
+    ($src: expr, $sstep: expr, $dst: expr, $dstep: expr, $off: expr, $bias: expr, $shift: expr) => {
+        let a0 = $src[$off + 0 * $sstep] + $src[$off + 7 * $sstep];
+        let a1 = $src[$off + 0 * $sstep] - $src[$off + 7 * $sstep];
+        let a2 = $src[$off + 1 * $sstep] + $src[$off + 6 * $sstep];
+        let a3 = $src[$off + 1 * $sstep] - $src[$off + 6 * $sstep];
+        let a4 = $src[$off + 2 * $sstep] + $src[$off + 5 * $sstep];
+        let a5 = $src[$off + 2 * $sstep] - $src[$off + 5 * $sstep];
+        let a6 = $src[$off + 3 * $sstep] + $src[$off + 4 * $sstep];
+        let a7 = $src[$off + 3 * $sstep] - $src[$off + 4 * $sstep];
+
+        let b0 = (a0 + a4) << 7;
+        let b1 = (a0 - a4) << 7;
+        let b2 = (a2 + a6) << 7;
+        let b3 = (a2 - a6) << 7;
+
+        $dst[$off + 0 * $dstep] = (b0 + b2 + $bias) >> $shift;
+        $dst[$off + 4 * $dstep] = (b1 - b3 + $bias) >> $shift;
+
+        let c0 = (a0 - a6) << 7;
+        let c1 = B1.wrapping_mul((b1 + b3) >> 7) >> 5;
+        $dst[$off + 2 * $dstep] = (c0 + c1 + $bias) >> $shift;
+        $dst[$off + 6 * $dstep] = (c0 - c1 + $bias) >> $shift;
+
+        let d0 = B2.wrapping_mul(a1) + B3.wrapping_mul(a7);
+        let d1 = ( d0 + B4.wrapping_mul(a5) + B5.wrapping_mul(a3) + (1 << 4)) >> 5;
+        let d2 = (-d0 + B6.wrapping_mul(a5) - B7.wrapping_mul(a3) + (1 << 4)) >> 5;
+        $dst[$off + 1 * $dstep] = ((a1 << 7) + d1 + $bias) >> $shift;
+        $dst[$off + 7 * $dstep] = ((a1 << 7) + d2 + $bias) >> $shift;
+
+        let e0 = B3.wrapping_mul(a1) - B2.wrapping_mul(a7);
+        let e1 = ( e0 - B6.wrapping_mul(a3) - B5.wrapping_mul(a5) + (1 << 4)) >> 5;
+        let e2 = (-e0 - B4.wrapping_mul(a3) + B7.wrapping_mul(a5) + (1 << 4)) >> 5;
+
+        $dst[$off + 3 * $dstep] = ((a1 << 7) + e1 + $bias) >> $shift;
+        $dst[$off + 5 * $dstep] = ((a1 << 7) + e2 + $bias) >> $shift;
+    };
+}
+
+fn dct(coeffs: &mut [i32; 64]) { // in-place 8x8 forward transform: columns into tmp, then rows back into coeffs
+    let mut tmp: [i32; 64] = [0; 64];
+    let mut row: [i32; 8] = [0; 8];
+    for i in 0..8 {
+        dct!(coeffs, 8, tmp, 8, i, 1, 1); // the column pass adds 1 and shifts right by 1
+    }
+    for (drow, srow) in coeffs.chunks_exact_mut(8).zip(tmp.chunks_exact(8)) {
+        dct!(srow, 1, row, 1, 0, 0, 0);
+        drow.copy_from_slice(&row);
+    }
+}
+
diff --git a/nihav-rad/src/codecs/binkvidenc/mc.rs b/nihav-rad/src/codecs/binkvidenc/mc.rs
new file mode 100644
index 0000000..ebaa2b4
--- /dev/null
+++ b/nihav-rad/src/codecs/binkvidenc/mc.rs
@@ -0,0 +1,79 @@
+use nihav_codec_support::codecs::{MV, ZERO_MV};
+
+pub fn pix_dist(a: u8, b: u8) -> u32 { // squared difference of two samples
+    ((i32::from(a) - (i32::from(b))) * (i32::from(a) - (i32::from(b)))) as u32
+}
+
+pub fn calc_diff(src1: &[u8], stride1: usize, src2: &[u8], stride2: usize) -> u32 { // sum of pix_dist over at most 8 rows of 8 samples; src1 rows must hold at least 8 samples
+    src1.chunks(stride1).zip(src2.chunks(stride2)).take(8).fold(0u32,
+        |acc, (line1, line2)| acc + line1[..8].iter().zip(line2.iter()).fold(0u32,
+            |acc2, (&a, &b)| acc2 + pix_dist(a, b)))
+}
+
+const DIA_LARGE: [MV; 4] = [MV{x: 2, y: 0}, MV{x: 0, y: 2}, MV{x: -2, y: 0}, MV{x: 0, y: -2}]; // two-sample steps for the first search stage
+const DIA_SMALL: [MV; 4] = [MV{x: 1, y: 0}, MV{x: 0, y: 1}, MV{x: -1, y: 0}, MV{x: 0, y: -1}]; // one-sample steps for the second search stage
+fn check_mv(x: usize, y: usize, width: usize, height: usize, mv: MV) -> bool { // true when both components are within +-15 and the displaced 8x8 block lies inside width x height
+    let xpos = (x as isize) + isize::from(mv.x);
+    let ypos = (y as isize) + isize::from(mv.y);
+
+    mv.x.abs() <= 15 && mv.y.abs() <= 15 &&
+        xpos >= 0 && (xpos + 8 <= (width as isize)) &&
+        ypos >= 0 && (ypos + 8 <= (height as isize))
+}
+
+pub fn mv_search(src: &[u8], stride: usize, width: usize, height: usize,
+                 x: usize, y: usize, skip_diff: u32,
+                 ref_blk: &[u8; 64], tmp: &mut [u8; 64]) -> (MV, u32) { // greedy two-stage search from the zero vector; returns (ZERO_MV, skip_diff) when nothing scores lower
+    let mut best_diff = skip_diff; // a candidate has to beat the caller-supplied zero-motion score
+    let mut best_mv = ZERO_MV;
+    loop { // first stage: two-sample steps until no neighbour improves
+        let last_mv = best_mv;
+        for &off_mv in DIA_LARGE.iter() {
+            let mv = best_mv + off_mv;
+            if !check_mv(x, y, width, height, mv) {
+                continue;
+            }
+            get_block(src, stride, x, y, mv, tmp);
+            let diff = calc_diff(ref_blk, 8, tmp, 8);
+            if diff < best_diff {
+                best_diff = diff;
+                best_mv = mv;
+            }
+        }
+        if best_mv == last_mv {
+            break;
+        }
+    }
+    loop { // second stage: one-sample steps around the first-stage result
+        let last_mv = best_mv;
+        for &off_mv in DIA_SMALL.iter() {
+            let mv = best_mv + off_mv;
+            if !check_mv(x, y, width, height, mv) {
+                continue;
+            }
+            get_block(src, stride, x, y, mv, tmp);
+            let diff = calc_diff(ref_blk, 8, tmp, 8);
+            if diff < best_diff {
+                best_diff = diff;
+                best_mv = mv;
+            }
+        }
+        if best_mv == last_mv {
+            break;
+        }
+    }
+    (best_mv, best_diff)
+}
+
+pub fn get_block(src: &[u8], stride: usize, x: usize, y: usize, mv: MV, dst: &mut [u8; 64]) { // copies the 8x8 block at (x + mv.x, y + mv.y) into dst; no bounds validation here (mv_search runs check_mv first)
+    let pos = (x as isize + isize::from(mv.x) + (y as isize + isize::from(mv.y)) * (stride as isize)) as usize;
+    for (dline, sline) in dst.chunks_exact_mut(8).zip(src[pos..].chunks(stride)) {
+        dline.copy_from_slice(&sline[..8]);
+    }
+}
+
+pub fn put_block(dst: &mut [u8], dstride: usize, cur_blk: &[u8; 64]) { // writes an 8x8 block into dst, one row per dstride
+    for (dline, sline) in dst.chunks_mut(dstride).zip(cur_blk.chunks_exact(8)) {
+        dline[..8].copy_from_slice(sline);
+    }
+}
diff --git a/nihav-rad/src/codecs/binkvidenc/rc.rs b/nihav-rad/src/codecs/binkvidenc/rc.rs
new file mode 100644
index 0000000..01945bf
--- /dev/null
+++ b/nihav-rad/src/codecs/binkvidenc/rc.rs
@@ -0,0 +1,112 @@
+use super::BlockMode;
+
+#[derive(Default)]
+pub struct RateControl {
+    bitrate: u32, // 0 switches off the size tracking in expected_size() and update_size()
+    bitpool: u32, // bits still available; grows by `bitrate` whenever fpos wraps past tb_den
+    tb_num: u32,
+    tb_den: u32,
+    fpos: u32, // position inside the current tb_den period, advanced by tb_num per frame
+    quality: u8,
+    lambda: f32, // multiplier applied to the bit cost in metric()
+    first: bool, // true until the first update_size() call made with a non-zero bitrate
+}
+
+impl RateControl {
+    pub fn new() -> Self { // all fields default except lambda, which starts at 1.0
+        Self {
+            lambda: 1.0,
+            ..Default::default()
+        }
+    }
+    pub fn init(&mut self, tb_num: u32, tb_den: u32, bitrate: u32, quality: u8) { // stores the parameters and restarts the bit pool; lambda is left untouched
+        self.tb_num = tb_num;
+        self.tb_den = tb_den;
+        self.bitrate = bitrate;
+        self.quality = quality;
+
+        self.bitpool = self.bitrate;
+        self.fpos = 0;
+        self.first = true;
+    }
+    pub fn metric(&self, diff: u32, bits: usize) -> u32 { // cost = distortion + weight * bits, saturating at u32::MAX
+        diff.saturating_add((self.get_weight() * (bits as f32)) as u32)
+    }
+    fn get_weight(&self) -> f32 { // scales lambda by (100 - quality) for quality 0..=100, plain lambda otherwise
+        if (0..=100).contains(&self.quality) {
+            self.lambda * ((100 - self.quality) as f32)
+        } else {
+            self.lambda
+        }
+    }
+    pub fn expected_size(&self) -> u32 { // target size in bits for the next frame, 0 when no bitrate is set; assumes tb_den != 0 - TODO confirm callers guarantee it
+        if self.bitrate != 0 {
+            (if !self.first {
+                let ticks = self.tb_den - self.fpos; // update_size() keeps fpos below tb_den, so this is at least 1
+                u64::from(self.bitpool) * u64::from(self.tb_num) / u64::from(ticks)
+            } else {
+                u64::from(self.bitrate) * 4 * u64::from(self.tb_num) / u64::from(self.tb_den) // the first frame gets four times the average share
+            }).min(u64::from(std::u32::MAX)) as u32 // clamp: a plain cast would silently drop the high bits
+        } else {
+            0
+        }
+    }
+    pub fn update_size(&mut self, real_size: usize) { // accounts for a frame of real_size bytes and nudges lambda towards the target
+        if self.bitrate != 0 {
+            let bits = real_size.saturating_mul(8).min(std::u32::MAX as usize) as u32; // saturate instead of overflowing or truncating
+            let tgt_size = self.expected_size(); // taken before the position advances
+
+            self.fpos += self.tb_num;
+            while self.fpos >= self.tb_den {
+                self.fpos -= self.tb_den;
+                self.bitpool = self.bitpool.saturating_add(self.bitrate); // a long run of under-budget frames must not overflow the pool
+            }
+            self.bitpool = self.bitpool.saturating_sub(bits);
+
+            if bits > tgt_size.saturating_add(tgt_size / 8) { // over target by more than 1/8: make bits more expensive
+                self.lambda += 0.1;
+            }
+            if bits < tgt_size - tgt_size / 8 { // under target by more than 1/8: make bits cheaper, never below zero
+                self.lambda -= 0.1;
+                if self.lambda < 0.0 {
+                    self.lambda = 0.0;
+                }
+            }
+            self.first = false;
+        }
+    }
+    pub fn pattern_run_threshold(&self) -> u8 { // quality-dependent threshold; larger values for lower quality, 0 for quality 0 and 90 and above
+        match self.quality {
+            1..=39 => 4,
+            40..=59 => 3,
+            60..=79 => 2,
+            80..=89 => 1,
+            _ => 0,
+        }
+    }
+    pub fn get_quant_ranges(&self) -> [u8; 4] { // same order as DSP::set_quant_ranges: intra start, intra count, inter start, inter count
+        match self.quality {
+            98..=100 => [ 0, 0, 0, 2 ], // intra count 0 makes DSP::try_dct_intra return MAX_DIST
+            92..=97 => [ 2, 16, 4, 16 ],
+            85..=91 => [ 5, 16, 7, 16 ],
+            75..=84 => [ 8, 16, 10, 16 ],
+            55..=74 => [ 11, 16, 12, 16 ],
+            1..=54 => [ 12, 16, 13, 16 ],
+            _ => [ 0, 16, 0, 16 ],
+        }
+    }
+    pub fn modify_forbidden_btypes(&self, forbidden: &mut [bool; 12]) { // only ever sets entries to true; quality 0 changes nothing
+        if self.quality > 98 {
+            forbidden[usize::from(BlockMode::Intra)] = true;
+        }
+        if self.quality > 0 {
+            if self.quality < 80 {
+                forbidden[usize::from(BlockMode::Run)] = true;
+                forbidden[usize::from(BlockMode::Residue)] = true;
+            }
+            if self.quality < 90 {
+                forbidden[usize::from(BlockMode::Raw)] = true;
+            }
+        }
+    }
+}
diff --git a/nihav-rad/src/codecs/mod.rs b/nihav-rad/src/codecs/mod.rs
index 977324c..14db405 100644
--- a/nihav-rad/src/codecs/mod.rs
+++ b/nihav-rad/src/codecs/mod.rs
@@ -15,7 +15,7 @@ mod smacker;
 mod binkaud;
 #[cfg(feature="decoder_binkvid")]
 mod binkvid;
-#[cfg(feature="decoder_binkvid")]
+#[cfg(any(feature="decoder_binkvid", feature="encoder_binkvid"))] // the data module is now built for the encoder feature as well
 mod binkviddata;
 #[cfg(feature="decoder_bink2")]
 mod bink2;
@@ -41,3 +41,20 @@ pub fn rad_register_all_decoders(rd: &mut RegisteredDecoders) {
         rd.add_decoder(*decoder);
     }
 }
+
+#[cfg(feature="encoder_binkvid")]
+mod binkvidenc;
+
+#[cfg(feature="encoders")]
+const ENCODERS: &[EncoderInfo] = &[ // every entry is registered by rad_register_all_encoders()
+#[cfg(feature="encoder_binkvid")]
+    EncoderInfo { name: "bink-video", get_encoder: binkvidenc::get_encoder },
+];
+
+/// Registers all available encoders provided by this crate.
+#[cfg(feature="encoders")]
+pub fn rad_register_all_encoders(re: &mut RegisteredEncoders) {
+    for encoder in ENCODERS.iter() {
+        re.add_encoder(*encoder); // entries are passed by value
+    }
+}
diff --git a/nihav-rad/src/lib.rs b/nihav-rad/src/lib.rs
index f7bce33..743b57b 100644
--- a/nihav-rad/src/lib.rs
+++ b/nihav-rad/src/lib.rs
@@ -17,6 +17,8 @@ extern crate nihav_codec_support;
 mod codecs;
 #[cfg(feature="decoders")]
 pub use crate::codecs::rad_register_all_decoders;
+#[cfg(feature="encoders")]
+pub use crate::codecs::rad_register_all_encoders; // re-exported at the crate root next to the decoder registration
 
 #[cfg(feature="demuxers")]
 #[allow(clippy::comparison_chain)]
-- 
2.39.5