From c5d5793c1fd18882a32acabb8141a221b0a97b61 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Wed, 2 Mar 2022 18:31:00 +0100 Subject: [PATCH] VP7 encoder --- nihav-duck/Cargo.toml | 3 +- nihav-duck/src/codecs/mod.rs | 11 +- nihav-duck/src/codecs/vp6enc/dsp.rs | 19 +- nihav-duck/src/codecs/vp6enc/mb.rs | 13 +- nihav-duck/src/codecs/vp7enc/blocks.rs | 864 ++++++++++++++++++++ nihav-duck/src/codecs/vp7enc/coder.rs | 601 ++++++++++++++ nihav-duck/src/codecs/vp7enc/frame_coder.rs | 603 ++++++++++++++ nihav-duck/src/codecs/vp7enc/mb_coding.rs | 522 ++++++++++++ nihav-duck/src/codecs/vp7enc/mod.rs | 610 ++++++++++++++ nihav-duck/src/codecs/vp7enc/models.rs | 119 +++ nihav-duck/src/codecs/vp7enc/motion_est.rs | 523 ++++++++++++ nihav-duck/src/codecs/vp7enc/rdo.rs | 207 +++++ nihav-duck/src/codecs/vpenc/models.rs | 6 +- nihav-duck/src/codecs/vpenc/motion_est.rs | 104 +-- 14 files changed, 4140 insertions(+), 65 deletions(-) create mode 100644 nihav-duck/src/codecs/vp7enc/blocks.rs create mode 100644 nihav-duck/src/codecs/vp7enc/coder.rs create mode 100644 nihav-duck/src/codecs/vp7enc/frame_coder.rs create mode 100644 nihav-duck/src/codecs/vp7enc/mb_coding.rs create mode 100644 nihav-duck/src/codecs/vp7enc/mod.rs create mode 100644 nihav-duck/src/codecs/vp7enc/models.rs create mode 100644 nihav-duck/src/codecs/vp7enc/motion_est.rs create mode 100644 nihav-duck/src/codecs/vp7enc/rdo.rs diff --git a/nihav-duck/Cargo.toml b/nihav-duck/Cargo.toml index 62b73bd..c7ba4ef 100644 --- a/nihav-duck/Cargo.toml +++ b/nihav-duck/Cargo.toml @@ -37,10 +37,11 @@ decoder_dk4_adpcm = ["decoders"] decoder_on2avc = ["decoders"] all_encoders = ["all_video_encoders"] -all_video_encoders = ["encoder_vp6"] +all_video_encoders = ["encoder_vp6", "encoder_vp7"] encoders = [] encoder_vp6 = ["encoders"] +encoder_vp7 = ["encoders"] all_demuxers = ["demuxer_ivf"] demuxers = [] diff --git a/nihav-duck/src/codecs/mod.rs b/nihav-duck/src/codecs/mod.rs index faf27cc..3e977dc 100644 --- a/nihav-duck/src/codecs/mod.rs +++ b/nihav-duck/src/codecs/mod.rs @@ -43,7 +43,7 @@ mod vp6; #[allow(clippy::needless_range_loop)] #[allow(clippy::useless_let_if_seq)] mod vp7; -#[cfg(feature="decoder_vp7")] +#[cfg(any(feature="decoder_vp7", feature="encoder_vp7"))] mod vp7data; #[cfg(any(feature="decoder_vp7", feature="decoder_vp8"))] mod vp78data; @@ -53,11 +53,11 @@ mod vp78data; #[allow(clippy::too_many_arguments)] #[allow(clippy::useless_let_if_seq)] mod vp7dsp; -#[cfg(any(feature="decoder_vp7", feature="decoder_vp8"))] +#[cfg(any(feature="decoder_vp7", feature="decoder_vp8", feature="encoder_vp7"))] #[allow(clippy::needless_range_loop)] #[allow(clippy::useless_let_if_seq)] mod vp78; -#[cfg(any(feature="decoder_vp7", feature="decoder_vp8"))] +#[cfg(any(feature="decoder_vp7", feature="decoder_vp8", feature="encoder_vp7"))] #[allow(clippy::erasing_op)] #[allow(clippy::needless_range_loop)] #[allow(clippy::too_many_arguments)] @@ -132,12 +132,17 @@ mod vpenc; #[cfg(feature="encoder_vp6")] #[allow(clippy::needless_range_loop)] mod vp6enc; +#[cfg(feature="encoder_vp7")] +#[allow(clippy::needless_range_loop)] +mod vp7enc; const DUCK_ENCODERS: &[EncoderInfo] = &[ #[cfg(feature="encoder_vp6")] EncoderInfo { name: "vp6", get_encoder: vp6enc::get_encoder }, #[cfg(feature="encoder_vp6")] EncoderInfo { name: "vp6f", get_encoder: vp6enc::get_encoder_flv }, +#[cfg(feature="encoder_vp7")] + EncoderInfo { name: "vp7", get_encoder: vp7enc::get_encoder }, ]; /// Registers all available encoders provided by this crate. 
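The hunk above registers the new encoder under the name "vp7" and gates it behind the
encoder_vp7 Cargo feature. Below is a minimal sketch of how an application could pick
the encoder up, following the registration pattern shown in DUCK_ENCODERS; the helper
names (duck_register_all_encoders, RegisteredEncoders, find_encoder) are assumptions
based on NihAV's usual registration API and are not part of this patch:

    use nihav_core::codecs::*;
    use nihav_duck::duck_register_all_encoders;

    fn main() {
        let mut enc_reg = RegisteredEncoders::new();
        // fills the registry with the DUCK_ENCODERS list shown above
        duck_register_all_encoders(&mut enc_reg);
        // look up the VP7 encoder by its registered name
        if let Some(new_encoder) = enc_reg.find_encoder("vp7") {
            let _encoder = new_encoder();
            // initialise via NAEncoder::init() and feed frames to encode()
        }
    }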
diff --git a/nihav-duck/src/codecs/vp6enc/dsp.rs b/nihav-duck/src/codecs/vp6enc/dsp.rs
index b83cd11..2651458 100644
--- a/nihav-duck/src/codecs/vp6enc/dsp.rs
+++ b/nihav-duck/src/codecs/vp6enc/dsp.rs
@@ -69,6 +69,21 @@ pub fn vp_fdct(blk: &mut [i16; 64]) {
     }
 }
 
+pub trait MVSearchModeCreate {
+    fn create_search(&self) -> Box<dyn MVSearch + Send>;
+}
+
+impl MVSearchModeCreate for MVSearchMode {
+    fn create_search(&self) -> Box<dyn MVSearch + Send> {
+        match *self {
+            MVSearchMode::Full    => Box::new(FullMVSearch::new()),
+            MVSearchMode::Diamond => Box::new(DiaSearch::new()),
+            MVSearchMode::Hexagon => Box::new(HexSearch::new()),
+            _ => unreachable!(),
+        }
+    }
+}
+
 const MAX_DIST: u32 = std::u32::MAX;
 const DIST_THRESH: u32 = 256;
 
@@ -190,10 +205,10 @@ macro_rules! pattern_search {
         impl MVSearch for $struct_name {
             fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32) {
-                search_template!(self, mv_est, cur_blk, mb_x, mb_y, sad_mb)
+                search_template!(self, mv_est, cur_blk, mb_x, mb_y, sad_mb, DIST_THRESH)
             }
             fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32) {
-                search_template!(self, mv_est, cur_blk, xpos, ypos, sad_blk)
+                search_template!(self, mv_est, cur_blk, xpos, ypos, sad_blk, DIST_THRESH)
             }
         }
     }
diff --git a/nihav-duck/src/codecs/vp6enc/mb.rs b/nihav-duck/src/codecs/vp6enc/mb.rs
index de480ca..c7e5003 100644
--- a/nihav-duck/src/codecs/vp6enc/mb.rs
+++ b/nihav-duck/src/codecs/vp6enc/mb.rs
@@ -355,11 +355,8 @@ impl FrameEncoder {
         let mut mv_est = MVEstimator::new(ref_frame, mc_buf, loop_thr, self.me_range);
 
-        let mut mv_search: Box<dyn MVSearch + Send> = match self.me_mode {
-                MVSearchMode::Full    => Box::new(FullMVSearch::new()),
-                MVSearchMode::Diamond => Box::new(DiaSearch::new()),
-                MVSearchMode::Hexagon => Box::new(HexSearch::new()),
-            };
+        let mut mv_search = self.me_mode.create_search();
+
         let mut mb_pos = 0;
         for (mb_y, row) in inter_mbs.chunks_mut(self.mb_w).enumerate() {
             for (mb_x, mb) in row.iter_mut().enumerate() {
@@ -406,11 +403,7 @@ impl FrameEncoder {
         let mut mv_est = MVEstimator::new(ref_frame, mc_buf, loop_thr, self.me_range);
 
-        let mut mv_search: Box<dyn MVSearch + Send> = match self.me_mode {
-                MVSearchMode::Full    => Box::new(FullMVSearch::new()),
-                MVSearchMode::Diamond => Box::new(DiaSearch::new()),
-                MVSearchMode::Hexagon => Box::new(HexSearch::new()),
-            };
+        let mut mv_search = self.me_mode.create_search();
 
         for i in 0..4 {
             let xpos = mb_x * 16 + (i & 1) * 8;
diff --git a/nihav-duck/src/codecs/vp7enc/blocks.rs b/nihav-duck/src/codecs/vp7enc/blocks.rs
new file mode 100644
index 0000000..bfb8fbd
--- /dev/null
+++ b/nihav-duck/src/codecs/vp7enc/blocks.rs
@@ -0,0 +1,864 @@
+use nihav_core::frame::*;
+use nihav_codec_support::codecs::{MV, ZERO_MV};
+use nihav_codec_support::data::GenericCache;
+use super::super::vp78::{PredMode, MVSplitMode, SubMVRef};
+use super::super::vp78data::*;
+use super::super::vp78dsp::*;
+use super::super::vp7data::*;
+use super::super::vp7dsp::*;
+
+#[derive(Clone,Copy)]
+pub enum MBType {
+    Intra(PredMode, PredMode),
+    Intra4x4([PredMode; 16], [u8; 16], PredMode),
+    InterNoMV(bool, [u8; 4]),
+    InterNearest(bool, [u8; 4]),
+    InterNear(bool, [u8; 4]),
+    InterMV(bool, [u8; 4], MV),
+    InterSplitMV(bool, [u8; 4], MVSplitMode, [SubMVRef; 16], [MV; 16]),
+}
+
+impl MBType {
+    pub fn is_intra(&self) -> bool {
+        match *self {
+            MBType::Intra(_, _) |
+            MBType::Intra4x4(_, _, _) => true,
+            _ => false,
+        }
+    }
+    pub fn get_last(&self) -> bool {
+        match *self {
+            MBType::InterNoMV(last, _) |
+            MBType::InterNearest(last, _) |
+ MBType::InterNear(last, _) | + MBType::InterMV(last, _, _) | + MBType::InterSplitMV(last, _, _, _, _) => last, + _ => false, + } + } +} + +impl Default for MBType { + fn default() -> Self { MBType::Intra(PredMode::DCPred, PredMode::DCPred) } +} + +pub fn get_block_difference(dst: &mut [i16; 16], src1: &[u8; 16], src2: &[u8; 16]) { + for (dst, (&src1, &src2)) in dst.iter_mut().zip(src1.iter().zip(src2.iter())) { + *dst = i16::from(src1) - i16::from(src2); + } +} +pub fn get_difference_dist(old: &[u8; 16], new: &[u8; 16], diff: &[i16; 16]) -> u32 { + let mut dist = 0; + for ((&old, &new), &diff) in old.iter().zip(new.iter()).zip(diff.iter()) { + let nval = (i16::from(new) + diff).max(0).min(255); + let oval = i16::from(old); + dist += (i32::from(nval - oval) * i32::from(nval - oval)) as u32; + } + dist +} + +pub fn requant_y2_dc(val: &mut i16, q: usize) { + *val = *val / Y2_DC_QUANTS[q] * Y2_DC_QUANTS[q]; +} + +pub trait DCTBlock { + fn has_nz(&self) -> bool; + fn fdct(&mut self); + fn idct(&mut self); + fn requant_y(&mut self, q: usize); + fn quant(&mut self, q: usize, ctype: usize); + fn dequant(&mut self, q: usize, ctype: usize); +} + +impl DCTBlock for [i16; 16] { + fn has_nz(&self) -> bool { + for &el in self.iter() { + if el != 0 { + return true; + } + } + false + } + #[allow(clippy::erasing_op)] + #[allow(clippy::identity_op)] + fn fdct(&mut self) { + let mut tmp = [0i16; 16]; + for i in 0..4 { + let s0 = i32::from(self[i + 4 * 0]); + let s1 = i32::from(self[i + 4 * 1]); + let s2 = i32::from(self[i + 4 * 2]); + let s3 = i32::from(self[i + 4 * 3]); + + let t0 = (s0 + s3).wrapping_mul(23170) + 0x2000; + let t1 = (s1 + s2).wrapping_mul(23170); + let t2 = s0 - s3; + let t3 = s1 - s2; + let t4 = t2.wrapping_mul(30274) + t3.wrapping_mul(12540) + 0x2000; + let t5 = t2.wrapping_mul(12540) - t3.wrapping_mul(30274) + 0x2000; + + tmp[i + 0 * 4] = ((t0 + t1) >> 14) as i16; + tmp[i + 1 * 4] = ( t4 >> 14) as i16; + tmp[i + 2 * 4] = ((t0 - t1) >> 14) as i16; + tmp[i + 3 * 4] = ( t5 >> 14) as i16; + } + for (src, dst) in tmp.chunks(4).zip(self.chunks_mut(4)) { + let s0 = i32::from(src[0]); + let s1 = i32::from(src[1]); + let s2 = i32::from(src[2]); + let s3 = i32::from(src[3]); + + let t0 = (s0 + s3).wrapping_mul(23170) + 0x8000; + let t1 = (s1 + s2).wrapping_mul(23170); + let t2 = s0 - s3; + let t3 = s1 - s2; + let t4 = t2.wrapping_mul(30274) + t3.wrapping_mul(12540) + 0x8000; + let t5 = t2.wrapping_mul(12540) - t3.wrapping_mul(30274) + 0x8000; + + dst[0] = ((t0 + t1) >> 16) as i16; + dst[1] = ( t4 >> 16) as i16; + dst[2] = ((t0 - t1) >> 16) as i16; + dst[3] = ( t5 >> 16) as i16; + } + } + fn idct(&mut self) { idct4x4(self) } + fn requant_y(&mut self, q: usize) { + self[0] = self[0] / Y_DC_QUANTS[q] * Y_DC_QUANTS[q]; + for el in self[1..].iter_mut() { + *el = *el / Y_AC_QUANTS[q] * Y_AC_QUANTS[q]; + } + } + fn quant(&mut self, q: usize, ctype: usize) { + let (q_dc, q_ac) = match ctype { + 0 | 3 => (Y_DC_QUANTS[q], Y_AC_QUANTS[q]), + 2 => (UV_DC_QUANTS[q], UV_AC_QUANTS[q]), + _ => (Y2_DC_QUANTS[q], Y2_AC_QUANTS[q]), + }; + self[0] /= q_dc; + for el in self[1..].iter_mut() { + *el /= q_ac; + } + } + fn dequant(&mut self, q: usize, ctype: usize) { + let (q_dc, q_ac) = match ctype { + 0 | 3 => (Y_DC_QUANTS[q], Y_AC_QUANTS[q]), + 2 => (UV_DC_QUANTS[q], UV_AC_QUANTS[q]), + _ => (Y2_DC_QUANTS[q], Y2_AC_QUANTS[q]), + }; + self[0] *= q_dc; + for el in self[1..].iter_mut() { + *el *= q_ac; + } + } +} + +pub trait IPredBlock16 { + fn ipred16(&mut self, stride: usize, mode: PredMode, ipred: &IPredContext); +} 
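+// Like IPredBlock16 above, the traits below are thin helpers that dispatch a
+// PredMode to the shared VP7/VP8 intra prediction routines (IPred8x8 and
+// IPred4x4 from vp78dsp), letting the encoder predict into fixed-size buffers.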
+pub trait IPredBlock8 {
+    fn ipred8 (&mut self, stride: usize, mode: PredMode, ipred: &IPredContext);
+}
+pub trait IPredBlock4 {
+    fn ipred4 (&mut self, stride: usize, mode: PredMode, ipred: &IPredContext);
+}
+
+impl IPredBlock16 for [u8; 256] {
+    fn ipred16(&mut self, stride: usize, mode: PredMode, ipred: &IPredContext) {
+        match mode {
+            PredMode::DCPred => IPred16x16::ipred_dc(self, 0, stride, ipred),
+            PredMode::HPred  => IPred16x16::ipred_h (self, 0, stride, ipred),
+            PredMode::VPred  => IPred16x16::ipred_v (self, 0, stride, ipred),
+            PredMode::TMPred => IPred16x16::ipred_tm(self, 0, stride, ipred),
+            _ => {},
+        }
+    }
+}
+impl IPredBlock8 for [u8; 64] {
+    fn ipred8(&mut self, stride: usize, mode: PredMode, ipred: &IPredContext) {
+        match mode {
+            PredMode::DCPred => IPred8x8::ipred_dc(self, 0, stride, ipred),
+            PredMode::HPred  => IPred8x8::ipred_h (self, 0, stride, ipred),
+            PredMode::VPred  => IPred8x8::ipred_v (self, 0, stride, ipred),
+            PredMode::TMPred => IPred8x8::ipred_tm(self, 0, stride, ipred),
+            _ => {},
+        }
+    }
+}
+impl IPredBlock4 for &mut [u8] {
+    fn ipred4(&mut self, stride: usize, mode: PredMode, ipred: &IPredContext) {
+        match mode {
+            PredMode::DCPred => IPred4x4::ipred_dc(self, 0, stride, ipred),
+            PredMode::HPred  => IPred4x4::ipred_he(self, 0, stride, ipred),
+            PredMode::VPred  => IPred4x4::ipred_ve(self, 0, stride, ipred),
+            PredMode::TMPred => IPred4x4::ipred_tm(self, 0, stride, ipred),
+            PredMode::LDPred => IPred4x4::ipred_ld(self, 0, stride, ipred),
+            PredMode::RDPred => IPred4x4::ipred_rd(self, 0, stride, ipred),
+            PredMode::VRPred => IPred4x4::ipred_vr(self, 0, stride, ipred),
+            PredMode::VLPred => IPred4x4::ipred_vl(self, 0, stride, ipred),
+            PredMode::HDPred => IPred4x4::ipred_hd(self, 0, stride, ipred),
+            PredMode::HUPred => IPred4x4::ipred_hu(self, 0, stride, ipred),
+            _ => {},
+        }
+    }
+}
+impl IPredBlock4 for [u8; 16] {
+    fn ipred4(&mut self, stride: usize, mode: PredMode, ipred: &IPredContext) {
+        (self as &mut [u8]).ipred4(stride, mode, ipred);
+    }
+}
+
+pub struct LumaIterator<'a> {
+    luma:   &'a [u8; 256],
+    blkno:  usize,
+}
+
+impl<'a> Iterator for LumaIterator<'a> {
+    type Item = [u8; 16];
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.blkno < 16 {
+            let mut blk = [0; 16];
+            let off = (self.blkno & 3) * 4 + (self.blkno >> 2) * 16 * 4;
+            for (dst, src) in blk.chunks_exact_mut(4).zip(self.luma[off..].chunks(16)) {
+                dst.copy_from_slice(&src[..4]);
+            }
+            self.blkno += 1;
+            Some(blk)
+        } else {
+            None
+        }
+    }
+}
+
+pub struct ChromaIterator<'a> {
+    chroma: &'a [u8; 64],
+    blkno:  usize,
+}
+
+impl<'a> Iterator for ChromaIterator<'a> {
+    type Item = [u8; 16];
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.blkno < 4 {
+            let mut blk = [0; 16];
+            let off = (self.blkno & 1) * 4 + (self.blkno >> 1) * 8 * 4;
+            for (dst, src) in blk.chunks_exact_mut(4).zip(self.chroma[off..].chunks(8)) {
+                dst.copy_from_slice(&src[..4]);
+            }
+            self.blkno += 1;
+            Some(blk)
+        } else {
+            None
+        }
+    }
+}
+
+pub struct SrcBlock {
+    pub luma: [u8; 256],
+    pub chroma: [[u8; 64]; 2],
+}
+
+impl Default for SrcBlock {
+    fn default() -> Self {
+        unsafe { std::mem::zeroed() }
+    }
+}
+
+impl SrcBlock {
+    pub fn new() -> Self { Self::default() }
+    pub fn is_flat(&self) -> bool {
+        let y0 = self.luma[0];
+        for &el in self.luma[1..].iter() {
+            if el != y0 {
+                return false;
+            }
+        }
+        true
+    }
+    pub fn apply_ipred_luma(&self, mode: PredMode, ipred: &IPredContext, dst: &mut Residue) {
+        let mut tmp = [0; 256];
+        (&mut tmp).ipred16(16, mode, ipred);
+        dst.set_luma_from_diff(&self.luma, &tmp);
+    }
+    pub fn 
fill_ipred_luma(&mut self, mode: PredMode, ipred: &IPredContext) { + self.luma.ipred16(16, mode, ipred); + } + pub fn apply_ipred_chroma(&self, mode: PredMode, ipred_u: &IPredContext, ipred_v: &IPredContext, dst: &mut Residue) { + let mut tmp = [[0u8; 64]; 2]; + tmp[0].ipred8(8, mode, ipred_u); + tmp[1].ipred8(8, mode, ipred_v); + dst.set_chroma_from_diff(&self.chroma, &tmp); + } + pub fn fill_ipred_chroma(&mut self, mode: PredMode, ipred_u: &IPredContext, ipred_v: &IPredContext) { + self.chroma[0].ipred8(8, mode, ipred_u); + self.chroma[1].ipred8(8, mode, ipred_v); + } + + pub fn luma_blocks(&self) -> LumaIterator { + LumaIterator{ luma: &self.luma, blkno: 0 } + } + pub fn chroma_blocks(&self, plane: usize) -> ChromaIterator { + ChromaIterator{ chroma: &self.chroma[plane], blkno: 0 } + } +} + +#[derive(Clone)] +pub struct Residue { + pub luma: [[i16; 16]; 16], + pub dcs: [i16; 16], + pub chroma: [[[i16; 16]; 4]; 2], + pub has_dc: bool, + pub q: u8, +} + +impl Default for Residue { + fn default() -> Self { + unsafe { std::mem::zeroed() } + } +} + +impl Residue { + pub fn new() -> Self { Self::default() } + pub fn reset(&mut self) { + self.has_dc = false; + self.q = 242; + } + pub fn add_residue(&mut self, dst: &mut SrcBlock) { + self.dequant(); + self.idct(); + + for (dst, src) in dst.luma.chunks_mut(16 * 4).zip(self.luma.chunks(4)) { + for (x, blk) in src.iter().enumerate() { + for (drow, srow) in dst[x * 4..].chunks_mut(16).zip(blk.chunks(4)) { + for (del, &sel) in drow.iter_mut().zip(srow.iter()) { + *del = (i16::from(*del) + sel).max(0).min(255) as u8; + } + } + } + } + for (dchroma, schroma) in dst.chroma.iter_mut().zip(self.chroma.iter()) { + for (dst, src) in dchroma.chunks_mut(8 * 4).zip(schroma.chunks(2)) { + for (x, blk) in src.iter().enumerate() { + for (drow, srow) in dst[x * 4..].chunks_mut(8).zip(blk.chunks(4)) { + for (del, &sel) in drow.iter_mut().zip(srow.iter()) { + *del = (i16::from(*del) + sel).max(0).min(255) as u8; + } + } + } + } + } + } + pub fn add_residue_chroma(&mut self, dst: &mut SrcBlock) { + let q = self.q as usize; + for (dchroma, schroma) in dst.chroma.iter_mut().zip(self.chroma.iter_mut()) { + for (dst, src) in dchroma.chunks_mut(8 * 4).zip(schroma.chunks_mut(2)) { + for (x, blk) in src.iter_mut().enumerate() { + blk[0] *= UV_DC_QUANTS[q]; + for el in blk[1..].iter_mut() { + if *el != 0 { + *el *= UV_AC_QUANTS[q]; + } + } + blk.idct(); + for (drow, srow) in dst[x * 4..].chunks_mut(8).zip(blk.chunks(4)) { + for (del, &sel) in drow.iter_mut().zip(srow.iter()) { + *del = (i16::from(*del) + sel).max(0).min(255) as u8; + } + } + } + } + } + } + pub fn set_luma_from_diff(&mut self, blk1: &[u8; 256], blk2: &[u8; 256]) { + for (dst, (src1, src2)) in self.luma.chunks_mut(4).zip(blk1.chunks(16 * 4).zip(blk2.chunks(16 * 4))) { + for (x, blk) in dst.iter_mut().enumerate() { + for (dst, (row1, row2)) in blk.chunks_mut(4).zip(src1[x * 4..].chunks(16).zip(src2[x * 4..].chunks(16))) { + for (dst, (&a, &b)) in dst.iter_mut().zip(row1.iter().zip(row2.iter())) { + *dst = i16::from(a) - i16::from(b); + } + } + } + } + } + pub fn set_chroma_from_diff(&mut self, blk1: &[[u8; 64]; 2], blk2: &[[u8; 64]; 2]) { + for (chroma, (src1, src2)) in self.chroma.iter_mut().zip(blk1.iter().zip(blk2.iter())) { + for (dst, (src1, src2)) in chroma.chunks_mut(2).zip(src1.chunks(8 * 4).zip(src2.chunks(8 * 4))) { + for (x, blk) in dst.iter_mut().enumerate() { + for (dst, (row1, row2)) in blk.chunks_mut(4).zip(src1[x * 4..].chunks(8).zip(src2[x * 4..].chunks(8))) { + for (dst, (&a, &b)) in 
dst.iter_mut().zip(row1.iter().zip(row2.iter())) {
+                            *dst = i16::from(a) - i16::from(b);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    pub fn fdct(&mut self) {
+        self.fdct_luma();
+        self.fdct_chroma();
+    }
+    pub fn fdct_luma(&mut self) {
+        for blk in self.luma.iter_mut() {
+            blk.fdct();
+        }
+    }
+    pub fn fdct_chroma(&mut self) {
+        for chroma in self.chroma.iter_mut() {
+            for blk in chroma.iter_mut() {
+                blk.fdct();
+            }
+        }
+    }
+    pub fn fdct_dc_block(&mut self) {
+        for (dc, blk) in self.dcs.iter_mut().zip(self.luma.iter_mut()) {
+            *dc = blk[0];
+            blk[0] = 0;
+        }
+        self.dcs.fdct();
+        self.has_dc = true;
+    }
+    pub fn idct(&mut self) {
+        self.idct_luma();
+        self.idct_chroma();
+    }
+    pub fn idct_luma(&mut self) {
+        if self.has_dc {
+            self.dcs.idct();
+            for (&dc, blk) in self.dcs.iter().zip(self.luma.iter_mut()) {
+                blk[0] = dc;
+            }
+        }
+        for blk in self.luma.iter_mut() {
+            blk.idct();
+        }
+    }
+    pub fn idct_chroma(&mut self) {
+        for chroma in self.chroma.iter_mut() {
+            for blk in chroma.iter_mut() {
+                blk.idct();
+            }
+        }
+    }
+    pub fn quant(&mut self, q: usize) {
+        self.quant_luma(q);
+        self.quant_chroma(q);
+        self.q = q as u8;
+    }
+    pub fn quant_luma(&mut self, q: usize) {
+        if self.has_dc {
+            self.dcs[0] /= Y2_DC_QUANTS[q];
+            for el in self.dcs[1..].iter_mut() {
+                if *el != 0 {
+                    *el /= Y2_AC_QUANTS[q];
+                }
+            }
+        }
+        for blk in self.luma.iter_mut() {
+            blk[0] /= Y_DC_QUANTS[q];
+            for el in blk[1..].iter_mut() {
+                if *el != 0 {
+                    *el /= Y_AC_QUANTS[q];
+                }
+            }
+        }
+        self.q = q as u8;
+    }
+    pub fn quant_chroma(&mut self, q: usize) {
+        for chroma in self.chroma.iter_mut() {
+            for blk in chroma.iter_mut() {
+                blk[0] /= UV_DC_QUANTS[q];
+                for el in blk[1..].iter_mut() {
+                    if *el != 0 {
+                        *el /= UV_AC_QUANTS[q];
+                    }
+                }
+            }
+        }
+        self.q = q as u8;
+    }
+    pub fn dequant(&mut self) {
+        self.dequant_luma();
+        self.dequant_chroma();
+    }
+    pub fn dequant_luma(&mut self) {
+        let q = self.q as usize;
+        if self.has_dc {
+            self.dcs[0] *= Y2_DC_QUANTS[q];
+            for el in self.dcs[1..].iter_mut() {
+                if *el != 0 {
+                    *el *= Y2_AC_QUANTS[q];
+                }
+            }
+        }
+        for blk in self.luma.iter_mut() {
+            blk[0] *= Y_DC_QUANTS[q];
+            for el in blk[1..].iter_mut() {
+                if *el != 0 {
+                    *el *= Y_AC_QUANTS[q];
+                }
+            }
+        }
+    }
+    pub fn dequant_chroma(&mut self) {
+        let q = self.q as usize;
+        for chroma in self.chroma.iter_mut() {
+            for blk in chroma.iter_mut() {
+                blk[0] *= UV_DC_QUANTS[q];
+                for el in blk[1..].iter_mut() {
+                    if *el != 0 {
+                        *el *= UV_AC_QUANTS[q];
+                    }
+                }
+            }
+        }
+    }
+}
+
+pub fn load_blocks(src: &NAVideoBuffer<u8>, sblocks: &mut Vec<SrcBlock>) {
+    let data = src.get_data();
+    let y = &data[src.get_offset(0)..];
+    let u = &data[src.get_offset(1)..];
+    let v = &data[src.get_offset(2)..];
+    let ystride = src.get_stride(0);
+    let ustride = src.get_stride(1);
+    let vstride = src.get_stride(2);
+    let (width, height) = src.get_dimensions(0);
+
+    sblocks.clear();
+    for (ystrip, (ustrip, vstrip)) in y.chunks(ystride * 16).take((height + 15) / 16).zip(u.chunks(ustride * 8).zip(v.chunks(vstride * 8))) {
+        for x in (0..width).step_by(16) {
+            let mut sblk = SrcBlock::default();
+
+            for (dst, src) in sblk.luma.chunks_mut(16).zip(ystrip[x..].chunks(ystride)) {
+                dst.copy_from_slice(&src[..16]);
+            }
+            for (dst, src) in sblk.chroma[0].chunks_mut(8).zip(ustrip[x / 2..].chunks(ustride)) {
+                dst.copy_from_slice(&src[..8]);
+            }
+            for (dst, src) in sblk.chroma[1].chunks_mut(8).zip(vstrip[x / 2..].chunks(vstride)) {
+                dst.copy_from_slice(&src[..8]);
+            }
+            sblocks.push(sblk);
+        }
+    }
+}
+
+pub struct YModePred {
+    pub cache: GenericCache<PredMode>,
+}
+
+impl YModePred {
+    fn resize(&mut self, mb_w: usize) {
+        self.cache = GenericCache::new(4, mb_w * 4 + 1, PredMode::DCPred);
+    }
+    pub fn set_mode(&mut self, mb_x: usize, mode: PredMode) {
+        for row in self.cache.data[self.cache.xpos + mb_x * 4..].chunks_mut(self.cache.stride).take(4) {
+            for el in row[..4].iter_mut() {
+                *el = mode.to_b_mode();
+            }
+        }
+    }
+    pub fn set_modes4x4(&mut self, mb_x: usize, imodes: &[PredMode; 16], ctx: &mut [u8; 16]) {
+        let mut off = self.cache.xpos + mb_x * 4;
+        for y in 0..4 {
+            for x in 0..4 {
+                let top_idx  = self.cache.data[off + x - self.cache.stride].to_b_index();
+                let left_idx = self.cache.data[off + x - 1].to_b_index();
+                self.cache.data[off + x] = imodes[x + y * 4];
+                ctx[x + y * 4] = ((top_idx * 10) + left_idx) as u8;
+            }
+            off += self.cache.stride;
+        }
+    }
+}
+
+impl Default for YModePred {
+    fn default() -> Self {
+        Self {
+            cache: GenericCache::new(0, 0, PredMode::DCPred)
+        }
+    }
+}
+
+#[derive(Default)]
+pub struct BlockPCtx {
+    pub nz_y2:     u8,
+    pub nz_y_top:  [bool; 4],
+    pub nz_y_left: [bool; 4],
+    pub nz_c_top:  [[bool; 2]; 2],
+    pub nz_c_left: [[bool; 2]; 2],
+}
+
+#[derive(Default)]
+pub struct PredContext {
+    pub mb_w: usize,
+    pub mb_h: usize,
+
+    pub top_line_y: Vec<u8>,
+    pub top_line_u: Vec<u8>,
+    pub top_line_v: Vec<u8>,
+    pub tl_y: u8,
+    pub tl_u: u8,
+    pub tl_v: u8,
+
+    pub left_y: [u8; 16],
+    pub left_u: [u8; 16],
+    pub left_v: [u8; 16],
+
+    pub dc_last:  [i16; 2],
+    pub dc_count: [usize; 2],
+    dc_last_saved:  [i16; 2],
+    dc_count_saved: [usize; 2],
+    pub nz_y2_top:  Vec<bool>,
+    pub nz_y2_left: bool,
+    pub nz_y_top:   Vec<bool>,
+    pub nz_y_left:  [bool; 4],
+    pub nz_c_top:   [Vec<bool>; 2],
+    pub nz_c_left:  [[bool; 2]; 2],
+
+    pub ymodes: YModePred,
+
+    pub mvs: Vec<MV>,
+    pub mv_stride: usize,
+    pub version: u8,
+}
+
+impl PredContext {
+    pub fn new() -> Self { Self::default() }
+    pub fn resize(&mut self, mb_w: usize, mb_h: usize) {
+        self.mb_w = mb_w;
+        self.mb_h = mb_h;
+
+        self.top_line_y.resize(mb_w * 16 + 1, 0);
+        self.top_line_u.resize(mb_w * 8 + 1, 0);
+        self.top_line_v.resize(mb_w * 8 + 1, 0);
+
+        self.nz_y2_top.resize(mb_w, false);
+        self.nz_y_top.resize(mb_w * 4, false);
+        self.nz_c_top[0].resize(mb_w * 2, false);
+        self.nz_c_top[1].resize(mb_w * 2, false);
+
+        self.ymodes.resize(mb_w);
+
+        self.mv_stride = mb_w * 4;
+        self.mvs.resize(self.mv_stride * mb_h * 4, ZERO_MV);
+    }
+
+    pub fn reset(&mut self) {
+        for el in self.top_line_y.iter_mut() { *el = 0x80; }
+        for el in self.top_line_u.iter_mut() { *el = 0x80; }
+        for el in self.top_line_v.iter_mut() { *el = 0x80; }
+        self.left_y = [0x80; 16];
+        self.left_u = [0x80; 16];
+        self.left_v = [0x80; 16];
+        self.tl_y = 0x80;
+        self.tl_u = 0x80;
+        self.tl_v = 0x80;
+
+        for el in self.nz_y_top.iter_mut() { *el = false; }
+        self.nz_y_left = [false; 4];
+        for el in self.nz_y2_top.iter_mut() { *el = false; }
+        self.nz_y2_left = false;
+        for el in self.nz_c_top[0].iter_mut() { *el = false; }
+        for el in self.nz_c_top[1].iter_mut() { *el = false; }
+        self.nz_c_left = [[false; 2]; 2];
+
+        self.ymodes.cache.reset();
+
+        for mv in self.mvs.iter_mut() { *mv = ZERO_MV; }
+    }
+    pub fn reset_intra(&mut self) {
+        self.dc_last = [0; 2];
+        self.dc_count = [0; 2];
+        self.dc_last_saved = [0; 2];
+        self.dc_count_saved = [0; 2];
+    }
+    pub fn save_dc_pred(&mut self) {
+        self.dc_last_saved = self.dc_last;
+        self.dc_count_saved = self.dc_count;
+    }
+    #[allow(dead_code)]
+    pub fn restore_dc_pred(&mut self) {
+        self.dc_last = self.dc_last_saved;
+        self.dc_count = self.dc_count_saved;
+    }
+    pub fn update_mb_row(&mut self) {
+        self.left_y = [0x80; 16];
+        self.left_u = [0x80; 16];
+ self.left_v = [0x80; 16]; + self.tl_y = 0x80; + self.tl_u = 0x80; + self.tl_v = 0x80; + self.ymodes.cache.update_row(); + } + pub fn update_mb(&mut self, sblk: &SrcBlock, mb_x: usize) { + for (dst, src) in self.left_y.iter_mut().zip(sblk.luma.chunks_exact(16)) { + *dst = src[15]; + } + self.tl_y = self.top_line_y[mb_x * 16 + 16]; + self.top_line_y[mb_x * 16 + 1..][..16].copy_from_slice(&sblk.luma[15 * 16..]); + + for (dst, src) in self.left_u.iter_mut().zip(sblk.chroma[0].chunks_exact(8)) { + *dst = src[7]; + } + self.tl_u = self.top_line_u[mb_x * 8 + 8]; + self.top_line_u[mb_x * 8 + 1..][..8].copy_from_slice(&sblk.chroma[0][7 * 8..]); + + for (dst, src) in self.left_v.iter_mut().zip(sblk.chroma[1].chunks_exact(8)) { + *dst = src[7]; + } + self.tl_v = self.top_line_v[mb_x * 8 + 8]; + self.top_line_v[mb_x * 8 + 1..][..8].copy_from_slice(&sblk.chroma[1][7 * 8..]); + } + pub fn fill_ipred(&mut self, plane: usize, mb_x: usize, ipred: &mut IPredContext) { + match plane { + 0 => { + if ipred.has_top { + ipred.top.copy_from_slice(&self.top_line_y[mb_x * 16 + 1..][..16]); + ipred.tl = self.tl_y; + } + ipred.left.copy_from_slice(&self.left_y); + ipred.has_left = mb_x > 0; + }, + 1 => { + if ipred.has_top { + ipred.top[..8].copy_from_slice(&self.top_line_u[mb_x * 8 + 1..][..8]); + ipred.tl = self.tl_u; + } + ipred.left.copy_from_slice(&self.left_u); + ipred.has_left = mb_x > 0; + }, + _ => { + if ipred.has_top { + ipred.top[..8].copy_from_slice(&self.top_line_v[mb_x * 8 + 1..][..8]); + ipred.tl = self.tl_v; + } + ipred.left.copy_from_slice(&self.left_v); + ipred.has_left = mb_x > 0; + }, + } + } + pub fn get_ipred_tr(&self, mb_x: usize) -> [u8; 4] { + if mb_x < self.mb_w - 1 { + let mut tr = [0; 4]; + tr.copy_from_slice(&self.top_line_y[mb_x * 16 + 1 + 16..][..4]); + tr + } else { + [0x80; 4] + } + } + pub fn fill_pctx(&self, mb_x: usize, pctx: &mut BlockPCtx) { + pctx.nz_y2 = (self.nz_y2_left as u8) + (self.nz_y2_top[mb_x] as u8); + pctx.nz_y_left = self.nz_y_left; + pctx.nz_y_top.copy_from_slice(&self.nz_y_top[mb_x * 4..][..4]); + pctx.nz_c_left = self.nz_c_left; + pctx.nz_c_top = [[self.nz_c_top[0][mb_x * 2], self.nz_c_top[0][mb_x * 2 + 1]], + [self.nz_c_top[1][mb_x * 2], self.nz_c_top[1][mb_x * 2 + 1]]]; + } + pub fn set_nz(&mut self, mb_x: usize, blk: &Residue) { + if blk.has_dc { + let has_nz = blk.dcs.has_nz(); + self.nz_y2_left = has_nz; + self.nz_y2_top[mb_x] = has_nz; + } + for (y, blk_row) in blk.luma.chunks(4).enumerate() { + for (x, blk) in blk_row.iter().enumerate() { + let has_nz = blk.has_nz(); + self.nz_y_left[y] = has_nz; + self.nz_y_top[mb_x * 4 + x] = has_nz; + } + } + for (c, chroma) in blk.chroma.iter().enumerate() { + for (y, blk_row) in chroma.chunks(2).enumerate() { + for (x, blk) in blk_row.iter().enumerate() { + let has_nz = blk.has_nz(); + self.nz_c_left[c][y] = has_nz; + self.nz_c_top[c][mb_x * 2 + x] = has_nz; + } + } + } + } + + pub fn get_y2_dc_pred(&self, last: bool) -> i16 { + let ref_id = !last as usize; + if self.dc_count[ref_id] > 3 { + self.dc_last[ref_id] + } else { + 0 + } + } + pub fn predict_y2_dc(&mut self, dc: &mut i16, last: bool) { + let ref_id = !last as usize; + let pdc = self.dc_last[ref_id]; + let orig_dc = *dc; + + if self.dc_count[ref_id] > 3 { + *dc -= pdc; + } + + if (pdc == 0) || (orig_dc == 0) || ((pdc ^ orig_dc) < 0) { + self.dc_count[ref_id] = 0; + } else if pdc == orig_dc { + self.dc_count[ref_id] += 1; + } + self.dc_last[ref_id] = orig_dc; + } + + pub fn fill_mv(&mut self, mb_x: usize, mb_y: usize, mv: MV) { + let mut iidx = mb_x * 4 + 
mb_y * 4 * self.mv_stride;
+        for _ in 0..4 {
+            for x in 0..4 {
+                self.mvs[iidx + x] = mv;
+            }
+            iidx += self.mb_w * 4;
+        }
+    }
+    pub fn find_mv_pred(&self, mb_x: usize, mb_y: usize) -> ([u8; 4], MV, MV, MV) {
+        let mut nearest_mv = ZERO_MV;
+        let mut near_mv = ZERO_MV;
+
+        let mut ct: [u8; 4] = [0; 4];
+
+        let start = if self.version == 0 { 1 } else { 0 };
+        let mvwrap = (self.mb_w as isize) + 1;
+        for (yoff, xoff, weight, blk_no) in CAND_POS.iter() {
+            let cx = (mb_x as isize) + (*xoff as isize);
+            let cy = (mb_y as isize) + (*yoff as isize);
+            let mvpos = cx + cy * mvwrap;
+            if (mvpos < start) || ((mvpos % mvwrap) == (mvwrap - 1)) {
+                ct[0] += weight;
+                continue;
+            }
+            let cx = (mvpos % mvwrap) as usize;
+            let cy = (mvpos / mvwrap) as usize;
+            let bx = (*blk_no as usize) & 3;
+            let by = (*blk_no as usize) >> 2;
+            let blk_pos = cx * 4 + bx + (cy * 4 + by) * self.mv_stride;
+            let mv = self.mvs[blk_pos];
+            if mv == ZERO_MV {
+                ct[0] += weight;
+                continue;
+            }
+            let idx;
+            if (nearest_mv == ZERO_MV) || (nearest_mv == mv) {
+                nearest_mv = mv;
+                idx = 1;
+            } else if near_mv == ZERO_MV {
+                near_mv = mv;
+                idx = 2;
+            } else {
+                idx = if mv == near_mv { 2 } else { 3 };
+            }
+            ct[idx] += weight;
+        }
+        let pred_mv = if ct[1] > ct[2] {
+                if ct[1] >= ct[0] { nearest_mv } else { ZERO_MV }
+            } else {
+                if ct[2] >= ct[0] { near_mv } else { ZERO_MV }
+            };
+
+        let mvprobs = [INTER_MODE_PROBS[ct[0] as usize][0],
+                       INTER_MODE_PROBS[ct[1] as usize][1],
+                       INTER_MODE_PROBS[ct[2] as usize][2],
+                       INTER_MODE_PROBS[ct[2] as usize][3]];
+
+        (mvprobs, nearest_mv, near_mv, pred_mv)
+    }
+}
diff --git a/nihav-duck/src/codecs/vp7enc/coder.rs b/nihav-duck/src/codecs/vp7enc/coder.rs
new file mode 100644
index 0000000..fcd885a
--- /dev/null
+++ b/nihav-duck/src/codecs/vp7enc/coder.rs
@@ -0,0 +1,601 @@
+use nihav_core::codecs::EncoderResult;
+use nihav_codec_support::codecs::{MV, ZERO_MV};
+use crate::codecs::vpenc::coder::*;
+use super::super::vpcommon::*;
+use super::super::vp78::*;
+use super::super::vp78data::*;
+use super::super::vp7data::*;
+use super::blocks::MBType;
+use super::models::*;
+pub use crate::codecs::vpenc::coder::{BoolEncoder, Estimator};
+
+const KF_Y_MODE_TREE: &[TokenSeq<PredMode>] = &[
+    bit_seq!(PredMode::BPred;  F; 0),
+    bit_seq!(PredMode::DCPred; T, F, F; 0, 1, 2),
+    bit_seq!(PredMode::VPred;  T, F, T; 0, 1, 2),
+    bit_seq!(PredMode::HPred;  T, T, F; 0, 1, 3),
+    bit_seq!(PredMode::TMPred; T, T, T; 0, 1, 3),
+];
+
+const Y_MODE_TREE: &[TokenSeq<PredMode>] = &[
+    bit_seq!(PredMode::DCPred; F; 0),
+    bit_seq!(PredMode::VPred;  T, F, F; 0, 1, 2),
+    bit_seq!(PredMode::HPred;  T, F, T; 0, 1, 2),
+    bit_seq!(PredMode::TMPred; T, T, F; 0, 1, 3),
+    bit_seq!(PredMode::BPred;  T, T, T; 0, 1, 3),
+];
+
+const UV_MODE_TREE: &[TokenSeq<PredMode>] = &[
+    bit_seq!(PredMode::DCPred; F; 0),
+    bit_seq!(PredMode::VPred;  T, F; 0, 1),
+    bit_seq!(PredMode::HPred;  T, T, F; 0, 1, 2),
+    bit_seq!(PredMode::TMPred; T, T, T; 0, 1, 2),
+];
+
+const B_MODE_TREE: &[TokenSeq<PredMode>] = &[
+    bit_seq!(PredMode::DCPred; F; 0),
+    bit_seq!(PredMode::TMPred; T, F; 0, 1),
+    bit_seq!(PredMode::VPred;  T, T, F; 0, 1, 2),
+    bit_seq!(PredMode::HPred;  T, T, T, F, F; 0, 1, 2, 3, 4),
+    bit_seq!(PredMode::RDPred; T, T, T, F, T, F; 0, 1, 2, 3, 4, 5),
+    bit_seq!(PredMode::VRPred; T, T, T, F, T, T; 0, 1, 2, 3, 4, 5),
+    bit_seq!(PredMode::LDPred; T, T, T, T, F; 0, 1, 2, 3, 6),
+    bit_seq!(PredMode::VLPred; T, T, T, T, T, F; 0, 1, 2, 3, 6, 7),
+    bit_seq!(PredMode::HDPred; T, T, T, T, T, T, F; 0, 1, 2, 3, 6, 7, 8),
+    bit_seq!(PredMode::HUPred; T, T, T, T, T, T, T; 0, 1, 2, 3, 6, 7, 8),
+];
+
+const MV_REF_TREE: &[TokenSeq<VPMBType>] = &[
+    bit_seq!(VPMBType::InterNoMV;    F; 0),
+    bit_seq!(VPMBType::InterNearest; T, F; 0, 1),
+    bit_seq!(VPMBType::InterNear;    T, T, F; 0, 1, 2),
+    bit_seq!(VPMBType::InterMV;      T, T, T, F; 0, 1, 2, 3),
+    bit_seq!(VPMBType::InterFourMV;  T, T, T, T; 0, 1, 2, 3),
+];
+
+const COEF_TREE: &[TokenSeq<DCTToken>] = &[
+    bit_seq!(DCTToken::EOB;   F; 0),
+    bit_seq!(DCTToken::Zero;  T, F; 0, 1),
+    bit_seq!(DCTToken::One;   T, T, F; 0, 1, 2),
+    bit_seq!(DCTToken::Two;   T, T, T, F, F; 0, 1, 2, 3, 4),
+    bit_seq!(DCTToken::Three; T, T, T, F, T, F; 0, 1, 2, 3, 4, 5),
+    bit_seq!(DCTToken::Four;  T, T, T, F, T, T; 0, 1, 2, 3, 4, 5),
+    bit_seq!(DCTToken::Cat1;  T, T, T, T, F, F; 0, 1, 2, 3, 6, 7),
+    bit_seq!(DCTToken::Cat2;  T, T, T, T, F, T; 0, 1, 2, 3, 6, 7),
+    bit_seq!(DCTToken::Cat3;  T, T, T, T, T, F, F; 0, 1, 2, 3, 6, 8, 9),
+    bit_seq!(DCTToken::Cat4;  T, T, T, T, T, F, T; 0, 1, 2, 3, 6, 8, 9),
+    bit_seq!(DCTToken::Cat5;  T, T, T, T, T, T, F; 0, 1, 2, 3, 6, 8, 10),
+    bit_seq!(DCTToken::Cat6;  T, T, T, T, T, T, T; 0, 1, 2, 3, 6, 8, 10),
+];
+
+const MV_TREE: &[TokenSeq<i16>] = &[
+    bit_seq!(0; F, F, F, F; 0, 2, 3, 4),
+    bit_seq!(1; F, F, F, T; 0, 2, 3, 4),
+    bit_seq!(2; F, F, T, F; 0, 2, 3, 5),
+    bit_seq!(3; F, F, T, T; 0, 2, 3, 5),
+    bit_seq!(4; F, T, F, F; 0, 2, 6, 7),
+    bit_seq!(5; F, T, F, T; 0, 2, 6, 7),
+    bit_seq!(6; F, T, T, F; 0, 2, 6, 8),
+    bit_seq!(7; F, T, T, T; 0, 2, 6, 8),
+    bit_seq!(8; T; 0),
+];
+
+const MV_SPLIT_MODE_TREE: &[TokenSeq<MVSplitMode>] = &[
+    bit_seq!(MVSplitMode::Sixteenths; F; 0),
+    bit_seq!(MVSplitMode::Quarters;   T, F; 0, 1),
+    bit_seq!(MVSplitMode::TopBottom;  T, T, F; 0, 1, 2),
+    bit_seq!(MVSplitMode::LeftRight;  T, T, T; 0, 1, 2),
+];
+
+const SUB_MV_REF_TREE: &[TokenSeq<SubMVRef>] = &[
+    bit_seq!(SubMVRef::Left;  F; 0),
+    bit_seq!(SubMVRef::Above; T, F; 0, 1),
+    bit_seq!(SubMVRef::Zero;  T, T, F; 0, 1, 2),
+    bit_seq!(SubMVRef::New;   T, T, T; 0, 1, 2),
+];
+
+const FEATURE_TREE: &[TokenSeq<u8>] = &[
+    bit_seq!(0; F, F; 0, 1),
+    bit_seq!(1; F, T; 0, 1),
+    bit_seq!(2; T, F; 0, 2),
+    bit_seq!(3; T, T; 0, 2)
+];
+
+pub trait VP7BoolEncoder {
+    fn put_byte(&mut self, val: u8) -> EncoderResult<()>;
+    fn write_large_coef(&mut self, val: i16, cat: usize) -> EncoderResult<()>;
+    fn encode_subblock(&mut self, blk: &[i16; 16], ctype: usize, pctx: u8, models: &VP7Models) -> EncoderResult<()>;
+    fn encode_mv_component(&mut self, val: i16, probs: &[u8; 17]) -> EncoderResult<()>;
+    fn encode_mv(&mut self, mv: MV, models: &VP7Models) -> EncoderResult<()>;
+    fn encode_sub_mv(&mut self, stype: SubMVRef, mv: MV, models: &VP7Models) -> EncoderResult<()>;
+    fn encode_feature(&mut self, id: usize, feat: Option<u8>, models: &VP7Models) -> EncoderResult<()>;
+    fn encode_mb_type(&mut self, is_intra: bool, mb_type: &MBType, models: &VP7Models) -> EncoderResult<()>;
+}
+
+impl<'a, 'b> VP7BoolEncoder for BoolEncoder<'a, 'b> {
+    fn put_byte(&mut self, val: u8) -> EncoderResult<()> {
+        self.put_bits(u32::from(val), 8)
+    }
+    fn write_large_coef(&mut self, val: i16, cat: usize) -> EncoderResult<()> {
+        let base = VP56_COEF_BASE[cat];
+        let mut probs = VP56_COEF_ADD_PROBS[cat].iter();
+        let add = val.abs() - base;
+        let mut mask = 1 << (VP6_COEF_ADD_BITS[cat] - 1);
+        while mask != 0 {
+            self.put_bool((add & mask) != 0, *probs.next().unwrap())?;
+            mask >>= 1;
+        }
+        self.put_bool(val < 0, 128)?;
+
+        Ok(())
+    }
+    fn encode_subblock(&mut self, blk: &[i16; 16], ctype: usize, pctx: u8, models: &VP7Models) -> EncoderResult<()> {
+        let probs = &models.coef_probs[ctype];
+
+        let start = if ctype != 0 { 0 } else { 1 };
+        let mut cval = pctx as usize;
+
+        let mut last = 16;
+        for &idx in DEFAULT_SCAN_ORDER.iter().skip(start) {
+            if blk[idx] != 0 {
+                last = idx;
+            }
+        }
+
+        if last == 16 {
+            self.write_el(DCTToken::EOB, COEF_TREE, &probs[COEF_BANDS[start]][cval])?;
+            return Ok(());
+        }
+
+        for i in start..16 {
+            let val = blk[DEFAULT_SCAN_ORDER[i]];
+            let token = match val.abs() {
+                    0 => DCTToken::Zero,
+                    1 => DCTToken::One,
+                    2 => DCTToken::Two,
+                    3 => DCTToken::Three,
+                    4 => DCTToken::Four,
+                    5..=6 => DCTToken::Cat1,
+                    7..=10 => DCTToken::Cat2,
+                    11..=18 => DCTToken::Cat3,
+                    19..=34 => DCTToken::Cat4,
+                    35..=66 => DCTToken::Cat5,
+                    _ => DCTToken::Cat6,
+                };
+            self.write_el(token, COEF_TREE, &probs[COEF_BANDS[i]][cval])?;
+            match token {
+                DCTToken::Zero => {},
+                DCTToken::One |
+                DCTToken::Two |
+                DCTToken::Three |
+                DCTToken::Four => self.put_bool(val < 0, 128)?,
+                DCTToken::Cat1 => self.write_large_coef(val, 0)?,
+                DCTToken::Cat2 => self.write_large_coef(val, 1)?,
+                DCTToken::Cat3 => self.write_large_coef(val, 2)?,
+                DCTToken::Cat4 => self.write_large_coef(val, 3)?,
+                DCTToken::Cat5 => self.write_large_coef(val, 4)?,
+                DCTToken::Cat6 => self.write_large_coef(val, 5)?,
+                DCTToken::EOB => {},
+            };
+            cval = val.abs().min(2) as usize;
+
+            if DEFAULT_SCAN_ORDER[i] == last {
+                if DEFAULT_SCAN_ORDER[i] != 15 {
+                    self.write_el(DCTToken::EOB, COEF_TREE, &probs[COEF_BANDS[i + 1]][cval])?;
+                }
+                break;
+            }
+        }
+        Ok(())
+    }
+    fn encode_mv_component(&mut self, val: i16, probs: &[u8; 17]) -> EncoderResult<()> {
+        let aval = val.abs();
+        self.write_el(aval.min(8), MV_TREE, probs)?;
+        if aval >= 8 {
+            for &ord in LONG_VECTOR_ORDER.iter() {
+                self.put_bool(((aval >> ord) & 1) != 0, probs[ord + 9])?;
+            }
+            if (aval & 0xF0) != 0 {
+                self.put_bool((aval & (1 << 3)) != 0, probs[3 + 9])?;
+            }
+        }
+        if val != 0 {
+            self.put_bool(val < 0, probs[1])?;
+        }
+        Ok(())
+    }
+    fn encode_mv(&mut self, mv: MV, models: &VP7Models) -> EncoderResult<()> {
+        self.encode_mv_component(mv.y, &models.mv_probs[0])?;
+        self.encode_mv_component(mv.x, &models.mv_probs[1])?;
+        Ok(())
+    }
+    fn encode_sub_mv(&mut self, stype: SubMVRef, mv: MV, models: &VP7Models) -> EncoderResult<()> {
+        self.write_el(stype, SUB_MV_REF_TREE, &SUB_MV_REF_PROBS)?;
+        if stype == SubMVRef::New {
+            self.encode_mv_component(mv.y, &models.mv_probs[0])?;
+            self.encode_mv_component(mv.x, &models.mv_probs[1])?;
+        }
+        Ok(())
+    }
+    fn encode_feature(&mut self, id: usize, feat: Option<u8>, models: &VP7Models) -> EncoderResult<()> {
+        self.put_bool(feat.is_some(), models.feature_present[id])?;
+        if let Some(num) = feat {
+            self.write_el(num, FEATURE_TREE, &models.feature_tree_probs[id])?;
+        }
+        Ok(())
+    }
+    fn encode_mb_type(&mut self, is_intra: bool, mb_type: &MBType, models: &VP7Models) -> EncoderResult<()> {
+        if !is_intra {
+            self.put_bool(!mb_type.is_intra(), models.prob_intra_pred)?;
+            if !mb_type.is_intra() {
+                let last = mb_type.get_last();
+                self.put_bool(!last, models.prob_last_pred)?;
+            }
+        }
+        match *mb_type {
+            MBType::Intra(ymode, uvmode) => {
+                if is_intra {
+                    self.write_el(ymode, KF_Y_MODE_TREE, KF_Y_MODE_TREE_PROBS)?;
+                    self.write_el(uvmode, UV_MODE_TREE, KF_UV_MODE_TREE_PROBS)?;
+                } else {
+                    self.write_el(ymode, Y_MODE_TREE, &models.kf_ymode_prob)?;
+                    self.write_el(uvmode, UV_MODE_TREE, &models.kf_uvmode_prob)?;
+                }
+            },
+            MBType::Intra4x4(ymodes, yctx, uvmode) => {
+                if is_intra {
+                    self.write_el(PredMode::BPred, KF_Y_MODE_TREE, KF_Y_MODE_TREE_PROBS)?;
+                    for (&ypred, &yctx) in ymodes.iter().zip(yctx.iter()) {
+                        let top_idx  = (yctx / 10) as usize;
+                        let left_idx = (yctx % 10) as usize;
+                        self.write_el(ypred, B_MODE_TREE, &KF_B_MODE_TREE_PROBS[top_idx][left_idx])?;
+                    }
+                    self.write_el(uvmode, UV_MODE_TREE, KF_UV_MODE_TREE_PROBS)?;
+                } else {
+                    self.write_el(PredMode::BPred, Y_MODE_TREE, &models.kf_ymode_prob)?;
+                    for &ypred in ymodes.iter() {
+                        self.write_el(ypred, B_MODE_TREE, B_MODE_TREE_PROBS)?;
+                    }
+                    self.write_el(uvmode, UV_MODE_TREE, &models.kf_uvmode_prob)?;
+                }
+            },
+            MBType::InterNoMV(_last, ref mv_probs) => {
+                self.write_el(VPMBType::InterNoMV, MV_REF_TREE, mv_probs)?;
+            },
+            MBType::InterNearest(_last, ref mv_probs) => {
+                self.write_el(VPMBType::InterNearest, MV_REF_TREE, mv_probs)?;
+            },
+            MBType::InterNear(_last, ref mv_probs) => {
+                self.write_el(VPMBType::InterNear, MV_REF_TREE, mv_probs)?;
+            },
+            MBType::InterMV(_last, ref mv_probs, mv) => {
+                self.write_el(VPMBType::InterMV, MV_REF_TREE, mv_probs)?;
+                self.encode_mv(mv, models)?;
+            },
+            MBType::InterSplitMV(_last, ref mv_probs, split_mode, stypes, mvs) => {
+                self.write_el(VPMBType::InterFourMV, MV_REF_TREE, mv_probs)?;
+                self.write_el(split_mode, MV_SPLIT_MODE_TREE, &MV_SPLIT_MODE_PROBS)?;
+                match split_mode {
+                    MVSplitMode::TopBottom | MVSplitMode::LeftRight => {
+                        for (&stype, &mv) in stypes.iter().zip(mvs.iter()).take(2) {
+                            self.encode_sub_mv(stype, mv, models)?;
+                        }
+                    },
+                    MVSplitMode::Quarters => {
+                        for (&stype, &mv) in stypes.iter().zip(mvs.iter()).take(4) {
+                            self.encode_sub_mv(stype, mv, models)?;
+                        }
+                    },
+                    MVSplitMode::Sixteenths => {
+                        for (&stype, &mv) in stypes.iter().zip(mvs.iter()) {
+                            self.encode_sub_mv(stype, mv, models)?;
+                        }
+                    },
+                };
+            },
+        };
+        Ok(())
+    }
+}
+
+pub fn encode_dct_coef_prob_upd(bc: &mut BoolEncoder, coef_probs: &[[[[u8; 11]; 3]; 8]; 4], prev: &[[[[u8; 11]; 3]; 8]; 4]) -> EncoderResult<()> {
+    for ((new, old), upd) in coef_probs.iter().zip(prev.iter()).zip(DCT_UPDATE_PROBS.iter()) {
+        for ((new, old), upd) in new.iter().zip(old.iter()).zip(upd.iter()) {
+            for ((new, old), upd) in new.iter().zip(old.iter()).zip(upd.iter()) {
+                for ((&new, &old), &upd) in new.iter().zip(old.iter()).zip(upd.iter()) {
+                    bc.put_bool(new != old, upd)?;
+                    if new != old {
+                        bc.put_byte(new)?;
+                    }
+                }
+            }
+        }
+    }
+    Ok(())
+}
+
+pub fn encode_mv_prob_upd(bc: &mut BoolEncoder, mv_probs: &[[u8; 17]; 2], prev: &[[u8; 17]; 2]) -> EncoderResult<()> {
+    for ((new, old), upd) in mv_probs.iter().zip(prev.iter()).zip(MV_UPDATE_PROBS.iter()) {
+        for ((&new, &old), &upd) in new.iter().zip(old.iter()).zip(upd.iter()) {
+            bc.put_bool(new != old, upd)?;
+            if new != old {
+                bc.put_bits(u32::from(new) >> 1, 7)?;
+            }
+        }
+    }
+    Ok(())
+}
+
+pub trait VP7Estimator {
+    fn estimate_subblock(&self, blk: &[i16; 16], ctype: usize, pctx: u8, models: &mut VP7ModelsStat);
+    fn estimate_mv_component(&self, val: i16, probs: &mut [ProbCounter; 17]);
+    fn estimate_mv(&self, mv: MV, models: &mut VP7ModelsStat);
+    fn estimate_sub_mv(&self, stype: SubMVRef, mv: MV, models: &mut VP7ModelsStat);
+    fn estimate_mb_type(&self, is_intra: bool, mb_type: &MBType, models: &mut VP7ModelsStat);
+    fn estimate_feature(&self, id: usize, feat: Option<u8>, models: &mut VP7ModelsStat);
+}
+
+impl VP7Estimator for Estimator {
+    fn estimate_subblock(&self, blk: &[i16; 16], ctype: usize, pctx: u8, models: &mut VP7ModelsStat) {
+        let probs = &mut models.coef_probs[ctype];
+
+        let start = if ctype != 0 { 0 } else { 1 };
+        let mut cval = pctx as usize;
+
+        let mut last = 16;
+        for &idx in DEFAULT_SCAN_ORDER.iter().skip(start) {
+            if blk[idx] != 0 {
+                last = idx;
+            }
+        }
+
+        if last == 16 {
+            self.write_el(DCTToken::EOB, COEF_TREE, &mut probs[COEF_BANDS[start]][cval]);
+            return;
+        }
+
+        for i in start..16 {
+            let val = blk[DEFAULT_SCAN_ORDER[i]];
+            let token = match val.abs() {
+                    0 => DCTToken::Zero,
+                    1 => DCTToken::One,
+                    2 => DCTToken::Two,
+                    3 => DCTToken::Three,
+                    4 => DCTToken::Four,
+                    5..=6 => DCTToken::Cat1,
+                    7..=10 => DCTToken::Cat2,
+                    11..=18 => DCTToken::Cat3,
+                    19..=34 => DCTToken::Cat4,
+                    35..=66 => DCTToken::Cat5,
+                    _ => DCTToken::Cat6,
+                };
+            self.write_el(token, COEF_TREE, &mut probs[COEF_BANDS[i]][cval]);
+            cval = val.abs().min(2) as usize;
+
+            if DEFAULT_SCAN_ORDER[i] == last {
+                if DEFAULT_SCAN_ORDER[i] != 15 {
+                    self.write_el(DCTToken::EOB, COEF_TREE, &mut probs[COEF_BANDS[i + 1]][cval]);
+                }
+                break;
+            }
+        }
+    }
+    fn estimate_mv_component(&self, val: i16, probs: &mut [ProbCounter; 17]) {
+        let aval = val.abs();
+        self.write_el(aval.min(8), MV_TREE, probs);
+        if aval >= 8 {
+            for &ord in LONG_VECTOR_ORDER.iter() {
+                probs[ord + 9].add(((aval >> ord) & 1) != 0);
+            }
+            if (aval & 0xF0) != 0 {
+                probs[3 + 9].add((aval & (1 << 3)) != 0);
+            }
+        }
+        if val != 0 {
+            probs[1].add(val < 0);
+        }
+    }
+    fn estimate_mv(&self, mv: MV, models: &mut VP7ModelsStat) {
+        self.estimate_mv_component(mv.y, &mut models.mv_probs[0]);
+        self.estimate_mv_component(mv.x, &mut models.mv_probs[1]);
+    }
+    fn estimate_sub_mv(&self, stype: SubMVRef, mv: MV, models: &mut VP7ModelsStat) {
+        if stype == SubMVRef::New {
+            self.estimate_mv_component(mv.y, &mut models.mv_probs[0]);
+            self.estimate_mv_component(mv.x, &mut models.mv_probs[1]);
+        }
+    }
+    fn estimate_mb_type(&self, is_intra: bool, mb_type: &MBType, models: &mut VP7ModelsStat) {
+        if !is_intra {
+            models.prob_intra_pred.add(!mb_type.is_intra());
+            if !mb_type.is_intra() {
+                let last = mb_type.get_last();
+                models.prob_last_pred.add(!last);
+            }
+        }
+        match *mb_type {
+            MBType::Intra(ymode, cmode) => {
+                if !is_intra {
+                    self.write_el(ymode, Y_MODE_TREE, &mut models.kf_ymode_prob);
+                    self.write_el(cmode, UV_MODE_TREE, &mut models.kf_uvmode_prob);
+                }
+            },
+            MBType::Intra4x4(_, _, cmode) => {
+                if !is_intra {
+                    self.write_el(PredMode::BPred, Y_MODE_TREE, &mut models.kf_ymode_prob);
+                    self.write_el(cmode, UV_MODE_TREE, &mut models.kf_uvmode_prob);
+                }
+            },
+            MBType::InterMV(_last, _, mv) => {
+                self.estimate_mv(mv, models);
+            },
+            MBType::InterSplitMV(_last, _, split_mode, stypes, mvs) => {
+                match split_mode {
+                    MVSplitMode::TopBottom | MVSplitMode::LeftRight => {
+                        for (&stype, &mv) in stypes.iter().zip(mvs.iter()).take(2) {
+                            self.estimate_sub_mv(stype, mv, models);
+                        }
+                    },
+                    MVSplitMode::Quarters => {
+                        for (&stype, &mv) in stypes.iter().zip(mvs.iter()).take(4) {
+                            self.estimate_sub_mv(stype, mv, models);
+                        }
+                    },
+                    MVSplitMode::Sixteenths => {
+                        for (&stype, &mv) in stypes.iter().zip(mvs.iter()) {
+                            self.estimate_sub_mv(stype, mv, models);
+                        }
+                    },
+                };
+            },
+            _ => {},
+        };
+    }
+    fn estimate_feature(&self, id: usize, feat: Option<u8>, models: &mut VP7ModelsStat) {
+        models.feature_present[id].add(feat.is_some());
+        if let Some(num) = feat {
+            self.write_el(num, FEATURE_TREE, &mut models.feature_tree_probs[id]);
+        }
+    }
+}
+
+fn code_nits<T: PartialEq>(el: T, tree: &[TokenSeq<T>], probs: &[u8]) -> u32 {
+    let mut nits = 0;
+    for entry in tree.iter() {
+        if entry.val == el {
+            for seq in entry.seq.iter() {
+                nits += Estimator::est_nits(seq.bit, probs[seq.idx as usize]);
+            }
+            return nits;
+        }
+    }
+    0
+}
+pub fn b_mode_nits(mode: PredMode) -> u32 {
+    code_nits(mode, B_MODE_TREE, &KF_B_MODE_TREE_PROBS[2][2]) // todo find better context
+}
+fn mv_component_nits(val: i16, probs: &[u8; 17]) -> u32 {
+    
let aval = val.abs(); + let mut nits = code_nits(aval.min(8), MV_TREE, probs); + if aval >= 8 { + for &ord in LONG_VECTOR_ORDER.iter() { + nits += Estimator::est_nits(((aval >> ord) & 1) != 0, probs[ord + 9]); + } + if (aval & 0xF0) != 0 { + nits += Estimator::est_nits((aval & (1 << 3)) != 0, probs[3 + 9]); + } + } + if val != 0 { + nits += u32::from(PROB_BITS[128]); + } + nits +} +pub fn inter_mv_nits(mv: MV, mvprobs: &[u8; 4], nearest_mv: MV, near_mv: MV, pred_mv: MV, models: &VP7Models) -> u32 { + if mv == ZERO_MV { + code_nits(VPMBType::InterNoMV, MV_REF_TREE, mvprobs) + } else if mv == nearest_mv { + code_nits(VPMBType::InterNearest, MV_REF_TREE, mvprobs) + } else if mv == near_mv { + code_nits(VPMBType::InterNear, MV_REF_TREE, mvprobs) + } else { + let dmv = mv - pred_mv; + let mut nits = code_nits(VPMBType::InterMV, MV_REF_TREE, mvprobs); + nits += mv_component_nits(dmv.y, &models.mv_probs[0]); + nits += mv_component_nits(dmv.x, &models.mv_probs[1]); + nits + } +} +pub fn sub_mv_mode_nits(mode: MVSplitMode) -> u32 { + code_nits(mode, MV_SPLIT_MODE_TREE, &MV_SPLIT_MODE_PROBS) +} +pub fn sub_mv_nits(mv: MV, left_mv: MV, top_mv: MV, pred_mv: MV, models: &VP7Models) -> (SubMVRef, u32) { + if mv == ZERO_MV { + (SubMVRef::Zero, code_nits(SubMVRef::Zero, SUB_MV_REF_TREE, &SUB_MV_REF_PROBS)) + } else if mv == left_mv { + (SubMVRef::Left, code_nits(SubMVRef::Left, SUB_MV_REF_TREE, &SUB_MV_REF_PROBS)) + } else if mv == top_mv { + (SubMVRef::Above, code_nits(SubMVRef::Above, SUB_MV_REF_TREE, &SUB_MV_REF_PROBS)) + } else { + let dmv = mv - pred_mv; + let mut nits = code_nits(SubMVRef::New, SUB_MV_REF_TREE, &SUB_MV_REF_PROBS); + nits += mv_component_nits(dmv.y, &models.mv_probs[0]); + nits += mv_component_nits(dmv.x, &models.mv_probs[1]); + (SubMVRef::New, nits) + } +} +fn est_large_coef(val: i16, cat: usize) -> u32 { + let base = VP56_COEF_BASE[cat]; + let mut probs = VP56_COEF_ADD_PROBS[cat].iter(); + let add = val.abs() - base; + let mut mask = 1 << (VP6_COEF_ADD_BITS[cat] - 1); + let mut nits = 0; + while mask != 0 { + nits += Estimator::est_nits((add & mask) != 0, *probs.next().unwrap()); + mask >>= 1; + } + nits += u32::from(PROB_BITS[128]); + + nits +} + +pub fn estimate_subblock_nits(blk: &[i16; 16], ctype: usize, pctx: u8, probs: &[[[u8; 11]; 3]; 8]) -> u32 { + let start = if ctype != 0 { 0 } else { 1 }; + let mut cval = pctx as usize; + + let mut last = 16; + for &idx in DEFAULT_SCAN_ORDER.iter().skip(start) { + if blk[idx] != 0 { + last = idx; + } + } + + if last == 16 { + return code_nits(DCTToken::EOB, COEF_TREE, &probs[COEF_BANDS[start]][cval]); + } + + let mut nits = 0; + for i in start..16 { + let val = blk[DEFAULT_SCAN_ORDER[i]]; + let token = match val.abs() { + 0 => DCTToken::Zero, + 1 => DCTToken::One, + 2 => DCTToken::Two, + 3 => DCTToken::Three, + 4 => DCTToken::Four, + 5..=6 => DCTToken::Cat1, + 7 ..=10 => DCTToken::Cat2, + 11..=18 => DCTToken::Cat3, + 19..=34 => DCTToken::Cat4, + 35..=66 => DCTToken::Cat5, + _ => DCTToken::Cat6, + }; + nits += code_nits(token, COEF_TREE, &probs[COEF_BANDS[i]][cval]); + nits += match token { + DCTToken::Zero => 0, + DCTToken::One | + DCTToken::Two | + DCTToken::Three | + DCTToken::Four => u32::from(PROB_BITS[128]), + DCTToken::Cat1 => est_large_coef(val, 0), + DCTToken::Cat2 => est_large_coef(val, 1), + DCTToken::Cat3 => est_large_coef(val, 2), + DCTToken::Cat4 => est_large_coef(val, 3), + DCTToken::Cat5 => est_large_coef(val, 4), + DCTToken::Cat6 => est_large_coef(val, 5), + DCTToken::EOB => 0, + }; + cval = val.abs().min(2) as usize; + + 
if DEFAULT_SCAN_ORDER[i] == last {
+            if DEFAULT_SCAN_ORDER[i] != 15 {
+                nits += code_nits(DCTToken::EOB, COEF_TREE, &probs[COEF_BANDS[i + 1]][cval]);
+            }
+            break;
+        }
+    }
+    nits
+}
+
+const COEF_BANDS: [usize; 16] = [ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7 ];
+const VP6_COEF_ADD_BITS: [u8; 6] = [ 1, 2, 3, 4, 5, 11 ];
+const LONG_VECTOR_ORDER: [usize; 7] = [ 0, 1, 2, 7, 6, 5, 4 ];
diff --git a/nihav-duck/src/codecs/vp7enc/frame_coder.rs b/nihav-duck/src/codecs/vp7enc/frame_coder.rs
new file mode 100644
index 0000000..080ab95
--- /dev/null
+++ b/nihav-duck/src/codecs/vp7enc/frame_coder.rs
@@ -0,0 +1,603 @@
+use nihav_core::codecs::*;
+use nihav_codec_support::codecs::ZERO_MV;
+use super::super::vp78::PredMode;
+use super::super::vp78dsp::*;
+use super::super::vp7dsp::*;
+use super::blocks::*;
+use super::coder::*;
+use super::mb_coding::*;
+use super::models::*;
+use super::motion_est::*;
+use super::rdo::*;
+
+const MBT_Q_OFFSET: usize = 3;
+
+pub struct LoopParams {
+    pub loop_sharpness: u8,
+    pub loop_filter_level: u8,
+    pub lf_simple: bool,
+}
+
+pub struct FrameEncoder {
+    mb_w: usize,
+    mb_h: usize,
+    pub loop_params: LoopParams,
+
+    sblocks: Vec<SrcBlock>,
+    res: Vec<Residue>,
+    mbtypes: Vec<MBType>,
+    recon: Vec<SrcBlock>,
+    features: Vec<u8>,
+    has_features: bool,
+
+    pctx: PredContext,
+
+    me_mode: MVSearchMode,
+    me_range: i16,
+    mc_buf1: NAVideoBufferRef<u8>,
+    mc_buf2: NAVideoBufferRef<u8>,
+    mv_search_last: Box<dyn MVSearch + Send>,
+    mv_search_gold: Box<dyn MVSearch + Send>,
+}
+
+impl FrameEncoder {
+    pub fn new(mc_buf1: NAVideoBufferRef<u8>, mc_buf2: NAVideoBufferRef<u8>) -> Self {
+        let me_mode = MVSearchMode::default();
+
+        Self {
+            mb_w: 0,
+            mb_h: 0,
+
+            sblocks: Vec::new(),
+            res: Vec::new(),
+            mbtypes: Vec::new(),
+            recon: Vec::new(),
+            features: Vec::new(),
+            has_features: false,
+
+            pctx: PredContext::new(),
+
+            loop_params: LoopParams {
+                loop_filter_level: 0,
+                loop_sharpness: 0,
+                lf_simple: true,
+            },
+            me_mode,
+            me_range: 0,
+            mv_search_last: me_mode.create_search(),
+            mv_search_gold: me_mode.create_search(),
+            mc_buf1, mc_buf2,
+        }
+    }
+    pub fn resize(&mut self, mb_w: usize, mb_h: usize) {
+        self.mb_w = mb_w;
+        self.mb_h = mb_h;
+
+        self.pctx.resize(mb_w, mb_h);
+
+        self.sblocks.clear();
+        self.sblocks.reserve(mb_w * mb_h);
+        self.res.clear();
+        self.res.reserve(mb_w * mb_h);
+        self.mbtypes.clear();
+        self.mbtypes.reserve(mb_w * mb_h);
+        self.recon.clear();
+        self.recon.reserve(mb_w * mb_h);
+        self.features.clear();
+        self.features.reserve(mb_w * mb_h);
+    }
+    pub fn set_me_params(&mut self, me_mode: MVSearchMode, me_range: i16, version: u8) {
+        self.me_range = me_range;
+        if self.me_mode != me_mode {
+            self.me_mode = me_mode;
+            self.mv_search_last = me_mode.create_search();
+            self.mv_search_gold = me_mode.create_search();
+        }
+        self.pctx.version = version;
+    }
+    pub fn load_frame(&mut self, vbuf: &NAVideoBuffer<u8>) {
+        load_blocks(vbuf, &mut self.sblocks);
+    }
+
+    pub fn mb_tree_search(&mut self, ref_frm: NAVideoBufferRef<u8>, mb_map: &[usize], new_mb_map: &mut [usize], mb_weights: &mut [usize]) {
+        let mut mv_est = MVEstimator::new(ref_frm, self.mc_buf1.clone(), self.me_range);
+        self.mv_search_last.preinit(&mv_est);
+        let mut mb_idx = 0;
+        new_mb_map.copy_from_slice(mb_map);
+        for (mb_y, mb_row) in self.sblocks.chunks(self.mb_w).enumerate() {
+            for (mb_x, blk) in mb_row.iter().enumerate() {
+                let (mv, _) = self.mv_search_last.search_mb(&mut mv_est, blk, mb_x, mb_y);
+
+                if mv != ZERO_MV {
+                    let new_x = ((((mb_x as isize) * 64 + (mv.x as isize) + 32) >> 6).max(0) as usize).min(self.mb_w - 1);
+                    let new_y = ((((mb_y as isize) * 64 + (mv.y as isize) + 32) 
>> 6).max(0) as usize).min(self.mb_h - 1); + let nidx = new_x + new_y * self.mb_w; + new_mb_map[mb_idx] = mb_map[nidx]; + } + mb_weights[new_mb_map[mb_idx]] += 1; + mb_idx += 1; + } + } + } + + pub fn intra_blocks(&mut self, base_q: usize, metric: &RateDistMetric, models: &VP7Models, mbt_map: Option<&[usize]>) { + self.mbtypes.clear(); + self.pctx.reset(); + self.pctx.reset_intra(); + self.res.clear(); + self.recon.clear(); + self.features.clear(); + + self.has_features = false; + if base_q > MBT_Q_OFFSET { + if let Some(map) = mbt_map { + let sum: usize = map.iter().sum(); + let size = map.len(); + let avg = (sum + size / 2) / size; + for &val in map.iter() { + if val > avg { + self.features.push(1); + self.has_features = true; + } else { + self.features.push(0); + } + } + } else { + for _ in 0..(self.mb_w * self.mb_h) { + self.features.push(0); + } + } + } + + let mut imctx = IntraModePredCtx { + metric, + models, + tr: [0; 4], + q: base_q, + ipred_y: IPredContext::default(), + ipred_u: IPredContext::default(), + ipred_v: IPredContext::default(), + pctx: BlockPCtx::default(), + }; + + for (mb_y, mb_row) in self.sblocks.chunks_mut(self.mb_w).enumerate() { + imctx.ipred_y.has_top = mb_y != 0; + imctx.ipred_u.has_top = mb_y != 0; + imctx.ipred_v.has_top = mb_y != 0; + + for (mb_x, sblk) in mb_row.iter().enumerate() { + self.pctx.fill_ipred(0, mb_x, &mut imctx.ipred_y); + self.pctx.fill_ipred(1, mb_x, &mut imctx.ipred_u); + self.pctx.fill_ipred(2, mb_x, &mut imctx.ipred_v); + self.pctx.fill_pctx(mb_x, &mut imctx.pctx); + if self.has_features { + imctx.q = if self.features[mb_x + mb_y * self.mb_w] != 0 { + base_q - MBT_Q_OFFSET + } else { + base_q + }; + } + + let mut res = Residue::new(); + let mut newblk = SrcBlock::default(); + + imctx.tr = self.pctx.get_ipred_tr(mb_x); + let mut mb_type = select_intra_mode(sblk, &mut newblk, &mut res, &imctx, MAX_DIST, MBType::InterNoMV(false, [0;4])); + + let use_i4 = match mb_type { + MBType::Intra(best_ymode, best_cmode) => { + sblk.apply_ipred_luma(best_ymode, &imctx.ipred_y, &mut res); + newblk.fill_ipred_luma(best_ymode, &imctx.ipred_y); + sblk.apply_ipred_chroma(best_cmode, &imctx.ipred_u, &imctx.ipred_v, &mut res); + newblk.fill_ipred_chroma(best_cmode, &imctx.ipred_u, &imctx.ipred_v); + res.fdct(); + res.fdct_dc_block(); + + self.pctx.ymodes.set_mode(mb_x, best_ymode); + + false + }, + MBType::Intra4x4(ref i4_modes, ref mut i4ctx, best_cmode) => { + sblk.apply_ipred_chroma(best_cmode, &imctx.ipred_u, &imctx.ipred_v, &mut res); + newblk.fill_ipred_chroma(best_cmode, &imctx.ipred_u, &imctx.ipred_v); + res.fdct(); + + self.pctx.ymodes.set_modes4x4(mb_x, i4_modes, i4ctx); + + true + }, + _ => unreachable!(), + }; + + res.quant(imctx.q); + self.pctx.set_nz(mb_x, &res); + let mut recon = res.clone(); + self.res.push(res); + self.mbtypes.push(mb_type); + + if !use_i4 { + recon.add_residue(&mut newblk); + } else { + recon.add_residue_chroma(&mut newblk); + } + + self.pctx.update_mb(&newblk, mb_x); + self.recon.push(newblk); + } + self.pctx.update_mb_row(); + } + } + pub fn inter_blocks(&mut self, q: usize, metric: &RateDistMetric, models: &VP7Models, last_frame: &NABufferType, gold_frame: &NABufferType) { + self.has_features = false; + + let mut mv_est_last = MVEstimator::new(last_frame.get_vbuf().unwrap(), self.mc_buf1.clone(), self.me_range); + self.mv_search_last.preinit(&mv_est_last); + let mut mv_est_gold = if let Some(gbuf) = gold_frame.get_vbuf() { + let mv_est = MVEstimator::new(gbuf, self.mc_buf2.clone(), self.me_range); + 
self.mv_search_gold.preinit(&mv_est); + Some(mv_est) + } else { + None + }; + + self.mbtypes.clear(); + self.pctx.reset(); + self.pctx.save_dc_pred(); + self.res.clear(); + self.recon.clear(); + self.features.clear(); + + let mut imctx = IntraModePredCtx { + metric, + models, + tr: [0; 4], + q, + ipred_y: IPredContext::default(), + ipred_u: IPredContext::default(), + ipred_v: IPredContext::default(), + pctx: BlockPCtx::default(), + }; + + for (mb_y, mb_row) in self.sblocks.chunks_mut(self.mb_w).enumerate() { + imctx.ipred_y.has_top = mb_y != 0; + imctx.ipred_u.has_top = mb_y != 0; + imctx.ipred_v.has_top = mb_y != 0; + + for (mb_x, sblk) in mb_row.iter().enumerate() { + self.pctx.fill_ipred(0, mb_x, &mut imctx.ipred_y); + self.pctx.fill_ipred(1, mb_x, &mut imctx.ipred_u); + self.pctx.fill_ipred(2, mb_x, &mut imctx.ipred_v); + self.pctx.fill_pctx(mb_x, &mut imctx.pctx); + + let mut res = Residue::new(); + let mut newblk = SrcBlock::default(); + + let (mvprobs, nearest_mv, near_mv, pred_mv) = self.pctx.find_mv_pred(mb_x, mb_y); + + let (mv, _dist) = self.mv_search_last.search_mb(&mut mv_est_last, sblk, mb_x, mb_y); + + mv_est_last.get_mb(&mut newblk, mb_x, mb_y, mv); + let mv_nits_dist = metric.calc_metric(0, inter_mv_nits(mv, &mvprobs, nearest_mv, near_mv, pred_mv, models)); + let last_dist = calc_inter_mb_dist(sblk, &newblk, &mut res, &imctx, self.pctx.get_y2_dc_pred(true)) + mv_nits_dist; + + let (gmv, gold_dist) = if last_dist > SMALL_DIST { + if let Some(ref mut mv_est) = &mut mv_est_gold { + let (gmv, _gdist) = self.mv_search_gold.search_mb(mv_est, sblk, mb_x, mb_y); + mv_est.get_mb(&mut newblk, mb_x, mb_y, gmv); + let mv_nits_dist = metric.calc_metric(0, inter_mv_nits(gmv, &mvprobs, nearest_mv, near_mv, pred_mv, models)); + let gdist = calc_inter_mb_dist(sblk, &newblk, &mut res, &imctx, self.pctx.get_y2_dc_pred(false)) + mv_nits_dist; + (gmv, gdist) + } else { + (ZERO_MV, MAX_DIST) + } + } else { + (ZERO_MV, MAX_DIST) + }; + + let (last, mut inter_dist, mv, mv_est) = if last_dist < gold_dist { + (true, last_dist, mv, &mut mv_est_last) + } else if let Some (ref mut mv_est) = &mut mv_est_gold { + (false, gold_dist, gmv, mv_est) + } else { + unreachable!() + }; + + let mut mb_type = if mv == ZERO_MV { + MBType::InterNoMV(last, mvprobs) + } else if mv == nearest_mv { + MBType::InterNearest(last, mvprobs) + } else if mv == near_mv { + MBType::InterNear(last, mvprobs) + } else { + MBType::InterMV(last, mvprobs, mv - pred_mv) + }; + if inter_dist > SMALL_DIST { + if let MBType::InterMV(_, _, _) = mb_type { // xxx: maybe do it for all types? 
+                        let mv_search = if last { &mut self.mv_search_last } else { &mut self.mv_search_gold };
+                        if let Some((mbt, dist)) = try_inter_split(sblk, &mut newblk, &mut res, mvprobs, nearest_mv, near_mv, pred_mv, last, mb_x, mb_y, mv_search, mv_est, &mut self.pctx, &imctx, inter_dist) {
+                            mb_type = mbt;
+                            inter_dist = dist;
+                        }
+                    }
+                }
+
+                if inter_dist > SMALL_DIST {
+                    imctx.tr = self.pctx.get_ipred_tr(mb_x);
+                    mb_type = select_intra_mode(sblk, &mut newblk, &mut res, &imctx, inter_dist, mb_type);
+                }
+
+                self.mbtypes.push(mb_type);
+                res.reset();
+                match mb_type {
+                    MBType::Intra(ymode, cmode) => {
+                        newblk.fill_ipred_luma(ymode, &imctx.ipred_y);
+                        newblk.fill_ipred_chroma(cmode, &imctx.ipred_u, &imctx.ipred_v);
+                        self.pctx.ymodes.set_mode(mb_x, ymode);
+                        self.pctx.fill_mv(mb_x, mb_y, ZERO_MV);
+                    },
+                    MBType::Intra4x4(ref i4_modes, ref mut i4ctx, cmode) => {
+                        newblk.fill_ipred_chroma(cmode, &imctx.ipred_u, &imctx.ipred_v);
+                        self.pctx.ymodes.set_modes4x4(mb_x, i4_modes, i4ctx);
+                        self.pctx.fill_mv(mb_x, mb_y, ZERO_MV);
+                    },
+                    MBType::InterNoMV(_, _) |
+                    MBType::InterNearest(_, _) |
+                    MBType::InterNear(_, _) |
+                    MBType::InterMV(_, _, _) => {
+                        mv_est.get_mb(&mut newblk, mb_x, mb_y, mv);
+                        self.pctx.fill_mv(mb_x, mb_y, mv);
+                        self.pctx.ymodes.set_mode(mb_x, PredMode::Inter);
+                    },
+                    MBType::InterSplitMV(_, _, _, _, _) => {
+                        self.pctx.ymodes.set_mode(mb_x, PredMode::Inter);
+                        recon_split_mb(&mut newblk, mb_x, mb_y, &self.pctx.mvs, self.pctx.mv_stride, mv_est);
+                    },
+                };
+                if let MBType::Intra4x4(_, _, _) = mb_type {
+                    res.set_chroma_from_diff(&sblk.chroma, &newblk.chroma);
+                    res.fdct();
+                } else {
+                    res.set_luma_from_diff(&sblk.luma, &newblk.luma);
+                    res.set_chroma_from_diff(&sblk.chroma, &newblk.chroma);
+                    res.fdct();
+                    res.fdct_dc_block();
+                    if !mb_type.is_intra() {
+                        requant_y2_dc(&mut res.dcs[0], q);
+                        self.pctx.predict_y2_dc(&mut res.dcs[0], last);
+                    }
+                }
+
+                res.quant(q);
+                self.pctx.set_nz(mb_x, &res);
+                let mut recon = res.clone();
+                self.res.push(res);
+                self.features.push(0);
+                if let MBType::Intra4x4(_, _, _) = mb_type {
+                    recon.add_residue_chroma(&mut newblk);
+                } else {
+                    recon.add_residue(&mut newblk);
+                }
+                self.pctx.update_mb(&newblk, mb_x);
+                self.recon.push(newblk);
+            }
+            self.pctx.update_mb_row();
+        }
+    }
+    pub fn encode_features(&self, bc: &mut BoolEncoder, q: usize, models: &VP7Models) -> EncoderResult<()> {
+        if self.has_features {
+            // first feature - quantiser
+            bc.put_bool(true, 128)?;
+            bc.put_byte(models.feature_present[0])?;
+            for &prob in models.feature_tree_probs[0].iter() {
+                bc.put_bool(prob != 255, 128)?;
+                if prob != 255 {
+                    bc.put_byte(prob)?;
+                }
+            }
+            bc.put_bool(true, 128)?;
+            bc.put_bits((q - MBT_Q_OFFSET) as u32, 7)?;
+            for _ in 1..4 {
+                bc.put_bool(false, 128)?; // other quants
+            }
+
+            // other features (not coded)
+            for _ in 1..4 {
+                bc.put_bool(false, 128)?;
+            }
+        } else {
+            for _ in 0..4 {
+                bc.put_bool(false, 128)?;
+            }
+        }
+        Ok(())
+    }
+    pub fn encode_mb_types(&self, bc: &mut BoolEncoder, is_intra: bool, models: &VP7Models) -> EncoderResult<()> {
+        for (mb_type, &feature) in self.mbtypes.iter().zip(self.features.iter()) {
+            if self.has_features {
+                bc.encode_feature(0, if feature == 0 { None } else { Some(0) }, models)?;
+            }
+            bc.encode_mb_type(is_intra, mb_type, models)?;
+        }
+        Ok(())
+    }
+    pub fn encode_residues(&mut self, bc: &mut BoolEncoder, models: &VP7Models) -> EncoderResult<()> {
+        self.pctx.reset();
+        //self.pctx.restore_dc_pred();
+        for (_mb_y, mb_row) in self.res.chunks(self.mb_w).enumerate() {
+            for (mb_x, blk) in mb_row.iter().enumerate() {
+                if blk.has_dc {
+                    let pctx = (self.pctx.nz_y2_left as u8) + (self.pctx.nz_y2_top[mb_x] as u8);
+                    bc.encode_subblock(&blk.dcs, 1, pctx, models)?;
+                    let has_nz = blk.dcs.has_nz();
+                    self.pctx.nz_y2_left = has_nz;
+                    self.pctx.nz_y2_top[mb_x] = has_nz;
+                }
+                let ytype = if blk.has_dc { 0 } else { 3 };
+                for (y, blk_row) in blk.luma.chunks(4).enumerate() {
+                    for (x, blk) in blk_row.iter().enumerate() {
+                        let pctx = (self.pctx.nz_y_left[y] as u8) + (self.pctx.nz_y_top[mb_x * 4 + x] as u8);
+                        bc.encode_subblock(blk, ytype, pctx, models)?;
+                        let has_nz = blk.has_nz();
+                        self.pctx.nz_y_left[y] = has_nz;
+                        self.pctx.nz_y_top[mb_x * 4 + x] = has_nz;
+                    }
+                }
+
+                for (c, chroma) in blk.chroma.iter().enumerate() {
+                    for (y, blk_row) in chroma.chunks(2).enumerate() {
+                        for (x, blk) in blk_row.iter().enumerate() {
+                            let pctx = (self.pctx.nz_c_left[c][y] as u8) + (self.pctx.nz_c_top[c][mb_x * 2 + x] as u8);
+                            bc.encode_subblock(blk, 2, pctx, models)?;
+                            let has_nz = blk.has_nz();
+                            self.pctx.nz_c_left[c][y] = has_nz;
+                            self.pctx.nz_c_top[c][mb_x * 2 + x] = has_nz;
+                        }
+                    }
+                }
+            }
+            self.pctx.update_mb_row();
+        }
+        Ok(())
+    }
+    pub fn generate_models(&mut self, is_intra: bool, stats: &mut VP7ModelsStat) {
+        stats.reset();
+        let est = Estimator::new();
+        self.pctx.reset();
+        if self.has_features {
+            for &feat in self.features.iter() {
+                est.estimate_feature(0, if feat == 0 { None } else { Some(0) }, stats);
+            }
+        }
+        for (mbt_row, mb_row) in self.mbtypes.chunks(self.mb_w).zip(self.res.chunks(self.mb_w)) {
+            for (mb_x, (mbtype, blk)) in mbt_row.iter().zip(mb_row.iter()).enumerate() {
+                est.estimate_mb_type(is_intra, mbtype, stats);
+                if blk.has_dc {
+                    let pctx = (self.pctx.nz_y2_left as u8) + (self.pctx.nz_y2_top[mb_x] as u8);
+                    est.estimate_subblock(&blk.dcs, 1, pctx, stats);
+                    let has_nz = blk.dcs.has_nz();
+                    self.pctx.nz_y2_left = has_nz;
+                    self.pctx.nz_y2_top[mb_x] = has_nz;
+                }
+                let ytype = if blk.has_dc { 0 } else { 3 };
+                for (y, blk_row) in blk.luma.chunks(4).enumerate() {
+                    for (x, blk) in blk_row.iter().enumerate() {
+                        let pctx = (self.pctx.nz_y_left[y] as u8) + (self.pctx.nz_y_top[mb_x * 4 + x] as u8);
+                        est.estimate_subblock(blk, ytype, pctx, stats);
+                        let has_nz = blk.has_nz();
+                        self.pctx.nz_y_left[y] = has_nz;
+                        self.pctx.nz_y_top[mb_x * 4 + x] = has_nz;
+                    }
+                }
+
+                for (c, chroma) in blk.chroma.iter().enumerate() {
+                    for (y, blk_row) in chroma.chunks(2).enumerate() {
+                        for (x, blk) in blk_row.iter().enumerate() {
+                            let pctx = (self.pctx.nz_c_left[c][y] as u8) + (self.pctx.nz_c_top[c][mb_x * 2 + x] as u8);
+                            est.estimate_subblock(blk, 2, pctx, stats);
+                            let has_nz = blk.has_nz();
+                            self.pctx.nz_c_left[c][y] = has_nz;
+                            self.pctx.nz_c_top[c][mb_x * 2 + x] = has_nz;
+                        }
+                    }
+                }
+            }
+            self.pctx.update_mb_row();
+        }
+    }
+    pub fn reconstruct_frame(&mut self, frm: &mut NASimpleVideoFrame<u8>, is_intra: bool) {
+        let mut yidx = frm.offset[0];
+        let mut uidx = frm.offset[1];
+        let mut vidx = frm.offset[2];
+        let ystride = frm.stride[0];
+        let ustride = frm.stride[1];
+        let vstride = frm.stride[2];
+
+        for (mb_y, (f_row, mb_row)) in self.features.chunks(self.mb_w).zip(self.recon.chunks(self.mb_w)).enumerate() {
+            for (mb_x, (&feature, sblk)) in f_row.iter().zip(mb_row.iter()).enumerate() {
+                let dst = &mut frm.data[yidx + mb_x * 16..];
+                for (dst, src) in dst.chunks_mut(ystride).zip(sblk.luma.chunks(16)) {
+                    dst[..16].copy_from_slice(src);
+                }
+                let dst = &mut frm.data[uidx + mb_x * 8..];
+                for (dst, src) in dst.chunks_mut(ustride).zip(sblk.chroma[0].chunks(8)) {
+                    dst[..8].copy_from_slice(src);
+                }
+                let dst = &mut frm.data[vidx + mb_x * 8..];
+                for (dst, src) in dst.chunks_mut(vstride).zip(sblk.chroma[1].chunks(8)) {
+                    dst[..8].copy_from_slice(src);
+                }
+
+                let loop_str = if feature != 2 {
+                        self.loop_params.loop_filter_level
+                    } else { 0 }; //todo
+                loop_filter_mb(frm, mb_x, mb_y, loop_str, &self.loop_params, is_intra);
+            }
+            yidx += ystride * 16;
+            uidx += ustride * 8;
+            vidx += vstride * 8;
+        }
+    }
+}
+
+fn loop_filter_mb(dframe: &mut NASimpleVideoFrame<u8>, mb_x: usize, mb_y: usize, loop_str: u8, loop_params: &LoopParams, is_intra: bool) {
+    const HIGH_EDGE_VAR_THR: [[u8; 64]; 2] = [
+      [
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+        1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
+      ], [
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+      ]];
+
+    let edge_thr = i16::from(loop_str) + 2;
+    let luma_thr = i16::from(loop_str);
+    let chroma_thr = i16::from(loop_str) * 2;
+    let inner_thr = if loop_params.loop_sharpness == 0 {
+            i16::from(loop_str)
+        } else {
+            let bound1 = i16::from(9 - loop_params.loop_sharpness);
+            let shift = (loop_params.loop_sharpness + 3) >> 2;
+            (i16::from(loop_str) >> shift).min(bound1)
+        };
+    let hev_thr = i16::from(HIGH_EDGE_VAR_THR[if is_intra { 1 } else { 0 }][loop_str as usize]);
+
+    let ystride = dframe.stride[0];
+    let ustride = dframe.stride[1];
+    let vstride = dframe.stride[2];
+    let ypos = dframe.offset[0] + mb_x * 16 + mb_y * 16 * ystride;
+    let upos = dframe.offset[1] + mb_x *  8 + mb_y *  8 * ustride;
+    let vpos = dframe.offset[2] + mb_x *  8 + mb_y *  8 * vstride;
+
+    let (loop_edge, loop_inner) = if loop_params.lf_simple {
+            (simple_loop_filter as LoopFilterFunc, simple_loop_filter as LoopFilterFunc)
+        } else {
+            (normal_loop_filter_edge as LoopFilterFunc, normal_loop_filter_inner as LoopFilterFunc)
+        };
+
+    if mb_x > 0 {
+        loop_edge(dframe.data, ypos, 1, ystride, 16, edge_thr, inner_thr, hev_thr);
+        loop_edge(dframe.data, upos, 1, ustride,  8, edge_thr, inner_thr, hev_thr);
+        loop_edge(dframe.data, vpos, 1, vstride,  8, edge_thr, inner_thr, hev_thr);
+    }
+    if mb_y > 0 {
+        loop_edge(dframe.data, ypos, ystride, 1, 16, edge_thr, inner_thr, hev_thr);
+        loop_edge(dframe.data, upos, ustride, 1,  8, edge_thr, inner_thr, hev_thr);
+        loop_edge(dframe.data, vpos, vstride, 1,  8, edge_thr, inner_thr, hev_thr);
+    }
+
+    for y in 1..4 {
+        loop_inner(dframe.data, ypos + y * 4 * ystride, ystride, 1, 16, luma_thr, inner_thr, hev_thr);
+    }
+    loop_inner(dframe.data, upos + 4 * ustride, ustride, 1, 8, chroma_thr, inner_thr, hev_thr);
+    loop_inner(dframe.data, vpos + 4 * vstride, vstride, 1, 8, chroma_thr, inner_thr, hev_thr);
+
+    for x in 1..4 {
+        loop_inner(dframe.data, ypos + x * 4, 1, ystride, 16, luma_thr, inner_thr, hev_thr);
+    }
+    loop_inner(dframe.data, upos + 4, 1, ustride, 8, chroma_thr, inner_thr, hev_thr);
+    loop_inner(dframe.data, vpos + 4, 1, vstride, 8, chroma_thr, inner_thr, hev_thr);
+}
diff --git a/nihav-duck/src/codecs/vp7enc/mb_coding.rs b/nihav-duck/src/codecs/vp7enc/mb_coding.rs
new file mode 100644
index 0000000..cf73d2d
--- /dev/null
+++ b/nihav-duck/src/codecs/vp7enc/mb_coding.rs
@@ -0,0 +1,522 @@
+use nihav_codec_support::codecs::{MV, ZERO_MV};
+use super::super::vp78::{PredMode, MVSplitMode, SubMVRef};
+use super::super::vp78dsp::*;
+use super::blocks::*;
+use super::coder::*;
+use super::models::*;
+use super::motion_est::*;
+use super::rdo::*;
+
+pub struct IntraModePredCtx<'a> {
+    pub metric: &'a RateDistMetric,
+    pub models: &'a VP7Models,
+    pub q: usize,
+    pub tr: [u8; 4],
+    pub ipred_y: IPredContext,
+    pub ipred_u: IPredContext,
+    pub ipred_v: IPredContext,
+    pub pctx: BlockPCtx,
+}
+
+struct UniqueList<A> {
+    list: [A; 4],
+    fill: usize,
+}
+
+impl<A: Copy + Default + PartialEq> UniqueList<A> {
+    fn new() -> Self {
+        Self { list: [A::default(); 4], fill: 0 }
+    }
+    fn add(&mut self, cand: A) {
+        if self.fill == self.list.len() { return; }
+        let mut unique = true;
+        for el in self.list.iter().take(self.fill) {
+            if *el == cand {
+                unique = false;
+                break;
+            }
+        }
+        if unique {
+            self.list[self.fill] = cand;
+            self.fill += 1;
+        }
+    }
+    fn get_list(&self) -> &[A] { &self.list[..self.fill] }
+}
+
+pub fn try_i4x4_pred(mut src: LumaIterator, modes: &mut [PredMode; 16], res: &mut Residue, new: &mut [u8; 256], pctx: &IntraModePredCtx, ref_best_dist: u32) -> u32 {
+    const PRED4X4: [PredMode; 10] = [
+        PredMode::DCPred, PredMode::HPred, PredMode::VPred, PredMode::TMPred,
+        PredMode::LDPred, PredMode::RDPred, PredMode::VRPred, PredMode::VLPred,
+        PredMode::HUPred, PredMode::HDPred
+    ];
+
+    let mut ipred4 = IPredContext::default();
+    let mut top = [0x80; 21];
+    let mut diff = [0i16; 16];
+    let mut yblk = [0u8; 16];
+    top[0] = pctx.ipred_y.tl;
+    top[1..][..16].copy_from_slice(&pctx.ipred_y.top);
+    top[17..].copy_from_slice(&pctx.tr);
+
+    let mut tot_dist = 0;
+    let mut nz_top = pctx.pctx.nz_y_top;
+    let mut nz_left = pctx.pctx.nz_y_left;
+    for y in 0..4 {
+        let (l1, l2) = ipred4.left.split_at_mut(16 - y * 4);
+        l1.copy_from_slice(&pctx.ipred_y.left[y * 4..]);
+        for el in l2.iter_mut() { *el = 0x80; }
+
+        ipred4.tl = if y == 0 { top[0] } else { pctx.ipred_y.left[y * 4 - 1] };
+        for x in 0..4 {
+            let tsrc = &top[x * 4 + 1..];
+            let (t1, t2) = ipred4.top.split_at_mut(tsrc.len().min(16));
+            for (dst, &src) in t1.iter_mut().zip(tsrc.iter()) { *dst = src; }
+            for el in t2.iter_mut() { *el = 0x80; }
+
+            let mut best_mode = PredMode::DCPred;
+            let mut best_dist = MAX_DIST;
+            let mut best_has_nz = false;
+
+            let srcblk = src.next().unwrap();
+            for &mode in PRED4X4.iter() {
+                yblk.ipred4(4, mode, &ipred4);
+                let mode_nits = b_mode_nits(mode);
+                let blkctx = (nz_top[x] as u8) + (nz_left[y] as u8);
+                let (dist1, has_nz) = pctx.metric.block_dist(&srcblk, &yblk, pctx.q, 3, blkctx, &pctx.models.coef_probs[3]);
+                let dist = dist1 + pctx.metric.calc_metric(0, mode_nits);
+                if dist < best_dist {
+                    best_mode = mode;
+                    best_dist = dist;
+                    best_has_nz = has_nz;
+                    if dist <= SMALL_DIST {
+                        break;
+                    }
+                }
+            }
+            nz_top[x] = best_has_nz;
+            nz_left[y] = best_has_nz;
+            modes[x + y * 4] = best_mode;
+            tot_dist += best_dist;
+            if tot_dist >= ref_best_dist {
+                return MAX_DIST;
+            }
+
+            yblk.ipred4(4, modes[x + y * 4], &ipred4);
+            get_block_difference(&mut diff, &srcblk, &yblk);
+            res.luma[x + y * 4] = diff;
+            diff.fdct();
+            diff.requant_y(pctx.q);
+            diff.idct();
+
+            let nblk = &mut new[x * 4 + y * 4 * 16..];
+            for (dst, (src, res)) in nblk.chunks_mut(16).zip(yblk.chunks(4).zip(diff.chunks(4))) {
+                for (del, (&sel, &rel)) in dst.iter_mut().zip(src.iter().zip(res.iter())) {
+                    *del = (i16::from(sel) + rel).max(0).min(255) as u8;
+                }
+            }
+
+            ipred4.tl = top[x * 4 + 4];
+            top[x * 4 + 1..][..4].copy_from_slice(&nblk[16 * 3..][..4]);
+            for (dst, src) in ipred4.left[..4].iter_mut().zip(nblk.chunks(16)) {
+                *dst = src[3];
+            }
+        }
+    }
+    tot_dist
+}
+
+fn try_intra16_pred(sblk: &SrcBlock, newblk: &mut SrcBlock, res: &mut Residue, imctx: &IntraModePredCtx, ymode: PredMode)
-> u32 { + newblk.fill_ipred_luma(ymode, &imctx.ipred_y); + + for (dst, (src1, src2)) in res.luma.iter_mut().zip(sblk.luma_blocks().zip(newblk.luma_blocks())) { + get_block_difference(dst, &src1, &src2); + } + + let mut nits = 0; + + res.fdct_luma(); + res.fdct_dc_block(); + res.quant_luma(imctx.q); + nits += estimate_subblock_nits(&res.dcs, 1, imctx.pctx.nz_y2, &imctx.models.coef_probs[1]); + let mut nz_top = imctx.pctx.nz_y_top; + let mut nz_left = imctx.pctx.nz_y_left; + for (y, row) in res.luma.chunks(4).enumerate() { + for (x, blk) in row.iter().enumerate() { + let has_nz = blk.has_nz(); + let pctx = (nz_top[x] as u8) + (nz_left[y] as u8); + nits += estimate_subblock_nits(blk, 0, pctx, &imctx.models.coef_probs[0]); + nz_top[x] = has_nz; + nz_left[y] = has_nz; + } + } + res.dequant_luma(); + res.idct_luma(); + + let mut dist = 0; + for (diff, (src1, src2)) in res.luma.iter().zip(sblk.luma_blocks().zip(newblk.luma_blocks())) { + dist += get_difference_dist(&src1, &src2, diff); + } + + imctx.metric.calc_metric(dist, nits) +} + +pub fn select_intra_mode(sblk: &SrcBlock, newblk: &mut SrcBlock, res: &mut Residue, imctx: &IntraModePredCtx, ref_best_dist: u32, mb_type: MBType) -> MBType { + const PRED16X16: [PredMode; 4] = [PredMode::DCPred, PredMode::HPred, PredMode::VPred, PredMode::TMPred]; + + let mut best_ymode = PredMode::DCPred; + let mut y_best_dist = MAX_DIST; + let mut use_i4 = false; + let mut i4_modes = [PredMode::DCPred; 16]; + if !sblk.is_flat() { + for &ymode in PRED16X16.iter() { + let dist = try_intra16_pred(sblk, newblk, res, imctx, ymode); + + if dist < y_best_dist { + best_ymode = ymode; + y_best_dist = dist; + if dist <= SMALL_DIST { + break; + } + } + } + + if y_best_dist >= ref_best_dist { + return mb_type; + } + + if y_best_dist > SMALL_DIST { + res.reset(); + let dist4 = try_i4x4_pred(sblk.luma_blocks(), &mut i4_modes, res, &mut newblk.luma, &imctx, y_best_dist); + use_i4 = dist4 < y_best_dist; + y_best_dist = y_best_dist.min(dist4); + } + } else if ref_best_dist != MAX_DIST { // we can skip that for intra-only case + y_best_dist = try_intra16_pred(sblk, newblk, res, imctx, PredMode::DCPred); + if y_best_dist >= ref_best_dist { + return mb_type; + } + } + + let mut best_cmode = PredMode::DCPred; + let mut c_best_dist = MAX_DIST; + for &cmode in PRED16X16.iter() { + newblk.fill_ipred_chroma(cmode, &imctx.ipred_u, &imctx.ipred_v); + let mut dist = 0; + 'csearch: for chroma in 0..2 { + let mut nz_top = imctx.pctx.nz_c_top[chroma]; + let mut nz_left = imctx.pctx.nz_c_left[chroma]; + for (idx, (sblk, nblk)) in sblk.chroma_blocks(chroma).zip(newblk.chroma_blocks(chroma)).enumerate() { + let pctx = (nz_top[idx & 1] as u8) + (nz_left[idx >> 1] as u8); + let (dist1, has_nz) = imctx.metric.block_dist(&sblk, &nblk, imctx.q, 2, pctx, &imctx.models.coef_probs[2]); + dist += dist1; + nz_top[idx & 1] = has_nz; + nz_left[idx >> 1] = has_nz; + if dist >= c_best_dist { + break 'csearch; + } + } + } + if dist < c_best_dist { + best_cmode = cmode; + c_best_dist = dist; + } + } + let tot_dist = y_best_dist.saturating_add(c_best_dist); + if (ref_best_dist == MAX_DIST) || (tot_dist < ref_best_dist) { + if !use_i4 { + MBType::Intra(best_ymode, best_cmode) + } else { + MBType::Intra4x4(i4_modes, [0; 16], best_cmode) + } + } else { + mb_type + } +} + +pub fn calc_inter_mb_dist(sblk: &SrcBlock, newblk: &SrcBlock, res: &mut Residue, imctx: &IntraModePredCtx, pdc: i16) -> u32 { + res.set_luma_from_diff(&sblk.luma, &newblk.luma); + res.set_chroma_from_diff(&sblk.chroma, &newblk.chroma); + 
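// transform and quantise the inter residue, summing coefficient nits for the rate part of the metric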
+    res.fdct();
+    res.fdct_dc_block();
+    requant_y2_dc(&mut res.dcs[0], imctx.q);
+    res.dcs[0] -= pdc;
+    res.quant(imctx.q);
+    let mut nits = estimate_subblock_nits(&res.dcs, 1, imctx.pctx.nz_y2, &imctx.models.coef_probs[1]);
+    let mut nz_top = imctx.pctx.nz_y_top;
+    let mut nz_left = imctx.pctx.nz_y_left;
+    for (y, row) in res.luma.chunks(4).enumerate() {
+        for (x, blk) in row.iter().enumerate() {
+            let has_nz = blk.has_nz();
+            let pctx = (nz_top[x] as u8) + (nz_left[y] as u8);
+            nits += estimate_subblock_nits(blk, 0, pctx, &imctx.models.coef_probs[0]);
+            nz_top[x] = has_nz;
+            nz_left[y] = has_nz;
+        }
+    }
+    for (c_idx, chroma) in res.chroma.iter().enumerate() {
+        let mut nz_top = imctx.pctx.nz_c_top[c_idx];
+        let mut nz_left = imctx.pctx.nz_c_left[c_idx];
+        for (idx, blk) in chroma.iter().enumerate() {
+            let pctx = (nz_top[idx & 1] as u8) + (nz_left[idx >> 1] as u8);
+            let has_nz = blk.has_nz();
+            nits += estimate_subblock_nits(blk, 2, pctx, &imctx.models.coef_probs[2]);
+            nz_top[idx & 1] = has_nz;
+            nz_left[idx >> 1] = has_nz;
+        }
+    }
+    res.dequant();
+    res.idct();
+    let mut dist = 0;
+    for (diff, (src, new)) in res.luma.iter().zip(sblk.luma_blocks().zip(newblk.luma_blocks())) {
+        dist += get_difference_dist(&src, &new, diff);
+    }
+    for chroma in 0..2 {
+        for (diff, (src, new)) in res.chroma[chroma].iter().zip(sblk.chroma_blocks(chroma).zip(newblk.chroma_blocks(chroma))) {
+            dist += get_difference_dist(&src, &new, diff);
+        }
+    }
+
+    res.reset();
+    imctx.metric.calc_metric(dist, nits)
+}
+
+#[allow(clippy::too_many_arguments)]
+pub fn try_inter_split(sblk: &SrcBlock, newblk: &mut SrcBlock, res: &mut Residue, mvprobs: [u8; 4], nearest_mv: MV, near_mv: MV, pred_mv: MV, last: bool, mb_x: usize, mb_y: usize, mv_search: &mut Box<dyn MVSearch>, mv_est: &mut MVEstimator, pctx: &mut PredContext, imctx: &IntraModePredCtx, inter_dist: u32) -> Option<(MBType, u32)> {
+    let mv_stride = pctx.mv_stride;
+    let mut blk8 = [0; 64];
+    let mut mvs8 = [ZERO_MV; 4];
+    let mut split_cand = [false; 4];
+    let mut mv_dist = [0; 4];
+
+    let mb_mv = pctx.mvs[mb_x * 4 + mb_y * 4 * mv_stride];
+    for (quarter, dst_mv) in mvs8.iter_mut().enumerate() {
+        let xoff = mb_x * 16 + (quarter & 1) * 8;
+        let yoff = mb_y * 16 + (quarter & 2) * 4;
+
+        let off = (quarter & 1) * 8 + (quarter >> 1) * 8 * 16;
+        for (src, dst) in sblk.luma[off..].chunks(16).zip(blk8.chunks_mut(8)) {
+            dst.copy_from_slice(&src[..8]);
+        }
+
+        let mut mvs = UniqueList::new();
+        mvs.add(ZERO_MV);
+        mvs.add(mb_mv);
+        let mv_idx = xoff / 4 + (yoff / 4) * mv_stride;
+        if xoff > 0 {
+            mvs.add(pctx.mvs[mv_idx - 1]);
+        }
+        if mv_idx >= mv_stride {
+            mvs.add(pctx.mvs[mv_idx - mv_stride]);
+        }
+        mvs.add(near_mv);
+        mvs.add(nearest_mv);
+        let (mv, dist) = mv_search.search_blk8(mv_est, &blk8, xoff, yoff, mvs.get_list());
+        *dst_mv = mv;
+        split_cand[quarter] = dist > LARGE_BLK8_DIST;
+        mv_dist[quarter] = dist;
+    }
+    if mvs8[0] == mvs8[1] && mvs8[0] == mvs8[2] && mvs8[0] == mvs8[3] {
+        // single MV per MB
+        return None;
+    }
+    let mv_idx = mb_x * 4 + mb_y * 4 * mv_stride;
+    for (dst, src) in pctx.mvs[mv_idx..].chunks_mut(2 * mv_stride).zip(mvs8.chunks(2)) {
+        dst[0] = src[0];
+        dst[1] = src[0];
+        dst[2] = src[1];
+        dst[3] = src[1];
+        dst[mv_stride    ] = src[0];
+        dst[mv_stride + 1] = src[0];
+        dst[mv_stride + 2] = src[1];
+        dst[mv_stride + 3] = src[1];
+    }
+    recon_split_mb(newblk, mb_x, mb_y, &pctx.mvs, mv_stride, mv_est);
+
+    let mut tot_dist = calc_inter_mb_dist(sblk, newblk, res, imctx, pctx.get_y2_dc_pred(last));
+
+    let mut split_mode = MVSplitMode::Quarters;
+    let mut sub_refs =
[SubMVRef::Zero; 16]; + let mut sub_mvs = [ZERO_MV; 16]; + + let mut mv_nits = 0; + if mvs8[0] == mvs8[1] && mvs8[2] == mvs8[3] { + split_mode = MVSplitMode::TopBottom; + sub_mvs[0] = mvs8[0] - pred_mv; + sub_mvs[1] = mvs8[2] - pred_mv; + + let mv_idx = mb_x * 4 + mb_y * 4 * mv_stride; + let left_mv = if mb_x > 0 { pctx.mvs[mv_idx - 1] } else { ZERO_MV }; + let top_mv = if mb_y > 0 { pctx.mvs[mv_idx - mv_stride] } else { ZERO_MV }; + let (ref0, nits0) = sub_mv_nits(mvs8[0], left_mv, top_mv, pred_mv, imctx.models); + let left_mv = if mb_x > 0 { pctx.mvs[mv_idx + 2 * mv_stride - 1] } else { ZERO_MV }; + let (ref1, nits1) = sub_mv_nits(mvs8[2], left_mv, mvs8[0], pred_mv, imctx.models); + sub_refs[0] = ref0; + sub_refs[1] = ref1; + mv_nits += nits0 + nits1; + } else if mvs8[0] == mvs8[2] && mvs8[1] == mvs8[3] { + split_mode = MVSplitMode::LeftRight; + sub_mvs[0] = mvs8[0] - pred_mv; + sub_mvs[1] = mvs8[1] - pred_mv; + + let mv_idx = mb_x * 4 + mb_y * 4 * mv_stride; + let left_mv = if mb_x > 0 { pctx.mvs[mv_idx - 1] } else { ZERO_MV }; + let top_mv = if mb_y > 0 { pctx.mvs[mv_idx - mv_stride] } else { ZERO_MV }; + let (ref0, nits0) = sub_mv_nits(mvs8[0], left_mv, top_mv, pred_mv, imctx.models); + let top_mv = if mb_y > 0 { pctx.mvs[mv_idx - mv_stride + 2] } else { ZERO_MV }; + let (ref1, nits1) = sub_mv_nits(mvs8[1], mvs8[0], top_mv, pred_mv, imctx.models); + sub_refs[0] = ref0; + sub_refs[1] = ref1; + mv_nits += nits0 + nits1; + } else { + for (quarter, &mv) in mvs8.iter().enumerate() { + let xoff = mb_x * 16 + (quarter & 1) * 8; + let yoff = mb_y * 16 + (quarter & 2) * 4; + let mv_idx = xoff / 4 + (yoff / 4) * mv_stride; + let left_mv = if xoff > 0 { pctx.mvs[mv_idx - 1] } else { ZERO_MV }; + let top_mv = if yoff > 0 { pctx.mvs[mv_idx - mv_stride] } else { ZERO_MV }; + let (cur_sub_ref, nits) = sub_mv_nits(mv, left_mv, top_mv, pred_mv, imctx.models); + sub_refs[quarter] = cur_sub_ref; + sub_mvs[quarter] = mv - pred_mv; + mv_nits += nits; + + pctx.mvs[mv_idx] = mv; + pctx.mvs[mv_idx + 1] = mv; + pctx.mvs[mv_idx + mv_stride] = mv; + pctx.mvs[mv_idx + mv_stride + 1] = mv; + } + } + mv_nits += sub_mv_mode_nits(split_mode); + tot_dist += imctx.metric.calc_metric(0, mv_nits); + if tot_dist < inter_dist { + if tot_dist > SMALL_DIST && (split_cand[0] || split_cand[1] || split_cand[2] || split_cand[3]) { + let mut blk4 = [0; 16]; + let mut has_splits = false; + for (quarter, &mv_dist) in mv_dist.iter().enumerate() { + if !split_cand[quarter] { + continue; + } + let xoff = mb_x * 16 + (quarter & 1) * 8; + let yoff = mb_y * 16 + (quarter & 2) * 4; + let mut dist_sum = 0; + let mut smv = [ZERO_MV; 4]; + for (subq, smv) in smv.iter_mut().enumerate() { + let off = (quarter & 1) * 8 + (subq & 1) * 4 + ((quarter >> 1) * 8 + (subq >> 1) * 4) * 16; + for (dst, src) in blk4.chunks_mut(4).zip(sblk.luma[off..].chunks(16)) { + dst.copy_from_slice(&src[..4]); + } + + let mut mvs = UniqueList::new(); + mvs.add(ZERO_MV); + mvs.add(mvs8[quarter]); + mvs.add(mb_mv); + let mv_idx = xoff / 4 + (subq & 1) + ((yoff / 4) + (subq >> 1)) * mv_stride; + if xoff > 0 || (subq & 1) != 0 { + mvs.add(pctx.mvs[mv_idx - 1]); + } + if mv_idx >= mv_stride { + mvs.add(pctx.mvs[mv_idx - mv_stride]); + } + let (mv, dist) = mv_search.search_blk4(mv_est, &blk4, xoff, yoff, mvs.get_list()); + *smv = mv; + dist_sum += dist; + } + if dist_sum < mv_dist / 2 { + for (subq, &smv) in smv.iter().enumerate() { + let mv_idx = xoff / 4 + (subq & 1) + ((yoff / 4) + (subq >> 1)) * mv_stride; + pctx.mvs[mv_idx] = smv; + } + has_splits = true; + } + } + 
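// at least one 8x8 block prefers its own 4x4 MVs - re-evaluate the MB as a full sixteenths split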
+            if has_splits {
+                recon_split_mb(newblk, mb_x, mb_y, &pctx.mvs, mv_stride, mv_est);
+                let mut split16_dist = calc_inter_mb_dist(sblk, newblk, res, imctx, pctx.get_y2_dc_pred(last));
+                if split16_dist < tot_dist {
+                    let mut mv_nits = sub_mv_mode_nits(MVSplitMode::Sixteenths);
+                    let mut mv_idx = mb_x * 4 + mb_y * 4 * mv_stride;
+                    let mut sub_refs2 = [SubMVRef::Zero; 16];
+                    let mut sub_mvs2 = [ZERO_MV; 16];
+                    for y in 0..4 {
+                        for x in 0..4 {
+                            let left_mv = if x > 0 || mb_x > 0 { pctx.mvs[mv_idx + x - 1] } else { ZERO_MV };
+                            let top_mv = if mv_idx + x >= mv_stride { pctx.mvs[mv_idx + x - mv_stride] } else { ZERO_MV };
+                            let cur_mv = pctx.mvs[mv_idx + x];
+                            sub_mvs2[x + y * 4] = cur_mv - pred_mv;
+                            let (cur_sub_ref, nits) = sub_mv_nits(cur_mv, left_mv, top_mv, pred_mv, imctx.models);
+                            sub_refs2[x + y * 4] = cur_sub_ref;
+                            mv_nits += nits;
+                        }
+                        mv_idx += mv_stride;
+                    }
+                    split16_dist += imctx.metric.calc_metric(0, mv_nits);
+                    if split16_dist < tot_dist {
+                        let mb_t = MBType::InterSplitMV(last, mvprobs, MVSplitMode::Sixteenths, sub_refs2, sub_mvs2);
+                        return Some((mb_t, split16_dist));
+                    }
+                }
+            }
+        }
+        let mv_idx = mb_x * 4 + mb_y * 4 * mv_stride;
+        for (dst, src) in pctx.mvs[mv_idx..].chunks_mut(2 * mv_stride).zip(mvs8.chunks(2)) {
+            dst[0] = src[0];
+            dst[1] = src[0];
+            dst[2] = src[1];
+            dst[3] = src[1];
+            dst[mv_stride    ] = src[0];
+            dst[mv_stride + 1] = src[0];
+            dst[mv_stride + 2] = src[1];
+            dst[mv_stride + 3] = src[1];
+        }
+        Some((MBType::InterSplitMV(last, mvprobs, split_mode, sub_refs, sub_mvs), tot_dist))
+    } else {
+        None
+    }
+}
+
+fn get_chroma_mv(mut mv: MV) -> MV {
+    if mv.x < 0 {
+        mv.x += 1;
+    } else {
+        mv.x += 2;
+    }
+    if mv.y < 0 {
+        mv.y += 1;
+    } else {
+        mv.y += 2;
+    }
+    mv.x >>= 2;
+    mv.y >>= 2;
+    mv
+}
+
+pub fn recon_split_mb(newblk: &mut SrcBlock, mb_x: usize, mb_y: usize, mvs: &[MV], mv_stride: usize, mv_est: &mut MVEstimator) {
+    let mut mv_idx = mb_x * 4 + mb_y * 4 * mv_stride;
+    let mut sum_mv = [ZERO_MV; 2];
+    let mut blk4 = [0; 16];
+    for (y, strip) in newblk.luma.chunks_mut(16 * 4).enumerate() {
+        if (y & 1) == 0 {
+            sum_mv = [ZERO_MV; 2];
+        }
+        for x in 0..4 {
+            let mv = mvs[mv_idx + x];
+            sum_mv[x / 2] += mv;
+            mv_est.get_blk4(&mut blk4, 0, mb_x * 16 + x * 4, mb_y * 16 + y * 4, mv);
+            for (dst, src) in strip[x * 4..].chunks_mut(16).zip(blk4.chunks(4)) {
+                dst[..4].copy_from_slice(src);
+            }
+        }
+        if (y & 1) == 1 {
+            let cmv = [get_chroma_mv(sum_mv[0]), get_chroma_mv(sum_mv[1])];
+            for chroma in 0..2 {
+                for (x, &mv) in cmv.iter().enumerate() {
+                    mv_est.get_blk4(&mut blk4, chroma + 1, mb_x * 8 + x * 4, mb_y * 8 + (y & 2) * 2, mv);
+                    for (dst, src) in newblk.chroma[chroma][x * 4 + (y & 2) * 2 * 8..].chunks_mut(8).zip(blk4.chunks(4)) {
+                        dst[..4].copy_from_slice(src);
+                    }
+                }
+            }
+        }
+        mv_idx += mv_stride;
+    }
+}
diff --git a/nihav-duck/src/codecs/vp7enc/mod.rs b/nihav-duck/src/codecs/vp7enc/mod.rs
new file mode 100644
index 0000000..67d4bed
--- /dev/null
+++ b/nihav-duck/src/codecs/vp7enc/mod.rs
@@ -0,0 +1,610 @@
+use nihav_core::codecs::*;
+use nihav_core::io::byteio::*;
+
+mod blocks;
+mod coder;
+use coder::*;
+mod frame_coder;
+use frame_coder::*;
+mod mb_coding;
+mod models;
+use models::*;
+mod motion_est;
+use motion_est::MVSearchMode;
+mod rdo;
+use rdo::*;
+
+#[derive(PartialEq,Debug)]
+enum EncodingState {
+    Intra,
+    Refinement,
+    JustEncode,
+}
+
+#[allow(dead_code)]
+struct VP7Encoder {
+    stream: Option<NAStreamRef>,
+    pkt: Option<NAPacket>,
+    version: u8,
+    key_int: u8,
+    frmcount: u8,
+    width: usize,
+    height: usize,
+    mb_w: usize,
+    mb_h: usize,
+
+    metric: RateDistMetric,
+    fenc: FrameEncoder,
+    pmodels: VP7Models,
+    br_ctl: BitRateControl,
+
+    last_frame: NABufferType,
+    gold_frame: NABufferType,
+    last_gold: bool,
+    me_mode: MVSearchMode,
+    me_range: i16,
+    lf_level: Option<u8>,
+
+    mbt_depth: usize,
+    mb_weight: Vec<usize>,
+    mb_map: Vec<usize>,
+    mb_map2: Vec<usize>,
+    qframes: Vec<NAFrame>,
+    frm_pool: NAVideoBufferPool<u8>,
+    i_frame: NAVideoBufferRef<u8>,
+    enc_state: EncodingState,
+}
+
+impl VP7Encoder {
+    fn new() -> Self {
+        let vt = alloc_video_buffer(NAVideoInfo::new(24, 24, false, YUV420_FORMAT), 4).unwrap();
+        let mc_buf1 = vt.get_vbuf().unwrap();
+        let vt = alloc_video_buffer(NAVideoInfo::new(24, 24, false, YUV420_FORMAT), 4).unwrap();
+        let mc_buf2 = vt.get_vbuf().unwrap();
+        let vt = alloc_video_buffer(NAVideoInfo::new(24, 24, false, YUV420_FORMAT), 4).unwrap();
+        let i_frame = vt.get_vbuf().unwrap();
+        Self {
+            stream: None,
+            pkt: None,
+            version: 0,
+            key_int: 10,
+            frmcount: 0,
+            width: 0,
+            height: 0,
+            mb_w: 0,
+            mb_h: 0,
+
+            metric: RateDistMetric::new(),
+            fenc: FrameEncoder::new(mc_buf1, mc_buf2),
+            pmodels: VP7Models::new(),
+            br_ctl: BitRateControl::new(),
+
+            last_frame: NABufferType::None,
+            gold_frame: NABufferType::None,
+            last_gold: false,
+            me_mode: MVSearchMode::default(),
+            me_range: 16,
+            lf_level: None,
+
+            mbt_depth: 0,
+            mb_weight: Vec::new(),
+            mb_map: Vec::new(),
+            mb_map2: Vec::new(),
+            qframes: Vec::new(),
+            frm_pool: NAVideoBufferPool::new(0),
+            i_frame,
+            enc_state: EncodingState::JustEncode,
+        }
+    }
+    fn encode_frame(&mut self, frm: &NAFrame) -> EncoderResult<()> {
+        let buf = frm.get_buffer();
+        if let Some(ref vbuf) = buf.get_vbuf() {
+            self.fenc.set_me_params(self.me_mode, self.me_range, self.version);
+            self.fenc.load_frame(vbuf);
+
+            let mut dbuf = Vec::with_capacity(4);
+            let mut gw = GrowableMemoryWriter::new_write(&mut dbuf);
+            let mut bw = ByteWriter::new(&mut gw);
+
+            let is_intra = self.frmcount == 0;
+            let golden_frame = is_intra;
+
+            self.br_ctl.set_key_interval(self.key_int);
+            let cur_quant = self.br_ctl.get_frame_quant(is_intra);
+
+            if let Some(level) = self.lf_level {
+                self.fenc.loop_params.loop_filter_level = level;
+            } else {
+                self.fenc.loop_params.loop_filter_level = if cur_quant <= 16 { 0 } else { (cur_quant / 4) as u8 };
+            }
+
+            if is_intra {
+                self.pmodels.reset();
+                let mbt_frames = self.mbt_depth.min(self.key_int as usize);
+                self.fenc.intra_blocks(cur_quant, &self.metric, &self.pmodels, if mbt_frames > 0 { Some(&self.mb_weight) } else { None });
+            } else {
+                let gold_ref = if !self.last_gold { &self.gold_frame } else { &NABufferType::None };
+                self.fenc.inter_blocks(cur_quant, &self.metric, &self.pmodels, &self.last_frame, gold_ref);
+            }
+
+            let mut stats = VP7ModelsStat::new();
+            let mut models = self.pmodels;
+            self.fenc.generate_models(is_intra, &mut stats);
+            stats.generate(&mut models, is_intra);
+
+            bw.write_u24le(0)?; // frame tag
+            if self.version == 0 {
+                bw.write_byte(0)?; // unused
+            }
+
+            let start = bw.tell();
+
+            let mut bc = BoolEncoder::new(&mut bw);
+            if is_intra {
+                bc.put_bits(self.width as u32, 12)?;
+                bc.put_bits(self.height as u32, 12)?;
+                bc.put_bits(0, 2)?; // scale vertical
+                bc.put_bits(0, 2)?; // scale horizontal
+            }
+
+            self.fenc.encode_features(&mut bc, cur_quant, &models)?;
+
+            bc.put_bits(cur_quant as u32, 7)?; // y_ac_q
+            bc.put_bool(false, 128)?; // y_dc_q
+            bc.put_bool(false, 128)?; // y2_ac_q
+            bc.put_bool(false, 128)?; // y2_dc_q
+            bc.put_bool(false, 128)?; // uv_ac_q
+            bc.put_bool(false, 128)?; // uv_dc_q
+
+            if !is_intra {
+                bc.put_bool(false, 128)?; // update golden frame
+            }
+
+            let has_fading = self.version == 0 || is_intra;
+            if self.version != 0 {
+                bc.put_bool(true, 128)?; // keep probabilities
+                if !is_intra {
+                    bc.put_bool(false, 128)?; // has fading feature
+                }
+            }
+            if has_fading {
+                bc.put_bool(false, 128)?; // fading
+            }
+
+            if self.version == 0 {
+                bc.put_bool(self.fenc.loop_params.lf_simple, 128)?;
+            }
+
+            // scan
+            bc.put_bool(false, 128)?;
+
+            if self.version != 0 {
+                bc.put_bool(self.fenc.loop_params.lf_simple, 128)?;
+            }
+
+            bc.put_bits(u32::from(self.fenc.loop_params.loop_filter_level), 6)?;
+            bc.put_bits(u32::from(self.fenc.loop_params.loop_sharpness), 3)?;
+
+            encode_dct_coef_prob_upd(&mut bc, &models.coef_probs, &self.pmodels.coef_probs)?;
+
+            if !is_intra {
+                bc.put_byte(models.prob_intra_pred)?;
+                bc.put_byte(models.prob_last_pred)?;
+
+                let ymode_differs = models.kf_ymode_prob != self.pmodels.kf_ymode_prob;
+                bc.put_bool(ymode_differs, 128)?;
+                if ymode_differs {
+                    for &el in models.kf_ymode_prob.iter() {
+                        bc.put_byte(el)?;
+                    }
+                }
+
+                let uvmode_differs = models.kf_uvmode_prob != self.pmodels.kf_uvmode_prob;
+                bc.put_bool(uvmode_differs, 128)?;
+                if uvmode_differs {
+                    for &el in models.kf_uvmode_prob.iter() {
+                        bc.put_byte(el)?;
+                    }
+                }
+
+                encode_mv_prob_upd(&mut bc, &models.mv_probs, &self.pmodels.mv_probs)?;
+            }
+
+            self.fenc.encode_mb_types(&mut bc, is_intra, &models)?;
+
+            bc.flush()?;
+            let end = bw.tell();
+
+            let mut bc = BoolEncoder::new(&mut bw);
+            self.fenc.encode_residues(&mut bc, &models)?;
+            bc.flush()?;
+
+            bw.seek(SeekFrom::Start(0))?;
+            bw.write_u24le((((end - start) as u32) << 4) |
+                           (u32::from(self.version) << 1) |
+                           if is_intra { 0 } else { 1 })?;
+
+            let cur_size = dbuf.len();
+
+            self.pkt = Some(NAPacket::new(self.stream.clone().unwrap(), frm.ts, is_intra, dbuf));
+
+            self.pmodels = models;
+
+            if self.key_int > 0 {
+                self.frmcount += 1;
+            }
+            if self.frmcount == self.key_int {
+                self.frmcount = 0;
+            }
+
+            if let Some(ref mut vbuf) = self.last_frame.get_vbuf() {
+                let mut frm = NASimpleVideoFrame::from_video_buf(vbuf).unwrap();
+                self.fenc.reconstruct_frame(&mut frm, is_intra);
+            }
+            self.last_gold = golden_frame;
+            if golden_frame {
+                let mut dfrm = self.gold_frame.get_vbuf().unwrap();
+                let src = self.last_frame.get_vbuf().unwrap();
+
+                let dst = dfrm.get_data_mut().unwrap();
+                dst.copy_from_slice(src.get_data());
+            }
+
+            self.br_ctl.update(cur_size);
+            if self.br_ctl.has_bitrate() {
+                let tgt_size = (self.br_ctl.get_target_size(is_intra) / 8) as usize;
+                self.metric.adjust_br(cur_size, tgt_size);
+            }
+
+            Ok(())
+        } else {
+            Err(EncoderError::InvalidParameters)
+        }
+    }
+}
+
+impl NAEncoder for VP7Encoder {
+    fn negotiate_format(&self, encinfo: &EncodeParameters) -> EncoderResult<EncodeParameters> {
+        match encinfo.format {
+            NACodecTypeInfo::None => {
+                let mut ofmt = EncodeParameters::default();
+                ofmt.format = NACodecTypeInfo::Video(NAVideoInfo::new(0, 0, false, YUV420_FORMAT));
+                Ok(ofmt)
+            },
+            NACodecTypeInfo::Audio(_) => Err(EncoderError::FormatError),
+            NACodecTypeInfo::Video(vinfo) => {
+                let outinfo = NAVideoInfo::new((vinfo.width + 15) & !15, (vinfo.height + 15) & !15, false, YUV420_FORMAT);
+                let mut ofmt = *encinfo;
+                ofmt.format = NACodecTypeInfo::Video(outinfo);
+                Ok(ofmt)
+            }
+        }
+    }
+    fn init(&mut self, stream_id: u32, encinfo: EncodeParameters) -> EncoderResult<NAStreamRef> {
+        match encinfo.format {
+            NACodecTypeInfo::None => Err(EncoderError::FormatError),
+            NACodecTypeInfo::Audio(_) => Err(EncoderError::FormatError),
+            NACodecTypeInfo::Video(vinfo) => {
+                if vinfo.format != YUV420_FORMAT {
+                    return Err(EncoderError::FormatError);
+                }
+                if ((vinfo.width | vinfo.height) & 15) != 0 {
+                    return Err(EncoderError::FormatError);
+                }
+                if (vinfo.width | vinfo.height) >= (1 << 12) {
+                    return Err(EncoderError::FormatError);
+                }
+
+                let out_info = NAVideoInfo::new(vinfo.width, vinfo.height, false, vinfo.format);
+                let info = NACodecInfo::new("vp7", NACodecTypeInfo::Video(out_info), None);
+                let mut stream = NAStream::new(StreamType::Video, stream_id, info, encinfo.tb_num, encinfo.tb_den, 0);
+                stream.set_num(stream_id as usize);
+                let stream = stream.into_ref();
+
+                self.last_frame = alloc_video_buffer(out_info, 4)?;
+                self.gold_frame = alloc_video_buffer(out_info, 4)?;
+
+                self.stream = Some(stream.clone());
+
+                self.width  = vinfo.width;
+                self.height = vinfo.height;
+                self.mb_w = (vinfo.width  + 15) >> 4;
+                self.mb_h = (vinfo.height + 15) >> 4;
+                self.fenc.resize(self.mb_w, self.mb_h);
+
+                self.br_ctl.set_params(encinfo.tb_num, encinfo.tb_den, encinfo.bitrate, self.key_int, self.mb_w * self.mb_h);
+
+                self.frm_pool.reset();
+                self.frm_pool.set_dec_bufs(self.mbt_depth + 1);
+                self.frm_pool.prealloc_video(out_info, 4)?;
+                self.i_frame = self.frm_pool.get_free().unwrap();
+                self.mb_weight.resize(self.mb_w * self.mb_h, 0);
+                self.mb_map.resize(self.mb_w * self.mb_h, 0);
+                self.mb_map2.resize(self.mb_w * self.mb_h, 0);
+                self.qframes.clear();
+                self.enc_state = if self.mbt_depth.min(self.key_int as usize) > 0 {
+                        EncodingState::Intra
+                    } else {
+                        EncodingState::JustEncode
+                    };
+
+                Ok(stream)
+            },
+        }
+    }
+    fn encode(&mut self, frm: &NAFrame) -> EncoderResult<()> {
+        if let Some(ref vbuf) = frm.get_buffer().get_vbuf() {
+            let mbt_frames = self.mbt_depth.min(self.key_int as usize);
+            if !self.qframes.is_empty() || (mbt_frames > 0 && self.enc_state != EncodingState::JustEncode) {
+                if let Some(dbuf) = self.frm_pool.get_copy(vbuf) {
+                    let newfrm = NAFrame::new(frm.ts, frm.frame_type, frm.key, frm.get_info(), NABufferType::Video(dbuf));
+                    if self.enc_state == EncodingState::Intra {
+                        for (i, el) in self.mb_map.iter_mut().enumerate() {
+                            *el = i;
+                        }
+                        for el in self.mb_weight.iter_mut() {
+                            *el = 1;
+                        }
+                        let frm = NASimpleVideoFrame::from_video_buf(&mut self.i_frame).unwrap();
+                        let src = vbuf.get_data();
+                        for plane in 0..3 {
+                            let soff = vbuf.get_offset(plane);
+                            let sstride = vbuf.get_stride(plane);
+                            let copy_len = sstride.min(frm.stride[plane]);
+                            for (dst, src) in frm.data[frm.offset[plane]..].chunks_mut(frm.stride[plane]).zip(src[soff..].chunks(sstride)).take(frm.height[plane]) {
+                                dst[..copy_len].copy_from_slice(&src[..copy_len]);
+                            }
+                        }
+                        self.enc_state = EncodingState::Refinement;
+                    } else {
+                        self.fenc.set_me_params(self.me_mode, self.me_range, self.version);
+                        self.fenc.load_frame(vbuf);
+                        self.fenc.mb_tree_search(self.i_frame.clone(), &self.mb_map, &mut self.mb_map2, &mut self.mb_weight);
+                        std::mem::swap(&mut self.mb_map, &mut self.mb_map2);
+                    }
+                    self.qframes.push(newfrm);
+                    Ok(())
+                } else {
+                    self.enc_state = EncodingState::JustEncode;
+                    self.encode_frame(frm)
+                }
+            } else {
+                self.encode_frame(frm)
+            }
+        } else {
+            Err(EncoderError::FormatError)
+        }
+    }
+    fn get_packet(&mut self) -> EncoderResult<Option<NAPacket>> {
+        let mbt_frames = self.mbt_depth.min(self.key_int as usize);
+        if self.qframes.len() >= mbt_frames {
+            self.enc_state = EncodingState::JustEncode;
+        }
+        if self.pkt.is_none() && !self.qframes.is_empty() && self.enc_state == EncodingState::JustEncode {
+            let frm = self.qframes.remove(0);
+            self.encode_frame(&frm)?;
+            if self.qframes.is_empty() && self.mbt_depth > 0 && self.frmcount == 0 {
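+                // lookahead queue drained exactly at a keyframe boundary - restart MB tree analysis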
+                self.enc_state = EncodingState::Intra;
+            }
+        }
+        let mut npkt = None;
+        std::mem::swap(&mut self.pkt, &mut npkt);
+        Ok(npkt)
+    }
+    fn flush(&mut self) -> EncoderResult<()> {
+        self.frmcount = 0;
+        self.enc_state = EncodingState::JustEncode;
+        Ok(())
+    }
+}
+
+const VERSION_OPTION: &str = "version";
+const LF_LEVEL_OPTION: &str = "lf_level";
+const LF_SHARP_OPTION: &str = "lf_sharpness";
+const LF_SIMPLE_OPTION: &str = "lf_simple";
+const QUANT_OPTION: &str = "quant";
+const MV_SEARCH_OPTION: &str = "mv_mode";
+const MV_RANGE_OPTION: &str = "mv_range";
+const MBTREE_DEPTH: &str = "mbtree_depth";
+
+const ENCODER_OPTS: &[NAOptionDefinition] = &[
+    NAOptionDefinition {
+        name: KEYFRAME_OPTION, description: KEYFRAME_OPTION_DESC,
+        opt_type: NAOptionDefinitionType::Int(Some(0), Some(128)) },
+    NAOptionDefinition {
+        name: VERSION_OPTION, description: "internal codec version",
+        opt_type: NAOptionDefinitionType::Int(Some(0), Some(1)) },
+    NAOptionDefinition {
+        name: LF_LEVEL_OPTION, description: "loop filter level (-1 = automatic)",
+        opt_type: NAOptionDefinitionType::Int(Some(-1), Some(63)) },
+    NAOptionDefinition {
+        name: LF_SHARP_OPTION, description: "loop filter sharpness",
+        opt_type: NAOptionDefinitionType::Int(Some(0), Some(7)) },
+    NAOptionDefinition {
+        name: LF_SIMPLE_OPTION, description: "use simple loop filter",
+        opt_type: NAOptionDefinitionType::Bool },
+    NAOptionDefinition {
+        name: QUANT_OPTION, description: "force fixed quantiser for encoding",
+        opt_type: NAOptionDefinitionType::Int(Some(-1), Some(127)) },
+    NAOptionDefinition {
+        name: MV_SEARCH_OPTION, description: "motion search mode",
+        opt_type: NAOptionDefinitionType::String(Some(&["sea", "dia", "hex", "epzs"])) },
+    NAOptionDefinition {
+        name: MV_RANGE_OPTION, description: "motion search range (in pixels)",
+        opt_type: NAOptionDefinitionType::Int(Some(0), Some(30)) },
+    NAOptionDefinition {
+        name: MBTREE_DEPTH, description: "number of frames in MB tree analysis buffer",
+        opt_type: NAOptionDefinitionType::Int(Some(0), Some(128)) },
+];
+
+impl NAOptionHandler for VP7Encoder {
+    fn get_supported_options(&self) -> &[NAOptionDefinition] { ENCODER_OPTS }
+    fn set_options(&mut self, options: &[NAOption]) {
+        for option in options.iter() {
+            for opt_def in ENCODER_OPTS.iter() {
+                if opt_def.check(option).is_ok() {
+                    match option.name {
+                        KEYFRAME_OPTION => {
+                            if let NAValue::Int(intval) = option.value {
+                                self.key_int = intval as u8;
+                            }
+                        },
+                        VERSION_OPTION => {
+                            if let NAValue::Int(intval) = option.value {
+                                self.version = intval as u8;
+                            }
+                        },
+                        LF_LEVEL_OPTION => {
+                            if let NAValue::Int(intval) = option.value {
+                                self.lf_level = if intval < 0 { None } else { Some(intval as u8) };
+                            }
+                        },
+                        LF_SHARP_OPTION => {
+                            if let NAValue::Int(intval) = option.value {
+                                self.fenc.loop_params.loop_sharpness = intval as u8;
+                            }
+                        },
+                        LF_SIMPLE_OPTION => {
+                            if let NAValue::Bool(flag) = option.value {
+                                self.fenc.loop_params.lf_simple = flag;
+                            }
+                        },
+                        QUANT_OPTION => {
+                            if let NAValue::Int(intval) = option.value {
+                                self.br_ctl.set_quant(if intval < 0 { None } else { Some(intval as usize) });
+                            }
+                        },
+                        MV_SEARCH_OPTION => {
+                            if let NAValue::String(ref string) = option.value {
+                                if let Ok(mv_mode) = string.parse::<MVSearchMode>() {
+                                    self.me_mode = mv_mode;
+                                }
+                            }
+                        },
+                        MV_RANGE_OPTION => {
+                            if let NAValue::Int(intval) = option.value {
+                                self.me_range = intval as i16;
+                            }
+                        },
+                        MBTREE_DEPTH => {
+                            if let NAValue::Int(intval) = option.value {
+                                self.mbt_depth = intval as usize;
+                            }
+                        },
+                        _ => {},
+                    };
+                }
+            }
+        }
+    }
+    fn query_option_value(&self, name: &str) -> Option<NAValue> {
+        match name {
+            KEYFRAME_OPTION => Some(NAValue::Int(i64::from(self.key_int))),
+            VERSION_OPTION => Some(NAValue::Int(i64::from(self.version))),
+            QUANT_OPTION => if let Some(q) = self.br_ctl.get_quant() {
+                    Some(NAValue::Int(q as i64))
+                } else {
+                    Some(NAValue::Int(-1))
+                },
+            LF_LEVEL_OPTION => if let Some(lev) = self.lf_level {
+                    Some(NAValue::Int(i64::from(lev)))
+                } else {
+                    Some(NAValue::Int(-1))
+                },
+            LF_SHARP_OPTION => Some(NAValue::Int(i64::from(self.fenc.loop_params.loop_sharpness))),
+            LF_SIMPLE_OPTION => Some(NAValue::Bool(self.fenc.loop_params.lf_simple)),
+            MV_SEARCH_OPTION => Some(NAValue::String(self.me_mode.to_string())),
+            MV_RANGE_OPTION => Some(NAValue::Int(i64::from(self.me_range))),
+            MBTREE_DEPTH => Some(NAValue::Int(self.mbt_depth as i64)),
+            _ => None,
+        }
+    }
+}
+
+pub fn get_encoder() -> Box<dyn NAEncoder> {
+    Box::new(VP7Encoder::new())
+}
+
+#[cfg(test)]
+mod test {
+    use nihav_core::codecs::*;
+    use nihav_core::demuxers::*;
+    use nihav_core::muxers::*;
+    use crate::*;
+    use nihav_commonfmt::*;
+    use nihav_codec_support::test::enc_video::*;
+
+    fn encode_test(out_name: &'static str, enc_options: &[NAOption], hash: &[u32; 4]) {
+        let mut dmx_reg = RegisteredDemuxers::new();
+        generic_register_all_demuxers(&mut dmx_reg);
+        let mut dec_reg = RegisteredDecoders::new();
+        duck_register_all_decoders(&mut dec_reg);
+        let mut mux_reg = RegisteredMuxers::new();
+        generic_register_all_muxers(&mut mux_reg);
+        let mut enc_reg = RegisteredEncoders::new();
+        duck_register_all_encoders(&mut enc_reg);
+
+        // sample: https://samples.mplayerhq.hu/V-codecs/VP4/ot171_vp40.avi
+        let dec_config = DecoderTestParams {
+                demuxer: "avi",
+                in_name: "assets/Duck/ot171_vp40.avi",
+                stream_type: StreamType::Video,
+                limit: Some(9),
+                dmx_reg, dec_reg,
+            };
+        let enc_config = EncoderTestParams {
+                muxer: "avi",
+                enc_name: "vp7",
+                out_name,
+                mux_reg, enc_reg,
+            };
+        let dst_vinfo = NAVideoInfo {
+                width: 0,
+                height: 0,
+                format: YUV420_FORMAT,
+                flipped: false,
+                bits: 12,
+            };
+        let enc_params = EncodeParameters {
+                format: NACodecTypeInfo::Video(dst_vinfo),
+                quality: 0,
+                bitrate: 50000,
+                tb_num: 0,
+                tb_den: 0,
+                flags: 0,
+            };
+        //test_encoding_to_file(&dec_config, &enc_config, enc_params, enc_options);
+        test_encoding_md5(&dec_config, &enc_config, enc_params, enc_options,
+                          hash);
+    }
+    #[test]
+    fn test_vp7_encoder() {
+        let enc_options = &[
+                NAOption { name: super::QUANT_OPTION, value: NAValue::Int(42) },
+            ];
+        encode_test("vp7-q42.avi", enc_options, &[0xa5079e5b, 0x33dd8a63, 0xfc189e21, 0xee08332b]);
+    }
+    #[test]
+    fn test_vp7_encoder_noloop() {
+        let enc_options = &[
+                NAOption { name: super::QUANT_OPTION, value: NAValue::Int(42) },
+                NAOption { name: super::LF_LEVEL_OPTION, value: NAValue::Int(0) },
+            ];
+        encode_test("vp7-noloop.avi", enc_options, &[0xc7d41732, 0x09b03059, 0x8550921c, 0xa99d4c29]);
+    }
+    #[test]
+    fn test_vp7_encoder_mbtree() {
+        let enc_options = &[
+                NAOption { name: super::QUANT_OPTION, value: NAValue::Int(24) },
+                NAOption { name: super::MBTREE_DEPTH, value: NAValue::Int(10) },
+            ];
+        encode_test("vp7-mbt.avi", enc_options, &[0xd0d90d31, 0x0253275d, 0xbe502d3c, 0xacf2b6e7]);
+    }
+    #[test]
+    fn test_vp7_encoder_ratectl() {
+        let enc_options = &[
+                NAOption { name: super::QUANT_OPTION, value: NAValue::Int(-1) },
+            ];
+        encode_test("vp7-br.avi", enc_options, &[0x47dcd4da, 0x04b06feb, 0x386163c1, 0x54899da3]);
+    }
+}
diff --git a/nihav-duck/src/codecs/vp7enc/models.rs b/nihav-duck/src/codecs/vp7enc/models.rs
new file
mode 100644 index 0000000..cc716b0 --- /dev/null +++ b/nihav-duck/src/codecs/vp7enc/models.rs @@ -0,0 +1,119 @@ +use super::super::vp78data::*; +pub use crate::codecs::vpenc::models::*; + +#[derive(Clone,Copy)] +pub struct VP7Models { + pub coef_probs: [[[[u8; 11]; 3]; 8]; 4], + pub mv_probs: [[u8; 17]; 2], + pub kf_ymode_prob: [u8; 4], + pub kf_uvmode_prob: [u8; 3], + pub prob_intra_pred: u8, + pub prob_last_pred: u8, + pub feature_present: [u8; 4], + pub feature_tree_probs: [[u8; 3]; 4], +} + +const DEFAULT_MV_PROBS: [[u8; 17]; 2] = [ + [ 162, 128, 225, 146, 172, 147, 214, 39, 156, 247, 210, 135, 68, 138, 220, 239, 246 ], + [ 164, 128, 204, 170, 119, 235, 140, 230, 228, 244, 184, 201, 44, 173, 221, 239, 253 ] +]; + +impl VP7Models { + pub fn new() -> Self { + let mut obj: Self = unsafe { std::mem::zeroed() }; + obj.reset(); + obj + } + pub fn reset(&mut self) { + self.coef_probs.copy_from_slice(&DEFAULT_DCT_PROBS); + self.mv_probs.copy_from_slice(&DEFAULT_MV_PROBS); + self.kf_ymode_prob.copy_from_slice(Y_MODE_TREE_PROBS); + self.kf_uvmode_prob.copy_from_slice(UV_MODE_TREE_PROBS); + } +} + +pub trait VP7ProbCounter { + fn to_prob8(self) -> u8; + fn to_prob8_worthy(&self, ref_prob: &mut u8); +} + +impl VP7ProbCounter for ProbCounter { + fn to_prob8(self) -> u8 { + if self.total > 0 { + ((self.zeroes << 8) / self.total).min(255).max(1) as u8 + } else { + 128 + } + } + fn to_prob8_worthy(&self, ref_prob: &mut u8) { + if self.total > 0 { + let new_prob = self.to_prob(); + let new_bits = Self::est_bits(new_prob, self.zeroes, self.total); + let old_bits = Self::est_bits(*ref_prob, self.zeroes, self.total); + + if new_bits + 8 < old_bits { + *ref_prob = new_prob; + } + } + } +} + +#[derive(Clone,Copy,Default)] +pub struct VP7ModelsStat { + pub coef_probs: [[[[ProbCounter; 11]; 3]; 8]; 4], + pub mv_probs: [[ProbCounter; 17]; 2], + pub kf_ymode_prob: [ProbCounter; 4], + pub kf_uvmode_prob: [ProbCounter; 3], + pub prob_intra_pred: ProbCounter, + pub prob_last_pred: ProbCounter, + pub feature_present: [ProbCounter; 4], + pub feature_tree_probs: [[ProbCounter; 3]; 4], +} + +impl VP7ModelsStat { + pub fn new() -> Self { Self::default() } + pub fn reset(&mut self) { + *self = Self::default(); + } + pub fn generate(&self, dst: &mut VP7Models, is_intra: bool) { + for (dst, src) in dst.feature_present.iter_mut().zip(self.feature_present.iter()) { + *dst = src.to_prob8(); + } + for (dst, src) in dst.feature_tree_probs.iter_mut().zip(self.feature_tree_probs.iter()) { + for (dst, src) in dst.iter_mut().zip(src.iter()) { + if src.total != 0 { + *dst = src.to_prob8(); + } else { + *dst = 255; + } + } + } + for (dst, src) in dst.coef_probs.iter_mut().zip(self.coef_probs.iter()) { + for (dst, src) in dst.iter_mut().zip(src.iter()) { + for (dst, src) in dst.iter_mut().zip(src.iter()) { + for (dst, src) in dst.iter_mut().zip(src.iter()) { + src.to_prob8_worthy(dst); + } + } + } + } + + if !is_intra { + dst.prob_intra_pred = self.prob_intra_pred.to_prob8(); + dst.prob_last_pred = self.prob_last_pred.to_prob8(); + + for (dmv, smv) in dst.mv_probs.iter_mut().zip(self.mv_probs.iter()) { + for (dp, sp) in dmv.iter_mut().zip(smv.iter()) { + *dp = sp.to_prob_worthy(*dp); + } + } + + for (dp, sp) in dst.kf_ymode_prob.iter_mut().zip(self.kf_ymode_prob.iter()) { + sp.to_prob8_worthy(dp); + } + for (dp, sp) in dst.kf_uvmode_prob.iter_mut().zip(self.kf_uvmode_prob.iter()) { + sp.to_prob8_worthy(dp); + } + } + } +} diff --git a/nihav-duck/src/codecs/vp7enc/motion_est.rs b/nihav-duck/src/codecs/vp7enc/motion_est.rs new file 
mode 100644
index 0000000..882e3e7
--- /dev/null
+++ b/nihav-duck/src/codecs/vp7enc/motion_est.rs
@@ -0,0 +1,523 @@
+use nihav_core::frame::*;
+use nihav_codec_support::codecs::{MV, ZERO_MV};
+use super::super::vp78dsp::*;
+use super::blocks::{SrcBlock, get_block_difference};
+use crate::codecs::vpenc::motion_est::*;
+pub use crate::codecs::vpenc::motion_est::MVSearchMode;
+
+pub trait MVSearchModeCreate {
+    fn create_search(&self) -> Box<dyn MVSearch>;
+}
+
+impl MVSearchModeCreate for MVSearchMode {
+    fn create_search(&self) -> Box<dyn MVSearch> {
+        match *self {
+            MVSearchMode::SEA => Box::new(EliminationSearch::new()),
+            MVSearchMode::Diamond => Box::new(DiaSearch::new()),
+            MVSearchMode::Hexagon => Box::new(HexSearch::new()),
+            MVSearchMode::EPZS => Box::new(EPZSearch::new()),
+            _ => unreachable!(),
+        }
+    }
+}
+
+const MAX_DIST: u32 = std::u32::MAX;
+const DIST_THRESH: u32 = 256;
+pub const LARGE_BLK8_DIST: u32 = 256;
+
+trait FromPixels {
+    fn from_pixels(self) -> Self;
+}
+
+impl FromPixels for MV {
+    fn from_pixels(self) -> MV {
+        MV { x: self.x * 8, y: self.y * 8 }
+    }
+}
+
+pub trait MVSearch {
+    fn preinit(&mut self, mv_est: &MVEstimator);
+    fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_mb: &SrcBlock, mb_x: usize, mb_y: usize) -> (MV, u32);
+    fn search_blk8(&mut self, mv_est: &mut MVEstimator, ref_blk: &[u8; 64], xpos: usize, ypos: usize, cand_mvs: &[MV]) -> (MV, u32);
+    fn search_blk4(&mut self, mv_est: &mut MVEstimator, ref_blk: &[u8; 16], xpos: usize, ypos: usize, cand_mvs: &[MV]) -> (MV, u32);
+}
+
+#[derive(Default)]
+pub struct EliminationSearch {
+    msa: [Vec<u16>; 3],
+    stride: [usize; 3],
+}
+
+impl EliminationSearch {
+    const BLOCK_SIZE: usize = 4;
+    pub fn new() -> Self { Self::default() }
+    fn get_rdist(&self, xpos: usize, ypos: usize, bavg: &[u16; 3]) -> i32 {
+        let luma_off = xpos + ypos * self.stride[0];
+        let chroma_off = (xpos / 2) + (ypos / 2) * self.stride[1];
+
+        let mut luma_avg = 0;
+        for row in self.msa[0][luma_off..].chunks(self.stride[0]).take(16).step_by(Self::BLOCK_SIZE) {
+            for &el in row.iter().take(16).step_by(Self::BLOCK_SIZE) {
+                luma_avg += el;
+            }
+        }
+        let mut chroma_avg = [0; 2];
+        for chroma in 0..2 { // sum both chroma planes
+            for row in self.msa[chroma + 1][chroma_off..].chunks(self.stride[1]).take(8).step_by(Self::BLOCK_SIZE) {
+                for &el in row.iter().take(8).step_by(Self::BLOCK_SIZE) {
+                    chroma_avg[chroma] += el;
+                }
+            }
+        }
+
+        (i32::from(bavg[0]) - i32::from(luma_avg)).abs() +
+        (i32::from(bavg[1]) - i32::from(chroma_avg[0])).abs() +
+        (i32::from(bavg[2]) - i32::from(chroma_avg[1])).abs()
+    }
+}
+
+impl MVSearch for EliminationSearch {
+    fn preinit(&mut self, mv_est: &MVEstimator) {
+        let data = mv_est.ref_frame.get_data();
+        for (plane, msa) in self.msa.iter_mut().enumerate() {
+            let (width, height) = mv_est.ref_frame.get_dimensions(plane);
+            self.stride[plane] = width + 1 - Self::BLOCK_SIZE;
+            msa.clear();
+            msa.reserve(self.stride[plane] * (height + 1 - Self::BLOCK_SIZE));
+
+            let mut off = mv_est.ref_frame.get_offset(plane);
+            let stride = mv_est.ref_frame.get_stride(plane);
+            for _ in 0..(height + 1 - Self::BLOCK_SIZE) {
+                for x in 0..(width + 1 - Self::BLOCK_SIZE) {
+                    let mut sum = 0;
+                    for j in 0..Self::BLOCK_SIZE {
+                        for i in 0..Self::BLOCK_SIZE {
+                            sum += u16::from(data[off + x + i + j * stride]);
+                        }
+                    }
+                    msa.push(sum);
+                }
+                off += stride;
+            }
+        }
+    }
+    fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_mb: &SrcBlock, mb_x: usize, mb_y: usize) -> (MV, u32) {
+        let mut best_dist = MAX_DIST;
+        let mut best_mv = ZERO_MV;
+
+        let mut cur_mv = ZERO_MV;
+
+        let (width, height)
= mv_est.ref_frame.get_dimensions(0); + let mut bavg = [0; 3]; + for blk in cur_mb.luma_blocks() { + bavg[0] += blk.iter().fold(0u16, |acc, &x| acc + u16::from(x)); + } + for chroma in 0..2 { + for blk in cur_mb.chroma_blocks(chroma) { + bavg[chroma + 1] += blk.iter().fold(0u16, |acc, &x| acc + u16::from(x)); + } + } + let mut rough_dist = std::i32::MAX; + for ytry in 0..mv_est.mv_range * 2 + 1 { + let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) }; + let ypos = (mb_y as isize) * 16 + (dy as isize); + if ypos < 0 || (ypos + 16) > (height as isize) { + continue; + } + let ypos = ypos as usize; + cur_mv.y = dy * 8; + for xtry in 0..mv_est.mv_range * 2 + 1 { + let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) }; + let xpos = (mb_x as isize) * 16 + (dx as isize); + if xpos < 0 || (xpos + 16) > (width as isize) { + continue; + } + let xpos = xpos as usize; + + let rdist = self.get_rdist(xpos, ypos, &bavg); + if rdist > rough_dist { + continue; + } + rough_dist = rdist; + + cur_mv.x = dx * 8; + + let dist = mv_est.sad_mb(cur_mb, mb_x, mb_y, cur_mv, best_dist); + + if dist < best_dist { + best_dist = dist; + best_mv = cur_mv; + if dist <= DIST_THRESH { + return (best_mv, best_dist); + } + } + } + } + (best_mv, best_dist) + } + fn search_blk8(&mut self, mv_est: &mut MVEstimator, ref_blk: &[u8; 64], xpos_orig: usize, ypos_orig: usize, _cand_mvs: &[MV]) -> (MV, u32) { + let mut best_dist = MAX_DIST; + let mut best_mv = ZERO_MV; + + let mut cur_mv = ZERO_MV; + + let (width, height) = mv_est.ref_frame.get_dimensions(0); + let bavg = ref_blk.iter().fold(0u16, |acc, &x| acc + u16::from(x)); + + let mut rough_dist = std::i32::MAX; + for ytry in 0..mv_est.mv_range * 2 + 1 { + let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) }; + let ypos = (ypos_orig as isize) + (dy as isize); + if ypos < 0 || (ypos + 8) > (height as isize) { + continue; + } + let ypos = ypos as usize; + cur_mv.y = dy * 8; + for xtry in 0..mv_est.mv_range * 2 + 1 { + let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) }; + let xpos = (xpos_orig as isize) + (dx as isize); + if xpos < 0 || (xpos + 8) > (width as isize) { + continue; + } + let xpos = xpos as usize; + + let luma_off = xpos + ypos * self.stride[0]; + let mut cur_avg = 0; + for row in self.msa[0][luma_off..].chunks(self.stride[0]).take(8).step_by(Self::BLOCK_SIZE) { + for &el in row.iter().take(8).step_by(Self::BLOCK_SIZE) { + cur_avg += el; + } + } + + let rdist = (i32::from(cur_avg) - i32::from(bavg)).abs(); + if rdist > rough_dist { + continue; + } + rough_dist = rdist; + + cur_mv.x = dx * 8; + + let dist = mv_est.sad_blk8(ref_blk, xpos_orig, ypos_orig, cur_mv, best_dist); + + if dist < best_dist { + best_dist = dist; + best_mv = cur_mv; + if dist <= DIST_THRESH / 4 { + return (best_mv, best_dist); + } + } + } + } + (best_mv, best_dist) + } + fn search_blk4(&mut self, mv_est: &mut MVEstimator, ref_blk: &[u8; 16], xpos_orig: usize, ypos_orig: usize, _cand_mvs: &[MV]) -> (MV, u32) { + let mut best_dist = MAX_DIST; + let mut best_mv = ZERO_MV; + + let mut cur_mv = ZERO_MV; + + let (width, height) = mv_est.ref_frame.get_dimensions(0); + let bavg = ref_blk.iter().fold(0u16, |acc, &x| acc + u16::from(x)); + + let mut rough_dist = std::i32::MAX; + for ytry in 0..mv_est.mv_range * 2 + 1 { + let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) }; + let ypos = (ypos_orig as isize) + (dy as isize); + if ypos < 0 || (ypos + 4) > (height as isize) { + continue; + } + let ypos = ypos as usize; + 
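// full-pel step scaled by 8 to the MV units used here (cf. from_pixels) +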
cur_mv.y = dy * 8; + for xtry in 0..mv_est.mv_range * 2 + 1 { + let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) }; + let xpos = (xpos_orig as isize) + (dx as isize); + if xpos < 0 || (xpos + 4) > (width as isize) { + continue; + } + let xpos = xpos as usize; + + let luma_off = xpos + ypos * self.stride[0]; + let cur_avg = self.msa[0][luma_off]; + + let rdist = (i32::from(cur_avg) - i32::from(bavg)).abs(); + if rdist > rough_dist { + continue; + } + rough_dist = rdist; + + cur_mv.x = dx * 8; + + let dist = mv_est.sad_blk4(ref_blk, xpos_orig, ypos_orig, cur_mv, best_dist); + + if dist < best_dist { + best_dist = dist; + best_mv = cur_mv; + if dist <= DIST_THRESH / 16 { + return (best_mv, best_dist); + } + } + } + } + (best_mv, best_dist) + } +} + +macro_rules! pattern_search { + ($struct_name: ident, $patterns: expr) => { + pub struct $struct_name { + point: [MV; $patterns.len()], + dist: [u32; $patterns.len()], + steps: &'static [MV; $patterns.len()], + } + + impl $struct_name { + pub fn new() -> Self { + Self { + point: $patterns, + dist: [MAX_DIST; $patterns.len()], + steps: &$patterns, + } + } + fn reset(&mut self) { + self.point = $patterns; + self.dist = [MAX_DIST; $patterns.len()]; + } + fn set_new_point(&mut self, start: MV, dist: u32) { + for (dst, &src) in self.point.iter_mut().zip(self.steps.iter()) { + *dst = src + start; + } + self.dist = [MAX_DIST; $patterns.len()]; + self.dist[0] = dist; + } + fn update(&mut self, step: MV) { + let mut new_point = self.point; + let mut new_dist = [MAX_DIST; $patterns.len()]; + + for point in new_point.iter_mut() { + *point += step; + } + + for (new_point, new_dist) in new_point.iter_mut().zip(new_dist.iter_mut()) { + for (&old_point, &old_dist) in self.point.iter().zip(self.dist.iter()) { + if *new_point == old_point { + *new_dist = old_dist; + break; + } + } + } + self.point = new_point; + self.dist = new_dist; + } + } + + impl MVSearch for $struct_name { + fn preinit(&mut self, _mv_est: &MVEstimator) {} + fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_mb: &SrcBlock, mb_x: usize, mb_y: usize) -> (MV, u32) { + search_template!(self, mv_est, cur_mb, mb_x, mb_y, sad_mb, DIST_THRESH) + } + fn search_blk8(&mut self, mv_est: &mut MVEstimator, ref_blk: &[u8; 64], xpos: usize, ypos: usize, _cand_mvs: &[MV]) -> (MV, u32) { + search_template!(self, mv_est, ref_blk, xpos, ypos, sad_blk8, DIST_THRESH / 4) + } + fn search_blk4(&mut self, mv_est: &mut MVEstimator, ref_blk: &[u8; 16], xpos: usize, ypos: usize, _cand_mvs: &[MV]) -> (MV, u32) { + search_template!(self, mv_est, ref_blk, xpos, ypos, sad_blk4, DIST_THRESH / 16) + } + } + } +} + +pattern_search!(DiaSearch, DIA_PATTERN); +pattern_search!(HexSearch, HEX_PATTERN); + +pub struct EPZSearch { + point: [MV; DIA_PATTERN.len()], + dist: [u32; DIA_PATTERN.len()], + steps: &'static [MV; DIA_PATTERN.len()], +} + +impl EPZSearch { + pub fn new() -> Self { + Self { + point: DIA_PATTERN, + dist: [MAX_DIST; DIA_PATTERN.len()], + steps: &DIA_PATTERN, + } + } + fn reset(&mut self) { + self.point = DIA_PATTERN; + self.dist = [MAX_DIST; DIA_PATTERN.len()]; + } + fn set_new_point(&mut self, start: MV, dist: u32) { + for (dst, &src) in self.point.iter_mut().zip(self.steps.iter()) { + *dst = src + start; + } + self.dist = [MAX_DIST; DIA_PATTERN.len()]; + self.dist[0] = dist; + } + fn update(&mut self, step: MV) { + let mut new_point = self.point; + let mut new_dist = [MAX_DIST; DIA_PATTERN.len()]; + + for point in new_point.iter_mut() { + *point += step; + } + + for (new_point, new_dist) in 
+            for (&old_point, &old_dist) in self.point.iter().zip(self.dist.iter()) {
+                if *new_point == old_point {
+                    *new_dist = old_dist;
+                    break;
+                }
+            }
+        }
+        self.point = new_point;
+        self.dist = new_dist;
+    }
+}
+
+impl MVSearch for EPZSearch {
+    fn preinit(&mut self, _mv_est: &MVEstimator) {}
+    fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_mb: &SrcBlock, mb_x: usize, mb_y: usize) -> (MV, u32) {
+        search_template!(self, mv_est, cur_mb, mb_x, mb_y, sad_mb, DIST_THRESH)
+    }
+    fn search_blk8(&mut self, mv_est: &mut MVEstimator, ref_blk: &[u8; 64], xpos: usize, ypos: usize, cand_mvs: &[MV]) -> (MV, u32) {
+        let mut best_mv = ZERO_MV;
+        let zero_dist = mv_est.sad_blk8(ref_blk, xpos, ypos, best_mv, MAX_DIST);
+        let mut best_dist = zero_dist;
+        if best_dist > DIST_THRESH {
+            for &cmv in cand_mvs[1..].iter() {
+                let dist = mv_est.sad_blk8(ref_blk, xpos, ypos, cmv, best_dist);
+                if dist < best_dist {
+                    best_dist = dist;
+                    best_mv = cmv;
+                    if best_dist <= DIST_THRESH {
+                        break;
+                    }
+                }
+            }
+            if best_dist > DIST_THRESH {
+                return search_template!(self, mv_est, ref_blk, xpos, ypos, sad_blk8, DIST_THRESH / 4, best_mv, best_dist, false);
+            }
+        }
+        (best_mv, best_dist)
+    }
+    fn search_blk4(&mut self, mv_est: &mut MVEstimator, ref_blk: &[u8; 16], xpos: usize, ypos: usize, cand_mvs: &[MV]) -> (MV, u32) {
+        let mut best_mv = ZERO_MV;
+        let zero_dist = mv_est.sad_blk4(ref_blk, xpos, ypos, best_mv, MAX_DIST);
+        let mut best_dist = zero_dist;
+        if best_dist > DIST_THRESH {
+            for &cmv in cand_mvs[1..].iter() {
+                let dist = mv_est.sad_blk4(ref_blk, xpos, ypos, cmv, best_dist);
+                if dist < best_dist {
+                    best_dist = dist;
+                    best_mv = cmv;
+                    if best_dist <= DIST_THRESH {
+                        break;
+                    }
+                }
+            }
+            if best_dist > DIST_THRESH {
+                return search_template!(self, mv_est, ref_blk, xpos, ypos, sad_blk4, DIST_THRESH / 16, best_mv, best_dist, false);
+            }
+        }
+        (best_mv, best_dist)
+    }
+}
+
+pub struct MVEstimator {
+    pub ref_mb: SrcBlock,
+    pub blk8: [u8; 64],
+    pub blk4: [u8; 16],
+    mc_buf: NAVideoBufferRef<u8>,
+    ref_frame: NAVideoBufferRef<u8>,
+    mv_range: i16,
+}
+
+#[allow(dead_code)]
+impl MVEstimator {
+    pub fn new(ref_frame: NAVideoBufferRef<u8>, mc_buf: NAVideoBufferRef<u8>, mv_range: i16) -> Self {
+        Self {
+            ref_mb: SrcBlock::new(),
+            blk8: [0; 64],
+            blk4: [0; 16],
+            ref_frame, mc_buf,
+            mv_range,
+        }
+    }
+    pub fn get_mb(&mut self, dst: &mut SrcBlock, mb_x: usize, mb_y: usize, cur_mv: MV) {
+        let tmp_blk = self.mc_buf.get_data_mut().unwrap();
+        mc_block16x16(&mut dst.luma, 0, 16, mb_x * 16, mb_y * 16, cur_mv.x * 2, cur_mv.y * 2, self.ref_frame.clone(), 0, tmp_blk);
+        mc_block8x8(&mut dst.chroma[0], 0, 8, mb_x * 8, mb_y * 8, cur_mv.x, cur_mv.y, self.ref_frame.clone(), 1, tmp_blk);
+        mc_block8x8(&mut dst.chroma[1], 0, 8, mb_x * 8, mb_y * 8, cur_mv.x, cur_mv.y, self.ref_frame.clone(), 2, tmp_blk);
+    }
+    pub fn get_blk8(&mut self, dst: &mut [u8; 64], plane: usize, x: usize, y: usize, mut cur_mv: MV) {
+        if plane == 0 {
+            cur_mv.x *= 2;
+            cur_mv.y *= 2;
+        }
+        mc_block8x8(dst, 0, 8, x, y, cur_mv.x, cur_mv.y, self.ref_frame.clone(), plane, self.mc_buf.get_data_mut().unwrap());
+    }
+    fn sad_blk8(&mut self, refblk: &[u8; 64], x: usize, y: usize, cur_mv: MV, _best_dist: u32) -> u32 {
+        mc_block8x8(&mut self.blk8, 0, 8, x, y, cur_mv.x * 2, cur_mv.y * 2, self.ref_frame.clone(), 0, self.mc_buf.get_data_mut().unwrap());
+        sad8x8(&self.blk8, refblk)
+    }
+    pub fn get_blk4(&mut self, dst: &mut [u8; 16], plane: usize, x: usize, y: usize, mut cur_mv: MV) {
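+        // Editor's note: the EPZS methods above follow the usual
+        // predictor-first strategy (a sketch, not normative):
+        //
+        //     try the zero MV first; if it is already under the threshold, stop;
+        //     otherwise try the candidate MVs predicted from neighbouring blocks
+        //     (the [1..] skip suggests the caller places the zero MV first);
+        //     only if none is good enough, invoke search_template! with
+        //     $fullpel_stage = false, i.e. go straight to the subpel diamond
+        //     refinement around the best predictor found so far.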
+        if plane == 0 {
+            cur_mv.x *= 2;
+            cur_mv.y *= 2;
+        }
+        mc_block4x4(dst, 0, 4, x, y, cur_mv.x, cur_mv.y, self.ref_frame.clone(), plane, self.mc_buf.get_data_mut().unwrap());
+    }
+    fn sad_blk4(&mut self, refblk: &[u8; 16], x: usize, y: usize, cur_mv: MV, _best_dist: u32) -> u32 {
+        mc_block4x4(&mut self.blk4, 0, 4, x, y, cur_mv.x * 2, cur_mv.y * 2, self.ref_frame.clone(), 0, self.mc_buf.get_data_mut().unwrap());
+        sad4x4(&self.blk4, refblk)
+    }
+    fn sad_mb(&mut self, cur_mb: &SrcBlock, mb_x: usize, mb_y: usize, cur_mv: MV, best_dist: u32) -> u32 {
+        let tmp_blk = self.mc_buf.get_data_mut().unwrap();
+
+        mc_block16x16(&mut self.ref_mb.luma, 0, 16, mb_x * 16, mb_y * 16, cur_mv.x * 2, cur_mv.y * 2, self.ref_frame.clone(), 0, tmp_blk);
+        mc_block8x8(&mut self.ref_mb.chroma[0], 0, 8, mb_x * 8, mb_y * 8, cur_mv.x, cur_mv.y, self.ref_frame.clone(), 1, tmp_blk);
+        mc_block8x8(&mut self.ref_mb.chroma[1], 0, 8, mb_x * 8, mb_y * 8, cur_mv.x, cur_mv.y, self.ref_frame.clone(), 2, tmp_blk);
+        let mut dist = 0;
+        let mut diff = [0; 16];
+        for (sblk, dblk) in self.ref_mb.luma_blocks().zip(cur_mb.luma_blocks()) {
+            get_block_difference(&mut diff, &sblk, &dblk);
+            dist += sad(&diff);
+            if dist > best_dist {
+                break;
+            }
+        }
+        'chroma_loop: for chroma in 0..2 {
+            for (sblk, dblk) in self.ref_mb.chroma_blocks(chroma).zip(cur_mb.chroma_blocks(chroma)) {
+                get_block_difference(&mut diff, &sblk, &dblk);
+                dist += sad(&diff);
+                if dist > best_dist {
+                    break 'chroma_loop;
+                }
+            }
+        }
+        dist
+    }
+}
+
+// despite the "sad" names, these helpers return the sum of squared
+// differences -- squared error is the distortion metric used throughout
+// this encoder
+fn sad(diff: &[i16; 16]) -> u32 {
+    diff.iter().fold(0u32, |acc, &x| acc + ((i32::from(x) * i32::from(x)) as u32))
+}
+fn sad8x8(blk1: &[u8; 64], blk2: &[u8; 64]) -> u32 {
+    let mut sum = 0u32;
+    for (&a, &b) in blk1.iter().zip(blk2.iter()) {
+        let diff = i32::from(a) - i32::from(b);
+        sum += (diff * diff) as u32;
+    }
+    sum
+}
+fn sad4x4(blk1: &[u8; 16], blk2: &[u8; 16]) -> u32 {
+    let mut sum = 0u32;
+    for (&a, &b) in blk1.iter().zip(blk2.iter()) {
+        let diff = i32::from(a) - i32::from(b);
+        sum += (diff * diff) as u32;
+    }
+    sum
+}
diff --git a/nihav-duck/src/codecs/vp7enc/rdo.rs b/nihav-duck/src/codecs/vp7enc/rdo.rs
new file mode 100644
index 0000000..88f7456
--- /dev/null
+++ b/nihav-duck/src/codecs/vp7enc/rdo.rs
@@ -0,0 +1,207 @@
+use super::blocks::*;
+use super::coder::*;
+
+pub const SMALL_DIST: u32 = 256;
+pub const MAX_DIST: u32 = std::u32::MAX;
+
+const INTER_TO_INTRA_RATIO: f32 = 0.85;
+
+pub struct RateDistMetric {
+    pub lambda: f32,
+}
+
+impl RateDistMetric {
+    pub fn new() -> Self {
+        Self {
+            lambda: 1.0,
+        }
+    }
+    pub fn calc_metric(&self, dist: u32, nits: u32) -> u32 {
+        ((dist as f32) + self.lambda * (nits as f32) + 0.5) as u32
+    }
+    pub fn adjust_br(&mut self, cur_size: usize, tgt_size: usize) {
+        let low_limit = tgt_size - tgt_size / 8;
+        let up_limit = tgt_size + tgt_size / 8;
+        if cur_size < low_limit {
+            self.lambda = (self.lambda - 0.1).max(0.0);
+        } else if cur_size > up_limit {
+            self.lambda = (self.lambda + 0.1).min(16.0);
+        }
+    }
+
+    pub fn block_dist(&self, src: &[u8; 16], new: &[u8; 16], q: usize, ctype: usize, pctx: u8, probs: &[[[u8; 11]; 3]; 8]) -> (u32, bool) {
+        let mut diff = [0i16; 16];
+        get_block_difference(&mut diff, src, new);
+        diff.fdct();
+        diff.quant(q, ctype);
+        let has_nz = diff.has_nz();
+        let nits = estimate_subblock_nits(&diff, ctype, pctx, probs);
+        diff.dequant(q, ctype);
+        diff.idct();
+        let dist = get_difference_dist(src, new, &diff);
+        (self.calc_metric(dist, nits), has_nz)
+    }
+}
+
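+// Editor's note: a minimal sketch of how the J = D + lambda * R cost above
+// behaves.  block_dist() obtains D from the actual reconstruction error and
+// R ("nits", i.e. eighth-bits, see PROB_BITS in vpenc/models.rs) from the
+// coefficient probability models.  `rdo_sketch` is a hypothetical test
+// module with illustrative numbers, not part of the original patch.
+#[cfg(test)]
+mod rdo_sketch {
+    use super::RateDistMetric;
+
+    #[test]
+    fn lambda_trades_rate_for_distortion() {
+        let m = RateDistMetric::new(); // lambda == 1.0
+        // mode A: less distortion but more bits; mode B: the opposite
+        let a = m.calc_metric(1000, 600);
+        let b = m.calc_metric(1400, 100);
+        assert!(b < a, "with equal weighting the cheaper-to-code mode wins");
+    }
+}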
+#[derive(Default)]
+pub struct BitRateControl {
+    tb_num: u32,
+    tb_den: u32,
+    key_int: u32,
+    bitrate: u32,
+    force_q: Option<usize>,
+    bitpool: u32,
+    fpos: u32,
+    kpos: u32,
+    num_mb: u32,
+}
+
+impl BitRateControl {
+    pub fn new() -> Self { Self::default() }
+    fn reset(&mut self) {
+        self.fpos = 0;
+        self.kpos = 0;
+        self.bitpool = self.bitrate;
+    }
+    pub fn set_params(&mut self, tb_num: u32, tb_den: u32, bitrate: u32, key_int: u8, num_mb: usize) {
+        self.tb_num = tb_num;
+        self.tb_den = tb_den;
+        self.bitrate = bitrate;
+        self.key_int = u32::from(key_int);
+        self.num_mb = num_mb as u32;
+        self.reset();
+    }
+    pub fn has_bitrate(&self) -> bool { self.bitrate != 0 }
+    pub fn get_quant(&self) -> Option<usize> { self.force_q }
+    pub fn set_quant(&mut self, q: Option<usize>) {
+        if self.force_q != q {
+            self.force_q = q;
+            self.reset();
+        }
+    }
+    pub fn set_key_interval(&mut self, key_int: u8) {
+        let key_int = u32::from(key_int);
+        if self.key_int != key_int {
+            self.key_int = key_int;
+            self.reset();
+        }
+    }
+    pub fn get_target_size(&self, is_intra: bool) -> u32 {
+        if self.bitrate != 0 && self.force_q.is_none() {
+            let pool_frames = self.tb_den - self.fpos;
+            if self.key_int <= 1 { // all-intra stream
+                if self.bitpool == 0 || pool_frames == 0 {
+                    self.bitrate * self.tb_num / self.tb_den
+                } else {
+                    self.bitpool / pool_frames
+                }
+            } else {
+                // an inter frame is assumed to cost INTER_TO_INTRA_RATIO of an
+                // intra frame, so a GOP of key_int frames has a total weight of
+                // 1 + (key_int - 1) * INTER_TO_INTRA_RATIO intra-frame units
+                let full_gop_weight = 1.0 + ((self.key_int - 1) as f32) * INTER_TO_INTRA_RATIO;
+                let i_bits = if self.bitpool == 0 || pool_frames == 0 {
+                        let gop_size = self.bitrate * self.tb_num * self.key_int / self.tb_den;
+                        (gop_size as f32) / full_gop_weight
+                    } else {
+                        let full_gops = pool_frames / self.key_int;
+                        let weight = (full_gops as f32) * full_gop_weight + ((pool_frames % self.key_int) as f32) * INTER_TO_INTRA_RATIO;
+                        (self.bitpool as f32) / weight
+                    };
+                if is_intra {
+                    (i_bits + 0.5) as u32
+                } else {
+                    (i_bits * INTER_TO_INTRA_RATIO + 0.5) as u32
+                }
+            }
+        } else {
+            0
+        }
+    }
+    fn pred_nits_per_mb(is_intra: bool, q: usize) -> f32 {
+        let fq = q as f32;
+        match (is_intra, q) {
+            (true,  0..=6)  => 3434.0 + fq * fq * 7.5 - fq * 195.0,
+            (true,  _)      => 2500.0 - (fq - 6.0).ln() * 500.0,
+            (false, 0..=10) => 1595.0 + fq * fq * 3.4 - fq * 125.0,
+            (false, _)      =>  800.0 - (fq - 8.0).ln() * 155.0,
+        }
+    }
+    #[allow(dead_code)]
+    // TODO: use this for refining the quantiser estimate
+    pub fn predict_size(&self, is_intra: bool, q: usize) -> u32 {
+        let min_size = if is_intra { 200 * 8 } else { 50 * 8 };
+        let nits_per_mb = Self::pred_nits_per_mb(is_intra, q);
+        ((nits_per_mb * (self.num_mb as f32) / 8.0) as u32).max(min_size)
+    }
+    pub fn get_frame_quant(&self, is_intra: bool) -> usize {
+        if let Some(q) = self.force_q {
+            q
+        } else {
+            let expected_size = self.get_target_size(is_intra);
+            // "nits" are eighth-bits, hence the multiplication by 8
+            let nits_per_mb = ((expected_size * 8) as f32) / (self.num_mb as f32);
+            if is_intra {
+                if nits_per_mb > 2500.0 { // simple search in the quadratic region
+                    if nits_per_mb > Self::pred_nits_per_mb(is_intra, 3) {
+                        if nits_per_mb > Self::pred_nits_per_mb(is_intra, 1) {
+                            0
+                        } else if nits_per_mb > Self::pred_nits_per_mb(is_intra, 2) {
+                            1
+                        } else {
+                            2
+                        }
+                    } else {
+                        if nits_per_mb > Self::pred_nits_per_mb(is_intra, 4) {
+                            3
+                        } else if nits_per_mb > Self::pred_nits_per_mb(is_intra, 5) {
+                            4
+                        } else {
+                            5
+                        }
+                    }
+                } else {
+                    ((((2500.0 - nits_per_mb) / 500.0).exp() + 6.0) as usize).min(127)
+                }
+            } else {
+                if nits_per_mb > 680.0 { // simple search in the quadratic region
+                    let (start, end) = if nits_per_mb > Self::pred_nits_per_mb(is_intra, 5) {
+                            if nits_per_mb > Self::pred_nits_per_mb(is_intra, 3) {
+                                (0, 3)
+                            } else {
+                                (3, 5)
+                            }
+                        } else if nits_per_mb > Self::pred_nits_per_mb(is_intra, 7) {
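+                        // Editor's note: these branches invert pred_nits_per_mb():
+                        // in the low-q region the per-MB cost model is quadratic,
+                        // so q is found with a few ordered comparisons; in the
+                        // high-q region the model is logarithmic and q is
+                        // recovered in closed form with exp() below.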
+                            (5, 7)
+                        } else {
+                            (7, 10)
+                        };
+                    let mut q = end;
+                    for qq in start..end {
+                        if nits_per_mb > Self::pred_nits_per_mb(is_intra, qq) {
+                            q = qq;
+                            break;
+                        }
+                    }
+                    q
+                } else {
+                    ((((800.0 - nits_per_mb) / 155.0).exp() + 6.0) as usize).max(10).min(127)
+                }
+            }
+        }
+    }
+    pub fn update(&mut self, size: usize) {
+        self.kpos += 1;
+        if self.kpos == self.key_int {
+            self.kpos = 0;
+        }
+        if self.bitrate == 0 || self.force_q.is_some() {
+            return;
+        }
+        self.fpos += self.tb_num;
+        while self.fpos >= self.tb_den {
+            self.fpos -= self.tb_den;
+            self.bitpool += self.bitrate;
+        }
+        self.bitpool = self.bitpool.saturating_sub((size * 8) as u32);
+    }
+}
diff --git a/nihav-duck/src/codecs/vpenc/models.rs b/nihav-duck/src/codecs/vpenc/models.rs
index 0495c89..c2c2235 100644
--- a/nihav-duck/src/codecs/vpenc/models.rs
+++ b/nihav-duck/src/codecs/vpenc/models.rs
@@ -1,7 +1,7 @@
 #[derive(Clone,Copy,Default)]
 pub struct ProbCounter {
-    zeroes: u32,
-    total: u32,
+    pub zeroes: u32,
+    pub total: u32,
 }
 
 // bits to code zero probability multiplied by eight
@@ -69,7 +69,7 @@ impl ProbCounter {
             old_prob
         }
     }
-    fn est_bits(prob: u8, zeroes: u32, total: u32) -> u32 {
+    pub fn est_bits(prob: u8, zeroes: u32, total: u32) -> u32 {
         (u32::from(PROB_BITS[prob as usize]) * zeroes + u32::from(PROB_BITS[256 - (prob as usize)]) * (total - zeroes) + 7) >> 3
     }
 }
diff --git a/nihav-duck/src/codecs/vpenc/motion_est.rs b/nihav-duck/src/codecs/vpenc/motion_est.rs
index 128633f..005780c 100644
--- a/nihav-duck/src/codecs/vpenc/motion_est.rs
+++ b/nihav-duck/src/codecs/vpenc/motion_est.rs
@@ -3,10 +3,13 @@ use nihav_codec_support::codecs::{MV, ZERO_MV};
 use std::str::FromStr;
 
 #[derive(Debug,Clone,Copy,PartialEq)]
+#[allow(dead_code)]
 pub enum MVSearchMode {
     Full,
+    SEA,
     Diamond,
     Hexagon,
+    EPZS,
 }
 
 impl Default for MVSearchMode {
@@ -22,8 +25,10 @@ impl FromStr for MVSearchMode {
     fn from_str(s: &str) -> Result<Self, Self::Err> {
         match s {
             "full" => Ok(MVSearchMode::Full),
+            "sea" => Ok(MVSearchMode::SEA),
             "dia" => Ok(MVSearchMode::Diamond),
             "hex" => Ok(MVSearchMode::Hexagon),
+            "epzs" => Ok(MVSearchMode::EPZS),
             _ => Err(ParseError{}),
         }
     }
@@ -33,8 +38,10 @@ impl std::fmt::Display for MVSearchMode {
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
         match *self {
             MVSearchMode::Full => write!(f, "full"),
+            MVSearchMode::SEA => write!(f, "sea"),
             MVSearchMode::Diamond => write!(f, "dia"),
             MVSearchMode::Hexagon => write!(f, "hex"),
+            MVSearchMode::EPZS => write!(f, "epzs"),
         }
     }
 }
@@ -80,58 +87,63 @@ pub const REFINEMENT: [MV; 4] = [
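+// Editor's note: illustrative round-trip for the newly added mode names,
+// mirroring how encoder options are parsed from strings; `mode_parse_sketch`
+// is a hypothetical test module, not part of the original patch.
+#[cfg(test)]
+mod mode_parse_sketch {
+    use super::MVSearchMode;
+    use std::str::FromStr;
+
+    #[test]
+    fn new_modes_round_trip() {
+        for &name in ["sea", "epzs"].iter() {
+            if let Ok(mode) = MVSearchMode::from_str(name) {
+                assert_eq!(mode.to_string(), name);
+            } else {
+                panic!("mode name should parse");
+            }
+        }
+    }
+}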
 #[macro_export]
 macro_rules! search_template {
-    ($self: expr, $mv_est: expr, $cur_blk: expr, $mb_x: expr, $mb_y: expr, $sad_func: ident) => ({
-        let mut best_dist = MAX_DIST;
-        let mut best_mv;
+    ($self: expr, $mv_est: expr, $cur_blk: expr, $mb_x: expr, $mb_y: expr, $sad_func: ident, $threshold: expr) => ({
+        search_template!($self, $mv_est, $cur_blk, $mb_x, $mb_y, $sad_func, $threshold, ZERO_MV, MAX_DIST, true)
+    });
+    ($self: expr, $mv_est: expr, $cur_blk: expr, $mb_x: expr, $mb_y: expr, $sad_func: ident, $threshold: expr, $start_mv: expr, $best_dist: expr, $fullpel_stage: expr) => ({
+        let mut best_dist = $best_dist;
+        let mut best_mv = $start_mv;
 
         let mut min_dist;
         let mut min_idx;
 
-        $self.reset();
-        loop {
-            let mut cur_best_dist = best_dist;
-            for (dist, &point) in $self.dist.iter_mut().zip($self.point.iter()) {
-                if *dist == MAX_DIST {
-                    *dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, point.from_pixels(), cur_best_dist);
-                    cur_best_dist = cur_best_dist.min(*dist);
-                    if *dist <= DIST_THRESH {
-                        break;
+        if $fullpel_stage {
+            $self.reset();
+            loop {
+                let mut cur_best_dist = best_dist;
+                for (dist, &point) in $self.dist.iter_mut().zip($self.point.iter()) {
+                    if *dist == MAX_DIST {
+                        *dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, point.from_pixels(), cur_best_dist);
+                        cur_best_dist = cur_best_dist.min(*dist);
+                        if *dist <= $threshold {
+                            break;
+                        }
                     }
                 }
-            }
-            min_dist = $self.dist[0];
-            min_idx = 0;
-            for (i, &dist) in $self.dist.iter().enumerate().skip(1) {
-                if dist < min_dist {
-                    min_dist = dist;
-                    min_idx = i;
-                    if dist <= DIST_THRESH {
-                        break;
+                min_dist = $self.dist[0];
+                min_idx = 0;
+                for (i, &dist) in $self.dist.iter().enumerate().skip(1) {
+                    if dist < min_dist {
+                        min_dist = dist;
+                        min_idx = i;
+                        if dist <= $threshold {
+                            break;
+                        }
                     }
                 }
-            }
-            if min_dist <= DIST_THRESH || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range || $self.point[min_idx].y.abs() >= $mv_est.mv_range {
-                break;
+                if min_dist <= $threshold || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range || $self.point[min_idx].y.abs() >= $mv_est.mv_range {
+                    break;
+                }
+                best_dist = min_dist;
+                $self.update($self.steps[min_idx]);
             }
             best_dist = min_dist;
-            $self.update($self.steps[min_idx]);
-        }
-        best_dist = min_dist;
-        best_mv = $self.point[min_idx];
-        if best_dist <= DIST_THRESH {
-            return (best_mv.from_pixels(), best_dist);
-        }
-        for &step in REFINEMENT.iter() {
-            let mv = best_mv + step;
-            let dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, mv.from_pixels(), MAX_DIST);
-            if best_dist > dist {
-                best_dist = dist;
-                best_mv = mv;
+            best_mv = $self.point[min_idx];
+            if best_dist <= $threshold {
+                return (best_mv.from_pixels(), best_dist);
+            }
+            for &step in REFINEMENT.iter() {
+                let mv = best_mv + step;
+                let dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, mv.from_pixels(), MAX_DIST);
+                if best_dist > dist {
+                    best_dist = dist;
+                    best_mv = mv;
+                }
+            }
+            best_mv = best_mv.from_pixels();
+            if best_dist <= $threshold {
+                return (best_mv, best_dist);
             }
-        }
-        best_mv = best_mv.from_pixels();
-        if best_dist <= DIST_THRESH {
-            return (best_mv, best_dist);
         }
 
         // subpel refinement
@@ -142,7 +154,7 @@ macro_rules! search_template {
             if *dist == MAX_DIST {
                 *dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, point, cur_best_dist);
                 cur_best_dist = cur_best_dist.min(*dist);
-                if *dist <= DIST_THRESH {
+                if *dist <= $threshold {
                     break;
                 }
             }
@@ -153,12 +165,12 @@ macro_rules! search_template {
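+            // Editor's note: $threshold replaces the fixed DIST_THRESH so the
+            // early-exit bound can scale with block size -- the callers above
+            // pass DIST_THRESH for a 16x16 macroblock, DIST_THRESH / 4 for an
+            // 8x8 block and DIST_THRESH / 16 for a 4x4 block, keeping the
+            // bound roughly proportional to the number of pixels compared.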
             if dist < min_dist {
                 min_dist = dist;
                 min_idx = i;
-                if dist <= DIST_THRESH {
+                if dist <= $threshold {
                     break;
                 }
             }
         }
-        if min_dist <= DIST_THRESH || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range * 4 || $self.point[min_idx].y.abs() >= $mv_est.mv_range * 4 {
+        if min_dist <= $threshold || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range * 8 || $self.point[min_idx].y.abs() >= $mv_est.mv_range * 8 {
             break;
         }
         best_dist = min_dist;
@@ -166,7 +178,7 @@ macro_rules! search_template {
         }
         best_dist = min_dist;
         best_mv = $self.point[min_idx];
-        if best_dist <= DIST_THRESH {
+        if best_dist <= $threshold {
             return (best_mv, best_dist);
         }
         for &step in REFINEMENT.iter() {
@@ -178,5 +190,5 @@ macro_rules! search_template {
             }
         }
         (best_mv, best_dist)
-    })
+    });
 }
-- 
2.39.5