From: Kostya Shishkov Date: Thu, 18 May 2023 16:27:26 +0000 (+0200) Subject: RealVideo 4 encoder X-Git-Url: https://git.nihav.org/?p=nihav.git;a=commitdiff_plain;h=4965a5e560c5e194c5b5163c591fcade5f56c3f0 RealVideo 4 encoder --- diff --git a/nihav-realmedia/Cargo.toml b/nihav-realmedia/Cargo.toml index 2d096be..06a2141 100644 --- a/nihav-realmedia/Cargo.toml +++ b/nihav-realmedia/Cargo.toml @@ -11,6 +11,9 @@ path = "../nihav-core" path = "../nihav-codec-support" features = ["h263", "mdct", "blockdsp"] +[dev-dependencies] +nihav_commonfmt = { path = "../nihav-commonfmt", default-features=false, features = ["demuxer_y4m", "decoder_rawvideo"] } + [features] default = ["all_decoders", "all_demuxers", "all_encoders", "all_muxers"] demuxers = [] @@ -40,7 +43,8 @@ decoder_ralf = ["decoders"] all_encoders = ["all_video_encoders", "all_audio_encoders"] encoders = [] -all_video_encoders = [] +all_video_encoders = ["encoder_rv40"] +encoder_rv40 = ["encoders"] all_audio_encoders = ["encoder_cook"] encoder_cook = ["encoders"] diff --git a/nihav-realmedia/src/codecs/mod.rs b/nihav-realmedia/src/codecs/mod.rs index 987cc8d..c6058a5 100644 --- a/nihav-realmedia/src/codecs/mod.rs +++ b/nihav-realmedia/src/codecs/mod.rs @@ -11,7 +11,7 @@ macro_rules! 
validate { #[cfg(any(feature="decoder_realvideo3", feature="decoder_realvideo4"))] mod rv3040; -#[cfg(any(feature="decoder_realvideo3", feature="decoder_realvideo4"))] +#[cfg(any(feature="decoder_realvideo3", feature="decoder_realvideo4", feature="encoder_rv40"))] #[allow(clippy::erasing_op)] mod rv34codes; #[cfg(any(feature="decoder_realvideo3", feature="decoder_realvideo4"))] @@ -32,7 +32,7 @@ pub mod rv30; pub mod rv30dsp; #[cfg(feature="decoder_realvideo4")] pub mod rv40; -#[cfg(feature="decoder_realvideo4")] +#[cfg(any(feature="decoder_realvideo4", feature="encoder_rv40"))] pub mod rv40data; #[cfg(feature="decoder_realvideo4")] #[allow(clippy::erasing_op)] @@ -91,10 +91,16 @@ pub fn realmedia_register_all_decoders(rd: &mut RegisteredDecoders) { #[cfg(feature="encoder_cook")] mod cookenc; +#[cfg(feature="encoder_rv40")] +mod rv40enc; + #[cfg(feature="encoders")] const ENCODERS: &[EncoderInfo] = &[ #[cfg(feature="encoder_cook")] EncoderInfo { name: "cook", get_encoder: cookenc::get_encoder }, + +#[cfg(feature="encoder_rv40")] + EncoderInfo { name: "realvideo4", get_encoder: rv40enc::get_encoder }, ]; /// Registers all available encoders provided by this crate. 
diff --git a/nihav-realmedia/src/codecs/rv40enc/bitstream.rs b/nihav-realmedia/src/codecs/rv40enc/bitstream.rs new file mode 100644 index 0000000..bd0bacc --- /dev/null +++ b/nihav-realmedia/src/codecs/rv40enc/bitstream.rs @@ -0,0 +1,534 @@ +use nihav_core::frame::FrameType; +use nihav_core::io::bitwriter::*; +use nihav_core::io::intcode::*; +use nihav_codec_support::codecs::MV; +use super::types::*; +use super::super::rv34codes::*; +use super::super::rv40data::*; + +pub fn write_slice_header(bw: &mut BitWriter, ftype: FrameType, q: usize, set_idx: usize, deblock: bool, pts: u32) { + bw.write0(); + match ftype { + FrameType::I => bw.write(0, 2), + FrameType::P => bw.write(2, 2), + FrameType::B => bw.write(3, 2), + _ => unreachable!(), + }; + bw.write(q as u32, 5); + bw.write(0, 2); // unknown + bw.write(set_idx as u32, 2); + bw.write(!deblock as u32, 1); + bw.write(pts, 13); +} + +pub fn write_slice_dimensions(bw: &mut BitWriter, width: usize, height: usize) { + let wcode = match width { + 160 => 0, + 176 => 1, + 240 => 2, + 320 => 3, + 352 => 4, + 640 => 5, + 704 => 6, + _ => 7, + }; + bw.write(wcode, 3); + if wcode == 7 { + let mut w = width >> 2; + while w >= 255 { + bw.write(255, 8); + w -= 255; + } + bw.write(w as u32, 8); + } + + let hcode = match height { + 120 => 0, + 132 => 1, + 144 => 2, + 240 => 3, + 288 => 4, + 480 => 5, + 180 => 6, + 360 => 7, + 576 => 8, + _ => 9, + }; + if hcode < 6 { + bw.write(hcode, 3); + } else { + bw.write(hcode + 6, 4); + if hcode == 9 { + let mut h = height >> 2; + while h >= 255 { + bw.write(255, 8); + h -= 255; + } + bw.write(h as u32, 8); + } + } +} + +pub fn write_slice_mb_idx(bw: &mut BitWriter, mb_idx: usize, num_mbs: usize) { + let mba_bits = match num_mbs - 1 { + 0..= 47 => 6, + 48..= 98 => 7, + 99..= 395 => 9, + 396..=1583 => 11, + 1584..=6335 => 13, + 6336..=9215 => 14, + _ => unreachable!(), + }; + bw.write(mb_idx as u32, mba_bits); +} + +pub fn write_skip_count(bw: &mut BitWriter, skip_count: u32) { + 
bw.write_code(UintCodeType::Gamma, skip_count); +} + +fn write_mv(bw: &mut BitWriter, mv: MV) { + let xcode = if mv.x > 0 { (mv.x - 1) * 2 + 1 } else { -mv.x * 2 } as u32; + let ycode = if mv.y > 0 { (mv.y - 1) * 2 + 1 } else { -mv.y * 2 } as u32; + + bw.write_code(UintCodeType::Gamma, xcode); + bw.write_code(UintCodeType::Gamma, ycode); +} + +pub fn write_mb_header(bw: &mut BitWriter, ftype: FrameType, sstate: &SliceState, mbstate: &MBState) { + let mb_idx = mbstate.get_mb_idx(sstate.mb_x, sstate.mb_y); + let pred_mbt = mbstate.get_pred_mbtype(sstate, ftype == FrameType::B); + + let set_id = pred_mbt.to_code(); + + if ftype != FrameType::I { + let (codes, lens) = if ftype == FrameType::P { + (&RV40_PTYPE_CODES[set_id][..], &RV40_PTYPE_BITS[set_id][..]) + } else { + (&RV40_BTYPE_CODES[set_id][..], &RV40_BTYPE_BITS[set_id][..]) + }; + let idx = mbstate.mb_type[mb_idx].to_code(); + bw.write(codes[idx].into(), lens[idx]); + } + match mbstate.mb_type[mb_idx] { + MBType::Intra16 => { + if ftype == FrameType::I { + bw.write1(); + } + bw.write(mbstate.ipred[mbstate.get_blk4_idx(sstate.mb_x, sstate.mb_y)] as u32, 2); + }, + MBType::Intra => { + if ftype == FrameType::I { + bw.write0(); + bw.write1(); //dquant + } + let ystart = if sstate.has_t { 0 } else { 1 }; + let mut blk4_idx = mbstate.get_blk4_idx(sstate.mb_x, sstate.mb_y); + + if !sstate.has_t { + let mut code = 0; + for &el in mbstate.ipred[blk4_idx..][..4].iter() { + code = code * 2 + if el == 0 { 0 } else { 1 }; + } + bw.write(RV40_AIC_TOP_CODES[code].into(), RV40_AIC_TOP_BITS[code]); + blk4_idx += mbstate.blk4_stride; + } + for y in ystart..4 { + let mut x = 0; + while x < 4 { + let (lctx, tctx, trctx) = mbstate.get_ipred4x4_ctx(sstate.mb_x, sstate.mb_y, x, y); + let mode = mbstate.ipred[blk4_idx + x]; + let ctx_word = if x < 3 { + ((trctx & 0xF) as u16) + (((tctx & 0xF) as u16) << 4) + (((lctx & 0xF) as u16) << 8) + } else { 0xFFF }; + if let Some(idx) = RV40_AIC_PATTERNS.iter().position(|&x| x == ctx_word) { + 
let mode1 = mbstate.ipred[blk4_idx + x + 1]; + let code = mode * 9 + mode1; + bw.write(RV40_AIC_MODE2_CODES[idx][code as usize].into(), + RV40_AIC_MODE2_BITS[idx][code as usize]); + x += 2; + } else if tctx != -1 && lctx != -1 { + let idx = (tctx + lctx * 10) as usize; + let code = mode as usize; + bw.write(RV40_AIC_MODE1_CODES[idx][code].into(), + RV40_AIC_MODE1_BITS[idx][code]); + x += 1; + } else { + match lctx { + -1 if tctx < 2 => { + if mode == 0 { + bw.write1(); + } else { +assert_eq!(mode, 1); + bw.write0(); + } + }, + 0 | 2 => { + if mode == 0 { + bw.write1(); + } else { +assert_eq!(mode, 2); + bw.write0(); + } + }, + _ => { +assert_eq!(mode, 0); + }, + }; + x += 1; + } + } + blk4_idx += mbstate.blk4_stride; + } + }, + MBType::P16x16 | MBType::P16x16Mix => { + let diff_mv = mbstate.get_diff_mv(sstate, true, 0, 0); + write_mv(bw, diff_mv); + }, + MBType::P16x8 => { + let diff_mv = mbstate.get_diff_mv(sstate, true, 0, 0); + write_mv(bw, diff_mv); + let diff_mv = mbstate.get_diff_mv(sstate, true, 0, 1); + write_mv(bw, diff_mv); + }, + MBType::P8x16 => { + let diff_mv = mbstate.get_diff_mv(sstate, false, 0, 0); + write_mv(bw, diff_mv); + let diff_mv = mbstate.get_diff_mv(sstate, false, 1, 0); + write_mv(bw, diff_mv); + }, + MBType::P8x8 => { + for i in 0..4 { + let diff_mv = mbstate.get_diff_mv(sstate, false, i & 1, i >> 1); + write_mv(bw, diff_mv); + } + }, + MBType::Forward => { + let fwd_diff = mbstate.get_diff_mv_b(sstate, true); + write_mv(bw, fwd_diff); + }, + MBType::Backward => { + let bwd_diff = mbstate.get_diff_mv_b(sstate, false); + write_mv(bw, bwd_diff); + }, + MBType::Bidir => { + let fwd_diff = mbstate.get_diff_mv_b(sstate, true); + let bwd_diff = mbstate.get_diff_mv_b(sstate, false); + write_mv(bw, fwd_diff); + write_mv(bw, bwd_diff); + }, + MBType::Invalid => unreachable!(), + _ => unimplemented!(), + }; +} + +trait CodeWriter { + fn write(&self, bw: &mut BitWriter, code: u16); +} + +impl CodeWriter for RV34CodeReader { + fn write(&self, bw: 
&mut BitWriter, to_write: u16) { + for (&sym, (&code, &bits)) in self.syms.iter().zip(self.codes.iter().zip(self.lengths.iter())) { + if sym == to_write { + bw.write(code, bits); + return; + } + } +unreachable!(); + } +} + +impl CodeWriter for RV34CBPCodeReader { + fn write(&self, bw: &mut BitWriter, to_write: u16) { + for (&sym, (&code, &bits)) in self.syms.iter().zip(self.codes.iter().zip(self.lengths.iter())) { + if u16::from(sym) == to_write { + bw.write(code, bits); + return; + } + } +unreachable!(); + } +} + +struct CBPSet { + cbp_pattern: RV34CodeReader, + cbp: [RV34CBPCodeReader; 4] +} + +impl CBPSet { + fn new(intra: bool, set: usize, subset: usize) -> Self { + if intra { + let cbp_pat = RV34CodeReader::new(&RV34_INTRA_CBPPAT[set][subset]); + let cbp0 = RV34CBPCodeReader::new(&RV34_INTRA_CBP[set][subset]); + let cbp1 = RV34CBPCodeReader::new(&RV34_INTRA_CBP[set][subset + 1*2]); + let cbp2 = RV34CBPCodeReader::new(&RV34_INTRA_CBP[set][subset + 2*2]); + let cbp3 = RV34CBPCodeReader::new(&RV34_INTRA_CBP[set][subset + 3*2]); + CBPSet { cbp_pattern: cbp_pat, cbp: [cbp0, cbp1, cbp2, cbp3] } + } else { + let cbp_pat = RV34CodeReader::new(&RV34_INTER_CBPPAT[set]); + let cbp0 = RV34CBPCodeReader::new(&RV34_INTER_CBP[set][0]); + let cbp1 = RV34CBPCodeReader::new(&RV34_INTER_CBP[set][1]); + let cbp2 = RV34CBPCodeReader::new(&RV34_INTER_CBP[set][2]); + let cbp3 = RV34CBPCodeReader::new(&RV34_INTER_CBP[set][3]); + CBPSet { cbp_pattern: cbp_pat, cbp: [cbp0, cbp1, cbp2, cbp3] } + } + } +} + +struct CoefSet { + pat0: Vec, + pat1: Vec, + pat2: Vec, +} + +impl CoefSet { + fn new(intra: bool, set: usize) -> Self { + if intra { + let first0 = RV34CodeReader::new(&RV34_INTRA_FIRSTPAT[set][0]); + let first1 = RV34CodeReader::new(&RV34_INTRA_FIRSTPAT[set][1]); + let first2 = RV34CodeReader::new(&RV34_INTRA_FIRSTPAT[set][2]); + let first3 = RV34CodeReader::new(&RV34_INTRA_FIRSTPAT[set][3]); + let firsts = vec![first0, first1, first2, first3]; + + let second0 = 
RV34CodeReader::new(&RV34_INTRA_SECONDPAT[set][0]); + let second1 = RV34CodeReader::new(&RV34_INTRA_SECONDPAT[set][1]); + let seconds = vec![second0, second1]; + + let third0 = RV34CodeReader::new(&RV34_INTRA_THIRDPAT[set][0]); + let third1 = RV34CodeReader::new(&RV34_INTRA_THIRDPAT[set][1]); + let thirds = vec![third0, third1]; + + CoefSet { pat0: firsts, pat1: seconds, pat2: thirds } + } else { + let first0 = RV34CodeReader::new(&RV34_INTER_FIRSTPAT[set][0]); + let first1 = RV34CodeReader::new(&RV34_INTER_FIRSTPAT[set][1]); + let firsts = vec![first0, first1]; + + let second0 = RV34CodeReader::new(&RV34_INTER_SECONDPAT[set][0]); + let second1 = RV34CodeReader::new(&RV34_INTER_SECONDPAT[set][1]); + let seconds = vec![second0, second1]; + + let third0 = RV34CodeReader::new(&RV34_INTER_THIRDPAT[set][0]); + let third1 = RV34CodeReader::new(&RV34_INTER_THIRDPAT[set][1]); + let thirds = vec![third0, third1]; + + CoefSet { pat0: firsts, pat1: seconds, pat2: thirds } + } + } +} + +struct FullSet { + cbp: Vec, + cset: CoefSet, + coeffs: RV34CodeReader, +} + +impl FullSet { + fn new(intra: bool, set: usize) -> Self { + if intra { + let cbp0 = CBPSet::new(intra, set, 0); + let cbp1 = CBPSet::new(intra, set, 1); + let cbp: Vec = vec![cbp0, cbp1]; + let cset = CoefSet::new(intra, set); + let coeffs = RV34CodeReader::new(&RV34_INTRA_COEFFS[set]); + FullSet { cbp, cset, coeffs } + } else { + let cbp0 = CBPSet::new(intra, set, 0); + let cbp: Vec = vec![cbp0]; + let cset = CoefSet::new(intra, set); + let coeffs = RV34CodeReader::new(&RV34_INTER_COEFFS[set]); + FullSet { cbp, cset, coeffs } + } + } + fn write_block(&self, bw: &mut BitWriter, blk: &Block, subset_idx: usize, luma: bool) { + let sblk0 = [blk.coeffs[0], blk.coeffs[1], blk.coeffs[4], blk.coeffs[5]]; + let sblk1 = [blk.coeffs[2], blk.coeffs[3], blk.coeffs[6], blk.coeffs[7]]; + let sblk2 = [blk.coeffs[8], blk.coeffs[12], blk.coeffs[9], blk.coeffs[13]]; // sub-block 2 has different order + let sblk3 = [blk.coeffs[10], 
blk.coeffs[11], blk.coeffs[14], blk.coeffs[15]]; + + let idx0 = get_subblock_index(&sblk0); + let idx1 = get_subblock_index(&sblk1); + let idx2 = get_subblock_index(&sblk2); + let idx3 = get_subblock_index(&sblk3); + + let mut cflags = idx0; + cflags = (cflags << 1) | ((idx1 != 0) as u16); + cflags = (cflags << 1) | ((idx2 != 0) as u16); + cflags = (cflags << 1) | ((idx3 != 0) as u16); + + self.cset.pat0[subset_idx].write(bw, cflags); + + if matches!(idx0, 0 | 27 | 54 | 81) { // only first coefficient is set + write_single_coeff(bw, &self.coeffs, sblk0[0], 3); + } else { + write_coeffs(bw, &self.coeffs, &sblk0); + } + if idx1 != 0 { + self.cset.pat1[!luma as usize].write(bw, idx1); + write_coeffs(bw, &self.coeffs, &sblk1); + } + if idx2 != 0 { + self.cset.pat1[!luma as usize].write(bw, idx2); + write_coeffs(bw, &self.coeffs, &sblk2); + } + if idx3 != 0 { + self.cset.pat2[!luma as usize].write(bw, idx3); + write_coeffs(bw, &self.coeffs, &sblk3); + } + } +} + +fn write_coeffs(bw: &mut BitWriter, coeffs: &RV34CodeReader, blk: &[i16; 4]) { + for (&val, &limit) in blk.iter().zip([3i16, 2, 2, 2].iter()) { + write_single_coeff(bw, coeffs, val, limit); + } +} + +fn write_single_coeff(bw: &mut BitWriter, coeffs: &RV34CodeReader, val: i16, limit: i16) { + if val != 0 { + if val.abs() >= limit { + let mut val = (val.abs() - limit) as u16; + if val > 23 { + val -= 22; + let bits = (15 - val.leading_zeros()) as u16; + coeffs.write(bw, bits + 23); + bw.write(u32::from(val - (1 << bits)), bits as u8); + } else { + coeffs.write(bw, val); + } + } + if val > 0 { + bw.write0(); + } else { + bw.write1(); + } + } +} + +pub struct CodeSets { + super_idx: usize, + set_idx: usize, + intra: bool, + is16: bool, + is_p16: bool, + + iset: Vec, + pset: Vec, +} + +impl CodeSets { + pub fn new() -> Self { + let mut iset: Vec = Vec::with_capacity(5); + for set in 0..5 { iset.push(FullSet::new(true, set)); } + let mut pset: Vec = Vec::with_capacity(7); + for set in 0..7 { 
pset.push(FullSet::new(false, set)); } + + Self { + iset, pset, + super_idx: 0, + set_idx: 0, + intra: false, + is16: false, + is_p16: false, + } + } + pub fn init(&mut self, quant: usize, subset: usize) { + let mut idx = quant as usize; + if (subset == 2) && (idx < 19) { + idx += 10; + } else if (subset != 0) && (idx < 26) { + idx += 5; + } + if idx > 30 { + idx = 30; + } + self.super_idx = idx; + } + pub fn set_params(&mut self, mbtype: &MacroblockType) { + self.is_p16 = matches!(*mbtype, MacroblockType::InterMix(_)); + self.intra = mbtype.is_intra() || self.is_p16; + self.is16 = mbtype.is_16(); + self.set_idx = if self.intra { + RV34_SET_IDX_INTRA[self.super_idx] + } else { + RV34_SET_IDX_INTER[self.super_idx] + }; + } + fn write_cbp(&self, bw: &mut BitWriter, coded_pat: [bool; 24], cbp_code: &CBPSet) { + let mut cbp_pat = 0u16; + for i in 16..20 { + cbp_pat = cbp_pat * 3 + (coded_pat[i] as u16) + (coded_pat[i + 4] as u16); + } + let mut nnz = 0usize; + for blk4 in coded_pat[..16].chunks(4) { + let cur_nz = blk4.contains(&true); + if cur_nz { + nnz += 1; + } + cbp_pat = cbp_pat * 2 + (cur_nz as u16); + } + nnz = nnz.saturating_sub(1); + + cbp_code.cbp_pattern.write(bw, cbp_pat); + for blk4 in coded_pat[..16].chunks(4) { + let pat = (blk4[3] as u16) * 32 + (blk4[2] as u16) * 16 + (blk4[1] as u16) * 2 + (blk4[0] as u16); + if pat != 0 { + cbp_code.cbp[nnz].write(bw, pat); + } + } + for i in 16..20 { + if coded_pat[i] ^ coded_pat[i + 4] { + if coded_pat[i] { + bw.write1(); + } else { + bw.write0(); + } + } + } + } + pub fn write_coeffs(&mut self, bw: &mut BitWriter, coeffs: &[Block; 25]) { + let mut fset = if self.intra { &self.iset[self.set_idx] } else { &self.pset[self.set_idx] }; + + const CODED_ORDER: [usize; 24] = [0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]; + let cbp_code = &fset.cbp[if self.is16 { 1 } else { 0 }]; + let mut coded_blk = [false; 24]; + let mut coded_pat = [false; 24]; + for (i, ((cpat, cblk), &seq)) 
in coded_pat.iter_mut().zip(coded_blk.iter_mut()) + .zip(CODED_ORDER.iter()).enumerate() { + *cpat = !coeffs[seq].is_empty(); + *cblk = !coeffs[i].is_empty(); + } + self.write_cbp(bw, coded_pat, cbp_code); + + if self.is16 { + fset.write_block(bw, &coeffs[24], 3, true); + } + let (luma_set, chroma_set) = if self.intra { + (if self.is16 { 2 } else { 1 }, if !self.is_p16 { 0 } else { 1 }) + } else { + (0, 1) + }; + let mut citer = coded_blk.iter(); + for blk in coeffs[..16].iter() { + if let Some(true) = citer.next() { + fset.write_block(bw, blk, luma_set, true); + } + } + if self.is_p16 { + self.set_idx = RV34_SET_IDX_INTER[self.super_idx]; + fset = &self.pset[self.set_idx]; + } + for blk in coeffs[16..24].iter() { + if let Some(true) = citer.next() { + fset.write_block(bw, blk, chroma_set, false); + } + } + } +} + +fn get_subblock_index(blk: &[i16; 4]) -> u16 { + let mut idx = blk[0].abs().min(3) as u16; + idx = idx * 3 + (blk[1].abs().min(2) as u16); + idx = idx * 3 + (blk[2].abs().min(2) as u16); + idx = idx * 3 + (blk[3].abs().min(2) as u16); + idx +} diff --git a/nihav-realmedia/src/codecs/rv40enc/dsp/blk.rs b/nihav-realmedia/src/codecs/rv40enc/dsp/blk.rs new file mode 100644 index 0000000..9a4a716 --- /dev/null +++ b/nihav-realmedia/src/codecs/rv40enc/dsp/blk.rs @@ -0,0 +1,167 @@ +use super::super::types::Block; +use super::clip8; + +pub trait BlockOps { + fn from_diff(&mut self, new: &[u8], old: &[u8], stride: usize); + fn add_to(&self, dst: &mut [u8], stride: usize); + fn quant_dcs(&mut self, q_dc: usize, q_ac: usize); + fn quant(&mut self, q_dc: usize, q_ac: usize); + fn dequant_dcs(&mut self, q_dc: usize, q_ac: usize); + fn dequant(&mut self, q_dc: usize, q_ac: usize); + fn transform_4x4(&mut self); + fn transform_dcs(&mut self); + fn itransform_4x4(&mut self); + fn itransform_dcs(&mut self); +} + +macro_rules! 
tx { + ($a:expr, $b:expr, $c:expr, $d:expr, $o0:expr, $o1:expr, $o2:expr, $o3:expr) => { + let t0 = $a + $d; + let t1 = $a - $d; + let t2 = $b + $c; + let t3 = $b - $c; + $o0 = 13 * (t0 + t2); + $o2 = 13 * (t0 - t2); + $o1 = 17 * t1 + 7 * t3; + $o3 = 7 * t1 - 17 * t3; + } +} + +macro_rules! itx { + ($a:expr, $b:expr, $c:expr, $d:expr, $bias:expr) => { + let t0 = 13 * ($a + $c) + $bias; + let t1 = 13 * ($a - $c) + $bias; + let t2 = 7 * $b - 17 * $d; + let t3 = 17 * $b + 7 * $d; + $a = t0 + t3; + $d = t0 - t3; + $b = t1 + t2; + $c = t1 - t2; + } +} + +impl BlockOps for Block { + fn from_diff(&mut self, new: &[u8], old: &[u8], stride: usize) { + for (dline, (oline, nline)) in self.coeffs.chunks_mut(4).zip(old.chunks(stride).zip(new.chunks(stride))) { + for (dst, (&o, &n)) in dline.iter_mut().zip(oline.iter().zip(nline.iter())) { + *dst = i16::from(n) - i16::from(o); + } + } + } + fn add_to(&self, dst: &mut [u8], stride: usize) { + for (line, row) in dst.chunks_mut(stride).zip(self.coeffs.chunks(4)) { + for (dst, &add) in line.iter_mut().zip(row.iter()) { + *dst = clip8(i16::from(*dst) + add); + } + } + } + fn quant_dcs(&mut self, q_dc: usize, q_ac: usize) { + let q_dc = i32::from(RV34_QUANT_TAB[q_dc]); + let q_ac = i32::from(RV34_QUANT_TAB[q_ac]); + for (i, el) in self.coeffs.iter_mut().enumerate() { + if *el != 0 { + let q = if matches!(i, 0 | 1 | 4) { q_dc } else { q_ac }; + *el = (i32::from(*el) * 16 / q).max(-511).min(511) as i16; + } + } + } + fn quant(&mut self, q_dc: usize, q_ac: usize) { + let q_dc = RV34_QUANT_TAB[q_dc]; + let q_ac = RV34_QUANT_TAB[q_ac]; + if self.coeffs[0] != 0 { + self.coeffs[0] = self.coeffs[0] * 16 / q_dc; + } + for el in self.coeffs.iter_mut().skip(1) { + if *el != 0 { + *el = *el * 16 / q_ac; + } + } + } + fn dequant_dcs(&mut self, q_dc: usize, q_ac: usize) { + let q_dc = i32::from(RV34_QUANT_TAB[q_dc]); + let q_ac = i32::from(RV34_QUANT_TAB[q_ac]); + for (i, el) in self.coeffs.iter_mut().enumerate() { + if *el != 0 { + let q = if 
matches!(i, 0 | 1 | 4) { q_dc } else { q_ac }; + *el = ((i32::from(*el) * q + 8) >> 4) as i16; + } + } + } + fn dequant(&mut self, q_dc: usize, q_ac: usize) { + let q_ac = i32::from(RV34_QUANT_TAB[q_ac]); + if self.coeffs[0] != 0 { + let q_dc = i32::from(RV34_QUANT_TAB[q_dc]); + self.coeffs[0] = ((i32::from(self.coeffs[0]) * q_dc + 8) >> 4) as i16; + } + for el in self.coeffs.iter_mut().skip(1) { + if *el != 0 { + *el = ((i32::from(*el) * q_ac + 8) >> 4) as i16; + } + } + } + fn transform_4x4(&mut self) { + let mut tmp = [0; 16]; + for (drow, srow) in tmp.chunks_mut(4).zip(self.coeffs.chunks(4)) { + tx!(i32::from(srow[0]), i32::from(srow[1]), i32::from(srow[2]), i32::from(srow[3]), + drow[0], drow[1], drow[2], drow[3]); + } + for i in 0..4 { + tx!(tmp[i], tmp[i + 4], tmp[i + 8], tmp[i + 12], + tmp[i], tmp[i + 4], tmp[i + 8], tmp[i + 12]); + } + for (dst, &src) in self.coeffs.iter_mut().zip(tmp.iter()) { + *dst = ((src + 223) / 446) as i16; + } + } + fn transform_dcs(&mut self) { + let mut tmp = [0; 16]; + for (drow, srow) in tmp.chunks_mut(4).zip(self.coeffs.chunks(4)) { + tx!(i32::from(srow[0]), i32::from(srow[1]), i32::from(srow[2]), i32::from(srow[3]), + drow[0], drow[1], drow[2], drow[3]); + } + for i in 0..4 { + tx!(tmp[i], tmp[i + 4], tmp[i + 8], tmp[i + 12], + tmp[i], tmp[i + 4], tmp[i + 8], tmp[i + 12]); + } + for (dst, &src) in self.coeffs.iter_mut().zip(tmp.iter()) { + *dst = ((src + 334) / 669) as i16; + } + } + fn itransform_4x4(&mut self) { + let mut tmp: [i32; 16] = [0; 16]; + for (dst, &src) in tmp.iter_mut().zip(self.coeffs.iter()) { + *dst = i32::from(src); + } + for row in tmp.chunks_mut(4) { + itx!(row[0], row[1], row[2], row[3], 0); + } + for i in 0..4 { + itx!(tmp[i], tmp[i + 4], tmp[i + 2 * 4], tmp[i + 3 * 4], 0x200); + } + for (dst, &src) in self.coeffs.iter_mut().zip(tmp.iter()) { + *dst = (src >> 10) as i16; + } + } + fn itransform_dcs(&mut self) { + let mut tmp: [i32; 16] = [0; 16]; + for (dst, &src) in 
tmp.iter_mut().zip(self.coeffs.iter()) { + *dst = i32::from(src); + } + for row in tmp.chunks_mut(4) { + itx!(row[0], row[1], row[2], row[3], 0); + } + for i in 0..4 { + itx!(tmp[i], tmp[i + 4], tmp[i + 2 * 4], tmp[i + 3 * 4], 0); + } + for (dst, &src) in self.coeffs.iter_mut().zip(tmp.iter()) { + *dst = ((src * 3) >> 11) as i16; + } + } +} + +const RV34_QUANT_TAB: [i16; 32] = [ + 60, 67, 76, 85, 96, 108, 121, 136, + 152, 171, 192, 216, 242, 272, 305, 341, + 383, 432, 481, 544, 606, 683, 767, 854, + 963, 1074, 1212, 1392, 1566, 1708, 1978, 2211 +]; diff --git a/nihav-realmedia/src/codecs/rv40enc/dsp/ipred.rs b/nihav-realmedia/src/codecs/rv40enc/dsp/ipred.rs new file mode 100644 index 0000000..f5df3d2 --- /dev/null +++ b/nihav-realmedia/src/codecs/rv40enc/dsp/ipred.rs @@ -0,0 +1,562 @@ +use super::super::types::{PredType8x8, PredType4x4}; +use super::RefMBData; + +#[derive(Default)] +pub struct IntraPred16x16 { + pub top: [u8; 17], + pub left: [u8; 17], +} + +impl IntraPred16x16 { + pub fn new() -> Self { Self::default() } + #[allow(clippy::many_single_char_names)] + pub fn apply16(&self, mode: PredType8x8, dst: &mut [u8], stride: usize) { + match mode { + PredType8x8::DC => { + let sumt = self.top[1..].iter().fold(0u32, |acc, &x| acc + u32::from(x)); + let suml = self.left[1..].iter().fold(0u32, |acc, &x| acc + u32::from(x)); + let dc = ((sumt + suml + 16) >> 5) as u8; + for line in dst.chunks_mut(stride).take(16) { + for dst in line[..16].iter_mut() { + *dst = dc; + } + } + }, + PredType8x8::Hor => { + for (&left, line) in self.left[1..].iter().zip(dst.chunks_mut(stride)) { + for dst in line[..16].iter_mut() { + *dst = left; + } + } + }, + PredType8x8::Ver => { + for line in dst.chunks_mut(stride).take(16) { + line[..16].copy_from_slice(&self.top[1..]); + } + }, + PredType8x8::Plane => { + let top0 = &self.top[9..]; + let top1 = &self.top[..8]; + let h = top0.iter().zip(top1.iter().rev()).enumerate().fold( + 0i32, |acc, (k, (&a, &b))| acc + ((k + 1) as i32) * 
(i32::from(a) - i32::from(b))); + let left0 = &self.left[9..]; + let left1 = &self.left[..8]; + let v = left0.iter().zip(left1.iter().rev()).enumerate().fold( + 0i32, |acc, (k, (&a, &b))| acc + ((k + 1) as i32) * (i32::from(a) - i32::from(b))); + let b = (h + (h >> 2)) >> 4; + let c = (v + (v >> 2)) >> 4; + let mut a = 16 * (i32::from(self.left[16]) + i32::from(self.top[16])) + 16 - 7 * (b + c); + + for line in dst.chunks_mut(stride).take(16) { + let mut oval = a; + for el in line[..16].iter_mut() { + *el = (oval >> 5).max(0).min(255) as u8; + oval += b; + } + a += c; + } + }, + PredType8x8::LeftDC => { + let dc = ((self.left[1..].iter().fold(0u32, |acc, &x| acc + u32::from(x)) + 8) >> 4) as u8; + for line in dst.chunks_mut(stride).take(16) { + for dst in line[..16].iter_mut() { + *dst = dc; + } + } + }, + PredType8x8::TopDC => { + let dc = ((self.top[1..].iter().fold(0u32, |acc, &x| acc + u32::from(x)) + 8) >> 4) as u8; + for line in dst.chunks_mut(stride).take(16) { + for dst in line[..16].iter_mut() { + *dst = dc; + } + } + }, + PredType8x8::DC128 => { + for line in dst.chunks_mut(stride).take(16) { + for dst in line[..16].iter_mut() { + *dst = 128; + } + } + }, + } + } + pub fn apply8(&self, mode: PredType8x8, dst: &mut [u8], stride: usize) { + match mode { + PredType8x8::DC | PredType8x8::Plane => { + let sumt = self.top[1..9].iter().fold(0u32, |acc, &x| acc + u32::from(x)); + let suml = self.left[1..9].iter().fold(0u32, |acc, &x| acc + u32::from(x)); + let dc = ((sumt + suml + 8) >> 4) as u8; + for line in dst.chunks_mut(stride).take(8) { + for dst in line[..8].iter_mut() { + *dst = dc; + } + } + }, + PredType8x8::Hor => { + for (&left, line) in self.left[1..9].iter().zip(dst.chunks_mut(stride)) { + for dst in line[..8].iter_mut() { + *dst = left; + } + } + }, + PredType8x8::Ver => { + for line in dst.chunks_mut(stride).take(8) { + line[..8].copy_from_slice(&self.top[1..9]); + } + }, + PredType8x8::LeftDC => { + let dc = ((self.left[1..9].iter().fold(0u32, 
|acc, &x| acc + u32::from(x)) + 4) >> 3) as u8; + for line in dst.chunks_mut(stride).take(8) { + for dst in line[..8].iter_mut() { + *dst = dc; + } + } + }, + PredType8x8::TopDC => { + let dc = ((self.top[1..9].iter().fold(0u32, |acc, &x| acc + u32::from(x)) + 4) >> 3) as u8; + for line in dst.chunks_mut(stride).take(8) { + for dst in line[..8].iter_mut() { + *dst = dc; + } + } + }, + PredType8x8::DC128 => { + for line in dst.chunks_mut(stride).take(8) { + for dst in line[..8].iter_mut() { + *dst = 128; + } + } + }, + } + } +} + +#[derive(Default)] +pub struct Intra4Pred { + pub top: [u8; 8], + pub left: [u8; 8], + pub tl: u8, +} + +impl Intra4Pred { + pub fn new() -> Self { Self::default() } + fn load_left(&self) -> [u16; 8] { + let mut ret = [0; 8]; + for (dst, &src) in ret.iter_mut().zip(self.left.iter()) { + *dst = u16::from(src); + } + ret + } + fn load_top(&self) -> [u16; 8] { + let mut ret = [0; 8]; + for (dst, &src) in ret.iter_mut().zip(self.top.iter()) { + *dst = u16::from(src); + } + ret + } + fn load_left_and_top(&self) -> ([u16; 5], [u16; 5]) { + let mut left = [0; 5]; + let mut top = [0; 5]; + left[0] = u16::from(self.tl); + top[0] = u16::from(self.tl); + for (dst, &src) in left[1..].iter_mut().zip(self.left.iter()) { + *dst = u16::from(src); + } + for (dst, &src) in top[1..].iter_mut().zip(self.top.iter()) { + *dst = u16::from(src); + } + (left, top) + } + #[allow(clippy::many_single_char_names)] + pub fn apply(&self, ptype: PredType4x4, buf: &mut [u8], stride: usize) { + match ptype { + PredType4x4::DC => { + let dc_l = self.left[..4].iter().fold(0u32, |acc, &x| acc + u32::from(x)); + let dc_t = self.top [..4].iter().fold(0u32, |acc, &x| acc + u32::from(x)); + let dc = ((dc_t + dc_l + 4) >> 3) as u8; + for line in buf.chunks_mut(stride).take(4) { + for el in line[..4].iter_mut() { + *el = dc; + } + } + }, + PredType4x4::LeftDC => { + let dc_l = self.left[..4].iter().fold(0u32, |acc, &x| acc + u32::from(x)); + let dc = ((dc_l + 2) >> 2) as u8; + for 
line in buf.chunks_mut(stride).take(4) { + for el in line[..4].iter_mut() { + *el = dc; + } + } + }, + PredType4x4::TopDC => { + let dc_t = self.top [..4].iter().fold(0u32, |acc, &x| acc + u32::from(x)); + let dc = ((dc_t + 2) >> 2) as u8; + for line in buf.chunks_mut(stride).take(4) { + for el in line[..4].iter_mut() { + *el = dc; + } + } + }, + PredType4x4::DC128 => { + for line in buf.chunks_mut(stride).take(4) { + for el in line[..4].iter_mut() { + *el = 128; + } + } + }, + PredType4x4::Ver => { + for line in buf.chunks_mut(stride).take(4) { + line[..4].copy_from_slice(&self.top[..4]); + } + }, + PredType4x4::Hor => { + for (&left, line) in self.left[..4].iter().zip(buf.chunks_mut(stride)) { + for dst in line[..4].iter_mut() { + *dst = left; + } + } + }, + PredType4x4::DiagDownLeft => { + let l = self.load_left(); + let t = self.load_top(); + buf[0] = ((t[0] + t[2] + 2*t[1] + 2 + l[0] + l[2] + 2*l[1] + 2) >> 3) as u8; + let pix = ((t[1] + t[3] + 2*t[2] + 2 + l[1] + l[3] + 2*l[2] + 2) >> 3) as u8; + buf[1] = pix; + buf[stride] = pix; + let pix = ((t[2] + t[4] + 2*t[3] + 2 + l[2] + l[4] + 2*l[3] + 2) >> 3) as u8; + buf[2] = pix; + buf[1 + stride] = pix; + buf[2 * stride] = pix; + let pix = ((t[3] + t[5] + 2*t[4] + 2 + l[3] + l[5] + 2*l[4] + 2) >> 3) as u8; + buf[3] = pix; + buf[2 + stride] = pix; + buf[1 + 2 * stride] = pix; + buf[ 3 * stride] = pix; + let pix = ((t[4] + t[6] + 2*t[5] + 2 + l[4] + l[6] + 2*l[5] + 2) >> 3) as u8; + buf[3 + stride] = pix; + buf[2 + 2 * stride] = pix; + buf[1 + 3 * stride] = pix; + let pix = ((t[5] + t[7] + 2*t[6] + 2 + l[5] + l[7] + 2*l[6] + 2) >> 3) as u8; + buf[3 + 2 * stride] = pix; + buf[2 + 3 * stride] = pix; + buf[3 + 3 * stride] = ((t[6] + t[7] + 1 + l[6] + l[7] + 1) >> 2) as u8; + }, + PredType4x4::DiagDownRight => { + let (l, t) = self.load_left_and_top(); + for (j, line) in buf.chunks_mut(stride).take(4).enumerate() { + for i in 0..j { + line[i] = ((l[j - i - 1] + 2 * l[j - i] + l[j - i + 1] + 2) >> 2) as u8; + } + 
line[j] = ((l[1] + 2 * l[0] + t[1] + 2) >> 2) as u8; + for i in (j + 1)..4 { + line[i] = ((t[i - j - 1] + 2 * t[i - j] + t[i - j + 1] + 2) >> 2) as u8; + } + } + }, + PredType4x4::VerRight => { + let (l, t) = self.load_left_and_top(); + for (j, line) in buf.chunks_mut(stride).take(4).enumerate() { + for (i, pix) in line[..4].iter_mut().enumerate() { + let zvr = ((2 * i) as i8) - (j as i8); + *pix = if zvr >= 0 { + if (zvr & 1) == 0 { + (t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 1) >> 1 + } else { + (t[i - (j >> 1) - 1] + 2 * t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 2) >> 2 + } + } else { + if zvr == -1 { + (l[1] + 2 * l[0] + t[1] + 2) >> 2 + } else { + (l[j] + 2 * l[j - 1] + l[j - 2] + 2) >> 2 + } + } as u8; + } + } + }, + PredType4x4::HorDown => { + let (l, t) = self.load_left_and_top(); + for (j, line) in buf.chunks_mut(stride).take(4).enumerate() { + for (i, pix) in line[..4].iter_mut().enumerate() { + let zhd = ((2 * j) as i8) - (i as i8); + *pix = if zhd >= 0 { + if (zhd & 1) == 0 { + (l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 1) >> 1 + } else { + (l[j - (i >> 1) - 1] + 2 * l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 2) >> 2 + } + } else { + if zhd == -1 { + (l[1] + 2 * l[0] + t[1] + 2) >> 2 + } else { + (t[i - 2] + 2 * t[i - 1] + t[i] + 2) >> 2 + } + } as u8; + } + } + }, + PredType4x4::VerLeft => { + let l = self.load_left(); + let t = self.load_top(); + buf[0] = ((2*t[0] + 2*t[1] + l[1] + 2*l[2] + l[3] + 4) >> 3) as u8; + let pix = ((t[1] + t[2] + 1) >> 1) as u8; + buf[1] = pix; + buf[2 * stride] = pix; + let pix = ((t[2] + t[3] + 1) >> 1) as u8; + buf[2] = pix; + buf[1 + 2 * stride] = pix; + let pix = ((t[3] + t[4] + 1) >> 1) as u8; + buf[3] = pix; + buf[2 + 2 * stride] = pix; + buf[3 + 2 * stride] = ((t[4] + t[5] + 1) >> 1) as u8; + buf[ stride] = ((t[0] + 2*t[1] + t[2] + l[2] + 2*l[3] + l[4] + 4) >> 3) as u8; + let pix = ((t[1] + 2*t[2] + t[3] + 2) >> 2) as u8; + buf[1 + stride] = pix; + buf[ 3 * stride] = pix; + let pix = ((t[2] + 2*t[3] + t[4] + 2) >> 2) as u8; 
+ buf[2 + stride] = pix; + buf[1 + 3 * stride] = pix; + let pix = ((t[3] + 2*t[4] + t[5] + 2) >> 2) as u8; + buf[3 + stride] = pix; + buf[2 + 3 * stride] = pix; + buf[3 + 3 * stride] = ((t[4] + 2*t[5] + t[6] + 2) >> 2) as u8; + }, + PredType4x4::HorUp => { + let l = self.load_left(); + let t = self.load_top(); + buf[0] = ((t[1] + 2*t[2] + t[3] + 2*l[0] + 2*l[1] + 4) >> 3) as u8; + buf[1] = ((t[2] + 2*t[3] + t[4] + l[0] + 2*l[1] + l[2] + 4) >> 3) as u8; + let pix = ((t[3] + 2*t[4] + t[5] + 2*l[1] + 2*l[2] + 4) >> 3) as u8; + buf[2] = pix; + buf[ stride] = pix; + let pix = ((t[4] + 2*t[5] + t[6] + l[1] + 2*l[2] + l[3] + 4) >> 3) as u8; + buf[3] = pix; + buf[1 + stride] = pix; + let pix = ((t[5] + 2*t[6] + t[7] + 2*l[2] + 2*l[3] + 4) >> 3) as u8; + buf[2 + stride] = pix; + buf[0 + 2 * stride] = pix; + let pix = ((t[6] + 3*t[7] + l[2] + 3*l[3] + 4) >> 3) as u8; + buf[3 + stride] = pix; + buf[1 + 2 * stride] = pix; + let pix = ((l[3] + 2*l[4] + l[5] + 2) >> 2) as u8; + buf[3 + 2 * stride] = pix; + buf[1 + 3 * stride] = pix; + let pix = ((t[6] + t[7] + l[3] + l[4] + 2) >> 2) as u8; + buf[0 + 3 * stride] = pix; + buf[2 + 2 * stride] = pix; + buf[2 + 3 * stride] = ((l[4] + l[5] + 1) >> 1) as u8; + buf[3 + 3 * stride] = ((l[4] + 2*l[5] + l[6] + 2) >> 2) as u8; + }, + PredType4x4::DiagDownLeftNoDown => { + let l = self.load_left(); + let t = self.load_top(); + buf[0] = ((t[0] + t[2] + 2*t[1] + 2 + l[0] + l[2] + 2*l[1] + 2) >> 3) as u8; + let pix = ((t[1] + t[3] + 2*t[2] + 2 + l[1] + l[3] + 2*l[2] + 2) >> 3) as u8; + buf[1] = pix; + buf[0 + stride] = pix; + let pix = ((t[2] + t[4] + 2*t[3] + 2 + l[2] + 3*l[3] + 2) >> 3) as u8; + buf[2] = pix; + buf[1 + stride] = pix; + buf[0 + 2 * stride] = pix; + let pix = ((t[3] + t[5] + 2*t[4] + 2 + l[3]*4 + 2) >> 3) as u8; + buf[3] = pix; + buf[2 + stride] = pix; + buf[1 + 2 * stride] = pix; + buf[0 + 3 * stride] = pix; + let pix = ((t[4] + t[6] + 2*t[5] + 2 + l[3]*4 + 2) >> 3) as u8; + buf[3 + stride] = pix; + buf[2 + 2 * stride] = pix; 
+ buf[1 + 3 * stride] = pix; + let pix = ((t[5] + t[7] + 2*t[6] + 2 + l[3]*4 + 2) >> 3) as u8; + buf[3 + 2 * stride] = pix; + buf[2 + 3 * stride] = pix; + buf[3 + 3 * stride] = ((t[6] + t[7] + 1 + 2*l[3] + 1) >> 2) as u8; + }, + PredType4x4::HorUpNoDown => { + let l = self.load_left(); + let t = self.load_top(); + buf[0] = ((t[1] + 2*t[2] + t[3] + 2*l[0] + 2*l[1] + 4) >> 3) as u8; + buf[1] = ((t[2] + 2*t[3] + t[4] + l[0] + 2*l[1] + l[2] + 4) >> 3) as u8; + let pix = ((t[3] + 2*t[4] + t[5] + 2*l[1] + 2*l[2] + 4) >> 3) as u8; + buf[2] = pix; + buf[ stride] = pix; + let pix = ((t[4] + 2*t[5] + t[6] + l[1] + 2*l[2] + l[3] + 4) >> 3) as u8; + buf[3] = pix; + buf[1 + stride] = pix; + let pix = ((t[5] + 2*t[6] + t[7] + 2*l[2] + 2*l[3] + 4) >> 3) as u8; + buf[2 + stride] = pix; + buf[ 2 * stride] = pix; + let pix = ((t[6] + 3*t[7] + l[2] + 3*l[3] + 4) >> 3) as u8; + buf[3 + stride] = pix; + buf[1 + 2 * stride] = pix; + buf[3 + 2 * stride] = l[3] as u8; + buf[1 + 3 * stride] = l[3] as u8; + let pix = ((t[6] + t[7] + 2*l[3] + 2) >> 2) as u8; + buf[0 + 3 * stride] = pix; + buf[2 + 2 * stride] = pix; + buf[2 + 3 * stride] = l[3] as u8; + buf[3 + 3 * stride] = l[3] as u8; + }, + PredType4x4::VerLeftNoDown => { + let l = [u16::from(self.left[0]), u16::from(self.left[1]), u16::from(self.left[2]), u16::from(self.left[3]), u16::from(self.left[3])]; + let t = self.load_top(); + buf[0] = ((2*t[0] + 2*t[1] + l[1] + 2*l[2] + l[3] + 4) >> 3) as u8; + let pix = ((t[1] + t[2] + 1) >> 1) as u8; + buf[1] = pix; + buf[ 2 * stride] = pix; + let pix = ((t[2] + t[3] + 1) >> 1) as u8; + buf[2] = pix; + buf[1 + 2 * stride] = pix; + let pix = ((t[3] + t[4] + 1) >> 1) as u8; + buf[3] = pix; + buf[2 + 2 * stride] = pix; + buf[3 + 2 * stride] = ((t[4] + t[5] + 1) >> 1) as u8; + buf[ stride] = ((t[0] + 2*t[1] + t[2] + l[2] + 2*l[3] + l[4] + 4) >> 3) as u8; + let pix = ((t[1] + 2*t[2] + t[3] + 2) >> 2) as u8; + buf[1 + stride] = pix; + buf[ 3 * stride] = pix; + let pix = ((t[2] + 2*t[3] + t[4] + 2) >> 
/// Neighbour-pixel context used to run 4x4 intra prediction over one block grid
/// of luma (16-wide destination) and both chroma planes (8-wide destination).
///
/// The `top_*` and `left_*` arrays hold the stored neighbour pixels; the
/// `ipred_*` predictors hold the working `tl`/`top`/`left` context of the block
/// currently being predicted.
pub struct BlockIntra4Pred {
    pub ipred_y: Intra4Pred,
    pub ipred_u: Intra4Pred,
    pub ipred_v: Intra4Pred,
    // Luma top row: 17 values copied from `src_y.top` followed by the 4 values of `tr_y`.
    pub top_y: [u8; 21],
    // Chroma top rows: first 9 values of the source `top` followed by the 4 values of `tr_u`/`tr_v`.
    pub top_u: [u8; 13],
    pub top_v: [u8; 13],
    // Left columns, copied from the source `left` arrays starting at index 1.
    pub left_y: [u8; 16],
    pub left_u: [u8; 8],
    pub left_v: [u8; 8],
    // Only consulted in `pred_block` for the chroma block at (0, 0), where it
    // decides whether the regular DiagDownLeft/VerLeft/HorUp modes are kept.
    // NOTE(review): presumably "left/down-left neighbour available" - confirm against the caller.
    pub has_l: bool,
}

impl BlockIntra4Pred {
    /// Builds the context from three 16x16 predictors plus four extra top
    /// pixels per plane (`tr_y`, `tr_u`, `tr_v`) that are appended after the
    /// copied top rows.
    pub fn new(src_y: &IntraPred16x16, src_u: &IntraPred16x16, src_v: &IntraPred16x16, tr_y: [u8; 4], tr_u: [u8; 4], tr_v: [u8; 4], has_l: bool) -> Self {
        let mut top_y = [0; 21];
        top_y[..17].copy_from_slice(&src_y.top);
        top_y[17..].copy_from_slice(&tr_y);
        let mut top_u = [0; 13];
        top_u[..9].copy_from_slice(&src_u.top[..9]);
        top_u[9..].copy_from_slice(&tr_u);
        let mut top_v = [0; 13];
        top_v[..9].copy_from_slice(&src_v.top[..9]);
        top_v[9..].copy_from_slice(&tr_v);
        let mut left_y = [0; 16];
        left_y.copy_from_slice(&src_y.left[1..]); // index 0 of the source `left` is skipped
        let mut left_u = [0; 8];
        left_u.copy_from_slice(&src_u.left[1..9]);
        let mut left_v = [0; 8];
        left_v.copy_from_slice(&src_v.left[1..9]);
        Self {
            ipred_y: Intra4Pred::new(),
            ipred_u: Intra4Pred::new(),
            ipred_v: Intra4Pred::new(),
            top_y, top_u, top_v, left_y, left_u, left_v,
            has_l,
        }
    }
    /// Predicts the luma block at grid position (`x`, `y`) into `dst.y` and,
    /// when both coordinates are even, the chroma blocks into `dst.u`/`dst.v`.
    ///
    /// `x` and `y` are used as 4x4 block indices: the luma offset is
    /// `x * 4 + y * 4 * 16` and the chroma offset is `x * 2 + y * 2 * 8`.
    pub fn pred_block(&mut self, dst: &mut RefMBData, x: usize, y: usize, mode: PredType4x4) {
        let do_chroma = ((x & 1) == 0) && ((y & 1) == 0);
        if x == 0 {
            // First block of a row: reload the top-left pixel and the left column
            // from the stored left neighbours.
            self.ipred_y.tl = if y == 0 { self.top_y[0] } else { self.left_y[y * 4 - 1] };
            if y != 3 {
                self.ipred_y.left.copy_from_slice(&self.left_y[y * 4..][..8]);
            } else {
                // Last row: only four stored left pixels remain, the rest of `left` is not touched.
                self.ipred_y.left[..4].copy_from_slice(&self.left_y[12..]);
            }
            if y == 0 {
                self.ipred_u.tl = self.top_u[0];
                self.ipred_v.tl = self.top_v[0];
                self.ipred_u.left.copy_from_slice(&self.left_u);
                self.ipred_v.left.copy_from_slice(&self.left_v);
            } else if y == 2 {
                self.ipred_u.tl = self.left_u[3];
                self.ipred_v.tl = self.left_v[3];
                self.ipred_u.left[..4].copy_from_slice(&self.left_u[4..]);
                self.ipred_v.left[..4].copy_from_slice(&self.left_v[4..]);
            }
        }
        // Eight top pixels starting right above the block (index 0 of `top_y` is the corner).
        self.ipred_y.top.copy_from_slice(&self.top_y[x * 4 + 1..][..8]);
        if do_chroma {
            if x == 0 {
                self.ipred_u.top.copy_from_slice(&self.top_u[1..9]);
                self.ipred_v.top.copy_from_slice(&self.top_v[1..9]);
            } else if x == 2 {
                self.ipred_u.top.copy_from_slice(&self.top_u[5..]);
                self.ipred_v.top.copy_from_slice(&self.top_v[5..]);
            }
        }

        self.ipred_y.apply(mode, &mut dst.y[x * 4 + y * 4 * 16..], 16);
        if do_chroma {
            // Only the chroma block at (0, 0) may keep the regular variants of these modes.
            let has_ld = if (x == 0) && (y == 0) { self.has_l } else { false };
            let off = x * 2 + y * 2 * 8;
            let cmode = match mode {
                PredType4x4::DiagDownLeft if !has_ld => PredType4x4::DiagDownLeftNoDown,
                PredType4x4::VerLeft if !has_ld => PredType4x4::VerLeftNoDown,
                PredType4x4::HorUp if !has_ld => PredType4x4::HorUpNoDown,
                _ => mode,
            };
            self.ipred_u.apply(cmode, &mut dst.u[off..], 8);
            self.ipred_v.apply(cmode, &mut dst.v[off..], 8);
        }
    }
    /// Refreshes the neighbour context from the pixels of block (`x`, `y`) in
    /// `src`, so that the block's right column becomes the next left column and
    /// its bottom row becomes part of the stored top row.
    ///
    /// NOTE(review): presumably called after the block at (`x`, `y`) has been
    /// reconstructed into `src` - confirm against the caller.
    pub fn update_from(&mut self, src: &RefMBData, x: usize, y: usize) {
        let do_chroma = ((x & 1) == 0) && ((y & 1) == 0);
        let y_off = x * 4 + y * 4 * 16;
        let c_off = x * 2 + y * 2 * 8;

        if x != 3 {
            // The new top-left is the last of the four pixels above this block;
            // the new left column is column 3 of this block.
            self.ipred_y.tl = self.ipred_y.top[3];
            for (left, src) in self.ipred_y.left[..4].iter_mut().zip(src.y[y_off + 3..].chunks(16)) {
                *left = src[0];
            }
        }
        if do_chroma && x != 2 {
            self.ipred_u.tl = self.ipred_u.top[3];
            self.ipred_v.tl = self.ipred_v.top[3];
            for (left, src) in self.ipred_u.left[..4].iter_mut().zip(src.u[c_off + 3..].chunks(8)) {
                *left = src[0];
            }
            for (left, src) in self.ipred_v.left[..4].iter_mut().zip(src.v[c_off + 3..].chunks(8)) {
                *left = src[0];
            }
        }
        if x == 0 {
            // NOTE(review): `x` is 0 in this branch, so the index is always 3;
            // confirm whether `y * 4 + 3` was intended.
            self.top_y[0] = self.left_y[x * 4 + 3];
            if y == 0 {
                self.top_u[0] = self.left_u[3];
                self.top_v[0] = self.left_v[3];
            }
        }
        // The bottom row of this block becomes the stored top row for these four columns.
        self.top_y[x * 4 + 1..][..4].copy_from_slice(&src.y[y_off + 3 * 16..][..4]);
        if x == 3 {
            // Fill the four extra top pixels with the last pixel just stored.
            let (head, tail) = self.top_y.split_at_mut(17);
            for el in tail.iter_mut() {
                *el = head[16];
            }
        }
        if do_chroma && y != 2 {
            self.top_u[x * 2 + 1..][..4].copy_from_slice(&src.u[c_off + 3 * 8..][..4]);
            self.top_v[x * 2 + 1..][..4].copy_from_slice(&src.v[c_off + 3 * 8..][..4]);
            if x == 2 {
                // Same fill as for luma: repeat the last stored top pixel.
                for i in 9..13 {
                    self.top_u[i] = self.top_u[8];
                    self.top_v[i] = self.top_v[8];
                }
            }
        }
    }
}
test_bit { + ($pat: expr, $x: expr) => ( (($pat >> $x) & 1) != 0 ) +} + +pub fn loop_filter_frame(dst: &mut NASimpleVideoFrame, dblk: &[DeblockInfo], mb_w: usize, mb_h: usize) { + let small_frame = dst.width[0] * dst.height[0] <= 176 * 144; + + let mut mb_pos = 0; + for mb_y in 0..mb_h { + let is_last_row = mb_y == mb_h - 1; + let mut left_q: usize = 0; + let mut left_cbp_y = 0; + let mut left_cbp_c = 0; + let mut left_dbk_y = 0; + + for mb_x in 0..mb_w { + let q = usize::from(dblk[mb_pos].q); + let alpha = RV40_ALPHA_TAB[q]; + let beta = RV40_BETA_TAB[q]; + let beta_y = if small_frame { beta * 4 } else { beta * 3 }; + let beta_c = beta * 3; + + let is_strong = dblk[mb_pos].is_strong; + let top_is_strong = mb_y > 0 && dblk[mb_pos - mb_w].is_strong; + let left_is_strong = mb_x > 0 && dblk[mb_pos - 1].is_strong; + let bot_is_strong = !is_last_row && dblk[mb_pos + mb_w].is_strong; + + let cur_dbk_y = dblk[mb_pos].deblock_y; + let cur_cbp_y = if is_strong { 0xFFFF } else { u32::from(dblk[mb_pos].cbp_y) }; + + let (top_cbp_y, top_dbk_y) = if mb_y > 0 { + (if top_is_strong { 0xFFFF } else { u32::from(dblk[mb_pos - mb_w].cbp_y) }, dblk[mb_pos - mb_w].deblock_y) + } else { + (0, 0) + }; + let bot_dbk_y = if !is_last_row { + dblk[mb_pos + mb_w].deblock_y + } else { + 0 + }; + + let y_to_deblock = (cur_dbk_y as u32) | ((bot_dbk_y as u32) << 16); + let mut y_h_deblock = y_to_deblock | ((cur_cbp_y << 4) & !Y_TOP_ROW_MASK) | ((top_cbp_y & Y_BOT_ROW_MASK) >> 12); + let mut y_v_deblock = y_to_deblock | ((cur_cbp_y << 1) & !Y_LEFT_COL_MASK) | ((left_cbp_y & Y_RIGHT_COL_MASK) >> 3); + + if mb_x == 0 { + y_v_deblock &= !Y_LEFT_COL_MASK; + } + if mb_y == 0 { + y_h_deblock &= !Y_TOP_ROW_MASK; + } + if is_last_row || is_strong || bot_is_strong { + y_h_deblock &= !(Y_TOP_ROW_MASK << 16); + } + + for y in 0..4 { + let yoff = dst.offset[0] + mb_x * 16 + (mb_y * 16 + y * 4) * dst.stride[0]; + for x in 0..4 { + let bpos = x + y * 4; + let ver_strong = (x == 0) && (mb_x > 0) && (is_strong || 
left_is_strong); + + let cur_strength: usize; + if is_strong { + cur_strength = 2; + } else if test_bit!(cur_dbk_y, bpos) { + cur_strength = 1; + } else { + cur_strength = 0; + } + + let left_strength: usize; + if x > 0 { + if is_strong { + left_strength = 2; + } else if test_bit!(cur_dbk_y, bpos - 1) { + left_strength = 1; + } else { + left_strength = 0; + } + } else if mb_x > 0 { + if left_is_strong { + left_strength = 2; + } else if test_bit!(left_dbk_y, bpos + 3) { + left_strength = 1; + } else { + left_strength = 0; + } + } else { + left_strength = 0; + } + + let bot_strength: usize; + if y < 3 { + if is_strong { + bot_strength = 2; + } else if test_bit!(cur_dbk_y, bpos + 4) { + bot_strength = 1; + } else { + bot_strength = 0; + } + } else if !is_last_row { + if dblk[mb_pos + mb_w].is_strong { + bot_strength = 2; + } else if test_bit!(bot_dbk_y, x) { + bot_strength = 1; + } else { + bot_strength = 0; + } + } else { + bot_strength = 0; + } + + let top_strength: usize; + if y > 0 { + if is_strong { + top_strength = 2; + } else if test_bit!(cur_dbk_y, bpos - 4) { + top_strength = 1; + } else { + top_strength = 0; + } + } else if mb_y > 0 { + if top_is_strong { + top_strength = 2; + } else if test_bit!(top_dbk_y, bpos + 12) { + top_strength = 1; + } else { + top_strength = 0; + } + } else { + top_strength = 0; + } + + let l_q = if x > 0 { q } else { left_q }; + let top_q = if mb_y > 0 { usize::from(dblk[mb_pos - mb_w].q) } else { 0 }; + + let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q]; + let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q]; + let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q]; + let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q]; + + let dmode = if y > 0 { x + y * 4 } else { x * 4 }; + + if test_bit!(y_h_deblock, bpos + 4) { + rv40_loop_filter4_h(dst.data, yoff + 4 * dst.stride[0] + x * 4, dst.stride[0], + dmode, lim_cur, lim_bottom, alpha, beta, beta_y, false, false); + } + if test_bit!(y_v_deblock, bpos) && !ver_strong { + 
rv40_loop_filter4_v(dst.data, yoff + x * 4, dst.stride[0], + dmode, lim_left, lim_cur, alpha, beta, beta_y, false, false); + } + if (y == 0) && test_bit!(y_h_deblock, bpos) && (is_strong || top_is_strong) { + rv40_loop_filter4_h(dst.data, yoff + x * 4, dst.stride[0], + dmode, lim_top, lim_cur, alpha, beta, beta_y, false, true); + } + if test_bit!(y_v_deblock, bpos) && ver_strong { + rv40_loop_filter4_v(dst.data, yoff + x * 4, dst.stride[0], + dmode, lim_left, lim_cur, alpha, beta, beta_y, false, true); + } + } + } + + let cur_cbp_c = dblk[mb_pos].cbp_c; + let top_cbp_c = if mb_y > 0 { + if top_is_strong { 0xFF } else { dblk[mb_pos - mb_w].cbp_c } + } else { + 0 + }; + let bot_cbp_c = if !is_last_row { + dblk[mb_pos + mb_w].cbp_c + } else { + 0 + }; + for comp in 1..3 { + let cshift = (comp - 1) * 4; + let c_cur_cbp = (cur_cbp_c >> cshift) & 0xF; + let c_top_cbp = (top_cbp_c >> cshift) & 0xF; + let c_left_cbp = (left_cbp_c >> cshift) & 0xF; + let c_bot_cbp = (bot_cbp_c >> cshift) & 0xF; + + let c_deblock = c_cur_cbp | (c_bot_cbp << 4); + let mut c_v_deblock = c_deblock | ((c_cur_cbp << 1) & !C_LEFT_COL_MASK) | ((c_left_cbp & C_RIGHT_COL_MASK) >> 1); + let mut c_h_deblock = c_deblock | ((c_cur_cbp & C_TOP_ROW_MASK) << 2) | ((c_top_cbp & C_BOT_ROW_MASK) >> 2); + if mb_x == 0 { + c_v_deblock &= !C_LEFT_COL_MASK; + } + if mb_y == 0 { + c_h_deblock &= !C_TOP_ROW_MASK; + } + if is_last_row || is_strong || bot_is_strong { + c_h_deblock &= !(C_TOP_ROW_MASK << 4); + } + + for y in 0..2 { + let coff = dst.offset[comp] + mb_x * 8 + (mb_y * 8 + y * 4) * dst.stride[comp]; + for x in 0..2 { + let bpos = x + y * 2; + + let ver_strong = (x == 0) && (is_strong || left_is_strong); + + let cur_strength: usize; + if is_strong { + cur_strength = 2; + } else if test_bit!(c_cur_cbp, bpos) { + cur_strength = 1; + } else { + cur_strength = 0; + } + + let left_strength: usize; + if x > 0 { + if is_strong { + left_strength = 2; + } else if test_bit!(c_cur_cbp, bpos - 1) { + left_strength = 1; 
+ } else { + left_strength = 0; + } + } else if mb_x > 0 { + if left_is_strong { + left_strength = 2; + } else if test_bit!(c_left_cbp, bpos + 1) { + left_strength = 1; + } else { + left_strength = 0; + } + } else { + left_strength = 0; + } + + let bot_strength: usize; + if y != 3 { + if is_strong { + bot_strength = 2; + } else if test_bit!(c_cur_cbp, bpos + 2) { + bot_strength = 1; + } else { + bot_strength = 0; + } + } else if !is_last_row { + if dblk[mb_pos + mb_w].is_strong { + bot_strength = 2; + } else if test_bit!(c_bot_cbp, x) { + bot_strength = 1; + } else { + bot_strength = 0; + } + } else { + bot_strength = 0; + } + + let top_strength: usize; + if y > 0 { + if is_strong { + top_strength = 2; + } else if test_bit!(c_cur_cbp, bpos - 2) { + top_strength = 1; + } else { + top_strength = 0; + } + } else if mb_y > 0 { + if top_is_strong { + top_strength = 2; + } else if test_bit!(c_top_cbp, bpos + 2) { + top_strength = 1; + } else { + top_strength = 0; + } + } else { + top_strength = 0; + } + + let l_q = if x > 0 { q } else { left_q }; + let top_q = if mb_y > 0 { usize::from(dblk[mb_pos - mb_w].q) } else { 0 }; + + let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q]; + let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q]; + let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q]; + let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q]; + + if test_bit!(c_h_deblock, bpos + 2) { + rv40_loop_filter4_h(dst.data, coff + 4 * dst.stride[comp] + x * 4, dst.stride[comp], + x * 8, lim_cur, lim_bottom, alpha, beta, beta_c, true, false); + } + if test_bit!(c_v_deblock, bpos) && !ver_strong { + rv40_loop_filter4_v(dst.data, coff + x * 4, dst.stride[comp], + y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, false); + } + if (y == 0) && test_bit!(c_h_deblock, bpos) && (is_strong || top_is_strong) { + rv40_loop_filter4_h(dst.data, coff + x * 4, dst.stride[comp], + x * 8, lim_top, lim_cur, alpha, beta, beta_c, true, true); + } + if test_bit!(c_v_deblock, bpos) && 
/// Limits `a` to the symmetric interval around zero given by `lim`.
///
/// For a non-negative `lim` the result lies in `[-lim, lim]`; values already
/// inside the interval are returned unchanged.
fn clip_symm(a: i16, lim: i16) -> i16 {
    let floor = -lim;
    match a {
        below if below < floor => floor,
        above if above > lim => lim,
        inside => inside,
    }
}
/// Five-tap smoothing kernel used by the strong filter.
///
/// Computes `(25 * (a + e) + 26 * (b + c + d) + dither) >> 7`. When `clip` is
/// set, the result is additionally kept within `lims` of the centre tap `c`.
#[allow(clippy::many_single_char_names)]
fn sfilter(a: i16, b: i16, c: i16, d: i16, e: i16, dither: i16, clip: bool, lims: i16) -> i16 {
    let outer_taps = 25 * (a + e);
    let inner_taps = 26 * (b + c + d);
    let smoothed = (outer_taps + inner_taps + dither) >> 7;

    if !clip {
        return smoothed;
    }

    let lowest = c - lims;
    if smoothed < lowest {
        return lowest;
    }
    // The upper bound is only evaluated once the lower bound check has passed.
    let highest = c + lims;
    if smoothed > highest {
        highest
    } else {
        smoothed
    }
}
/// Measures the activity on both sides of a four-sample edge.
///
/// `off` addresses the first sample on the "q" side of the edge, `step` moves
/// across the edge and `stride` moves along it. Returns
/// `(strong, filter_p1, filter_q1)`:
/// * `filter_p1` / `filter_q1` are set when the absolute summed difference
///   between the two samples nearest the edge on that side is below `beta * 4`;
/// * `strong` is set only when `edge` is true, both flags above are set, and the
///   absolute summed differences one sample further out are below `beta2` on
///   both sides.
fn rv40_loop_strength(pix: &[u8], off: usize, step: usize, stride: usize,
                      beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
    let sample = |pos: usize| i16::from(pix[pos]);
    let rows = [off, off + stride, off + 2 * stride, off + 3 * stride];

    // First pass: differences between the two samples closest to the edge.
    let (near_p, near_q) = rows.iter().fold((0i16, 0i16), |(p, q), &pos| {
        (p + (sample(pos - 2 * step) - sample(pos - step)),
         q + (sample(pos + step) - sample(pos)))
    });

    let filter_p1 = near_p.wrapping_abs() < beta * 4;
    let filter_q1 = near_q.wrapping_abs() < beta * 4;

    if !(filter_p1 && filter_q1 && edge) {
        return (false, filter_p1, filter_q1);
    }

    // Second pass: differences one sample further away from the edge.
    let (far_p, far_q) = rows.iter().fold((0i16, 0i16), |(p, q), &pos| {
        (p + (sample(pos - 2 * step) - sample(pos - 3 * step)),
         q + (sample(pos + step) - sample(pos + 2 * step)))
    });

    let strong = far_p.wrapping_abs() < beta2 && far_q.wrapping_abs() < beta2;

    (strong, filter_p1, filter_q1)
}
/// Edge-activity measurement for an edge between horizontally adjacent samples.
///
/// Works on a window of four rows starting three samples to the left of `off`;
/// every row must provide at least six samples. Returns
/// `(strong, filter_p1, filter_q1)`:
/// * `filter_p1` / `filter_q1` are set when the absolute summed difference
///   between the two samples nearest the edge on that side is below `beta * 4`;
/// * `strong` is set only when `edge` is true, both flags above are set, and the
///   absolute summed differences one sample further out are below `beta2` on
///   both sides.
#[allow(clippy::eq_op)]
fn rv40_loop_strength_v(pix: &[u8], off: usize, stride: usize,
                        beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
    let window = &pix[off - 3..][..stride * 3 + 3 + 3];

    // Sums `row[p_a] - row[p_b]` and `row[q_a] - row[q_b]` over the four rows.
    // Within a row, indices 0..=2 are the left side of the edge (2 is nearest)
    // and indices 3..=5 are the right side (3 is nearest).
    let accumulate = |p_a: usize, p_b: usize, q_a: usize, q_b: usize| -> (i16, i16) {
        let mut p_total = 0i16;
        let mut q_total = 0i16;
        for ch in window.chunks(stride).take(4) {
            assert!(ch.len() >= 3 + 3);
            p_total += i16::from(ch[p_a]) - i16::from(ch[p_b]);
            q_total += i16::from(ch[q_a]) - i16::from(ch[q_b]);
        }
        (p_total, q_total)
    };

    let (near_p, near_q) = accumulate(1, 2, 4, 3);
    let filter_p1 = near_p.wrapping_abs() < beta * 4;
    let filter_q1 = near_q.wrapping_abs() < beta * 4;

    if !(filter_p1 && filter_q1 && edge) {
        return (false, filter_p1, filter_q1);
    }

    let (far_p, far_q) = accumulate(1, 0, 4, 5);
    let strong = far_p.wrapping_abs() < beta2 && far_q.wrapping_abs() < beta2;

    (strong, filter_p1, filter_q1)
}
lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1; + + if strong { + rv40_strong_loop_filter4(pix, off, 1, stride, alpha, lims, dmode, chroma); + } else if filter_p1 && filter_q1 { + rv40_weak_loop_filter4_v(pix, off, stride, true, true, alpha, beta, + lims, lim_p1, lim_q1); + } else if filter_p1 || filter_q1 { + rv40_weak_loop_filter4_v(pix, off, stride, filter_p1, filter_q1, alpha, beta, + lims >> 1, lim_p1 >> 1, lim_q1 >> 1); + } +} + +const RV40_DITHER_L: [i16; 16] = [ + 0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30, + 0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40 +]; +const RV40_DITHER_R: [i16; 16] = [ + 0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40, + 0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40 +]; + +const RV40_ALPHA_TAB: [i16; 32] = [ + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 122, 96, 75, 59, 47, 37, + 29, 23, 18, 15, 13, 11, 10, 9, + 8, 7, 6, 5, 4, 3, 2, 1 +]; + +const RV40_BETA_TAB: [i16; 32] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, 4, 4, 6, 6, + 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 13, 14, 15, 16, 17 +]; + +const RV40_FILTER_CLIP_TBL: [[i16; 32]; 3] = [ + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + ], [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5 + ], [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 7, 8, 9 + ] +]; + diff --git a/nihav-realmedia/src/codecs/rv40enc/dsp/mc.rs b/nihav-realmedia/src/codecs/rv40enc/dsp/mc.rs new file mode 100644 index 0000000..b7668de --- /dev/null +++ b/nihav-realmedia/src/codecs/rv40enc/dsp/mc.rs @@ -0,0 +1,294 @@ +use nihav_core::frame::NAVideoBuffer; +use nihav_codec_support::codecs::MV; +use nihav_codec_support::codecs::blockdsp::edge_emu; +use super::clip8; + +pub fn luma_mc(dst: &mut [u8], dstride: usize, pic: &NAVideoBuffer, xpos: usize, ypos: usize, mv: MV, is16: bool) { + const RV40_EDGE1: [isize; 4] = [ 0, 2, 2, 
2 ]; + const RV40_EDGE2: [isize; 4] = [ 0, 3, 3, 3 ]; + let dx = mv.x >> 2; + let cx = (mv.x & 3) as usize; + let dy = mv.y >> 2; + let cy = (mv.y & 3) as usize; + let mode = cx + cy * 4; + + let (w_, h_) = pic.get_dimensions(0); + let w = (w_ + 15) & !15; + let h = (h_ + 15) & !15; + let (bsize, mc_func) = if is16 { (16, LUMA_MC_16[mode]) } else { (8, LUMA_MC_8[mode]) }; + + if check_pos(xpos, ypos, bsize, w, h, dx, dy, RV40_EDGE1[cx], RV40_EDGE2[cx], RV40_EDGE1[cy], RV40_EDGE2[cy]) { + let sstride = pic.get_stride(0); + let mut soffset = pic.get_offset(0) + xpos + ypos * sstride; + let data = pic.get_data(); + let src: &[u8] = data.as_slice(); + soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize; + (mc_func)(dst, dstride, src, soffset, sstride); + } else { + let mut ebuf = [0u8; 32 * 22]; + edge_emu(pic, (xpos as isize) + (dx as isize) - 2, (ypos as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0, 4); + (mc_func)(dst, dstride, &ebuf, 32 * 2 + 2, 32); + } +} + +pub fn chroma_mc(dst: &mut [u8], dstride: usize, pic: &NAVideoBuffer, xpos: usize, ypos: usize, comp: usize, mv: MV, is16: bool) { + let mvx = mv.x / 2; + let mvy = mv.y / 2; + let dx = mvx >> 2; + let mut cx = (mvx & 3) as usize; + let dy = mvy >> 2; + let mut cy = (mvy & 3) as usize; + + if (cx == 3) && (cy == 3) { + cx = 2; + cy = 2; + } + + let (w_, h_) = pic.get_dimensions(0); + let w = ((w_ + 15) & !15) >> 1; + let h = ((h_ + 15) & !15) >> 1; + let bsize = if is16 { 8 } else { 4 }; + + if check_pos(xpos, ypos, bsize, w, h, dx, dy, 0, 1, 0, 1) { + let sstride = pic.get_stride(comp); + let mut soffset = pic.get_offset(comp) + xpos + ypos * sstride; + let data = pic.get_data(); + let src: &[u8] = data.as_slice(); + soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize; + rv40_chroma_mc(dst, dstride, src, soffset, sstride, bsize, cx, cy); + } else { + let mut ebuf = [0u8; 16 * 10]; + edge_emu(pic, (xpos as isize) + 
/// Tells whether a `size` x `size` block displaced by (`dx`, `dy`) from
/// (`x`, `y`) stays inside a `width` x `height` area, including margins.
///
/// `e0`/`e1` are the margins required before and after the block
/// horizontally, `e2`/`e3` the margins required above and below it.
fn check_pos(x: usize, y: usize, size: usize, width: usize, height: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let extent = size as isize;
    // One axis at a time: the displaced start minus the leading margin must not
    // go negative and the displaced end plus the trailing margin must not pass
    // `limit`.
    let axis_fits = |pos: usize, delta: i16, lead: isize, trail: isize, limit: usize| -> bool {
        let start = (pos as isize) + (delta as isize);
        (start - lead >= 0) && (start + extent + trail <= (limit as isize))
    };

    axis_fits(x, dx, e0, e1, width) && axis_fits(y, dy, e2, e3, height)
}
/// Generates one motion-compensation routine with the signature
/// `fn(dst: &mut [u8], dstride: usize, src: &[u8], sidx: usize, sstride: usize)`.
///
/// Supported forms (`$size` is the block width and height in pixels):
/// * `copy; name, size` - plain block copy;
/// * `mc01|mc02|mc03; name, size, ver` - one pass of `filter!` kernel 01/02/03, applied along
///   columns when `ver` is `true` and along rows otherwise;
/// * `cm01|cm02|cm03; name, size, ofilt` - a row pass with kernel 01/02/03 into a temporary
///   buffer, followed by the already generated routine `ofilt` run on that buffer;
/// * `mc33; name, size` - `filter!` kernel 33, which reads the pixel, its right neighbour and
///   the two pixels below them.
///
/// The `@single` and `@combined` arms are implementation details shared by the public forms;
/// the filter id is forwarded to `filter!` as a token.
macro_rules! mc_func {
    (copy; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            for (dline, sline) in dst.chunks_mut(dstride).zip(src[sidx..].chunks(sstride)).take($size) {
                dline[..$size].copy_from_slice(&sline[..$size]);
            }
        }
    );
    (mc01; $name: ident, $size: expr, $ver: expr) => (
        mc_func!(@single 01; $name, $size, $ver);
    );
    (mc02; $name: ident, $size: expr, $ver: expr) => (
        mc_func!(@single 02; $name, $size, $ver);
    );
    (mc03; $name: ident, $size: expr, $ver: expr) => (
        mc_func!(@single 03; $name, $size, $ver);
    );
    (cm01; $name: ident, $size: expr, $ofilt: ident) => (
        mc_func!(@combined 01; $name, $size, $ofilt);
    );
    (cm02; $name: ident, $size: expr, $ofilt: ident) => (
        mc_func!(@combined 02; $name, $size, $ofilt);
    );
    (cm03; $name: ident, $size: expr, $ofilt: ident) => (
        mc_func!(@combined 03; $name, $size, $ofilt);
    );
    (mc33; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for dline in dst.chunks_mut(dstride).take($size) {
                for (x, el) in dline[..$size].iter_mut().enumerate() {
                    *el = filter!(33; src, sidx + x, sstride);
                }
                sidx += sstride;
            }
        }
    );
    // One filtering pass; `step` selects the direction the kernel taps walk in.
    (@single $filt: tt; $name: ident, $size: expr, $ver: expr) => (
        fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            let step = if $ver { sstride } else { 1 };
            for dline in dst.chunks_mut(dstride).take($size) {
                for (x, el) in dline[..$size].iter_mut().enumerate() {
                    *el = filter!($filt; src, sidx + x, step);
                }
                sidx += sstride;
            }
        }
    );
    // Row pass into a scratch buffer, then `$ofilt` over the buffer. The scratch area starts
    // two source rows above the block and holds `$size + 5` rows; `$ofilt` is started at
    // buffer row 2.
    (@combined $filt: tt; $name: ident, $size: expr, $ofilt: ident) => (
        fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
            let mut bidx = 0;
            let bstride = $size;
            sidx -= sstride * 2;
            for _ in 0..$size+5 {
                for x in 0..$size { buf[bidx + x] = filter!($filt; src, sidx + x, 1); }
                bidx += bstride;
                sidx += sstride;
            }
            $ofilt(dst, dstride, &buf, 2*bstride, $size);
        }
    );
}
/// Chroma motion compensation for a `size`x`size` block.
///
/// `x` and `y` are the fractional parts of the motion vector in quarter-pixel units.
/// * Both zero: the block is copied unchanged.
/// * Both non-zero: every output pixel blends the source pixel with its right, lower and
///   lower-right neighbours.
/// * Exactly one non-zero: only the source pixel and one neighbour (to the right when `y` is
///   zero, below otherwise) are blended, so no other source pixel is read.
///
/// The weights always add up to 16 and a position-dependent rounding bias from
/// `RV40_CHROMA_BIAS` is added before the final shift by 4.
#[allow(clippy::many_single_char_names)]
fn rv40_chroma_mc(dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
    const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [
        [ 0, 4, 8, 4 ],
        [ 8, 7, 8, 7 ],
        [ 0, 8, 4, 8 ],
        [ 8, 7, 8, 7 ]
    ];

    if x == 0 && y == 0 {
        let src_rows = src[sidx..].chunks(sstride);
        for (out, inp) in dst.chunks_mut(dstride).zip(src_rows).take(size) {
            out[..size].copy_from_slice(&inp[..size]);
        }
        return;
    }

    let bias = RV40_CHROMA_BIAS[y >> 1][x >> 1];
    let sample = |pos: usize| u16::from(src[pos]);
    let w_cur = ((4 - x) * (4 - y)) as u16;

    if x != 0 && y != 0 {
        let w_right = ((    x) * (4 - y)) as u16;
        let w_below = ((4 - x) * (    y)) as u16;
        let w_diag  = ((    x) * (    y)) as u16;
        for out in dst.chunks_mut(dstride).take(size) {
            for (pix, pos) in out[..size].iter_mut().zip(sidx..) {
                let acc = w_cur * sample(pos)
                        + w_right * sample(pos + 1)
                        + w_below * sample(pos + sstride)
                        + w_diag * sample(pos + 1 + sstride)
                        + bias;
                *pix = (acc >> 4) as u8;
            }
            sidx += sstride;
        }
    } else {
        let w_next = ((    x) * (4 - y) + (4 - x) * (    y)) as u16;
        // The single neighbour lies below for a vertical fraction, to the right otherwise.
        let next = if y == 0 { 1 } else { sstride };
        for out in dst.chunks_mut(dstride).take(size) {
            for (pix, pos) in out[..size].iter_mut().zip(sidx..) {
                let acc = w_cur * sample(pos) + w_next * sample(pos + next) + bias;
                *pix = (acc >> 4) as u8;
            }
            sidx += sstride;
        }
    }
}
/// Blends two pixels with the given weights.
///
/// Each product is scaled down by 9 bits, the two parts are added together with a rounding
/// term of 16 and the sum is scaled down by another 5 bits.
fn weight(pix1: u8, weight1: u32, pix2: u8, weight2: u32) -> u8 {
    let part1 = (u32::from(pix1) * weight1) >> 9;
    let part2 = (u32::from(pix2) * weight2) >> 9;
    ((part1 + part2 + 0x10) >> 5) as u8
}

/// Maps a luma quantiser index to the `(DC, AC)` quantiser indices used for chroma blocks.
///
/// Panics when `q` is not below 32, the size of the lookup tables.
pub fn chroma_quants(q: usize) -> (usize, usize) {
    let dc = usize::from(RV34_CHROMA_QUANT_DC[q]);
    let ac = usize::from(RV34_CHROMA_QUANT_AC[q]);
    (dc, ac)
}

/// Chroma DC quantiser index for every luma quantiser index.
const RV34_CHROMA_QUANT_DC: [u8; 32] = [
     0,  0,  0,  1,  2,  3,  4,  5,
     6,  7,  8,  9, 10, 11, 12, 13,
    14, 15, 15, 16, 17, 18, 18, 19,
    20, 20, 21, 21, 22, 22, 23, 23
];
/// Chroma AC quantiser index for every luma quantiser index.
const RV34_CHROMA_QUANT_AC: [u8; 32] = [
     0,  1,  2,  3,  4,  5,  6,  7,
     8,  9, 10, 11, 12, 13, 14, 15,
    16, 17, 17, 18, 19, 20, 20, 21,
    22, 22, 23, 23, 24, 24, 25, 25
];
/// Sum of squared differences between two `width`x`height` pixel areas.
///
/// `src1` and `src2` are read row by row using `stride1` and `stride2` respectively.
/// Every row of `src1` must hold at least `width` pixels.
fn calc_dist(src1: &[u8], stride1: usize, src2: &[u8], stride2: usize, width: usize, height: usize) -> u32 {
    src1.chunks(stride1)
        .zip(src2.chunks(stride2))
        .take(height)
        .map(|(row1, row2)| {
            row1[..width]
                .iter()
                .zip(row2.iter())
                .map(|(&a, &b)| {
                    let diff = i32::from(a) - i32::from(b);
                    (diff * diff) as u32
                })
                .sum::<u32>()
        })
        .sum()
}
RefMBData::new(), + wblk2: RefMBData::new(), + tmpc: [Block::new(); 25], + ratio1: 0, + ratio2: 0, + tmp_tx: [Block::new(); 25], + } + } + fn load(&mut self, src: &[u8], offsets: [usize; 3], strides: [usize; 3]) { + for (dst, src) in self.ref_blk.y.chunks_mut(16).zip(src[offsets[0]..].chunks(strides[0])) { + dst.copy_from_slice(&src[..16]); + } + for (dst, src) in self.ref_blk.u.chunks_mut(8).zip(src[offsets[1]..].chunks(strides[1])) { + dst.copy_from_slice(&src[..8]); + } + for (dst, src) in self.ref_blk.v.chunks_mut(8).zip(src[offsets[2]..].chunks(strides[2])) { + dst.copy_from_slice(&src[..8]); + } + } + fn recon_pred_part(&mut self, mbt: MacroblockType, ref_p: &NAVideoBuffer, ref_n: &NAVideoBuffer, mb_x: usize, mb_y: usize) { + let (xpos, ypos) = (mb_x * 16, mb_y * 16); + + match mbt { + MacroblockType::Intra16x16(_) => unreachable!(), + MacroblockType::Intra4x4(_) => unreachable!(), + MacroblockType::Inter16x16(mv) | + MacroblockType::InterMix(mv) | + MacroblockType::Backward(mv) => { + luma_mc(&mut self.pred_blk.y, 16, ref_n, xpos, ypos, mv, true); + chroma_mc(&mut self.pred_blk.u, 8, ref_n, xpos / 2, ypos / 2, 1, mv, true); + chroma_mc(&mut self.pred_blk.v, 8, ref_n, xpos / 2, ypos / 2, 2, mv, true); + }, + MacroblockType::PSkip => { + luma_mc(&mut self.pred_blk.y, 16, ref_n, xpos, ypos, ZERO_MV, true); + chroma_mc(&mut self.pred_blk.u, 8, ref_n, xpos / 2, ypos / 2, 1, ZERO_MV, true); + chroma_mc(&mut self.pred_blk.v, 8, ref_n, xpos / 2, ypos / 2, 2, ZERO_MV, true); + }, + MacroblockType::Inter16x8(mvs) => { + let mvs = [mvs[0], mvs[0], mvs[1], mvs[1]]; + for (i, &mv) in mvs.iter().enumerate() { + let xadd = i & 1; + let yadd = i >> 1; + luma_mc(&mut self.pred_blk.y[xadd * 8 + yadd * 8 * 16..], 16, ref_n, xpos + xadd * 8, ypos + yadd * 8, mv, false); + chroma_mc(&mut self.pred_blk.u[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, mv, false); + chroma_mc(&mut self.pred_blk.v[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + 
xadd * 4, ypos / 2 + xadd * 4, 2, mv, false); + } + }, + MacroblockType::Inter8x16(mvs) => { + let mvs = [mvs[0], mvs[1], mvs[0], mvs[1]]; + for (i, &mv) in mvs.iter().enumerate() { + let xadd = i & 1; + let yadd = i >> 1; + luma_mc(&mut self.pred_blk.y[xadd * 8 + yadd * 8 * 16..], 16, ref_n, xpos + xadd * 8, ypos + yadd * 8, mv, false); + chroma_mc(&mut self.pred_blk.u[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, mv, false); + chroma_mc(&mut self.pred_blk.v[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, mv, false); + } + }, + MacroblockType::Inter8x8(mvs) => { + for (i, &mv) in mvs.iter().enumerate() { + let xadd = i & 1; + let yadd = i >> 1; + luma_mc(&mut self.pred_blk.y[xadd * 8 + yadd * 8 * 16..], 16, ref_n, xpos + xadd * 8, ypos + yadd * 8, mv, false); + chroma_mc(&mut self.pred_blk.u[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, mv, false); + chroma_mc(&mut self.pred_blk.v[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, mv, false); + } + }, + MacroblockType::Forward(mv) => { + luma_mc(&mut self.pred_blk.y, 16, ref_p, xpos, ypos, mv, true); + chroma_mc(&mut self.pred_blk.u, 8, ref_p, xpos / 2, ypos / 2, 1, mv, true); + chroma_mc(&mut self.pred_blk.v, 8, ref_p, xpos / 2, ypos / 2, 2, mv, true); + }, + MacroblockType::Bidir(fmv, bmv) => { + luma_mc(&mut self.wblk1.y, 16, ref_p, xpos, ypos, fmv, true); + chroma_mc(&mut self.wblk1.u, 8, ref_p, xpos / 2, ypos / 2, 1, fmv, true); + chroma_mc(&mut self.wblk1.v, 8, ref_p, xpos / 2, ypos / 2, 2, fmv, true); + luma_mc(&mut self.wblk2.y, 16, ref_n, xpos, ypos, bmv, true); + chroma_mc(&mut self.wblk2.u, 8, ref_n, xpos / 2, ypos / 2, 1, bmv, true); + chroma_mc(&mut self.wblk2.v, 8, ref_n, xpos / 2, ypos / 2, 2, bmv, true); + self.pred_blk.avg(&self.wblk1, self.ratio1, &self.wblk2, self.ratio2); + }, + MacroblockType::BSkip(fmvs, bmvs) => { + for (i, (&fmv, &bmv)) in 
fmvs.iter().zip(bmvs.iter()).enumerate() { + let xadd = i & 1; + let yadd = i >> 1; + luma_mc(&mut self.wblk1.y[xadd * 8 + yadd * 8 * 16..], 16, ref_p, xpos + xadd * 8, ypos + yadd * 8, fmv, false); + chroma_mc(&mut self.wblk1.u[xadd * 4 + yadd * 4 * 8..], 8, ref_p, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, fmv, false); + chroma_mc(&mut self.wblk1.v[xadd * 4 + yadd * 4 * 8..], 8, ref_p, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, fmv, false); + luma_mc(&mut self.wblk2.y[xadd * 8 + yadd * 8 * 16..], 16, ref_n, xpos + xadd * 8, ypos + yadd * 8, bmv, false); + chroma_mc(&mut self.wblk2.u[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, bmv, false); + chroma_mc(&mut self.wblk2.v[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, bmv, false); + } + self.pred_blk.avg(&self.wblk1, self.ratio1, &self.wblk2, self.ratio2); + }, + }; + } + fn get_diff_metric(&mut self, be: &mut BitsEstimator, rdm: &RateDistMetric, best_m: u32, q_dc: usize, q_ac: usize, is16: bool, mut bits: u32) -> (u32, u32) { + self.pred_blk.calc_coeffs(&self.ref_blk, &mut self.tmpc, q_dc, q_ac, is16); + self.tmp_tx.copy_from_slice(&self.tmpc); + if is16 { + bits += be.block_bits(&self.tmpc[24], 24); + } + for blk in self.tmpc[..16].iter() { + bits += be.block_bits(blk, 0); + } + for blk in self.tmpc[16..24].iter() { + bits += be.block_bits(blk, 16); + } + let cdist = rdm.get_metric(bits, 0); + if cdist > best_m { + return (cdist, 0); + } + + for blk in self.tmpc[..16].iter_mut() { + blk.dequant(q_ac, q_ac); + } + let (cq_dc, cq_ac) = chroma_quants(q_ac); + for blk in self.tmpc[16..24].iter_mut() { + blk.dequant(cq_dc, cq_ac); + } + if is16 { + let (blocks, dc_blk) = self.tmpc.split_at_mut(24); + dc_blk[0].dequant_dcs(q_dc, q_ac); + dc_blk[0].itransform_dcs(); + for (blk, &dc) in blocks.iter_mut().zip(dc_blk[0].coeffs.iter()) { + blk.coeffs[0] = dc; + } + } + + self.cand_blk.copy_from(&self.pred_blk); + let mut dist = 0; + for (i, blk) in 
self.tmpc[..16].iter_mut().enumerate() { + let off = (i & 3) * 4 + (i >> 2) * 4 * 16; + if !blk.is_empty() { + blk.itransform_4x4(); + blk.add_to(&mut self.cand_blk.y[off..], 16); + } + dist += calc_dist(&self.cand_blk.y[off..], 16, &self.ref_blk.y[off..], 16, 4, 4); + let cdist = rdm.get_metric(bits, dist); + if cdist > best_m { + return (cdist, 0); + } + } + let (_, cpart) = self.tmpc.split_at_mut(16); + let (upart, vpart) = cpart.split_at_mut(4); + for (i, (ublk, vblk)) in upart.iter_mut().zip(vpart.iter_mut()).enumerate() { + let off = (i & 1) * 4 + (i >> 1) * 4 * 8; + ublk.itransform_4x4(); + vblk.itransform_4x4(); + ublk.add_to(&mut self.cand_blk.u[off..], 8); + vblk.add_to(&mut self.cand_blk.v[off..], 8); + dist += calc_dist(&self.cand_blk.u[off..], 8, &self.ref_blk.u[off..], 8, 4, 4); + dist += calc_dist(&self.cand_blk.v[off..], 8, &self.ref_blk.v[off..], 8, 4, 4); + + let cdist = rdm.get_metric(bits, dist); + if cdist > best_m { + return (cdist, 0); + } + } + + (rdm.get_metric(bits, dist), bits) + } + fn get_skip_metric(&self, rdm: &RateDistMetric, best_m: u32) -> (u32, u32) { + let bits = 1; + let mut dist = calc_dist(&self.pred_blk.y, 16, &self.ref_blk.y, 16, 16, 16); + let cdist = rdm.get_metric(bits, dist); + if cdist > best_m { + return (cdist, 0); + } + dist += calc_dist(&self.pred_blk.u, 8, &self.ref_blk.u, 8, 8, 8); + let cdist = rdm.get_metric(bits, dist); + if cdist > best_m { + return (cdist, 0); + } + dist += calc_dist(&self.pred_blk.v, 8, &self.ref_blk.v, 8, 8, 8); + + (rdm.get_metric(bits, dist), bits) + } + fn put_mb(dst: &mut NASimpleVideoFrame, cblk: &RefMBData, mb_x: usize, mb_y: usize) { + for (dline, sline) in dst.data[dst.offset[0] + mb_x * 16 + mb_y * 16 * dst.stride[0]..].chunks_mut(dst.stride[0]).zip(cblk.y.chunks(16)) { + dline[..16].copy_from_slice(sline); + } + for (dline, sline) in dst.data[dst.offset[1] + mb_x * 8 + mb_y * 8 * dst.stride[1]..].chunks_mut(dst.stride[1]).zip(cblk.u.chunks(8)) { + 
dline[..8].copy_from_slice(sline); + } + for (dline, sline) in dst.data[dst.offset[2] + mb_x * 8 + mb_y * 8 * dst.stride[2]..].chunks_mut(dst.stride[2]).zip(cblk.v.chunks(8)) { + dline[..8].copy_from_slice(sline); + } + } +} + +pub struct MacroblockDecider { + pub q: usize, + has_top: bool, + has_left: bool, + has_tl: bool, + has_tr: bool, + mb_x: usize, + mb_y: usize, + best_mbt: MacroblockType, + best_dist: u32, + best_bits: u32, + ipred_y: IntraPred16x16, + ipred_u: IntraPred16x16, + ipred_v: IntraPred16x16, + top_y: Vec, + top_u: Vec, + top_v: Vec, + tr_d: u32, + tr_b: u32, + mb: SingleMacroblock, + best_coef: [Block; 25], + best_blk: RefMBData, +} + +impl MacroblockDecider { + pub fn new() -> Self { + Self { + q: 0, + has_top: false, + has_left: false, + has_tl: false, + has_tr: false, + mb_x: 0, + mb_y: 0, + ipred_y: IntraPred16x16::new(), + ipred_u: IntraPred16x16::new(), + ipred_v: IntraPred16x16::new(), + top_y: Vec::new(), + top_u: Vec::new(), + top_v: Vec::new(), + tr_b: 0, + tr_d: 0, + best_mbt: MacroblockType::default(), + best_dist: 0, + best_bits: 0, + mb: SingleMacroblock::new(), + best_coef: [Block::new(); 25], + best_blk: RefMBData::new(), + } + } + pub fn resize(&mut self, mb_w: usize) { + self.top_y.resize((mb_w + 1) * 16 + 1, 0); + self.top_u.resize((mb_w + 1) * 8 + 1, 0); + self.top_v.resize((mb_w + 1) * 8 + 1, 0); + } + pub fn set_b_distance(&mut self, tr_b: u32, tr_d: u32) { + let (ratio1, ratio2) = if tr_d != 0 { + (((tr_d - tr_b) << 14) / tr_d, (tr_b << 14) / tr_d) + } else { (1 << 13, 1 << 13) }; + self.tr_b = tr_b; + self.tr_d = tr_d; + self.mb.ratio1 = ratio1; + self.mb.ratio2 = ratio2; + } + pub fn load_mb(&mut self, src: &[u8], offsets: [usize; 3], strides: [usize; 3], sstate: &SliceState) { + self.has_top = sstate.has_t; + self.has_left = sstate.has_l; + self.has_tl = sstate.has_tl; + self.has_tr = sstate.has_tr; + self.mb_x = sstate.mb_x; + self.mb_y = sstate.mb_y; + + self.ipred_y.top[1..].copy_from_slice(&self.top_y[self.mb_x * 16 
+ 1..][..16]); + self.ipred_u.top[1..9].copy_from_slice(&self.top_u[self.mb_x * 8 + 1..][..8]); + self.ipred_v.top[1..9].copy_from_slice(&self.top_v[self.mb_x * 8 + 1..][..8]); + + self.mb.load(src, offsets, strides); + + self.best_mbt = MacroblockType::default(); + self.best_dist = std::u32::MAX; + self.best_bits = 0; + } + pub fn try_b_coding(&mut self, ref_p: &NAVideoBuffer, ref_n: &NAVideoBuffer, be: &mut BitsEstimator, me: &mut MotionEstimator, rdm: &RateDistMetric, mbstate: &MBState, refine: bool) { + let q_dc = usize::from(RV40_QUANT_DC[1][self.q]); + + let blk8_idx = mbstate.get_blk8_idx(self.mb_x, self.mb_y); + let mut smb_f = [ZERO_MV; 4]; + let mut smb_b = [ZERO_MV; 4]; + for (i, (fwd, bwd)) in smb_f.iter_mut().zip(smb_b.iter_mut()).enumerate() { + let ref_mv = mbstate.ref_mv[blk8_idx + (i & 1) + (i >> 1) * mbstate.blk8_stride]; + let (fm, bm) = ref_mv.scale(self.tr_d, self.tr_b); + *fwd = fm; + *bwd = bm; + } + self.mb.recon_pred_part(MacroblockType::BSkip(smb_f, smb_b), ref_p, ref_n, self.mb_x, self.mb_y); + be.set_mb_type(MBType::Skip); + let (cur_dist, cur_bits) = self.mb.get_skip_metric(rdm, self.best_dist); + if cur_dist < self.best_dist { + self.best_dist = cur_dist; + self.best_bits = cur_bits; + self.best_mbt = MacroblockType::BSkip(smb_f, smb_b); + self.best_blk.copy_from(&self.mb.pred_blk); + if self.best_dist < rdm.good_enough { + return; + } + } + + let fwd_cand = [ + -mbstate.ref_mv[blk8_idx], + mbstate.fwd_mv[blk8_idx - 1], + mbstate.fwd_mv[blk8_idx - 1 - mbstate.blk8_stride], + mbstate.fwd_mv[blk8_idx - mbstate.blk8_stride], + mbstate.fwd_mv[blk8_idx + 2 - mbstate.blk8_stride] + ]; + let (fmv, _fdist) = me.search_mb_p(ref_p, &self.mb.ref_blk, self.mb_x, self.mb_y, &fwd_cand); + be.set_mb_type(MBType::Forward); + let bcost = be.estimate_mb_hdr(&[fmv]); + self.mb.recon_pred_part(MacroblockType::Forward(fmv), ref_p, ref_n, self.mb_x, self.mb_y); + let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, 
false, bcost); + if cur_dist < self.best_dist { + self.best_dist = cur_dist; + self.best_bits = cur_bits; + self.best_mbt = MacroblockType::Forward(fmv); + self.best_coef.copy_from_slice(&self.mb.tmp_tx); + self.best_blk.copy_from(&self.mb.cand_blk); + if self.best_dist < rdm.good_enough { + return; + } + } + + let bwd_cand = [ + mbstate.ref_mv[blk8_idx], + mbstate.bwd_mv[blk8_idx - 1], + mbstate.bwd_mv[blk8_idx - 1 - mbstate.blk8_stride], + mbstate.bwd_mv[blk8_idx - mbstate.blk8_stride], + mbstate.bwd_mv[blk8_idx + 2 - mbstate.blk8_stride] + ]; + let (bmv, _bdist) = me.search_mb_p(ref_n, &self.mb.ref_blk, self.mb_x, self.mb_y, &bwd_cand); + be.set_mb_type(MBType::Backward); + let bcost = be.estimate_mb_hdr(&[bmv]); + self.mb.recon_pred_part(MacroblockType::Backward(bmv), ref_p, ref_n, self.mb_x, self.mb_y); + let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, bcost); + if cur_dist < self.best_dist { + self.best_dist = cur_dist; + self.best_bits = cur_bits; + self.best_mbt = MacroblockType::Backward(bmv); + self.best_coef.copy_from_slice(&self.mb.tmp_tx); + self.best_blk.copy_from(&self.mb.cand_blk); + if self.best_dist < rdm.good_enough { + return; + } + } + + be.set_mb_type(MBType::Bidir); + let (i_fmv, i_bmv) = if !refine { + (fmv, bmv) + } else { + let mut b_searcher = SearchB::new(ref_p, ref_n, self.mb_x, self.mb_y, [self.mb.ratio1, self.mb.ratio2]); + b_searcher.search_mb(&self.mb.ref_blk, [fmv, bmv]) + }; + + let bcost = be.estimate_mb_hdr(&[i_fmv, i_bmv]); + self.mb.recon_pred_part(MacroblockType::Bidir(i_fmv, i_bmv), ref_p, ref_n, self.mb_x, self.mb_y); + let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, bcost); + if cur_dist < self.best_dist { + self.best_dist = cur_dist; + self.best_bits = cur_bits; + self.best_coef.copy_from_slice(&self.mb.tmp_tx); + self.best_blk.copy_from(&self.mb.cand_blk); + self.best_mbt = MacroblockType::Bidir(i_fmv, i_bmv); + } + } + pub 
fn try_p_coding(&mut self, ref_pic: &NAVideoBuffer, be: &mut BitsEstimator, me: &mut MotionEstimator, rdm: &RateDistMetric, mbstate: &MBState) { + let q_dc = usize::from(RV40_QUANT_DC[1][self.q]); + + self.mb.recon_pred_part(MacroblockType::Inter16x16(ZERO_MV), ref_pic, ref_pic, self.mb_x, self.mb_y); + be.set_mb_type(MBType::Skip); + let (cur_dist, cur_bits) = self.mb.get_skip_metric(rdm, self.best_dist); + if cur_dist < self.best_dist { + self.best_dist = cur_dist; + self.best_bits = cur_bits; + self.best_mbt = MacroblockType::PSkip; + self.best_blk.copy_from(&self.mb.pred_blk); + if self.best_dist < rdm.good_enough { + return; + } + } + + let blk8_idx = mbstate.get_blk8_idx(self.mb_x, self.mb_y); + let mv_cand = [ + mbstate.fwd_mv[blk8_idx - 1], + mbstate.fwd_mv[blk8_idx - 1 - mbstate.blk8_stride], + mbstate.fwd_mv[blk8_idx - mbstate.blk8_stride], + mbstate.fwd_mv[blk8_idx + 2 - mbstate.blk8_stride] + ]; + let (mv, pdist) = me.search_mb_p(ref_pic, &self.mb.ref_blk, self.mb_x, self.mb_y, &mv_cand); + + self.mb.recon_pred_part(MacroblockType::Inter16x16(mv), ref_pic, ref_pic, self.mb_x, self.mb_y); + + be.set_mb_type(MBType::P16x16); + let pcost = be.estimate_mb_hdr(&[mv]); + let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, pcost); + if cur_dist < self.best_dist { + self.best_mbt = MacroblockType::Inter16x16(mv); + self.best_dist = cur_dist; + self.best_bits = cur_bits; + self.best_coef.copy_from_slice(&self.mb.tmp_tx); + self.best_blk.copy_from(&self.mb.cand_blk); + } + be.set_mb_type(MBType::P16x16Mix); + let p16cost = be.estimate_mb_hdr(&[mv]); + let (cur_dist16, cur_bits16) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, true, p16cost); + if cur_dist16 < self.best_dist { + self.best_mbt = MacroblockType::InterMix(mv); + self.best_dist = cur_dist16; + self.best_bits = cur_bits16; + self.best_coef.copy_from_slice(&self.mb.tmp_tx); + self.best_blk.copy_from(&self.mb.cand_blk); + } + + if pdist > 
rdm.p_split_thr { + let xpos = self.mb_x * 16; + let ypos = self.mb_y * 16; + + let mv_cand = [ + mv, + mbstate.fwd_mv[blk8_idx - 1], + mbstate.fwd_mv[blk8_idx - 1 - mbstate.blk8_stride], + mbstate.fwd_mv[blk8_idx - mbstate.blk8_stride], + mbstate.fwd_mv[blk8_idx - 1 + mbstate.blk8_stride], + mbstate.fwd_mv[blk8_idx + 2 - mbstate.blk8_stride], + mbstate.fwd_mv[blk8_idx + 1 - mbstate.blk8_stride] + ]; + + let (mv0, pdist0) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos, ypos, &mv_cand); + let (mv1, pdist1) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos + 8, ypos, &mv_cand); + let (mv2, pdist2) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos, ypos + 8, &mv_cand); + let (mv3, pdist3) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos + 8, ypos + 8, &mv_cand); + if pdist0 + pdist1 + pdist2 + pdist3 < pdist - pdist / 4 { + let mvs = [mv0, mv1, mv2, mv3]; + let (cand_mbt, cand_mbtype) = if mv0 == mv1 && mv2 == mv3 { + (MBType::P16x8, MacroblockType::Inter16x8([mv0, mv2])) + } else if mv0 == mv2 && mv1 == mv3 { + (MBType::P8x16, MacroblockType::Inter8x16([mv0, mv1])) + } else { + (MBType::P8x8, MacroblockType::Inter8x8(mvs)) + }; + be.set_mb_type(cand_mbt); + let pcost = be.estimate_mb_hdr(&mvs); + + self.mb.recon_pred_part(MacroblockType::Inter8x8(mvs), ref_pic, ref_pic, self.mb_x, self.mb_y); + let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, pcost); + if cur_dist < self.best_dist { + self.best_dist = cur_dist; + self.best_mbt = cand_mbtype; + self.best_bits = cur_bits; + self.best_coef.copy_from_slice(&self.mb.tmp_tx); + self.best_blk.copy_from(&self.mb.cand_blk); + } + } + } + } + fn recon_intra_16_pred(&mut self, ptype: PredType8x8) { + self.ipred_y.apply16(ptype, &mut self.mb.pred_blk.y, 16); + self.ipred_u.apply8(ptype, &mut self.mb.pred_blk.u, 8); + self.ipred_v.apply8(ptype, &mut self.mb.pred_blk.v, 8); + } + pub fn try_intra_16_pred(&mut self, be: &mut BitsEstimator, rdm: &RateDistMetric) { + if self.best_dist < 
rdm.good_enough { + return; + } + let pred_types_try: &[PredType8x8] = match (self.has_top, self.has_left) { + (false, false) => &[PredType8x8::DC128], + (true, false) => &[PredType8x8::TopDC], + (false, true) => &[PredType8x8::LeftDC], + _ => &PRED_TYPES8, + }; + + be.set_mb_type(MBType::Intra16); + let hdr_cost = be.estimate_mb_hdr(&[]); + for &ptype in pred_types_try.iter() { + if !self.has_tl && matches!(ptype, PredType8x8::Plane) { + continue; + } + self.recon_intra_16_pred(ptype); + let q_dc = usize::from(RV40_QUANT_DC[0][self.q]); + let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, true, hdr_cost); + if cur_dist < self.best_dist { + self.best_mbt = MacroblockType::Intra16x16(ptype); + self.best_dist = cur_dist; + self.best_bits = cur_bits; + self.best_coef.copy_from_slice(&self.mb.tmp_tx); + self.best_blk.copy_from(&self.mb.cand_blk); + if cur_dist < rdm.good_enough { + break; + } + } + } + } + pub fn try_intra_4x4_pred(&mut self, be: &mut BitsEstimator, rdm: &RateDistMetric, mbstate: &mut MBState) { + const PRED4_DEF: &[PredType4x4] = &[ PredType4x4::DC128 ]; + const PRED4_NO_TOP: &[PredType4x4] = &[ PredType4x4::Hor, PredType4x4::LeftDC ]; + const PRED4_NO_LEFT: &[PredType4x4] = &[ PredType4x4::Ver, PredType4x4::TopDC ]; + const PRED4_FULL: &[PredType4x4] = &[ + PredType4x4::Ver, PredType4x4::Hor, PredType4x4::DC, + PredType4x4::DiagDownLeft, PredType4x4::DiagDownRight, + PredType4x4::VerRight, PredType4x4::HorDown, + PredType4x4::VerLeft, PredType4x4::HorUp + ]; + const PRED4_FULL_NO_LD: &[PredType4x4] = &[ + PredType4x4::Ver, PredType4x4::Hor, PredType4x4::DC, + PredType4x4::DiagDownLeftNoDown, PredType4x4::DiagDownRight, + PredType4x4::VerRight, PredType4x4::HorDown, + PredType4x4::VerLeftNoDown, PredType4x4::HorUpNoDown + ]; + + if self.best_dist < rdm.good_enough { + return; + } + be.set_mb_type(MBType::Intra); + + let (tr_y, tr_u, tr_v) = if self.has_tr { + let mut tr_y = [0; 4]; + let mut tr_u = [0; 4]; + let 
mut tr_v = [0; 4]; + tr_y.copy_from_slice(&self.top_y[self.mb_x * 16 + 16 + 1..][..4]); + tr_u.copy_from_slice(&self.top_u[self.mb_x * 8 + 8 + 1..][..4]); + tr_v.copy_from_slice(&self.top_v[self.mb_x * 8 + 8 + 1..][..4]); + (tr_y, tr_u, tr_v) + } else { + ([self.ipred_y.top[16]; 4], [self.ipred_u.top[8]; 4], [self.ipred_v.top[8]; 4]) + }; + let mut ipred4 = BlockIntra4Pred::new(&self.ipred_y, &self.ipred_u, &self.ipred_v, tr_y, tr_u, tr_v, self.has_left); + + let q_ac = self.q; + let (cq_dc, cq_ac) = chroma_quants(self.q); + let mut tot_dist = 0; + let mut tot_bits = be.estimate_mb_hdr(&[]); + let mut modes = [PredType4x4::DC; 16]; + let mut tblk = Block::new(); + let mut has_t = self.has_top; + + for y in 0..4 { + let mut has_l = self.has_left; + let mut has_ld = has_l && y != 3; + for x in 0..4 { + let list = match (has_l, has_t) { + (true, true) if has_ld => PRED4_FULL, + (true, true) => PRED4_FULL_NO_LD, + (false, true) => PRED4_NO_LEFT, + (true, false) => PRED4_NO_TOP, + _ => PRED4_DEF, + }; + + let do_chroma = ((x & 1) == 0) && ((y & 1) == 0); + + let mut best_mode = PRED4_DEF[0]; + let mut best_cdist = std::u32::MAX; + let mut best_dist = 0; + let mut best_bits = 0; + for &try_mode in list.iter() { + ipred4.pred_block(&mut self.mb.cand_blk, x, y, try_mode); + let off = x * 4 + y * 4 * 16; + let (mut cur_dist, mut cur_bits) = Self::blk4_diff(&self.mb.cand_blk.y[off..], &self.mb.ref_blk.y[off..], 16, q_ac, q_ac, be); + if do_chroma { + let off = x * 2 + y * 2 * 8; + let (du, bu) = Self::blk4_diff(&self.mb.cand_blk.u[off..], &self.mb.ref_blk.u[off..], 8, cq_dc, cq_ac, be); + let (dv, bv) = Self::blk4_diff(&self.mb.cand_blk.v[off..], &self.mb.ref_blk.v[off..], 8, cq_dc, cq_ac, be); + cur_dist += du + dv; + cur_bits += bu + bv; + } + + let cand_dist = rdm.get_metric(cur_bits, cur_dist); + if cand_dist < best_cdist { + best_cdist = cand_dist; + best_mode = try_mode; + best_dist = cur_dist; + best_bits = cur_bits; + } + } + + ipred4.pred_block(&mut 
self.mb.cand_blk, x, y, best_mode); + + let off = x * 4 + y * 4 * 16; + tblk.from_diff(&self.mb.ref_blk.y[off..], &self.mb.cand_blk.y[off..], 16); + tblk.transform_4x4(); + tblk.quant(q_ac, q_ac); + self.mb.tmp_tx[x + y * 4] = tblk; + if !tblk.is_empty() { + tblk.dequant(q_ac, q_ac); + tblk.itransform_4x4(); + tblk.add_to(&mut self.mb.cand_blk.y[off..], 16); + } + if do_chroma { + let off = x * 2 + y * 2 * 8; + let mut dests = [&mut self.mb.cand_blk.u[off..], &mut self.mb.cand_blk.v[off..]]; + let sources = [&self.mb.ref_blk.u[off..], &self.mb.ref_blk.v[off..]]; + for (comp, (dblk, &sblk)) in dests.iter_mut().zip(sources.iter()).enumerate() { + tblk.from_diff(sblk, dblk, 8); + tblk.transform_4x4(); + tblk.quant(cq_dc, cq_ac); + self.mb.tmp_tx[16 + comp * 4 + x / 2 + y] = tblk; + if !tblk.is_empty() { + tblk.dequant(cq_dc, cq_ac); + tblk.itransform_4x4(); + tblk.add_to(dblk, 8); + } + } + } + + ipred4.update_from(&self.mb.cand_blk, x, y); + + tot_dist += best_dist; + tot_bits += best_bits; + + let cand_dist = rdm.get_metric(tot_bits, tot_dist); + if cand_dist > self.best_dist { + return; + } + + modes[x + y * 4] = best_mode; + + has_l = true; + has_ld = false; + } + has_t = true; + } + + mbstate.set_ipred4x4(self.mb_x, self.mb_y, &modes); + + if !self.has_top { + let mut code = 0usize; + for &el in modes[..4].iter() { + code = code * 2 + if el.to_index() == 0 { 0 } else { 1 }; + } + tot_bits += u32::from(RV40_AIC_TOP_BITS[code]); + } + + let ystart = if self.has_top { 0 } else { 1 }; + for y in ystart..4 { + let mut x = 0; + while x < 4 { + let (lctx, tctx, trctx) = mbstate.get_ipred4x4_ctx(self.mb_x, self.mb_y, x, y); + let ctx_word = if x < 3 { + ((trctx & 0xF) as u16) + (((tctx & 0xF) as u16) << 4) + (((lctx & 0xF) as u16) << 8) + } else { 0xFFF }; + if let Some(idx) = RV40_AIC_PATTERNS.iter().position(|&x| x == ctx_word) { + let code = modes[x + y * 4].to_index() * 9 + modes[x + y * 4 + 1].to_index(); + tot_bits += u32::from(RV40_AIC_MODE2_BITS[idx][code as 
usize]); + x += 2; + } else if tctx != -1 && lctx != -1 { + let idx = (tctx + lctx * 10) as usize; + let code = modes[x + y * 4].to_index() as usize; + tot_bits += u32::from(RV40_AIC_MODE1_BITS[idx][code]); + x += 1; + } else { + match lctx { + -1 if tctx < 2 => tot_bits += 1, + 0 | 2 => tot_bits += 1, + _ => {}, + }; + x += 1; + } + } + } + + let cand_dist = rdm.get_metric(tot_bits, tot_dist); + if cand_dist < self.best_dist { + self.best_dist = cand_dist; + self.best_mbt = MacroblockType::Intra4x4(modes); + self.best_bits = tot_bits; + self.best_coef.copy_from_slice(&self.mb.tmp_tx); + self.best_blk.copy_from(&self.mb.cand_blk); + } + } + pub fn get_est_bits(&self) -> u32 { self.best_bits } + pub fn get_macroblock(&mut self) -> Macroblock { + let mut coeffs = [Block::new(); 25]; + if !self.best_mbt.is_skip() { + coeffs.copy_from_slice(&self.best_coef); + } + Macroblock { + mb_type: self.best_mbt.clone(), + coeffs, + } + } + pub fn recon_mb(&mut self, dst: &mut NASimpleVideoFrame) { + let src_mb = &self.best_blk; + SingleMacroblock::put_mb(dst, src_mb, self.mb_x, self.mb_y); + + self.top_y[self.mb_x * 16 + 1..][..16].copy_from_slice(&src_mb.y[15 * 16..]); + self.top_u[self.mb_x * 8 + 1..][..8].copy_from_slice(&src_mb.u[7 * 8..]); + self.top_v[self.mb_x * 8 + 1..][..8].copy_from_slice(&src_mb.v[7 * 8..]); + + self.ipred_y.top[0] = self.ipred_y.top[16]; + self.ipred_y.left[0] = self.ipred_y.top[0]; + self.ipred_u.top[0] = self.ipred_u.top[8]; + self.ipred_u.left[0] = self.ipred_u.top[0]; + self.ipred_v.top[0] = self.ipred_v.top[8]; + self.ipred_v.left[0] = self.ipred_v.top[0]; + + for (left, src) in self.ipred_y.left[1..].iter_mut().zip(src_mb.y.chunks_exact(16)) { + *left = src[15]; + } + for (left, src) in self.ipred_u.left[1..9].iter_mut().zip(src_mb.u.chunks_exact(8)) { + *left = src[7]; + } + for (left, src) in self.ipred_v.left[1..9].iter_mut().zip(src_mb.v.chunks_exact(8)) { + *left = src[7]; + } + } + fn blk4_diff(pred: &[u8], refsrc: &[u8], stride: usize, 
// Bit positions of the individual debug output categories.
const DEBUG_BIT_FRAMENO: u8 = 0;
const DEBUG_BIT_SLICE_SIZE: u8 = 1;
const DEBUG_BIT_PSNR: u8 = 2;
const DEBUG_BIT_RATECTL: u8 = 3;
/// Textual flag names accepted by [`DebugFlags::parse`] and their bit positions.
const DEBUG_FLAG_BITS: &[(&str, u8)] = &[
    ("frameno", DEBUG_BIT_FRAMENO),
    ("slicesize", DEBUG_BIT_SLICE_SIZE),
    ("psnr", DEBUG_BIT_PSNR),
    ("rc", DEBUG_BIT_RATECTL),
];

/// Set of enabled debug output categories stored as a bit mask.
#[derive(Clone,Copy,Default)]
struct DebugFlags {
    flags: u32,
}

impl DebugFlags {
    /// Creates an empty flag set.
    fn new() -> Self { Self::default() }
    /// Reports whether the flag with the given bit position is enabled.
    fn is_set(self, bit: u8) -> bool { (self.flags & (1 << bit)) != 0 }
    /// Replaces the current flags with those named in a `+`-separated list.
    ///
    /// Unknown names are ignored.
    fn parse(&mut self, args: &str) {
        self.flags = 0;
        for arg in args.split('+') {
            for &(name, bit) in DEBUG_FLAG_BITS.iter() {
                if name == arg {
                    // OR instead of addition: a repeated name must not carry
                    // into the neighbouring bit.
                    self.flags |= 1u32 << bit;
                }
            }
        }
    }
}

impl std::fmt::Display for DebugFlags {
    /// Prints the enabled flag names joined with `+`, in table order.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let names: Vec<&str> = DEBUG_FLAG_BITS.iter()
            .filter(|&&(_, bit)| self.is_set(bit))
            .map(|&(name, _)| name)
            .collect();
        write!(f, "{}", names.join("+"))
    }
}
next_ft: FrameType, + p_count: usize, +} + +const NUM_GOP_KF: usize = 8; + +impl DynamicFrameOrder { + fn new() -> Self { + Self { + cur_ft: FrameType::I, + next_ft: FrameType::Other, + p_count: 0, + } + } + fn peek_next_frame(&self) -> (FrameType, usize) { + (self.cur_ft, if self.cur_ft == FrameType::Other || self.next_ft == FrameType::B { 1 } else { 0 }) + } + fn next_frame(&mut self) -> FrameType { + if self.cur_ft == FrameType::P { + self.p_count += 1; + if self.p_count >= NUM_GOP_KF { + self.cur_ft = FrameType::I; + self.p_count = 0; + } + } + let next = self.cur_ft; + self.cur_ft = self.next_ft; + self.next_ft = if self.cur_ft != FrameType::B { FrameType::Other } else { FrameType::P }; + next + } + fn update(&mut self, ftype: FrameType) { + if self.cur_ft == FrameType::Other { + self.cur_ft = ftype; + if self.cur_ft == FrameType::B { + self.cur_ft = FrameType::P; + self.next_ft = FrameType::B; + } else { + self.next_ft = FrameType::Other; + } + } + } +} + +impl std::fmt::Display for DynamicFrameOrder { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "dynamic") + } +} + +enum FrameOrder { + Static(StaticFrameOrder), + Dynamic(DynamicFrameOrder), +} + +impl FrameOrder { + fn new() -> Self { + FrameOrder::Dynamic(DynamicFrameOrder::new()) + } + fn get_max_grp_len(&self) -> usize { + match self { + FrameOrder::Static(ref order) => order.get_max_grp_len(), + FrameOrder::Dynamic(ref _order) => 2, + } + } + fn peek_next_frame(&self) -> (FrameType, usize) { + match self { + FrameOrder::Static(ref order) => order.peek_next_frame(), + FrameOrder::Dynamic(ref order) => order.peek_next_frame(), + } + } + fn next_frame(&mut self) -> FrameType { + match self { + FrameOrder::Static(ref mut order) => order.next_frame(), + FrameOrder::Dynamic(ref mut order) => order.next_frame(), + } + } + fn update(&mut self, ftype: FrameType) { + if let FrameOrder::Dynamic(ref mut order) = self { + order.update(ftype); + } + } + fn is_dynamic(&self) -> bool { 
matches!(self, FrameOrder::Dynamic(_)) } +} + +impl std::fmt::Display for FrameOrder { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match *self { + FrameOrder::Static(ref order) => order.fmt(f), + FrameOrder::Dynamic(ref order) => order.fmt(f), + } + } +} + +#[derive(Clone,Copy,Debug)] +enum ParseError { + TooShort, + TooLong, + InvalidValue, + InvalidCombination, +} + +impl FromStr for FrameOrder { + type Err = ParseError; + fn from_str(s: &str) -> Result { + if s == "dynamic" { + return Ok(FrameOrder::Dynamic(DynamicFrameOrder::new())); + } + let mut ftypes = Vec::new(); + for ch in s.bytes() { + match ch { + b'I' | b'i' => ftypes.push(FrameType::I), + b'P' | b'p' => ftypes.push(FrameType::P), + b'B' | b'b' => ftypes.push(FrameType::B), + b' ' | b',' => {}, + _ => return Err(ParseError::InvalidValue), + }; + if ftypes.len() > 16 { + return Err(ParseError::TooLong); + } + } + if ftypes.is_empty() { + return Err(ParseError::TooShort); + } + if ftypes[0] != FrameType::I { + return Err(ParseError::InvalidCombination); + } + + let mut groups = Vec::new(); + let mut cur_ftype = ftypes[0]; + let mut cur_run = 0; + for &ft in ftypes[1..].iter() { + match ft { + FrameType::I | FrameType::P => { + groups.push((cur_ftype, cur_run)); + cur_ftype = ft; + cur_run = 0; + }, + _ => { + cur_run += 1; + if cur_run > 4 { + return Err(ParseError::InvalidCombination); + } + }, + }; + } + groups.push((cur_ftype, cur_run)); + + Ok(FrameOrder::Static(StaticFrameOrder{ + groups, + start: true, + cur_grp: 0, + cur_frm: 0, + })) + } +} + +struct RV40Encoder { + stream: Option, + vinfo: NAVideoInfo, + width: usize, + height: usize, + mb_w: usize, + mb_h: usize, + + slice_bits: u32, + deblock: bool, + force_set: Option, + + fce: FrameComplexityEstimate, + mbd: MacroblockDecider, + order: FrameOrder, + brc: BitRateControl, + rdm: RateDistMetric, + be: BitsEstimator, + me: MotionEstimator, + cset: CodeSets, + + sstate: SliceState, + mbstate: MBState, + mbs: VecDeque, + 
dblk: Vec, + + qframes: Vec, + frm_pool: NAVideoBufferPool, + ref_p: NAVideoBufferRef, + ref_n: NAVideoBufferRef, + pkts: VecDeque, + + p_pts: u64, + n_pts: u64, + last_k_ts: u64, + last_b_ts: u64, + + needs_alloc: bool, + max_grp_bufs: usize, + + debug_log: DebugFlags, + + refine_b: bool, + i4_in_b: bool, +} + +impl RV40Encoder { + fn new() -> Self { + let vinfo = NAVideoInfo::new(24, 24, false, YUV420_FORMAT); + let vt = alloc_video_buffer(vinfo, 4).unwrap(); + let ref_p = vt.get_vbuf().unwrap(); + let vt = alloc_video_buffer(vinfo, 4).unwrap(); + let ref_n = vt.get_vbuf().unwrap(); + Self { + stream: None, + vinfo, + width: 0, + height: 0, + mb_w: 0, + mb_h: 0, + + slice_bits: 10000, + deblock: true, + force_set: None, + + fce: FrameComplexityEstimate::new(), + mbd: MacroblockDecider::new(), + order: FrameOrder::new(), + brc: BitRateControl::new(), + rdm: RateDistMetric::new(), + be: BitsEstimator::new(), + me: MotionEstimator::new(), + cset: CodeSets::new(), + + sstate: SliceState::new(), + mbstate: MBState::new(), + mbs: VecDeque::new(), + dblk: Vec::new(), + + qframes: Vec::new(), + frm_pool: NAVideoBufferPool::new(0), + pkts: VecDeque::new(), + ref_p, ref_n, + + p_pts: 0, + n_pts: 0, + last_k_ts: 0, + last_b_ts: 0, + + needs_alloc: true, + max_grp_bufs: 0, + + debug_log: DebugFlags::new(), + + refine_b: false, + i4_in_b: false, + } + } + fn encode_frame(&mut self, frm: NAFrame, frameno: usize) -> EncoderResult { + let ftype = self.order.next_frame(); + let buf = frm.get_buffer(); + + let tinfo = frm.get_time_information(); + let pts = NATimeInfo::ts_to_time(tinfo.pts.unwrap_or(0), 1000, tinfo.tb_num, tinfo.tb_den); + let fpts = (pts & 0x1FFF) as u32; + + let ts_diff = if ftype == FrameType::B { + pts.saturating_sub(self.last_k_ts.min(self.last_b_ts)) as u32 + } else { + let diff = pts.saturating_sub(self.last_k_ts) as u32; + diff / ((frameno + 1) as u32) + }; + + if self.debug_log.is_set(DEBUG_BIT_FRAMENO) { + println!("encode frame type {} pts {}", ftype, 
pts); + } + let is_ref_frame = matches!(ftype, FrameType::I | FrameType::P); + + let tr_d = (self.n_pts - self.p_pts) as u32; + let tr_b = (pts - self.p_pts) as u32; + if !is_ref_frame { + self.mbd.set_b_distance(tr_b, tr_d); + } + + let mut rvbuf = if let Some(nfrm) = self.frm_pool.get_free() { + nfrm + } else { + return Err(EncoderError::AllocError); + }; + let mut recon_frm = NASimpleVideoFrame::from_video_buf(&mut rvbuf).unwrap(); + + self.be.set_frame_type(ftype); + if let Some(ref vbuf) = buf.get_vbuf() { + let src = vbuf.get_data(); + + if self.brc.rate_ctl_in_use() || self.order.is_dynamic() { + self.fce.set_current(vbuf); + } + + let complexity = if self.brc.rate_ctl_in_use() { + self.fce.get_complexity(ftype) + } else { 0 }; + + self.mbd.q = self.brc.get_quant(ftype, complexity); + self.brc.init_metric(ftype, &mut self.rdm); + self.be.set_quant(self.mbd.q); + if self.debug_log.is_set(DEBUG_BIT_RATECTL) { + println!(" expected frame size {}", self.brc.get_target_size(ftype)); + println!(" target quantiser {} lambda {} thresholds {} / {}", self.brc.get_last_quant(ftype), self.rdm.lambda, self.rdm.good_enough, self.rdm.p_split_thr); + } + + let mut nslices = 0; + let mut dvec = Vec::new(); + let mut mb_idx = 0; + let mut slice_starts = Vec::new(); + let num_mbs = self.mb_w * self.mb_h; + while mb_idx < num_mbs { + slice_starts.push(dvec.len()); + let mut bw = BitWriter::new(dvec, BitWriterMode::BE); + let slice_start_mb = mb_idx; + + self.mbstate.reset(); + + let mut est_bits = 0; + while est_bits < self.slice_bits && mb_idx < num_mbs { + let mb_x = mb_idx % self.mb_w; + let mb_y = mb_idx / self.mb_w; + self.sstate.has_t = mb_idx >= slice_start_mb + self.mb_w; + self.sstate.has_l = (mb_idx > slice_start_mb) && (mb_x > 0); + self.sstate.has_tl = (mb_idx > slice_start_mb + self.mb_w) && (mb_x > 0); + self.sstate.has_tr = (mb_idx >= slice_start_mb + self.mb_w - 1) && (mb_x + 1 < self.mb_w); + self.sstate.mb_x = mb_x; + self.sstate.mb_y = mb_y; + + let offsets = 
[ + vbuf.get_offset(0) + mb_x * 16 + mb_y * 16 * vbuf.get_stride(0), + vbuf.get_offset(1) + mb_x * 8 + mb_y * 8 * vbuf.get_stride(1), + vbuf.get_offset(2) + mb_x * 8 + mb_y * 8 * vbuf.get_stride(2), + ]; + let strides = [vbuf.get_stride(0), vbuf.get_stride(1), vbuf.get_stride(2)]; + self.mbd.load_mb(src, offsets, strides, &self.sstate); + + self.be.set_pred_mb_type(self.mbstate.get_pred_mbtype(&self.sstate, ftype == FrameType::B)); + if ftype == FrameType::B { + self.mbd.try_b_coding(&self.ref_p, &self.ref_n, &mut self.be, &mut self.me, &self.rdm, &self.mbstate, self.refine_b); + } + if ftype == FrameType::P { + self.mbd.try_p_coding(&self.ref_n, &mut self.be, &mut self.me, &self.rdm, &self.mbstate); + } + self.mbd.try_intra_16_pred(&mut self.be, &self.rdm); + if ftype != FrameType::B || self.i4_in_b { + self.mbd.try_intra_4x4_pred(&mut self.be, &self.rdm, &mut self.mbstate); + } + + let mb = self.mbd.get_macroblock(); + est_bits += self.mbd.get_est_bits(); + self.mbd.recon_mb(&mut recon_frm); + self.mbstate.update(&mb.mb_type, mb_x, mb_y); + + if self.deblock { + self.dblk[mb_idx].q = self.mbd.q as u8; + if ftype == FrameType::I { + self.dblk[mb_idx].is_strong = true; + self.dblk[mb_idx].cbp_y = 0xFFFF; + self.dblk[mb_idx].cbp_c = 0xFF; + } else { + self.dblk[mb_idx].is_strong = mb.mb_type.is_intra() || mb.mb_type.is_16(); + let mut cbp = 0u16; + let mut mask = 1; + for blk in mb.coeffs[..16].iter() { + if !blk.is_empty() { + cbp |= mask; + } + mask <<= 1; + } + self.dblk[mb_idx].cbp_y = cbp; + let mut cbp = 0u8; + let mut mask = 1; + for blk in mb.coeffs[16..24].iter() { + if !blk.is_empty() { + cbp |= mask; + } + mask <<= 1; + } + self.dblk[mb_idx].cbp_c = cbp; + } + self.mbstate.fill_deblock(&mut self.dblk[mb_idx], &self.sstate); + } + + self.mbs.push_back(mb); + + mb_idx += 1; + } + + let set_idx = if let Some(idx) = self.force_set { + idx + } else { + let mut hist = [0usize; 17]; + for mb in self.mbs.iter() { + let blocks = if mb.mb_type.is_16() { &mb.coeffs 
} else { &mb.coeffs[..24] }; + + for blk in blocks.iter() { + let nz = blk.count_nz(); + for el in hist[nz..].iter_mut() { + *el += 1; + } + } + } + BitsEstimator::decide_set(&hist) + }; + + let start_bits = bw.tell(); + write_slice_header(&mut bw, ftype, self.mbd.q, set_idx, self.deblock, fpts); + if ftype == FrameType::I { + write_slice_dimensions(&mut bw, self.width, self.height); + } else { + bw.write1(); // keep dimensions flag + } + write_slice_mb_idx(&mut bw, slice_start_mb, num_mbs); + + mb_idx = slice_start_mb; + let mut skip_count = 0; + self.cset.init(self.mbd.q, set_idx); + while let Some(mb) = self.mbs.pop_front() { + if bw.tell() > start_bits + (self.slice_bits as usize) { + break; + } + let mb_x = mb_idx % self.mb_w; + let mb_y = mb_idx / self.mb_w; + self.sstate.has_t = mb_idx >= slice_start_mb + self.mb_w; + self.sstate.has_l = (mb_idx > slice_start_mb) && (mb_x > 0); + self.sstate.has_tl = (mb_idx > slice_start_mb + self.mb_w) && (mb_x > 0); + self.sstate.has_tr = (mb_idx >= slice_start_mb + self.mb_w - 1) && (mb_x + 1 < self.mb_w); + self.sstate.mb_x = mb_x; + self.sstate.mb_y = mb_y; + if mb.mb_type.is_skip() { + skip_count += 1; + } else { + if skip_count > 0 { + write_skip_count(&mut bw, skip_count); + skip_count = 0; + } else if ftype != FrameType::I { + bw.write1(); // zero skip count + } + + write_mb_header(&mut bw, ftype, &self.sstate, &self.mbstate); + self.cset.set_params(&mb.mb_type); + self.cset.write_coeffs(&mut bw, &mb.coeffs); + } + mb_idx += 1; + } + self.mbs.clear(); + if skip_count > 0 { + write_skip_count(&mut bw, skip_count); + } + while (bw.tell() & 7) != 0 { + bw.write0(); + } + if self.debug_log.is_set(DEBUG_BIT_SLICE_SIZE) { + println!(" slice {}..{} wrote {} bits / estimated {} bits", slice_start_mb, mb_idx, bw.tell(), est_bits); + } + dvec = bw.end(); + nslices += 1; + } + for _ in 0..(nslices * 8 + 1) { + dvec.insert(0, 0); + } + dvec[0] = (nslices - 1) as u8; + for (i, &off) in slice_starts.iter().enumerate() { + dvec[i 
* 8 + 4] = 1; + write_u32be(&mut dvec[i * 8 + 5..], off as u32)?; + } + if self.debug_log.is_set(DEBUG_BIT_RATECTL) { + println!(" got frame size {}", dvec.len()); + } + + if is_ref_frame && self.deblock { + loop_filter_frame(&mut recon_frm, &self.dblk, self.mb_w, self.mb_h); + } + + if self.debug_log.is_set(DEBUG_BIT_PSNR) { + let psnr = calc_psnr(&vbuf, &rvbuf); + println!(" encoded frame PSNR {} size {}", psnr, dvec.len()); + } + + if is_ref_frame { + std::mem::swap(&mut self.ref_p, &mut self.ref_n); + self.ref_n = rvbuf; + + self.p_pts = self.n_pts; + self.n_pts = pts; + + self.mbstate.swap_mvs(); + } + + if is_ref_frame { + if self.last_k_ts > self.last_b_ts { + self.last_b_ts = self.last_k_ts; + } + self.last_k_ts = pts; + self.fce.update_ref(); + } else { + self.last_b_ts = pts; + } + + self.brc.update_stats(ftype, dvec.len(), ts_diff); + + Ok(NAPacket::new(self.stream.clone().unwrap(), frm.ts, ftype == FrameType::I, dvec)) + } else { + Err(EncoderError::InvalidParameters) + } + } +} + +fn calc_psnr(pic1: &NAVideoBuffer, pic2: &NAVideoBuffer) -> f64 { + let data1 = pic1.get_data(); + let data2 = pic2.get_data(); + let mut sum = 0u64; + let mut size = 0; + for comp in 0..3 { + let (w, h) = pic1.get_dimensions(comp); + size += w * h; + for (line1, line2) in data1[pic1.get_offset(comp)..].chunks(pic1.get_stride(comp)).zip( + data2[pic2.get_offset(comp)..].chunks(pic2.get_stride(comp))).take(h) { + for (&pix1, &pix2) in line1[..w].iter().zip(line2.iter()) { + let diff = (i32::from(pix1) - i32::from(pix2)).abs() as u32; + sum += u64::from(diff * diff); + } + } + } + if size > 0 { + 48.13080360867910341240 - 10.0 * ((sum as f64) / (size as f64)).log10() + } else { + std::f64::INFINITY + } +} + +impl NAEncoder for RV40Encoder { + fn negotiate_format(&self, encinfo: &EncodeParameters) -> EncoderResult { + match encinfo.format { + NACodecTypeInfo::None => { + Ok(EncodeParameters { + format: NACodecTypeInfo::Video(NAVideoInfo::new(0, 0, false, YUV420_FORMAT)), + 
..Default::default() }) + }, + NACodecTypeInfo::Audio(_) => Err(EncoderError::FormatError), + NACodecTypeInfo::Video(vinfo) => { + let outinfo = NAVideoInfo::new((vinfo.width + 15) & !15, (vinfo.height + 15) & !15, false, YUV420_FORMAT); + let mut ofmt = *encinfo; + ofmt.format = NACodecTypeInfo::Video(outinfo); + Ok(ofmt) + } + } + } + fn get_capabilities(&self) -> u64 { 0 } + fn init(&mut self, stream_id: u32, encinfo: EncodeParameters) -> EncoderResult { + match encinfo.format { + NACodecTypeInfo::None => Err(EncoderError::FormatError), + NACodecTypeInfo::Audio(_) => Err(EncoderError::FormatError), + NACodecTypeInfo::Video(vinfo) => { + if vinfo.format != YUV420_FORMAT { + return Err(EncoderError::FormatError); + } + if ((vinfo.width | vinfo.height) & 15) != 0 { + return Err(EncoderError::FormatError); + } + if (vinfo.width | vinfo.height) >= (1 << 12) { + return Err(EncoderError::FormatError); + } + + // 32-bit flags (VBR, bframes, slices, something else) and 32-bit version + let edata = vec![0x01, 0x08, 0x10, 0x20, 0x40, 0x00, 0x80, 0x00]; + let out_info = NAVideoInfo::new(vinfo.width, vinfo.height, false, vinfo.format); + let info = NACodecInfo::new("realvideo4", NACodecTypeInfo::Video(out_info), Some(edata)); + let mut stream = NAStream::new(StreamType::Video, stream_id, info, encinfo.tb_num, encinfo.tb_den, 0); + stream.set_num(stream_id as usize); + let stream = stream.into_ref(); + + self.stream = Some(stream.clone()); + + self.width = vinfo.width; + self.height = vinfo.height; + self.mb_w = (vinfo.width + 15) >> 4; + self.mb_h = (vinfo.height + 15) >> 4; + + if self.mb_w * self.mb_h > 9216 { + return Err(EncoderError::FormatError); + } + + if (1..=100u8).contains(&encinfo.quality) { + self.brc.set_force_quality(Some(encinfo.quality)); + } else { + self.brc.set_force_quality(None); + } + self.brc.set_bitrate(encinfo.bitrate); + + self.vinfo = out_info; + let max_frames = self.order.get_max_grp_len(); + self.frm_pool.set_dec_bufs(max_frames + 3); + 
self.max_grp_bufs = max_frames; + self.needs_alloc = true; + + self.fce.resize(self.width, self.height); + self.mbstate.resize(self.mb_w, self.mb_h); + self.mbd.resize(self.mb_w); + self.dblk.resize(self.mb_w * self.mb_h, DeblockInfo::default()); + + Ok(stream) + }, + } + } + fn encode(&mut self, frm: &NAFrame) -> EncoderResult<()> { + if self.needs_alloc { + self.frm_pool.prealloc_video(self.vinfo, 4)?; + self.ref_n = self.frm_pool.get_free().unwrap(); + self.ref_p = self.frm_pool.get_free().unwrap(); + self.needs_alloc = false; + } + if let Some(ref vbuf) = frm.get_buffer().get_vbuf() { + if let Some(dbuf) = self.frm_pool.get_copy(vbuf) { + let newfrm = NAFrame::new(frm.ts, frm.frame_type, frm.key, frm.get_info(), NABufferType::Video(dbuf)); + self.qframes.push(newfrm); + + loop { + let (mut ftype, mut frame_pos) = self.order.peek_next_frame(); + if frame_pos >= self.qframes.len() { + break; + } + + if ftype == FrameType::Other { + if self.qframes.len() < 2 { + return Err(EncoderError::Bug); + } + if let (Some(ref frm1), Some(ref frm2)) = (self.qframes[0].get_buffer().get_vbuf(), self.qframes[1].get_buffer().get_vbuf()) { + let is_b = self.fce.decide_b_frame(frm1, frm2); + ftype = if is_b { + frame_pos = 1; + FrameType::B + } else { + frame_pos = 0; + FrameType::P + }; + } else { + return Err(EncoderError::Bug); + } + self.order.update(ftype); + } + + let frm = self.qframes.remove(frame_pos); + let pkt = self.encode_frame(frm, frame_pos)?; + self.pkts.push_back(pkt); + } + Ok(()) + } else { + Err(EncoderError::AllocError) + } + } else { + Err(EncoderError::FormatError) + } + } + fn get_packet(&mut self) -> EncoderResult> { + Ok(self.pkts.pop_front()) + } + fn flush(&mut self) -> EncoderResult<()> { + Ok(()) + } +} + +const DEBUG_LOG_OPTION: &str = "debug"; +const SLICE_SIZE_OPTION: &str = "slice_size"; +const FRAME_ORDER_OPTION: &str = "frame_order"; +const DEBLOCK_OPTION: &str = "loop_filt"; +const QUANT_OPTION: &str = "quant"; +const QUALITY_OPTION: &str = 
"quality"; +const SET_OPTION: &str = "coding_set"; +const SEARCH_MODE_OPTION: &str = "me_mode"; +const SEARCH_RANGE_OPTION: &str = "me_range"; +const SEARCH_THR_OPTION: &str = "me_thr"; +const B_REFINE_OPTION: &str = "refine_b"; +const I4_IN_B_OPTION: &str = "i4_in_b"; +const B_OFFSET_OPTION: &str = "b_offset"; + +const ENCODER_OPTS: &[NAOptionDefinition] = &[ + NAOptionDefinition { + name: DEBUG_LOG_OPTION, description: "debug flags", + opt_type: NAOptionDefinitionType::String(None) }, + NAOptionDefinition { + name: SLICE_SIZE_OPTION, description: "soft slice size limit in bits", + opt_type: NAOptionDefinitionType::Int(Some(4096), Some(100000)) }, + NAOptionDefinition { + name: FRAME_ORDER_OPTION, description: "frame order (e.g. IBBPBB)", + opt_type: NAOptionDefinitionType::String(None) }, + NAOptionDefinition { + name: DEBLOCK_OPTION, description: "in-loop filter", + opt_type: NAOptionDefinitionType::Bool }, + NAOptionDefinition { + name: QUANT_OPTION, description: "force quantiser (-1 = none)", + opt_type: NAOptionDefinitionType::Int(Some(-1), Some(31)) }, + NAOptionDefinition { + name: QUALITY_OPTION, description: "force quality (-1 = none)", + opt_type: NAOptionDefinitionType::Int(Some(-1), Some(100)) }, + NAOptionDefinition { + name: SET_OPTION, description: "force coding set (-1 = none)", + opt_type: NAOptionDefinitionType::Int(Some(-1), Some(3)) }, + NAOptionDefinition { + name: SEARCH_MODE_OPTION, description: "motion search mode", + opt_type: NAOptionDefinitionType::String(Some(MVSearchMode::get_possible_modes())) }, + NAOptionDefinition { + name: SEARCH_RANGE_OPTION, description: "motion search range", + opt_type: NAOptionDefinitionType::Int(Some(0), Some(256)) }, + NAOptionDefinition { + name: SEARCH_THR_OPTION, description: "motion search cut-off threshold", + opt_type: NAOptionDefinitionType::Int(Some(0), Some(1048576)) }, + NAOptionDefinition { + name: B_REFINE_OPTION, description: "better ME for B-frames", + opt_type: NAOptionDefinitionType::Bool }, 
+ NAOptionDefinition { + name: I4_IN_B_OPTION, description: "allow intra 4x4 coding in B-frames", + opt_type: NAOptionDefinitionType::Bool }, + NAOptionDefinition { + name: B_OFFSET_OPTION, description: "B-frame quantiser offset", + opt_type: NAOptionDefinitionType::Int(Some(0), Some(16)) }, +]; + +impl NAOptionHandler for RV40Encoder { + fn get_supported_options(&self) -> &[NAOptionDefinition] { ENCODER_OPTS } + fn set_options(&mut self, options: &[NAOption]) { + for option in options.iter() { + for opt_def in ENCODER_OPTS.iter() { + if opt_def.check(option).is_ok() { + match option.name { + DEBUG_LOG_OPTION => { + if let NAValue::String(ref strval) = option.value { + self.debug_log.parse(strval); + } + }, + SLICE_SIZE_OPTION => { + if let NAValue::Int(intval) = option.value { + self.slice_bits = intval as u32; + } + }, + FRAME_ORDER_OPTION => { + if let NAValue::String(ref strval) = option.value { + if let Ok(norder) = strval.parse::() { + self.order = norder; + let max_frames = self.order.get_max_grp_len(); + if max_frames > self.max_grp_bufs { + self.frm_pool.set_dec_bufs(max_frames + 3); + self.needs_alloc = true; + self.max_grp_bufs = max_frames; + } + } else { + println!("Invalid order sequence"); + } + } + }, + DEBLOCK_OPTION => { + if let NAValue::Bool(val) = option.value { + self.deblock = val; + } + }, + QUANT_OPTION => { + if let NAValue::Int(val) = option.value { + if val != -1 { + self.brc.set_force_quant(Some(val as usize)); + } else { + self.brc.set_force_quant(None); + } + } + }, + QUALITY_OPTION => { + if let NAValue::Int(val) = option.value { + if val != -1 { + self.brc.set_force_quality(Some(val as u8)); + } else { + self.brc.set_force_quality(None); + } + } + }, + SET_OPTION => { + if let NAValue::Int(val) = option.value { + self.force_set = if val != -1 { Some(val as usize) } else { None }; + } + }, + SEARCH_MODE_OPTION => { + if let NAValue::String(ref strval) = option.value { + if let Ok(mmode) = strval.parse::() { + self.me.set_mode(mmode); 
+ } else { + println!("Invalid mode"); + } + } + }, + SEARCH_RANGE_OPTION => { + if let NAValue::Int(val) = option.value { + self.me.range = val as i16; + } + }, + SEARCH_THR_OPTION => { + if let NAValue::Int(val) = option.value { + self.me.thresh = val as u32; + } + }, + B_REFINE_OPTION => { + if let NAValue::Bool(val) = option.value { + self.refine_b = val; + } + }, + I4_IN_B_OPTION => { + if let NAValue::Bool(val) = option.value { + self.i4_in_b = val; + } + }, + B_OFFSET_OPTION => { + if let NAValue::Int(val) = option.value { + self.brc.b_offset = val as usize; + } + }, + _ => {}, + }; + } + } + } + } + fn query_option_value(&self, name: &str) -> Option { + match name { + DEBUG_LOG_OPTION => Some(NAValue::String(self.debug_log.to_string())), + SLICE_SIZE_OPTION => Some(NAValue::Int(self.slice_bits as i64)), + FRAME_ORDER_OPTION => Some(NAValue::String(self.order.to_string())), + DEBLOCK_OPTION => Some(NAValue::Bool(self.deblock)), + QUANT_OPTION => Some(NAValue::Int(self.brc.get_force_quant().into())), + QUALITY_OPTION => Some(NAValue::Int(self.brc.get_force_quality().into())), + SET_OPTION => Some(NAValue::Int(if let Some(set) = self.force_set { set as i64 } else { -1 })), + SEARCH_MODE_OPTION => Some(NAValue::String(self.me.get_mode().to_string())), + SEARCH_THR_OPTION => Some(NAValue::Int(self.me.thresh.into())), + SEARCH_RANGE_OPTION => Some(NAValue::Int(self.me.range.into())), + B_REFINE_OPTION => Some(NAValue::Bool(self.refine_b)), + I4_IN_B_OPTION => Some(NAValue::Bool(self.i4_in_b)), + B_OFFSET_OPTION => Some(NAValue::Int(self.brc.b_offset as i64)), + _ => None, + } + } +} + +pub fn get_encoder() -> Box { + Box::new(RV40Encoder::new()) +} + +#[cfg(test)] +mod test { + use nihav_core::codecs::*; + use nihav_core::demuxers::*; + use nihav_core::muxers::*; + use crate::*; + use nihav_codec_support::test::enc_video::*; + use nihav_commonfmt::*; + + #[allow(unused_variables)] + fn encode_test(out_name: &'static str, enc_options: &[NAOption], limit: Option, 
hash: &[u32; 4]) { + let mut dmx_reg = RegisteredDemuxers::new(); + generic_register_all_demuxers(&mut dmx_reg); + let mut dec_reg = RegisteredDecoders::new(); + generic_register_all_decoders(&mut dec_reg); + let mut mux_reg = RegisteredMuxers::new(); + realmedia_register_all_muxers(&mut mux_reg); + let mut enc_reg = RegisteredEncoders::new(); + realmedia_register_all_encoders(&mut enc_reg); + + // sample from private collection + let dec_config = DecoderTestParams { + demuxer: "yuv4mpeg", + in_name: "assets/day3b.y4m", + stream_type: StreamType::Video, + limit, + dmx_reg, dec_reg, + }; + let enc_config = EncoderTestParams { + muxer: "realmedia", + enc_name: "realvideo4", + out_name, + mux_reg, enc_reg, + }; + let dst_vinfo = NAVideoInfo { + width: 0, + height: 0, + format: YUV420_FORMAT, + flipped: false, + bits: 12, + }; + let enc_params = EncodeParameters { + format: NACodecTypeInfo::Video(dst_vinfo), + quality: 0, + bitrate: 300000, + tb_num: 0, + tb_den: 0, + flags: 0, + }; + //test_encoding_to_file(&dec_config, &enc_config, enc_params, enc_options); + test_encoding_md5(&dec_config, &enc_config, enc_params, enc_options, + hash); + } + #[test] + fn test_rv40_encoder_simple() { + let enc_options = &[ + NAOption { name: super::FRAME_ORDER_OPTION, value: NAValue::String("I".to_owned()) }, + NAOption { name: super::DEBLOCK_OPTION, value: NAValue::Bool(false) }, + NAOption { name: super::QUANT_OPTION, value: NAValue::Int(17) }, + NAOption { name: super::SEARCH_MODE_OPTION, value: NAValue::String("diamond".to_owned()) }, + ]; + encode_test("rv40simple.rmvb", enc_options, Some(10), &[0x03b0d743, 0x36c20dbb, 0x18fa1c9e, 0x4b2b7324]); + } + #[test] + fn test_rv40_encoder_ipb() { + let enc_options = &[ + NAOption { name: super::FRAME_ORDER_OPTION, value: NAValue::String("IBPB".to_owned()) }, + NAOption { name: super::DEBLOCK_OPTION, value: NAValue::Bool(true) }, + NAOption { name: super::QUANT_OPTION, value: NAValue::Int(17) }, + NAOption { name: 
super::SEARCH_MODE_OPTION, value: NAValue::String("hexagon".to_owned()) }, + ]; + encode_test("rv40ipb.rmvb", enc_options, Some(8), &[0xc382ab0b, 0xbcfbb02a, 0xf12a064f, 0xe6a5c2c3]); + } + #[test] + fn test_rv40_encoder_advanced() { + let enc_options = &[ + NAOption { name: super::FRAME_ORDER_OPTION, value: NAValue::String("dynamic".to_owned()) }, + NAOption { name: super::DEBLOCK_OPTION, value: NAValue::Bool(true) }, + NAOption { name: super::SEARCH_MODE_OPTION, value: NAValue::String("umh".to_owned()) }, + ]; + encode_test("rv40adv.rmvb", enc_options, Some(8), &[0xc4395f49, 0x0536d5f0, 0x32406834, 0xb7b634be]); + } +} diff --git a/nihav-realmedia/src/codecs/rv40enc/motion_est.rs b/nihav-realmedia/src/codecs/rv40enc/motion_est.rs new file mode 100644 index 0000000..63d4663 --- /dev/null +++ b/nihav-realmedia/src/codecs/rv40enc/motion_est.rs @@ -0,0 +1,920 @@ +use nihav_core::frame::*; +use nihav_codec_support::codecs::{MV, ZERO_MV}; +use std::str::FromStr; +use super::dsp::{RefMBData, luma_mc, chroma_mc}; + +#[derive(Clone,Copy,PartialEq)] +pub enum MVSearchMode { + Dummy, + Diamond, + Hexagon, + UMH, +} + +impl MVSearchMode { + pub const fn get_possible_modes() -> &'static [&'static str] { + &["diamond", "hexagon", "umh"] + } + fn create(self) -> Box { + match self { + MVSearchMode::Dummy => Box::new(DummySearcher{}), + MVSearchMode::Diamond => Box::new(DiaSearch::new()), + MVSearchMode::Hexagon => Box::new(HexSearch::new()), + MVSearchMode::UMH => Box::new(UnevenHexSearch::new()), + } + } +} + +impl Default for MVSearchMode { + fn default() -> Self { MVSearchMode::Hexagon } +} + +impl std::fmt::Display for MVSearchMode { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match *self { + MVSearchMode::Diamond => write!(f, "diamond"), + MVSearchMode::Hexagon => write!(f, "hexagon"), + MVSearchMode::UMH => write!(f, "umh"), + MVSearchMode::Dummy => write!(f, "dummy"), + } + } +} + +impl FromStr for MVSearchMode { + type Err = (); + fn from_str(s: 
&str) -> Result { + match s { + "diamond" => Ok(MVSearchMode::Diamond), + "hexagon" => Ok(MVSearchMode::Hexagon), + "umh" => Ok(MVSearchMode::UMH), + "dummy" => Ok(MVSearchMode::Dummy), + _ => Err(()), + } + } +} + +const MAX_DIST: u32 = std::u32::MAX; +const DIST_THRESH: u32 = 256; + +trait FromPixels { + fn from_pixels(self) -> Self; +} + +impl FromPixels for MV { + fn from_pixels(self) -> MV { + MV { x: self.x * 4, y: self.y * 4 } + } +} + +const DIA_PATTERN: [MV; 9] = [ + ZERO_MV, + MV {x: -2, y: 0}, + MV {x: -1, y: 1}, + MV {x: 0, y: 2}, + MV {x: 1, y: 1}, + MV {x: 2, y: 0}, + MV {x: 1, y: -1}, + MV {x: 0, y: -2}, + MV {x: -1, y: -1} +]; + +const HEX_PATTERN: [MV; 7] = [ + ZERO_MV, + MV {x: -2, y: 0}, + MV {x: -1, y: 2}, + MV {x: 1, y: 2}, + MV {x: 2, y: 0}, + MV {x: 1, y: -2}, + MV {x: -1, y: -2} +]; + +const REFINEMENT: [MV; 4] = [ + MV {x: -1, y: 0}, + MV {x: 0, y: 1}, + MV {x: 1, y: 0}, + MV {x: 0, y: -1} +]; + +macro_rules! search_template { + ($self: expr, $mv_est: expr, $cur_blk: expr, $mb_x: expr, $mb_y: expr, $sad_func: ident, $threshold: expr) => ({ + search_template!($self, $mv_est, $cur_blk, $mb_x, $mb_y, $sad_func, $threshold, ZERO_MV, MAX_DIST, true) + }); + ($self: expr, $mv_est: expr, $cur_blk: expr, $mb_x: expr, $mb_y: expr, $sad_func: ident, $threshold: expr, $start_mv: expr, $best_dist: expr, $fullpel_stage: expr) => ({ + let mut best_dist = $best_dist; + let mut best_mv = $start_mv; + + let mut min_dist; + let mut min_idx; + + if $fullpel_stage { + $self.reset(); + loop { + let mut cur_best_dist = best_dist; + for (dist, &point) in $self.dist.iter_mut().zip($self.point.iter()) { + if *dist == MAX_DIST { + *dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, point.from_pixels(), cur_best_dist); + cur_best_dist = cur_best_dist.min(*dist); + if *dist <= $threshold { + break; + } + } + } + min_dist = $self.dist[0]; + min_idx = 0; + for (i, &dist) in $self.dist.iter().enumerate().skip(1) { + if dist < min_dist { + min_dist = dist; + min_idx = i; + 
if dist <= $threshold { + break; + } + } + } + if min_dist <= $threshold || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range || $self.point[min_idx].y.abs() >= $mv_est.mv_range { + break; + } + best_dist = min_dist; + $self.update($self.steps[min_idx]); + } + best_dist = min_dist; + best_mv = $self.point[min_idx]; + if best_dist <= $threshold { + return (best_mv.from_pixels(), best_dist); + } + for &step in REFINEMENT.iter() { + let mv = best_mv + step; + let dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, mv.from_pixels(), MAX_DIST); + if best_dist > dist { + best_dist = dist; + best_mv = mv; + } + } + best_mv = best_mv.from_pixels(); + if best_dist <= $threshold { + return (best_mv, best_dist); + } + } + + // subpel refinement + $self.set_new_point(best_mv, best_dist); + loop { + let mut cur_best_dist = best_dist; + for (dist, &point) in $self.dist.iter_mut().zip($self.point.iter()) { + if *dist == MAX_DIST { + *dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, point, cur_best_dist); + cur_best_dist = cur_best_dist.min(*dist); + if *dist <= $threshold { + break; + } + } + } + min_dist = $self.dist[0]; + min_idx = 0; + for (i, &dist) in $self.dist.iter().enumerate().skip(1) { + if dist < min_dist { + min_dist = dist; + min_idx = i; + if dist <= $threshold { + break; + } + } + } + if min_dist <= $threshold || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range * 8 || $self.point[min_idx].y.abs() >= $mv_est.mv_range * 8 { + break; + } + best_dist = min_dist; + $self.update($self.steps[min_idx]); + } + best_dist = min_dist; + best_mv = $self.point[min_idx]; + if best_dist <= $threshold { + return (best_mv, best_dist); + } + for &step in REFINEMENT.iter() { + let mv = best_mv + step; + let dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, mv, MAX_DIST); + if best_dist > dist { + best_dist = dist; + best_mv = mv; + } + } + (best_mv, best_dist) + }); +} + +macro_rules! 
pattern_search { + ($struct_name: ident, $patterns: expr) => { + pub struct $struct_name { + point: [MV; $patterns.len()], + dist: [u32; $patterns.len()], + steps: &'static [MV; $patterns.len()], + } + + impl $struct_name { + pub fn new() -> Self { + Self { + point: $patterns, + dist: [MAX_DIST; $patterns.len()], + steps: &$patterns, + } + } + fn reset(&mut self) { + self.point = $patterns; + self.dist = [MAX_DIST; $patterns.len()]; + } + fn set_new_point(&mut self, start: MV, dist: u32) { + for (dst, &src) in self.point.iter_mut().zip(self.steps.iter()) { + *dst = src + start; + } + self.dist = [MAX_DIST; $patterns.len()]; + self.dist[0] = dist; + } + fn update(&mut self, step: MV) { + let mut new_point = self.point; + let mut new_dist = [MAX_DIST; $patterns.len()]; + + for point in new_point.iter_mut() { + *point += step; + } + + for (new_point, new_dist) in new_point.iter_mut().zip(new_dist.iter_mut()) { + for (&old_point, &old_dist) in self.point.iter().zip(self.dist.iter()) { + if *new_point == old_point { + *new_dist = old_dist; + break; + } + } + } + self.point = new_point; + self.dist = new_dist; + } + } + + impl MVSearch for $struct_name { + fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_mb: &RefMBData, mb_x: usize, mb_y: usize, _cand_mvs: &[MV]) -> (MV, u32) { + search_template!(self, mv_est, cur_mb, mb_x, mb_y, sad_mb, DIST_THRESH) + } + fn search_blk8(&mut self, mv_est: &mut MVEstimator, ref_blk: &RefMBData, xpos: usize, ypos: usize, _cand_mvs: &[MV]) -> (MV, u32) { + search_template!(self, mv_est, ref_blk, xpos, ypos, sad_blk8, DIST_THRESH / 4) + } + } + } +} + +pattern_search!(DiaSearch, DIA_PATTERN); +pattern_search!(HexSearch, HEX_PATTERN); + +const LARGE_HEX_PATTERN: [MV; 16] = [ + MV { x: -4, y: 0 }, + MV { x: -4, y: 1 }, + MV { x: -4, y: 2 }, + MV { x: -2, y: 3 }, + MV { x: 0, y: 4 }, + MV { x: 2, y: 3 }, + MV { x: 4, y: 2 }, + MV { x: 4, y: 1 }, + MV { x: 4, y: 0 }, + MV { x: 4, y: -1 }, + MV { x: 4, y: -2 }, + MV { x: -2, y: -3 }, + MV { 
x: 0, y: -4 }, + MV { x: -2, y: -3 }, + MV { x: -4, y: -2 }, + MV { x: -4, y: -1 } +]; + +const UNSYMM_CROSS: [MV; 4] = [ + MV { x: -2, y: 0 }, + MV { x: 0, y: 1 }, + MV { x: 2, y: 0 }, + MV { x: 0, y: -1 } +]; + +#[derive(Default)] +struct UniqueSet { + list: [T; 16], + count: usize, +} + +impl UniqueSet { + fn new() -> Self { Self::default() } + fn clear(&mut self) { self.count = 0; } + fn get_list(&self) -> &[T] { &self.list[..self.count] } + fn add(&mut self, val: T) { + if self.count < self.list.len() && !self.get_list().contains(&val) { + self.list[self.count] = val; + self.count += 1; + } + } +} + +trait MVOps { + fn scale(self, scale: i16) -> Self; + fn is_in_range(self, range: i16) -> bool; +} + +impl MVOps for MV { + fn scale(self, scale: i16) -> MV { + MV { x: self.x * scale, y: self.y * scale } + } + fn is_in_range(self, range: i16) -> bool { + self.x.abs() <= range && self.y.abs() <= range + } +} + +macro_rules! single_search_step { + ($start:expr, $best_dist:expr, $mv_est:expr, $sad_func:ident, $ref_blk:expr, $xpos:expr, $ypos:expr, $pattern:expr, $scale:expr, $dist_thr:expr) => {{ + let mut best_mv = $start; + let mut best_dist = $best_dist; + for point in $pattern.iter() { + let mv = point.scale($scale) + $start; + if !mv.is_in_range($mv_est.mv_range * 4) { + continue; + } + let dist = $mv_est.$sad_func($ref_blk, $xpos, $ypos, mv, best_dist); + if dist < best_dist { + best_mv = mv; + best_dist = dist; + if best_dist < $dist_thr { + break; + } + } + } + (best_mv, best_dist, best_mv != $start) + }} +} + +struct UnevenHexSearch { + mv_list: UniqueSet, +} + +impl UnevenHexSearch { + fn new() -> Self { + Self { + mv_list: UniqueSet::new(), + } + } + fn get_cand_mv(&mut self, cand_mvs: &[MV]) -> MV { + self.mv_list.clear(); + for &mv in cand_mvs.iter() { + self.mv_list.add(mv); + } + match self.mv_list.count { + 1 => self.mv_list.list[0], + 3 => MV::pred(self.mv_list.list[0], self.mv_list.list[1], self.mv_list.list[2]), + _ => { + let sum = 
self.mv_list.get_list().iter().fold((0i32, 0i32), + |acc, mv| (acc.0 + i32::from(mv.x), acc.1 + i32::from(mv.y))); + MV {x: (sum.0 / (self.mv_list.count as i32)) as i16, + y: (sum.1 / (self.mv_list.count as i32)) as i16} + }, + } + } +} + +macro_rules! umh_search_template { + ($cand_mv:expr, $cutoff:expr, $mv_est:expr, $sad_func:ident, $ref_blk:expr, $xpos:expr, $ypos:expr) => {{ + let cand_mv = $cand_mv; + let best_dist = $mv_est.$sad_func($ref_blk, $xpos, $ypos, cand_mv, MAX_DIST); + if best_dist < $cutoff { + return (cand_mv, best_dist); + } + + // step 1 - small refinement search + let (mut cand_mv, mut best_dist, _) = single_search_step!(cand_mv, best_dist, $mv_est, $sad_func, $ref_blk, $xpos, $ypos, DIA_PATTERN, 1, $cutoff); + if best_dist < $cutoff { + return (cand_mv, best_dist); + } + + // step 2 - unsymmetrical cross search + loop { + let (mv, dist, changed) = single_search_step!(cand_mv, best_dist, $mv_est, $sad_func, $ref_blk, $xpos, $ypos, UNSYMM_CROSS, 4, $cutoff); + if !changed { + break; + } + cand_mv = mv; + best_dist = dist; + if best_dist < $cutoff { + return (mv, dist); + } + } + + // step 3 - multi-hexagon grid search + let mut scale = 4; + while scale > 0 { + let (mv, dist, changed) = single_search_step!(cand_mv, best_dist, $mv_est, $sad_func, $ref_blk, $xpos, $ypos, LARGE_HEX_PATTERN, scale, $cutoff); + if !changed { + break; + } + cand_mv = mv; + best_dist = dist; + if best_dist < $cutoff { + return (mv, dist); + } + scale >>= 1; + } + // step 4 - final hexagon search + let (cand_mv, best_dist, _) = single_search_step!(cand_mv, best_dist, $mv_est, $sad_func, $ref_blk, $xpos, $ypos, HEX_PATTERN, 1, $cutoff); + if best_dist > $cutoff { + let (mv, dist, _) = single_search_step!(cand_mv, best_dist, $mv_est, $sad_func, $ref_blk, $xpos, $ypos, DIA_PATTERN, 1, $cutoff); + (mv, dist) + } else { + (cand_mv, best_dist) + } + }} +} + +impl MVSearch for UnevenHexSearch { + fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_mb: &RefMBData, mb_x: 
usize, mb_y: usize, cand_mvs: &[MV]) -> (MV, u32) { + let cand_mv = self.get_cand_mv(cand_mvs); + let cutoff = mv_est.cutoff_thr; + umh_search_template!(cand_mv, cutoff, mv_est, sad_mb, cur_mb, mb_x, mb_y) + } + fn search_blk8(&mut self, mv_est: &mut MVEstimator, ref_blk: &RefMBData, xpos: usize, ypos: usize, cand_mvs: &[MV]) -> (MV, u32) { + let cand_mv = self.get_cand_mv(cand_mvs); + let cutoff = mv_est.cutoff_thr / 4; + umh_search_template!(cand_mv, cutoff, mv_est, sad_blk8, ref_blk, xpos, ypos) + } +} + +struct MVEstimator<'a> { + pic: &'a NAVideoBuffer, + mv_range: i16, + cutoff_thr: u32, +} + +macro_rules! sad { + ($src1:expr, $src2:expr) => { + $src1.iter().zip($src2.iter()).fold(0u32, |acc, (&a, &b)| + acc + (((i32::from(a) - i32::from(b)) * (i32::from(a) - i32::from(b))) as u32)) + } +} + +impl<'a> MVEstimator<'a> { + fn sad_mb(&self, ref_mb: &RefMBData, mb_x: usize, mb_y: usize, mv: MV, cur_best_dist: u32) -> u32 { + let mut dst = RefMBData::new(); + luma_mc(&mut dst.y, 16, self.pic, mb_x * 16, mb_y * 16, mv, true); + + let mut dist = 0; + for (dline, sline) in dst.y.chunks(16).zip(ref_mb.y.chunks(16)) { + dist += sad!(dline, sline); + if dist > cur_best_dist { + return dist; + } + } + chroma_mc(&mut dst.u, 8, self.pic, mb_x * 8, mb_y * 8, 1, mv, true); + dist += sad!(dst.u, ref_mb.u); + if dist > cur_best_dist { + return dist; + } + chroma_mc(&mut dst.v, 8, self.pic, mb_x * 8, mb_y * 8, 2, mv, true); + dist += sad!(dst.v, ref_mb.v); + + dist + } + fn sad_blk8(&self, ref_mb: &RefMBData, xpos: usize, ypos: usize, mv: MV, cur_best_dist: u32) -> u32 { + let mut cur_y = [0; 64]; + let mut cur_u = [0; 16]; + let mut cur_v = [0; 16]; + + let mut dist = 0; + + let y_off = (xpos & 8) + (ypos & 8) * 16; + luma_mc(&mut cur_y, 8, self.pic, xpos, ypos, mv, false); + for (dline, sline) in cur_y.chunks(8).zip(ref_mb.y[y_off..].chunks(16)) { + dist += sad!(dline, sline); + if dist > cur_best_dist { + return dist; + } + } + + let c_off = (xpos & 8) / 2 + (ypos & 8) * 4; 
+ chroma_mc(&mut cur_u, 4, self.pic, xpos / 2, ypos / 2, 1, mv, false); + for (dline, sline) in cur_u.chunks(4).zip(ref_mb.u[c_off..].chunks(8)) { + dist += sad!(dline, sline); + if dist > cur_best_dist { + return dist; + } + } + chroma_mc(&mut cur_v, 4, self.pic, xpos / 2, ypos / 2, 2, mv, false); + for (dline, sline) in cur_v.chunks(4).zip(ref_mb.v[c_off..].chunks(8)) { + dist += sad!(dline, sline); + if dist > cur_best_dist { + return dist; + } + } + + dist + } +} + +trait MVSearch { + fn search_mb(&mut self, mv_est: &mut MVEstimator, ref_mb: &RefMBData, mb_x: usize, mb_y: usize, cand_mvs: &[MV]) -> (MV, u32); + fn search_blk8(&mut self, mv_est: &mut MVEstimator, ref_blk: &RefMBData, xpos: usize, ypos: usize, cand_mvs: &[MV]) -> (MV, u32); +} + +struct DummySearcher {} + +impl MVSearch for DummySearcher { + fn search_mb(&mut self, _mv_est: &mut MVEstimator, _ref_mb: &RefMBData, _mb_x: usize, _mb_y: usize, _cand_mvs: &[MV]) -> (MV, u32) { + (ZERO_MV, std::u32::MAX / 2) + } + fn search_blk8(&mut self, _mv_est: &mut MVEstimator, _ref_mb: &RefMBData, _xpos: usize, _ypos: usize, _cand_mvs: &[MV]) -> (MV, u32) { + (ZERO_MV, std::u32::MAX / 2) + } +} + +pub struct MotionEstimator { + pub range: i16, + pub thresh: u32, + mode: MVSearchMode, + srch: Box, +} + +impl MotionEstimator { + pub fn new() -> Self { + let mode = MVSearchMode::default(); + Self { + range: 64, + thresh: 32, + mode, + srch: mode.create(), + } + } + pub fn get_mode(&self) -> MVSearchMode { self.mode } + pub fn set_mode(&mut self, new_mode: MVSearchMode) { + if self.mode != new_mode { + self.mode = new_mode; + self.srch = self.mode.create(); + } + } + pub fn search_mb_p(&mut self, pic: &NAVideoBuffer, refmb: &RefMBData, mb_x: usize, mb_y: usize, cand_mvs: &[MV]) -> (MV, u32) { + let mut mv_est = MVEstimator { + mv_range: self.range, + cutoff_thr: self.thresh, + pic, + }; + self.srch.search_mb(&mut mv_est, refmb, mb_x, mb_y, cand_mvs) + } + pub fn search_blk8(&mut self, pic: &NAVideoBuffer, refmb: 
&RefMBData, xoff: usize, yoff: usize, cand_mvs: &[MV]) -> (MV, u32) { + let mut mv_est = MVEstimator { + mv_range: self.range, + cutoff_thr: self.thresh, + pic, + }; + self.srch.search_blk8(&mut mv_est, refmb, xoff, yoff, cand_mvs) + } +} + +pub struct SearchB<'a> { + ref_p: &'a NAVideoBuffer, + ref_n: &'a NAVideoBuffer, + xpos: usize, + ypos: usize, + ratios: [u32; 2], + tmp1: RefMBData, + tmp2: RefMBData, + pred_blk: RefMBData, +} + +impl<'a> SearchB<'a> { + pub fn new(ref_p: &'a NAVideoBuffer, ref_n: &'a NAVideoBuffer, mb_x: usize, mb_y: usize, ratios: [u32; 2]) -> Self { + Self { + ref_p, ref_n, + xpos: mb_x * 16, + ypos: mb_y * 16, + ratios, + tmp1: RefMBData::new(), + tmp2: RefMBData::new(), + pred_blk: RefMBData::new(), + } + } + pub fn search_mb(&mut self, ref_mb: &RefMBData, cand_mvs: [MV; 2]) -> (MV, MV) { + let mut best_cand = cand_mvs; + let mut best_dist = self.interp_b_dist(ref_mb, best_cand, MAX_DIST); + + loop { + let mut improved = false; + for &fmv_add in DIA_PATTERN.iter() { + for &bmv_add in DIA_PATTERN.iter() { + let cand = [best_cand[0] + fmv_add.from_pixels(), + best_cand[1] + bmv_add.from_pixels()]; + let dist = self.interp_b_dist(ref_mb, cand, best_dist); + if dist < best_dist { + best_dist = dist; + best_cand = cand; + improved = true; + } + } + } + if !improved { + break; + } + } + + for &fmv_add in REFINEMENT.iter() { + for &bmv_add in REFINEMENT.iter() { + let cand = [best_cand[0] + fmv_add, best_cand[1] + bmv_add]; + let dist = self.interp_b_dist(ref_mb, cand, best_dist); + if dist < best_dist { + best_dist = dist; + best_cand = cand; + } + } + } + + (best_cand[0], best_cand[1]) + } + fn interp_b_dist(&mut self, ref_mb: &RefMBData, cand_mv: [MV; 2], cur_best_dist: u32) -> u32 { + let [fmv, bmv] = cand_mv; + luma_mc(&mut self.tmp1.y, 16, self.ref_p, self.xpos, self.ypos, fmv, true); + chroma_mc(&mut self.tmp1.u, 8, self.ref_p, self.xpos / 2, self.ypos / 2, 1, fmv, true); + chroma_mc(&mut self.tmp1.v, 8, self.ref_p, self.xpos / 2, 
self.ypos / 2, 2, fmv, true); + luma_mc(&mut self.tmp2.y, 16, self.ref_n, self.xpos, self.ypos, bmv, true); + chroma_mc(&mut self.tmp2.u, 8, self.ref_n, self.xpos / 2, self.ypos / 2, 1, bmv, true); + chroma_mc(&mut self.tmp2.v, 8, self.ref_n, self.xpos / 2, self.ypos / 2, 2, bmv, true); + self.pred_blk.avg(&self.tmp1, self.ratios[0], &self.tmp2, self.ratios[1]); + + let mut dist = 0; + for (dline, sline) in self.pred_blk.y.chunks(16).zip(ref_mb.y.chunks(16)) { + dist += sad!(dline, sline); + if dist > cur_best_dist { + return dist; + } + } + dist += sad!(self.pred_blk.u, ref_mb.u); + if dist > cur_best_dist { + return dist; + } + dist += sad!(self.pred_blk.v, ref_mb.v); + + dist + } +} + +macro_rules! hadamard { + ($s0:expr, $s1:expr, $s2:expr, $s3:expr, $d0:expr, $d1:expr, $d2:expr, $d3:expr) => { + let t0 = $s0 + $s1; + let t1 = $s0 - $s1; + let t2 = $s2 + $s3; + let t3 = $s2 - $s3; + $d0 = t0 + t2; + $d2 = t0 - t2; + $d1 = t1 + t3; + $d3 = t1 - t3; + } +} + +pub struct FrameComplexityEstimate { + ref_frm: NAVideoBufferRef, + cur_frm: NAVideoBufferRef, + nxt_frm: NAVideoBufferRef, + width: usize, + height: usize, +} + +impl FrameComplexityEstimate { + pub fn new() -> Self { + let vinfo = NAVideoInfo::new(24, 24, false, YUV420_FORMAT); + let vt = alloc_video_buffer(vinfo, 4).unwrap(); + let buf = vt.get_vbuf().unwrap(); + Self { + ref_frm: buf.clone(), + cur_frm: buf.clone(), + nxt_frm: buf, + width: 0, + height: 0, + } + } + pub fn resize(&mut self, width: usize, height: usize) { + if width != self.width || height != self.height { + self.width = width; + self.height = height; + + let vinfo = NAVideoInfo::new(self.width / 2, self.height / 2, false, YUV420_FORMAT); + let vt = alloc_video_buffer(vinfo, 4).unwrap(); + self.ref_frm = vt.get_vbuf().unwrap(); + let frm = self.ref_frm.get_data_mut().unwrap(); + for el in frm.iter_mut() { + *el = 0x80; + } + let vt = alloc_video_buffer(vinfo, 4).unwrap(); + self.cur_frm = vt.get_vbuf().unwrap(); + let vt = 
alloc_video_buffer(vinfo, 4).unwrap(); + self.nxt_frm = vt.get_vbuf().unwrap(); + } + } + pub fn set_current(&mut self, frm: &NAVideoBuffer) { + Self::downscale(&mut self.cur_frm, frm); + } + pub fn get_complexity(&self, ftype: FrameType) -> u32 { + match ftype { + FrameType::I => Self::calculate_i_cplx(&self.cur_frm), + FrameType::P => Self::calculate_mv_diff(&self.ref_frm, &self.cur_frm), + _ => 0, + } + } + pub fn decide_b_frame(&mut self, frm1: &NAVideoBuffer, frm2: &NAVideoBuffer) -> bool { + Self::downscale(&mut self.cur_frm, frm1); + Self::downscale(&mut self.nxt_frm, frm2); + let diff_ref_cur = Self::calculate_mv_diff(&self.ref_frm, &self.cur_frm); + let diff_cur_nxt = Self::calculate_mv_diff(&self.cur_frm, &self.nxt_frm); + + // simple rule - if complexity ref->cur and cur->next is about the same this should be a B-frame + let ddiff = diff_ref_cur.max(diff_cur_nxt) - diff_ref_cur.min(diff_cur_nxt); + if ddiff < 256 { + true + } else { + let mut order = 0; + while (ddiff << order) < diff_ref_cur.min(diff_cur_nxt) { + order += 1; + } + order > 2 + } + } + pub fn update_ref(&mut self) { + std::mem::swap(&mut self.ref_frm, &mut self.cur_frm); + } + + fn add_mv(mb_x: usize, mb_y: usize, mv: MV) -> (usize, usize) { + (((mb_x * 16) as isize + (mv.x as isize)) as usize, + ((mb_y * 16) as isize + (mv.y as isize)) as usize) + } + fn calculate_i_cplx(frm: &NAVideoBuffer) -> u32 { + let (w, h) = frm.get_dimensions(0); + let src = frm.get_data(); + let stride = frm.get_stride(0); + let mut sum = 0; + let mut offset = 0; + for y in (0..h).step_by(4) { + for x in (0..w).step_by(4) { + sum += Self::satd_i(src, offset + x, stride, x > 0, y > 0); + } + offset += stride * 4; + } + sum + } + fn calculate_mv_diff(ref_frm: &NAVideoBuffer, cur_frm: &NAVideoBuffer) -> u32 { + let (w, h) = ref_frm.get_dimensions(0); + let mut sum = 0; + for mb_y in 0..(h / 16) { + for mb_x in 0..(w / 16) { + sum += Self::satd_mb_diff(ref_frm, cur_frm, mb_x, mb_y); + } + } + sum + } + fn 
satd_mb_diff(ref_frm: &NAVideoBuffer, cur_frm: &NAVideoBuffer, mb_x: usize, mb_y: usize) -> u32 { + let mv = Self::search_mv(ref_frm, cur_frm, mb_x, mb_y); + let mut sum = 0; + let src0 = ref_frm.get_data(); + let src1 = cur_frm.get_data(); + let stride = ref_frm.get_stride(0); + let (src_x, src_y) = Self::add_mv(mb_x, mb_y, mv); + for y in (0..16).step_by(4) { + for x in (0..16).step_by(4) { + sum += Self::satd(&src0[src_x + x + (src_y + y) * stride..], + &src1[mb_x * 16 + x + (mb_y * 16 + y) * stride..], + stride); + } + } + sum + } + fn search_mv(ref_frm: &NAVideoBuffer, cur_frm: &NAVideoBuffer, mb_x: usize, mb_y: usize) -> MV { + let stride = ref_frm.get_stride(0); + let (w, h) = ref_frm.get_dimensions(0); + let (v_edge, h_edge) = (w - 16, h - 16); + let ref_src = ref_frm.get_data(); + let cur_src = cur_frm.get_data(); + let cur_src = &cur_src[mb_x * 16 + mb_y * 16 * stride..]; + + let mut best_mv = ZERO_MV; + let mut best_dist = Self::sad(cur_src, ref_src, mb_x, mb_y, stride, best_mv); + if best_dist == 0 { + return best_mv; + } + + for step in (0..=2).rev() { + let mut changed = true; + while changed { + changed = false; + for &mv in DIA_PATTERN[1..].iter() { + let cand_mv = best_mv + mv.scale(1 << step); + let (cx, cy) = Self::add_mv(mb_x, mb_y, cand_mv); + if cx > v_edge || cy > h_edge { + continue; + } + let cand_dist = Self::sad(cur_src, ref_src, mb_x, mb_y, stride, cand_mv); + if cand_dist < best_dist { + best_dist = cand_dist; + best_mv = cand_mv; + if best_dist == 0 { + return best_mv; + } + changed = true; + } + } + } + } + best_mv + } + fn sad(cur_src: &[u8], src: &[u8], mb_x: usize, mb_y: usize, stride: usize, mv: MV) -> u32 { + let (src_x, src_y) = Self::add_mv(mb_x, mb_y, mv); + let mut sum = 0; + for (line1, line2) in cur_src.chunks(stride).zip(src[src_x + src_y * stride..].chunks(stride)).take(16) { + sum += line1[..16].iter().zip(line2[..16].iter()).fold(0u32, + |acc, (&a, &b)| acc + u32::from(a.max(b) - a.min(b)) * u32::from(a.max(b) - 
a.min(b))); + } + sum + } + fn satd_i(src: &[u8], mut offset: usize, stride: usize, has_left: bool, has_top: bool) -> u32 { + let mut diffs = [0; 16]; + match (has_left, has_top) { + (true, true) => { + for row in diffs.chunks_exact_mut(4) { + let mut left = i16::from(src[offset - 1]); + let mut tl = i16::from(src[offset - stride - 1]); + for (x, dst) in row.iter_mut().enumerate() { + let cur = i16::from(src[offset + x]); + let top = i16::from(src[offset + x - stride]); + + *dst = cur - (top + left + tl - top.min(left).min(tl) - top.max(left).max(tl)); + + left = cur; + tl = top; + } + + offset += stride; + } + }, + (true, false) => { + for (dst, (left, cur)) in diffs.chunks_exact_mut(4).zip( + src[offset - 1..].chunks(stride).zip(src[offset..].chunks(stride))) { + for (dst, (&left, &cur)) in dst.iter_mut().zip(left.iter().zip(cur.iter())) { + *dst = i16::from(cur) - i16::from(left); + } + } + }, + (false, true) => { + for (dst, (top, cur)) in diffs.chunks_exact_mut(4).zip( + src[offset - stride..].chunks(stride).zip(src[offset..].chunks(stride))) { + for (dst, (&top, &cur)) in dst.iter_mut().zip(top.iter().zip(cur.iter())) { + *dst = i16::from(cur) - i16::from(top); + } + } + }, + (false, false) => { + for (dst, src) in diffs.chunks_exact_mut(4).zip(src[offset..].chunks(stride)) { + for (dst, &src) in dst.iter_mut().zip(src.iter()) { + *dst = i16::from(src) - 128; + } + } + }, + }; + for row in diffs.chunks_exact_mut(4) { + hadamard!(row[0], row[1], row[2], row[3], row[0], row[1], row[2], row[3]); + } + for i in 0..4 { + hadamard!(diffs[i], diffs[i + 4], diffs[i + 8], diffs[i + 12], + diffs[i], diffs[i + 4], diffs[i + 8], diffs[i + 12]); + } + diffs.iter().fold(0u32, |acc, x| acc + (x.abs() as u32)) + } + fn satd(src0: &[u8], src1: &[u8], stride: usize) -> u32 { + let mut diffs = [0; 16]; + for (dst, (src0, src1)) in diffs.chunks_exact_mut(4).zip( + src0.chunks(stride).zip(src1.chunks(stride))) { + hadamard!(i16::from(src0[0]) - i16::from(src1[0]), + 
i16::from(src0[1]) - i16::from(src1[1]), + i16::from(src0[2]) - i16::from(src1[2]), + i16::from(src0[3]) - i16::from(src1[3]), + dst[0], dst[1], dst[2], dst[3]); + } + for i in 0..4 { + hadamard!(diffs[i], diffs[i + 4], diffs[i + 8], diffs[i + 12], + diffs[i], diffs[i + 4], diffs[i + 8], diffs[i + 12]); + } + diffs.iter().fold(0u32, |acc, x| acc + (x.abs() as u32)) + } + fn downscale(dst: &mut NAVideoBuffer, src: &NAVideoBuffer) { + let dst = NASimpleVideoFrame::from_video_buf(dst).unwrap(); + let sdata = src.get_data(); + for plane in 0..3 { + let cur_w = dst.width[plane]; + let cur_h = dst.height[plane]; + let doff = dst.offset[plane]; + let soff = src.get_offset(plane); + let dstride = dst.stride[plane]; + let sstride = src.get_stride(plane); + for (dline, sstrip) in dst.data[doff..].chunks_exact_mut(dstride).zip( + sdata[soff..].chunks_exact(sstride * 2)).take(cur_h) { + let (line0, line1) = sstrip.split_at(sstride); + for (dst, (src0, src1)) in dline.iter_mut().zip( + line0.chunks_exact(2).zip(line1.chunks_exact(2))).take(cur_w) { + *dst = ((u16::from(src0[0]) + u16::from(src0[1]) + + u16::from(src1[0]) + u16::from(src1[1]) + 2) >> 2) as u8; + } + } + } + } +} diff --git a/nihav-realmedia/src/codecs/rv40enc/ratectl.rs b/nihav-realmedia/src/codecs/rv40enc/ratectl.rs new file mode 100644 index 0000000..aa881ed --- /dev/null +++ b/nihav-realmedia/src/codecs/rv40enc/ratectl.rs @@ -0,0 +1,244 @@ +use nihav_core::frame::FrameType; + +pub struct RateDistMetric { + pub lambda: f32, + pub good_enough: u32, + pub p_split_thr: u32, +} + +impl RateDistMetric { + pub fn new() -> Self { + Self { + lambda: 1.0, + good_enough: 256, + p_split_thr: 8192, + } + } + pub fn get_metric(&self, bits: u32, dist: u32) -> u32 { + ((bits as f32) + (dist as f32) * 0.1 * self.lambda).ceil() as u32 + } +} + +#[derive(Clone,Copy)] +struct BitrateCounter { + factors: [f32; 32], + last_q: usize, + proj_size: usize, + intra: bool, +} + +impl BitrateCounter { + fn new(intra: bool) -> Self { + 
let mut obj = Self { + factors: [0.0; 32], + last_q: 0, + proj_size: 0, + intra + }; + obj.reset(); + obj + } + /** Restores the default quantiser (8 for intra, 10 otherwise) and the default factor table: a linear function of q for intra frames, the reciprocal of a quadratic in q otherwise. */ fn reset(&mut self) { + if self.intra { + self.last_q = 8; + for (q, dst) in self.factors.iter_mut().enumerate() { + let q = q as f32; + /* NOTE(review): this is negative for q of 30 and 31, so get_est_size() yields 0 there after the float-to-usize cast; confirm that is intended */ *dst = (-0.1 * q + 2.95) / 100.0; + } + } else { + self.last_q = 10; + for (q, dst) in self.factors.iter_mut().enumerate() { + let q = q as f32; + *dst = 100.0 / (8.2 * q * q + 51.0 * q + 3411.0); + } + } + } + /** Loads `good_enough` and `p_split_thr` of `metric` from a four-entry table indexed by (last_q plus q_add, capped at 31) divided by 8, and sets lambda to 1.0. */ fn init_metric(&self, metric: &mut RateDistMetric, q_add: usize) { + let q = (self.last_q + q_add).min(31); + /* NOTE(review): 4196 breaks the power-of-two pattern of its neighbours; possibly 4096 was meant - confirm */ const THRESHOLDS: [(u32, u32); 4] = [ + (256, 8192), (128, 8192), (64, 4196), (32, 2048) + ]; + let (ge_thr, ps_thr) = THRESHOLDS[q / 8]; + metric.good_enough = ge_thr; + metric.p_split_thr = ps_thr; + metric.lambda = 1.0; + } + /** Adjusts factors[last_q] after a frame of `fsize` has been produced. Nothing changes while the size stays within one eighth of proj_size; otherwise the reciprocal of the factor is lowered by 0.5 (only while above 1.0) for a smaller frame or raised by 0.5 (only while below 200.0) for a larger one. NOTE(review): a frame smaller than projected therefore raises the factor and later estimates; verify that this direction is the intended one. */ fn update_stats(&mut self, fsize: usize) { + if fsize < self.proj_size - self.proj_size / 8 { + let mut inv_fac = 1.0 / self.factors[self.last_q]; + if inv_fac > 1.0 { + inv_fac -= 0.5; + } + self.factors[self.last_q] = 1.0 / inv_fac; + } else if fsize > self.proj_size + self.proj_size / 8 { + let mut inv_fac = 1.0 / self.factors[self.last_q]; + if inv_fac < 200.0 { + inv_fac += 0.5; + } + self.factors[self.last_q] = 1.0 / inv_fac; + } + } + /** Estimated frame size at quantiser `q`: complexity times factors[q], rounded up. */ fn get_est_size(&self, complexity: u32, q: usize) -> usize { + ((complexity as f32) * self.factors[q]).ceil() as usize + } + /** Picks a quantiser for `target`, stores it in last_q together with its projected size and returns it. A target below the estimate for q 31 selects 31, a target above the estimate for q 0 selects 0; otherwise quantisers 30 down to 0 are scanned and each one whose estimate lies within one eighth of the target overwrites the choice, so the lowest matching one wins. When none matches, last_q and proj_size keep their previous values. */ fn get_quant(&mut self, target: usize, complexity: u32) -> usize { + let tgt_31 = self.get_est_size(complexity, 31); + let tgt_0 = self.get_est_size(complexity, 0); + if target < tgt_31 { + self.last_q = 31; + self.proj_size = tgt_31; + } else if target > tgt_0 { + self.last_q = 0; + self.proj_size = tgt_0; + } else { //xxx: do binary search? 
+ for q in (0..31).rev() { /* no early exit: a lower q that also fits replaces the earlier pick */ + let expected_size = self.get_est_size(complexity, q); + if target >= (expected_size - expected_size / 8) && + target <= (expected_size + expected_size / 8) { + self.proj_size = expected_size; + self.last_q = q; + } + } + } + self.last_q + } + /** Returns the quantiser selected by the last get_quant() call (or the default set by reset()). */ fn get_last_quant(&self) -> usize { self.last_q } +} + +/** Number of timestamp units after which update_stats() adds one `bitrate` worth of bits to the pool. */ const TIMEBASE: u32 = 1000; + +/** Frame-level rate control: an optional forced quantiser or quality, one BitrateCounter for I frames (index 0) and one for P frames (index 1), and a bit pool that is refilled as time advances. NOTE(review): the generic arguments of the Option fields are missing in this rendering of the patch. */ pub struct BitRateControl { + force_quant: Option, + force_quality: Option, + br_counter: [BitrateCounter; 2], + + bitrate: u32, + tpos: u32, + bitpool: usize, + + duration: u32, + dcount: u32, + + pub b_offset: usize, +} + +impl BitRateControl { + /** Creates a controller with no overrides, zero bitrate (rate control off) and a B-frame quantiser offset of 4. */ pub fn new() -> Self { + Self { + force_quant: None, + force_quality: None, + br_counter: [BitrateCounter::new(true), BitrateCounter::new(false)], + + bitrate: 0, + tpos: 0, + bitpool: 0, + + duration: 0, + dcount: 0, + + b_offset: 4, + } + } + /** True when neither override is set and a non-zero bitrate is configured. */ pub fn rate_ctl_in_use(&self) -> bool { + self.force_quant.is_none() && self.force_quality.is_none() && self.bitrate != 0 + } + /** Stores the bitrate, resets both counters, preloads the pool with twice the bitrate and restarts the time position. */ pub fn set_bitrate(&mut self, bitrate: u32) { + self.bitrate = bitrate; + for br in self.br_counter.iter_mut() { + br.reset(); + } + + self.bitpool = (self.bitrate as usize) * 2; + self.tpos = 0; + } + pub fn set_force_quant(&mut self, force_q: Option) { self.force_quant = force_q; } + /** Returns the forced quantiser cast to i8, or -1 when none is set. */ pub fn get_force_quant(&self) -> i8 { + if let Some(q) = self.force_quant { + q as i8 + } else { + -1 + } + } + pub fn set_force_quality(&mut self, force_q: Option) { self.force_quality = force_q; } + /** Returns the forced quality cast to i8, or -1 when none is set. */ pub fn get_force_quality(&self) -> i8 { + if let Some(q) = self.force_quality { + q as i8 + } else { + -1 + } + } + /** Chooses the quantiser for the next frame: the forced quantiser if set; the constant 4 when a quality is forced; for non-B frames the result of the matching counter (index 0 for I, index 1 for everything else) fed with get_target_size(); for B frames the last quantiser of counter 1 plus b_offset, capped at 31. */ pub fn get_quant(&mut self, ftype: FrameType, complexity: u32) -> usize { + if let Some(q) = self.force_quant { + q + } else if self.force_quality.is_some() { + 4 + } else if ftype != FrameType::B { + let tgt = self.get_target_size(ftype); + self.br_counter[if ftype == FrameType::I { 0 } else { 1 }].get_quant(tgt, complexity) + } else { + (self.br_counter[1].get_last_quant() + self.b_offset).min(31) + } + } + 
/** Returns the last quantiser of the I counter for I frames, of the P counter for P frames, and the P value plus b_offset (capped at 31) for any other frame type. */ pub fn get_last_quant(&self, ftype: FrameType) -> usize { + match ftype { + FrameType::I => self.br_counter[0].get_last_quant(), + FrameType::P => self.br_counter[1].get_last_quant(), + _ => (self.br_counter[1].get_last_quant() + self.b_offset).min(31), + } + } + /** Prepares `metric` for a frame: with a forced quality only lambda is set (quality divided by 50); otherwise the counter for the frame type fills in thresholds and lambda, with b_offset added to the quantiser for frame types other than I and P. */ pub fn init_metric(&self, ftype: FrameType, metric: &mut RateDistMetric) { + if let Some(q) = self.force_quality { + metric.lambda = (q as f32) / 50.0; + } else { + match ftype { + FrameType::I => { + self.br_counter[0].init_metric(metric, 0); + }, + FrameType::P => { + self.br_counter[1].init_metric(metric, 0); + }, + _ => { + self.br_counter[1].init_metric(metric, self.b_offset); + }, + }; + } + } + /** Accounts for an encoded frame of `fsize` bytes that advanced time by `ts_diff` units. With a non-zero bitrate it accumulates the duration statistics, refills the pool by `bitrate` bits for every TIMEBASE units passed and subtracts the frame bits, never letting the pool drop below 1024. The I or P counter is then updated with the frame size; other frame types do not touch the counters. */ pub fn update_stats(&mut self, ftype: FrameType, fsize: usize, ts_diff: u32) { + if self.bitrate > 0 { + /* stop accumulating once duration reaches half the u32 range */ if ts_diff > 0 && self.duration < std::u32::MAX / 2 { + self.duration += ts_diff; + self.dcount += 1; + } + /* NOTE(review): unchecked u32 addition; presumably ts_diff is always small - confirm */ self.tpos += ts_diff; + while self.tpos >= TIMEBASE { + self.tpos -= TIMEBASE; + self.bitpool += self.bitrate as usize; + } + self.bitpool = self.bitpool.saturating_sub(fsize * 8).max(1024); + } + match ftype { + FrameType::I => self.br_counter[0].update_stats(fsize), + FrameType::P => self.br_counter[1].update_stats(fsize), + _ => {}, + }; + } + /** Returns the target size in bytes for the next frame, or 0 when the bitrate or the pool is zero. The usable pool is capped at nine eighths of the bitrate; it is scaled by the rounded average frame duration over the time left until the next refill (TIMEBASE minus tpos), or divided by 10 while no duration has been recorded. I frames get three times that amount, B frames three quarters; the bit count is rounded up to whole bytes. */ pub fn get_target_size(&self, ftype: FrameType) -> usize { + if self.bitrate == 0 || self.bitpool == 0 { + return 0; + } + let bitpool_limit = (self.bitrate + self.bitrate / 8) as usize; + let bitpool_avail = self.bitpool.min(bitpool_limit); + let target_size = if self.dcount > 0 { + let avg_len = ((self.duration + self.dcount / 2) / self.dcount).max(1); + bitpool_avail * (avg_len as usize) / ((TIMEBASE - self.tpos) as usize) + } else { + bitpool_avail / 10 + }; + let tgt_bits = match ftype { + FrameType::I => target_size * 3, + FrameType::B => target_size * 3 / 4, + _ => target_size, + }; + (tgt_bits + 7) / 8 + } +} diff --git a/nihav-realmedia/src/codecs/rv40enc/types.rs b/nihav-realmedia/src/codecs/rv40enc/types.rs new file mode 100644 index 
0000000..0d3f11f --- /dev/null +++ b/nihav-realmedia/src/codecs/rv40enc/types.rs @@ -0,0 +1,586 @@ +use nihav_codec_support::codecs::{MV, ZERO_MV}; + +/** Extra motion vector operations, implemented below for MV. */ pub trait RV34MVOps { + /** Returns the vector scaled by trb over trd and that scaled vector minus the original. */ fn scale(&self, trd: u32, trb: u32) -> (MV, MV); + /** True when the two vectors differ by more than 3 in x or in y. */ fn diff_gt_3(self, other: Self) -> bool; +} + +impl RV34MVOps for MV { + /** Scales both components by trb over trd using a 14-bit fixed-point ratio with rounding. Returns (scaled, scaled minus self). Integer division by `trd`: a zero `trd` panics. */ fn scale(&self, trd: u32, trb: u32) -> (MV, MV) { + const TR_SHIFT: u8 = 14; + const TR_BIAS: i32 = 1 << (TR_SHIFT - 1); + + let ratio = ((trb as i32) << TR_SHIFT) / (trd as i32); + let mv_f = MV { + x: (((self.x as i32) * ratio + TR_BIAS) >> TR_SHIFT) as i16, + y: (((self.y as i32) * ratio + TR_BIAS) >> TR_SHIFT) as i16 + }; + let mv_b = mv_f - *self; + (mv_f, mv_b) + } + fn diff_gt_3(self, other: Self) -> bool { + let diff = self - other; + diff.x.abs() > 3 || diff.y.abs() > 3 + } +} + +#[derive(Debug,Clone,Copy)] +/** Prediction modes carried per 4x4 block by MacroblockType::Intra4x4. */ pub enum PredType4x4 { + Ver, + Hor, + DC, + DiagDownLeft, + DiagDownRight, + VerRight, + HorDown, + VerLeft, + HorUp, + LeftDC, + TopDC, + DC128, + DiagDownLeftNoDown, + HorUpNoDown, + VerLeftNoDown +} + +#[derive(Debug,Clone,Copy)] +/** Prediction modes carried by MacroblockType::Intra16x16. */ pub enum PredType8x8 { + DC, + Hor, + Ver, + Plane, + LeftDC, + TopDC, + DC128 +} + +/** Conversion of a prediction mode into the i8 value kept in MBState::ipred. */ pub trait ToIndex { + fn to_index(self) -> i8; +} + +impl ToIndex for PredType8x8 { + /** Ver, Hor and Plane map to 1, 2 and 3; every other mode maps to 0. */ fn to_index(self) -> i8 { + match self { + PredType8x8::Ver => 1, + PredType8x8::Hor => 2, + PredType8x8::Plane => 3, + _ => 0, + } + } +} + +impl ToIndex for PredType4x4 { + /** Directional modes map to 1 through 8, with each NoDown variant sharing the index of its base mode; the remaining modes map to 0. */ fn to_index(self) -> i8 { + match self { + PredType4x4::Ver => 1, + PredType4x4::Hor => 2, + PredType4x4::DiagDownRight => 3, + PredType4x4::DiagDownLeft | PredType4x4::DiagDownLeftNoDown => 4, + PredType4x4::VerRight => 5, + PredType4x4::VerLeft | PredType4x4::VerLeftNoDown => 6, + PredType4x4::HorUp |PredType4x4::HorUpNoDown => 7, + PredType4x4::HorDown => 8, + _ => 0, // DC predictions + } + } +} + +#[derive(Clone,Copy,Default)] +/** Sixteen coefficients, printed by the Display impl as four rows of four. */ pub struct Block { + pub coeffs: [i16; 16], +} + +impl Block { + pub fn new() -> Self { Self::default() } + /** True when every coefficient is zero. */ pub fn is_empty(&self) -> bool { + for &el in 
self.coeffs.iter() { + if el != 0 { + return false; + } + } + true + } + /** Number of non-zero coefficients. */ pub fn count_nz(&self) -> usize { + self.coeffs.iter().filter(|&&x| x != 0).count() + } +} +impl std::fmt::Display for Block { + /** Writes the coefficients as four newline-terminated rows of four values, each formatted with width 3. */ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let mut out = String::new(); + for row in self.coeffs.chunks(4) { + out += format!(" {:3} {:3} {:3} {:3}\n", row[0], row[1], row[2], row[3]).as_str(); + } + write!(f, "{}", out) + } +} + +#[derive(Clone,Copy,Default)] +/** Per-macroblock deblocking data. MBState::fill_deblock() sets `deblock_y` to 0xFFFF when `is_strong` is set and otherwise to `cbp_y` combined with motion-difference bits; `q` and `cbp_c` are not used in this file. */ pub struct DeblockInfo { + pub is_strong: bool, + pub q: u8, + pub cbp_y: u16, + pub cbp_c: u8, + pub deblock_y: u16, +} + +#[derive(Debug,Clone,Copy,PartialEq)] +/** Macroblock kind as stored per macroblock in MBState::mb_type; Invalid is the fill value used by resize() and reset(). */ pub enum MBType { + Intra, + Intra16, + Skip, + P16x16, + P16x16Mix, + P16x8, + P8x16, + P8x8, + Direct, + Bidir, + Forward, + Backward, + Invalid, +} + +impl MBType { + pub fn is_intra(self) -> bool { matches!(self, MBType::Intra | MBType::Intra16) } + /** Rank in 0..=10 used by MBState::get_pred_mbtype(), which also keeps the inverse table. Panics (unreachable!) for Skip and Invalid, so callers must replace or exclude those first. */ fn get_weight(self) -> u8 { + match self { + MBType::Intra => 0, + MBType::Intra16 => 1, + MBType::Skip => unreachable!(), + MBType::P16x16 => 2, + MBType::P16x16Mix => 10, + MBType::P16x8 => 7, + MBType::P8x16 => 8, + MBType::P8x8 => 3, + MBType::Direct => 6, + MBType::Bidir => 9, + MBType::Forward => 4, + MBType::Backward => 5, + MBType::Invalid => unreachable!(), + } + } + /** Numeric code in 0..=6; several pairs of types share one code (for instance P16x16 and Forward). Panics (unreachable!) for Skip and Invalid. */ pub fn to_code(self) -> usize { + match self { + MBType::Intra => 0, + MBType::Intra16 => 1, + MBType::P16x16 | MBType::Forward => 2, + MBType::P8x8 | MBType::Backward => 3, + MBType::P16x8 | MBType::Bidir => 4, + MBType::P8x16 | MBType::Direct => 5, + MBType::P16x16Mix => 6, + _ => unreachable!(), + } + } + /** True for Bidir, for Forward when `fwd` is set and for Backward when it is not. */ pub fn has_dir_mv(self, fwd: bool) -> bool { + match self { + MBType::Bidir => true, + MBType::Forward if fwd => true, + MBType::Backward if !fwd => true, + _ => false, + } + } +} + +#[derive(Default)] +/** Position of the current macroblock plus availability flags for its top, left, top-left and top-right neighbours, as consumed by the MBState prediction helpers. */ pub struct SliceState { + pub has_t: bool, + pub has_l: bool, + pub has_tl: bool, + pub has_tr: bool, + pub mb_x: usize, + pub mb_y: usize, +} + +impl SliceState { + pub fn new() -> Self { 
Self::default() } +} + +#[derive(Default)] +/** Per-frame macroblock state: macroblock types, intra prediction indices per 4x4 block and motion vectors per 8x8 block. Every array has one extra leading row and column so that neighbour lookups at the frame edge stay inside the array. NOTE(review): the element types of the Vec fields are missing in this rendering of the patch. */ pub struct MBState { + pub mb_type: Vec, + pub ipred: Vec, + pub fwd_mv: Vec, + pub bwd_mv: Vec, + pub ref_mv: Vec, + pub mb_stride: usize, + pub blk8_stride: usize, + pub blk4_stride: usize, +} + +impl MBState { + pub fn new() -> Self { Self::default() } + /** Sets the strides for a frame of `mb_w` by `mb_h` macroblocks and resizes all arrays to hold it. */ pub fn resize(&mut self, mb_w: usize, mb_h: usize) { + self.mb_stride = mb_w + 2; + self.blk8_stride = mb_w * 2 + 2; + self.blk4_stride = mb_w * 4 + 2; + + self.mb_type.resize(self.mb_stride * (mb_h + 1), MBType::Invalid); + self.ipred.resize(self.blk4_stride * (mb_h * 4 + 1), -1); + /* the row count of the 8x8 arrays follows the frame height: get_blk8_idx() addresses row mb_y * 2 + 1, so sizing by mb_w overran the arrays whenever mb_h exceeded mb_w */ self.fwd_mv.resize(self.blk8_stride * (mb_h * 2 + 1), ZERO_MV); + self.bwd_mv.resize(self.blk8_stride * (mb_h * 2 + 1), ZERO_MV); + self.ref_mv.resize(self.blk8_stride * (mb_h * 2 + 1), ZERO_MV); + } + /** Marks every macroblock type as Invalid and every prediction index as -1; motion vectors are left untouched. */ pub fn reset(&mut self) { + for el in self.mb_type.iter_mut() { + *el = MBType::Invalid; + } + for el in self.ipred.iter_mut() { + *el = -1; + } + } + /** Stores `mv` in all four 8x8 blocks of the macroblock whose top-left block index is `blk8_idx`, in the forward or backward array. */ fn set_mv(&mut self, blk8_idx: usize, fwd: bool, mv: MV) { + if fwd { + self.fwd_mv[blk8_idx] = mv; + self.fwd_mv[blk8_idx + 1] = mv; + self.fwd_mv[blk8_idx + self.blk8_stride] = mv; + self.fwd_mv[blk8_idx + self.blk8_stride + 1] = mv; + } else { + self.bwd_mv[blk8_idx] = mv; + self.bwd_mv[blk8_idx + 1] = mv; + self.bwd_mv[blk8_idx + self.blk8_stride] = mv; + self.bwd_mv[blk8_idx + self.blk8_stride + 1] = mv; + } + } + /** Index of a macroblock in `mb_type`, skipping the extra leading row and column. */ pub fn get_mb_idx(&self, mb_x: usize, mb_y: usize) -> usize { + mb_x + 1 + (mb_y + 1) * self.mb_stride + } + /** Index of the top-left 8x8 block of a macroblock in the motion vector arrays. */ pub fn get_blk8_idx(&self, mb_x: usize, mb_y: usize) -> usize { + mb_x * 2 + 1 + (mb_y * 2 + 1) * self.blk8_stride + } + /** Index of the top-left 4x4 block of a macroblock in `ipred`. */ pub fn get_blk4_idx(&self, mb_x: usize, mb_y: usize) -> usize { + mb_x * 4 + 1 + (mb_y * 4 + 1) * self.blk4_stride + } + /** Records a coded macroblock: clears its 4x4 prediction indices to 0, then stores its type, its prediction indices (intra kinds) and its forward and backward vectors according to `mb_type`. */ pub fn update(&mut self, mb_type: &MacroblockType, mb_x: usize, mb_y: usize) { + let mb_idx = self.get_mb_idx(mb_x, mb_y); + let blk8_idx = self.get_blk8_idx(mb_x, mb_y); + let blk4_idx = self.get_blk4_idx(mb_x, mb_y); + + for row in 
self.ipred[blk4_idx..].chunks_mut(self.blk4_stride).take(4) { + for el in row[..4].iter_mut() { + *el = 0; + } + } + + /* every arm stores the MBType and fills the four 8x8 vector slots of both directions */ match *mb_type { + MacroblockType::Intra16x16(ptype) => { + self.mb_type[mb_idx] = MBType::Intra16; + let pred_id = ptype.to_index(); + /* the single mode index is replicated over all sixteen 4x4 slots */ for row in self.ipred[blk4_idx..].chunks_mut(self.blk4_stride).take(4) { + for el in row[..4].iter_mut() { + *el = pred_id; + } + } + self.set_mv(blk8_idx, true, ZERO_MV); + self.set_mv(blk8_idx, false, ZERO_MV); + }, + MacroblockType::Intra4x4(ptypes) => { + self.mb_type[mb_idx] = MBType::Intra; + /* zipping with the four-element mode rows limits each write to four slots per row and to four rows */ for (dst, src) in self.ipred[blk4_idx..].chunks_mut(self.blk4_stride).zip(ptypes.chunks(4)) { + for (dst, &ptype) in dst.iter_mut().zip(src.iter()) { + *dst = ptype.to_index(); + } + } + self.set_mv(blk8_idx, true, ZERO_MV); + self.set_mv(blk8_idx, false, ZERO_MV); + }, + MacroblockType::PSkip => { + self.mb_type[mb_idx] = MBType::Skip; + self.set_mv(blk8_idx, true, ZERO_MV); + self.set_mv(blk8_idx, false, ZERO_MV); + }, + MacroblockType::Inter16x16(mv) => { + self.mb_type[mb_idx] = MBType::P16x16; + self.set_mv(blk8_idx, true, mv); + self.set_mv(blk8_idx, false, ZERO_MV); + }, + MacroblockType::InterMix(mv) => { + self.mb_type[mb_idx] = MBType::P16x16Mix; + self.set_mv(blk8_idx, true, mv); + self.set_mv(blk8_idx, false, ZERO_MV); + }, + /* top pair of 8x8 blocks gets mvs[0], bottom pair mvs[1] */ MacroblockType::Inter16x8(mvs) => { + self.mb_type[mb_idx] = MBType::P16x8; + self.fwd_mv[blk8_idx] = mvs[0]; + self.fwd_mv[blk8_idx + 1] = mvs[0]; + self.fwd_mv[blk8_idx + self.blk8_stride] = mvs[1]; + self.fwd_mv[blk8_idx + self.blk8_stride + 1] = mvs[1]; + self.set_mv(blk8_idx, false, ZERO_MV); + }, + /* left column of 8x8 blocks gets mvs[0], right column mvs[1] */ MacroblockType::Inter8x16(mvs) => { + self.mb_type[mb_idx] = MBType::P8x16; + self.fwd_mv[blk8_idx] = mvs[0]; + self.fwd_mv[blk8_idx + 1] = mvs[1]; + self.fwd_mv[blk8_idx + self.blk8_stride] = mvs[0]; + self.fwd_mv[blk8_idx + self.blk8_stride + 1] = mvs[1]; + self.set_mv(blk8_idx, false, ZERO_MV); + }, + /* one vector per 8x8 block, stored row by row */ MacroblockType::Inter8x8(mvs) => { + self.mb_type[mb_idx] = MBType::P8x8; + 
self.fwd_mv[blk8_idx] = mvs[0]; + self.fwd_mv[blk8_idx + 1] = mvs[1]; + self.fwd_mv[blk8_idx + self.blk8_stride] = mvs[2]; + self.fwd_mv[blk8_idx + self.blk8_stride + 1] = mvs[3]; + self.set_mv(blk8_idx, false, ZERO_MV); + }, + /* NOTE(review): only elements 0 and 1 of the four-element arrays are stored, repeated on both rows; confirm that elements 2 and 3 are meant to be ignored */ MacroblockType::BSkip(fmvs, bmvs) => { + self.mb_type[mb_idx] = MBType::Skip; + self.fwd_mv[blk8_idx] = fmvs[0]; + self.fwd_mv[blk8_idx + 1] = fmvs[1]; + self.fwd_mv[blk8_idx + self.blk8_stride] = fmvs[0]; + self.fwd_mv[blk8_idx + self.blk8_stride + 1] = fmvs[1]; + self.bwd_mv[blk8_idx] = bmvs[0]; + self.bwd_mv[blk8_idx + 1] = bmvs[1]; + self.bwd_mv[blk8_idx + self.blk8_stride] = bmvs[0]; + self.bwd_mv[blk8_idx + self.blk8_stride + 1] = bmvs[1]; + }, + /*MacroblockType::Direct(fmv, bmv) => { + self.mb_type[mb_idx] = MBType::Direct; + self.set_mv(blk8_idx, true, fmv); + self.set_mv(blk8_idx, false, bmv); + },*/ + MacroblockType::Bidir(fmv, bmv) => { + self.mb_type[mb_idx] = MBType::Bidir; + self.set_mv(blk8_idx, true, fmv); + self.set_mv(blk8_idx, false, bmv); + }, + MacroblockType::Forward(mv) => { + self.mb_type[mb_idx] = MBType::Forward; + self.set_mv(blk8_idx, true, mv); + self.set_mv(blk8_idx, false, ZERO_MV); + }, + MacroblockType::Backward(mv) => { + self.mb_type[mb_idx] = MBType::Backward; + self.set_mv(blk8_idx, true, ZERO_MV); + self.set_mv(blk8_idx, false, mv); + }, + }; + } + /** Predicts the type of the current macroblock from its available neighbours, gathered in the order top, top-right (only when top is available), left, top-left. Skip neighbours count as P16x16, or as Direct when `is_b` is set. The result is Intra with no neighbour, the neighbour itself with one, the one with the lower get_weight() with two, and otherwise the type whose weight occurs most often (lowest weight on ties). NOTE(review): a neighbour still marked Invalid would hit unreachable!() in get_weight(); presumably the availability flags rule that out - confirm. */ pub fn get_pred_mbtype(&self, sstate: &SliceState, is_b: bool) -> MBType { + let mut cand = [MBType::Invalid; 4]; + let mut ccount = 0; + + let mb_idx = self.get_mb_idx(sstate.mb_x, sstate.mb_y); + if sstate.has_t { + cand[ccount] = self.mb_type[mb_idx - self.mb_stride]; + ccount += 1; + if sstate.has_tr { + cand[ccount] = self.mb_type[mb_idx - self.mb_stride + 1]; + ccount += 1; + } + } + if sstate.has_l { + cand[ccount] = self.mb_type[mb_idx - 1]; + ccount += 1; + } + if sstate.has_tl { + cand[ccount] = self.mb_type[mb_idx - self.mb_stride - 1]; + ccount += 1; + } + if !is_b { + for el in cand[..ccount].iter_mut() { + if *el == MBType::Skip { + *el = 
MBType::P16x16; + } + } + } else { + for el in cand[..ccount].iter_mut() { + if *el == MBType::Skip { + *el = MBType::Direct; + } + } + } + match ccount { + 0 => MBType::Intra, + 1 => cand[0], + 2 => if cand[0].get_weight() <= cand[1].get_weight() { cand[0] } else { cand[1] }, + _ => { + /* inverse of MBType::get_weight(): entry i is the type whose weight is i */ const MBTYPE_FROM_WEIGHT: [MBType; 11] = [ + MBType::Intra, MBType::Intra16, MBType::P16x16, MBType::P8x8, + MBType::Forward, MBType::Backward, MBType::Direct, MBType::P16x8, + MBType::P8x16, MBType::Bidir, MBType::P16x16Mix + ]; + + /* weights only reach 10, so the twelfth counter always stays zero */ let mut counts = [0; 12]; + for el in cand[..ccount].iter() { + counts[usize::from(el.get_weight())] += 1; + } + let mut best_idx = 0; + let mut best_wgt = 0; + /* strict comparison keeps the lowest weight among equally frequent ones */ for (idx, &weight) in counts.iter().enumerate() { + if weight > best_wgt { + best_idx = idx; + best_wgt = weight; + } + } + MBTYPE_FROM_WEIGHT[best_idx] + }, + } + } + /** Returns the stored prediction indices of the left, top and top-right neighbours of 4x4 block (`x`, `y`) inside macroblock (`mb_x`, `mb_y`). */ pub fn get_ipred4x4_ctx(&self, mb_x: usize, mb_y: usize, x: usize, y: usize) -> (i8, i8, i8) { + let blk4_idx = self.get_blk4_idx(mb_x, mb_y) + x + y * self.blk4_stride; + (self.ipred[blk4_idx - 1], + self.ipred[blk4_idx - self.blk4_stride], + self.ipred[blk4_idx - self.blk4_stride + 1]) + } + /** Stores the indices of the sixteen `modes` into the 4x4 slots of the macroblock, four per row. */ pub fn set_ipred4x4(&mut self, mb_x: usize, mb_y: usize, modes: &[PredType4x4; 16]) { + let blk4_idx = self.get_blk4_idx(mb_x, mb_y); + for (dst, src) in self.ipred[blk4_idx..].chunks_mut(self.blk4_stride).zip(modes.chunks(4)) { + for (dst, src) in dst.iter_mut().zip(src.iter()) { + *dst = src.to_index(); + } + } + } + /** Reads the forward or backward vector at 8x8 block index `idx`. */ fn get_mv(&self, idx: usize, fwd: bool) -> MV { + if fwd { + self.fwd_mv[idx] + } else { + self.bwd_mv[idx] + } + } + /** Returns the forward vector of 8x8 block (`xoff`, `yoff`) of the current macroblock minus its prediction. The vector is returned unchanged for the first block when neither top nor left is available. Otherwise the prediction is MV::pred() of the left vector (zero when unavailable), the top vector (the left one when unavailable) and a third candidate taken from top-right or else top-left; when neither of those is available only the left vector is subtracted. `w16` moves the top-right candidate two blocks to the right instead of one. */ pub fn get_diff_mv(&self, sstate: &SliceState, w16: bool, xoff: usize, yoff: usize) -> MV { + let blk8_idx = self.get_blk8_idx(sstate.mb_x, sstate.mb_y) + xoff + yoff * self.blk8_stride; + + let cur_mv = self.get_mv(blk8_idx, true); + + if (yoff == 0 && !sstate.has_t) && (xoff == 0 && !sstate.has_l) { + return cur_mv; + } + + let left_mv = if sstate.has_l || (xoff != 0) { self.get_mv(blk8_idx - 1, true) } else 
{ ZERO_MV }; + let top_mv = if sstate.has_t || (yoff != 0) { self.get_mv(blk8_idx - self.blk8_stride, true) } else { left_mv }; + /* availability of the top-right candidate for each of the four block positions (xoff + yoff * 2) */ let has_tr = match xoff + yoff * 2 { + 0 if w16 => sstate.has_tr, + 0 => sstate.has_t, + 1 => sstate.has_tr, + 2 if w16 => false, + 2 => true, + _ => false, + }; + /* availability of the top-left candidate, used only when top-right is missing */ let has_tl = match xoff + yoff * 2 { + 0 => sstate.has_tl, + 1 => sstate.has_t, + 2 => sstate.has_l, + _ => true, + }; + let mv_c = if has_tr { + self.get_mv(blk8_idx - self.blk8_stride + if w16 { 2 } else { 1 }, true) + } else if has_tl { + self.get_mv(blk8_idx - self.blk8_stride - 1, true) + } else { + return cur_mv - left_mv; + }; + + cur_mv - MV::pred(left_mv, top_mv, mv_c) + } + /** Returns the vector of the current macroblock in the chosen direction minus its prediction. Candidates are the left, top and top-right (else top-left) macroblocks, each counted only when its type has a vector in that direction (has_dir_mv). Without a top row the left candidate alone (or zero) is subtracted. Otherwise three candidates are combined with MV::pred(), two are averaged component-wise, one is used as is, and none gives a zero prediction. */ pub fn get_diff_mv_b(&self, sstate: &SliceState, fwd: bool) -> MV { + let mb_idx = self.get_mb_idx(sstate.mb_x, sstate.mb_y); + let blk8_idx = self.get_blk8_idx(sstate.mb_x, sstate.mb_y); + + let mut pred_mv = [ZERO_MV; 3]; + let mut pcount = 0; + + let cur_mv = self.get_mv(blk8_idx, fwd); + + if sstate.has_l && self.mb_type[mb_idx - 1].has_dir_mv(fwd) { + pred_mv[pcount] = self.get_mv(blk8_idx - 1, fwd); + pcount += 1; + } + if !sstate.has_t { + return cur_mv - pred_mv[0]; + } + if self.mb_type[mb_idx - self.mb_stride].has_dir_mv(fwd) { + pred_mv[pcount] = self.get_mv(blk8_idx - self.blk8_stride, fwd); + pcount += 1; + } + if sstate.has_tr { + if self.mb_type[mb_idx - self.mb_stride + 1].has_dir_mv(fwd) { + pred_mv[pcount] = self.get_mv(blk8_idx - self.blk8_stride + 2, fwd); + pcount += 1; + } + } else if sstate.has_tl && self.mb_type[mb_idx - self.mb_stride - 1].has_dir_mv(fwd) { + pred_mv[pcount] = self.get_mv(blk8_idx - self.blk8_stride - 1, fwd); + pcount += 1; + } + let pred_mv = match pcount { + 3 => MV::pred(pred_mv[0], pred_mv[1], pred_mv[2]), + 2 => MV{ x: (pred_mv[0].x + pred_mv[1].x) / 2, y: (pred_mv[0].y + pred_mv[1].y) / 2 }, + 1 => pred_mv[0], + _ => ZERO_MV, + }; + cur_mv - pred_mv + } + /** Exchanges the `fwd_mv` and `ref_mv` buffers without copying their contents. */ pub fn swap_mvs(&mut self) { + std::mem::swap(&mut self.fwd_mv, &mut self.ref_mv); + } + /** Fills `deblock_y` of the given DeblockInfo for the current macroblock: 0xFFFF when `is_strong` is set, otherwise `cbp_y` combined with bits for 8x8 blocks whose forward vector differs from the left or top neighbour by more than 3 in either component. */ pub fn 
fill_deblock(&self, dblk: &mut DeblockInfo, sstate: &SliceState) { + if dblk.is_strong { + dblk.deblock_y = 0xFFFF; + return; + } + let mut hmvmask = 0; + let mut vmvmask = 0; + + let mut blk8_idx = self.get_blk8_idx(sstate.mb_x, sstate.mb_y); + for y in 0..2 { + for x in 0..2 { + /* bit position of this 8x8 block inside the 16-bit mask */ let shift = x * 2 + y * 8; + let cur_mv = self.get_mv(blk8_idx + x, true); + /* compare with the left neighbour unless it would lie outside the frame */ if (x > 0) || (sstate.mb_x > 0) { + let left_mv = self.get_mv(blk8_idx + x - 1, true); + if cur_mv.diff_gt_3(left_mv) { + vmvmask |= 0x11 << shift; + } + } + /* compare with the top neighbour unless it would lie outside the frame */ if (y > 0) || (sstate.mb_y > 0) { + let top_mv = self.get_mv(blk8_idx + x - self.blk8_stride, true); + if cur_mv.diff_gt_3(top_mv) { + hmvmask |= 0x03 << shift; + } + } + } + blk8_idx += self.blk8_stride; + } + /* on the first macroblock row or column the bits of the outer edge are cleared */ if sstate.mb_y == 0 { hmvmask &= !0x000F; } + if sstate.mb_x == 0 { vmvmask &= !0x1111; } + + dblk.deblock_y = dblk.cbp_y | hmvmask | vmvmask; + } +} + +#[derive(Clone)] +/** Coded macroblock kind together with its prediction modes or motion vectors, as consumed by MBState::update(). */ pub enum MacroblockType { + Intra16x16(PredType8x8), + Intra4x4([PredType4x4; 16]), + PSkip, + Inter16x16(MV), + InterMix(MV), + Inter16x8([MV; 2]), + Inter8x16([MV; 2]), + Inter8x8([MV; 4]), + BSkip([MV; 4], [MV; 4]), + //Direct(MV, MV), + Bidir(MV, MV), + Forward(MV), + Backward(MV), +} + +impl Default for MacroblockType { + fn default() -> Self { Self::Intra16x16(PredType8x8::DC) } +} + +impl MacroblockType { + pub fn is_intra(&self) -> bool { + matches!(*self, MacroblockType::Intra16x16(_) | MacroblockType::Intra4x4(_)) + } + pub fn is_16(&self) -> bool { + matches!(*self, MacroblockType::Intra16x16(_) | MacroblockType::InterMix(_)) + } + pub fn is_skip(&self) -> bool { + matches!(*self, MacroblockType::PSkip | MacroblockType::BSkip(_, _)) + } +} + +pub struct Macroblock { + pub mb_type: MacroblockType, + pub coeffs: [Block; 25], +}