path = "../nihav-codec-support"
features = ["h263", "mdct", "blockdsp"]
+[dev-dependencies]
+nihav_commonfmt = { path = "../nihav-commonfmt", default-features=false, features = ["demuxer_y4m", "decoder_rawvideo"] }
+
[features]
default = ["all_decoders", "all_demuxers", "all_encoders", "all_muxers"]
demuxers = []
all_encoders = ["all_video_encoders", "all_audio_encoders"]
encoders = []
-all_video_encoders = []
+all_video_encoders = ["encoder_rv40"]
+encoder_rv40 = ["encoders"]
all_audio_encoders = ["encoder_cook"]
encoder_cook = ["encoders"]
#[cfg(any(feature="decoder_realvideo3", feature="decoder_realvideo4"))]
mod rv3040;
-#[cfg(any(feature="decoder_realvideo3", feature="decoder_realvideo4"))]
+// The RealVideo 4 encoder feature is named "encoder_rv40" in Cargo.toml; the shared
+// code tables must be built whenever it is enabled, even without any decoder.
+#[cfg(any(feature="decoder_realvideo3", feature="decoder_realvideo4", feature="encoder_rv40"))]
#[allow(clippy::erasing_op)]
mod rv34codes;
#[cfg(any(feature="decoder_realvideo3", feature="decoder_realvideo4"))]
pub mod rv30dsp;
#[cfg(feature="decoder_realvideo4")]
pub mod rv40;
-#[cfg(feature="decoder_realvideo4")]
+#[cfg(any(feature="decoder_realvideo4", feature="encoder_rv40"))]
pub mod rv40data;
#[cfg(feature="decoder_realvideo4")]
#[allow(clippy::erasing_op)]
#[cfg(feature="encoder_cook")]
mod cookenc;
+#[cfg(feature="encoder_rv40")]
+mod rv40enc;
+
#[cfg(feature="encoders")]
const ENCODERS: &[EncoderInfo] = &[ // Table pairing each encoder name with its constructor; every entry is gated by its own feature.
#[cfg(feature="encoder_cook")]
EncoderInfo { name: "cook", get_encoder: cookenc::get_encoder },
+
+#[cfg(feature="encoder_rv40")]
+ EncoderInfo { name: "realvideo4", get_encoder: rv40enc::get_encoder },
];
/// Registers all available encoders provided by this crate.
--- /dev/null
+use nihav_core::frame::FrameType;
+use nihav_core::io::bitwriter::*;
+use nihav_core::io::intcode::*;
+use nihav_codec_support::codecs::MV;
+use super::types::*;
+use super::super::rv34codes::*;
+use super::super::rv40data::*;
+
+pub fn write_slice_header(bw: &mut BitWriter, ftype: FrameType, q: usize, set_idx: usize, deblock: bool, pts: u32) { // Emits the fixed slice-header fields in bitstream order.
+ bw.write0(); // header starts with a single zero bit
+ match ftype { // 2-bit frame type code; only I, P and B are accepted
+ FrameType::I => bw.write(0, 2),
+ FrameType::P => bw.write(2, 2),
+ FrameType::B => bw.write(3, 2),
+ _ => unreachable!(), // any other frame type panics
+ };
+ bw.write(q as u32, 5); // quantiser in 5 bits
+ bw.write(0, 2); // unknown
+ bw.write(set_idx as u32, 2); // 2-bit set index
+ bw.write(!deblock as u32, 1); // stored inverted: bit is 1 when `deblock` is false
+ bw.write(pts, 13); // `pts` written as a 13-bit field
+}
+
+pub fn write_slice_dimensions(bw: &mut BitWriter, width: usize, height: usize) { // Codes width then height, using short codes for the listed sizes and an escape for anything else.
+ let wcode = match width { // index of a listed width; 7 is the escape for an explicitly coded width
+ 160 => 0,
+ 176 => 1,
+ 240 => 2,
+ 320 => 3,
+ 352 => 4,
+ 640 => 5,
+ 704 => 6,
+ _ => 7,
+ };
+ bw.write(wcode, 3);
+ if wcode == 7 {
+ let mut w = width >> 2; // explicit size is stored in units of 4 (low two bits are dropped)
+ while w >= 255 { // each 255 byte contributes 255 and signals that another byte follows
+ bw.write(255, 8);
+ w -= 255;
+ }
+ bw.write(w as u32, 8); // final byte (< 255) ends the sequence
+ }
+
+ let hcode = match height { // index of a listed height; 9 is the escape for an explicitly coded height
+ 120 => 0,
+ 132 => 1,
+ 144 => 2,
+ 240 => 3,
+ 288 => 4,
+ 480 => 5,
+ 180 => 6,
+ 360 => 7,
+ 576 => 8,
+ _ => 9,
+ };
+ if hcode < 6 {
+ bw.write(hcode, 3); // first six heights use a plain 3-bit code
+ } else {
+ bw.write(hcode + 6, 4); // remaining entries are sent as 4-bit values 12..=15
+ if hcode == 9 {
+ let mut h = height >> 2; // same 255-continued byte scheme as for the width
+ while h >= 255 {
+ bw.write(255, 8);
+ h -= 255;
+ }
+ bw.write(h as u32, 8);
+ }
+ }
+}
+
+pub fn write_slice_mb_idx(bw: &mut BitWriter, mb_idx: usize, num_mbs: usize) { // Writes `mb_idx` in a field whose width depends on `num_mbs`.
+ let mba_bits = match num_mbs - 1 { // width picked from the largest index; `num_mbs` must be at least 1 for the subtraction
+ 0..= 47 => 6,
+ 48..= 98 => 7,
+ 99..= 395 => 9,
+ 396..=1583 => 11,
+ 1584..=6335 => 13,
+ 6336..=9215 => 14,
+ _ => unreachable!(), // more than 9216 macroblocks panics here
+ };
+ bw.write(mb_idx as u32, mba_bits);
+}
+
+pub fn write_skip_count(bw: &mut BitWriter, skip_count: u32) { // Writes `skip_count` using the Gamma integer code.
+ bw.write_code(UintCodeType::Gamma, skip_count);
+}
+
+fn write_mv(bw: &mut BitWriter, mv: MV) { // Writes both vector components as Gamma codes after mapping signed values to unsigned ones.
+ let xcode = if mv.x > 0 { (mv.x - 1) * 2 + 1 } else { -mv.x * 2 } as u32; // positive v -> 2v-1 (odd), zero/negative v -> -2v (even)
+ let ycode = if mv.y > 0 { (mv.y - 1) * 2 + 1 } else { -mv.y * 2 } as u32; // same mapping for the vertical component
+
+ bw.write_code(UintCodeType::Gamma, xcode); // x is written before y
+ bw.write_code(UintCodeType::Gamma, ycode);
+}
+
+pub fn write_mb_header(bw: &mut BitWriter, ftype: FrameType, sstate: &SliceState, mbstate: &MBState) { // Writes the macroblock type and then its intra prediction modes or motion vector differences.
+ let mb_idx = mbstate.get_mb_idx(sstate.mb_x, sstate.mb_y);
+ let pred_mbt = mbstate.get_pred_mbtype(sstate, ftype == FrameType::B); // predicted type; second argument is true for B-frames
+
+ let set_id = pred_mbt.to_code(); // predicted type selects the row of the type code tables
+
+ if ftype != FrameType::I { // no macroblock type code is written in I-frames
+ let (codes, lens) = if ftype == FrameType::P {
+ (&RV40_PTYPE_CODES[set_id][..], &RV40_PTYPE_BITS[set_id][..])
+ } else {
+ (&RV40_BTYPE_CODES[set_id][..], &RV40_BTYPE_BITS[set_id][..])
+ };
+ let idx = mbstate.mb_type[mb_idx].to_code();
+ bw.write(codes[idx].into(), lens[idx]);
+ }
+ match mbstate.mb_type[mb_idx] {
+ MBType::Intra16 => {
+ if ftype == FrameType::I {
+ bw.write1(); // in I-frames a 1 bit precedes the 16x16 mode
+ }
+ bw.write(mbstate.ipred[mbstate.get_blk4_idx(sstate.mb_x, sstate.mb_y)] as u32, 2); // prediction mode of the first 4x4 entry, 2 bits
+ },
+ MBType::Intra => {
+ if ftype == FrameType::I {
+ bw.write0(); // in I-frames a 0 bit precedes the per-block modes
+ bw.write1(); //dquant
+ }
+ let ystart = if sstate.has_t { 0 } else { 1 }; // without a top neighbour row 0 is coded separately below
+ let mut blk4_idx = mbstate.get_blk4_idx(sstate.mb_x, sstate.mb_y);
+
+ if !sstate.has_t {
+ let mut code = 0;
+ for &el in mbstate.ipred[blk4_idx..][..4].iter() { // one bit per block of the top row, set when its mode is non-zero
+ code = code * 2 + if el == 0 { 0 } else { 1 };
+ }
+ bw.write(RV40_AIC_TOP_CODES[code].into(), RV40_AIC_TOP_BITS[code]);
+ blk4_idx += mbstate.blk4_stride;
+ }
+ for y in ystart..4 {
+ let mut x = 0;
+ while x < 4 { // x advances by 2 when a mode pair is coded jointly, otherwise by 1
+ let (lctx, tctx, trctx) = mbstate.get_ipred4x4_ctx(sstate.mb_x, sstate.mb_y, x, y);
+ let mode = mbstate.ipred[blk4_idx + x];
+ let ctx_word = if x < 3 { // 12-bit context: left, top, top-right nibbles (high to low)
+ ((trctx & 0xF) as u16) + (((tctx & 0xF) as u16) << 4) + (((lctx & 0xF) as u16) << 8)
+ } else { 0xFFF }; // last column has no partner block; presumably 0xFFF matches no table pattern -- TODO confirm
+ if let Some(idx) = RV40_AIC_PATTERNS.iter().position(|&x| x == ctx_word) { // known context: code this mode and the next one together
+ let mode1 = mbstate.ipred[blk4_idx + x + 1];
+ let code = mode * 9 + mode1;
+ bw.write(RV40_AIC_MODE2_CODES[idx][code as usize].into(),
+ RV40_AIC_MODE2_BITS[idx][code as usize]);
+ x += 2;
+ } else if tctx != -1 && lctx != -1 { // both neighbour contexts present: single mode, table picked by them
+ let idx = (tctx + lctx * 10) as usize;
+ let code = mode as usize;
+ bw.write(RV40_AIC_MODE1_CODES[idx][code].into(),
+ RV40_AIC_MODE1_BITS[idx][code]);
+ x += 1;
+ } else { // a neighbour context is -1: at most one bit is written and only a few modes are representable
+ match lctx {
+ -1 if tctx < 2 => { // one bit: 1 means mode 0, 0 means mode 1
+ if mode == 0 {
+ bw.write1();
+ } else {
+assert_eq!(mode, 1);
+ bw.write0();
+ }
+ },
+ 0 | 2 => { // one bit: 1 means mode 0, 0 means mode 2
+ if mode == 0 {
+ bw.write1();
+ } else {
+assert_eq!(mode, 2);
+ bw.write0();
+ }
+ },
+ _ => { // nothing is written; the mode has to be 0
+assert_eq!(mode, 0);
+ },
+ };
+ x += 1;
+ }
+ }
+ blk4_idx += mbstate.blk4_stride;
+ }
+ },
+ MBType::P16x16 | MBType::P16x16Mix => { // one motion vector difference
+ let diff_mv = mbstate.get_diff_mv(sstate, true, 0, 0);
+ write_mv(bw, diff_mv);
+ },
+ MBType::P16x8 => { // two differences, for partitions (0,0) and (0,1)
+ let diff_mv = mbstate.get_diff_mv(sstate, true, 0, 0);
+ write_mv(bw, diff_mv);
+ let diff_mv = mbstate.get_diff_mv(sstate, true, 0, 1);
+ write_mv(bw, diff_mv);
+ },
+ MBType::P8x16 => { // two differences, for partitions (0,0) and (1,0)
+ let diff_mv = mbstate.get_diff_mv(sstate, false, 0, 0);
+ write_mv(bw, diff_mv);
+ let diff_mv = mbstate.get_diff_mv(sstate, false, 1, 0);
+ write_mv(bw, diff_mv);
+ },
+ MBType::P8x8 => { // four differences, visited in raster order
+ for i in 0..4 {
+ let diff_mv = mbstate.get_diff_mv(sstate, false, i & 1, i >> 1);
+ write_mv(bw, diff_mv);
+ }
+ },
+ MBType::Forward => {
+ let fwd_diff = mbstate.get_diff_mv_b(sstate, true);
+ write_mv(bw, fwd_diff);
+ },
+ MBType::Backward => {
+ let bwd_diff = mbstate.get_diff_mv_b(sstate, false);
+ write_mv(bw, bwd_diff);
+ },
+ MBType::Bidir => { // forward difference is written before the backward one
+ let fwd_diff = mbstate.get_diff_mv_b(sstate, true);
+ let bwd_diff = mbstate.get_diff_mv_b(sstate, false);
+ write_mv(bw, fwd_diff);
+ write_mv(bw, bwd_diff);
+ },
+ MBType::Invalid => unreachable!(), // an invalid type here is a caller bug
+ _ => unimplemented!(), // remaining macroblock types are not handled by this writer
+ };
+}
+
+trait CodeWriter { // Lets a code table type write the codeword of a symbol instead of reading one.
+ fn write(&self, bw: &mut BitWriter, code: u16); // writes the codeword assigned to symbol `code`
+}
+
+impl CodeWriter for RV34CodeReader { // Symbol lookup by linear scan over the table's parallel syms/codes/lengths arrays.
+ fn write(&self, bw: &mut BitWriter, to_write: u16) {
+ for (&sym, (&code, &bits)) in self.syms.iter().zip(self.codes.iter().zip(self.lengths.iter())) {
+ if sym == to_write { // first matching symbol wins
+ bw.write(code, bits);
+ return;
+ }
+ }
+unreachable!(); // panics if the symbol is not in the table
+ }
+}
+
+impl CodeWriter for RV34CBPCodeReader { // Same linear lookup as above; symbols are widened to u16 before comparing.
+ fn write(&self, bw: &mut BitWriter, to_write: u16) {
+ for (&sym, (&code, &bits)) in self.syms.iter().zip(self.codes.iter().zip(self.lengths.iter())) {
+ if u16::from(sym) == to_write { // first matching symbol wins
+ bw.write(code, bits);
+ return;
+ }
+ }
+unreachable!(); // panics if the symbol is not in the table
+ }
+}
+
+struct CBPSet { // Code tables used to write the coded block pattern.
+ cbp_pattern: RV34CodeReader, // table for the combined pattern symbol
+ cbp: [RV34CBPCodeReader; 4] // per-group tables, indexed by (number of non-empty luma groups - 1)
+}
+
+impl CBPSet {
+ fn new(intra: bool, set: usize, subset: usize) -> Self { // Builds the tables for one set; `subset` is only used on the intra path.
+ if intra {
+ let cbp_pat = RV34CodeReader::new(&RV34_INTRA_CBPPAT[set][subset]);
+ let cbp0 = RV34CBPCodeReader::new(&RV34_INTRA_CBP[set][subset]); // intra CBP tables are taken at subset, subset+2, subset+4, subset+6
+ let cbp1 = RV34CBPCodeReader::new(&RV34_INTRA_CBP[set][subset + 1*2]);
+ let cbp2 = RV34CBPCodeReader::new(&RV34_INTRA_CBP[set][subset + 2*2]);
+ let cbp3 = RV34CBPCodeReader::new(&RV34_INTRA_CBP[set][subset + 3*2]);
+ CBPSet { cbp_pattern: cbp_pat, cbp: [cbp0, cbp1, cbp2, cbp3] }
+ } else {
+ let cbp_pat = RV34CodeReader::new(&RV34_INTER_CBPPAT[set]); // inter sets have a single pattern table and four CBP tables
+ let cbp0 = RV34CBPCodeReader::new(&RV34_INTER_CBP[set][0]);
+ let cbp1 = RV34CBPCodeReader::new(&RV34_INTER_CBP[set][1]);
+ let cbp2 = RV34CBPCodeReader::new(&RV34_INTER_CBP[set][2]);
+ let cbp3 = RV34CBPCodeReader::new(&RV34_INTER_CBP[set][3]);
+ CBPSet { cbp_pattern: cbp_pat, cbp: [cbp0, cbp1, cbp2, cbp3] }
+ }
+ }
+}
+
+struct CoefSet { // Code tables for the 2x2 sub-block patterns of a 4x4 block.
+ pat0: Vec<RV34CodeReader>, // first sub-block pattern (combined with flags for the other three)
+ pat1: Vec<RV34CodeReader>, // second and third sub-block patterns
+ pat2: Vec<RV34CodeReader>, // fourth sub-block pattern
+}
+
+impl CoefSet {
+ fn new(intra: bool, set: usize) -> Self { // Builds the pattern tables of one set from the intra or inter constant tables.
+ if intra {
+ let first0 = RV34CodeReader::new(&RV34_INTRA_FIRSTPAT[set][0]); // intra sets carry four first-pattern tables
+ let first1 = RV34CodeReader::new(&RV34_INTRA_FIRSTPAT[set][1]);
+ let first2 = RV34CodeReader::new(&RV34_INTRA_FIRSTPAT[set][2]);
+ let first3 = RV34CodeReader::new(&RV34_INTRA_FIRSTPAT[set][3]);
+ let firsts = vec![first0, first1, first2, first3];
+
+ let second0 = RV34CodeReader::new(&RV34_INTRA_SECONDPAT[set][0]);
+ let second1 = RV34CodeReader::new(&RV34_INTRA_SECONDPAT[set][1]);
+ let seconds = vec![second0, second1];
+
+ let third0 = RV34CodeReader::new(&RV34_INTRA_THIRDPAT[set][0]);
+ let third1 = RV34CodeReader::new(&RV34_INTRA_THIRDPAT[set][1]);
+ let thirds = vec![third0, third1];
+
+ CoefSet { pat0: firsts, pat1: seconds, pat2: thirds }
+ } else {
+ let first0 = RV34CodeReader::new(&RV34_INTER_FIRSTPAT[set][0]); // inter sets carry only two first-pattern tables
+ let first1 = RV34CodeReader::new(&RV34_INTER_FIRSTPAT[set][1]);
+ let firsts = vec![first0, first1];
+
+ let second0 = RV34CodeReader::new(&RV34_INTER_SECONDPAT[set][0]);
+ let second1 = RV34CodeReader::new(&RV34_INTER_SECONDPAT[set][1]);
+ let seconds = vec![second0, second1];
+
+ let third0 = RV34CodeReader::new(&RV34_INTER_THIRDPAT[set][0]);
+ let third1 = RV34CodeReader::new(&RV34_INTER_THIRDPAT[set][1]);
+ let thirds = vec![third0, third1];
+
+ CoefSet { pat0: firsts, pat1: seconds, pat2: thirds }
+ }
+ }
+}
+
+struct FullSet { // All code tables belonging to one intra or inter set.
+ cbp: Vec<CBPSet>, // two entries for intra sets, one for inter sets
+ cset: CoefSet, // sub-block pattern tables
+ coeffs: RV34CodeReader, // table for escaped coefficient magnitudes
+}
+
+impl FullSet {
+ fn new(intra: bool, set: usize) -> Self { // Builds every table of set number `set` for the intra or inter family.
+ if intra {
+ let cbp0 = CBPSet::new(intra, set, 0); // intra sets have two CBP subsets
+ let cbp1 = CBPSet::new(intra, set, 1);
+ let cbp: Vec<CBPSet> = vec![cbp0, cbp1];
+ let cset = CoefSet::new(intra, set);
+ let coeffs = RV34CodeReader::new(&RV34_INTRA_COEFFS[set]);
+ FullSet { cbp, cset, coeffs }
+ } else {
+ let cbp0 = CBPSet::new(intra, set, 0); // inter sets have a single CBP subset
+ let cbp: Vec<CBPSet> = vec![cbp0];
+ let cset = CoefSet::new(intra, set);
+ let coeffs = RV34CodeReader::new(&RV34_INTER_COEFFS[set]);
+ FullSet { cbp, cset, coeffs }
+ }
+ }
+ fn write_block(&self, bw: &mut BitWriter, blk: &Block, subset_idx: usize, luma: bool) { // Writes one 4x4 block as four 2x2 sub-blocks; `subset_idx` picks the first-pattern table, `luma` the others.
+ let sblk0 = [blk.coeffs[0], blk.coeffs[1], blk.coeffs[4], blk.coeffs[5]]; // top-left 2x2
+ let sblk1 = [blk.coeffs[2], blk.coeffs[3], blk.coeffs[6], blk.coeffs[7]]; // top-right 2x2
+ let sblk2 = [blk.coeffs[8], blk.coeffs[12], blk.coeffs[9], blk.coeffs[13]]; // sub-block 2 has different order
+ let sblk3 = [blk.coeffs[10], blk.coeffs[11], blk.coeffs[14], blk.coeffs[15]]; // bottom-right 2x2
+
+ let idx0 = get_subblock_index(&sblk0);
+ let idx1 = get_subblock_index(&sblk1);
+ let idx2 = get_subblock_index(&sblk2);
+ let idx3 = get_subblock_index(&sblk3);
+
+ let mut cflags = idx0; // first symbol: sub-block 0 index followed by three "sub-block present" bits
+ cflags = (cflags << 1) | ((idx1 != 0) as u16);
+ cflags = (cflags << 1) | ((idx2 != 0) as u16);
+ cflags = (cflags << 1) | ((idx3 != 0) as u16);
+
+ self.cset.pat0[subset_idx].write(bw, cflags);
+
+ if matches!(idx0, 0 | 27 | 54 | 81) { // only first coefficient is set
+ write_single_coeff(bw, &self.coeffs, sblk0[0], 3);
+ } else {
+ write_coeffs(bw, &self.coeffs, &sblk0);
+ }
+ if idx1 != 0 { // empty sub-blocks are signalled by the flag bits only
+ self.cset.pat1[!luma as usize].write(bw, idx1); // table 0 for luma, table 1 for chroma
+ write_coeffs(bw, &self.coeffs, &sblk1);
+ }
+ if idx2 != 0 {
+ self.cset.pat1[!luma as usize].write(bw, idx2);
+ write_coeffs(bw, &self.coeffs, &sblk2);
+ }
+ if idx3 != 0 {
+ self.cset.pat2[!luma as usize].write(bw, idx3); // the last sub-block uses the third pattern tables
+ write_coeffs(bw, &self.coeffs, &sblk3);
+ }
+ }
+}
+
+fn write_coeffs(bw: &mut BitWriter, coeffs: &RV34CodeReader, blk: &[i16; 4]) { // Writes the four coefficients of a 2x2 sub-block in order.
+ for (&val, &limit) in blk.iter().zip([3i16, 2, 2, 2].iter()) { // escape limit is 3 for the first coefficient and 2 for the rest
+ write_single_coeff(bw, coeffs, val, limit);
+ }
+}
+
+fn write_single_coeff(bw: &mut BitWriter, coeffs: &RV34CodeReader, val: i16, limit: i16) { // Writes the escape part (if any) and the sign of one coefficient; a zero coefficient writes nothing.
+ if val != 0 {
+ if val.abs() >= limit { // magnitudes below `limit` carry no escape; only their sign is written here
+ let mut val = (val.abs() - limit) as u16; // shadows the parameter inside this block only
+ if val > 23 { // large remainders: symbol 23 + bit count, then the bits below the leading one of (val - 22)
+ val -= 22;
+ let bits = (15 - val.leading_zeros()) as u16; // position of the highest set bit
+ coeffs.write(bw, bits + 23);
+ bw.write(u32::from(val - (1 << bits)), bits as u8);
+ } else {
+ coeffs.write(bw, val); // small remainders are coded directly as the symbol
+ }
+ }
+ if val > 0 { // sign bit uses the original signed value: 0 for positive, 1 for negative
+ bw.write0();
+ } else {
+ bw.write1();
+ }
+ }
+}
+
+pub struct CodeSets { // Holds every intra/inter table set plus the selection state for the current macroblock.
+ super_idx: usize, // index derived from quantiser and subset in `init`
+ set_idx: usize, // currently selected table set
+ intra: bool, // true when the intra sets are used for the current macroblock
+ is16: bool, // copied from `mbtype.is_16()` in `set_params`
+ is_p16: bool, // true for `MacroblockType::InterMix`
+
+ iset: Vec<FullSet>, // five intra sets
+ pset: Vec<FullSet>, // seven inter sets
+}
+
+impl CodeSets {
+ pub fn new() -> Self { // Builds all five intra and seven inter sets and starts with everything cleared.
+ let mut iset: Vec<FullSet> = Vec::with_capacity(5);
+ for set in 0..5 { iset.push(FullSet::new(true, set)); }
+ let mut pset: Vec<FullSet> = Vec::with_capacity(7);
+ for set in 0..7 { pset.push(FullSet::new(false, set)); }
+
+ Self {
+ iset, pset,
+ super_idx: 0,
+ set_idx: 0,
+ intra: false,
+ is16: false,
+ is_p16: false,
+ }
+ }
+ pub fn init(&mut self, quant: usize, subset: usize) { // Derives `super_idx` from the quantiser and the subset number.
+ let mut idx = quant as usize;
+ if (subset == 2) && (idx < 19) { // subset 2 shifts low quantisers by 10
+ idx += 10;
+ } else if (subset != 0) && (idx < 26) { // any other non-zero subset shifts by 5
+ idx += 5;
+ }
+ if idx > 30 { // result is capped at 30
+ idx = 30;
+ }
+ self.super_idx = idx;
+ }
+ pub fn set_params(&mut self, mbtype: &MacroblockType) { // Records the macroblock class and selects the matching table set.
+ self.is_p16 = matches!(*mbtype, MacroblockType::InterMix(_));
+ self.intra = mbtype.is_intra() || self.is_p16; // InterMix macroblocks start with the intra tables
+ self.is16 = mbtype.is_16();
+ self.set_idx = if self.intra {
+ RV34_SET_IDX_INTRA[self.super_idx]
+ } else {
+ RV34_SET_IDX_INTER[self.super_idx]
+ };
+ }
+ fn write_cbp(&self, bw: &mut BitWriter, coded_pat: [bool; 24], cbp_code: &CBPSet) { // Writes the coded block pattern: combined symbol, per-group luma patterns, then chroma disambiguation bits.
+ let mut cbp_pat = 0u16;
+ for i in 16..20 { // base-3 digits: how many of the paired entries i and i+4 are coded (0, 1 or 2)
+ cbp_pat = cbp_pat * 3 + (coded_pat[i] as u16) + (coded_pat[i + 4] as u16);
+ }
+ let mut nnz = 0usize;
+ for blk4 in coded_pat[..16].chunks(4) { // one bit per group of four luma entries
+ let cur_nz = blk4.contains(&true);
+ if cur_nz {
+ nnz += 1;
+ }
+ cbp_pat = cbp_pat * 2 + (cur_nz as u16);
+ }
+ nnz = nnz.saturating_sub(1); // table index is the non-empty group count minus one (0 stays 0)
+
+ cbp_code.cbp_pattern.write(bw, cbp_pat);
+ for blk4 in coded_pat[..16].chunks(4) {
+ let pat = (blk4[3] as u16) * 32 + (blk4[2] as u16) * 16 + (blk4[1] as u16) * 2 + (blk4[0] as u16); // entries map to bits 0, 1, 4 and 5
+ if pat != 0 { // empty groups were already signalled in the combined symbol
+ cbp_code.cbp[nnz].write(bw, pat);
+ }
+ }
+ for i in 16..20 {
+ if coded_pat[i] ^ coded_pat[i + 4] { // exactly one of the pair is coded: one bit says which
+ if coded_pat[i] {
+ bw.write1();
+ } else {
+ bw.write0();
+ }
+ }
+ }
+ }
+ pub fn write_coeffs(&mut self, bw: &mut BitWriter, coeffs: &[Block; 25]) { // Writes the coded block pattern followed by every non-empty block of a macroblock.
+ let mut fset = if self.intra { &self.iset[self.set_idx] } else { &self.pset[self.set_idx] };
+
+ const CODED_ORDER: [usize; 24] = [0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]; // block order used for the pattern: luma regrouped per 2x2 group, remaining entries unchanged
+ let cbp_code = &fset.cbp[if self.is16 { 1 } else { 0 }]; // second CBP subset for 16x16 macroblocks
+ let mut coded_blk = [false; 24];
+ let mut coded_pat = [false; 24];
+ for (i, ((cpat, cblk), &seq)) in coded_pat.iter_mut().zip(coded_blk.iter_mut())
+ .zip(CODED_ORDER.iter()).enumerate() {
+ *cpat = !coeffs[seq].is_empty(); // pattern flags follow CODED_ORDER
+ *cblk = !coeffs[i].is_empty(); // block flags follow plain index order
+ }
+ self.write_cbp(bw, coded_pat, cbp_code);
+
+ if self.is16 {
+ fset.write_block(bw, &coeffs[24], 3, true); // block 24 is written first and unconditionally for 16x16 macroblocks
+ }
+ let (luma_set, chroma_set) = if self.intra { // first-pattern table numbers for luma and chroma blocks
+ (if self.is16 { 2 } else { 1 }, if !self.is_p16 { 0 } else { 1 })
+ } else {
+ (0, 1)
+ };
+ let mut citer = coded_blk.iter(); // shared between both loops so chroma continues after the 16 luma flags
+ for blk in coeffs[..16].iter() {
+ if let Some(true) = citer.next() {
+ fset.write_block(bw, blk, luma_set, true);
+ }
+ }
+ if self.is_p16 { // InterMix: blocks 16..24 switch to the inter set (this also updates `set_idx`)
+ self.set_idx = RV34_SET_IDX_INTER[self.super_idx];
+ fset = &self.pset[self.set_idx];
+ }
+ for blk in coeffs[16..24].iter() {
+ if let Some(true) = citer.next() {
+ fset.write_block(bw, blk, chroma_set, false);
+ }
+ }
+ }
+}
+
+fn get_subblock_index(blk: &[i16; 4]) -> u16 { // Packs the clamped magnitudes of a 2x2 sub-block into one base-3 style index (first digit ranges 0..=3).
+ let mut idx = blk[0].abs().min(3) as u16; // first magnitude saturates at 3
+ idx = idx * 3 + (blk[1].abs().min(2) as u16); // the others saturate at 2
+ idx = idx * 3 + (blk[2].abs().min(2) as u16);
+ idx = idx * 3 + (blk[3].abs().min(2) as u16);
+ idx
+}
--- /dev/null
+use super::super::types::Block;
+use super::clip8;
+
+pub trait BlockOps { // Operations on a 4x4 coefficient block: differencing, reconstruction, (de)quantisation and transforms.
+ fn from_diff(&mut self, new: &[u8], old: &[u8], stride: usize); // fills the block with new - old, reading both inputs with the given stride
+ fn add_to(&self, dst: &mut [u8], stride: usize); // adds the block to `dst` with clipping
+ fn quant_dcs(&mut self, q_dc: usize, q_ac: usize); // quantises a block of DCs; arguments are quantiser table indices
+ fn quant(&mut self, q_dc: usize, q_ac: usize); // quantises a regular block (element 0 uses q_dc)
+ fn dequant_dcs(&mut self, q_dc: usize, q_ac: usize); // inverse of `quant_dcs`
+ fn dequant(&mut self, q_dc: usize, q_ac: usize); // inverse of `quant`
+ fn transform_4x4(&mut self); // forward transform, in place
+ fn transform_dcs(&mut self); // forward transform with the DC-block scaling
+ fn itransform_4x4(&mut self); // inverse transform, in place
+ fn itransform_dcs(&mut self); // inverse transform with the DC-block scaling
+}
+
+macro_rules! tx { // One-dimensional 4-point forward transform: reads $a..$d, writes $o0..$o3.
+ ($a:expr, $b:expr, $c:expr, $d:expr, $o0:expr, $o1:expr, $o2:expr, $o3:expr) => {
+ let t0 = $a + $d; // all inputs are read into temporaries first, so outputs may alias inputs
+ let t1 = $a - $d;
+ let t2 = $b + $c;
+ let t3 = $b - $c;
+ $o0 = 13 * (t0 + t2);
+ $o2 = 13 * (t0 - t2);
+ $o1 = 17 * t1 + 7 * t3;
+ $o3 = 7 * t1 - 17 * t3;
+ }
+}
+
+macro_rules! itx { // One-dimensional 4-point inverse transform performed in place on $a..$d.
+ ($a:expr, $b:expr, $c:expr, $d:expr, $bias:expr) => {
+ let t0 = 13 * ($a + $c) + $bias; // $bias is added to the even part, so it reaches every output
+ let t1 = 13 * ($a - $c) + $bias;
+ let t2 = 7 * $b - 17 * $d;
+ let t3 = 17 * $b + 7 * $d;
+ $a = t0 + t3;
+ $d = t0 - t3;
+ $b = t1 + t2;
+ $c = t1 - t2;
+ }
+}
+
+impl BlockOps for Block {
+ /// Fills the block with `new - old`, taking four samples from each `stride`-spaced row.
+ fn from_diff(&mut self, new: &[u8], old: &[u8], stride: usize) {
+ for (dline, (oline, nline)) in self.coeffs.chunks_mut(4).zip(old.chunks(stride).zip(new.chunks(stride))) {
+ for (dst, (&o, &n)) in dline.iter_mut().zip(oline.iter().zip(nline.iter())) {
+ *dst = i16::from(n) - i16::from(o);
+ }
+ }
+ }
+ /// Adds the block to `dst` (rows `stride` apart), clipping each sum with `clip8`.
+ fn add_to(&self, dst: &mut [u8], stride: usize) {
+ for (line, row) in dst.chunks_mut(stride).zip(self.coeffs.chunks(4)) {
+ for (dst, &add) in line.iter_mut().zip(row.iter()) {
+ *dst = clip8(i16::from(*dst) + add);
+ }
+ }
+ }
+ /// Quantises a block of DC values; elements 0, 1 and 4 use `q_dc`, the rest `q_ac`.
+ /// Results are clamped to -511..=511. Both arguments index `RV34_QUANT_TAB`.
+ fn quant_dcs(&mut self, q_dc: usize, q_ac: usize) {
+ let q_dc = i32::from(RV34_QUANT_TAB[q_dc]);
+ let q_ac = i32::from(RV34_QUANT_TAB[q_ac]);
+ for (i, el) in self.coeffs.iter_mut().enumerate() {
+ if *el != 0 {
+ let q = if matches!(i, 0 | 1 | 4) { q_dc } else { q_ac };
+ *el = (i32::from(*el) * 16 / q).max(-511).min(511) as i16;
+ }
+ }
+ }
+ /// Quantises a regular block; element 0 uses `q_dc`, all others `q_ac`.
+ /// Both arguments index `RV34_QUANT_TAB`.
+ fn quant(&mut self, q_dc: usize, q_ac: usize) {
+ // Widen to i32 like the sibling methods: `coef * 16` overflows i16 once |coef| >= 2048.
+ // The quotient always fits back into i16 because every table entry is at least 60.
+ let q_dc = i32::from(RV34_QUANT_TAB[q_dc]);
+ let q_ac = i32::from(RV34_QUANT_TAB[q_ac]);
+ if self.coeffs[0] != 0 {
+ self.coeffs[0] = (i32::from(self.coeffs[0]) * 16 / q_dc) as i16;
+ }
+ for el in self.coeffs.iter_mut().skip(1) {
+ if *el != 0 {
+ *el = (i32::from(*el) * 16 / q_ac) as i16;
+ }
+ }
+ }
+ /// Dequantises a block of DC values with rounding (`(v * q + 8) >> 4`);
+ /// elements 0, 1 and 4 use `q_dc`, the rest `q_ac`.
+ fn dequant_dcs(&mut self, q_dc: usize, q_ac: usize) {
+ let q_dc = i32::from(RV34_QUANT_TAB[q_dc]);
+ let q_ac = i32::from(RV34_QUANT_TAB[q_ac]);
+ for (i, el) in self.coeffs.iter_mut().enumerate() {
+ if *el != 0 {
+ let q = if matches!(i, 0 | 1 | 4) { q_dc } else { q_ac };
+ *el = ((i32::from(*el) * q + 8) >> 4) as i16;
+ }
+ }
+ }
+ /// Dequantises a regular block with rounding; element 0 uses `q_dc`, all others `q_ac`.
+ fn dequant(&mut self, q_dc: usize, q_ac: usize) {
+ let q_ac = i32::from(RV34_QUANT_TAB[q_ac]);
+ if self.coeffs[0] != 0 {
+ let q_dc = i32::from(RV34_QUANT_TAB[q_dc]);
+ self.coeffs[0] = ((i32::from(self.coeffs[0]) * q_dc + 8) >> 4) as i16;
+ }
+ for el in self.coeffs.iter_mut().skip(1) {
+ if *el != 0 {
+ *el = ((i32::from(*el) * q_ac + 8) >> 4) as i16;
+ }
+ }
+ }
+ /// Forward 4x4 transform in place: rows first, then columns, then scaling by 1/446.
+ fn transform_4x4(&mut self) {
+ let mut tmp = [0; 16];
+ for (drow, srow) in tmp.chunks_mut(4).zip(self.coeffs.chunks(4)) {
+ tx!(i32::from(srow[0]), i32::from(srow[1]), i32::from(srow[2]), i32::from(srow[3]),
+ drow[0], drow[1], drow[2], drow[3]);
+ }
+ for i in 0..4 {
+ tx!(tmp[i], tmp[i + 4], tmp[i + 8], tmp[i + 12],
+ tmp[i], tmp[i + 4], tmp[i + 8], tmp[i + 12]);
+ }
+ for (dst, &src) in self.coeffs.iter_mut().zip(tmp.iter()) {
+ // NOTE(review): integer division truncates toward zero, so with the +223 bias
+ // negative values round differently from positive ones -- confirm this is intended.
+ *dst = ((src + 223) / 446) as i16;
+ }
+ }
+ /// Forward transform for a block of DC values; same passes as `transform_4x4`, scaled by 1/669.
+ fn transform_dcs(&mut self) {
+ let mut tmp = [0; 16];
+ for (drow, srow) in tmp.chunks_mut(4).zip(self.coeffs.chunks(4)) {
+ tx!(i32::from(srow[0]), i32::from(srow[1]), i32::from(srow[2]), i32::from(srow[3]),
+ drow[0], drow[1], drow[2], drow[3]);
+ }
+ for i in 0..4 {
+ tx!(tmp[i], tmp[i + 4], tmp[i + 8], tmp[i + 12],
+ tmp[i], tmp[i + 4], tmp[i + 8], tmp[i + 12]);
+ }
+ for (dst, &src) in self.coeffs.iter_mut().zip(tmp.iter()) {
+ // NOTE(review): same truncating division as in `transform_4x4`.
+ *dst = ((src + 334) / 669) as i16;
+ }
+ }
+ /// Inverse 4x4 transform in place: rows, then columns with a 0x200 rounding bias, then `>> 10`.
+ fn itransform_4x4(&mut self) {
+ let mut tmp: [i32; 16] = [0; 16];
+ for (dst, &src) in tmp.iter_mut().zip(self.coeffs.iter()) {
+ *dst = i32::from(src);
+ }
+ for row in tmp.chunks_mut(4) {
+ itx!(row[0], row[1], row[2], row[3], 0);
+ }
+ for i in 0..4 {
+ itx!(tmp[i], tmp[i + 4], tmp[i + 2 * 4], tmp[i + 3 * 4], 0x200);
+ }
+ for (dst, &src) in self.coeffs.iter_mut().zip(tmp.iter()) {
+ *dst = (src >> 10) as i16;
+ }
+ }
+ /// Inverse transform for a block of DC values: no rounding bias, output scaled by `* 3 >> 11`.
+ fn itransform_dcs(&mut self) {
+ let mut tmp: [i32; 16] = [0; 16];
+ for (dst, &src) in tmp.iter_mut().zip(self.coeffs.iter()) {
+ *dst = i32::from(src);
+ }
+ for row in tmp.chunks_mut(4) {
+ itx!(row[0], row[1], row[2], row[3], 0);
+ }
+ for i in 0..4 {
+ itx!(tmp[i], tmp[i + 4], tmp[i + 2 * 4], tmp[i + 3 * 4], 0);
+ }
+ for (dst, &src) in self.coeffs.iter_mut().zip(tmp.iter()) {
+ *dst = ((src * 3) >> 11) as i16;
+ }
+ }
+}
+
+const RV34_QUANT_TAB: [i16; 32] = [ // Quantiser values looked up by the 0..=31 indices passed to the quant/dequant methods.
+ 60, 67, 76, 85, 96, 108, 121, 136,
+ 152, 171, 192, 216, 242, 272, 305, 341,
+ 383, 432, 481, 544, 606, 683, 767, 854,
+ 963, 1074, 1212, 1392, 1566, 1708, 1978, 2211
+];
--- /dev/null
+use super::super::types::{PredType8x8, PredType4x4};
+use super::RefMBData;
+
+#[derive(Default)]
+pub struct IntraPred16x16 { // Neighbouring samples used for 16x16 and 8x8 intra prediction.
+ pub top: [u8; 17], // entries 1..=16 are the samples used as the top row; entry 0 is presumably the top-left corner -- TODO confirm
+ pub left: [u8; 17], // same layout for the left column
+}
+
+impl IntraPred16x16 {
+ pub fn new() -> Self { Self::default() } // all neighbour samples start at zero
+ #[allow(clippy::many_single_char_names)]
+ pub fn apply16(&self, mode: PredType8x8, dst: &mut [u8], stride: usize) { // Fills a 16x16 area of `dst` (rows `stride` apart) with the prediction for `mode`.
+ match mode {
+ PredType8x8::DC => { // rounded mean of the 16 top and 16 left samples
+ let sumt = self.top[1..].iter().fold(0u32, |acc, &x| acc + u32::from(x));
+ let suml = self.left[1..].iter().fold(0u32, |acc, &x| acc + u32::from(x));
+ let dc = ((sumt + suml + 16) >> 5) as u8;
+ for line in dst.chunks_mut(stride).take(16) {
+ for dst in line[..16].iter_mut() {
+ *dst = dc;
+ }
+ }
+ },
+ PredType8x8::Hor => { // every row repeats its left neighbour
+ for (&left, line) in self.left[1..].iter().zip(dst.chunks_mut(stride)) {
+ for dst in line[..16].iter_mut() {
+ *dst = left;
+ }
+ }
+ },
+ PredType8x8::Ver => { // every row is a copy of the top row
+ for line in dst.chunks_mut(stride).take(16) {
+ line[..16].copy_from_slice(&self.top[1..]);
+ }
+ },
+ PredType8x8::Plane => { // linear gradient fitted to the top and left edges
+ let top0 = &self.top[9..]; // right half of the top edge
+ let top1 = &self.top[..8]; // left half including entry 0, walked in reverse below
+ let h = top0.iter().zip(top1.iter().rev()).enumerate().fold(
+ 0i32, |acc, (k, (&a, &b))| acc + ((k + 1) as i32) * (i32::from(a) - i32::from(b))); // weighted sum of mirrored differences
+ let left0 = &self.left[9..];
+ let left1 = &self.left[..8];
+ let v = left0.iter().zip(left1.iter().rev()).enumerate().fold(
+ 0i32, |acc, (k, (&a, &b))| acc + ((k + 1) as i32) * (i32::from(a) - i32::from(b)));
+ let b = (h + (h >> 2)) >> 4; // horizontal step per pixel
+ let c = (v + (v >> 2)) >> 4; // vertical step per row
+ let mut a = 16 * (i32::from(self.left[16]) + i32::from(self.top[16])) + 16 - 7 * (b + c); // value at the top-left pixel, before the final >> 5
+
+ for line in dst.chunks_mut(stride).take(16) {
+ let mut oval = a;
+ for el in line[..16].iter_mut() {
+ *el = (oval >> 5).max(0).min(255) as u8; // clamp to the 8-bit range
+ oval += b;
+ }
+ a += c;
+ }
+ },
+ PredType8x8::LeftDC => { // rounded mean of the left samples only
+ let dc = ((self.left[1..].iter().fold(0u32, |acc, &x| acc + u32::from(x)) + 8) >> 4) as u8;
+ for line in dst.chunks_mut(stride).take(16) {
+ for dst in line[..16].iter_mut() {
+ *dst = dc;
+ }
+ }
+ },
+ PredType8x8::TopDC => { // rounded mean of the top samples only
+ let dc = ((self.top[1..].iter().fold(0u32, |acc, &x| acc + u32::from(x)) + 8) >> 4) as u8;
+ for line in dst.chunks_mut(stride).take(16) {
+ for dst in line[..16].iter_mut() {
+ *dst = dc;
+ }
+ }
+ },
+ PredType8x8::DC128 => { // constant mid-grey fill
+ for line in dst.chunks_mut(stride).take(16) {
+ for dst in line[..16].iter_mut() {
+ *dst = 128;
+ }
+ }
+ },
+ }
+ }
+ pub fn apply8(&self, mode: PredType8x8, dst: &mut [u8], stride: usize) { // Fills an 8x8 area of `dst` using entries 1..9 of the neighbour arrays.
+ match mode {
+ PredType8x8::DC | PredType8x8::Plane => { // Plane is handled exactly like DC for 8x8 blocks
+ let sumt = self.top[1..9].iter().fold(0u32, |acc, &x| acc + u32::from(x));
+ let suml = self.left[1..9].iter().fold(0u32, |acc, &x| acc + u32::from(x));
+ let dc = ((sumt + suml + 8) >> 4) as u8;
+ for line in dst.chunks_mut(stride).take(8) {
+ for dst in line[..8].iter_mut() {
+ *dst = dc;
+ }
+ }
+ },
+ PredType8x8::Hor => { // every row repeats its left neighbour
+ for (&left, line) in self.left[1..9].iter().zip(dst.chunks_mut(stride)) {
+ for dst in line[..8].iter_mut() {
+ *dst = left;
+ }
+ }
+ },
+ PredType8x8::Ver => { // every row is a copy of the top row
+ for line in dst.chunks_mut(stride).take(8) {
+ line[..8].copy_from_slice(&self.top[1..9]);
+ }
+ },
+ PredType8x8::LeftDC => { // rounded mean of the eight left samples
+ let dc = ((self.left[1..9].iter().fold(0u32, |acc, &x| acc + u32::from(x)) + 4) >> 3) as u8;
+ for line in dst.chunks_mut(stride).take(8) {
+ for dst in line[..8].iter_mut() {
+ *dst = dc;
+ }
+ }
+ },
+ PredType8x8::TopDC => { // rounded mean of the eight top samples
+ let dc = ((self.top[1..9].iter().fold(0u32, |acc, &x| acc + u32::from(x)) + 4) >> 3) as u8;
+ for line in dst.chunks_mut(stride).take(8) {
+ for dst in line[..8].iter_mut() {
+ *dst = dc;
+ }
+ }
+ },
+ PredType8x8::DC128 => { // constant mid-grey fill
+ for line in dst.chunks_mut(stride).take(8) {
+ for dst in line[..8].iter_mut() {
+ *dst = 128;
+ }
+ }
+ },
+ }
+ }
+}
+
+#[derive(Default)]
+pub struct Intra4Pred { // Neighbouring samples used for 4x4 intra prediction.
+ pub top: [u8; 8], // eight samples along the top edge (the first four are the ones used by the DC/Ver modes)
+ pub left: [u8; 8], // eight samples along the left edge
+ pub tl: u8, // corner sample, placed at index 0 by `load_left_and_top`
+}
+
+impl Intra4Pred {
+ pub fn new() -> Self { Self::default() }
+ fn load_left(&self) -> [u16; 8] {
+ let mut ret = [0; 8];
+ for (dst, &src) in ret.iter_mut().zip(self.left.iter()) {
+ *dst = u16::from(src);
+ }
+ ret
+ }
+ fn load_top(&self) -> [u16; 8] {
+ let mut ret = [0; 8];
+ for (dst, &src) in ret.iter_mut().zip(self.top.iter()) {
+ *dst = u16::from(src);
+ }
+ ret
+ }
+ fn load_left_and_top(&self) -> ([u16; 5], [u16; 5]) {
+ let mut left = [0; 5];
+ let mut top = [0; 5];
+ left[0] = u16::from(self.tl);
+ top[0] = u16::from(self.tl);
+ for (dst, &src) in left[1..].iter_mut().zip(self.left.iter()) {
+ *dst = u16::from(src);
+ }
+ for (dst, &src) in top[1..].iter_mut().zip(self.top.iter()) {
+ *dst = u16::from(src);
+ }
+ (left, top)
+ }
+ #[allow(clippy::many_single_char_names)]
+ pub fn apply(&self, ptype: PredType4x4, buf: &mut [u8], stride: usize) {
+ match ptype {
+ PredType4x4::DC => {
+ let dc_l = self.left[..4].iter().fold(0u32, |acc, &x| acc + u32::from(x));
+ let dc_t = self.top [..4].iter().fold(0u32, |acc, &x| acc + u32::from(x));
+ let dc = ((dc_t + dc_l + 4) >> 3) as u8;
+ for line in buf.chunks_mut(stride).take(4) {
+ for el in line[..4].iter_mut() {
+ *el = dc;
+ }
+ }
+ },
+ PredType4x4::LeftDC => {
+ let dc_l = self.left[..4].iter().fold(0u32, |acc, &x| acc + u32::from(x));
+ let dc = ((dc_l + 2) >> 2) as u8;
+ for line in buf.chunks_mut(stride).take(4) {
+ for el in line[..4].iter_mut() {
+ *el = dc;
+ }
+ }
+ },
+ PredType4x4::TopDC => {
+ let dc_t = self.top [..4].iter().fold(0u32, |acc, &x| acc + u32::from(x));
+ let dc = ((dc_t + 2) >> 2) as u8;
+ for line in buf.chunks_mut(stride).take(4) {
+ for el in line[..4].iter_mut() {
+ *el = dc;
+ }
+ }
+ },
+ PredType4x4::DC128 => {
+ for line in buf.chunks_mut(stride).take(4) {
+ for el in line[..4].iter_mut() {
+ *el = 128;
+ }
+ }
+ },
+ PredType4x4::Ver => {
+ for line in buf.chunks_mut(stride).take(4) {
+ line[..4].copy_from_slice(&self.top[..4]);
+ }
+ },
+ PredType4x4::Hor => {
+ for (&left, line) in self.left[..4].iter().zip(buf.chunks_mut(stride)) {
+ for dst in line[..4].iter_mut() {
+ *dst = left;
+ }
+ }
+ },
+ PredType4x4::DiagDownLeft => {
+ let l = self.load_left();
+ let t = self.load_top();
+ buf[0] = ((t[0] + t[2] + 2*t[1] + 2 + l[0] + l[2] + 2*l[1] + 2) >> 3) as u8;
+ let pix = ((t[1] + t[3] + 2*t[2] + 2 + l[1] + l[3] + 2*l[2] + 2) >> 3) as u8;
+ buf[1] = pix;
+ buf[stride] = pix;
+ let pix = ((t[2] + t[4] + 2*t[3] + 2 + l[2] + l[4] + 2*l[3] + 2) >> 3) as u8;
+ buf[2] = pix;
+ buf[1 + stride] = pix;
+ buf[2 * stride] = pix;
+ let pix = ((t[3] + t[5] + 2*t[4] + 2 + l[3] + l[5] + 2*l[4] + 2) >> 3) as u8;
+ buf[3] = pix;
+ buf[2 + stride] = pix;
+ buf[1 + 2 * stride] = pix;
+ buf[ 3 * stride] = pix;
+ let pix = ((t[4] + t[6] + 2*t[5] + 2 + l[4] + l[6] + 2*l[5] + 2) >> 3) as u8;
+ buf[3 + stride] = pix;
+ buf[2 + 2 * stride] = pix;
+ buf[1 + 3 * stride] = pix;
+ let pix = ((t[5] + t[7] + 2*t[6] + 2 + l[5] + l[7] + 2*l[6] + 2) >> 3) as u8;
+ buf[3 + 2 * stride] = pix;
+ buf[2 + 3 * stride] = pix;
+ buf[3 + 3 * stride] = ((t[6] + t[7] + 1 + l[6] + l[7] + 1) >> 2) as u8;
+ },
+ PredType4x4::DiagDownRight => {
+ let (l, t) = self.load_left_and_top();
+ for (j, line) in buf.chunks_mut(stride).take(4).enumerate() {
+ for i in 0..j {
+ line[i] = ((l[j - i - 1] + 2 * l[j - i] + l[j - i + 1] + 2) >> 2) as u8;
+ }
+ line[j] = ((l[1] + 2 * l[0] + t[1] + 2) >> 2) as u8;
+ for i in (j + 1)..4 {
+ line[i] = ((t[i - j - 1] + 2 * t[i - j] + t[i - j + 1] + 2) >> 2) as u8;
+ }
+ }
+ },
+ PredType4x4::VerRight => {
+ let (l, t) = self.load_left_and_top();
+ for (j, line) in buf.chunks_mut(stride).take(4).enumerate() {
+ for (i, pix) in line[..4].iter_mut().enumerate() {
+ let zvr = ((2 * i) as i8) - (j as i8);
+ *pix = if zvr >= 0 {
+ if (zvr & 1) == 0 {
+ (t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 1) >> 1
+ } else {
+ (t[i - (j >> 1) - 1] + 2 * t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 2) >> 2
+ }
+ } else {
+ if zvr == -1 {
+ (l[1] + 2 * l[0] + t[1] + 2) >> 2
+ } else {
+ (l[j] + 2 * l[j - 1] + l[j - 2] + 2) >> 2
+ }
+ } as u8;
+ }
+ }
+ },
+ PredType4x4::HorDown => {
+ let (l, t) = self.load_left_and_top();
+ for (j, line) in buf.chunks_mut(stride).take(4).enumerate() {
+ for (i, pix) in line[..4].iter_mut().enumerate() {
+ let zhd = ((2 * j) as i8) - (i as i8);
+ *pix = if zhd >= 0 {
+ if (zhd & 1) == 0 {
+ (l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 1) >> 1
+ } else {
+ (l[j - (i >> 1) - 1] + 2 * l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 2) >> 2
+ }
+ } else {
+ if zhd == -1 {
+ (l[1] + 2 * l[0] + t[1] + 2) >> 2
+ } else {
+ (t[i - 2] + 2 * t[i - 1] + t[i] + 2) >> 2
+ }
+ } as u8;
+ }
+ }
+ },
+ PredType4x4::VerLeft => {
+ let l = self.load_left();
+ let t = self.load_top();
+ buf[0] = ((2*t[0] + 2*t[1] + l[1] + 2*l[2] + l[3] + 4) >> 3) as u8;
+ let pix = ((t[1] + t[2] + 1) >> 1) as u8;
+ buf[1] = pix;
+ buf[2 * stride] = pix;
+ let pix = ((t[2] + t[3] + 1) >> 1) as u8;
+ buf[2] = pix;
+ buf[1 + 2 * stride] = pix;
+ let pix = ((t[3] + t[4] + 1) >> 1) as u8;
+ buf[3] = pix;
+ buf[2 + 2 * stride] = pix;
+ buf[3 + 2 * stride] = ((t[4] + t[5] + 1) >> 1) as u8;
+ buf[ stride] = ((t[0] + 2*t[1] + t[2] + l[2] + 2*l[3] + l[4] + 4) >> 3) as u8;
+ let pix = ((t[1] + 2*t[2] + t[3] + 2) >> 2) as u8;
+ buf[1 + stride] = pix;
+ buf[ 3 * stride] = pix;
+ let pix = ((t[2] + 2*t[3] + t[4] + 2) >> 2) as u8;
+ buf[2 + stride] = pix;
+ buf[1 + 3 * stride] = pix;
+ let pix = ((t[3] + 2*t[4] + t[5] + 2) >> 2) as u8;
+ buf[3 + stride] = pix;
+ buf[2 + 3 * stride] = pix;
+ buf[3 + 3 * stride] = ((t[4] + 2*t[5] + t[6] + 2) >> 2) as u8;
+ },
+ PredType4x4::HorUp => {
+ let l = self.load_left();
+ let t = self.load_top();
+ buf[0] = ((t[1] + 2*t[2] + t[3] + 2*l[0] + 2*l[1] + 4) >> 3) as u8;
+ buf[1] = ((t[2] + 2*t[3] + t[4] + l[0] + 2*l[1] + l[2] + 4) >> 3) as u8;
+ let pix = ((t[3] + 2*t[4] + t[5] + 2*l[1] + 2*l[2] + 4) >> 3) as u8;
+ buf[2] = pix;
+ buf[ stride] = pix;
+ let pix = ((t[4] + 2*t[5] + t[6] + l[1] + 2*l[2] + l[3] + 4) >> 3) as u8;
+ buf[3] = pix;
+ buf[1 + stride] = pix;
+ let pix = ((t[5] + 2*t[6] + t[7] + 2*l[2] + 2*l[3] + 4) >> 3) as u8;
+ buf[2 + stride] = pix;
+ buf[0 + 2 * stride] = pix;
+ let pix = ((t[6] + 3*t[7] + l[2] + 3*l[3] + 4) >> 3) as u8;
+ buf[3 + stride] = pix;
+ buf[1 + 2 * stride] = pix;
+ let pix = ((l[3] + 2*l[4] + l[5] + 2) >> 2) as u8;
+ buf[3 + 2 * stride] = pix;
+ buf[1 + 3 * stride] = pix;
+ let pix = ((t[6] + t[7] + l[3] + l[4] + 2) >> 2) as u8;
+ buf[0 + 3 * stride] = pix;
+ buf[2 + 2 * stride] = pix;
+ buf[2 + 3 * stride] = ((l[4] + l[5] + 1) >> 1) as u8;
+ buf[3 + 3 * stride] = ((l[4] + 2*l[5] + l[6] + 2) >> 2) as u8;
+ },
+ PredType4x4::DiagDownLeftNoDown => {
+ let l = self.load_left();
+ let t = self.load_top();
+ buf[0] = ((t[0] + t[2] + 2*t[1] + 2 + l[0] + l[2] + 2*l[1] + 2) >> 3) as u8;
+ let pix = ((t[1] + t[3] + 2*t[2] + 2 + l[1] + l[3] + 2*l[2] + 2) >> 3) as u8;
+ buf[1] = pix;
+ buf[0 + stride] = pix;
+ let pix = ((t[2] + t[4] + 2*t[3] + 2 + l[2] + 3*l[3] + 2) >> 3) as u8;
+ buf[2] = pix;
+ buf[1 + stride] = pix;
+ buf[0 + 2 * stride] = pix;
+ let pix = ((t[3] + t[5] + 2*t[4] + 2 + l[3]*4 + 2) >> 3) as u8;
+ buf[3] = pix;
+ buf[2 + stride] = pix;
+ buf[1 + 2 * stride] = pix;
+ buf[0 + 3 * stride] = pix;
+ let pix = ((t[4] + t[6] + 2*t[5] + 2 + l[3]*4 + 2) >> 3) as u8;
+ buf[3 + stride] = pix;
+ buf[2 + 2 * stride] = pix;
+ buf[1 + 3 * stride] = pix;
+ let pix = ((t[5] + t[7] + 2*t[6] + 2 + l[3]*4 + 2) >> 3) as u8;
+ buf[3 + 2 * stride] = pix;
+ buf[2 + 3 * stride] = pix;
+ buf[3 + 3 * stride] = ((t[6] + t[7] + 1 + 2*l[3] + 1) >> 2) as u8;
+ },
+ PredType4x4::HorUpNoDown => {
+ let l = self.load_left();
+ let t = self.load_top();
+ buf[0] = ((t[1] + 2*t[2] + t[3] + 2*l[0] + 2*l[1] + 4) >> 3) as u8;
+ buf[1] = ((t[2] + 2*t[3] + t[4] + l[0] + 2*l[1] + l[2] + 4) >> 3) as u8;
+ let pix = ((t[3] + 2*t[4] + t[5] + 2*l[1] + 2*l[2] + 4) >> 3) as u8;
+ buf[2] = pix;
+ buf[ stride] = pix;
+ let pix = ((t[4] + 2*t[5] + t[6] + l[1] + 2*l[2] + l[3] + 4) >> 3) as u8;
+ buf[3] = pix;
+ buf[1 + stride] = pix;
+ let pix = ((t[5] + 2*t[6] + t[7] + 2*l[2] + 2*l[3] + 4) >> 3) as u8;
+ buf[2 + stride] = pix;
+ buf[ 2 * stride] = pix;
+ let pix = ((t[6] + 3*t[7] + l[2] + 3*l[3] + 4) >> 3) as u8;
+ buf[3 + stride] = pix;
+ buf[1 + 2 * stride] = pix;
+ buf[3 + 2 * stride] = l[3] as u8;
+ buf[1 + 3 * stride] = l[3] as u8;
+ let pix = ((t[6] + t[7] + 2*l[3] + 2) >> 2) as u8;
+ buf[0 + 3 * stride] = pix;
+ buf[2 + 2 * stride] = pix;
+ buf[2 + 3 * stride] = l[3] as u8;
+ buf[3 + 3 * stride] = l[3] as u8;
+ },
+ PredType4x4::VerLeftNoDown => {
+ let l = [u16::from(self.left[0]), u16::from(self.left[1]), u16::from(self.left[2]), u16::from(self.left[3]), u16::from(self.left[3])];
+ let t = self.load_top();
+ buf[0] = ((2*t[0] + 2*t[1] + l[1] + 2*l[2] + l[3] + 4) >> 3) as u8;
+ let pix = ((t[1] + t[2] + 1) >> 1) as u8;
+ buf[1] = pix;
+ buf[ 2 * stride] = pix;
+ let pix = ((t[2] + t[3] + 1) >> 1) as u8;
+ buf[2] = pix;
+ buf[1 + 2 * stride] = pix;
+ let pix = ((t[3] + t[4] + 1) >> 1) as u8;
+ buf[3] = pix;
+ buf[2 + 2 * stride] = pix;
+ buf[3 + 2 * stride] = ((t[4] + t[5] + 1) >> 1) as u8;
+ buf[ stride] = ((t[0] + 2*t[1] + t[2] + l[2] + 2*l[3] + l[4] + 4) >> 3) as u8;
+ let pix = ((t[1] + 2*t[2] + t[3] + 2) >> 2) as u8;
+ buf[1 + stride] = pix;
+ buf[ 3 * stride] = pix;
+ let pix = ((t[2] + 2*t[3] + t[4] + 2) >> 2) as u8;
+ buf[2 + stride] = pix;
+ buf[1 + 3 * stride] = pix;
+ let pix = ((t[3] + 2*t[4] + t[5] + 2) >> 2) as u8;
+ buf[3 + stride] = pix;
+ buf[2 + 3 * stride] = pix;
+ buf[3 + 3 * stride] = ((t[4] + 2*t[5] + t[6] + 2) >> 2) as u8;
+ },
+ }
+ }
+}
+
+/// Working state for 4x4 intra prediction inside one macroblock: one
+/// `Intra4Pred` per plane plus local copies of the neighbouring pixels.
+pub struct BlockIntra4Pred {
+ // Per-plane 4x4 predictors whose `tl`/`top`/`left` fields are refreshed for each block.
+ pub ipred_y: Intra4Pred,
+ pub ipred_u: Intra4Pred,
+ pub ipred_v: Intra4Pred,
+ // Top neighbours: the source predictor's top row followed by the four extra pixels passed to `new()`.
+ pub top_y: [u8; 21],
+ pub top_u: [u8; 13],
+ pub top_v: [u8; 13],
+ // Left neighbours, copied from the source predictor's `left` starting at index 1.
+ pub left_y: [u8; 16],
+ pub left_u: [u8; 8],
+ pub left_v: [u8; 8],
+ // Consulted by `pred_block()` when picking the chroma mode for block (0, 0).
+ pub has_l: bool,
+}
+
+impl BlockIntra4Pred {
+ /// Builds the prediction state for one macroblock from the 16x16 predictor
+ /// contexts of the three planes.
+ ///
+ /// `tr_y`, `tr_u` and `tr_v` are stored as the last four entries of
+ /// `top_y`/`top_u`/`top_v`; `has_l` is stored unchanged and used later by
+ /// `pred_block()`.
+ pub fn new(src_y: &IntraPred16x16, src_u: &IntraPred16x16, src_v: &IntraPred16x16, tr_y: [u8; 4], tr_u: [u8; 4], tr_v: [u8; 4], has_l: bool) -> Self {
+ let mut top_y = [0; 21];
+ top_y[..17].copy_from_slice(&src_y.top);
+ top_y[17..].copy_from_slice(&tr_y);
+ // Chroma uses only the first nine entries of the source top row.
+ let mut top_u = [0; 13];
+ top_u[..9].copy_from_slice(&src_u.top[..9]);
+ top_u[9..].copy_from_slice(&tr_u);
+ let mut top_v = [0; 13];
+ top_v[..9].copy_from_slice(&src_v.top[..9]);
+ top_v[9..].copy_from_slice(&tr_v);
+ // Entry 0 of the source `left` arrays is skipped in all three planes.
+ let mut left_y = [0; 16];
+ left_y.copy_from_slice(&src_y.left[1..]);
+ let mut left_u = [0; 8];
+ left_u.copy_from_slice(&src_u.left[1..9]);
+ let mut left_v = [0; 8];
+ left_v.copy_from_slice(&src_v.left[1..9]);
+ Self {
+ ipred_y: Intra4Pred::new(),
+ ipred_u: Intra4Pred::new(),
+ ipred_v: Intra4Pred::new(),
+ top_y, top_u, top_v, left_y, left_u, left_v,
+ has_l,
+ }
+ }
+ /// Predicts the 4x4 luma block at block coordinates (`x`, `y`) into `dst.y`
+ /// using `mode`. When both `x` and `y` are even, the chroma blocks in
+ /// `dst.u` and `dst.v` are predicted as well.
+ pub fn pred_block(&mut self, dst: &mut RefMBData, x: usize, y: usize, mode: PredType4x4) {
+ let do_chroma = ((x & 1) == 0) && ((y & 1) == 0);
+ if x == 0 {
+ // First block of a row: reload top-left and left neighbours from the macroblock-level copies.
+ self.ipred_y.tl = if y == 0 { self.top_y[0] } else { self.left_y[y * 4 - 1] };
+ if y != 3 {
+ self.ipred_y.left.copy_from_slice(&self.left_y[y * 4..][..8]);
+ } else {
+ // Only four left pixels remain for the last row; the other entries keep their previous contents.
+ self.ipred_y.left[..4].copy_from_slice(&self.left_y[12..]);
+ }
+ if y == 0 {
+ self.ipred_u.tl = self.top_u[0];
+ self.ipred_v.tl = self.top_v[0];
+ self.ipred_u.left.copy_from_slice(&self.left_u);
+ self.ipred_v.left.copy_from_slice(&self.left_v);
+ } else if y == 2 {
+ self.ipred_u.tl = self.left_u[3];
+ self.ipred_v.tl = self.left_v[3];
+ self.ipred_u.left[..4].copy_from_slice(&self.left_u[4..]);
+ self.ipred_v.left[..4].copy_from_slice(&self.left_v[4..]);
+ }
+ }
+ // Eight top pixels are loaded: four above the block and four above-right of it.
+ self.ipred_y.top.copy_from_slice(&self.top_y[x * 4 + 1..][..8]);
+ if do_chroma {
+ if x == 0 {
+ self.ipred_u.top.copy_from_slice(&self.top_u[1..9]);
+ self.ipred_v.top.copy_from_slice(&self.top_v[1..9]);
+ } else if x == 2 {
+ self.ipred_u.top.copy_from_slice(&self.top_u[5..]);
+ self.ipred_v.top.copy_from_slice(&self.top_v[5..]);
+ }
+ }
+
+ self.ipred_y.apply(mode, &mut dst.y[x * 4 + y * 4 * 16..], 16);
+ if do_chroma {
+ // Only chroma block (0, 0) may use the regular variants of the three modes below, and only when `has_l` is set.
+ let has_ld = if (x == 0) && (y == 0) { self.has_l } else { false };
+ let off = x * 2 + y * 2 * 8;
+ let cmode = match mode {
+ PredType4x4::DiagDownLeft if !has_ld => PredType4x4::DiagDownLeftNoDown,
+ PredType4x4::VerLeft if !has_ld => PredType4x4::VerLeftNoDown,
+ PredType4x4::HorUp if !has_ld => PredType4x4::HorUpNoDown,
+ _ => mode,
+ };
+ self.ipred_u.apply(cmode, &mut dst.u[off..], 8);
+ self.ipred_v.apply(cmode, &mut dst.v[off..], 8);
+ }
+ }
+ /// Refreshes the neighbour context from the block at (`x`, `y`) in `src`
+ /// so that the following blocks can be predicted from it.
+ pub fn update_from(&mut self, src: &RefMBData, x: usize, y: usize) {
+ let do_chroma = ((x & 1) == 0) && ((y & 1) == 0);
+ let y_off = x * 4 + y * 4 * 16;
+ let c_off = x * 2 + y * 2 * 8;
+
+ if x != 3 {
+ // The block's rightmost column becomes the left neighbour of the next block in the row.
+ self.ipred_y.tl = self.ipred_y.top[3];
+ for (left, src) in self.ipred_y.left[..4].iter_mut().zip(src.y[y_off + 3..].chunks(16)) {
+ *left = src[0];
+ }
+ }
+ if do_chroma && x != 2 {
+ self.ipred_u.tl = self.ipred_u.top[3];
+ self.ipred_v.tl = self.ipred_v.top[3];
+ for (left, src) in self.ipred_u.left[..4].iter_mut().zip(src.u[c_off + 3..].chunks(8)) {
+ *left = src[0];
+ }
+ for (left, src) in self.ipred_v.left[..4].iter_mut().zip(src.v[c_off + 3..].chunks(8)) {
+ *left = src[0];
+ }
+ }
+ if x == 0 {
+ // NOTE(review): `x` is 0 in this branch, so this always reads `left_y[3]`;
+ // possibly `y * 4 + 3` was intended - confirm whether `top_y[0]` is read after this point.
+ self.top_y[0] = self.left_y[x * 4 + 3];
+ if y == 0 {
+ self.top_u[0] = self.left_u[3];
+ self.top_v[0] = self.left_v[3];
+ }
+ }
+ // The block's bottom row becomes the top neighbours of the block below it.
+ self.top_y[x * 4 + 1..][..4].copy_from_slice(&src.y[y_off + 3 * 16..][..4]);
+ if x == 3 {
+ // After the last block in a row, replicate the final top pixel into the four extra entries.
+ let (head, tail) = self.top_y.split_at_mut(17);
+ for el in tail.iter_mut() {
+ *el = head[16];
+ }
+ }
+ if do_chroma && y != 2 {
+ self.top_u[x * 2 + 1..][..4].copy_from_slice(&src.u[c_off + 3 * 8..][..4]);
+ self.top_v[x * 2 + 1..][..4].copy_from_slice(&src.v[c_off + 3 * 8..][..4]);
+ if x == 2 {
+ for i in 9..13 {
+ self.top_u[i] = self.top_u[8];
+ self.top_v[i] = self.top_v[8];
+ }
+ }
+ }
+ }
+}
--- /dev/null
+use nihav_core::frame::NASimpleVideoFrame;
+use super::super::types::DeblockInfo;
+use super::clip8;
+
+// Bit masks over per-macroblock block patterns.
+// Luma patterns hold one bit per 4x4 block at position `x + y * 4`.
+const Y_TOP_ROW_MASK: u32 = 0x000F;
+const Y_BOT_ROW_MASK: u32 = 0xF000;
+const Y_LEFT_COL_MASK: u32 = 0x1111;
+const Y_RIGHT_COL_MASK: u32 = 0x8888;
+// Chroma patterns hold one bit per block at position `x + y * 2`.
+const C_TOP_ROW_MASK: u8 = 0x3;
+const C_BOT_ROW_MASK: u8 = 0xC;
+const C_LEFT_COL_MASK: u8 = 0x5;
+const C_RIGHT_COL_MASK: u8 = 0xA;
+
+// Evaluates to `true` when bit number `$x` of `$pat` is set.
+macro_rules! test_bit {
+ ($pat: expr, $x: expr) => ( (($pat >> $x) & 1) != 0 )
+}
+
+/// Runs the in-loop deblocking filter over the whole frame in `dst`.
+///
+/// Macroblocks are visited in raster order. `dblk` holds one `DeblockInfo`
+/// per macroblock, indexed by `mb_x + mb_y * mb_w`; `mb_w` and `mb_h` are the
+/// frame dimensions in macroblocks. For every macroblock the luma plane is
+/// filtered first (4x4 grid of blocks), then both chroma planes (2x2 grid).
+pub fn loop_filter_frame(dst: &mut NASimpleVideoFrame<u8>, dblk: &[DeblockInfo], mb_w: usize, mb_h: usize) {
+ // Frames of at most 176x144 luma pixels use a larger secondary luma threshold (`beta_y`).
+ let small_frame = dst.width[0] * dst.height[0] <= 176 * 144;
+
+ let mut mb_pos = 0;
+ for mb_y in 0..mb_h {
+ let is_last_row = mb_y == mb_h - 1;
+ // State of the macroblock to the left; reset at the start of every row.
+ let mut left_q: usize = 0;
+ let mut left_cbp_y = 0;
+ let mut left_cbp_c = 0;
+ let mut left_dbk_y = 0;
+
+ for mb_x in 0..mb_w {
+ let q = usize::from(dblk[mb_pos].q);
+ let alpha = RV40_ALPHA_TAB[q];
+ let beta = RV40_BETA_TAB[q];
+ let beta_y = if small_frame { beta * 4 } else { beta * 3 };
+ let beta_c = beta * 3;
+
+ let is_strong = dblk[mb_pos].is_strong;
+ let top_is_strong = mb_y > 0 && dblk[mb_pos - mb_w].is_strong;
+ let left_is_strong = mb_x > 0 && dblk[mb_pos - 1].is_strong;
+ let bot_is_strong = !is_last_row && dblk[mb_pos + mb_w].is_strong;
+
+ let cur_dbk_y = dblk[mb_pos].deblock_y;
+ // Strong macroblocks are treated as if every luma block were flagged.
+ let cur_cbp_y = if is_strong { 0xFFFF } else { u32::from(dblk[mb_pos].cbp_y) };
+
+ let (top_cbp_y, top_dbk_y) = if mb_y > 0 {
+ (if top_is_strong { 0xFFFF } else { u32::from(dblk[mb_pos - mb_w].cbp_y) }, dblk[mb_pos - mb_w].deblock_y)
+ } else {
+ (0, 0)
+ };
+ let bot_dbk_y = if !is_last_row {
+ dblk[mb_pos + mb_w].deblock_y
+ } else {
+ 0
+ };
+
+ // Bits 0..15 describe the current macroblock, bits 16.. the top row of the one below.
+ let y_to_deblock = (cur_dbk_y as u32) | ((bot_dbk_y as u32) << 16);
+ // An edge is filtered when the block on either side of it is flagged.
+ let mut y_h_deblock = y_to_deblock | ((cur_cbp_y << 4) & !Y_TOP_ROW_MASK) | ((top_cbp_y & Y_BOT_ROW_MASK) >> 12);
+ let mut y_v_deblock = y_to_deblock | ((cur_cbp_y << 1) & !Y_LEFT_COL_MASK) | ((left_cbp_y & Y_RIGHT_COL_MASK) >> 3);
+
+ // Edges lying on the frame border are never filtered.
+ if mb_x == 0 {
+ y_v_deblock &= !Y_LEFT_COL_MASK;
+ }
+ if mb_y == 0 {
+ y_h_deblock &= !Y_TOP_ROW_MASK;
+ }
+ if is_last_row || is_strong || bot_is_strong {
+ y_h_deblock &= !(Y_TOP_ROW_MASK << 16);
+ }
+
+ for y in 0..4 {
+ let yoff = dst.offset[0] + mb_x * 16 + (mb_y * 16 + y * 4) * dst.stride[0];
+ for x in 0..4 {
+ let bpos = x + y * 4;
+ let ver_strong = (x == 0) && (mb_x > 0) && (is_strong || left_is_strong);
+
+ // Strength per block: 2 = strong macroblock, 1 = flagged block, 0 = neither.
+ let cur_strength: usize;
+ if is_strong {
+ cur_strength = 2;
+ } else if test_bit!(cur_dbk_y, bpos) {
+ cur_strength = 1;
+ } else {
+ cur_strength = 0;
+ }
+
+ let left_strength: usize;
+ if x > 0 {
+ if is_strong {
+ left_strength = 2;
+ } else if test_bit!(cur_dbk_y, bpos - 1) {
+ left_strength = 1;
+ } else {
+ left_strength = 0;
+ }
+ } else if mb_x > 0 {
+ if left_is_strong {
+ left_strength = 2;
+ } else if test_bit!(left_dbk_y, bpos + 3) {
+ left_strength = 1;
+ } else {
+ left_strength = 0;
+ }
+ } else {
+ left_strength = 0;
+ }
+
+ let bot_strength: usize;
+ if y < 3 {
+ if is_strong {
+ bot_strength = 2;
+ } else if test_bit!(cur_dbk_y, bpos + 4) {
+ bot_strength = 1;
+ } else {
+ bot_strength = 0;
+ }
+ } else if !is_last_row {
+ if dblk[mb_pos + mb_w].is_strong {
+ bot_strength = 2;
+ } else if test_bit!(bot_dbk_y, x) {
+ bot_strength = 1;
+ } else {
+ bot_strength = 0;
+ }
+ } else {
+ bot_strength = 0;
+ }
+
+ let top_strength: usize;
+ if y > 0 {
+ if is_strong {
+ top_strength = 2;
+ } else if test_bit!(cur_dbk_y, bpos - 4) {
+ top_strength = 1;
+ } else {
+ top_strength = 0;
+ }
+ } else if mb_y > 0 {
+ if top_is_strong {
+ top_strength = 2;
+ } else if test_bit!(top_dbk_y, bpos + 12) {
+ top_strength = 1;
+ } else {
+ top_strength = 0;
+ }
+ } else {
+ top_strength = 0;
+ }
+
+ let l_q = if x > 0 { q } else { left_q };
+ let top_q = if mb_y > 0 { usize::from(dblk[mb_pos - mb_w].q) } else { 0 };
+
+ let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
+ let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
+ let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
+ let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
+
+ // Start index into the dither tables; only the strong filter reads it.
+ let dmode = if y > 0 { x + y * 4 } else { x * 4 };
+
+ // Order: bottom edge, left edge (normal), top macroblock edge (strong), left edge (strong).
+ if test_bit!(y_h_deblock, bpos + 4) {
+ rv40_loop_filter4_h(dst.data, yoff + 4 * dst.stride[0] + x * 4, dst.stride[0],
+ dmode, lim_cur, lim_bottom, alpha, beta, beta_y, false, false);
+ }
+ if test_bit!(y_v_deblock, bpos) && !ver_strong {
+ rv40_loop_filter4_v(dst.data, yoff + x * 4, dst.stride[0],
+ dmode, lim_left, lim_cur, alpha, beta, beta_y, false, false);
+ }
+ if (y == 0) && test_bit!(y_h_deblock, bpos) && (is_strong || top_is_strong) {
+ rv40_loop_filter4_h(dst.data, yoff + x * 4, dst.stride[0],
+ dmode, lim_top, lim_cur, alpha, beta, beta_y, false, true);
+ }
+ if test_bit!(y_v_deblock, bpos) && ver_strong {
+ rv40_loop_filter4_v(dst.data, yoff + x * 4, dst.stride[0],
+ dmode, lim_left, lim_cur, alpha, beta, beta_y, false, true);
+ }
+ }
+ }
+
+ let cur_cbp_c = dblk[mb_pos].cbp_c;
+ let top_cbp_c = if mb_y > 0 {
+ if top_is_strong { 0xFF } else { dblk[mb_pos - mb_w].cbp_c }
+ } else {
+ 0
+ };
+ let bot_cbp_c = if !is_last_row {
+ dblk[mb_pos + mb_w].cbp_c
+ } else {
+ 0
+ };
+ // Planes 1 and 2 each use one nibble of the 8-bit chroma pattern.
+ for comp in 1..3 {
+ let cshift = (comp - 1) * 4;
+ let c_cur_cbp = (cur_cbp_c >> cshift) & 0xF;
+ let c_top_cbp = (top_cbp_c >> cshift) & 0xF;
+ let c_left_cbp = (left_cbp_c >> cshift) & 0xF;
+ let c_bot_cbp = (bot_cbp_c >> cshift) & 0xF;
+
+ let c_deblock = c_cur_cbp | (c_bot_cbp << 4);
+ let mut c_v_deblock = c_deblock | ((c_cur_cbp << 1) & !C_LEFT_COL_MASK) | ((c_left_cbp & C_RIGHT_COL_MASK) >> 1);
+ let mut c_h_deblock = c_deblock | ((c_cur_cbp & C_TOP_ROW_MASK) << 2) | ((c_top_cbp & C_BOT_ROW_MASK) >> 2);
+ if mb_x == 0 {
+ c_v_deblock &= !C_LEFT_COL_MASK;
+ }
+ if mb_y == 0 {
+ c_h_deblock &= !C_TOP_ROW_MASK;
+ }
+ if is_last_row || is_strong || bot_is_strong {
+ c_h_deblock &= !(C_TOP_ROW_MASK << 4);
+ }
+
+ for y in 0..2 {
+ let coff = dst.offset[comp] + mb_x * 8 + (mb_y * 8 + y * 4) * dst.stride[comp];
+ for x in 0..2 {
+ let bpos = x + y * 2;
+
+ let ver_strong = (x == 0) && (is_strong || left_is_strong);
+
+ let cur_strength: usize;
+ if is_strong {
+ cur_strength = 2;
+ } else if test_bit!(c_cur_cbp, bpos) {
+ cur_strength = 1;
+ } else {
+ cur_strength = 0;
+ }
+
+ let left_strength: usize;
+ if x > 0 {
+ if is_strong {
+ left_strength = 2;
+ } else if test_bit!(c_cur_cbp, bpos - 1) {
+ left_strength = 1;
+ } else {
+ left_strength = 0;
+ }
+ } else if mb_x > 0 {
+ if left_is_strong {
+ left_strength = 2;
+ } else if test_bit!(c_left_cbp, bpos + 1) {
+ left_strength = 1;
+ } else {
+ left_strength = 0;
+ }
+ } else {
+ left_strength = 0;
+ }
+
+ let bot_strength: usize;
+ // NOTE(review): `y` is only 0 or 1 in this loop, so `y != 3` is always true and the
+ // `else` branches below are never taken - confirm against the decoder before changing.
+ if y != 3 {
+ if is_strong {
+ bot_strength = 2;
+ } else if test_bit!(c_cur_cbp, bpos + 2) {
+ bot_strength = 1;
+ } else {
+ bot_strength = 0;
+ }
+ } else if !is_last_row {
+ if dblk[mb_pos + mb_w].is_strong {
+ bot_strength = 2;
+ } else if test_bit!(c_bot_cbp, x) {
+ bot_strength = 1;
+ } else {
+ bot_strength = 0;
+ }
+ } else {
+ bot_strength = 0;
+ }
+
+ let top_strength: usize;
+ if y > 0 {
+ if is_strong {
+ top_strength = 2;
+ } else if test_bit!(c_cur_cbp, bpos - 2) {
+ top_strength = 1;
+ } else {
+ top_strength = 0;
+ }
+ } else if mb_y > 0 {
+ if top_is_strong {
+ top_strength = 2;
+ } else if test_bit!(c_top_cbp, bpos + 2) {
+ top_strength = 1;
+ } else {
+ top_strength = 0;
+ }
+ } else {
+ top_strength = 0;
+ }
+
+ let l_q = if x > 0 { q } else { left_q };
+ let top_q = if mb_y > 0 { usize::from(dblk[mb_pos - mb_w].q) } else { 0 };
+
+ let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
+ let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
+ let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
+ let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
+
+ // Same four-step order as for luma; the dither start index is `x * 8` or `y * 8`.
+ if test_bit!(c_h_deblock, bpos + 2) {
+ rv40_loop_filter4_h(dst.data, coff + 4 * dst.stride[comp] + x * 4, dst.stride[comp],
+ x * 8, lim_cur, lim_bottom, alpha, beta, beta_c, true, false);
+ }
+ if test_bit!(c_v_deblock, bpos) && !ver_strong {
+ rv40_loop_filter4_v(dst.data, coff + x * 4, dst.stride[comp],
+ y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, false);
+ }
+ if (y == 0) && test_bit!(c_h_deblock, bpos) && (is_strong || top_is_strong) {
+ rv40_loop_filter4_h(dst.data, coff + x * 4, dst.stride[comp],
+ x * 8, lim_top, lim_cur, alpha, beta, beta_c, true, true);
+ }
+ if test_bit!(c_v_deblock, bpos) && ver_strong {
+ rv40_loop_filter4_v(dst.data, coff + x * 4, dst.stride[comp],
+ y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, true);
+ }
+ }
+ }
+ }
+
+ // Remember this macroblock as the left neighbour of the next one.
+ left_q = q;
+ left_dbk_y = cur_dbk_y;
+ left_cbp_y = cur_cbp_y;
+ left_cbp_c = cur_cbp_c;
+
+ mb_pos += 1;
+ }
+ }
+}
+
+// Reads element `$o` of `$src` and converts it to `i16` with an `as` cast.
+macro_rules! el {
+ ($src: ident, $o: expr) => ($src[$o] as i16);
+}
+
+/// Limits `a` to the symmetric range `-lim..=lim` (for non-negative `lim`).
+///
+/// The lower bound is tested first, exactly as before.
+fn clip_symm(a: i16, lim: i16) -> i16 {
+    match a {
+        below if below < -lim => -lim,
+        above if above > lim => lim,
+        inside => inside,
+    }
+}
+
+/// Weak deblocking of four consecutive positions along one block edge.
+///
+/// `off` is the index of the first `q0` pixel, `step` is the distance between
+/// pixels across the edge (`p0` lives at `off - step`) and `stride` is the
+/// distance between successive positions along the edge. `p0`/`q0` change by
+/// at most `lim_p0q0`; `p1`/`q1` are touched only when the matching
+/// `filter_*` flag is set and their local gradient does not exceed `beta`.
+fn rv40_weak_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
+        filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
+        lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
+    let filter_both = filter_p1 && filter_q1;
+    let threshold = if filter_both { 2 } else { 3 };
+
+    for _ in 0..4 {
+        // Take the current position and advance up front so every early exit stays simple.
+        let pos = off;
+        off += stride;
+
+        let p0 = el!(pix, pos - step);
+        let q0 = el!(pix, pos);
+        let delta = q0 - p0;
+        if delta == 0 {
+            continue;
+        }
+
+        // Skip positions whose scaled difference exceeds the threshold.
+        let scaled = (alpha * delta.wrapping_abs()) >> 7;
+        if scaled > threshold {
+            continue;
+        }
+
+        let p2 = el!(pix, pos - 3 * step);
+        let p1 = el!(pix, pos - 2 * step);
+        let q1 = el!(pix, pos + step);
+        let q2 = el!(pix, pos + 2 * step);
+
+        let base = delta << 2;
+        let strength = if filter_both { base + (p1 - q1) } else { base };
+        let diff = clip_symm((strength + 4) >> 3, lim_p0q0);
+
+        pix[pos - step] = clip8(p0 + diff);
+        pix[pos] = clip8(q0 - diff);
+
+        if filter_p1 && (p1 - p2).wrapping_abs() <= beta {
+            let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
+            pix[pos - 2 * step] = clip8(p1 - clip_symm(p1_diff, lim_p1));
+        }
+        if filter_q1 && (q1 - q2).wrapping_abs() <= beta {
+            let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
+            pix[pos + step] = clip8(q1 - clip_symm(q1_diff, lim_q1));
+        }
+    }
+}
+
+/// Weak filter for a horizontal edge: pixels across the edge are `stride`
+/// apart, and the four filtered positions are adjacent in memory.
+fn rv40_weak_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
+        filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
+        lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
+    let across_edge = stride;
+    let along_edge = 1;
+    rv40_weak_loop_filter4(pix, off, across_edge, along_edge,
+                           filter_p1, filter_q1, alpha, beta,
+                           lim_p0q0, lim_p1, lim_q1);
+}
+/// Weak filter for a vertical edge: the six pixels across the edge are
+/// adjacent in memory and the four filtered rows are `stride` apart.
+///
+/// `off` points at the `q0` pixel of the first row. The arithmetic is the
+/// same as in `rv40_weak_loop_filter4`, applied to per-row windows.
+fn rv40_weak_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
+        filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
+        lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
+    // Position of `q0` inside each row window; `p2`, `p1`, `p0` precede it.
+    const Q0: usize = 3;
+
+    let filter_both = filter_p1 && filter_q1;
+    let threshold = if filter_both { 2 } else { 3 };
+
+    let window = &mut pix[off - 3..][..stride * 3 + 3 + 3];
+    for row in window.chunks_mut(stride).take(4) {
+        assert!(row.len() >= 3 + 3);
+        let p0 = el!(row, Q0 - 1);
+        let q0 = el!(row, Q0);
+        let delta = q0 - p0;
+        if delta == 0 {
+            continue;
+        }
+
+        let scaled = (alpha * delta.wrapping_abs()) >> 7;
+        if scaled > threshold {
+            continue;
+        }
+
+        let p2 = el!(row, Q0 - 3);
+        let p1 = el!(row, Q0 - 2);
+        let q1 = el!(row, Q0 + 1);
+        let q2 = el!(row, Q0 + 2);
+
+        let base = delta << 2;
+        let strength = if filter_both { base + (p1 - q1) } else { base };
+        let diff = clip_symm((strength + 4) >> 3, lim_p0q0);
+
+        row[Q0 - 1] = clip8(p0 + diff);
+        row[Q0] = clip8(q0 - diff);
+
+        if filter_p1 && (p1 - p2).wrapping_abs() <= beta {
+            let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
+            row[Q0 - 2] = clip8(p1 - clip_symm(p1_diff, lim_p1));
+        }
+        if filter_q1 && (q1 - q2).wrapping_abs() <= beta {
+            let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
+            row[Q0 + 1] = clip8(q1 - clip_symm(q1_diff, lim_q1));
+        }
+    }
+}
+
+/// Five-tap smoothing used by the strong filter.
+///
+/// Computes `(25 * (a + e) + 26 * (b + c + d) + dither) >> 7`. When `clip` is
+/// set, the result is kept within `lims` of the centre sample `c`.
+#[allow(clippy::many_single_char_names)]
+fn sfilter(a: i16, b: i16, c: i16, d: i16, e: i16, dither: i16, clip: bool, lims: i16) -> i16 {
+    let smoothed = (25 * (a + e) + 26 * (b + c + d) + dither) >> 7;
+    if !clip {
+        return smoothed;
+    }
+    match smoothed {
+        low if low < c - lims => c - lims,
+        high if high > c + lims => c + lims,
+        within => within,
+    }
+}
+
+/// Strong deblocking of four consecutive positions along one block edge.
+///
+/// `off`, `step` and `stride` have the same meaning as in
+/// `rv40_weak_loop_filter4`. Two pixels on each side of the edge are always
+/// rewritten; a third one per side is rewritten as well unless `chroma` is
+/// set. `dmode` is the start index into the dither tables.
+fn rv40_strong_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
+        alpha: i16, lims: i16, dmode: usize, chroma: bool) {
+    for line in 0..4 {
+        let pos = off;
+        off += stride;
+
+        let p0 = el!(pix, pos - step);
+        let q0 = el!(pix, pos);
+        let delta = q0 - p0;
+        if delta == 0 {
+            continue;
+        }
+
+        let fmode = (alpha * delta.wrapping_abs()) >> 7;
+        if fmode > 1 {
+            continue;
+        }
+        // Clipping against `lims` applies only when the scaled difference is non-zero.
+        let clip = fmode != 0;
+
+        let p3 = el!(pix, pos - 4 * step);
+        let p2 = el!(pix, pos - 3 * step);
+        let p1 = el!(pix, pos - 2 * step);
+        let q1 = el!(pix, pos + step);
+        let q2 = el!(pix, pos + 2 * step);
+        let q3 = el!(pix, pos + 3 * step);
+
+        let dither_l = RV40_DITHER_L[dmode + line];
+        let dither_r = RV40_DITHER_R[dmode + line];
+
+        let np0 = sfilter(p2, p1, p0, q0, q1, dither_l, clip, lims);
+        let nq0 = sfilter(p1, p0, q0, q1, q2, dither_r, clip, lims);
+
+        // The second pair is derived from the already smoothed first pair.
+        let np1 = sfilter(p3, p2, p1, np0, q0, dither_l, clip, lims);
+        let nq1 = sfilter(p0, nq0, q1, q2, q3, dither_r, clip, lims);
+
+        pix[pos - 2 * step] = np1 as u8;
+        pix[pos - step] = np0 as u8;
+        pix[pos] = nq0 as u8;
+        pix[pos + step] = nq1 as u8;
+
+        if !chroma {
+            let np2 = sfilter(np0, np1, p2, p3, p2, 64, false, 0);
+            let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0);
+            pix[pos - 3 * step] = np2 as u8;
+            pix[pos + 2 * step] = nq2 as u8;
+        }
+    }
+}
+
+/// Measures the gradients around an edge and decides how it is filtered.
+///
+/// Returns `(strong, filter_p1, filter_q1)`. The `filter_*` flags are set
+/// when the summed `p1 - p0` / `q1 - q0` differences over the four positions
+/// are below `beta * 4` in magnitude. `strong` can only be set when both
+/// flags hold, `edge` is set, and the summed `p1 - p2` / `q1 - q2`
+/// differences are below `beta2`.
+fn rv40_loop_strength(pix: &[u8], off: usize, step: usize, stride: usize,
+        beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
+    // The four positions along the edge.
+    let positions = || (0..4).map(|line| off + line * stride);
+
+    let mut sum_p1p0: i16 = 0;
+    let mut sum_q1q0: i16 = 0;
+    for pos in positions() {
+        sum_p1p0 += el!(pix, pos - 2 * step) - el!(pix, pos - step);
+        sum_q1q0 += el!(pix, pos + step) - el!(pix, pos);
+    }
+
+    let limit = beta * 4;
+    let filter_p1 = sum_p1p0.wrapping_abs() < limit;
+    let filter_q1 = sum_q1q0.wrapping_abs() < limit;
+
+    if !(filter_p1 && filter_q1 && edge) {
+        return (false, filter_p1, filter_q1);
+    }
+
+    let mut sum_p1p2: i16 = 0;
+    let mut sum_q1q2: i16 = 0;
+    for pos in positions() {
+        sum_p1p2 += el!(pix, pos - 2 * step) - el!(pix, pos - 3 * step);
+        sum_q1q2 += el!(pix, pos + step) - el!(pix, pos + 2 * step);
+    }
+
+    let strong = sum_p1p2.wrapping_abs() < beta2 && sum_q1q2.wrapping_abs() < beta2;
+    (strong, filter_p1, filter_q1)
+}
+
+/// Strength decision for a horizontal edge: pixels across the edge are
+/// `stride` apart, and the four inspected positions are adjacent in memory.
+fn rv40_loop_strength_h(pix: &[u8], off: usize, stride: usize,
+        beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
+    let across_edge = stride;
+    let along_edge = 1;
+    rv40_loop_strength(pix, off, across_edge, along_edge, beta, beta2, edge)
+}
+
+/// Strength decision for a vertical edge; `off` points at the `q0` pixel of
+/// the first of four rows that are `stride` apart.
+///
+/// Returns `(strong, filter_p1, filter_q1)` with the same meaning as in
+/// `rv40_loop_strength`.
+fn rv40_loop_strength_v(pix: &[u8], off: usize, stride: usize,
+        beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
+    // Each row window is laid out as p2 p1 p0 | q0 q1 q2.
+    let window = &pix[off - 3..][..stride * 3 + 3 + 3];
+    let rows = || window.chunks(stride).take(4);
+
+    let mut sum_p1p0: i16 = 0;
+    let mut sum_q1q0: i16 = 0;
+    for row in rows() {
+        assert!(row.len() >= 3 + 3);
+        sum_p1p0 += i16::from(row[1]) - i16::from(row[2]);
+        sum_q1q0 += i16::from(row[4]) - i16::from(row[3]);
+    }
+
+    let limit = beta * 4;
+    let filter_p1 = sum_p1p0.wrapping_abs() < limit;
+    let filter_q1 = sum_q1q0.wrapping_abs() < limit;
+
+    if !(filter_p1 && filter_q1 && edge) {
+        return (false, filter_p1, filter_q1);
+    }
+
+    let mut sum_p1p2: i16 = 0;
+    let mut sum_q1q2: i16 = 0;
+    for row in rows() {
+        assert!(row.len() >= 3 + 3);
+        sum_p1p2 += i16::from(row[1]) - i16::from(row[0]);
+        sum_q1q2 += i16::from(row[4]) - i16::from(row[5]);
+    }
+
+    let strong = sum_p1p2.wrapping_abs() < beta2 && sum_q1q2.wrapping_abs() < beta2;
+    (strong, filter_p1, filter_q1)
+}
+
+/// Filters one four-pixel horizontal edge starting at `off`.
+///
+/// The gradient check selects the filter: the strong filter, the weak filter
+/// on both sides with full limits, the weak filter on one side with halved
+/// limits, or nothing when neither side qualifies.
+fn rv40_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
+        dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
+        chroma: bool, edge: bool) {
+    let (strong, filter_p1, filter_q1) = rv40_loop_strength_h(pix, off, stride, beta, beta2, edge);
+    let lims = i16::from(filter_p1) + i16::from(filter_q1) + ((lim_p1 + lim_q1) >> 1) + 1;
+
+    match (strong, filter_p1, filter_q1) {
+        (true, _, _) => {
+            rv40_strong_loop_filter4(pix, off, stride, 1, alpha, lims, dmode, chroma);
+        },
+        (false, true, true) => {
+            rv40_weak_loop_filter4_h(pix, off, stride, true, true, alpha, beta,
+                                     lims, lim_p1, lim_q1);
+        },
+        (false, false, false) => {},
+        (false, one_p1, one_q1) => {
+            rv40_weak_loop_filter4_h(pix, off, stride, one_p1, one_q1, alpha, beta,
+                                     lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
+        },
+    }
+}
+
+/// Filters one four-pixel vertical edge starting at `off`.
+///
+/// The gradient check selects the filter: the strong filter, the weak filter
+/// on both sides with full limits, the weak filter on one side with halved
+/// limits, or nothing when neither side qualifies.
+fn rv40_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
+        dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
+        chroma: bool, edge: bool) {
+    let (strong, filter_p1, filter_q1) = rv40_loop_strength_v(pix, off, stride, beta, beta2, edge);
+    let lims = i16::from(filter_p1) + i16::from(filter_q1) + ((lim_p1 + lim_q1) >> 1) + 1;
+
+    match (strong, filter_p1, filter_q1) {
+        (true, _, _) => {
+            rv40_strong_loop_filter4(pix, off, 1, stride, alpha, lims, dmode, chroma);
+        },
+        (false, true, true) => {
+            rv40_weak_loop_filter4_v(pix, off, stride, true, true, alpha, beta,
+                                     lims, lim_p1, lim_q1);
+        },
+        (false, false, false) => {},
+        (false, one_p1, one_q1) => {
+            rv40_weak_loop_filter4_v(pix, off, stride, one_p1, one_q1, alpha, beta,
+                                     lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
+        },
+    }
+}
+
+// Dither terms passed to `sfilter()` by the strong filter for the `p` side,
+// indexed by `dmode + line`.
+const RV40_DITHER_L: [i16; 16] = [
+ 0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
+ 0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
+];
+// Dither terms for the `q` side, indexed the same way.
+const RV40_DITHER_R: [i16; 16] = [
+ 0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
+ 0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
+];
+
+// `alpha` filter parameter, indexed by the macroblock's `q` value.
+const RV40_ALPHA_TAB: [i16; 32] = [
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 122, 96, 75, 59, 47, 37,
+ 29, 23, 18, 15, 13, 11, 10, 9,
+ 8, 7, 6, 5, 4, 3, 2, 1
+];
+
+// `beta` filter parameter, indexed by the macroblock's `q` value.
+const RV40_BETA_TAB: [i16; 32] = [
+ 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, 4, 4, 6, 6,
+ 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 13, 14, 15, 16, 17
+];
+
+// Clipping limits, indexed by filter strength (0, 1 or 2) and then by `q`.
+const RV40_FILTER_CLIP_TBL: [[i16; 32]; 3] = [
+ [
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ ], [
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5
+ ], [
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 7, 8, 9
+ ]
+];
+
--- /dev/null
+use nihav_core::frame::NAVideoBuffer;
+use nihav_codec_support::codecs::MV;
+use nihav_codec_support::codecs::blockdsp::edge_emu;
+use super::clip8;
+
+/// Luma motion compensation for a 16x16 (`is16`) or 8x8 block.
+///
+/// The low two bits of each motion vector component select the interpolation
+/// routine, and the remaining bits give the whole-pixel displacement. Pixels
+/// are read straight from `pic` when the source area, including the filter
+/// margins, lies inside the picture padded to a multiple of 16. Otherwise an
+/// edge-emulated copy of the area is built first.
+pub fn luma_mc(dst: &mut [u8], dstride: usize, pic: &NAVideoBuffer<u8>, xpos: usize, ypos: usize, mv: MV, is16: bool) {
+    const RV40_EDGE1: [isize; 4] = [ 0, 2, 2, 2 ];
+    const RV40_EDGE2: [isize; 4] = [ 0, 3, 3, 3 ];
+
+    let (dx, dy) = (mv.x >> 2, mv.y >> 2);
+    let frac_x = (mv.x & 3) as usize;
+    let frac_y = (mv.y & 3) as usize;
+    let mode = frac_x + frac_y * 4;
+
+    let (pic_w, pic_h) = pic.get_dimensions(0);
+    let padded_w = (pic_w + 15) & !15;
+    let padded_h = (pic_h + 15) & !15;
+
+    let bsize = if is16 { 16 } else { 8 };
+    let mc_func = if is16 { LUMA_MC_16[mode] } else { LUMA_MC_8[mode] };
+
+    let inside = check_pos(xpos, ypos, bsize, padded_w, padded_h, dx, dy,
+                           RV40_EDGE1[frac_x], RV40_EDGE2[frac_x],
+                           RV40_EDGE1[frac_y], RV40_EDGE2[frac_y]);
+    if !inside {
+        // Build a padded copy that starts two pixels above and to the left of the block.
+        let mut ebuf = [0u8; 32 * 22];
+        edge_emu(pic, (xpos as isize) + (dx as isize) - 2, (ypos as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0, 4);
+        (mc_func)(dst, dstride, &ebuf, 32 * 2 + 2, 32);
+        return;
+    }
+
+    let sstride = pic.get_stride(0);
+    let base = pic.get_offset(0) + xpos + ypos * sstride;
+    let soffset = ((base as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
+    let data = pic.get_data();
+    let src: &[u8] = data.as_slice();
+    (mc_func)(dst, dstride, src, soffset, sstride);
+}
+
+/// Chroma motion compensation for plane `comp`, on an 8x8 (`is16`) or 4x4 block.
+///
+/// The motion vector is halved first. The low two bits of each halved
+/// component form the fractional position, with (3, 3) replaced by (2, 2);
+/// the remaining bits give the whole-pixel displacement. Edge emulation is
+/// used when the source area does not fit inside the padded chroma plane.
+pub fn chroma_mc(dst: &mut [u8], dstride: usize, pic: &NAVideoBuffer<u8>, xpos: usize, ypos: usize, comp: usize, mv: MV, is16: bool) {
+    let (mvx, mvy) = (mv.x / 2, mv.y / 2);
+    let (dx, dy) = (mvx >> 2, mvy >> 2);
+    let (cx, cy) = match ((mvx & 3) as usize, (mvy & 3) as usize) {
+        (3, 3) => (2, 2),
+        other => other,
+    };
+
+    // Chroma dimensions are derived from the luma size padded to a multiple of 16.
+    let (luma_w, luma_h) = pic.get_dimensions(0);
+    let w = ((luma_w + 15) & !15) >> 1;
+    let h = ((luma_h + 15) & !15) >> 1;
+    let bsize = if is16 { 8 } else { 4 };
+
+    if !check_pos(xpos, ypos, bsize, w, h, dx, dy, 0, 1, 0, 1) {
+        let mut ebuf = [0u8; 16 * 10];
+        edge_emu(pic, (xpos as isize) + (dx as isize), (ypos as isize) + (dy as isize), bsize + 1, bsize + 1, &mut ebuf, 16, comp, 4);
+        rv40_chroma_mc(dst, dstride, &ebuf, 0, 16, bsize, cx, cy);
+        return;
+    }
+
+    let sstride = pic.get_stride(comp);
+    let base = pic.get_offset(comp) + xpos + ypos * sstride;
+    let soffset = ((base as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
+    let data = pic.get_data();
+    let src: &[u8] = data.as_slice();
+    rv40_chroma_mc(dst, dstride, src, soffset, sstride, bsize, cx, cy);
+}
+
+/// Tells whether a `size` x `size` block displaced by (`dx`, `dy`) from
+/// (`x`, `y`) stays inside a `width` x `height` area.
+///
+/// The block is extended by `e0`/`e1` pixels to the left/right and by
+/// `e2`/`e3` pixels above/below before the check.
+fn check_pos(x: usize, y: usize, size: usize, width: usize, height: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
+    let span = size as isize;
+    let xn = (x as isize) + (dx as isize);
+    let yn = (y as isize) + (dy as isize);
+
+    // One axis: the start minus the leading margin must not go negative and
+    // the end plus the trailing margin must not pass the limit.
+    let fits = |start: isize, before: isize, after: isize, limit: usize| {
+        start - before >= 0 && start + span + after <= (limit as isize)
+    };
+
+    fits(xn, e0, e1, width) && fits(yn, e2, e3, height)
+}
+
+// Signature shared by the luma interpolation routines:
+// (dst, dst stride, src, src start index, src stride).
+type MCFunc = fn (&mut [u8], usize, &[u8], usize, usize);
+
+// Reads element `$o` of `$s` and converts it to `i16` with an `as` cast.
+macro_rules! el {
+ ($s: ident, $o: expr) => ( $s[$o] as i16 )
+}
+
+// Interpolation kernels applied at index `$o` of `$s`; results go through `clip8`.
+// `01`, `02` and `03` are six-tap filters over samples `$o - 2 * $step ..= $o + 3 * $step`;
+// `33` averages a 2x2 neighbourhood.
+macro_rules! filter {
+ // Taps (1, -5, 52, 20, -5, 1), rounded and divided by 64.
+ (01; $s: ident, $o: expr, $step: expr) => (
+ clip8((( el!($s, $o - 2 * $step)
+ -5 * el!($s, $o - 1 * $step)
+ +52 * el!($s, $o - 0 * $step)
+ +20 * el!($s, $o + 1 * $step)
+ -5 * el!($s, $o + 2 * $step)
+ + el!($s, $o + 3 * $step) + 32) >> 6) as i16)
+ );
+ // Taps (1, -5, 20, 20, -5, 1), rounded and divided by 32.
+ (02; $s: ident, $o: expr, $step: expr) => (
+ clip8((( el!($s, $o - 2 * $step)
+ -5 * el!($s, $o - 1 * $step)
+ +20 * el!($s, $o - 0 * $step)
+ +20 * el!($s, $o + 1 * $step)
+ -5 * el!($s, $o + 2 * $step)
+ + el!($s, $o + 3 * $step) + 16) >> 5) as i16)
+ );
+ // Taps (1, -5, 20, 52, -5, 1), rounded and divided by 64.
+ (03; $s: ident, $o: expr, $step: expr) => (
+ clip8((( el!($s, $o - 2 * $step)
+ -5 * el!($s, $o - 1 * $step)
+ +20 * el!($s, $o - 0 * $step)
+ +52 * el!($s, $o + 1 * $step)
+ -5 * el!($s, $o + 2 * $step)
+ + el!($s, $o + 3 * $step) + 32) >> 6) as i16)
+ );
+ // Rounded average of the sample, its right neighbour and the two samples one row below.
+ (33; $s: ident, $o: expr, $stride: expr) => (
+ clip8((( el!($s, $o)
+ + el!($s, $o + 1)
+ + el!($s, $o + $stride)
+ + el!($s, $o + 1 + $stride) + 2) >> 2) as i16)
+ );
+}
+
+// Generates one `MCFunc`-compatible interpolation routine named `$name` for a
+// `$size` x `$size` block.
+macro_rules! mc_func {
+ // Plain copy of `$size` rows of `$size` pixels.
+ (copy; $name: ident, $size: expr) => (
+ fn $name (dst: &mut [u8], dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
+ for (dline, sline) in dst.chunks_mut(dstride).zip(src[sidx..].chunks(sstride)).take($size) {
+ dline[..$size].copy_from_slice(&sline[..$size]);
+ }
+ }
+ );
+ // `mc01`/`mc02`/`mc03`: a single pass of the matching `filter!` kernel,
+ // vertical when `$ver` is true and horizontal otherwise.
+ (mc01; $name: ident, $size: expr, $ver: expr) => (
+ fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
+ let step = if $ver { sstride } else { 1 };
+ for dline in dst.chunks_mut(dstride).take($size) {
+ for (x, el) in dline[..$size].iter_mut().enumerate() {
+ *el = filter!(01; src, sidx + x, step);
+ }
+ sidx += sstride;
+ }
+ }
+ );
+ (mc02; $name: ident, $size: expr, $ver: expr) => (
+ fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
+ let step = if $ver { sstride } else { 1 };
+ for dline in dst.chunks_mut(dstride).take($size) {
+ for (x, el) in dline[..$size].iter_mut().enumerate() {
+ *el = filter!(02; src, sidx + x, step);
+ }
+ sidx += sstride;
+ }
+ }
+ );
+ (mc03; $name: ident, $size: expr, $ver: expr) => (
+ fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
+ let step = if $ver { sstride } else { 1 };
+ for dline in dst.chunks_mut(dstride).take($size) {
+ for (x, el) in dline[..$size].iter_mut().enumerate() {
+ *el = filter!(03; src, sidx + x, step);
+ }
+ sidx += sstride;
+ }
+ }
+ );
+ // `cm01`/`cm02`/`cm03`: two passes. The matching kernel is first applied
+ // horizontally to `$size + 5` rows (starting two rows above the block) into a
+ // temporary buffer, then `$ofilt` is run on that buffer from its third row.
+ (cm01; $name: ident, $size: expr, $ofilt: ident) => (
+ fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
+ let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
+ let mut bidx = 0;
+ let bstride = $size;
+ sidx -= sstride * 2;
+ for _ in 0..$size+5 {
+ for x in 0..$size { buf[bidx + x] = filter!(01; src, sidx + x, 1); }
+ bidx += bstride;
+ sidx += sstride;
+ }
+ $ofilt(dst, dstride, &buf, 2*bstride, $size);
+ }
+ );
+ (cm02; $name: ident, $size: expr, $ofilt: ident) => (
+ fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
+ let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
+ let mut bidx = 0;
+ let bstride = $size;
+ sidx -= sstride * 2;
+ for _ in 0..$size+5 {
+ for x in 0..$size { buf[bidx + x] = filter!(02; src, sidx + x, 1); }
+ bidx += bstride;
+ sidx += sstride;
+ }
+ $ofilt(dst, dstride, &buf, 2*bstride, $size);
+ }
+ );
+ (cm03; $name: ident, $size: expr, $ofilt: ident) => (
+ fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
+ let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
+ let mut bidx = 0;
+ let bstride = $size;
+ sidx -= sstride * 2;
+ for _ in 0..$size+5 {
+ for x in 0..$size { buf[bidx + x] = filter!(03; src, sidx + x, 1); }
+ bidx += bstride;
+ sidx += sstride;
+ }
+ $ofilt(dst, dstride, &buf, 2*bstride, $size);
+ }
+ );
+ // 2x2 averaging via the `33` kernel.
+ (mc33; $name: ident, $size: expr) => (
+ fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
+ for dline in dst.chunks_mut(dstride).take($size) {
+ for (x, el) in dline[..$size].iter_mut().enumerate() {
+ *el = filter!(33; src, sidx + x, sstride);
+ }
+ sidx += sstride;
+ }
+ }
+ );
+}
+mc_func!(copy; copy_16, 16); // copy_<size>: built from the `copy` arm (defined outside this excerpt; presumably a plain block copy - confirm)
+mc_func!(copy; copy_8, 8);
+mc_func!(mc01; luma_mc_10_16, 16, false); // luma_mc_XY_<size>: X names the filter run along rows, Y the filter run down columns
+mc_func!(mc01; luma_mc_10_8, 8, false);
+mc_func!(mc02; luma_mc_20_16, 16, false);
+mc_func!(mc02; luma_mc_20_8, 8, false);
+mc_func!(mc03; luma_mc_30_16, 16, false); // `false`: the mc03 arm uses a step of 1, i.e. neighbouring samples of the same row
+mc_func!(mc03; luma_mc_30_8, 8, false);
+mc_func!(mc01; luma_mc_01_16, 16, true);
+mc_func!(mc01; luma_mc_01_8, 8, true);
+mc_func!(mc02; luma_mc_02_16, 16, true);
+mc_func!(mc02; luma_mc_02_8, 8, true);
+mc_func!(mc03; luma_mc_03_16, 16, true); // `true`: the mc03 arm steps by the source stride, i.e. samples of the same column
+mc_func!(mc03; luma_mc_03_8, 8, true);
+mc_func!(cm01; luma_mc_11_16, 16, luma_mc_01_16); // cmNN arms: run filter NN along rows into a temporary buffer, then hand that buffer to the named column function
+mc_func!(cm01; luma_mc_11_8, 8, luma_mc_01_8);
+mc_func!(cm01; luma_mc_12_16, 16, luma_mc_02_16);
+mc_func!(cm01; luma_mc_12_8, 8, luma_mc_02_8);
+mc_func!(cm01; luma_mc_13_16, 16, luma_mc_03_16);
+mc_func!(cm01; luma_mc_13_8, 8, luma_mc_03_8);
+mc_func!(cm02; luma_mc_21_16, 16, luma_mc_01_16);
+mc_func!(cm02; luma_mc_21_8, 8, luma_mc_01_8);
+mc_func!(cm02; luma_mc_22_16, 16, luma_mc_02_16);
+mc_func!(cm02; luma_mc_22_8, 8, luma_mc_02_8);
+mc_func!(cm02; luma_mc_23_16, 16, luma_mc_03_16);
+mc_func!(cm02; luma_mc_23_8, 8, luma_mc_03_8);
+mc_func!(cm03; luma_mc_31_16, 16, luma_mc_01_16);
+mc_func!(cm03; luma_mc_31_8, 8, luma_mc_01_8);
+mc_func!(cm03; luma_mc_32_16, 16, luma_mc_02_16);
+mc_func!(cm03; luma_mc_32_8, 8, luma_mc_02_8);
+mc_func!(mc33; luma_mc_33_16, 16); // position (3,3) uses its own single-pass arm instead of a cm03 + luma_mc_03 combination
+mc_func!(mc33; luma_mc_33_8, 8);
+
+const LUMA_MC_16: [MCFunc; 16] = [ // 16x16 luma MC functions; luma_mc_XY_16 is stored at index X + Y * 4, with copy_16 at index 0
+ copy_16, luma_mc_10_16, luma_mc_20_16, luma_mc_30_16,
+ luma_mc_01_16, luma_mc_11_16, luma_mc_21_16, luma_mc_31_16,
+ luma_mc_02_16, luma_mc_12_16, luma_mc_22_16, luma_mc_32_16,
+ luma_mc_03_16, luma_mc_13_16, luma_mc_23_16, luma_mc_33_16
+];
+const LUMA_MC_8: [MCFunc; 16] = [ // 8x8 luma MC functions; luma_mc_XY_8 is stored at index X + Y * 4, with copy_8 at index 0
+ copy_8, luma_mc_10_8, luma_mc_20_8, luma_mc_30_8,
+ luma_mc_01_8, luma_mc_11_8, luma_mc_21_8, luma_mc_31_8,
+ luma_mc_02_8, luma_mc_12_8, luma_mc_22_8, luma_mc_32_8,
+ luma_mc_03_8, luma_mc_13_8, luma_mc_23_8, luma_mc_33_8
+];
+
+/// Chroma motion compensation for a `size` x `size` block.
+///
+/// `src[sidx]` is the top-left source sample and `x`/`y` are the fractional
+/// offsets. The interpolation weights are derived from `4 - x` and `4 - y`,
+/// so offsets above 4 are not supported. With both offsets zero the block is
+/// copied unchanged; with exactly one non-zero offset a two-tap blend along
+/// that direction is used; otherwise a four-tap bilinear blend is applied.
+/// A per-position rounding bias is added before the final shift by 4.
+#[allow(clippy::many_single_char_names)]
+fn rv40_chroma_mc(dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
+    const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [
+        [ 0, 4, 8, 4 ],
+        [ 8, 7, 8, 7 ],
+        [ 0, 8, 4, 8 ],
+        [ 8, 7, 8, 7 ]
+    ];
+
+    // No fractional part at all: plain row-by-row copy.
+    if x == 0 && y == 0 {
+        let src_rows = src[sidx..].chunks(sstride);
+        for (out_row, in_row) in dst.chunks_mut(dstride).zip(src_rows).take(size) {
+            out_row[..size].copy_from_slice(&in_row[..size]);
+        }
+        return;
+    }
+
+    let rounding = RV40_CHROMA_BIAS[y >> 1][x >> 1];
+    let w_cur = ((4 - x) * (4 - y)) as u16;
+    if x != 0 && y != 0 {
+        // Both directions are fractional: blend the four surrounding samples.
+        let w_right = (x * (4 - y)) as u16;
+        let w_below = ((4 - x) * y) as u16;
+        let w_diag = (x * y) as u16;
+        for out_row in dst.chunks_mut(dstride).take(size) {
+            for (col, pix) in out_row[..size].iter_mut().enumerate() {
+                let pos = sidx + col;
+                let acc = w_cur * (src[pos] as u16)
+                        + w_right * (src[pos + 1] as u16)
+                        + w_below * (src[pos + sstride] as u16)
+                        + w_diag * (src[pos + 1 + sstride] as u16)
+                        + rounding;
+                *pix = (acc >> 4) as u8;
+            }
+            sidx += sstride;
+        }
+    } else {
+        // Only one direction is fractional: blend with the next sample along it.
+        let w_next = (x * (4 - y) + (4 - x) * y) as u16;
+        let step = if y != 0 { sstride } else { 1 };
+        for out_row in dst.chunks_mut(dstride).take(size) {
+            for (col, pix) in out_row[..size].iter_mut().enumerate() {
+                let pos = sidx + col;
+                let acc = w_cur * (src[pos] as u16)
+                        + w_next * (src[pos + step] as u16)
+                        + rounding;
+                *pix = (acc >> 4) as u8;
+            }
+            sidx += sstride;
+        }
+    }
+}
--- /dev/null
+use super::types::Block;
+
+mod blk;
+pub use blk::*;
+mod ipred;
+pub use ipred::*;
+mod loopfilt;
+pub use loopfilt::*;
+mod mc;
+pub use mc::*;
+
+/// Saturates a signed 16-bit value to the `0..=255` range of a byte.
+fn clip8(a: i16) -> u8 {
+    a.max(0).min(255) as u8
+}
+
+pub struct RefMBData { // Samples of one macroblock: a 16x16 `y` plane plus two 8x8 planes `u` and `v`
+ pub y: [u8; 16 * 16], // stored row by row, 16 bytes per row
+ pub u: [u8; 8 * 8], // stored row by row, 8 bytes per row
+ pub v: [u8; 8 * 8], // stored row by row, 8 bytes per row
+}
+
+impl RefMBData {
+    /// Creates macroblock storage with every sample set to zero.
+    pub fn new() -> Self {
+        Self { y: [0; 16 * 16], u: [0; 8 * 8], v: [0; 8 * 8] }
+    }
+    /// Overwrites all three planes with the contents of `other`.
+    pub fn copy_from(&mut self, other: &Self) {
+        self.y = other.y;
+        self.u = other.u;
+        self.v = other.v;
+    }
+    /// Computes transformed and quantised coefficients of `new - self`.
+    ///
+    /// Blocks 0..16 receive luma, 16..20 the `u` plane and 20..24 the `v`
+    /// plane. When `is16` is set, the DC of every luma block is moved into
+    /// block 24, which then gets its own transform and quantisation with
+    /// `q_dc`/`q_ac`. Chroma quantisers are derived from `q_ac`.
+    pub fn calc_coeffs(&self, new: &Self, coeffs: &mut [Block; 25], q_dc: usize, q_ac: usize, is16: bool) {
+        let (blocks, dc_part) = coeffs.split_at_mut(24);
+        let dc_blk = &mut dc_part[0];
+        let (luma_blocks, chroma_blocks) = blocks.split_at_mut(16);
+
+        // Luma: four stripes of four 4x4 blocks each.
+        let stripes = self.y.chunks(16 * 4).zip(new.y.chunks(16 * 4));
+        for (row, (blk_row, (pred_stripe, cur_stripe))) in luma_blocks.chunks_mut(4).zip(stripes).enumerate() {
+            for (col, blk) in blk_row.iter_mut().enumerate() {
+                Self::diff_blk(&cur_stripe[col * 4..], &pred_stripe[col * 4..], 16, blk);
+                blk.transform_4x4();
+                if is16 {
+                    dc_blk.coeffs[col + row * 4] = blk.coeffs[0];
+                    blk.coeffs[0] = 0;
+                }
+                blk.quant(q_ac, q_ac);
+            }
+        }
+
+        let (cq_dc, cq_ac) = chroma_quants(q_ac);
+        let (u_blocks, v_blocks) = chroma_blocks.split_at_mut(4);
+        Self::chroma_coeffs(&self.u, &new.u, u_blocks, cq_dc, cq_ac);
+        Self::chroma_coeffs(&self.v, &new.v, v_blocks, cq_dc, cq_ac);
+
+        if is16 {
+            dc_blk.transform_dcs();
+            dc_blk.quant_dcs(q_dc, q_ac);
+        }
+    }
+    /// Fills four blocks with the transformed, quantised difference `cur - pred` of one 8x8 plane.
+    fn chroma_coeffs(pred: &[u8], cur: &[u8], blocks: &mut [Block], cq_dc: usize, cq_ac: usize) {
+        let stripes = pred.chunks(8 * 4).zip(cur.chunks(8 * 4));
+        for (blk_row, (pred_stripe, cur_stripe)) in blocks.chunks_mut(2).zip(stripes) {
+            for (col, blk) in blk_row.iter_mut().enumerate() {
+                Self::diff_blk(&cur_stripe[col * 4..], &pred_stripe[col * 4..], 8, blk);
+                blk.transform_4x4();
+                blk.quant(cq_dc, cq_ac);
+            }
+        }
+    }
+    /// Stores `src - new` for a 4x4 area into `dst.coeffs`; both inputs use the same `stride`.
+    fn diff_blk(src: &[u8], new: &[u8], stride: usize, dst: &mut Block) {
+        let rows = src.chunks(stride).zip(new.chunks(stride));
+        for (out_row, (src_row, new_row)) in dst.coeffs.chunks_mut(4).zip(rows) {
+            for (coef, (&s, &n)) in out_row.iter_mut().zip(src_row.iter().zip(new_row.iter())) {
+                *coef = i16::from(s) - i16::from(n);
+            }
+        }
+    }
+    /// Replaces every sample with the weighted combination of `ref1` and `ref2`.
+    pub fn avg(&mut self, ref1: &Self, weight1: u32, ref2: &Self, weight2: u32) {
+        fn blend(dst: &mut [u8], src1: &[u8], weight1: u32, src2: &[u8], weight2: u32) {
+            for (out, (&a, &b)) in dst.iter_mut().zip(src1.iter().zip(src2.iter())) {
+                *out = weight(a, weight1, b, weight2);
+            }
+        }
+        blend(&mut self.y, &ref1.y, weight1, &ref2.y, weight2);
+        blend(&mut self.u, &ref1.u, weight1, &ref2.u, weight2);
+        blend(&mut self.v, &ref1.v, weight1, &ref2.v, weight2);
+    }
+}
+
+/// Blends two samples: each product `pix * weight` is shifted right by 9,
+/// the two parts are summed with a rounding term of `0x10` and shifted right by 5.
+fn weight(pix1: u8, weight1: u32, pix2: u8, weight2: u32) -> u8 {
+    let part1 = (u32::from(pix1) * weight1) >> 9;
+    let part2 = (u32::from(pix2) * weight2) >> 9;
+    ((part1 + part2 + 0x10) >> 5) as u8
+}
+
+/// Returns the `(DC, AC)` chroma quantiser pair for quantiser `q`.
+///
+/// Panics if `q` is outside the 32-entry lookup tables.
+pub fn chroma_quants(q: usize) -> (usize, usize) {
+    let dc = usize::from(RV34_CHROMA_QUANT_DC[q]);
+    let ac = usize::from(RV34_CHROMA_QUANT_AC[q]);
+    (dc, ac)
+}
+
+const RV34_CHROMA_QUANT_DC: [u8; 32] = [ // chroma DC quantiser, looked up by quantiser index in chroma_quants()
+ 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+ 14, 15, 15, 16, 17, 18, 18, 19, 20, 20, 21, 21, 22, 22, 23, 23
+];
+const RV34_CHROMA_QUANT_AC: [u8; 32] = [ // chroma AC quantiser, looked up by quantiser index in chroma_quants()
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 17, 18, 19, 20, 20, 21, 22, 22, 23, 23, 24, 24, 25, 25
+];
--- /dev/null
+use nihav_core::frame::FrameType;
+use nihav_codec_support::codecs::MV;
+use super::types::*;
+use super::super::rv40data::*;
+
+pub struct BitsEstimator { // Approximate bit-cost model for macroblock headers, motion vectors and coefficient blocks
+ ftype: FrameType, // current frame type; selects the header cost table in estimate_mb_hdr
+ pred_mbt: MBType, // predicted (most probable) macroblock type, used as the first table index
+ cur_mbt: MBType, // type of the macroblock being costed, used as the second table index
+}
+
+impl BitsEstimator {
+    /// Creates an estimator set to an intra frame with both macroblock types invalid.
+    pub fn new() -> Self {
+        Self { ftype: FrameType::I, pred_mbt: MBType::Invalid, cur_mbt: MBType::Invalid }
+    }
+    /// Selects the frame type used for header cost lookups.
+    pub fn set_frame_type(&mut self, ftype: FrameType) {
+        self.ftype = ftype;
+    }
+    /// Currently a no-op: the quantiser does not influence the estimates.
+    pub fn set_quant(&mut self, _q: usize) {}
+    /// Sets the predicted (most probable) macroblock type.
+    pub fn set_pred_mb_type(&mut self, most_prob_type: MBType) {
+        self.pred_mbt = most_prob_type;
+    }
+    /// Sets the macroblock type whose cost is being estimated.
+    pub fn set_mb_type(&mut self, mbt: MBType) {
+        self.cur_mbt = mbt;
+    }
+    /// Estimates the header size in bits: the type codeword plus the cost of all `mvs`.
+    ///
+    /// Intra frames always report a single bit.
+    pub fn estimate_mb_hdr(&self, mvs: &[MV]) -> u32 {
+        let hdr_cw_bits = match self.ftype {
+            FrameType::I => return 1,
+            FrameType::P => RV40_PTYPE_BITS[self.pred_mbt.to_code()][self.cur_mbt.to_code()],
+            _ => RV40_BTYPE_BITS[self.pred_mbt.to_code()][self.cur_mbt.to_code()],
+        };
+        let mv_bits: u32 = mvs.iter().map(|&mv| Self::mv_cost(mv)).sum();
+        u32::from(hdr_cw_bits) + mv_bits
+    }
+    /// Maps a block number (0..16 luma, 24 luma DC, anything else chroma) to a cost table row.
+    fn block_no_to_type(&self, blk_no: usize) -> usize {
+        if blk_no < 16 {
+            match self.cur_mbt {
+                MBType::Intra16 | MBType::P16x16Mix => 2,
+                MBType::Intra => 1,
+                _ => 0,
+            }
+        } else if blk_no == 24 {
+            3
+        } else if self.cur_mbt.is_intra() {
+            4
+        } else {
+            5
+        }
+    }
+    /// Returns the expected coded size of `blk`, looked up by block class and non-zero coefficient count.
+    pub fn block_bits(&self, blk: &Block, blk_no: usize) -> u32 {
+        const EXPECTED_BLOCK_BITS: [[u8; 17]; 6] = [
+            [ 0, 7, 12, 17, 22, 26, 31, 35, 39, 45, 51, 56, 61, 66, 85, 103, 117],
+            [ 0, 7, 13, 19, 26, 30, 36, 43, 49, 57, 65, 74, 87, 99, 115, 131, 147],
+            [ 0, 7, 14, 20, 25, 30, 35, 40, 45, 50, 56, 62, 69, 76, 84, 93, 113],
+            [ 2, 9, 13, 20, 25, 29, 33, 38, 43, 48, 54, 62, 71, 82, 98, 116, 141],
+            [ 0, 5, 12, 18, 24, 30, 35, 42, 48, 53, 62, 69, 78, 87, 97, 106, 121],
+            [ 0, 6, 12, 17, 22, 27, 33, 40, 47, 53, 60, 66, 73, 80, 85, 85, 103]
+        ];
+
+        let row = self.block_no_to_type(blk_no);
+        u32::from(EXPECTED_BLOCK_BITS[row][blk.count_nz()])
+    }
+    /// Approximates the bits needed for one motion vector from the magnitude of its components.
+    pub fn mv_cost(mv: MV) -> u32 {
+        let xval = mv.x.abs() * 2 + 1;
+        let yval = mv.y.abs() * 2 + 1;
+        // 15 - leading_zeros is the position of the highest set bit of the 16-bit value.
+        let x_bits = 15 - xval.leading_zeros();
+        let y_bits = 15 - yval.leading_zeros();
+        x_bits * 2 + y_bits * 2 + 2
+    }
+    /// Picks a set index (2, 1 or 0) by comparing `hist[3]` and `hist[6]` with three quarters of `hist[16]`.
+    pub fn decide_set(hist: &[usize; 17]) -> usize {
+        let total = hist[16];
+        let threshold = total - total / 4;
+        match (hist[3] > threshold, hist[6] > threshold) {
+            (true, _) => 2,
+            (false, true) => 1,
+            (false, false) => 0,
+        }
+    }
+}
--- /dev/null
+use nihav_codec_support::codecs::ZERO_MV;
+
+use super::super::rv40data::*;
+
+use super::*;
+use super::dsp::*;
+use super::motion_est::MotionEstimator;
+
+const PRED_TYPES8: [PredType8x8; 4] = [ // prediction modes tried by try_intra_16_pred when both the top and the left neighbour are available
+ PredType8x8::DC, PredType8x8::Hor, PredType8x8::Ver, PredType8x8::Plane
+];
+
+/// Sum of squared differences between two `width` x `height` areas with independent strides.
+fn calc_dist(src1: &[u8], stride1: usize, src2: &[u8], stride2: usize, width: usize, height: usize) -> u32 {
+    let rows = src1.chunks(stride1).zip(src2.chunks(stride2)).take(height);
+    let mut total = 0u32;
+    for (row1, row2) in rows {
+        let row_sum: u32 = row1[..width].iter().zip(row2.iter())
+            .map(|(&a, &b)| {
+                let diff = u32::from(a.max(b)) - u32::from(a.min(b));
+                diff * diff
+            })
+            .sum();
+        total += row_sum;
+    }
+    total
+}
+
+struct SingleMacroblock { // Scratch state used while evaluating coding modes for one macroblock
+ cand_blk: RefMBData, // candidate reconstruction: prediction plus decoded residual (see get_diff_metric)
+ pred_blk: RefMBData, // prediction for the mode currently being tried
+ ref_blk: RefMBData, // source samples copied in by load()
+
+ wblk1: RefMBData, // prediction from ref_p, averaged into pred_blk for Bidir/BSkip
+ wblk2: RefMBData, // prediction from ref_n, averaged into pred_blk for Bidir/BSkip
+
+ tmpc: [Block; 25], // working coefficients; dequantised and inverse-transformed in place by get_diff_metric
+
+ ratio1: u32, // weight passed to RefMBData::avg for wblk1
+ ratio2: u32, // weight passed to RefMBData::avg for wblk2
+
+ tmp_tx: [Block; 25], // quantised coefficients of the last evaluated mode, kept for the caller to copy
+}
+
+impl SingleMacroblock {
+ /// Creates zeroed scratch state; both averaging weights start at 0.
+ fn new() -> Self {
+     let empty_coeffs = [Block::new(); 25];
+     Self {
+         cand_blk: RefMBData::new(),
+         pred_blk: RefMBData::new(),
+         ref_blk: RefMBData::new(),
+         wblk1: RefMBData::new(),
+         wblk2: RefMBData::new(),
+         tmpc: empty_coeffs,
+         ratio1: 0,
+         ratio2: 0,
+         tmp_tx: empty_coeffs,
+     }
+ }
+ /// Copies the source macroblock out of `src` into `ref_blk`.
+ ///
+ /// `offsets[i]` is the position of the top-left sample of plane `i` and
+ /// `strides[i]` its line length; 16 luma and 8 chroma samples are taken per row.
+ fn load(&mut self, src: &[u8], offsets: [usize; 3], strides: [usize; 3]) {
+     fn copy_plane(dst: &mut [u8], plane: &[u8], stride: usize, width: usize) {
+         for (out_row, in_row) in dst.chunks_mut(width).zip(plane.chunks(stride)) {
+             out_row.copy_from_slice(&in_row[..width]);
+         }
+     }
+
+     copy_plane(&mut self.ref_blk.y, &src[offsets[0]..], strides[0], 16);
+     copy_plane(&mut self.ref_blk.u, &src[offsets[1]..], strides[1], 8);
+     copy_plane(&mut self.ref_blk.v, &src[offsets[2]..], strides[2], 8);
+ }
+ /// Builds the inter prediction for macroblock (`mb_x`, `mb_y`) into `pred_blk`.
+ ///
+ /// `ref_p` is used for `Forward` and for the first vector of `Bidir`/`BSkip`;
+ /// `ref_n` is used for every other vector. Bidirectional modes render both
+ /// predictions into `wblk1`/`wblk2` and average them with `ratio1`/`ratio2`.
+ /// Must not be called with intra macroblock types.
+ ///
+ /// Each 8x8 partition fetches its chroma at
+ /// `(xpos / 2 + xadd * 4, ypos / 2 + yadd * 4)`, matching the luma call and
+ /// the destination offset. The previous code used `xadd` for the vertical
+ /// chroma offset, so the top-right and bottom-left chroma quarters were
+ /// predicted from the wrong rows.
+ fn recon_pred_part(&mut self, mbt: MacroblockType, ref_p: &NAVideoBuffer<u8>, ref_n: &NAVideoBuffer<u8>, mb_x: usize, mb_y: usize) {
+     let (xpos, ypos) = (mb_x * 16, mb_y * 16);
+
+     // Whole-macroblock prediction with a single motion vector.
+     let mc_full = |dst: &mut RefMBData, reference: &NAVideoBuffer<u8>, mv| {
+         luma_mc(&mut dst.y, 16, reference, xpos, ypos, mv, true);
+         chroma_mc(&mut dst.u, 8, reference, xpos / 2, ypos / 2, 1, mv, true);
+         chroma_mc(&mut dst.v, 8, reference, xpos / 2, ypos / 2, 2, mv, true);
+     };
+     // Prediction of 8x8 partition `i` (raster order inside the macroblock).
+     let mc_quarter = |dst: &mut RefMBData, reference: &NAVideoBuffer<u8>, i: usize, mv| {
+         let xadd = i & 1;
+         let yadd = i >> 1;
+         let luma_off = xadd * 8 + yadd * 8 * 16;
+         let chroma_off = xadd * 4 + yadd * 4 * 8;
+         let (cx, cy) = (xpos / 2 + xadd * 4, ypos / 2 + yadd * 4);
+         luma_mc(&mut dst.y[luma_off..], 16, reference, xpos + xadd * 8, ypos + yadd * 8, mv, false);
+         chroma_mc(&mut dst.u[chroma_off..], 8, reference, cx, cy, 1, mv, false);
+         chroma_mc(&mut dst.v[chroma_off..], 8, reference, cx, cy, 2, mv, false);
+     };
+
+     match mbt {
+         MacroblockType::Intra16x16(_) => unreachable!(),
+         MacroblockType::Intra4x4(_) => unreachable!(),
+         MacroblockType::Inter16x16(mv) |
+         MacroblockType::InterMix(mv) |
+         MacroblockType::Backward(mv) => {
+             mc_full(&mut self.pred_blk, ref_n, mv);
+         },
+         MacroblockType::PSkip => {
+             mc_full(&mut self.pred_blk, ref_n, ZERO_MV);
+         },
+         MacroblockType::Inter16x8(mvs) => {
+             // Upper half uses the first vector, lower half the second one.
+             let mvs = [mvs[0], mvs[0], mvs[1], mvs[1]];
+             for (i, &mv) in mvs.iter().enumerate() {
+                 mc_quarter(&mut self.pred_blk, ref_n, i, mv);
+             }
+         },
+         MacroblockType::Inter8x16(mvs) => {
+             // Left half uses the first vector, right half the second one.
+             let mvs = [mvs[0], mvs[1], mvs[0], mvs[1]];
+             for (i, &mv) in mvs.iter().enumerate() {
+                 mc_quarter(&mut self.pred_blk, ref_n, i, mv);
+             }
+         },
+         MacroblockType::Inter8x8(mvs) => {
+             for (i, &mv) in mvs.iter().enumerate() {
+                 mc_quarter(&mut self.pred_blk, ref_n, i, mv);
+             }
+         },
+         MacroblockType::Forward(mv) => {
+             mc_full(&mut self.pred_blk, ref_p, mv);
+         },
+         MacroblockType::Bidir(fmv, bmv) => {
+             mc_full(&mut self.wblk1, ref_p, fmv);
+             mc_full(&mut self.wblk2, ref_n, bmv);
+             self.pred_blk.avg(&self.wblk1, self.ratio1, &self.wblk2, self.ratio2);
+         },
+         MacroblockType::BSkip(fmvs, bmvs) => {
+             for (i, (&fmv, &bmv)) in fmvs.iter().zip(bmvs.iter()).enumerate() {
+                 mc_quarter(&mut self.wblk1, ref_p, i, fmv);
+                 mc_quarter(&mut self.wblk2, ref_n, i, bmv);
+             }
+             self.pred_blk.avg(&self.wblk1, self.ratio1, &self.wblk2, self.ratio2);
+         },
+     };
+ }
+ fn get_diff_metric(&mut self, be: &mut BitsEstimator, rdm: &RateDistMetric, best_m: u32, q_dc: usize, q_ac: usize, is16: bool, mut bits: u32) -> (u32, u32) { // Costs coding ref_blk against pred_blk; returns (metric, bits), or (metric so far, 0) as soon as the metric exceeds best_m
+ self.pred_blk.calc_coeffs(&self.ref_blk, &mut self.tmpc, q_dc, q_ac, is16); // transform and quantise the residual into tmpc
+ self.tmp_tx.copy_from_slice(&self.tmpc); // keep the quantised coefficients; tmpc is modified in place below
+ if is16 {
+ bits += be.block_bits(&self.tmpc[24], 24); // separate luma DC block
+ }
+ for blk in self.tmpc[..16].iter() {
+ bits += be.block_bits(blk, 0); // block number 0 stands for any luma block
+ }
+ for blk in self.tmpc[16..24].iter() {
+ bits += be.block_bits(blk, 16); // block number 16 stands for any chroma block
+ }
+ let cdist = rdm.get_metric(bits, 0); // rate-only check before doing any reconstruction work
+ if cdist > best_m {
+ return (cdist, 0);
+ }
+
+ for blk in self.tmpc[..16].iter_mut() {
+ blk.dequant(q_ac, q_ac);
+ }
+ let (cq_dc, cq_ac) = chroma_quants(q_ac);
+ for blk in self.tmpc[16..24].iter_mut() {
+ blk.dequant(cq_dc, cq_ac);
+ }
+ if is16 {
+ let (blocks, dc_blk) = self.tmpc.split_at_mut(24);
+ dc_blk[0].dequant_dcs(q_dc, q_ac);
+ dc_blk[0].itransform_dcs();
+ for (blk, &dc) in blocks.iter_mut().zip(dc_blk[0].coeffs.iter()) { // the zip stops after the 16 DC values, so only luma blocks are touched
+ blk.coeffs[0] = dc;
+ }
+ }
+
+ self.cand_blk.copy_from(&self.pred_blk); // reconstruction starts from the prediction
+ let mut dist = 0;
+ for (i, blk) in self.tmpc[..16].iter_mut().enumerate() {
+ let off = (i & 3) * 4 + (i >> 2) * 4 * 16;
+ if !blk.is_empty() {
+ blk.itransform_4x4();
+ blk.add_to(&mut self.cand_blk.y[off..], 16);
+ }
+ dist += calc_dist(&self.cand_blk.y[off..], 16, &self.ref_blk.y[off..], 16, 4, 4);
+ let cdist = rdm.get_metric(bits, dist);
+ if cdist > best_m {
+ return (cdist, 0);
+ }
+ }
+ let (_, cpart) = self.tmpc.split_at_mut(16);
+ let (upart, vpart) = cpart.split_at_mut(4); // vpart also holds block 24, but the zip below stops after four pairs
+ for (i, (ublk, vblk)) in upart.iter_mut().zip(vpart.iter_mut()).enumerate() {
+ let off = (i & 1) * 4 + (i >> 1) * 4 * 8;
+ ublk.itransform_4x4();
+ vblk.itransform_4x4();
+ ublk.add_to(&mut self.cand_blk.u[off..], 8);
+ vblk.add_to(&mut self.cand_blk.v[off..], 8);
+ dist += calc_dist(&self.cand_blk.u[off..], 8, &self.ref_blk.u[off..], 8, 4, 4);
+ dist += calc_dist(&self.cand_blk.v[off..], 8, &self.ref_blk.v[off..], 8, 4, 4);
+
+ let cdist = rdm.get_metric(bits, dist);
+ if cdist > best_m {
+ return (cdist, 0);
+ }
+ }
+
+ (rdm.get_metric(bits, dist), bits)
+ }
+ /// Rates using `pred_blk` as is (no residual) at a fixed cost of one bit.
+ ///
+ /// Returns `(metric, bits)`; gives up with `(metric so far, 0)` once the
+ /// metric after the luma or the `u` plane already exceeds `best_m`.
+ fn get_skip_metric(&self, rdm: &RateDistMetric, best_m: u32) -> (u32, u32) {
+     const SKIP_BITS: u32 = 1;
+
+     let mut dist = 0;
+     let checked_planes: [(&[u8], &[u8], usize); 2] = [
+         (&self.pred_blk.y, &self.ref_blk.y, 16),
+         (&self.pred_blk.u, &self.ref_blk.u, 8),
+     ];
+     for &(pred, cur, size) in checked_planes.iter() {
+         dist += calc_dist(pred, size, cur, size, size, size);
+         let metric = rdm.get_metric(SKIP_BITS, dist);
+         if metric > best_m {
+             return (metric, 0);
+         }
+     }
+     dist += calc_dist(&self.pred_blk.v, 8, &self.ref_blk.v, 8, 8, 8);
+
+     (rdm.get_metric(SKIP_BITS, dist), SKIP_BITS)
+ }
+ /// Writes the three planes of `cblk` into frame `dst` at macroblock position (`mb_x`, `mb_y`).
+ fn put_mb(dst: &mut NASimpleVideoFrame<u8>, cblk: &RefMBData, mb_x: usize, mb_y: usize) {
+     let planes: [(&[u8], usize); 3] = [(&cblk.y, 16), (&cblk.u, 8), (&cblk.v, 8)];
+     for (comp, &(pixels, size)) in planes.iter().enumerate() {
+         let stride = dst.stride[comp];
+         let start = dst.offset[comp] + mb_x * size + mb_y * size * stride;
+         for (dline, sline) in dst.data[start..].chunks_mut(stride).zip(pixels.chunks(size)) {
+             dline[..size].copy_from_slice(sline);
+         }
+     }
+ }
+}
+
+pub struct MacroblockDecider { // Tries the available coding modes for one macroblock and remembers the cheapest one found
+ pub q: usize, // quantiser index: used for RV40_QUANT_DC lookups and passed on as the AC quantiser
+ has_top: bool, // neighbour availability flags, copied from SliceState in load_mb
+ has_left: bool,
+ has_tl: bool,
+ has_tr: bool,
+ mb_x: usize, // position of the current macroblock, in macroblock units
+ mb_y: usize,
+ best_mbt: MacroblockType, // best mode found so far for the current macroblock
+ best_dist: u32, // metric of the best mode (reset to u32::MAX by load_mb)
+ best_bits: u32, // estimated bits of the best mode
+ ipred_y: IntraPred16x16, // intra predictors; their `top` rows are refreshed from top_y/top_u/top_v in load_mb
+ ipred_u: IntraPred16x16,
+ ipred_v: IntraPred16x16,
+ top_y: Vec<u8>, // per-plane line buffers sized by resize(); read as the row above the macroblock
+ top_u: Vec<u8>,
+ top_v: Vec<u8>,
+ tr_d: u32, // distance values stored by set_b_distance and used to scale reference MVs
+ tr_b: u32,
+ mb: SingleMacroblock, // scratch data for the mode being evaluated
+ best_coef: [Block; 25], // quantised coefficients of the best mode
+ best_blk: RefMBData, // reconstruction (or plain prediction for skip) of the best mode
+}
+
+impl MacroblockDecider {
+ /// Creates a decider with quantiser 0, no neighbours, empty line buffers and zeroed scratch data.
+ ///
+ /// `resize()` has to be called before the line buffers can be used.
+ pub fn new() -> Self {
+     Self {
+         q: 0,
+         has_top: false,
+         has_left: false,
+         has_tl: false,
+         has_tr: false,
+         mb_x: 0,
+         mb_y: 0,
+         best_mbt: MacroblockType::default(),
+         best_dist: 0,
+         best_bits: 0,
+         ipred_y: IntraPred16x16::new(),
+         ipred_u: IntraPred16x16::new(),
+         ipred_v: IntraPred16x16::new(),
+         top_y: Vec::new(),
+         top_u: Vec::new(),
+         top_v: Vec::new(),
+         tr_d: 0,
+         tr_b: 0,
+         mb: SingleMacroblock::new(),
+         best_coef: [Block::new(); 25],
+         best_blk: RefMBData::new(),
+     }
+ }
+ /// Resizes the top-line buffers for a picture that is `mb_w` macroblocks wide.
+ ///
+ /// Each buffer holds one extra macroblock plus one extra sample; new entries are zero.
+ pub fn resize(&mut self, mb_w: usize) {
+     let luma_len = (mb_w + 1) * 16 + 1;
+     let chroma_len = (mb_w + 1) * 8 + 1;
+     self.top_y.resize(luma_len, 0);
+     self.top_u.resize(chroma_len, 0);
+     self.top_v.resize(chroma_len, 0);
+ }
+ /// Stores the B-frame distances and derives the two averaging weights from them.
+ ///
+ /// The weights are `(tr_d - tr_b) / tr_d` and `tr_b / tr_d` scaled by `1 << 14`;
+ /// when `tr_d` is zero both weights fall back to `1 << 13`.
+ pub fn set_b_distance(&mut self, tr_b: u32, tr_d: u32) {
+     let (ratio1, ratio2) = match tr_d {
+         0 => (1 << 13, 1 << 13),
+         dist => (((dist - tr_b) << 14) / dist, (tr_b << 14) / dist),
+     };
+     self.tr_b = tr_b;
+     self.tr_d = tr_d;
+     self.mb.ratio1 = ratio1;
+     self.mb.ratio2 = ratio2;
+ }
+ /// Prepares the decider for a new macroblock.
+ ///
+ /// Takes neighbour flags and the position from `sstate`, refreshes the top
+ /// rows of the intra predictors from the line buffers, loads the source
+ /// samples from `src` and resets the best-mode bookkeeping.
+ pub fn load_mb(&mut self, src: &[u8], offsets: [usize; 3], strides: [usize; 3], sstate: &SliceState) {
+     self.has_top = sstate.has_t;
+     self.has_left = sstate.has_l;
+     self.has_tl = sstate.has_tl;
+     self.has_tr = sstate.has_tr;
+     self.mb_x = sstate.mb_x;
+     self.mb_y = sstate.mb_y;
+
+     // Entry 0 of every line buffer is skipped, hence the +1.
+     let luma_start = self.mb_x * 16 + 1;
+     let chroma_start = self.mb_x * 8 + 1;
+     self.ipred_y.top[1..].copy_from_slice(&self.top_y[luma_start..luma_start + 16]);
+     self.ipred_u.top[1..9].copy_from_slice(&self.top_u[chroma_start..chroma_start + 8]);
+     self.ipred_v.top[1..9].copy_from_slice(&self.top_v[chroma_start..chroma_start + 8]);
+
+     self.mb.load(src, offsets, strides);
+
+     self.best_mbt = MacroblockType::default();
+     self.best_dist = std::u32::MAX;
+     self.best_bits = 0;
+ }
+ pub fn try_b_coding(&mut self, ref_p: &NAVideoBuffer<u8>, ref_n: &NAVideoBuffer<u8>, be: &mut BitsEstimator, me: &mut MotionEstimator, rdm: &RateDistMetric, mbstate: &MBState, refine: bool) { // Tries skip, forward, backward and bidirectional coding in that order, keeping the cheapest; stops early once a mode beats rdm.good_enough
+ let q_dc = usize::from(RV40_QUANT_DC[1][self.q]);
+
+ let blk8_idx = mbstate.get_blk8_idx(self.mb_x, self.mb_y);
+ let mut smb_f = [ZERO_MV; 4];
+ let mut smb_b = [ZERO_MV; 4];
+ for (i, (fwd, bwd)) in smb_f.iter_mut().zip(smb_b.iter_mut()).enumerate() { // skip-mode vectors: the reference MV of each 8x8 block scaled by the B-frame distances
+ let ref_mv = mbstate.ref_mv[blk8_idx + (i & 1) + (i >> 1) * mbstate.blk8_stride];
+ let (fm, bm) = ref_mv.scale(self.tr_d, self.tr_b);
+ *fwd = fm;
+ *bwd = bm;
+ }
+ self.mb.recon_pred_part(MacroblockType::BSkip(smb_f, smb_b), ref_p, ref_n, self.mb_x, self.mb_y);
+ be.set_mb_type(MBType::Skip);
+ let (cur_dist, cur_bits) = self.mb.get_skip_metric(rdm, self.best_dist);
+ if cur_dist < self.best_dist {
+ self.best_dist = cur_dist;
+ self.best_bits = cur_bits;
+ self.best_mbt = MacroblockType::BSkip(smb_f, smb_b);
+ self.best_blk.copy_from(&self.mb.pred_blk); // skip has no residual, so the prediction is the reconstruction
+ if self.best_dist < rdm.good_enough {
+ return;
+ }
+ }
+
+ let fwd_cand = [ // forward search candidates: negated reference MV plus MVs at neighbouring indices of the 8x8 MV array (presumably left, top-left, top, top-right - confirm)
+ -mbstate.ref_mv[blk8_idx],
+ mbstate.fwd_mv[blk8_idx - 1],
+ mbstate.fwd_mv[blk8_idx - 1 - mbstate.blk8_stride],
+ mbstate.fwd_mv[blk8_idx - mbstate.blk8_stride],
+ mbstate.fwd_mv[blk8_idx + 2 - mbstate.blk8_stride]
+ ];
+ let (fmv, _fdist) = me.search_mb_p(ref_p, &self.mb.ref_blk, self.mb_x, self.mb_y, &fwd_cand);
+ be.set_mb_type(MBType::Forward);
+ let bcost = be.estimate_mb_hdr(&[fmv]);
+ self.mb.recon_pred_part(MacroblockType::Forward(fmv), ref_p, ref_n, self.mb_x, self.mb_y);
+ let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, bcost);
+ if cur_dist < self.best_dist {
+ self.best_dist = cur_dist;
+ self.best_bits = cur_bits;
+ self.best_mbt = MacroblockType::Forward(fmv);
+ self.best_coef.copy_from_slice(&self.mb.tmp_tx);
+ self.best_blk.copy_from(&self.mb.cand_blk);
+ if self.best_dist < rdm.good_enough {
+ return;
+ }
+ }
+
+ let bwd_cand = [ // backward search candidates, same neighbourhood taken from the backward MV array
+ mbstate.ref_mv[blk8_idx],
+ mbstate.bwd_mv[blk8_idx - 1],
+ mbstate.bwd_mv[blk8_idx - 1 - mbstate.blk8_stride],
+ mbstate.bwd_mv[blk8_idx - mbstate.blk8_stride],
+ mbstate.bwd_mv[blk8_idx + 2 - mbstate.blk8_stride]
+ ];
+ let (bmv, _bdist) = me.search_mb_p(ref_n, &self.mb.ref_blk, self.mb_x, self.mb_y, &bwd_cand);
+ be.set_mb_type(MBType::Backward);
+ let bcost = be.estimate_mb_hdr(&[bmv]);
+ self.mb.recon_pred_part(MacroblockType::Backward(bmv), ref_p, ref_n, self.mb_x, self.mb_y);
+ let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, bcost);
+ if cur_dist < self.best_dist {
+ self.best_dist = cur_dist;
+ self.best_bits = cur_bits;
+ self.best_mbt = MacroblockType::Backward(bmv);
+ self.best_coef.copy_from_slice(&self.mb.tmp_tx);
+ self.best_blk.copy_from(&self.mb.cand_blk);
+ if self.best_dist < rdm.good_enough {
+ return;
+ }
+ }
+
+ be.set_mb_type(MBType::Bidir);
+ let (i_fmv, i_bmv) = if !refine { // without refinement the two single-direction results are reused as the bidirectional pair
+ (fmv, bmv)
+ } else {
+ let mut b_searcher = SearchB::new(ref_p, ref_n, self.mb_x, self.mb_y, [self.mb.ratio1, self.mb.ratio2]);
+ b_searcher.search_mb(&self.mb.ref_blk, [fmv, bmv])
+ };
+
+ let bcost = be.estimate_mb_hdr(&[i_fmv, i_bmv]);
+ self.mb.recon_pred_part(MacroblockType::Bidir(i_fmv, i_bmv), ref_p, ref_n, self.mb_x, self.mb_y);
+ let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, bcost);
+ if cur_dist < self.best_dist {
+ self.best_dist = cur_dist;
+ self.best_bits = cur_bits;
+ self.best_coef.copy_from_slice(&self.mb.tmp_tx);
+ self.best_blk.copy_from(&self.mb.cand_blk);
+ self.best_mbt = MacroblockType::Bidir(i_fmv, i_bmv);
+ }
+ }
+ pub fn try_p_coding(&mut self, ref_pic: &NAVideoBuffer<u8>, be: &mut BitsEstimator, me: &mut MotionEstimator, rdm: &RateDistMetric, mbstate: &MBState) { // Tries skip, 16x16, 16x16 with separate DC block and (if the 16x16 match is poor) split partitions, keeping the cheapest mode
+ let q_dc = usize::from(RV40_QUANT_DC[1][self.q]);
+
+ self.mb.recon_pred_part(MacroblockType::Inter16x16(ZERO_MV), ref_pic, ref_pic, self.mb_x, self.mb_y); // skip candidate: zero-motion prediction without residual
+ be.set_mb_type(MBType::Skip);
+ let (cur_dist, cur_bits) = self.mb.get_skip_metric(rdm, self.best_dist);
+ if cur_dist < self.best_dist {
+ self.best_dist = cur_dist;
+ self.best_bits = cur_bits;
+ self.best_mbt = MacroblockType::PSkip;
+ self.best_blk.copy_from(&self.mb.pred_blk);
+ if self.best_dist < rdm.good_enough {
+ return;
+ }
+ }
+
+ let blk8_idx = mbstate.get_blk8_idx(self.mb_x, self.mb_y);
+ let mv_cand = [ // search candidates from neighbouring indices of the 8x8 MV array (presumably left, top-left, top, top-right - confirm)
+ mbstate.fwd_mv[blk8_idx - 1],
+ mbstate.fwd_mv[blk8_idx - 1 - mbstate.blk8_stride],
+ mbstate.fwd_mv[blk8_idx - mbstate.blk8_stride],
+ mbstate.fwd_mv[blk8_idx + 2 - mbstate.blk8_stride]
+ ];
+ let (mv, pdist) = me.search_mb_p(ref_pic, &self.mb.ref_blk, self.mb_x, self.mb_y, &mv_cand);
+
+ self.mb.recon_pred_part(MacroblockType::Inter16x16(mv), ref_pic, ref_pic, self.mb_x, self.mb_y);
+
+ be.set_mb_type(MBType::P16x16);
+ let pcost = be.estimate_mb_hdr(&[mv]);
+ let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, pcost);
+ if cur_dist < self.best_dist {
+ self.best_mbt = MacroblockType::Inter16x16(mv);
+ self.best_dist = cur_dist;
+ self.best_bits = cur_bits;
+ self.best_coef.copy_from_slice(&self.mb.tmp_tx);
+ self.best_blk.copy_from(&self.mb.cand_blk);
+ }
+ be.set_mb_type(MBType::P16x16Mix); // same prediction, but with the luma DCs coded in a separate block (is16 = true)
+ let p16cost = be.estimate_mb_hdr(&[mv]);
+ let (cur_dist16, cur_bits16) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, true, p16cost);
+ if cur_dist16 < self.best_dist {
+ self.best_mbt = MacroblockType::InterMix(mv);
+ self.best_dist = cur_dist16;
+ self.best_bits = cur_bits16;
+ self.best_coef.copy_from_slice(&self.mb.tmp_tx);
+ self.best_blk.copy_from(&self.mb.cand_blk);
+ }
+
+ if pdist > rdm.p_split_thr { // only consider splitting when the whole-macroblock match is worse than the threshold
+ let xpos = self.mb_x * 16;
+ let ypos = self.mb_y * 16;
+
+ let mv_cand = [
+ mv,
+ mbstate.fwd_mv[blk8_idx - 1],
+ mbstate.fwd_mv[blk8_idx - 1 - mbstate.blk8_stride],
+ mbstate.fwd_mv[blk8_idx - mbstate.blk8_stride],
+ mbstate.fwd_mv[blk8_idx - 1 + mbstate.blk8_stride],
+ mbstate.fwd_mv[blk8_idx + 2 - mbstate.blk8_stride],
+ mbstate.fwd_mv[blk8_idx + 1 - mbstate.blk8_stride]
+ ];
+
+ let (mv0, pdist0) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos, ypos, &mv_cand);
+ let (mv1, pdist1) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos + 8, ypos, &mv_cand);
+ let (mv2, pdist2) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos, ypos + 8, &mv_cand);
+ let (mv3, pdist3) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos + 8, ypos + 8, &mv_cand);
+ if pdist0 + pdist1 + pdist2 + pdist3 < pdist - pdist / 4 { // the split must beat the 16x16 search result by more than a quarter
+ let mvs = [mv0, mv1, mv2, mv3];
+ let (cand_mbt, cand_mbtype) = if mv0 == mv1 && mv2 == mv3 { // equal vectors within each row: two 16x8 halves
+ (MBType::P16x8, MacroblockType::Inter16x8([mv0, mv2]))
+ } else if mv0 == mv2 && mv1 == mv3 { // equal vectors within each column: two 8x16 halves
+ (MBType::P8x16, MacroblockType::Inter8x16([mv0, mv1]))
+ } else {
+ (MBType::P8x8, MacroblockType::Inter8x8(mvs))
+ };
+ be.set_mb_type(cand_mbt);
+ let pcost = be.estimate_mb_hdr(&mvs); // NOTE(review): all four vectors are costed even when the chosen type carries only two - confirm this overestimate is intended
+
+ self.mb.recon_pred_part(MacroblockType::Inter8x8(mvs), ref_pic, ref_pic, self.mb_x, self.mb_y); // the four-vector form covers all three partition shapes
+ let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, pcost);
+ if cur_dist < self.best_dist {
+ self.best_dist = cur_dist;
+ self.best_mbt = cand_mbtype;
+ self.best_bits = cur_bits;
+ self.best_coef.copy_from_slice(&self.mb.tmp_tx);
+ self.best_blk.copy_from(&self.mb.cand_blk);
+ }
+ }
+ }
+ }
+ /// Renders intra prediction `ptype` for all three planes into the scratch prediction block.
+ fn recon_intra_16_pred(&mut self, ptype: PredType8x8) {
+     let pred = &mut self.mb.pred_blk;
+     self.ipred_y.apply16(ptype, &mut pred.y, 16);
+     self.ipred_u.apply8(ptype, &mut pred.u, 8);
+     self.ipred_v.apply8(ptype, &mut pred.v, 8);
+ }
+ /// Evaluates 16x16 intra prediction and keeps it if it beats the best mode so far.
+ ///
+ /// Does nothing when the current best is already below `rdm.good_enough`.
+ /// The set of prediction types depends on which neighbours exist; `Plane`
+ /// is skipped without a top-left neighbour. The search stops at the first
+ /// type whose metric drops below `rdm.good_enough`.
+ pub fn try_intra_16_pred(&mut self, be: &mut BitsEstimator, rdm: &RateDistMetric) {
+     if self.best_dist < rdm.good_enough {
+         return;
+     }
+     let pred_types_try: &[PredType8x8] = match (self.has_top, self.has_left) {
+         (true, true) => &PRED_TYPES8,
+         (true, false) => &[PredType8x8::TopDC],
+         (false, true) => &[PredType8x8::LeftDC],
+         (false, false) => &[PredType8x8::DC128],
+     };
+
+     be.set_mb_type(MBType::Intra16);
+     let hdr_cost = be.estimate_mb_hdr(&[]);
+     let q_dc = usize::from(RV40_QUANT_DC[0][self.q]);
+     for &ptype in pred_types_try.iter() {
+         let usable = self.has_tl || !matches!(ptype, PredType8x8::Plane);
+         if usable {
+             self.recon_intra_16_pred(ptype);
+             let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, true, hdr_cost);
+             if cur_dist < self.best_dist {
+                 self.best_mbt = MacroblockType::Intra16x16(ptype);
+                 self.best_dist = cur_dist;
+                 self.best_bits = cur_bits;
+                 self.best_coef.copy_from_slice(&self.mb.tmp_tx);
+                 self.best_blk.copy_from(&self.mb.cand_blk);
+                 if cur_dist < rdm.good_enough {
+                     break;
+                 }
+             }
+         }
+     }
+ }
+ pub fn try_intra_4x4_pred(&mut self, be: &mut BitsEstimator, rdm: &RateDistMetric, mbstate: &mut MBState) {
+ const PRED4_DEF: &[PredType4x4] = &[ PredType4x4::DC128 ];
+ const PRED4_NO_TOP: &[PredType4x4] = &[ PredType4x4::Hor, PredType4x4::LeftDC ];
+ const PRED4_NO_LEFT: &[PredType4x4] = &[ PredType4x4::Ver, PredType4x4::TopDC ];
+ const PRED4_FULL: &[PredType4x4] = &[
+ PredType4x4::Ver, PredType4x4::Hor, PredType4x4::DC,
+ PredType4x4::DiagDownLeft, PredType4x4::DiagDownRight,
+ PredType4x4::VerRight, PredType4x4::HorDown,
+ PredType4x4::VerLeft, PredType4x4::HorUp
+ ];
+ const PRED4_FULL_NO_LD: &[PredType4x4] = &[
+ PredType4x4::Ver, PredType4x4::Hor, PredType4x4::DC,
+ PredType4x4::DiagDownLeftNoDown, PredType4x4::DiagDownRight,
+ PredType4x4::VerRight, PredType4x4::HorDown,
+ PredType4x4::VerLeftNoDown, PredType4x4::HorUpNoDown
+ ];
+
+ if self.best_dist < rdm.good_enough {
+ return;
+ }
+ be.set_mb_type(MBType::Intra);
+
+ let (tr_y, tr_u, tr_v) = if self.has_tr {
+ let mut tr_y = [0; 4];
+ let mut tr_u = [0; 4];
+ let mut tr_v = [0; 4];
+ tr_y.copy_from_slice(&self.top_y[self.mb_x * 16 + 16 + 1..][..4]);
+ tr_u.copy_from_slice(&self.top_u[self.mb_x * 8 + 8 + 1..][..4]);
+ tr_v.copy_from_slice(&self.top_v[self.mb_x * 8 + 8 + 1..][..4]);
+ (tr_y, tr_u, tr_v)
+ } else {
+ ([self.ipred_y.top[16]; 4], [self.ipred_u.top[8]; 4], [self.ipred_v.top[8]; 4])
+ };
+ let mut ipred4 = BlockIntra4Pred::new(&self.ipred_y, &self.ipred_u, &self.ipred_v, tr_y, tr_u, tr_v, self.has_left);
+
+ let q_ac = self.q;
+ let (cq_dc, cq_ac) = chroma_quants(self.q);
+ let mut tot_dist = 0;
+ let mut tot_bits = be.estimate_mb_hdr(&[]);
+ let mut modes = [PredType4x4::DC; 16];
+ let mut tblk = Block::new();
+ let mut has_t = self.has_top;
+
+ for y in 0..4 {
+ let mut has_l = self.has_left;
+ let mut has_ld = has_l && y != 3;
+ for x in 0..4 {
+ let list = match (has_l, has_t) {
+ (true, true) if has_ld => PRED4_FULL,
+ (true, true) => PRED4_FULL_NO_LD,
+ (false, true) => PRED4_NO_LEFT,
+ (true, false) => PRED4_NO_TOP,
+ _ => PRED4_DEF,
+ };
+
+ let do_chroma = ((x & 1) == 0) && ((y & 1) == 0);
+
+ let mut best_mode = PRED4_DEF[0];
+ let mut best_cdist = std::u32::MAX;
+ let mut best_dist = 0;
+ let mut best_bits = 0;
+ for &try_mode in list.iter() {
+ ipred4.pred_block(&mut self.mb.cand_blk, x, y, try_mode);
+ let off = x * 4 + y * 4 * 16;
+ let (mut cur_dist, mut cur_bits) = Self::blk4_diff(&self.mb.cand_blk.y[off..], &self.mb.ref_blk.y[off..], 16, q_ac, q_ac, be);
+ if do_chroma {
+ let off = x * 2 + y * 2 * 8;
+ let (du, bu) = Self::blk4_diff(&self.mb.cand_blk.u[off..], &self.mb.ref_blk.u[off..], 8, cq_dc, cq_ac, be);
+ let (dv, bv) = Self::blk4_diff(&self.mb.cand_blk.v[off..], &self.mb.ref_blk.v[off..], 8, cq_dc, cq_ac, be);
+ cur_dist += du + dv;
+ cur_bits += bu + bv;
+ }
+
+ let cand_dist = rdm.get_metric(cur_bits, cur_dist);
+ if cand_dist < best_cdist {
+ best_cdist = cand_dist;
+ best_mode = try_mode;
+ best_dist = cur_dist;
+ best_bits = cur_bits;
+ }
+ }
+
+ ipred4.pred_block(&mut self.mb.cand_blk, x, y, best_mode);
+
+ let off = x * 4 + y * 4 * 16;
+ tblk.from_diff(&self.mb.ref_blk.y[off..], &self.mb.cand_blk.y[off..], 16);
+ tblk.transform_4x4();
+ tblk.quant(q_ac, q_ac);
+ self.mb.tmp_tx[x + y * 4] = tblk;
+ if !tblk.is_empty() {
+ tblk.dequant(q_ac, q_ac);
+ tblk.itransform_4x4();
+ tblk.add_to(&mut self.mb.cand_blk.y[off..], 16);
+ }
+ if do_chroma {
+ let off = x * 2 + y * 2 * 8;
+ let mut dests = [&mut self.mb.cand_blk.u[off..], &mut self.mb.cand_blk.v[off..]];
+ let sources = [&self.mb.ref_blk.u[off..], &self.mb.ref_blk.v[off..]];
+ for (comp, (dblk, &sblk)) in dests.iter_mut().zip(sources.iter()).enumerate() {
+ tblk.from_diff(sblk, dblk, 8);
+ tblk.transform_4x4();
+ tblk.quant(cq_dc, cq_ac);
+ self.mb.tmp_tx[16 + comp * 4 + x / 2 + y] = tblk;
+ if !tblk.is_empty() {
+ tblk.dequant(cq_dc, cq_ac);
+ tblk.itransform_4x4();
+ tblk.add_to(dblk, 8);
+ }
+ }
+ }
+
+ ipred4.update_from(&self.mb.cand_blk, x, y);
+
+ tot_dist += best_dist;
+ tot_bits += best_bits;
+
+ let cand_dist = rdm.get_metric(tot_bits, tot_dist);
+ if cand_dist > self.best_dist {
+ return;
+ }
+
+ modes[x + y * 4] = best_mode;
+
+ has_l = true;
+ has_ld = false;
+ }
+ has_t = true;
+ }
+
+ mbstate.set_ipred4x4(self.mb_x, self.mb_y, &modes);
+
+ if !self.has_top {
+ let mut code = 0usize;
+ for &el in modes[..4].iter() {
+ code = code * 2 + if el.to_index() == 0 { 0 } else { 1 };
+ }
+ tot_bits += u32::from(RV40_AIC_TOP_BITS[code]);
+ }
+
+ let ystart = if self.has_top { 0 } else { 1 };
+ for y in ystart..4 {
+ let mut x = 0;
+ while x < 4 {
+ let (lctx, tctx, trctx) = mbstate.get_ipred4x4_ctx(self.mb_x, self.mb_y, x, y);
+ let ctx_word = if x < 3 {
+ ((trctx & 0xF) as u16) + (((tctx & 0xF) as u16) << 4) + (((lctx & 0xF) as u16) << 8)
+ } else { 0xFFF };
+ if let Some(idx) = RV40_AIC_PATTERNS.iter().position(|&x| x == ctx_word) {
+ let code = modes[x + y * 4].to_index() * 9 + modes[x + y * 4 + 1].to_index();
+ tot_bits += u32::from(RV40_AIC_MODE2_BITS[idx][code as usize]);
+ x += 2;
+ } else if tctx != -1 && lctx != -1 {
+ let idx = (tctx + lctx * 10) as usize;
+ let code = modes[x + y * 4].to_index() as usize;
+ tot_bits += u32::from(RV40_AIC_MODE1_BITS[idx][code]);
+ x += 1;
+ } else {
+ match lctx {
+ -1 if tctx < 2 => tot_bits += 1,
+ 0 | 2 => tot_bits += 1,
+ _ => {},
+ };
+ x += 1;
+ }
+ }
+ }
+
+ let cand_dist = rdm.get_metric(tot_bits, tot_dist);
+ if cand_dist < self.best_dist {
+ self.best_dist = cand_dist;
+ self.best_mbt = MacroblockType::Intra4x4(modes);
+ self.best_bits = tot_bits;
+ self.best_coef.copy_from_slice(&self.mb.tmp_tx);
+ self.best_blk.copy_from(&self.mb.cand_blk);
+ }
+ }
+    /// Returns the bit count recorded in `best_bits` for the currently selected coding mode.
+    pub fn get_est_bits(&self) -> u32 {
+        self.best_bits
+    }
+    /// Builds a `Macroblock` describing the best mode found so far.
+    ///
+    /// Skipped macroblocks carry freshly created (`Block::new()`) coefficient blocks,
+    /// every other type receives a copy of `best_coef`.
+    pub fn get_macroblock(&mut self) -> Macroblock {
+        let coeffs = if self.best_mbt.is_skip() {
+            [Block::new(); 25]
+        } else {
+            let mut filled = [Block::new(); 25];
+            filled.copy_from_slice(&self.best_coef);
+            filled
+        };
+        Macroblock {
+            mb_type: self.best_mbt.clone(),
+            coeffs,
+        }
+    }
+    /// Stores the chosen reconstruction (`best_blk`) into `dst` and refreshes the
+    /// intra prediction context from it.
+    ///
+    /// The bottom row of each plane goes into the `top_*` line buffers at this
+    /// macroblock's column, the old top-right corner sample becomes the new
+    /// top-left one, and the rightmost column of each plane becomes the left context.
+    pub fn recon_mb(&mut self, dst: &mut NASimpleVideoFrame<u8>) {
+        let (mb_x, mb_y) = (self.mb_x, self.mb_y);
+        let recon = &self.best_blk;
+        SingleMacroblock::put_mb(dst, recon, mb_x, mb_y);
+
+        // bottom rows of the reconstructed planes feed the prediction of the row below
+        let luma_pos = mb_x * 16 + 1;
+        let chroma_pos = mb_x * 8 + 1;
+        self.top_y[luma_pos..][..16].copy_from_slice(&recon.y[15 * 16..]);
+        self.top_u[chroma_pos..][..8].copy_from_slice(&recon.u[7 * 8..]);
+        self.top_v[chroma_pos..][..8].copy_from_slice(&recon.v[7 * 8..]);
+
+        // the last top sample of this macroblock is the corner sample of the next one
+        let corner_y = self.ipred_y.top[16];
+        self.ipred_y.top[0] = corner_y;
+        self.ipred_y.left[0] = corner_y;
+        let corner_u = self.ipred_u.top[8];
+        self.ipred_u.top[0] = corner_u;
+        self.ipred_u.left[0] = corner_u;
+        let corner_v = self.ipred_v.top[8];
+        self.ipred_v.top[0] = corner_v;
+        self.ipred_v.left[0] = corner_v;
+
+        // rightmost column of every plane becomes the left neighbour context
+        for (row, left) in recon.y.chunks_exact(16).zip(self.ipred_y.left[1..].iter_mut()) {
+            *left = row[15];
+        }
+        for (row, left) in recon.u.chunks_exact(8).zip(self.ipred_u.left[1..9].iter_mut()) {
+            *left = row[7];
+        }
+        for (row, left) in recon.v.chunks_exact(8).zip(self.ipred_v.left[1..9].iter_mut()) {
+            *left = row[7];
+        }
+    }
+    /// Evaluates coding one 4x4 block as the difference between `refsrc` and `pred`.
+    ///
+    /// The residue is transformed and quantised with `q_dc`/`q_ac`; the returned pair is
+    /// (sum of squared errors between the clipped reconstruction and `refsrc`,
+    /// bit estimate reported by `be.block_bits`).
+    fn blk4_diff(pred: &[u8], refsrc: &[u8], stride: usize, q_dc: usize, q_ac: usize, be: &mut BitsEstimator) -> (u32, u32) {
+        let mut blk = Block::new();
+        blk.from_diff(refsrc, pred, stride);
+        blk.transform_4x4();
+        blk.quant(q_dc, q_ac);
+        let bits = be.block_bits(&blk, 0);
+        // an empty block is left as is, so the prediction alone forms the reconstruction
+        if !blk.is_empty() {
+            blk.dequant(q_dc, q_ac);
+            blk.itransform_4x4();
+        }
+
+        let pred_rows = pred.chunks(stride);
+        let ref_rows = refsrc.chunks(stride);
+        let mut dist = 0u32;
+        for ((res_row, pred_row), ref_row) in blk.coeffs.chunks(4).zip(pred_rows).zip(ref_rows) {
+            for ((&res, &p), &r) in res_row.iter().zip(pred_row.iter()).zip(ref_row.iter()) {
+                let recon = (i32::from(p) + i32::from(res)).max(0).min(255);
+                let err = recon - i32::from(r);
+                dist += (err * err) as u32;
+            }
+        }
+        (dist, bits)
+    }
+}
--- /dev/null
+use std::collections::VecDeque;
+use std::str::FromStr;
+
+use nihav_core::codecs::*;
+use nihav_core::io::byteio::*;
+use nihav_core::io::bitwriter::*;
+
+mod bitstream;
+use bitstream::*;
+
+mod dsp;
+use dsp::loop_filter_frame;
+
+mod estimator;
+use estimator::*;
+
+mod mb_coding;
+use mb_coding::*;
+
+mod motion_est;
+use motion_est::*;
+
+mod ratectl;
+use ratectl::*;
+pub use ratectl::RateDistMetric;
+
+mod types;
+pub use types::*;
+
+const DEBUG_BIT_FRAMENO: u8 = 0; // bit index: print frame type and pts for every coded frame
+const DEBUG_BIT_SLICE_SIZE: u8 = 1; // bit index: print written vs. estimated bits per slice
+const DEBUG_BIT_PSNR: u8 = 2; // bit index: print PSNR and size of every coded frame
+const DEBUG_BIT_RATECTL: u8 = 3; // bit index: print rate control targets and resulting frame size
+const DEBUG_FLAG_BITS: &[(&str, u8)] = &[ // flag names as used in the option string, with their bit indices
+    ("frameno", DEBUG_BIT_FRAMENO),
+    ("slicesize", DEBUG_BIT_SLICE_SIZE),
+    ("psnr", DEBUG_BIT_PSNR),
+    ("rc", DEBUG_BIT_RATECTL),
+];
+
+/// Set of enabled debug-print categories, one bit per `DEBUG_BIT_*` index.
+#[derive(Clone,Copy,Default)]
+struct DebugFlags {
+    flags: u32,
+}
+
+impl DebugFlags {
+    /// Creates a set with no flag enabled.
+    fn new() -> Self { Self::default() }
+    /// Reports whether the flag with the given bit index is enabled.
+    fn is_set(self, bit: u8) -> bool { (self.flags & (1 << bit)) != 0 }
+    /// Replaces the current flags with those named in a `+`-separated list.
+    ///
+    /// Names are looked up in `DEBUG_FLAG_BITS`; names not found there are ignored.
+    fn parse(&mut self, args: &str) {
+        self.flags = 0;
+        for arg in args.split('+') {
+            for &(name, bit) in DEBUG_FLAG_BITS.iter() {
+                if name == arg {
+                    // OR instead of add: a name repeated in the list must not
+                    // carry over into the neighbouring flag bit.
+                    self.flags |= 1 << bit;
+                }
+            }
+        }
+    }
+}
+
+impl std::fmt::Display for DebugFlags {
+    /// Writes the names of all enabled flags joined with `+`, in `DEBUG_FLAG_BITS` order.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let enabled: Vec<&str> = DEBUG_FLAG_BITS.iter()
+            .filter(|&&(_, bit)| self.is_set(bit))
+            .map(|&(name, _)| name)
+            .collect();
+        write!(f, "{}", enabled.join("+"))
+    }
+}
+
+/// Fixed, repeating frame type pattern.
+///
+/// Each group is a reference frame type plus the number of B-frames attached to it.
+struct StaticFrameOrder {
+    groups: Vec<(FrameType, usize)>,
+    start: bool,
+    cur_grp: usize,
+    cur_frm: usize,
+}
+
+impl StaticFrameOrder {
+    /// Returns the size of the largest group (its reference frame plus its B-frames), at least 1.
+    fn get_max_grp_len(&self) -> usize {
+        self.groups.iter()
+            .map(|&(_, num_b)| 1 + num_b)
+            .fold(1, |longest, len| longest.max(len))
+    }
+    /// Returns the type of the next frame to code and its position in the frame queue.
+    ///
+    /// The reference frame of a group is reported at position equal to its B-frame count,
+    /// B-frames and the very first frame are reported at position 0.
+    fn peek_next_frame(&self) -> (FrameType, usize) {
+        if self.start {
+            return (FrameType::I, 0);
+        }
+        let (grp_type, b_run) = self.groups[self.cur_grp];
+        if self.cur_frm == 0 {
+            (grp_type, b_run)
+        } else {
+            (FrameType::B, 0)
+        }
+    }
+    /// Returns the type for the next coded frame and advances the pattern position.
+    fn next_frame(&mut self) -> FrameType {
+        if self.start {
+            // the first frame is always intra; continue with the second group if there is one
+            self.start = false;
+            self.cur_grp = usize::from(self.groups.len() > 1);
+            self.cur_frm = 0;
+            return FrameType::I;
+        }
+
+        let (grp_type, b_run) = self.groups[self.cur_grp];
+        let frm_type = if self.cur_frm == 0 { grp_type } else { FrameType::B };
+        self.cur_frm += 1;
+        if self.cur_frm > b_run {
+            // group finished, move to the next one wrapping around at the end
+            self.cur_frm = 0;
+            self.cur_grp = (self.cur_grp + 1) % self.groups.len();
+        }
+        frm_type
+    }
+}
+
+impl std::fmt::Display for StaticFrameOrder {
+    /// Writes the pattern as letters: `I` for intra groups, `P` for any other group type,
+    /// each followed by one `B` per B-frame of the group.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let mut pattern = String::with_capacity(self.groups.len() * 2);
+        for &(ftype, b_run) in self.groups.iter() {
+            pattern.push(if matches!(ftype, FrameType::I) { 'I' } else { 'P' });
+            pattern.extend(std::iter::repeat('B').take(b_run));
+        }
+        write!(f, "{}", pattern)
+    }
+}
+
+/// Frame order where the type of a frame is decided while encoding.
+///
+/// `FrameType::Other` marks a frame whose type still has to be supplied via `update()`.
+struct DynamicFrameOrder {
+    cur_ft: FrameType,
+    next_ft: FrameType,
+    p_count: usize,
+}
+
+/// Number of P-frames after which `next_frame()` forces an I-frame.
+const NUM_GOP_KF: usize = 8;
+
+impl DynamicFrameOrder {
+    /// Starts with an I-frame followed by an undecided frame.
+    fn new() -> Self {
+        Self {
+            cur_ft: FrameType::I,
+            next_ft: FrameType::Other,
+            p_count: 0,
+        }
+    }
+    /// Returns the pending frame type and its queue position.
+    ///
+    /// Position 1 is reported while the type is undecided or when a B-frame is scheduled
+    /// right after the pending frame, position 0 otherwise.
+    fn peek_next_frame(&self) -> (FrameType, usize) {
+        let second_in_queue = self.cur_ft == FrameType::Other || self.next_ft == FrameType::B;
+        (self.cur_ft, usize::from(second_in_queue))
+    }
+    /// Returns the type for the frame being coded and shifts the schedule by one frame.
+    fn next_frame(&mut self) -> FrameType {
+        if self.cur_ft == FrameType::P {
+            self.p_count += 1;
+            if self.p_count >= NUM_GOP_KF {
+                // enough P-frames in a row, turn this one into a keyframe
+                self.cur_ft = FrameType::I;
+                self.p_count = 0;
+            }
+        }
+        let emitted = std::mem::replace(&mut self.cur_ft, self.next_ft);
+        self.next_ft = match self.cur_ft {
+            FrameType::B => FrameType::P,
+            _ => FrameType::Other,
+        };
+        emitted
+    }
+    /// Supplies the decision for an undecided frame; does nothing if no decision is pending.
+    ///
+    /// A B-frame decision schedules a P-frame first and the B-frame after it.
+    fn update(&mut self, ftype: FrameType) {
+        if self.cur_ft != FrameType::Other {
+            return;
+        }
+        let (cur, next) = if ftype == FrameType::B {
+            (FrameType::P, FrameType::B)
+        } else {
+            (ftype, FrameType::Other)
+        };
+        self.cur_ft = cur;
+        self.next_ft = next;
+    }
+}
+
+impl std::fmt::Display for DynamicFrameOrder {
+    /// Always writes the literal name `dynamic`.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        f.write_str("dynamic")
+    }
+}
+
+/// Frame type scheduling strategy: a fixed pattern or decisions made during encoding.
+enum FrameOrder {
+    Static(StaticFrameOrder),
+    Dynamic(DynamicFrameOrder),
+}
+
+impl FrameOrder {
+    /// Creates the default (dynamic) frame order.
+    fn new() -> Self {
+        FrameOrder::Dynamic(DynamicFrameOrder::new())
+    }
+    /// Returns the largest number of frames forming one group (always 2 for dynamic order).
+    fn get_max_grp_len(&self) -> usize {
+        match self {
+            FrameOrder::Static(order) => order.get_max_grp_len(),
+            FrameOrder::Dynamic(_) => 2,
+        }
+    }
+    /// Forwards to the `peek_next_frame()` of the active strategy.
+    fn peek_next_frame(&self) -> (FrameType, usize) {
+        match self {
+            FrameOrder::Static(order) => order.peek_next_frame(),
+            FrameOrder::Dynamic(order) => order.peek_next_frame(),
+        }
+    }
+    /// Forwards to the `next_frame()` of the active strategy.
+    fn next_frame(&mut self) -> FrameType {
+        match self {
+            FrameOrder::Static(order) => order.next_frame(),
+            FrameOrder::Dynamic(order) => order.next_frame(),
+        }
+    }
+    /// Passes a frame type decision to the dynamic strategy; a static order ignores it.
+    fn update(&mut self, ftype: FrameType) {
+        match self {
+            FrameOrder::Dynamic(order) => order.update(ftype),
+            FrameOrder::Static(_) => {},
+        }
+    }
+    /// Tells whether frame types are decided during encoding.
+    fn is_dynamic(&self) -> bool {
+        match self {
+            FrameOrder::Dynamic(_) => true,
+            FrameOrder::Static(_) => false,
+        }
+    }
+}
+
+impl std::fmt::Display for FrameOrder {
+    /// Formats the active strategy using its own `Display` implementation.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            FrameOrder::Static(order) => std::fmt::Display::fmt(order, f),
+            FrameOrder::Dynamic(order) => std::fmt::Display::fmt(order, f),
+        }
+    }
+}
+
+/// Reasons for rejecting a frame order string.
+#[derive(Clone,Copy,Debug)]
+enum ParseError {
+    TooShort,
+    TooLong,
+    InvalidValue,
+    InvalidCombination,
+}
+
+impl FromStr for FrameOrder {
+    type Err = ParseError;
+    /// Parses either the word `dynamic` or a pattern made of `I`, `P` and `B` letters.
+    ///
+    /// Letters are case-insensitive, spaces and commas are skipped. The pattern must start
+    /// with `I`, hold at most 16 frames and at most 4 consecutive B-frames.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        if s == "dynamic" {
+            return Ok(FrameOrder::Dynamic(DynamicFrameOrder::new()));
+        }
+
+        let mut ftypes = Vec::new();
+        for ch in s.bytes() {
+            let parsed = match ch.to_ascii_uppercase() {
+                b'I' => Some(FrameType::I),
+                b'P' => Some(FrameType::P),
+                b'B' => Some(FrameType::B),
+                b' ' | b',' => None,
+                _ => return Err(ParseError::InvalidValue),
+            };
+            if let Some(ft) = parsed {
+                ftypes.push(ft);
+            }
+            if ftypes.len() > 16 {
+                return Err(ParseError::TooLong);
+            }
+        }
+
+        let first = match ftypes.first() {
+            Some(&ft) => ft,
+            None => return Err(ParseError::TooShort),
+        };
+        if first != FrameType::I {
+            return Err(ParseError::InvalidCombination);
+        }
+
+        // fold the flat list into (reference type, number of B-frames) groups
+        let mut groups = Vec::new();
+        let mut anchor = first;
+        let mut b_run = 0;
+        for &ft in ftypes.iter().skip(1) {
+            if ft == FrameType::B {
+                b_run += 1;
+                if b_run > 4 {
+                    return Err(ParseError::InvalidCombination);
+                }
+            } else {
+                groups.push((anchor, b_run));
+                anchor = ft;
+                b_run = 0;
+            }
+        }
+        groups.push((anchor, b_run));
+
+        Ok(FrameOrder::Static(StaticFrameOrder {
+            groups,
+            start: true,
+            cur_grp: 0,
+            cur_frm: 0,
+        }))
+    }
+}
+
+struct RV40Encoder { // encoder state: settings, coding components, frame queue and reference pictures
+    stream: Option<NAStreamRef>, // output stream, set by init()
+    vinfo: NAVideoInfo, // picture format used for allocating pool buffers
+    width: usize,
+    height: usize,
+    mb_w: usize, // picture width in 16x16 macroblocks
+    mb_h: usize, // picture height in 16x16 macroblocks
+
+    slice_bits: u32, // soft slice size limit in bits
+    deblock: bool, // in-loop filter enabled
+    force_set: Option<usize>, // forced coding set, None = decide per slice
+
+    fce: FrameComplexityEstimate,
+    mbd: MacroblockDecider,
+    order: FrameOrder,
+    brc: BitRateControl,
+    rdm: RateDistMetric,
+    be: BitsEstimator,
+    me: MotionEstimator,
+    cset: CodeSets,
+
+    sstate: SliceState,
+    mbstate: MBState,
+    mbs: VecDeque<Macroblock>, // macroblocks of the current slice waiting to be written
+    dblk: Vec<DeblockInfo>, // per-macroblock data for the loop filter
+
+    qframes: Vec<NAFrame>, // copies of input frames not encoded yet
+    frm_pool: NAVideoBufferPool<u8>,
+    ref_p: NAVideoBufferRef<u8>, // older reference picture
+    ref_n: NAVideoBufferRef<u8>, // most recently coded reference picture
+    pkts: VecDeque<NAPacket>, // encoded packets waiting for get_packet()
+
+    p_pts: u64, // timestamp belonging to ref_p
+    n_pts: u64, // timestamp belonging to ref_n
+    last_k_ts: u64, // timestamp of the last coded I- or P-frame
+    last_b_ts: u64,
+
+    needs_alloc: bool, // pool has to be (re)allocated before the next frame is queued
+    max_grp_bufs: usize, // group length the pool size was calculated for
+
+    debug_log: DebugFlags,
+
+    refine_b: bool, // "better ME for B-frames" option
+    i4_in_b: bool, // allow intra 4x4 coding in B-frames
+}
+
+impl RV40Encoder {
+    /// Creates an encoder with default settings.
+    ///
+    /// `ref_p` and `ref_n` start as small placeholder pictures; `encode()` replaces them
+    /// with pool buffers while handling `needs_alloc`.
+    fn new() -> Self {
+        let vinfo = NAVideoInfo::new(24, 24, false, YUV420_FORMAT);
+        let vt = alloc_video_buffer(vinfo, 4).unwrap();
+        let ref_p = vt.get_vbuf().unwrap();
+        let vt = alloc_video_buffer(vinfo, 4).unwrap();
+        let ref_n = vt.get_vbuf().unwrap();
+        Self {
+            stream: None,
+            vinfo,
+            width: 0,
+            height: 0,
+            mb_w: 0,
+            mb_h: 0,
+
+            slice_bits: 10000,
+            deblock: true,
+            force_set: None,
+
+            fce: FrameComplexityEstimate::new(),
+            mbd: MacroblockDecider::new(),
+            order: FrameOrder::new(),
+            brc: BitRateControl::new(),
+            rdm: RateDistMetric::new(),
+            be: BitsEstimator::new(),
+            me: MotionEstimator::new(),
+            cset: CodeSets::new(),
+
+            sstate: SliceState::new(),
+            mbstate: MBState::new(),
+            mbs: VecDeque::new(),
+            dblk: Vec::new(),
+
+            qframes: Vec::new(),
+            frm_pool: NAVideoBufferPool::new(0),
+            pkts: VecDeque::new(),
+            ref_p, ref_n,
+
+            p_pts: 0,
+            n_pts: 0,
+            last_k_ts: 0,
+            last_b_ts: 0,
+
+            needs_alloc: true,
+            max_grp_bufs: 0,
+
+            debug_log: DebugFlags::new(),
+
+            refine_b: false,
+            i4_in_b: false,
+        }
+    }
+    /// Encodes one frame with the type supplied by the frame order and returns its packet.
+    ///
+    /// `frameno` is the queue position the frame was taken from; for reference frames it
+    /// scales the timestamp difference reported to rate control.
+    ///
+    /// Every slice is produced in two passes: first macroblocks are decided and
+    /// reconstructed until the estimated size reaches `slice_bits`, then the decided
+    /// macroblocks are written out. Writing stops early when the real size exceeds the
+    /// limit, and the next slice starts from the first macroblock that was not written.
+    ///
+    /// # Errors
+    ///
+    /// * `AllocError` when the frame pool has no free buffer for the reconstruction;
+    /// * `InvalidParameters` when the frame carries no 8-bit video buffer;
+    /// * whatever `write_u32be()` reports while filling the slice table.
+    fn encode_frame(&mut self, frm: NAFrame, frameno: usize) -> EncoderResult<NAPacket> {
+        let ftype = self.order.next_frame();
+        let buf = frm.get_buffer();
+
+        let tinfo = frm.get_time_information();
+        let pts = NATimeInfo::ts_to_time(tinfo.pts.unwrap_or(0), 1000, tinfo.tb_num, tinfo.tb_den);
+        let fpts = (pts & 0x1FFF) as u32;
+
+        let ts_diff = if ftype == FrameType::B {
+                pts.saturating_sub(self.last_k_ts.min(self.last_b_ts)) as u32
+            } else {
+                let diff = pts.saturating_sub(self.last_k_ts) as u32;
+                diff / ((frameno + 1) as u32)
+            };
+
+        if self.debug_log.is_set(DEBUG_BIT_FRAMENO) {
+            println!("encode frame type {} pts {}", ftype, pts);
+        }
+        let is_ref_frame = matches!(ftype, FrameType::I | FrameType::P);
+
+        if !is_ref_frame {
+            // Temporal distances are needed by B-frames only; keeping the unchecked
+            // subtractions here means reference frames with irregular timestamps can no
+            // longer trip the overflow check on values nobody reads.
+            let tr_d = (self.n_pts - self.p_pts) as u32;
+            let tr_b = (pts - self.p_pts) as u32;
+            self.mbd.set_b_distance(tr_b, tr_d);
+        }
+
+        let mut rvbuf = if let Some(nfrm) = self.frm_pool.get_free() {
+                nfrm
+            } else {
+                return Err(EncoderError::AllocError);
+            };
+        let mut recon_frm = NASimpleVideoFrame::from_video_buf(&mut rvbuf).unwrap();
+
+        self.be.set_frame_type(ftype);
+        if let Some(ref vbuf) = buf.get_vbuf() {
+            let src = vbuf.get_data();
+
+            if self.brc.rate_ctl_in_use() || self.order.is_dynamic() {
+                self.fce.set_current(vbuf);
+            }
+
+            let complexity = if self.brc.rate_ctl_in_use() {
+                    self.fce.get_complexity(ftype)
+                } else { 0 };
+
+            self.mbd.q = self.brc.get_quant(ftype, complexity);
+            self.brc.init_metric(ftype, &mut self.rdm);
+            self.be.set_quant(self.mbd.q);
+            if self.debug_log.is_set(DEBUG_BIT_RATECTL) {
+                println!(" expected frame size {}", self.brc.get_target_size(ftype));
+                println!(" target quantiser {} lambda {} thresholds {} / {}", self.brc.get_last_quant(ftype), self.rdm.lambda, self.rdm.good_enough, self.rdm.p_split_thr);
+            }
+
+            let mut nslices = 0;
+            let mut dvec = Vec::new();
+            let mut mb_idx = 0;
+            let mut slice_starts = Vec::new();
+            let num_mbs = self.mb_w * self.mb_h;
+            while mb_idx < num_mbs {
+                slice_starts.push(dvec.len());
+                let mut bw = BitWriter::new(dvec, BitWriterMode::BE);
+                let slice_start_mb = mb_idx;
+
+                self.mbstate.reset();
+
+                // first pass: decide and reconstruct macroblocks until the size estimate is reached
+                let mut est_bits = 0;
+                while est_bits < self.slice_bits && mb_idx < num_mbs {
+                    let mb_x = mb_idx % self.mb_w;
+                    let mb_y = mb_idx / self.mb_w;
+                    self.sstate.has_t = mb_idx >= slice_start_mb + self.mb_w;
+                    self.sstate.has_l = (mb_idx > slice_start_mb) && (mb_x > 0);
+                    self.sstate.has_tl = (mb_idx > slice_start_mb + self.mb_w) && (mb_x > 0);
+                    self.sstate.has_tr = (mb_idx >= slice_start_mb + self.mb_w - 1) && (mb_x + 1 < self.mb_w);
+                    self.sstate.mb_x = mb_x;
+                    self.sstate.mb_y = mb_y;
+
+                    let offsets = [
+                            vbuf.get_offset(0) + mb_x * 16 + mb_y * 16 * vbuf.get_stride(0),
+                            vbuf.get_offset(1) + mb_x * 8 + mb_y * 8 * vbuf.get_stride(1),
+                            vbuf.get_offset(2) + mb_x * 8 + mb_y * 8 * vbuf.get_stride(2),
+                        ];
+                    let strides = [vbuf.get_stride(0), vbuf.get_stride(1), vbuf.get_stride(2)];
+                    self.mbd.load_mb(src, offsets, strides, &self.sstate);
+
+                    self.be.set_pred_mb_type(self.mbstate.get_pred_mbtype(&self.sstate, ftype == FrameType::B));
+                    if ftype == FrameType::B {
+                        self.mbd.try_b_coding(&self.ref_p, &self.ref_n, &mut self.be, &mut self.me, &self.rdm, &self.mbstate, self.refine_b);
+                    }
+                    if ftype == FrameType::P {
+                        self.mbd.try_p_coding(&self.ref_n, &mut self.be, &mut self.me, &self.rdm, &self.mbstate);
+                    }
+                    self.mbd.try_intra_16_pred(&mut self.be, &self.rdm);
+                    if ftype != FrameType::B || self.i4_in_b {
+                        self.mbd.try_intra_4x4_pred(&mut self.be, &self.rdm, &mut self.mbstate);
+                    }
+
+                    let mb = self.mbd.get_macroblock();
+                    est_bits += self.mbd.get_est_bits();
+                    self.mbd.recon_mb(&mut recon_frm);
+                    self.mbstate.update(&mb.mb_type, mb_x, mb_y);
+
+                    if self.deblock {
+                        self.dblk[mb_idx].q = self.mbd.q as u8;
+                        if ftype == FrameType::I {
+                            self.dblk[mb_idx].is_strong = true;
+                            self.dblk[mb_idx].cbp_y = 0xFFFF;
+                            self.dblk[mb_idx].cbp_c = 0xFF;
+                        } else {
+                            self.dblk[mb_idx].is_strong = mb.mb_type.is_intra() || mb.mb_type.is_16();
+                            // one coded-block bit per non-empty luma block
+                            let mut cbp = 0u16;
+                            let mut mask = 1;
+                            for blk in mb.coeffs[..16].iter() {
+                                if !blk.is_empty() {
+                                    cbp |= mask;
+                                }
+                                mask <<= 1;
+                            }
+                            self.dblk[mb_idx].cbp_y = cbp;
+                            // and the same for the chroma blocks
+                            let mut cbp = 0u8;
+                            let mut mask = 1;
+                            for blk in mb.coeffs[16..24].iter() {
+                                if !blk.is_empty() {
+                                    cbp |= mask;
+                                }
+                                mask <<= 1;
+                            }
+                            self.dblk[mb_idx].cbp_c = cbp;
+                        }
+                        self.mbstate.fill_deblock(&mut self.dblk[mb_idx], &self.sstate);
+                    }
+
+                    self.mbs.push_back(mb);
+
+                    mb_idx += 1;
+                }
+
+                let set_idx = if let Some(idx) = self.force_set {
+                        idx
+                    } else {
+                        // cumulative histogram of non-zero coefficient counts for the set decision
+                        let mut hist = [0usize; 17];
+                        for mb in self.mbs.iter() {
+                            let blocks = if mb.mb_type.is_16() { &mb.coeffs } else { &mb.coeffs[..24] };
+
+                            for blk in blocks.iter() {
+                                let nz = blk.count_nz();
+                                for el in hist[nz..].iter_mut() {
+                                    *el += 1;
+                                }
+                            }
+                        }
+                        BitsEstimator::decide_set(&hist)
+                    };
+
+                let start_bits = bw.tell();
+                write_slice_header(&mut bw, ftype, self.mbd.q, set_idx, self.deblock, fpts);
+                if ftype == FrameType::I {
+                    write_slice_dimensions(&mut bw, self.width, self.height);
+                } else {
+                    bw.write1(); // keep dimensions flag
+                }
+                write_slice_mb_idx(&mut bw, slice_start_mb, num_mbs);
+
+                // second pass: write the decided macroblocks, restarting from the slice start
+                mb_idx = slice_start_mb;
+                let mut skip_count = 0;
+                self.cset.init(self.mbd.q, set_idx);
+                while let Some(mb) = self.mbs.pop_front() {
+                    if bw.tell() > start_bits + (self.slice_bits as usize) {
+                        break;
+                    }
+                    let mb_x = mb_idx % self.mb_w;
+                    let mb_y = mb_idx / self.mb_w;
+                    self.sstate.has_t = mb_idx >= slice_start_mb + self.mb_w;
+                    self.sstate.has_l = (mb_idx > slice_start_mb) && (mb_x > 0);
+                    self.sstate.has_tl = (mb_idx > slice_start_mb + self.mb_w) && (mb_x > 0);
+                    self.sstate.has_tr = (mb_idx >= slice_start_mb + self.mb_w - 1) && (mb_x + 1 < self.mb_w);
+                    self.sstate.mb_x = mb_x;
+                    self.sstate.mb_y = mb_y;
+                    if mb.mb_type.is_skip() {
+                        skip_count += 1;
+                    } else {
+                        if skip_count > 0 {
+                            write_skip_count(&mut bw, skip_count);
+                            skip_count = 0;
+                        } else if ftype != FrameType::I {
+                            bw.write1(); // zero skip count
+                        }
+
+                        write_mb_header(&mut bw, ftype, &self.sstate, &self.mbstate);
+                        self.cset.set_params(&mb.mb_type);
+                        self.cset.write_coeffs(&mut bw, &mb.coeffs);
+                    }
+                    mb_idx += 1;
+                }
+                self.mbs.clear();
+                if skip_count > 0 {
+                    write_skip_count(&mut bw, skip_count);
+                }
+                // pad the slice to a byte boundary
+                while (bw.tell() & 7) != 0 {
+                    bw.write0();
+                }
+                if self.debug_log.is_set(DEBUG_BIT_SLICE_SIZE) {
+                    println!("  slice {}..{} wrote {} bits / estimated {} bits", slice_start_mb, mb_idx, bw.tell(), est_bits);
+                }
+                dvec = bw.end();
+                nslices += 1;
+            }
+            // Prepend the slice table (one count byte plus eight bytes per slice) in a single
+            // step instead of shifting the whole payload once for every inserted byte.
+            let mut framed = vec![0u8; slice_starts.len() * 8 + 1];
+            framed.append(&mut dvec);
+            dvec = framed;
+            dvec[0] = (nslices - 1) as u8;
+            for (i, &off) in slice_starts.iter().enumerate() {
+                dvec[i * 8 + 4] = 1;
+                write_u32be(&mut dvec[i * 8 + 5..], off as u32)?;
+            }
+            if self.debug_log.is_set(DEBUG_BIT_RATECTL) {
+                println!(" got frame size {}", dvec.len());
+            }
+
+            if is_ref_frame && self.deblock {
+                loop_filter_frame(&mut recon_frm, &self.dblk, self.mb_w, self.mb_h);
+            }
+
+            if self.debug_log.is_set(DEBUG_BIT_PSNR) {
+                let psnr = calc_psnr(&vbuf, &rvbuf);
+                println!("  encoded frame PSNR {} size {}", psnr, dvec.len());
+            }
+
+            if is_ref_frame {
+                // the new reconstruction becomes the newest reference, the old one moves back
+                std::mem::swap(&mut self.ref_p, &mut self.ref_n);
+                self.ref_n = rvbuf;
+
+                self.p_pts = self.n_pts;
+                self.n_pts = pts;
+
+                self.mbstate.swap_mvs();
+            }
+
+            if is_ref_frame {
+                if self.last_k_ts > self.last_b_ts {
+                    self.last_b_ts = self.last_k_ts;
+                }
+                self.last_k_ts = pts;
+                self.fce.update_ref();
+            } else {
+                self.last_b_ts = pts;
+            }
+
+            self.brc.update_stats(ftype, dvec.len(), ts_diff);
+
+            Ok(NAPacket::new(self.stream.clone().unwrap(), frm.ts, ftype == FrameType::I, dvec))
+        } else {
+            Err(EncoderError::InvalidParameters)
+        }
+    }
+}
+
+/// Calculates PSNR between two pictures over their first three components.
+///
+/// Component dimensions are taken from `pic1`. The result is infinite when the pictures
+/// contain no samples (and also when they are identical, since the error sum is zero).
+fn calc_psnr(pic1: &NAVideoBuffer<u8>, pic2: &NAVideoBuffer<u8>) -> f64 {
+    let data1 = pic1.get_data();
+    let data2 = pic2.get_data();
+    let mut sum = 0u64;
+    let mut size = 0;
+    for comp in 0..3 {
+        let (w, h) = pic1.get_dimensions(comp);
+        size += w * h;
+        let rows1 = data1[pic1.get_offset(comp)..].chunks(pic1.get_stride(comp));
+        let rows2 = data2[pic2.get_offset(comp)..].chunks(pic2.get_stride(comp));
+        for (row1, row2) in rows1.zip(rows2).take(h) {
+            for (&pix1, &pix2) in row1[..w].iter().zip(row2.iter()) {
+                let err = i64::from(pix1) - i64::from(pix2);
+                sum += (err * err) as u64;
+            }
+        }
+    }
+    if size == 0 {
+        return std::f64::INFINITY;
+    }
+    // the constant is 10 * log10(255 * 255)
+    let mse = (sum as f64) / (size as f64);
+    48.13080360867910341240 - 10.0 * mse.log10()
+}
+
+impl NAEncoder for RV40Encoder {
+    /// Proposes encoding parameters for the requested format.
+    ///
+    /// Without a format a zero-sized YUV 4:2:0 video format is offered, audio is rejected
+    /// with `FormatError`, and for video the dimensions are rounded up to a multiple of 16
+    /// with the pixel format replaced by YUV 4:2:0.
+    fn negotiate_format(&self, encinfo: &EncodeParameters) -> EncoderResult<EncodeParameters> {
+        match encinfo.format {
+            NACodecTypeInfo::None => {
+                Ok(EncodeParameters {
+                    format: NACodecTypeInfo::Video(NAVideoInfo::new(0, 0, false, YUV420_FORMAT)),
+                    ..Default::default() })
+            },
+            NACodecTypeInfo::Audio(_) => Err(EncoderError::FormatError),
+            NACodecTypeInfo::Video(vinfo) => {
+                let outinfo = NAVideoInfo::new((vinfo.width + 15) & !15, (vinfo.height + 15) & !15, false, YUV420_FORMAT);
+                let mut ofmt = *encinfo;
+                ofmt.format = NACodecTypeInfo::Video(outinfo);
+                Ok(ofmt)
+            }
+        }
+    }
+    /// Reports no capability flags.
+    fn get_capabilities(&self) -> u64 { 0 }
+    /// Configures the encoder for the given parameters and returns the output stream.
+    ///
+    /// # Errors
+    ///
+    /// `FormatError` is returned for non-video input, for a pixel format other than
+    /// YUV 4:2:0, for dimensions that are not multiples of 16 or not below 4096, and for
+    /// pictures with more than 9216 macroblocks. All checks are done before any encoder
+    /// state is changed, so a rejected configuration leaves the encoder untouched.
+    fn init(&mut self, stream_id: u32, encinfo: EncodeParameters) -> EncoderResult<NAStreamRef> {
+        match encinfo.format {
+            NACodecTypeInfo::None => Err(EncoderError::FormatError),
+            NACodecTypeInfo::Audio(_) => Err(EncoderError::FormatError),
+            NACodecTypeInfo::Video(vinfo) => {
+                if vinfo.format != YUV420_FORMAT {
+                    return Err(EncoderError::FormatError);
+                }
+                if ((vinfo.width | vinfo.height) & 15) != 0 {
+                    return Err(EncoderError::FormatError);
+                }
+                if (vinfo.width | vinfo.height) >= (1 << 12) {
+                    return Err(EncoderError::FormatError);
+                }
+
+                let mb_w = (vinfo.width + 15) >> 4;
+                let mb_h = (vinfo.height + 15) >> 4;
+                if mb_w * mb_h > 9216 {
+                    return Err(EncoderError::FormatError);
+                }
+
+                // 32-bit flags (VBR, bframes, slices, something else) and 32-bit version
+                let edata = vec![0x01, 0x08, 0x10, 0x20, 0x40, 0x00, 0x80, 0x00];
+                let out_info = NAVideoInfo::new(vinfo.width, vinfo.height, false, vinfo.format);
+                let info = NACodecInfo::new("realvideo4", NACodecTypeInfo::Video(out_info), Some(edata));
+                let mut stream = NAStream::new(StreamType::Video, stream_id, info, encinfo.tb_num, encinfo.tb_den, 0);
+                stream.set_num(stream_id as usize);
+                let stream = stream.into_ref();
+
+                self.stream = Some(stream.clone());
+
+                self.width = vinfo.width;
+                self.height = vinfo.height;
+                self.mb_w = mb_w;
+                self.mb_h = mb_h;
+
+                // quality outside 1..=100 means no forced quality
+                if (1..=100u8).contains(&encinfo.quality) {
+                    self.brc.set_force_quality(Some(encinfo.quality));
+                } else {
+                    self.brc.set_force_quality(None);
+                }
+                self.brc.set_bitrate(encinfo.bitrate);
+
+                self.vinfo = out_info;
+                let max_frames = self.order.get_max_grp_len();
+                self.frm_pool.set_dec_bufs(max_frames + 3);
+                self.max_grp_bufs = max_frames;
+                self.needs_alloc = true;
+
+                self.fce.resize(self.width, self.height);
+                self.mbstate.resize(self.mb_w, self.mb_h);
+                self.mbd.resize(self.mb_w);
+                self.dblk.resize(self.mb_w * self.mb_h, DeblockInfo::default());
+
+                Ok(stream)
+            },
+        }
+    }
+    /// Queues a copy of the input frame and encodes every frame the frame order allows.
+    ///
+    /// Produced packets are stored for `get_packet()`.
+    ///
+    /// # Errors
+    ///
+    /// `FormatError` when the frame has no 8-bit video buffer, `AllocError` when the pool
+    /// cannot provide a copy of it, `Bug` when a frame type decision is requested with
+    /// fewer than two usable queued frames, plus any error from frame encoding or pool
+    /// allocation.
+    fn encode(&mut self, frm: &NAFrame) -> EncoderResult<()> {
+        if self.needs_alloc {
+            self.frm_pool.prealloc_video(self.vinfo, 4)?;
+            self.ref_n = self.frm_pool.get_free().unwrap();
+            self.ref_p = self.frm_pool.get_free().unwrap();
+            self.needs_alloc = false;
+        }
+        if let Some(ref vbuf) = frm.get_buffer().get_vbuf() {
+            if let Some(dbuf) = self.frm_pool.get_copy(vbuf) {
+                let newfrm = NAFrame::new(frm.ts, frm.frame_type, frm.key, frm.get_info(), NABufferType::Video(dbuf));
+                self.qframes.push(newfrm);
+
+                loop {
+                    let (mut ftype, mut frame_pos) = self.order.peek_next_frame();
+                    // the frame to code next has not arrived yet
+                    if frame_pos >= self.qframes.len() {
+                        break;
+                    }
+
+                    if ftype == FrameType::Other {
+                        // undecided type: compare the two oldest queued frames to pick B or P
+                        if self.qframes.len() < 2 {
+                            return Err(EncoderError::Bug);
+                        }
+                        if let (Some(ref frm1), Some(ref frm2)) = (self.qframes[0].get_buffer().get_vbuf(), self.qframes[1].get_buffer().get_vbuf()) {
+                            let is_b = self.fce.decide_b_frame(frm1, frm2);
+                            ftype = if is_b {
+                                    frame_pos = 1;
+                                    FrameType::B
+                                } else {
+                                    frame_pos = 0;
+                                    FrameType::P
+                                };
+                        } else {
+                            return Err(EncoderError::Bug);
+                        }
+                        self.order.update(ftype);
+                    }
+
+                    let frm = self.qframes.remove(frame_pos);
+                    let pkt = self.encode_frame(frm, frame_pos)?;
+                    self.pkts.push_back(pkt);
+                }
+                Ok(())
+            } else {
+                Err(EncoderError::AllocError)
+            }
+        } else {
+            Err(EncoderError::FormatError)
+        }
+    }
+    /// Returns the oldest encoded packet, or `None` when nothing is pending.
+    fn get_packet(&mut self) -> EncoderResult<Option<NAPacket>> {
+        Ok(self.pkts.pop_front())
+    }
+    /// Does nothing and reports success.
+    // NOTE(review): frames still waiting in `qframes` are not encoded here - confirm
+    // whether dropping them at the end of the stream is intended.
+    fn flush(&mut self) -> EncoderResult<()> {
+        Ok(())
+    }
+}
+
+const DEBUG_LOG_OPTION: &str = "debug"; // option names recognised by set_options() and query_option_value()
+const SLICE_SIZE_OPTION: &str = "slice_size";
+const FRAME_ORDER_OPTION: &str = "frame_order";
+const DEBLOCK_OPTION: &str = "loop_filt";
+const QUANT_OPTION: &str = "quant";
+const QUALITY_OPTION: &str = "quality";
+const SET_OPTION: &str = "coding_set";
+const SEARCH_MODE_OPTION: &str = "me_mode";
+const SEARCH_RANGE_OPTION: &str = "me_range";
+const SEARCH_THR_OPTION: &str = "me_thr";
+const B_REFINE_OPTION: &str = "refine_b";
+const I4_IN_B_OPTION: &str = "i4_in_b";
+const B_OFFSET_OPTION: &str = "b_offset";
+
+const ENCODER_OPTS: &[NAOptionDefinition] = &[ // option table returned by get_supported_options()
+    NAOptionDefinition {
+        name: DEBUG_LOG_OPTION, description: "debug flags",
+        opt_type: NAOptionDefinitionType::String(None) },
+    NAOptionDefinition {
+        name: SLICE_SIZE_OPTION, description: "soft slice size limit in bits",
+        opt_type: NAOptionDefinitionType::Int(Some(4096), Some(100000)) },
+    NAOptionDefinition {
+        name: FRAME_ORDER_OPTION, description: "frame order (e.g. IBBPBB)",
+        opt_type: NAOptionDefinitionType::String(None) },
+    NAOptionDefinition {
+        name: DEBLOCK_OPTION, description: "in-loop filter",
+        opt_type: NAOptionDefinitionType::Bool },
+    NAOptionDefinition {
+        name: QUANT_OPTION, description: "force quantiser (-1 = none)",
+        opt_type: NAOptionDefinitionType::Int(Some(-1), Some(31)) },
+    NAOptionDefinition {
+        name: QUALITY_OPTION, description: "force quality (-1 = none)",
+        opt_type: NAOptionDefinitionType::Int(Some(-1), Some(100)) },
+    NAOptionDefinition {
+        name: SET_OPTION, description: "force coding set (-1 = none)",
+        opt_type: NAOptionDefinitionType::Int(Some(-1), Some(3)) },
+    NAOptionDefinition {
+        name: SEARCH_MODE_OPTION, description: "motion search mode",
+        opt_type: NAOptionDefinitionType::String(Some(MVSearchMode::get_possible_modes())) },
+    NAOptionDefinition {
+        name: SEARCH_RANGE_OPTION, description: "motion search range",
+        opt_type: NAOptionDefinitionType::Int(Some(0), Some(256)) },
+    NAOptionDefinition {
+        name: SEARCH_THR_OPTION, description: "motion search cut-off threshold",
+        opt_type: NAOptionDefinitionType::Int(Some(0), Some(1048576)) },
+    NAOptionDefinition {
+        name: B_REFINE_OPTION, description: "better ME for B-frames",
+        opt_type: NAOptionDefinitionType::Bool },
+    NAOptionDefinition {
+        name: I4_IN_B_OPTION, description: "allow intra 4x4 coding in B-frames",
+        opt_type: NAOptionDefinitionType::Bool },
+    NAOptionDefinition {
+        name: B_OFFSET_OPTION, description: "B-frame quantiser offset",
+        opt_type: NAOptionDefinitionType::Int(Some(0), Some(16)) },
+];
+
+impl NAOptionHandler for RV40Encoder {
+    /// Returns the table of options this encoder understands.
+    fn get_supported_options(&self) -> &[NAOptionDefinition] { ENCODER_OPTS }
+    /// Applies every option that passes the check of some entry in `ENCODER_OPTS`.
+    ///
+    /// Options with an unknown name or with a value of an unexpected kind are ignored;
+    /// an unparsable frame order or search mode only prints a message.
+    fn set_options(&mut self, options: &[NAOption]) {
+        for option in options.iter() {
+            for opt_def in ENCODER_OPTS.iter() {
+                if opt_def.check(option).is_err() {
+                    continue;
+                }
+                match (option.name, &option.value) {
+                    (DEBUG_LOG_OPTION, NAValue::String(strval)) => {
+                        self.debug_log.parse(strval);
+                    },
+                    (SLICE_SIZE_OPTION, NAValue::Int(intval)) => {
+                        self.slice_bits = *intval as u32;
+                    },
+                    (FRAME_ORDER_OPTION, NAValue::String(strval)) => {
+                        match strval.parse::<FrameOrder>() {
+                            Ok(norder) => {
+                                self.order = norder;
+                                // a longer group needs a bigger frame pool
+                                let max_frames = self.order.get_max_grp_len();
+                                if max_frames > self.max_grp_bufs {
+                                    self.frm_pool.set_dec_bufs(max_frames + 3);
+                                    self.needs_alloc = true;
+                                    self.max_grp_bufs = max_frames;
+                                }
+                            },
+                            Err(_) => {
+                                println!("Invalid order sequence");
+                            },
+                        }
+                    },
+                    (DEBLOCK_OPTION, NAValue::Bool(val)) => {
+                        self.deblock = *val;
+                    },
+                    (QUANT_OPTION, NAValue::Int(val)) => {
+                        let forced = if *val != -1 { Some(*val as usize) } else { None };
+                        self.brc.set_force_quant(forced);
+                    },
+                    (QUALITY_OPTION, NAValue::Int(val)) => {
+                        let forced = if *val != -1 { Some(*val as u8) } else { None };
+                        self.brc.set_force_quality(forced);
+                    },
+                    (SET_OPTION, NAValue::Int(val)) => {
+                        self.force_set = if *val != -1 { Some(*val as usize) } else { None };
+                    },
+                    (SEARCH_MODE_OPTION, NAValue::String(strval)) => {
+                        match strval.parse::<MVSearchMode>() {
+                            Ok(mmode) => self.me.set_mode(mmode),
+                            Err(_) => println!("Invalid mode"),
+                        }
+                    },
+                    (SEARCH_RANGE_OPTION, NAValue::Int(val)) => {
+                        self.me.range = *val as i16;
+                    },
+                    (SEARCH_THR_OPTION, NAValue::Int(val)) => {
+                        self.me.thresh = *val as u32;
+                    },
+                    (B_REFINE_OPTION, NAValue::Bool(val)) => {
+                        self.refine_b = *val;
+                    },
+                    (I4_IN_B_OPTION, NAValue::Bool(val)) => {
+                        self.i4_in_b = *val;
+                    },
+                    (B_OFFSET_OPTION, NAValue::Int(val)) => {
+                        self.brc.b_offset = *val as usize;
+                    },
+                    _ => {},
+                }
+            }
+        }
+    }
+    /// Returns the current value of the named option or `None` for an unknown name.
+    fn query_option_value(&self, name: &str) -> Option<NAValue> {
+        let value = match name {
+            DEBUG_LOG_OPTION => NAValue::String(self.debug_log.to_string()),
+            SLICE_SIZE_OPTION => NAValue::Int(self.slice_bits as i64),
+            FRAME_ORDER_OPTION => NAValue::String(self.order.to_string()),
+            DEBLOCK_OPTION => NAValue::Bool(self.deblock),
+            QUANT_OPTION => NAValue::Int(self.brc.get_force_quant().into()),
+            QUALITY_OPTION => NAValue::Int(self.brc.get_force_quality().into()),
+            SET_OPTION => NAValue::Int(self.force_set.map_or(-1, |set| set as i64)),
+            SEARCH_MODE_OPTION => NAValue::String(self.me.get_mode().to_string()),
+            SEARCH_THR_OPTION => NAValue::Int(self.me.thresh.into()),
+            SEARCH_RANGE_OPTION => NAValue::Int(self.me.range.into()),
+            B_REFINE_OPTION => NAValue::Bool(self.refine_b),
+            I4_IN_B_OPTION => NAValue::Bool(self.i4_in_b),
+            B_OFFSET_OPTION => NAValue::Int(self.brc.b_offset as i64),
+            _ => return None,
+        };
+        Some(value)
+    }
+}
+
+/// Creates a new RealVideo 4 encoder instance with default settings.
+pub fn get_encoder() -> Box<dyn NAEncoder + Send> {
+    let encoder = RV40Encoder::new();
+    Box::new(encoder)
+}
+
+// Encoder regression tests: each one encodes the first frames of a Y4M sample
+// and compares the MD5 of the muxed output with a stored reference hash.
+#[cfg(test)]
+mod test {
+    use nihav_core::codecs::*;
+    use nihav_core::demuxers::*;
+    use nihav_core::muxers::*;
+    use crate::*;
+    use nihav_codec_support::test::enc_video::*;
+    use nihav_commonfmt::*;
+
+    // Shared driver: decodes `assets/day3b.y4m` with the "yuv4mpeg" demuxer,
+    // feeds it to the "realvideo4" encoder with `enc_options`, muxes into
+    // "realmedia" and passes everything plus `hash` to `test_encoding_md5`.
+    // `limit` is handed to the decoder parameters unchanged.
+    // `out_name` is only used by the commented-out write-to-file variant below,
+    // hence the `unused_variables` allowance.
+    #[allow(unused_variables)]
+    fn encode_test(out_name: &'static str, enc_options: &[NAOption], limit: Option<u64>, hash: &[u32; 4]) {
+        let mut dmx_reg = RegisteredDemuxers::new();
+        generic_register_all_demuxers(&mut dmx_reg);
+        let mut dec_reg = RegisteredDecoders::new();
+        generic_register_all_decoders(&mut dec_reg);
+        let mut mux_reg = RegisteredMuxers::new();
+        realmedia_register_all_muxers(&mut mux_reg);
+        let mut enc_reg = RegisteredEncoders::new();
+        realmedia_register_all_encoders(&mut enc_reg);
+
+        // sample from private collection
+        let dec_config = DecoderTestParams {
+            demuxer: "yuv4mpeg",
+            in_name: "assets/day3b.y4m",
+            stream_type: StreamType::Video,
+            limit,
+            dmx_reg, dec_reg,
+        };
+        let enc_config = EncoderTestParams {
+            muxer: "realmedia",
+            enc_name: "realvideo4",
+            out_name,
+            mux_reg, enc_reg,
+        };
+        // width/height are left at zero here
+        // NOTE(review): presumably zero means "take the dimensions from the source" - confirm in the test helper
+        let dst_vinfo = NAVideoInfo {
+            width: 0,
+            height: 0,
+            format: YUV420_FORMAT,
+            flipped: false,
+            bits: 12,
+        };
+        let enc_params = EncodeParameters {
+            format: NACodecTypeInfo::Video(dst_vinfo),
+            quality: 0,
+            bitrate: 300000,
+            tb_num: 0,
+            tb_den: 0,
+            flags: 0,
+        };
+        //test_encoding_to_file(&dec_config, &enc_config, enc_params, enc_options);
+        test_encoding_md5(&dec_config, &enc_config, enc_params, enc_options,
+                          hash);
+    }
+    // Frame order "I", fixed quantiser 17, no deblocking, diamond search.
+    #[test]
+    fn test_rv40_encoder_simple() {
+        let enc_options = &[
+            NAOption { name: super::FRAME_ORDER_OPTION, value: NAValue::String("I".to_owned()) },
+            NAOption { name: super::DEBLOCK_OPTION, value: NAValue::Bool(false) },
+            NAOption { name: super::QUANT_OPTION, value: NAValue::Int(17) },
+            NAOption { name: super::SEARCH_MODE_OPTION, value: NAValue::String("diamond".to_owned()) },
+        ];
+        encode_test("rv40simple.rmvb", enc_options, Some(10), &[0x03b0d743, 0x36c20dbb, 0x18fa1c9e, 0x4b2b7324]);
+    }
+    // Frame order "IBPB", fixed quantiser 17, deblocking on, hexagon search.
+    #[test]
+    fn test_rv40_encoder_ipb() {
+        let enc_options = &[
+            NAOption { name: super::FRAME_ORDER_OPTION, value: NAValue::String("IBPB".to_owned()) },
+            NAOption { name: super::DEBLOCK_OPTION, value: NAValue::Bool(true) },
+            NAOption { name: super::QUANT_OPTION, value: NAValue::Int(17) },
+            NAOption { name: super::SEARCH_MODE_OPTION, value: NAValue::String("hexagon".to_owned()) },
+        ];
+        encode_test("rv40ipb.rmvb", enc_options, Some(8), &[0xc382ab0b, 0xbcfbb02a, 0xf12a064f, 0xe6a5c2c3]);
+    }
+    // Frame order "dynamic" with no forced quantiser (the 300000 bitrate from
+    // `encode_test` applies), deblocking on, UMH search.
+    #[test]
+    fn test_rv40_encoder_advanced() {
+        let enc_options = &[
+            NAOption { name: super::FRAME_ORDER_OPTION, value: NAValue::String("dynamic".to_owned()) },
+            NAOption { name: super::DEBLOCK_OPTION, value: NAValue::Bool(true) },
+            NAOption { name: super::SEARCH_MODE_OPTION, value: NAValue::String("umh".to_owned()) },
+        ];
+        encode_test("rv40adv.rmvb", enc_options, Some(8), &[0xc4395f49, 0x0536d5f0, 0x32406834, 0xb7b634be]);
+    }
+}
--- /dev/null
+use nihav_core::frame::*;
+use nihav_codec_support::codecs::{MV, ZERO_MV};
+use std::str::FromStr;
+use super::dsp::{RefMBData, luma_mc, chroma_mc};
+
+/// Motion vector search strategy used by the motion estimator.
+#[derive(Clone,Copy,PartialEq)]
+pub enum MVSearchMode {
+    /// Performs no search at all (always reports the zero vector).
+    Dummy,
+    /// Pattern search with the diamond pattern.
+    Diamond,
+    /// Pattern search with the hexagon pattern.
+    Hexagon,
+    /// Uneven multi-hexagon search.
+    UMH,
+}
+
+impl MVSearchMode {
+    /// Lists the mode names offered to the user; "dummy" can be parsed but is not listed.
+    pub const fn get_possible_modes() -> &'static [&'static str] {
+        const MODE_NAMES: &[&str] = &["diamond", "hexagon", "umh"];
+        MODE_NAMES
+    }
+    /// Instantiates the searcher implementing this mode.
+    fn create(self) -> Box<dyn MVSearch+Send> {
+        match self {
+            Self::Dummy   => Box::new(DummySearcher{}),
+            Self::Diamond => Box::new(DiaSearch::new()),
+            Self::Hexagon => Box::new(HexSearch::new()),
+            Self::UMH     => Box::new(UnevenHexSearch::new()),
+        }
+    }
+}
+
+impl Default for MVSearchMode {
+    /// Hexagon search is the default mode.
+    fn default() -> Self {
+        Self::Hexagon
+    }
+}
+
+impl std::fmt::Display for MVSearchMode {
+    /// Writes the lowercase option name of the mode.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let name = match *self {
+            Self::Diamond => "diamond",
+            Self::Hexagon => "hexagon",
+            Self::UMH     => "umh",
+            Self::Dummy   => "dummy",
+        };
+        f.write_str(name)
+    }
+}
+
+impl FromStr for MVSearchMode {
+    type Err = ();
+    /// Parses an option name as produced by `Display`; anything else yields `Err(())`.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let mode = match s {
+            "diamond" => Self::Diamond,
+            "hexagon" => Self::Hexagon,
+            "umh"     => Self::UMH,
+            "dummy"   => Self::Dummy,
+            _ => return Err(()),
+        };
+        Ok(mode)
+    }
+}
+
+/// Largest possible distortion; also used to mark pattern points whose cost has not been evaluated yet.
+const MAX_DIST: u32 = std::u32::MAX;
+/// Early-exit distortion threshold of the pattern searches for a whole macroblock
+/// (8x8 block searches use a quarter of it).
+const DIST_THRESH: u32 = 256;
+
+/// Scaling of a vector given in whole pixels to the units used for motion vectors.
+trait FromPixels {
+    fn from_pixels(self) -> Self;
+}
+
+impl FromPixels for MV {
+    /// Multiplies both components by four
+    /// (presumably whole-pixel to quarter-pixel units - confirm against the codec MV precision).
+    fn from_pixels(self) -> MV {
+        let MV { x, y } = self;
+        MV { x: x * 4, y: y * 4 }
+    }
+}
+
+/// Diamond search pattern: the centre followed by eight points on a diamond with radius 2.
+const DIA_PATTERN: [MV; 9] = [
+    ZERO_MV,
+    MV {x: -2, y: 0},
+    MV {x: -1, y: 1},
+    MV {x: 0, y: 2},
+    MV {x: 1, y: 1},
+    MV {x: 2, y: 0},
+    MV {x: 1, y: -1},
+    MV {x: 0, y: -2},
+    MV {x: -1, y: -1}
+];
+
+/// Hexagon search pattern: the centre followed by the six vertices of a hexagon.
+const HEX_PATTERN: [MV; 7] = [
+    ZERO_MV,
+    MV {x: -2, y: 0},
+    MV {x: -1, y: 2},
+    MV {x: 1, y: 2},
+    MV {x: 2, y: 0},
+    MV {x: 1, y: -2},
+    MV {x: -1, y: -2}
+];
+
+/// The four direct neighbours (left, below, right, above) tried in the final refinement step.
+const REFINEMENT: [MV; 4] = [
+    MV {x: -1, y: 0},
+    MV {x: 0, y: 1},
+    MV {x: 1, y: 0},
+    MV {x: 0, y: -1}
+];
+
+// Body of a pattern-based motion search, expanded inside functions returning `(MV, u32)`.
+//
+// `$self` is a searcher generated by `pattern_search!` (it provides `point`, `dist`,
+// `steps`, `reset()`, `set_new_point()` and `update()`), `$mv_est` supplies the
+// `$sad_func` cost function and `mv_range`, and `$threshold` is the cost at or below
+// which the search stops early. The short form starts at the zero vector with the
+// full-pel stage enabled.
+//
+// The expansion evaluates to `(best_mv, best_dist)`, but it also contains `return`
+// statements that leave the enclosing function directly when the threshold is reached.
+macro_rules! search_template {
+    ($self: expr, $mv_est: expr, $cur_blk: expr, $mb_x: expr, $mb_y: expr, $sad_func: ident, $threshold: expr) => ({
+        search_template!($self, $mv_est, $cur_blk, $mb_x, $mb_y, $sad_func, $threshold, ZERO_MV, MAX_DIST, true)
+    });
+    ($self: expr, $mv_est: expr, $cur_blk: expr, $mb_x: expr, $mb_y: expr, $sad_func: ident, $threshold: expr, $start_mv: expr, $best_dist: expr, $fullpel_stage: expr) => ({
+        let mut best_dist = $best_dist;
+        let mut best_mv = $start_mv;
+
+        let mut min_dist;
+        let mut min_idx;
+
+        // full-pel stage: pattern points are whole-pixel offsets, scaled by `from_pixels()` for the cost function
+        if $fullpel_stage {
+            $self.reset();
+            loop {
+                let mut cur_best_dist = best_dist;
+                // evaluate only the points that have no cached cost yet
+                for (dist, &point) in $self.dist.iter_mut().zip($self.point.iter()) {
+                    if *dist == MAX_DIST {
+                        *dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, point.from_pixels(), cur_best_dist);
+                        cur_best_dist = cur_best_dist.min(*dist);
+                        if *dist <= $threshold {
+                            break;
+                        }
+                    }
+                }
+                // find the cheapest point, preferring the centre (index 0) on ties
+                min_dist = $self.dist[0];
+                min_idx = 0;
+                for (i, &dist) in $self.dist.iter().enumerate().skip(1) {
+                    if dist < min_dist {
+                        min_dist = dist;
+                        min_idx = i;
+                        if dist <= $threshold {
+                            break;
+                        }
+                    }
+                }
+                // stop when good enough, when the centre wins, when nothing improved or when the range is reached
+                if min_dist <= $threshold || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range || $self.point[min_idx].y.abs() >= $mv_est.mv_range {
+                    break;
+                }
+                best_dist = min_dist;
+                // move the whole pattern so that the winning point becomes the new centre
+                $self.update($self.steps[min_idx]);
+            }
+            best_dist = min_dist;
+            best_mv = $self.point[min_idx];
+            if best_dist <= $threshold {
+                return (best_mv.from_pixels(), best_dist);
+            }
+            // one-pixel refinement around the best full-pel position
+            for &step in REFINEMENT.iter() {
+                let mv = best_mv + step;
+                let dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, mv.from_pixels(), MAX_DIST);
+                if best_dist > dist {
+                    best_dist = dist;
+                    best_mv = mv;
+                }
+            }
+            best_mv = best_mv.from_pixels();
+            if best_dist <= $threshold {
+                return (best_mv, best_dist);
+            }
+        }
+
+        // subpel refinement
+        // same procedure as above, but the pattern offsets are applied to the vector without scaling
+        $self.set_new_point(best_mv, best_dist);
+        loop {
+            let mut cur_best_dist = best_dist;
+            for (dist, &point) in $self.dist.iter_mut().zip($self.point.iter()) {
+                if *dist == MAX_DIST {
+                    *dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, point, cur_best_dist);
+                    cur_best_dist = cur_best_dist.min(*dist);
+                    if *dist <= $threshold {
+                        break;
+                    }
+                }
+            }
+            min_dist = $self.dist[0];
+            min_idx = 0;
+            for (i, &dist) in $self.dist.iter().enumerate().skip(1) {
+                if dist < min_dist {
+                    min_dist = dist;
+                    min_idx = i;
+                    if dist <= $threshold {
+                        break;
+                    }
+                }
+            }
+            // NOTE(review): the full-pel stage limits vectors to `mv_range` pixels, this stage
+            // compares scaled vectors against `mv_range * 8` - confirm that factor is intended
+            if min_dist <= $threshold || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range * 8 || $self.point[min_idx].y.abs() >= $mv_est.mv_range * 8 {
+                break;
+            }
+            best_dist = min_dist;
+            $self.update($self.steps[min_idx]);
+        }
+        best_dist = min_dist;
+        best_mv = $self.point[min_idx];
+        if best_dist <= $threshold {
+            return (best_mv, best_dist);
+        }
+        // final single-step refinement in the same (scaled) units
+        for &step in REFINEMENT.iter() {
+            let mv = best_mv + step;
+            let dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, mv, MAX_DIST);
+            if best_dist > dist {
+                best_dist = dist;
+                best_mv = mv;
+            }
+        }
+        (best_mv, best_dist)
+    });
+}
+
+// Generates a searcher type `$struct_name` that walks the fixed point pattern `$patterns`
+// and implements `MVSearch` for it through `search_template!`.
+//
+// The generated struct keeps the current pattern positions (`point`), the cached cost of
+// every position (`dist`, `MAX_DIST` meaning "not evaluated") and the original pattern
+// offsets (`steps`).
+macro_rules! pattern_search {
+    ($struct_name: ident, $patterns: expr) => {
+        pub struct $struct_name {
+            point: [MV; $patterns.len()],
+            dist: [u32; $patterns.len()],
+            steps: &'static [MV; $patterns.len()],
+        }
+
+        impl $struct_name {
+            pub fn new() -> Self {
+                Self {
+                    point: $patterns,
+                    dist: [MAX_DIST; $patterns.len()],
+                    steps: &$patterns,
+                }
+            }
+            // puts the pattern back at the origin and forgets all cached costs
+            fn reset(&mut self) {
+                self.point = $patterns;
+                self.dist = [MAX_DIST; $patterns.len()];
+            }
+            // centres the pattern on `start`; only the first point gets the known cost `dist`
+            fn set_new_point(&mut self, start: MV, dist: u32) {
+                for (dst, &src) in self.point.iter_mut().zip(self.steps.iter()) {
+                    *dst = src + start;
+                }
+                self.dist = [MAX_DIST; $patterns.len()];
+                self.dist[0] = dist;
+            }
+            // shifts every point by `step`, carrying over the costs of positions
+            // that were already part of the previous pattern placement
+            fn update(&mut self, step: MV) {
+                let mut new_point = self.point;
+                let mut new_dist = [MAX_DIST; $patterns.len()];
+
+                for point in new_point.iter_mut() {
+                    *point += step;
+                }
+
+                for (new_point, new_dist) in new_point.iter_mut().zip(new_dist.iter_mut()) {
+                    for (&old_point, &old_dist) in self.point.iter().zip(self.dist.iter()) {
+                        if *new_point == old_point {
+                            *new_dist = old_dist;
+                            break;
+                        }
+                    }
+                }
+                self.point = new_point;
+                self.dist = new_dist;
+            }
+        }
+
+        // candidate vectors are ignored by the pattern searchers: they always start from the zero vector
+        impl MVSearch for $struct_name {
+            fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_mb: &RefMBData, mb_x: usize, mb_y: usize, _cand_mvs: &[MV]) -> (MV, u32) {
+                search_template!(self, mv_est, cur_mb, mb_x, mb_y, sad_mb, DIST_THRESH)
+            }
+            fn search_blk8(&mut self, mv_est: &mut MVEstimator, ref_blk: &RefMBData, xpos: usize, ypos: usize, _cand_mvs: &[MV]) -> (MV, u32) {
+                search_template!(self, mv_est, ref_blk, xpos, ypos, sad_blk8, DIST_THRESH / 4)
+            }
+        }
+    }
+}
+
+// the two pattern-based searchers selectable as "diamond" and "hexagon"
+pattern_search!(DiaSearch, DIA_PATTERN);
+pattern_search!(HexSearch, HEX_PATTERN);
+
+/// Sixteen-point large hexagon used by the multi-hexagon step of the UMH search.
+///
+/// The points are listed going around the hexagon and the pattern is point-symmetric:
+/// for every `(x, y)` the point `(-x, -y)` is present as well.
+///
+/// The twelfth entry used to be a second copy of `(-2, -3)`, which left `(2, -3)`
+/// untested. Correcting it changes the vectors the UMH search may pick, so encoder
+/// output (and any stored reference hash for UMH mode) can differ from before.
+const LARGE_HEX_PATTERN: [MV; 16] = [
+    MV { x: -4, y: 0 },
+    MV { x: -4, y: 1 },
+    MV { x: -4, y: 2 },
+    MV { x: -2, y: 3 },
+    MV { x: 0, y: 4 },
+    MV { x: 2, y: 3 },
+    MV { x: 4, y: 2 },
+    MV { x: 4, y: 1 },
+    MV { x: 4, y: 0 },
+    MV { x: 4, y: -1 },
+    MV { x: 4, y: -2 },
+    MV { x: 2, y: -3 },
+    MV { x: 0, y: -4 },
+    MV { x: -2, y: -3 },
+    MV { x: -4, y: -2 },
+    MV { x: -4, y: -1 }
+];
+
+/// Cross pattern with horizontal arms twice as long as the vertical ones,
+/// used by the unsymmetrical cross step of the UMH search.
+const UNSYMM_CROSS: [MV; 4] = [
+    MV { x: -2, y: 0 },
+    MV { x: 0, y: 1 },
+    MV { x: 2, y: 0 },
+    MV { x: 0, y: -1 }
+];
+
+/// Fixed-capacity list (at most 16 entries) that keeps each distinct value only once.
+#[derive(Default)]
+struct UniqueSet<T:Copy+Default> {
+    list: [T; 16],
+    count: usize,
+}
+
+impl<T:Copy+Default+PartialEq> UniqueSet<T> {
+    /// Creates an empty set.
+    fn new() -> Self {
+        Self { list: [T::default(); 16], count: 0 }
+    }
+    /// Forgets all stored values.
+    fn clear(&mut self) {
+        self.count = 0;
+    }
+    /// Returns the stored values in insertion order.
+    fn get_list(&self) -> &[T] {
+        &self.list[..self.count]
+    }
+    /// Stores `val` unless it is already present or the set is full (in which case it is dropped silently).
+    fn add(&mut self, val: T) {
+        if self.count >= self.list.len() {
+            return;
+        }
+        if self.list[..self.count].iter().any(|&stored| stored == val) {
+            return;
+        }
+        self.list[self.count] = val;
+        self.count += 1;
+    }
+}
+
+/// Small vector helpers needed by the UMH search.
+trait MVOps {
+    /// Multiplies both components by `scale`.
+    fn scale(self, scale: i16) -> Self;
+    /// Tells whether both components lie within `-range..=range`.
+    fn is_in_range(self, range: i16) -> bool;
+}
+
+impl MVOps for MV {
+    fn scale(self, scale: i16) -> MV {
+        let MV { x, y } = self;
+        MV { x: x * scale, y: y * scale }
+    }
+    fn is_in_range(self, range: i16) -> bool {
+        [self.x, self.y].iter().all(|comp| comp.abs() <= range)
+    }
+}
+
+// Tests every point of `$pattern` (scaled by `$scale`) around `$start` exactly once.
+//
+// Points outside `mv_range * 4` are skipped and the scan stops as soon as a cost below
+// `$dist_thr` is found. Evaluates to `(best_mv, best_dist, changed)` where `changed`
+// tells whether a point better than `$start` was found.
+macro_rules! single_search_step {
+    ($start:expr, $best_dist:expr, $mv_est:expr, $sad_func:ident, $ref_blk:expr, $xpos:expr, $ypos:expr, $pattern:expr, $scale:expr, $dist_thr:expr) => {{
+        let mut best_mv = $start;
+        let mut best_dist = $best_dist;
+        for point in $pattern.iter() {
+            let mv = point.scale($scale) + $start;
+            if !mv.is_in_range($mv_est.mv_range * 4) {
+                continue;
+            }
+            // the current best cost is passed along so that the cost function can give up early
+            let dist = $mv_est.$sad_func($ref_blk, $xpos, $ypos, mv, best_dist);
+            if dist < best_dist {
+                best_mv = mv;
+                best_dist = dist;
+                if best_dist < $dist_thr {
+                    break;
+                }
+            }
+        }
+        (best_mv, best_dist, best_mv != $start)
+    }}
+}
+
+/// Searcher implementing the uneven multi-hexagon ("umh") mode.
+struct UnevenHexSearch {
+    /// Scratch list of the distinct candidate vectors of the current block.
+    mv_list: UniqueSet<MV>,
+}
+
+impl UnevenHexSearch {
+    fn new() -> Self {
+        Self {
+            mv_list: UniqueSet::new(),
+        }
+    }
+    /// Derives the search starting point from the candidate vectors.
+    ///
+    /// Duplicates are dropped first. Without any candidate the zero vector is used,
+    /// a single candidate is taken as is, three candidates are combined with
+    /// `MV::pred` and any other number is averaged component-wise.
+    fn get_cand_mv(&mut self, cand_mvs: &[MV]) -> MV {
+        self.mv_list.clear();
+        for &mv in cand_mvs.iter() {
+            self.mv_list.add(mv);
+        }
+        match self.mv_list.count {
+            // an empty candidate list would otherwise divide by zero in the averaging arm
+            0 => ZERO_MV,
+            1 => self.mv_list.list[0],
+            3 => MV::pred(self.mv_list.list[0], self.mv_list.list[1], self.mv_list.list[2]),
+            _ => {
+                // sums are kept in 32 bits so that up to 16 vectors cannot overflow
+                let sum = self.mv_list.get_list().iter().fold((0i32, 0i32),
+                                |acc, mv| (acc.0 + i32::from(mv.x), acc.1 + i32::from(mv.y)));
+                MV {x: (sum.0 / (self.mv_list.count as i32)) as i16,
+                    y: (sum.1 / (self.mv_list.count as i32)) as i16}
+            },
+        }
+    }
+}
+
+// Body of the uneven multi-hexagon search, expanded inside functions returning `(MV, u32)`.
+//
+// Starting from `$cand_mv` it runs a small diamond refinement, a repeated unsymmetrical
+// cross search, a multi-hexagon search with shrinking scale and a final hexagon/diamond
+// step. Whenever the cost drops below `$cutoff` the enclosing function is left through
+// `return`; otherwise the expansion evaluates to the best `(mv, dist)` pair found.
+macro_rules! umh_search_template {
+    ($cand_mv:expr, $cutoff:expr, $mv_est:expr, $sad_func:ident, $ref_blk:expr, $xpos:expr, $ypos:expr) => {{
+        let cand_mv = $cand_mv;
+        let best_dist = $mv_est.$sad_func($ref_blk, $xpos, $ypos, cand_mv, MAX_DIST);
+        if best_dist < $cutoff {
+            return (cand_mv, best_dist);
+        }
+
+        // step 1 - small refinement search
+        let (mut cand_mv, mut best_dist, _) = single_search_step!(cand_mv, best_dist, $mv_est, $sad_func, $ref_blk, $xpos, $ypos, DIA_PATTERN, 1, $cutoff);
+        if best_dist < $cutoff {
+            return (cand_mv, best_dist);
+        }
+
+        // step 2 - unsymmetrical cross search
+        // repeated for as long as it keeps finding a better vector
+        loop {
+            let (mv, dist, changed) = single_search_step!(cand_mv, best_dist, $mv_est, $sad_func, $ref_blk, $xpos, $ypos, UNSYMM_CROSS, 4, $cutoff);
+            if !changed {
+                break;
+            }
+            cand_mv = mv;
+            best_dist = dist;
+            if best_dist < $cutoff {
+                return (mv, dist);
+            }
+        }
+
+        // step 3 - multi-hexagon grid search
+        // scales 4, 2 and 1 are tried in that order, stopping at the first scale that brings no improvement
+        let mut scale = 4;
+        while scale > 0 {
+            let (mv, dist, changed) = single_search_step!(cand_mv, best_dist, $mv_est, $sad_func, $ref_blk, $xpos, $ypos, LARGE_HEX_PATTERN, scale, $cutoff);
+            if !changed {
+                break;
+            }
+            cand_mv = mv;
+            best_dist = dist;
+            if best_dist < $cutoff {
+                return (mv, dist);
+            }
+            scale >>= 1;
+        }
+        // step 4 - final hexagon search
+        let (cand_mv, best_dist, _) = single_search_step!(cand_mv, best_dist, $mv_est, $sad_func, $ref_blk, $xpos, $ypos, HEX_PATTERN, 1, $cutoff);
+        // a last diamond step is spent only when the result is still above the cutoff
+        if best_dist > $cutoff {
+            let (mv, dist, _) = single_search_step!(cand_mv, best_dist, $mv_est, $sad_func, $ref_blk, $xpos, $ypos, DIA_PATTERN, 1, $cutoff);
+            (mv, dist)
+        } else {
+            (cand_mv, best_dist)
+        }
+    }}
+}
+
+// UMH search entry points: both start from the vector derived from the candidates
+// and differ only in the cost function and in the early-exit cutoff.
+impl MVSearch for UnevenHexSearch {
+    fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_mb: &RefMBData, mb_x: usize, mb_y: usize, cand_mvs: &[MV]) -> (MV, u32) {
+        let start_mv = self.get_cand_mv(cand_mvs);
+        let early_exit = mv_est.cutoff_thr;
+        umh_search_template!(start_mv, early_exit, mv_est, sad_mb, cur_mb, mb_x, mb_y)
+    }
+    fn search_blk8(&mut self, mv_est: &mut MVEstimator, ref_blk: &RefMBData, xpos: usize, ypos: usize, cand_mvs: &[MV]) -> (MV, u32) {
+        let start_mv = self.get_cand_mv(cand_mvs);
+        // an 8x8 block gets a quarter of the macroblock cutoff
+        let early_exit = mv_est.cutoff_thr / 4;
+        umh_search_template!(start_mv, early_exit, mv_est, sad_blk8, ref_blk, xpos, ypos)
+    }
+}
+
+/// Cost evaluator handed to the searchers: the reference picture plus the search limits.
+struct MVEstimator<'a> {
+    /// Reference picture the candidate blocks are fetched from.
+    pic: &'a NAVideoBuffer<u8>,
+    /// Maximum vector magnitude (copied from `MotionEstimator::range`).
+    mv_range: i16,
+    /// Cost below which the UMH search stops (copied from `MotionEstimator::thresh`).
+    cutoff_thr: u32,
+}
+
+// Distortion between two pixel slices.
+// Despite the name this accumulates squared differences, not absolute ones.
+macro_rules! sad {
+    ($src1:expr, $src2:expr) => {
+        $src1.iter().zip($src2.iter()).fold(0u32, |acc, (&a, &b)|
+            acc + (((i32::from(a) - i32::from(b)) * (i32::from(a) - i32::from(b))) as u32))
+    }
+}
+
+impl<'a> MVEstimator<'a> {
+    /// Distortion of macroblock (`mb_x`, `mb_y`) predicted from `self.pic` with vector `mv`.
+    ///
+    /// Luma is accumulated row by row, then the U and V planes are added. As soon as the
+    /// running sum exceeds `cur_best_dist` the partial sum is returned, so a result
+    /// above `cur_best_dist` only means "worse than the current best".
+    fn sad_mb(&self, ref_mb: &RefMBData, mb_x: usize, mb_y: usize, mv: MV, cur_best_dist: u32) -> u32 {
+        let mut dst = RefMBData::new();
+        luma_mc(&mut dst.y, 16, self.pic, mb_x * 16, mb_y * 16, mv, true);
+
+        let mut dist = 0;
+        for (dline, sline) in dst.y.chunks(16).zip(ref_mb.y.chunks(16)) {
+            dist += sad!(dline, sline);
+            if dist > cur_best_dist {
+                return dist;
+            }
+        }
+        // chroma is motion-compensated only if luma did not already exceed the limit
+        chroma_mc(&mut dst.u, 8, self.pic, mb_x * 8, mb_y * 8, 1, mv, true);
+        dist += sad!(dst.u, ref_mb.u);
+        if dist > cur_best_dist {
+            return dist;
+        }
+        chroma_mc(&mut dst.v, 8, self.pic, mb_x * 8, mb_y * 8, 2, mv, true);
+        dist += sad!(dst.v, ref_mb.v);
+
+        dist
+    }
+    /// Distortion of the 8x8 luma block at pixel position (`xpos`, `ypos`), together with
+    /// its 4x4 chroma blocks, predicted from `self.pic` with vector `mv`.
+    ///
+    /// `ref_mb` holds the whole source macroblock; the block inside it is selected from
+    /// bit 3 of `xpos`/`ypos`. Returns early with a partial sum once `cur_best_dist`
+    /// is exceeded.
+    fn sad_blk8(&self, ref_mb: &RefMBData, xpos: usize, ypos: usize, mv: MV, cur_best_dist: u32) -> u32 {
+        let mut cur_y = [0; 64];
+        let mut cur_u = [0; 16];
+        let mut cur_v = [0; 16];
+
+        let mut dist = 0;
+
+        // offset of the 8x8 block inside the 16-pixel-wide macroblock luma
+        let y_off = (xpos & 8) + (ypos & 8) * 16;
+        luma_mc(&mut cur_y, 8, self.pic, xpos, ypos, mv, false);
+        for (dline, sline) in cur_y.chunks(8).zip(ref_mb.y[y_off..].chunks(16)) {
+            dist += sad!(dline, sline);
+            if dist > cur_best_dist {
+                return dist;
+            }
+        }
+
+        // offset of the matching 4x4 block inside the 8-pixel-wide macroblock chroma
+        let c_off = (xpos & 8) / 2 + (ypos & 8) * 4;
+        chroma_mc(&mut cur_u, 4, self.pic, xpos / 2, ypos / 2, 1, mv, false);
+        for (dline, sline) in cur_u.chunks(4).zip(ref_mb.u[c_off..].chunks(8)) {
+            dist += sad!(dline, sline);
+            if dist > cur_best_dist {
+                return dist;
+            }
+        }
+        chroma_mc(&mut cur_v, 4, self.pic, xpos / 2, ypos / 2, 2, mv, false);
+        for (dline, sline) in cur_v.chunks(4).zip(ref_mb.v[c_off..].chunks(8)) {
+            dist += sad!(dline, sline);
+            if dist > cur_best_dist {
+                return dist;
+            }
+        }
+
+        dist
+    }
+}
+
+/// Interface of a motion search algorithm.
+///
+/// Both methods return the chosen motion vector together with its distortion.
+/// `cand_mvs` are candidate vectors; the UMH searcher derives its starting point
+/// from them while the other searchers ignore them.
+trait MVSearch {
+    /// Searches for a whole macroblock addressed in macroblock units.
+    fn search_mb(&mut self, mv_est: &mut MVEstimator, ref_mb: &RefMBData, mb_x: usize, mb_y: usize, cand_mvs: &[MV]) -> (MV, u32);
+    /// Searches for an 8x8 block addressed in pixels.
+    fn search_blk8(&mut self, mv_est: &mut MVEstimator, ref_blk: &RefMBData, xpos: usize, ypos: usize, cand_mvs: &[MV]) -> (MV, u32);
+}
+
+/// Searcher that does no work: it always answers with the zero vector
+/// and a very large distortion (half of the `u32` range).
+struct DummySearcher {}
+
+impl MVSearch for DummySearcher {
+    fn search_mb(&mut self, _mv_est: &mut MVEstimator, _ref_mb: &RefMBData, _mb_x: usize, _mb_y: usize, _cand_mvs: &[MV]) -> (MV, u32) {
+        let huge_dist = std::u32::MAX / 2;
+        (ZERO_MV, huge_dist)
+    }
+    fn search_blk8(&mut self, _mv_est: &mut MVEstimator, _ref_mb: &RefMBData, _xpos: usize, _ypos: usize, _cand_mvs: &[MV]) -> (MV, u32) {
+        let huge_dist = std::u32::MAX / 2;
+        (ZERO_MV, huge_dist)
+    }
+}
+
+/// Front end of the motion search: holds the user-tunable limits and the active searcher.
+pub struct MotionEstimator {
+    /// Search range passed to the searchers as `mv_range`.
+    pub range: i16,
+    /// Cutoff threshold passed to the searchers as `cutoff_thr`.
+    pub thresh: u32,
+    mode: MVSearchMode,
+    srch: Box<dyn MVSearch+Send>,
+}
+
+impl MotionEstimator {
+    /// Creates an estimator with the default search mode, range 64 and threshold 32.
+    pub fn new() -> Self {
+        let default_mode = MVSearchMode::default();
+        let srch = default_mode.create();
+        Self { range: 64, thresh: 32, mode: default_mode, srch }
+    }
+    /// Returns the active search mode.
+    pub fn get_mode(&self) -> MVSearchMode {
+        self.mode
+    }
+    /// Switches the search mode; the searcher is recreated only when the mode really changes.
+    pub fn set_mode(&mut self, new_mode: MVSearchMode) {
+        if self.mode == new_mode {
+            return;
+        }
+        self.srch = new_mode.create();
+        self.mode = new_mode;
+    }
+    /// Finds the motion vector for macroblock (`mb_x`, `mb_y`) of `refmb` in picture `pic`.
+    pub fn search_mb_p(&mut self, pic: &NAVideoBuffer<u8>, refmb: &RefMBData, mb_x: usize, mb_y: usize, cand_mvs: &[MV]) -> (MV, u32) {
+        let mut estimator = MVEstimator { pic, cutoff_thr: self.thresh, mv_range: self.range };
+        self.srch.search_mb(&mut estimator, refmb, mb_x, mb_y, cand_mvs)
+    }
+    /// Finds the motion vector for the 8x8 block at pixel position (`xoff`, `yoff`) in picture `pic`.
+    pub fn search_blk8(&mut self, pic: &NAVideoBuffer<u8>, refmb: &RefMBData, xoff: usize, yoff: usize, cand_mvs: &[MV]) -> (MV, u32) {
+        let mut estimator = MVEstimator { pic, cutoff_thr: self.thresh, mv_range: self.range };
+        self.srch.search_blk8(&mut estimator, refmb, xoff, yoff, cand_mvs)
+    }
+}
+
+/// Joint refinement of the forward and backward motion vectors of one bidirectionally
+/// predicted macroblock.
+pub struct SearchB<'a> {
+    /// Picture the first (forward) vector refers to.
+    ref_p: &'a NAVideoBuffer<u8>,
+    /// Picture the second (backward) vector refers to.
+    ref_n: &'a NAVideoBuffer<u8>,
+    /// Luma position of the macroblock in pixels.
+    xpos: usize,
+    ypos: usize,
+    /// Weights handed to `RefMBData::avg` for the two predictions.
+    ratios: [u32; 2],
+    // scratch blocks: the two single-direction predictions and their weighted average
+    tmp1: RefMBData,
+    tmp2: RefMBData,
+    pred_blk: RefMBData,
+}
+
+impl<'a> SearchB<'a> {
+    /// Prepares a search for macroblock (`mb_x`, `mb_y`) between the pictures `ref_p` and `ref_n`.
+    pub fn new(ref_p: &'a NAVideoBuffer<u8>, ref_n: &'a NAVideoBuffer<u8>, mb_x: usize, mb_y: usize, ratios: [u32; 2]) -> Self {
+        Self {
+            ref_p, ref_n,
+            xpos: mb_x * 16,
+            ypos: mb_y * 16,
+            ratios,
+            tmp1: RefMBData::new(),
+            tmp2: RefMBData::new(),
+            pred_blk: RefMBData::new(),
+        }
+    }
+    /// Refines the vector pair `cand_mvs` (forward, backward) against `ref_mb`
+    /// and returns the best pair found.
+    ///
+    /// All combinations of diamond offsets (scaled by `from_pixels`) are tried on both
+    /// vectors until a full pass brings no improvement, then one pass with all
+    /// combinations of single-step offsets follows.
+    pub fn search_mb(&mut self, ref_mb: &RefMBData, cand_mvs: [MV; 2]) -> (MV, MV) {
+        let mut best_cand = cand_mvs;
+        let mut best_dist = self.interp_b_dist(ref_mb, best_cand, MAX_DIST);
+
+        // terminates because every accepted candidate strictly lowers `best_dist`
+        loop {
+            let mut improved = false;
+            for &fmv_add in DIA_PATTERN.iter() {
+                for &bmv_add in DIA_PATTERN.iter() {
+                    let cand = [best_cand[0] + fmv_add.from_pixels(),
+                                best_cand[1] + bmv_add.from_pixels()];
+                    let dist = self.interp_b_dist(ref_mb, cand, best_dist);
+                    if dist < best_dist {
+                        best_dist = dist;
+                        best_cand = cand;
+                        improved = true;
+                    }
+                }
+            }
+            if !improved {
+                break;
+            }
+        }
+
+        for &fmv_add in REFINEMENT.iter() {
+            for &bmv_add in REFINEMENT.iter() {
+                let cand = [best_cand[0] + fmv_add, best_cand[1] + bmv_add];
+                let dist = self.interp_b_dist(ref_mb, cand, best_dist);
+                if dist < best_dist {
+                    best_dist = dist;
+                    best_cand = cand;
+                }
+            }
+        }
+
+        (best_cand[0], best_cand[1])
+    }
+    /// Distortion of the weighted bidirectional prediction built from `cand_mv` against `ref_mb`.
+    ///
+    /// Both predictions are always fully motion-compensated; only the summation stops
+    /// early (returning a partial sum) once `cur_best_dist` is exceeded.
+    fn interp_b_dist(&mut self, ref_mb: &RefMBData, cand_mv: [MV; 2], cur_best_dist: u32) -> u32 {
+        let [fmv, bmv] = cand_mv;
+        luma_mc(&mut self.tmp1.y, 16, self.ref_p, self.xpos, self.ypos, fmv, true);
+        chroma_mc(&mut self.tmp1.u, 8, self.ref_p, self.xpos / 2, self.ypos / 2, 1, fmv, true);
+        chroma_mc(&mut self.tmp1.v, 8, self.ref_p, self.xpos / 2, self.ypos / 2, 2, fmv, true);
+        luma_mc(&mut self.tmp2.y, 16, self.ref_n, self.xpos, self.ypos, bmv, true);
+        chroma_mc(&mut self.tmp2.u, 8, self.ref_n, self.xpos / 2, self.ypos / 2, 1, bmv, true);
+        chroma_mc(&mut self.tmp2.v, 8, self.ref_n, self.xpos / 2, self.ypos / 2, 2, bmv, true);
+        self.pred_blk.avg(&self.tmp1, self.ratios[0], &self.tmp2, self.ratios[1]);
+
+        let mut dist = 0;
+        for (dline, sline) in self.pred_blk.y.chunks(16).zip(ref_mb.y.chunks(16)) {
+            dist += sad!(dline, sline);
+            if dist > cur_best_dist {
+                return dist;
+            }
+        }
+        dist += sad!(self.pred_blk.u, ref_mb.u);
+        if dist > cur_best_dist {
+            return dist;
+        }
+        dist += sad!(self.pred_blk.v, ref_mb.v);
+
+        dist
+    }
+}
+
+// Four-point Hadamard butterfly: reads `$s0..$s3` and stores the transformed values
+// into `$d0..$d3`. All inputs are read into temporaries before any output is written,
+// so the destinations may be the same places as the sources.
+macro_rules! hadamard {
+    ($s0:expr, $s1:expr, $s2:expr, $s3:expr, $d0:expr, $d1:expr, $d2:expr, $d3:expr) => {
+        let t0 = $s0 + $s1;
+        let t1 = $s0 - $s1;
+        let t2 = $s2 + $s3;
+        let t3 = $s2 - $s3;
+        $d0 = t0 + t2;
+        $d2 = t0 - t2;
+        $d1 = t1 + t3;
+        $d3 = t1 - t3;
+    }
+}
+
+/// Cheap frame complexity estimator working on half-resolution copies of the frames.
+///
+/// It keeps downscaled versions of a reference, the current and the next frame and
+/// derives SATD-based complexity figures from them.
+pub struct FrameComplexityEstimate {
+    ref_frm: NAVideoBufferRef<u8>,
+    cur_frm: NAVideoBufferRef<u8>,
+    nxt_frm: NAVideoBufferRef<u8>,
+    /// Full-resolution dimensions the buffers were last sized for.
+    width: usize,
+    height: usize,
+}
+
+impl FrameComplexityEstimate {
+    /// Creates the estimator with 24x24 placeholder buffers; `resize()` allocates the real ones.
+    pub fn new() -> Self {
+        let vinfo = NAVideoInfo::new(24, 24, false, YUV420_FORMAT);
+        let vt = alloc_video_buffer(vinfo, 4).unwrap();
+        let buf = vt.get_vbuf().unwrap();
+        Self {
+            ref_frm: buf.clone(),
+            cur_frm: buf.clone(),
+            nxt_frm: buf,
+            width: 0,
+            height: 0,
+        }
+    }
+    /// Reallocates the three buffers at half of `width` x `height` when the dimensions change.
+    ///
+    /// Only the reference buffer is initialised (every byte set to 0x80).
+    pub fn resize(&mut self, width: usize, height: usize) {
+        if width != self.width || height != self.height {
+            self.width = width;
+            self.height = height;
+
+            let vinfo = NAVideoInfo::new(self.width / 2, self.height / 2, false, YUV420_FORMAT);
+            let vt = alloc_video_buffer(vinfo, 4).unwrap();
+            self.ref_frm = vt.get_vbuf().unwrap();
+            let frm = self.ref_frm.get_data_mut().unwrap();
+            for el in frm.iter_mut() {
+                *el = 0x80;
+            }
+            let vt = alloc_video_buffer(vinfo, 4).unwrap();
+            self.cur_frm = vt.get_vbuf().unwrap();
+            let vt = alloc_video_buffer(vinfo, 4).unwrap();
+            self.nxt_frm = vt.get_vbuf().unwrap();
+        }
+    }
+    /// Stores a downscaled copy of `frm` as the current frame.
+    pub fn set_current(&mut self, frm: &NAVideoBuffer<u8>) {
+        Self::downscale(&mut self.cur_frm, frm);
+    }
+    /// Complexity of the current frame when coded as `ftype`: intra SATD for I-frames,
+    /// motion-compensated SATD against the reference for P-frames and 0 for anything else.
+    pub fn get_complexity(&self, ftype: FrameType) -> u32 {
+        match ftype {
+            FrameType::I => Self::calculate_i_cplx(&self.cur_frm),
+            FrameType::P => Self::calculate_mv_diff(&self.ref_frm, &self.cur_frm),
+            _ => 0,
+        }
+    }
+    /// Decides whether `frm1` (followed by `frm2`) should be coded as a B-frame.
+    ///
+    /// Both frames are downscaled into the current/next buffers first. The answer is
+    /// `true` when the complexities ref->cur and cur->next differ by less than 256 or
+    /// by less than a quarter of the smaller one.
+    pub fn decide_b_frame(&mut self, frm1: &NAVideoBuffer<u8>, frm2: &NAVideoBuffer<u8>) -> bool {
+        Self::downscale(&mut self.cur_frm, frm1);
+        Self::downscale(&mut self.nxt_frm, frm2);
+        let diff_ref_cur = Self::calculate_mv_diff(&self.ref_frm, &self.cur_frm);
+        let diff_cur_nxt = Self::calculate_mv_diff(&self.cur_frm, &self.nxt_frm);
+
+        // simple rule - if complexity ref->cur and cur->next is about the same this should be a B-frame
+        let smaller = diff_ref_cur.min(diff_cur_nxt);
+        let ddiff = diff_ref_cur.max(diff_cur_nxt) - smaller;
+        // The relative test is done in 64 bits: shifting `ddiff` in 32 bits (as an
+        // open-ended loop did before) drops high bits for very large sums, after
+        // which the loop could never terminate properly.
+        ddiff < 256 || (u64::from(ddiff) << 2) < u64::from(smaller)
+    }
+    /// Makes the current frame the new reference (the old reference becomes the current buffer).
+    pub fn update_ref(&mut self) {
+        std::mem::swap(&mut self.ref_frm, &mut self.cur_frm);
+    }
+
+    // Top-left pixel of macroblock (mb_x, mb_y) displaced by `mv` (whole pixels here).
+    // Negative results wrap around to huge values, which the edge check in `search_mv` rejects.
+    fn add_mv(mb_x: usize, mb_y: usize, mv: MV) -> (usize, usize) {
+        (((mb_x * 16) as isize + (mv.x as isize)) as usize,
+         ((mb_y * 16) as isize + (mv.y as isize)) as usize)
+    }
+    // Sum of the intra SATD over all 4x4 luma blocks of the frame.
+    fn calculate_i_cplx(frm: &NAVideoBuffer<u8>) -> u32 {
+        let (w, h) = frm.get_dimensions(0);
+        let src = frm.get_data();
+        let stride = frm.get_stride(0);
+        let mut sum = 0;
+        let mut offset = 0;
+        for y in (0..h).step_by(4) {
+            for x in (0..w).step_by(4) {
+                sum += Self::satd_i(src, offset + x, stride, x > 0, y > 0);
+            }
+            offset += stride * 4;
+        }
+        sum
+    }
+    // Sum of the motion-compensated SATD over all complete macroblocks.
+    fn calculate_mv_diff(ref_frm: &NAVideoBuffer<u8>, cur_frm: &NAVideoBuffer<u8>) -> u32 {
+        let (w, h) = ref_frm.get_dimensions(0);
+        let mut sum = 0;
+        for mb_y in 0..(h / 16) {
+            for mb_x in 0..(w / 16) {
+                sum += Self::satd_mb_diff(ref_frm, cur_frm, mb_x, mb_y);
+            }
+        }
+        sum
+    }
+    // SATD of one macroblock against its best match in the reference frame.
+    // The stride of the reference frame is used for both frames.
+    fn satd_mb_diff(ref_frm: &NAVideoBuffer<u8>, cur_frm: &NAVideoBuffer<u8>, mb_x: usize, mb_y: usize) -> u32 {
+        let mv = Self::search_mv(ref_frm, cur_frm, mb_x, mb_y);
+        let mut sum = 0;
+        let src0 = ref_frm.get_data();
+        let src1 = cur_frm.get_data();
+        let stride = ref_frm.get_stride(0);
+        let (src_x, src_y) = Self::add_mv(mb_x, mb_y, mv);
+        for y in (0..16).step_by(4) {
+            for x in (0..16).step_by(4) {
+                sum += Self::satd(&src0[src_x + x + (src_y + y) * stride..],
+                                  &src1[mb_x * 16 + x + (mb_y * 16 + y) * stride..],
+                                  stride);
+            }
+        }
+        sum
+    }
+    // Whole-pixel diamond search (step sizes 4, 2, 1) for the macroblock in the reference frame.
+    fn search_mv(ref_frm: &NAVideoBuffer<u8>, cur_frm: &NAVideoBuffer<u8>, mb_x: usize, mb_y: usize) -> MV {
+        let stride = ref_frm.get_stride(0);
+        let (w, h) = ref_frm.get_dimensions(0);
+        // largest allowed x and y of the top-left corner of a candidate block
+        let (v_edge, h_edge) = (w - 16, h - 16);
+        let ref_src = ref_frm.get_data();
+        let cur_src = cur_frm.get_data();
+        let cur_src = &cur_src[mb_x * 16 + mb_y * 16 * stride..];
+
+        let mut best_mv = ZERO_MV;
+        let mut best_dist = Self::sad(cur_src, ref_src, mb_x, mb_y, stride, best_mv);
+        if best_dist == 0 {
+            return best_mv;
+        }
+
+        for step in (0..=2).rev() {
+            let mut changed = true;
+            while changed {
+                changed = false;
+                for &mv in DIA_PATTERN[1..].iter() {
+                    let cand_mv = best_mv + mv.scale(1 << step);
+                    let (cx, cy) = Self::add_mv(mb_x, mb_y, cand_mv);
+                    // also rejects negative positions, which wrapped around in `add_mv`
+                    if cx > v_edge || cy > h_edge {
+                        continue;
+                    }
+                    let cand_dist = Self::sad(cur_src, ref_src, mb_x, mb_y, stride, cand_mv);
+                    if cand_dist < best_dist {
+                        best_dist = cand_dist;
+                        best_mv = cand_mv;
+                        if best_dist == 0 {
+                            return best_mv;
+                        }
+                        changed = true;
+                    }
+                }
+            }
+        }
+        best_mv
+    }
+    // Distortion of a 16x16 block (sum of squared differences despite the name).
+    // `cur_src` already starts at the macroblock, `src` is the whole reference plane.
+    fn sad(cur_src: &[u8], src: &[u8], mb_x: usize, mb_y: usize, stride: usize, mv: MV) -> u32 {
+        let (src_x, src_y) = Self::add_mv(mb_x, mb_y, mv);
+        let mut sum = 0;
+        for (line1, line2) in cur_src.chunks(stride).zip(src[src_x + src_y * stride..].chunks(stride)).take(16) {
+            sum += line1[..16].iter().zip(line2[..16].iter()).fold(0u32,
+                    |acc, (&a, &b)| acc + u32::from(a.max(b) - a.min(b)) * u32::from(a.max(b) - a.min(b)));
+        }
+        sum
+    }
+    // SATD of a 4x4 block after predicting each pixel from its already visited neighbours:
+    // the median of left, top and top-left when both edges are available, only the left or
+    // top pixel on a frame edge, and the constant 128 for the very first block.
+    fn satd_i(src: &[u8], mut offset: usize, stride: usize, has_left: bool, has_top: bool) -> u32 {
+        let mut diffs = [0; 16];
+        match (has_left, has_top) {
+            (true, true) => {
+                for row in diffs.chunks_exact_mut(4) {
+                    let mut left = i16::from(src[offset - 1]);
+                    let mut tl = i16::from(src[offset - stride - 1]);
+                    for (x, dst) in row.iter_mut().enumerate() {
+                        let cur = i16::from(src[offset + x]);
+                        let top = i16::from(src[offset + x - stride]);
+
+                        // sum minus minimum minus maximum leaves the median of the three
+                        *dst = cur - (top + left + tl - top.min(left).min(tl) - top.max(left).max(tl));
+
+                        left = cur;
+                        tl = top;
+                    }
+
+                    offset += stride;
+                }
+            },
+            (true, false) => {
+                for (dst, (left, cur)) in diffs.chunks_exact_mut(4).zip(
+                        src[offset - 1..].chunks(stride).zip(src[offset..].chunks(stride))) {
+                    for (dst, (&left, &cur)) in dst.iter_mut().zip(left.iter().zip(cur.iter())) {
+                        *dst = i16::from(cur) - i16::from(left);
+                    }
+                }
+            },
+            (false, true) => {
+                for (dst, (top, cur)) in diffs.chunks_exact_mut(4).zip(
+                        src[offset - stride..].chunks(stride).zip(src[offset..].chunks(stride))) {
+                    for (dst, (&top, &cur)) in dst.iter_mut().zip(top.iter().zip(cur.iter())) {
+                        *dst = i16::from(cur) - i16::from(top);
+                    }
+                }
+            },
+            (false, false) => {
+                for (dst, src) in diffs.chunks_exact_mut(4).zip(src[offset..].chunks(stride)) {
+                    for (dst, &src) in dst.iter_mut().zip(src.iter()) {
+                        *dst = i16::from(src) - 128;
+                    }
+                }
+            },
+        };
+        // 4x4 Hadamard transform: rows first, then columns
+        for row in diffs.chunks_exact_mut(4) {
+            hadamard!(row[0], row[1], row[2], row[3], row[0], row[1], row[2], row[3]);
+        }
+        for i in 0..4 {
+            hadamard!(diffs[i], diffs[i + 4], diffs[i + 8], diffs[i + 12],
+                      diffs[i], diffs[i + 4], diffs[i + 8], diffs[i + 12]);
+        }
+        diffs.iter().fold(0u32, |acc, x| acc + (x.abs() as u32))
+    }
+    // SATD between two 4x4 blocks that share the same line stride.
+    fn satd(src0: &[u8], src1: &[u8], stride: usize) -> u32 {
+        let mut diffs = [0; 16];
+        for (dst, (src0, src1)) in diffs.chunks_exact_mut(4).zip(
+                src0.chunks(stride).zip(src1.chunks(stride))) {
+            hadamard!(i16::from(src0[0]) - i16::from(src1[0]),
+                      i16::from(src0[1]) - i16::from(src1[1]),
+                      i16::from(src0[2]) - i16::from(src1[2]),
+                      i16::from(src0[3]) - i16::from(src1[3]),
+                      dst[0], dst[1], dst[2], dst[3]);
+        }
+        for i in 0..4 {
+            hadamard!(diffs[i], diffs[i + 4], diffs[i + 8], diffs[i + 12],
+                      diffs[i], diffs[i + 4], diffs[i + 8], diffs[i + 12]);
+        }
+        diffs.iter().fold(0u32, |acc, x| acc + (x.abs() as u32))
+    }
+    // Fills all three planes of `dst` with 2x2 rounded averages of `src`.
+    fn downscale(dst: &mut NAVideoBuffer<u8>, src: &NAVideoBuffer<u8>) {
+        let dst = NASimpleVideoFrame::from_video_buf(dst).unwrap();
+        let sdata = src.get_data();
+        for plane in 0..3 {
+            let cur_w = dst.width[plane];
+            let cur_h = dst.height[plane];
+            let doff = dst.offset[plane];
+            let soff = src.get_offset(plane);
+            let dstride = dst.stride[plane];
+            let sstride = src.get_stride(plane);
+            // every destination line consumes a strip of two source lines
+            for (dline, sstrip) in dst.data[doff..].chunks_exact_mut(dstride).zip(
+                    sdata[soff..].chunks_exact(sstride * 2)).take(cur_h) {
+                let (line0, line1) = sstrip.split_at(sstride);
+                for (dst, (src0, src1)) in dline.iter_mut().zip(
+                        line0.chunks_exact(2).zip(line1.chunks_exact(2))).take(cur_w) {
+                    *dst = ((u16::from(src0[0]) + u16::from(src0[1]) +
+                             u16::from(src1[0]) + u16::from(src1[1]) + 2) >> 2) as u8;
+                }
+            }
+        }
+    }
+}
--- /dev/null
+use nihav_core::frame::FrameType;
+
+/// Parameters of the rate-distortion cost used for coding decisions.
+pub struct RateDistMetric {
+    /// Weight applied to the distortion term.
+    pub lambda: f32,
+    pub good_enough: u32,
+    pub p_split_thr: u32,
+}
+
+impl RateDistMetric {
+    /// Creates the metric with its defaults: lambda 1.0, `good_enough` 256, `p_split_thr` 8192.
+    pub fn new() -> Self {
+        Self { p_split_thr: 8192, good_enough: 256, lambda: 1.0 }
+    }
+    /// Combined cost `bits + dist * 0.1 * lambda`, rounded up.
+    pub fn get_metric(&self, bits: u32, dist: u32) -> u32 {
+        let weighted_dist = (dist as f32) * 0.1 * self.lambda;
+        let cost = (bits as f32) + weighted_dist;
+        cost.ceil() as u32
+    }
+}
+
+/// Size model for one frame class: a per-quantiser factor that converts frame
+/// complexity into an expected frame size.
+#[derive(Clone,Copy)]
+struct BitrateCounter {
+    /// Expected size per unit of complexity, indexed by quantiser.
+    factors: [f32; 32],
+    /// Quantiser chosen by the last `get_quant()` call (or the initial default).
+    last_q: usize,
+    /// Frame size projected for `last_q` by the last `get_quant()` call.
+    proj_size: usize,
+    /// Selects the intra or the inter initial model.
+    intra: bool,
+}
+
+impl BitrateCounter {
+    fn new(intra: bool) -> Self {
+        let mut counter = Self { intra, last_q: 0, proj_size: 0, factors: [0.0; 32] };
+        counter.reset();
+        counter
+    }
+    /// Restores the initial quantiser (8 for intra, 10 for inter) and the initial factor table.
+    fn reset(&mut self) {
+        let is_intra = self.intra;
+        self.last_q = if is_intra { 8 } else { 10 };
+        for (idx, factor) in self.factors.iter_mut().enumerate() {
+            let q = idx as f32;
+            // NOTE(review): the intra formula turns negative for q >= 30 - confirm this is intended
+            *factor = if is_intra {
+                    (-0.1 * q + 2.95) / 100.0
+                } else {
+                    100.0 / (8.2 * q * q + 51.0 * q + 3411.0)
+                };
+        }
+    }
+    /// Sets the decision thresholds of `metric` from the band (`q / 8`) of the last
+    /// quantiser plus `q_add` and resets its lambda to 1.0.
+    fn init_metric(&self, metric: &mut RateDistMetric, q_add: usize) {
+        const THRESHOLDS: [(u32, u32); 4] = [
+            (256, 8192), (128, 8192), (64, 4196), (32, 2048)
+        ];
+        let band = (self.last_q + q_add).min(31) / 8;
+        let (good_enough, p_split_thr) = THRESHOLDS[band];
+        metric.good_enough = good_enough;
+        metric.p_split_thr = p_split_thr;
+        metric.lambda = 1.0;
+    }
+    /// Adjusts the factor of the last quantiser when the real frame size `fsize`
+    /// is more than one eighth away from the projected size.
+    fn update_stats(&mut self, fsize: usize) {
+        let margin = self.proj_size / 8;
+        let undershoot = fsize < self.proj_size - margin;
+        let overshoot = !undershoot && fsize > self.proj_size + margin;
+        if !(undershoot || overshoot) {
+            return;
+        }
+        // the factor is adjusted through its reciprocal in steps of 0.5
+        let mut inv_factor = 1.0 / self.factors[self.last_q];
+        if undershoot && inv_factor > 1.0 {
+            inv_factor -= 0.5;
+        } else if overshoot && inv_factor < 200.0 {
+            inv_factor += 0.5;
+        }
+        self.factors[self.last_q] = 1.0 / inv_factor;
+    }
+    /// Expected frame size for the given complexity at quantiser `q`.
+    fn get_est_size(&self, complexity: u32, q: usize) -> usize {
+        let estimate = (complexity as f32) * self.factors[q];
+        estimate.ceil() as usize
+    }
+    /// Picks the quantiser whose expected size is within one eighth of `target`
+    /// and remembers it together with the projected size.
+    ///
+    /// Targets outside the modelled range select quantiser 31 or 0. When no quantiser
+    /// matches, the previous quantiser and projection are kept.
+    fn get_quant(&mut self, target: usize, complexity: u32) -> usize {
+        let size_at_max_q = self.get_est_size(complexity, 31);
+        let size_at_min_q = self.get_est_size(complexity, 0);
+        if target < size_at_max_q {
+            self.last_q = 31;
+            self.proj_size = size_at_max_q;
+        } else if target > size_at_min_q {
+            self.last_q = 0;
+            self.proj_size = size_at_min_q;
+        } else { //xxx: do binary search?
+            // NOTE(review): the scan covers 30 down to 0 only (31 is never tried here) and
+            // does not stop at the first hit, so the lowest matching quantiser wins
+            for q in (0..31).rev() {
+                let expected = self.get_est_size(complexity, q);
+                let tolerance = expected / 8;
+                if target >= (expected - tolerance) && target <= (expected + tolerance) {
+                    self.proj_size = expected;
+                    self.last_q = q;
+                }
+            }
+        }
+        self.last_q
+    }
+    fn get_last_quant(&self) -> usize {
+        self.last_q
+    }
+}
+
+// NOTE(review): presumably the number of time units per second used by the bit pool accounting - confirm at the use sites
+const TIMEBASE: u32 = 1000;
+
+/// Quantiser selection for the encoder: either a forced quantiser/quality
+/// or bitrate-driven control based on per-frame-type size models.
+pub struct BitRateControl {
+    /// Quantiser to use for every frame, if forced.
+    force_quant: Option<usize>,
+    /// Quality setting that replaces rate control, if forced.
+    force_quality: Option<u8>,
+    /// Size models: index 0 is the intra model, index 1 the inter model.
+    br_counter: [BitrateCounter; 2],
+
+    /// Target bitrate; zero means that rate control is not in use.
+    bitrate: u32,
+    tpos: u32,
+    /// Initialised to twice the bitrate by `set_bitrate()`.
+    bitpool: usize,
+
+    duration: u32,
+    dcount: u32,
+
+    /// Value added to the last inter quantiser to obtain the B-frame quantiser.
+    pub b_offset: usize,
+}
+
+impl BitRateControl {
+    /// Creates a controller with no bitrate, forced quantiser or forced quality.
+    ///
+    /// The B-frame quantiser offset starts at 4.
+    pub fn new() -> Self {
+        let br_counter = [BitrateCounter::new(true), BitrateCounter::new(false)];
+        Self {
+            br_counter,
+            force_quant: None,
+            force_quality: None,
+            bitrate: 0,
+            bitpool: 0,
+            tpos: 0,
+            duration: 0,
+            dcount: 0,
+            b_offset: 4,
+        }
+    }
+    /// Tells whether quantisers are chosen by rate control.
+    ///
+    /// That is the case when a bitrate is set and neither a quantiser nor a
+    /// quality is forced.
+    pub fn rate_ctl_in_use(&self) -> bool {
+        let forced = self.force_quant.is_some() || self.force_quality.is_some();
+        !forced && self.bitrate != 0
+    }
+    /// Sets the target bitrate and restarts the bit budget.
+    ///
+    /// Both bitrate counters are reset, the bit pool is primed with twice the
+    /// bitrate and the time position goes back to zero.
+    pub fn set_bitrate(&mut self, bitrate: u32) {
+        self.bitrate = bitrate;
+        self.tpos = 0;
+        self.bitpool = (bitrate as usize) * 2;
+
+        for counter in self.br_counter.iter_mut() {
+            counter.reset();
+        }
+    }
+    /// Sets or clears (`None`) the quantiser to use for every frame.
+    pub fn set_force_quant(&mut self, force_q: Option<usize>) {
+        self.force_quant = force_q;
+    }
+    /// Returns the forced quantiser as `i8`, or -1 when none is set.
+    pub fn get_force_quant(&self) -> i8 {
+        self.force_quant.map_or(-1, |q| q as i8)
+    }
+    /// Sets or clears (`None`) the forced quality value.
+    pub fn set_force_quality(&mut self, force_q: Option<u8>) {
+        self.force_quality = force_q;
+    }
+    /// Returns the forced quality as `i8`, or -1 when none is set.
+    pub fn get_force_quality(&self) -> i8 {
+        self.force_quality.map_or(-1, |q| q as i8)
+    }
+    /// Returns the quantiser to use for a frame of type `ftype`.
+    ///
+    /// A forced quantiser wins over everything, a forced quality gives 4.
+    /// Otherwise I- and P-frames ask their bitrate counter with the current
+    /// target size, while B-frames take the last P-frame quantiser plus
+    /// `b_offset`, clamped to 31.
+    pub fn get_quant(&mut self, ftype: FrameType, complexity: u32) -> usize {
+        if let Some(q) = self.force_quant {
+            return q;
+        }
+        if self.force_quality.is_some() {
+            return 4;
+        }
+        match ftype {
+            FrameType::B => (self.br_counter[1].get_last_quant() + self.b_offset).min(31),
+            _ => {
+                let tgt = self.get_target_size(ftype);
+                let counter_idx = if ftype == FrameType::I { 0 } else { 1 };
+                self.br_counter[counter_idx].get_quant(tgt, complexity)
+            },
+        }
+    }
+    /// Returns the quantiser last chosen by rate control for `ftype`.
+    ///
+    /// Frame types other than I and P get the last P-frame quantiser plus
+    /// `b_offset`, clamped to 31.
+    pub fn get_last_quant(&self, ftype: FrameType) -> usize {
+        if let FrameType::I = ftype {
+            return self.br_counter[0].get_last_quant();
+        }
+        let p_quant = self.br_counter[1].get_last_quant();
+        match ftype {
+            FrameType::P => p_quant,
+            _ => (p_quant + self.b_offset).min(31),
+        }
+    }
+    /// Prepares `metric` for coding a frame of type `ftype`.
+    ///
+    /// With a forced quality only `lambda` is set (quality divided by 50).
+    /// Otherwise the matching bitrate counter fills the metric: counter 0 for
+    /// I-frames, counter 1 for P-frames and counter 1 with `b_offset` added to
+    /// the quantiser for everything else.
+    pub fn init_metric(&self, ftype: FrameType, metric: &mut RateDistMetric) {
+        if let Some(q) = self.force_quality {
+            metric.lambda = (q as f32) / 50.0;
+            return;
+        }
+        let (counter, q_add) = match ftype {
+            FrameType::I => (&self.br_counter[0], 0),
+            FrameType::P => (&self.br_counter[1], 0),
+            _ => (&self.br_counter[1], self.b_offset),
+        };
+        counter.init_metric(metric, q_add);
+    }
+    /// Accounts for a coded frame of `fsize` bytes that lasted `ts_diff` units.
+    ///
+    /// With a bitrate set, the frame duration statistics and the time position
+    /// are advanced, the bit pool gains `bitrate` bits for every full
+    /// `TIMEBASE` that has passed and loses the bits spent on the frame, never
+    /// dropping below 1024. I- and P-frames additionally update their bitrate
+    /// counter; other frame types do not.
+    pub fn update_stats(&mut self, ftype: FrameType, fsize: usize, ts_diff: u32) {
+        if self.bitrate > 0 {
+            let can_count = self.duration < std::u32::MAX / 2;
+            if ts_diff > 0 && can_count {
+                self.duration += ts_diff;
+                self.dcount += 1;
+            }
+
+            self.tpos += ts_diff;
+            let refills = self.tpos / TIMEBASE;
+            self.tpos %= TIMEBASE;
+            self.bitpool += (refills as usize) * (self.bitrate as usize);
+
+            let spent_bits = fsize * 8;
+            self.bitpool = self.bitpool.saturating_sub(spent_bits).max(1024);
+        }
+
+        let counter_idx = match ftype {
+            FrameType::I => Some(0),
+            FrameType::P => Some(1),
+            _ => None,
+        };
+        if let Some(idx) = counter_idx {
+            self.br_counter[idx].update_stats(fsize);
+        }
+    }
+    /// Returns the size in bytes a frame of type `ftype` should aim for.
+    ///
+    /// Zero is returned when no bitrate is set or the bit pool is empty. The
+    /// usable pool is capped at 9/8 of the bitrate. Once frame durations are
+    /// known the budget is the usable pool scaled by the average frame
+    /// duration over the time left until the next refill, otherwise a tenth of
+    /// the usable pool. I-frames get three times that budget and B-frames
+    /// three quarters of it.
+    pub fn get_target_size(&self, ftype: FrameType) -> usize {
+        if self.bitrate == 0 || self.bitpool == 0 {
+            return 0;
+        }
+        let cap = (self.bitrate + self.bitrate / 8) as usize;
+        let avail = self.bitpool.min(cap);
+        let base_bits = match self.dcount {
+            0 => avail / 10,
+            count => {
+                let avg_len = ((self.duration + count / 2) / count).max(1);
+                let time_left = TIMEBASE - self.tpos;
+                avail * (avg_len as usize) / (time_left as usize)
+            },
+        };
+        let bits = if ftype == FrameType::I {
+            base_bits * 3
+        } else if ftype == FrameType::B {
+            base_bits * 3 / 4
+        } else {
+            base_bits
+        };
+        // Round up to whole bytes.
+        (bits + 7) / 8
+    }
+}
--- /dev/null
+use nihav_codec_support::codecs::{MV, ZERO_MV};
+
+pub trait RV34MVOps { // extra motion vector operations used by this codec
+    fn scale(&self, trd: u32, trb: u32) -> (MV, MV); // returns (vector scaled by trb/trd, scaled vector minus the original)
+    fn diff_gt_3(self, other: Self) -> bool; // true when either component differs from `other` by more than 3
+}
+
+impl RV34MVOps for MV {
+    /// Scales the vector by `trb / trd` using 14-bit fixed-point arithmetic.
+    ///
+    /// Returns the scaled vector together with the difference between the
+    /// scaled vector and the original one.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `trd` is zero (integer division).
+    fn scale(&self, trd: u32, trb: u32) -> (MV, MV) {
+        const TR_SHIFT: u8 = 14;
+        const TR_BIAS: i32 = 1 << (TR_SHIFT - 1);
+
+        let ratio = ((trb as i32) << TR_SHIFT) / (trd as i32);
+        // Multiply, add half for rounding and drop the fractional bits.
+        let rescale = |comp: i16| (((comp as i32) * ratio + TR_BIAS) >> TR_SHIFT) as i16;
+        let scaled = MV { x: rescale(self.x), y: rescale(self.y) };
+        (scaled, scaled - *self)
+    }
+    /// Checks whether any component differs from `other` by more than 3.
+    fn diff_gt_3(self, other: Self) -> bool {
+        let delta = self - other;
+        [delta.x, delta.y].iter().any(|comp| comp.abs() > 3)
+    }
+}
+
+#[derive(Debug,Clone,Copy)]
+pub enum PredType4x4 { // prediction modes for 4x4 blocks; `to_index()` gives their numeric codes
+    Ver, // code 1
+    Hor, // code 2
+    DC, // code 0
+    DiagDownLeft, // code 4
+    DiagDownRight, // code 3
+    VerRight, // code 5
+    HorDown, // code 8
+    VerLeft, // code 6
+    HorUp, // code 7
+    LeftDC, // DC variant, code 0
+    TopDC, // DC variant, code 0
+    DC128, // DC variant, code 0
+    DiagDownLeftNoDown, // same code as DiagDownLeft
+    HorUpNoDown, // same code as HorUp
+    VerLeftNoDown // same code as VerLeft
+}
+
+#[derive(Debug,Clone,Copy)]
+pub enum PredType8x8 { // prediction modes carried by `MacroblockType::Intra16x16`; `to_index()` gives their numeric codes
+    DC, // code 0
+    Hor, // code 2
+    Ver, // code 1
+    Plane, // code 3
+    LeftDC, // code 0
+    TopDC, // code 0
+    DC128 // code 0
+}
+
+pub trait ToIndex { // conversion of a prediction mode into the numeric code kept in `MBState::ipred`
+    fn to_index(self) -> i8; // returns the numeric code of the mode
+}
+
+impl ToIndex for PredType8x8 {
+    /// Maps the mode to its code: 1 for vertical, 2 for horizontal, 3 for
+    /// plane and 0 for every DC flavour.
+    fn to_index(self) -> i8 {
+        match self {
+            PredType8x8::DC |
+            PredType8x8::LeftDC |
+            PredType8x8::TopDC |
+            PredType8x8::DC128 => 0,
+            PredType8x8::Ver => 1,
+            PredType8x8::Hor => 2,
+            PredType8x8::Plane => 3,
+        }
+    }
+}
+
+impl ToIndex for PredType4x4 {
+    /// Maps the mode to its code; the `NoDown` variants share the code of
+    /// their base mode and every DC flavour maps to 0.
+    fn to_index(self) -> i8 {
+        use self::PredType4x4::*;
+        match self {
+            DC | LeftDC | TopDC | DC128 => 0,
+            Ver => 1,
+            Hor => 2,
+            DiagDownRight => 3,
+            DiagDownLeft | DiagDownLeftNoDown => 4,
+            VerRight => 5,
+            VerLeft | VerLeftNoDown => 6,
+            HorUp | HorUpNoDown => 7,
+            HorDown => 8,
+        }
+    }
+}
+
+#[derive(Clone,Copy,Default)]
+pub struct Block { // sixteen coefficients, printed as four rows of four by the `Display` impl
+    pub coeffs: [i16; 16], // all zero in a default block
+}
+
+impl Block {
+    /// Creates a block with all coefficients set to zero.
+    pub fn new() -> Self {
+        Default::default()
+    }
+    /// Tells whether every coefficient is zero.
+    pub fn is_empty(&self) -> bool {
+        self.coeffs.iter().all(|&coef| coef == 0)
+    }
+    /// Counts the non-zero coefficients.
+    pub fn count_nz(&self) -> usize {
+        let mut nonzero = 0;
+        for &coef in self.coeffs.iter() {
+            if coef != 0 {
+                nonzero += 1;
+            }
+        }
+        nonzero
+    }
+}
+impl std::fmt::Display for Block {
+    /// Prints the coefficients as four lines of four numbers, each padded to
+    /// a width of three and preceded by a space.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        for row in self.coeffs.chunks(4) {
+            write!(f, " {:3} {:3} {:3} {:3}\n", row[0], row[1], row[2], row[3])?;
+        }
+        Ok(())
+    }
+}
+
+#[derive(Clone,Copy,Default)]
+pub struct DeblockInfo { // deblocking data for one macroblock
+    pub is_strong: bool, // when set, `MBState::fill_deblock()` marks all luma blocks (0xFFFF)
+    pub q: u8, // presumably the quantiser of the macroblock - not used in this file section
+    pub cbp_y: u16, // luma bits that `fill_deblock()` merges into `deblock_y`
+    pub cbp_c: u8, // presumably the chroma counterpart of `cbp_y` - not used in this file section
+    pub deblock_y: u16, // result of `fill_deblock()`: `cbp_y` combined with the motion vector edge masks
+}
+
+#[derive(Debug,Clone,Copy,PartialEq)]
+pub enum MBType { // macroblock kinds tracked per macroblock in `MBState::mb_type`
+    Intra, // set for `MacroblockType::Intra4x4`
+    Intra16, // set for `MacroblockType::Intra16x16`
+    Skip, // set for both `PSkip` and `BSkip`
+    P16x16,
+    P16x16Mix, // set for `MacroblockType::InterMix`
+    P16x8,
+    P8x16,
+    P8x8,
+    Direct, // what a skipped neighbour counts as during B-frame type prediction
+    Bidir, // carries both a forward and a backward vector
+    Forward, // carries a forward vector only
+    Backward, // carries a backward vector only
+    Invalid, // initial value of every entry after `resize()`/`reset()`
+}
+
+impl MBType {
+    /// Tells whether this is one of the two intra types.
+    pub fn is_intra(self) -> bool {
+        self == MBType::Intra || self == MBType::Intra16
+    }
+    /// Returns the weight used when predicting the macroblock type.
+    ///
+    /// # Panics
+    ///
+    /// Panics for `Skip` and `Invalid`, which have no weight.
+    fn get_weight(self) -> u8 {
+        match self {
+            MBType::Intra => 0,
+            MBType::Intra16 => 1,
+            MBType::P16x16 => 2,
+            MBType::P8x8 => 3,
+            MBType::Forward => 4,
+            MBType::Backward => 5,
+            MBType::Direct => 6,
+            MBType::P16x8 => 7,
+            MBType::P8x16 => 8,
+            MBType::Bidir => 9,
+            MBType::P16x16Mix => 10,
+            MBType::Skip | MBType::Invalid => unreachable!(),
+        }
+    }
+    /// Returns the numeric code of the type; several types share a code.
+    ///
+    /// # Panics
+    ///
+    /// Panics for `Skip` and `Invalid`, which have no code.
+    pub fn to_code(self) -> usize {
+        match self {
+            MBType::Intra => 0,
+            MBType::Intra16 => 1,
+            MBType::P16x16 | MBType::Forward => 2,
+            MBType::P8x8 | MBType::Backward => 3,
+            MBType::P16x8 | MBType::Bidir => 4,
+            MBType::P8x16 | MBType::Direct => 5,
+            MBType::P16x16Mix => 6,
+            MBType::Skip | MBType::Invalid => unreachable!(),
+        }
+    }
+    /// Tells whether the type carries a motion vector for the given direction
+    /// (`fwd` selects forward, otherwise backward).
+    pub fn has_dir_mv(self, fwd: bool) -> bool {
+        matches!((self, fwd),
+            (MBType::Bidir, _) | (MBType::Forward, true) | (MBType::Backward, false))
+    }
+}
+
+#[derive(Default)]
+pub struct SliceState { // position of the current macroblock and which neighbours may be used for prediction
+    pub has_t: bool, // top neighbour is available
+    pub has_l: bool, // left neighbour is available
+    pub has_tl: bool, // top-left neighbour is available
+    pub has_tr: bool, // top-right neighbour is available
+    pub mb_x: usize, // macroblock column
+    pub mb_y: usize, // macroblock row
+}
+
+impl SliceState {
+    /// Creates a state at position (0, 0) with no neighbours available.
+    pub fn new() -> Self {
+        Default::default()
+    }
+}
+
+#[derive(Default)]
+pub struct MBState { // per-frame macroblock types, intra prediction codes and motion vectors
+    pub mb_type: Vec<MBType>, // one entry per macroblock, addressed with `get_mb_idx()`
+    pub ipred: Vec<i8>, // one prediction code per 4x4 block, addressed with `get_blk4_idx()`; -1 after reset
+    pub fwd_mv: Vec<MV>, // forward vectors, one per 8x8 block, addressed with `get_blk8_idx()`
+    pub bwd_mv: Vec<MV>, // backward vectors, same layout as `fwd_mv`
+    pub ref_mv: Vec<MV>, // vectors exchanged with `fwd_mv` by `swap_mvs()`
+    pub mb_stride: usize, // row length of `mb_type`
+    pub blk8_stride: usize, // row length of the motion vector arrays
+    pub blk4_stride: usize, // row length of `ipred`
+}
+
+impl MBState {
+    /// Creates an empty state; call `resize()` before using it.
+    pub fn new() -> Self {
+        Default::default()
+    }
+    /// Sizes all arrays for a frame of `mb_w` by `mb_h` macroblocks.
+    ///
+    /// Every array gets extra room beyond the macroblock grid (two extra
+    /// entries per row and one extra row) so that the indices returned by
+    /// `get_mb_idx()`, `get_blk8_idx()` and `get_blk4_idx()` always have a
+    /// left and a top neighbour. Newly added entries are `MBType::Invalid`,
+    /// -1 and `ZERO_MV` respectively; existing entries are left as they are.
+    pub fn resize(&mut self, mb_w: usize, mb_h: usize) {
+        self.mb_stride = mb_w + 2;
+        self.blk8_stride = mb_w * 2 + 2;
+        self.blk4_stride = mb_w * 4 + 2;
+
+        self.mb_type.resize(self.mb_stride * (mb_h + 1), MBType::Invalid);
+        self.ipred.resize(self.blk4_stride * (mb_h * 4 + 1), -1);
+        // The number of motion vector rows depends on the frame height. It was
+        // derived from `mb_w` before, which left the arrays too short (and the
+        // indexing out of bounds) for frames taller than they are wide.
+        let mv_count = self.blk8_stride * (mb_h * 2 + 1);
+        self.fwd_mv.resize(mv_count, ZERO_MV);
+        self.bwd_mv.resize(mv_count, ZERO_MV);
+        self.ref_mv.resize(mv_count, ZERO_MV);
+    }
+    /// Marks all macroblock types as invalid and all prediction codes as -1.
+    ///
+    /// Motion vectors are not touched.
+    pub fn reset(&mut self) {
+        self.mb_type.iter_mut().for_each(|mbt| *mbt = MBType::Invalid);
+        self.ipred.iter_mut().for_each(|code| *code = -1);
+    }
+    /// Stores `mv` in all four 8x8 blocks of the macroblock starting at
+    /// `blk8_idx`, in the forward array when `fwd` is set and in the backward
+    /// array otherwise.
+    fn set_mv(&mut self, blk8_idx: usize, fwd: bool, mv: MV) {
+        let stride = self.blk8_stride;
+        let plane = if fwd { &mut self.fwd_mv } else { &mut self.bwd_mv };
+        for &offset in [0, 1, stride, stride + 1].iter() {
+            plane[blk8_idx + offset] = mv;
+        }
+    }
+    /// Returns the index of macroblock (`mb_x`, `mb_y`) in `mb_type`.
+    ///
+    /// The grid is offset by one row and one column.
+    pub fn get_mb_idx(&self, mb_x: usize, mb_y: usize) -> usize {
+        let row_start = (mb_y + 1) * self.mb_stride;
+        row_start + mb_x + 1
+    }
+    /// Returns the index of the top-left 8x8 block of macroblock
+    /// (`mb_x`, `mb_y`) in the motion vector arrays.
+    ///
+    /// The grid is offset by one row and one column.
+    pub fn get_blk8_idx(&self, mb_x: usize, mb_y: usize) -> usize {
+        let row_start = (mb_y * 2 + 1) * self.blk8_stride;
+        row_start + mb_x * 2 + 1
+    }
+    /// Returns the index of the top-left 4x4 block of macroblock
+    /// (`mb_x`, `mb_y`) in `ipred`.
+    ///
+    /// The grid is offset by one row and one column.
+    pub fn get_blk4_idx(&self, mb_x: usize, mb_y: usize) -> usize {
+        let row_start = (mb_y * 4 + 1) * self.blk4_stride;
+        row_start + mb_x * 4 + 1
+    }
+    pub fn update(&mut self, mb_type: &MacroblockType, mb_x: usize, mb_y: usize) { // records type, prediction codes and motion vectors of the macroblock at (mb_x, mb_y)
+        let mb_idx = self.get_mb_idx(mb_x, mb_y);
+        let blk8_idx = self.get_blk8_idx(mb_x, mb_y);
+        let blk4_idx = self.get_blk4_idx(mb_x, mb_y);
+
+        for row in self.ipred[blk4_idx..].chunks_mut(self.blk4_stride).take(4) { // clear the 4x4 prediction codes first; only the intra arms overwrite them
+            for el in row[..4].iter_mut() {
+                *el = 0;
+            }
+        }
+
+        match *mb_type {
+            MacroblockType::Intra16x16(ptype) => {
+                self.mb_type[mb_idx] = MBType::Intra16;
+                let pred_id = ptype.to_index();
+                for row in self.ipred[blk4_idx..].chunks_mut(self.blk4_stride).take(4) { // the same code goes into all sixteen 4x4 blocks
+                    for el in row[..4].iter_mut() {
+                        *el = pred_id;
+                    }
+                }
+                self.set_mv(blk8_idx, true, ZERO_MV);
+                self.set_mv(blk8_idx, false, ZERO_MV);
+            },
+            MacroblockType::Intra4x4(ptypes) => {
+                self.mb_type[mb_idx] = MBType::Intra;
+                for (dst, src) in self.ipred[blk4_idx..].chunks_mut(self.blk4_stride).zip(ptypes.chunks(4)) { // `ptypes` holds four rows of four modes
+                    for (dst, &ptype) in dst.iter_mut().zip(src.iter()) {
+                        *dst = ptype.to_index();
+                    }
+                }
+                self.set_mv(blk8_idx, true, ZERO_MV);
+                self.set_mv(blk8_idx, false, ZERO_MV);
+            },
+            MacroblockType::PSkip => {
+                self.mb_type[mb_idx] = MBType::Skip;
+                self.set_mv(blk8_idx, true, ZERO_MV);
+                self.set_mv(blk8_idx, false, ZERO_MV);
+            },
+            MacroblockType::Inter16x16(mv) => {
+                self.mb_type[mb_idx] = MBType::P16x16;
+                self.set_mv(blk8_idx, true, mv);
+                self.set_mv(blk8_idx, false, ZERO_MV);
+            },
+            MacroblockType::InterMix(mv) => {
+                self.mb_type[mb_idx] = MBType::P16x16Mix;
+                self.set_mv(blk8_idx, true, mv);
+                self.set_mv(blk8_idx, false, ZERO_MV);
+            },
+            MacroblockType::Inter16x8(mvs) => { // mvs[0] covers the top half, mvs[1] the bottom half
+                self.mb_type[mb_idx] = MBType::P16x8;
+                self.fwd_mv[blk8_idx] = mvs[0];
+                self.fwd_mv[blk8_idx + 1] = mvs[0];
+                self.fwd_mv[blk8_idx + self.blk8_stride] = mvs[1];
+                self.fwd_mv[blk8_idx + self.blk8_stride + 1] = mvs[1];
+                self.set_mv(blk8_idx, false, ZERO_MV);
+            },
+            MacroblockType::Inter8x16(mvs) => { // mvs[0] covers the left half, mvs[1] the right half
+                self.mb_type[mb_idx] = MBType::P8x16;
+                self.fwd_mv[blk8_idx] = mvs[0];
+                self.fwd_mv[blk8_idx + 1] = mvs[1];
+                self.fwd_mv[blk8_idx + self.blk8_stride] = mvs[0];
+                self.fwd_mv[blk8_idx + self.blk8_stride + 1] = mvs[1];
+                self.set_mv(blk8_idx, false, ZERO_MV);
+            },
+            MacroblockType::Inter8x8(mvs) => { // one vector per 8x8 block in raster order
+                self.mb_type[mb_idx] = MBType::P8x8;
+                self.fwd_mv[blk8_idx] = mvs[0];
+                self.fwd_mv[blk8_idx + 1] = mvs[1];
+                self.fwd_mv[blk8_idx + self.blk8_stride] = mvs[2];
+                self.fwd_mv[blk8_idx + self.blk8_stride + 1] = mvs[3];
+                self.set_mv(blk8_idx, false, ZERO_MV);
+            },
+            MacroblockType::BSkip(fmvs, bmvs) => { // NOTE(review): both arrays hold four vectors but only [0] and [1] are stored
+                self.mb_type[mb_idx] = MBType::Skip;
+                self.fwd_mv[blk8_idx] = fmvs[0];
+                self.fwd_mv[blk8_idx + 1] = fmvs[1];
+                self.fwd_mv[blk8_idx + self.blk8_stride] = fmvs[0]; // bottom row repeats the top row instead of using [2]/[3] - confirm this is intended
+                self.fwd_mv[blk8_idx + self.blk8_stride + 1] = fmvs[1];
+                self.bwd_mv[blk8_idx] = bmvs[0];
+                self.bwd_mv[blk8_idx + 1] = bmvs[1];
+                self.bwd_mv[blk8_idx + self.blk8_stride] = bmvs[0]; // same pattern as for the forward vectors
+                self.bwd_mv[blk8_idx + self.blk8_stride + 1] = bmvs[1];
+            },
+            /*MacroblockType::Direct(fmv, bmv) => {
+                self.mb_type[mb_idx] = MBType::Direct;
+                self.set_mv(blk8_idx, true, fmv);
+                self.set_mv(blk8_idx, false, bmv);
+            },*/
+            MacroblockType::Bidir(fmv, bmv) => {
+                self.mb_type[mb_idx] = MBType::Bidir;
+                self.set_mv(blk8_idx, true, fmv);
+                self.set_mv(blk8_idx, false, bmv);
+            },
+            MacroblockType::Forward(mv) => {
+                self.mb_type[mb_idx] = MBType::Forward;
+                self.set_mv(blk8_idx, true, mv);
+                self.set_mv(blk8_idx, false, ZERO_MV);
+            },
+            MacroblockType::Backward(mv) => {
+                self.mb_type[mb_idx] = MBType::Backward;
+                self.set_mv(blk8_idx, true, ZERO_MV);
+                self.set_mv(blk8_idx, false, mv);
+            },
+        };
+    }
+    /// Predicts the type of the current macroblock from its neighbours.
+    ///
+    /// Candidates come from the top, top-right, left and top-left macroblocks
+    /// that `sstate` reports as available (top-right only together with top).
+    /// Skipped neighbours count as `Direct` when `is_b` is set and as `P16x16`
+    /// otherwise. Without candidates the result is `Intra`, a single candidate
+    /// is returned as is, of two the one with the lower weight wins, and with
+    /// more the most frequent one is chosen (the lowest weight on ties).
+    pub fn get_pred_mbtype(&self, sstate: &SliceState, is_b: bool) -> MBType {
+        const MBTYPE_FROM_WEIGHT: [MBType; 11] = [
+            MBType::Intra, MBType::Intra16, MBType::P16x16, MBType::P8x8,
+            MBType::Forward, MBType::Backward, MBType::Direct, MBType::P16x8,
+            MBType::P8x16, MBType::Bidir, MBType::P16x16Mix
+        ];
+
+        let skip_as = if is_b { MBType::Direct } else { MBType::P16x16 };
+        let mb_idx = self.get_mb_idx(sstate.mb_x, sstate.mb_y);
+        let top_idx = mb_idx - self.mb_stride;
+
+        let mut cand = [MBType::Invalid; 4];
+        let mut ccount = 0;
+        {
+            let mut add_cand = |pos: usize| {
+                let mbt = self.mb_type[pos];
+                cand[ccount] = if mbt == MBType::Skip { skip_as } else { mbt };
+                ccount += 1;
+            };
+            if sstate.has_t {
+                add_cand(top_idx);
+                if sstate.has_tr {
+                    add_cand(top_idx + 1);
+                }
+            }
+            if sstate.has_l {
+                add_cand(mb_idx - 1);
+            }
+            if sstate.has_tl {
+                add_cand(top_idx - 1);
+            }
+        }
+
+        if ccount == 0 {
+            return MBType::Intra;
+        }
+        if ccount == 1 {
+            return cand[0];
+        }
+        if ccount == 2 {
+            let first_wins = cand[0].get_weight() <= cand[1].get_weight();
+            return if first_wins { cand[0] } else { cand[1] };
+        }
+
+        // Vote by weight; only a strictly larger count replaces the current
+        // best, so ties go to the lowest weight.
+        let mut counts = [0; 12];
+        for mbt in cand[..ccount].iter() {
+            counts[usize::from(mbt.get_weight())] += 1;
+        }
+        let (best_idx, _) = counts.iter().enumerate()
+            .fold((0, 0), |best, (idx, &count)| if count > best.1 { (idx, count) } else { best });
+        MBTYPE_FROM_WEIGHT[best_idx]
+    }
+    /// Returns the prediction codes of the left, top and top-right neighbours
+    /// of 4x4 block (`x`, `y`) inside macroblock (`mb_x`, `mb_y`).
+    pub fn get_ipred4x4_ctx(&self, mb_x: usize, mb_y: usize, x: usize, y: usize) -> (i8, i8, i8) {
+        let stride = self.blk4_stride;
+        let cur = self.get_blk4_idx(mb_x, mb_y) + x + y * stride;
+        let left = self.ipred[cur - 1];
+        let top = self.ipred[cur - stride];
+        let top_right = self.ipred[cur - stride + 1];
+        (left, top, top_right)
+    }
+    /// Stores the codes of the sixteen 4x4 prediction `modes` (four rows of
+    /// four) for macroblock (`mb_x`, `mb_y`).
+    pub fn set_ipred4x4(&mut self, mb_x: usize, mb_y: usize, modes: &[PredType4x4; 16]) {
+        let start = self.get_blk4_idx(mb_x, mb_y);
+        let stride = self.blk4_stride;
+        let rows = self.ipred[start..].chunks_mut(stride).zip(modes.chunks(4));
+        for (codes, row_modes) in rows {
+            codes.iter_mut()
+                .zip(row_modes.iter())
+                .for_each(|(code, mode)| *code = mode.to_index());
+        }
+    }
+    /// Reads the vector at `idx` from the forward array when `fwd` is set and
+    /// from the backward array otherwise.
+    fn get_mv(&self, idx: usize, fwd: bool) -> MV {
+        let plane = if fwd { &self.fwd_mv } else { &self.bwd_mv };
+        plane[idx]
+    }
+    /// Returns the forward vector of 8x8 block (`xoff`, `yoff`) of the current
+    /// macroblock minus its prediction from the neighbouring blocks.
+    ///
+    /// `w16` tells that the partition is 16 pixels wide, which moves the
+    /// top-right candidate one block further to the right. Without a left and
+    /// a top neighbour the vector is returned unchanged; without a usable
+    /// top-right or top-left candidate only the left vector is subtracted.
+    pub fn get_diff_mv(&self, sstate: &SliceState, w16: bool, xoff: usize, yoff: usize) -> MV {
+        let blk8_idx = self.get_blk8_idx(sstate.mb_x, sstate.mb_y) + xoff + yoff * self.blk8_stride;
+        let cur_mv = self.fwd_mv[blk8_idx];
+
+        // Blocks inside the macroblock always have the neighbour in question.
+        let left_avail = sstate.has_l || xoff != 0;
+        let top_avail = sstate.has_t || yoff != 0;
+        if !left_avail && !top_avail {
+            return cur_mv;
+        }
+
+        let above = blk8_idx - self.blk8_stride;
+        let left_mv = if left_avail { self.fwd_mv[blk8_idx - 1] } else { ZERO_MV };
+        let top_mv = if top_avail { self.fwd_mv[above] } else { left_mv };
+
+        let blk_no = xoff + yoff * 2;
+        let use_tr = match (blk_no, w16) {
+            (0, true) | (1, _) => sstate.has_tr,
+            (0, false) => sstate.has_t,
+            (2, false) => true,
+            _ => false,
+        };
+        let use_tl = match blk_no {
+            0 => sstate.has_tl,
+            1 => sstate.has_t,
+            2 => sstate.has_l,
+            _ => true,
+        };
+        let corner_mv = if use_tr {
+            let step = if w16 { 2 } else { 1 };
+            self.fwd_mv[above + step]
+        } else if use_tl {
+            self.fwd_mv[above - 1]
+        } else {
+            return cur_mv - left_mv;
+        };
+
+        cur_mv - MV::pred(left_mv, top_mv, corner_mv)
+    }
+    /// Returns the vector of the current macroblock for the given direction
+    /// minus its prediction from neighbouring macroblocks.
+    ///
+    /// Only neighbours whose type carries a vector for that direction
+    /// contribute: left, top and then top-right, or top-left when top-right
+    /// is not available. Three predictors are combined with `MV::pred`, two
+    /// are averaged, one is used directly and none means a zero prediction.
+    /// Without a top neighbour only the left predictor (or zero) is used.
+    pub fn get_diff_mv_b(&self, sstate: &SliceState, fwd: bool) -> MV {
+        let mb_idx = self.get_mb_idx(sstate.mb_x, sstate.mb_y);
+        let blk8_idx = self.get_blk8_idx(sstate.mb_x, sstate.mb_y);
+        let mb_top = mb_idx - self.mb_stride;
+        let blk_top = blk8_idx - self.blk8_stride;
+
+        let cur_mv = self.get_mv(blk8_idx, fwd);
+
+        let mut preds = [ZERO_MV; 3];
+        let mut pcount = 0;
+        if sstate.has_l && self.mb_type[mb_idx - 1].has_dir_mv(fwd) {
+            preds[pcount] = self.get_mv(blk8_idx - 1, fwd);
+            pcount += 1;
+        }
+        if !sstate.has_t {
+            return cur_mv - preds[0];
+        }
+
+        // (macroblock index, 8x8 block index) of the remaining candidates.
+        let corner = if sstate.has_tr {
+            Some((mb_top + 1, blk_top + 2))
+        } else if sstate.has_tl {
+            Some((mb_top - 1, blk_top - 1))
+        } else {
+            None
+        };
+        for &(mb_pos, blk_pos) in [Some((mb_top, blk_top)), corner].iter().flatten() {
+            if self.mb_type[mb_pos].has_dir_mv(fwd) {
+                preds[pcount] = self.get_mv(blk_pos, fwd);
+                pcount += 1;
+            }
+        }
+
+        let pred_mv = match &preds[..pcount] {
+            [a, b, c] => MV::pred(*a, *b, *c),
+            [a, b] => MV{ x: (a.x + b.x) / 2, y: (a.y + b.y) / 2 },
+            [a] => *a,
+            _ => ZERO_MV,
+        };
+        cur_mv - pred_mv
+    }
+    /// Exchanges the forward motion vectors with the reference ones.
+    pub fn swap_mvs(&mut self) {
+        std::mem::swap(&mut self.ref_mv, &mut self.fwd_mv);
+    }
+    /// Computes `dblk.deblock_y` for the current macroblock.
+    ///
+    /// With `is_strong` set all sixteen bits are set. Otherwise the result is
+    /// `cbp_y` combined with masks for the 8x8 blocks whose forward vector
+    /// differs from the left or top neighbour by more than 3 in a component.
+    /// Bits for the top row are cleared in the first macroblock row and bits
+    /// for the left column in the first macroblock column.
+    pub fn fill_deblock(&self, dblk: &mut DeblockInfo, sstate: &SliceState) {
+        if dblk.is_strong {
+            dblk.deblock_y = 0xFFFF;
+            return;
+        }
+        let has_left_mb = sstate.mb_x > 0;
+        let has_top_mb = sstate.mb_y > 0;
+        let base = self.get_blk8_idx(sstate.mb_x, sstate.mb_y);
+
+        let mut hmvmask: u16 = 0;
+        let mut vmvmask: u16 = 0;
+        for y in 0..2 {
+            for x in 0..2 {
+                let pos = base + y * self.blk8_stride + x;
+                let shift = x * 2 + y * 8;
+                let cur_mv = self.get_mv(pos, true);
+                if (x > 0 || has_left_mb)
+                        && cur_mv.diff_gt_3(self.get_mv(pos - 1, true)) {
+                    vmvmask |= 0x11 << shift;
+                }
+                if (y > 0 || has_top_mb)
+                        && cur_mv.diff_gt_3(self.get_mv(pos - self.blk8_stride, true)) {
+                    hmvmask |= 0x03 << shift;
+                }
+            }
+        }
+        if !has_top_mb { hmvmask &= !0x000F; }
+        if !has_left_mb { vmvmask &= !0x1111; }
+
+        dblk.deblock_y = dblk.cbp_y | hmvmask | vmvmask;
+    }
+}
+
+#[derive(Clone)]
+pub enum MacroblockType { // macroblock kind together with its prediction modes or motion vectors
+    Intra16x16(PredType8x8), // one prediction mode for the whole macroblock
+    Intra4x4([PredType4x4; 16]), // one mode per 4x4 block, four rows of four
+    PSkip, // stored with zero vectors by `MBState::update()`
+    Inter16x16(MV),
+    InterMix(MV), // recorded as `MBType::P16x16Mix`
+    Inter16x8([MV; 2]), // vectors for the top and the bottom half
+    Inter8x16([MV; 2]), // vectors for the left and the right half
+    Inter8x8([MV; 4]), // one vector per 8x8 block in raster order
+    BSkip([MV; 4], [MV; 4]), // forward and backward vectors
+    //Direct(MV, MV),
+    Bidir(MV, MV), // forward vector, backward vector
+    Forward(MV),
+    Backward(MV),
+}
+
+impl Default for MacroblockType {
+    /// The default macroblock is intra 16x16 with DC prediction.
+    fn default() -> Self {
+        MacroblockType::Intra16x16(PredType8x8::DC)
+    }
+}
+
+impl MacroblockType {
+    /// Tells whether the macroblock is intra coded (16x16 or 4x4).
+    pub fn is_intra(&self) -> bool {
+        match *self {
+            MacroblockType::Intra16x16(_) | MacroblockType::Intra4x4(_) => true,
+            _ => false,
+        }
+    }
+    /// Tells whether the macroblock is `Intra16x16` or `InterMix`.
+    pub fn is_16(&self) -> bool {
+        match *self {
+            MacroblockType::Intra16x16(_) | MacroblockType::InterMix(_) => true,
+            _ => false,
+        }
+    }
+    /// Tells whether the macroblock is skipped (`PSkip` or `BSkip`).
+    pub fn is_skip(&self) -> bool {
+        match *self {
+            MacroblockType::PSkip | MacroblockType::BSkip(_, _) => true,
+            _ => false,
+        }
+    }
+}
+
+pub struct Macroblock {
+ pub mb_type: MacroblockType,
+ pub coeffs: [Block; 25],
+}