X-Git-Url: https://git.nihav.org/?p=nihav.git;a=blobdiff_plain;f=nihav-duck%2Fsrc%2Fcodecs%2Fvp6enc%2Fdsp.rs;fp=nihav-duck%2Fsrc%2Fcodecs%2Fvp6enc%2Fdsp.rs;h=c1e86530201599b50bfd33be33af86f6fa89027c;hp=0000000000000000000000000000000000000000;hb=3952bfd9d2d5c2a64d50c2a89b02e93d9b97d541;hpb=e5b5248d8e4f0b6be84db2d00158a9dcdff0d512 diff --git a/nihav-duck/src/codecs/vp6enc/dsp.rs b/nihav-duck/src/codecs/vp6enc/dsp.rs new file mode 100644 index 0000000..c1e8653 --- /dev/null +++ b/nihav-duck/src/codecs/vp6enc/dsp.rs @@ -0,0 +1,508 @@ +use nihav_core::frame::*; +use nihav_codec_support::codecs::{MV, ZERO_MV}; +use super::super::vpcommon::*; +use super::super::vp6dsp::*; +use super::super::vp6data::*; +use super::ResidueMB; + +use std::str::FromStr; + +#[derive(Debug,Clone,Copy,PartialEq)] +pub enum MVSearchMode { + Full, + Diamond, + Hexagon, +} + +impl Default for MVSearchMode { + fn default() -> Self { MVSearchMode::Hexagon } +} + +pub struct ParseError{} + +impl FromStr for MVSearchMode { + type Err = ParseError; + + #[allow(clippy::single_match)] + fn from_str(s: &str) -> Result { + match s { + "full" => Ok(MVSearchMode::Full), + "dia" => Ok(MVSearchMode::Diamond), + "hex" => Ok(MVSearchMode::Hexagon), + _ => Err(ParseError{}), + } + } +} + +impl std::fmt::Display for MVSearchMode { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match *self { + MVSearchMode::Full => write!(f, "full"), + MVSearchMode::Diamond => write!(f, "dia"), + MVSearchMode::Hexagon => write!(f, "hex"), + } + } +} + + +const C1S7: i32 = 64277; +const C2S6: i32 = 60547; +const C3S5: i32 = 54491; +const C4S4: i32 = 46341; +const C5S3: i32 = 36410; +const C6S2: i32 = 25080; +const C7S1: i32 = 12785; + +fn mul16(a: i32, b: i32) -> i32 { + let res = a * b; + (res + if res < 0 { 0xFFFF } else { 0 }) >> 16 +} + +macro_rules! fdct_step { + ($s0:expr, $s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, + $d0:expr, $d1:expr, $d2:expr, $d3:expr, $d4:expr, $d5:expr, $d6:expr, $d7:expr) => { + let t_g = i32::from($s0) + i32::from($s7); + let t_c = i32::from($s0) - i32::from($s7); + let t_a = i32::from($s1) + i32::from($s2); + let t_h = i32::from($s1) - i32::from($s2); + let t_e1 = i32::from($s3) + i32::from($s4); + let t_d = i32::from($s3) - i32::from($s4); + let t_f = i32::from($s5) + i32::from($s6); + let t_b = i32::from($s5) - i32::from($s6); + + let t_b1 = t_h + t_b; + let t_h = t_h - t_b; + let t_a1 = t_a - t_f; + let t_f = t_a + t_f; + let t_e = t_g + t_e1; + let t_g = t_g - t_e1; + + $d2 = (mul16(C2S6, t_g) + mul16(C6S2, t_h)).max(-32768).min(32767) as i16; + $d6 = (mul16(C6S2, t_g) - mul16(C2S6, t_h)).max(-32768).min(32767) as i16; + $d0 = mul16(C4S4, t_e + t_f).max(-32768).min(32767) as i16; + $d4 = mul16(C4S4, t_e - t_f).max(-32768).min(32767) as i16; + let t_a = t_c + mul16(C4S4, t_a1); + let t_c = t_c - mul16(C4S4, t_a1); + let t_b = t_d + mul16(C4S4, t_b1); + let t_d = t_d - mul16(C4S4, t_b1); + $d3 = (mul16(C3S5, t_c) - mul16(C5S3, t_d)).max(-32768).min(32767) as i16; + $d5 = (mul16(C5S3, t_c) + mul16(C3S5, t_d)).max(-32768).min(32767) as i16; + $d1 = (mul16(C1S7, t_a) + mul16(C7S1, t_b)).max(-32768).min(32767) as i16; + $d7 = (mul16(C7S1, t_a) - mul16(C1S7, t_b)).max(-32768).min(32767) as i16; + } +} + +#[allow(clippy::erasing_op)] +pub fn vp_fdct(blk: &mut [i16; 64]) { + for row in blk.chunks_mut(8) { + fdct_step!(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], + row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7]); + } + for i in 0..8 { + fdct_step!(blk[0 * 8 + i], blk[1 * 8 + i], blk[2 * 8 + i], blk[3 * 8 + i], + blk[4 * 8 + i], blk[5 * 8 + i], blk[6 * 8 + i], blk[7 * 8 + i], + blk[0 * 8 + i], blk[1 * 8 + i], blk[2 * 8 + i], blk[3 * 8 + i], + blk[4 * 8 + i], blk[5 * 8 + i], blk[6 * 8 + i], blk[7 * 8 + i]); + } +} + +const MAX_DIST: u32 = std::u32::MAX; +const DIST_THRESH: u32 = 256; + +trait FromPixels { + fn from_pixels(self) -> Self; +} + +impl FromPixels for MV { + fn from_pixels(self) -> MV { + MV { x: self.x * 4, y: self.y * 4 } + } +} + +pub trait MVSearch { + fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32); + fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32); +} + +pub struct FullMVSearch {} + +impl FullMVSearch { + pub fn new() -> Self { Self{} } +} + +impl MVSearch for FullMVSearch { + fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32) { + let mut best_dist = MAX_DIST; + let mut best_mv = ZERO_MV; + + let mut cur_mv = ZERO_MV; + for ytry in 0..mv_est.mv_range * 2 + 1 { + let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) }; + cur_mv.y = dy * 4; + for xtry in 0..mv_est.mv_range * 2 + 1 { + let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) }; + cur_mv.x = dx * 4; + + let dist = mv_est.sad_mb(cur_blk, mb_x, mb_y, cur_mv, best_dist); + + if dist < best_dist { + best_dist = dist; + best_mv = cur_mv; + } + } + } + (best_mv, best_dist) + } + fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32) { + let mut best_dist = MAX_DIST; + let mut best_mv = ZERO_MV; + + let mut cur_mv = ZERO_MV; + for ytry in 0..mv_est.mv_range * 2 + 1 { + let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) }; + cur_mv.y = dy * 4; + for xtry in 0..mv_est.mv_range * 2 + 1 { + let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) }; + cur_mv.x = dx * 4; + + let dist = mv_est.sad_blk(cur_blk, xpos, ypos, cur_mv, best_dist); + + if dist < best_dist { + best_dist = dist; + best_mv = cur_mv; + } + } + } + (best_mv, best_dist) + } +} + +const DIA_PATTERN: [MV; 9] = [ + ZERO_MV, + MV {x: -2, y: 0}, + MV {x: -1, y: 1}, + MV {x: 0, y: 2}, + MV {x: 1, y: 1}, + MV {x: 2, y: 0}, + MV {x: 1, y: -1}, + MV {x: 0, y: -2}, + MV {x: -1, y: -1} +]; + +const HEX_PATTERN: [MV; 7] = [ + ZERO_MV, + MV {x: -2, y: 0}, + MV {x: -1, y: 2}, + MV {x: 1, y: 2}, + MV {x: 2, y: 0}, + MV {x: 1, y: -2}, + MV {x: -1, y: -2} +]; + +const REFINEMENT: [MV; 4] = [ + MV {x: -1, y: 0}, + MV {x: 0, y: 1}, + MV {x: 1, y: 0}, + MV {x: 0, y: -1} +]; + +macro_rules! search_template { + ($self: expr, $mv_est: expr, $cur_blk: expr, $mb_x: expr, $mb_y: expr, $sad_func: ident) => ({ + let mut best_dist = MAX_DIST; + let mut best_mv; + + let mut min_dist; + let mut min_idx; + + $self.reset(); + loop { + let mut cur_best_dist = best_dist; + for (dist, &point) in $self.dist.iter_mut().zip($self.point.iter()) { + if *dist == MAX_DIST { + *dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, point.from_pixels(), cur_best_dist); + cur_best_dist = cur_best_dist.min(*dist); + if *dist <= DIST_THRESH { + break; + } + } + } + min_dist = $self.dist[0]; + min_idx = 0; + for (i, &dist) in $self.dist.iter().enumerate().skip(1) { + if dist < min_dist { + min_dist = dist; + min_idx = i; + if dist <= DIST_THRESH { + break; + } + } + } + if min_dist <= DIST_THRESH || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range || $self.point[min_idx].y.abs() >= $mv_est.mv_range { + break; + } + best_dist = min_dist; + $self.update($self.steps[min_idx]); + } + best_dist = min_dist; + best_mv = $self.point[min_idx]; + if best_dist <= DIST_THRESH { + return (best_mv.from_pixels(), best_dist); + } + for &step in REFINEMENT.iter() { + let mv = best_mv + step; + let dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, mv.from_pixels(), MAX_DIST); + if best_dist > dist { + best_dist = dist; + best_mv = mv; + } + } + best_mv = best_mv.from_pixels(); + if best_dist <= DIST_THRESH { + return (best_mv, best_dist); + } + + // subpel refinement + $self.set_new_point(best_mv, best_dist); + loop { + let mut cur_best_dist = best_dist; + for (dist, &point) in $self.dist.iter_mut().zip($self.point.iter()) { + if *dist == MAX_DIST { + *dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, point, cur_best_dist); + cur_best_dist = cur_best_dist.min(*dist); + if *dist <= DIST_THRESH { + break; + } + } + } + min_dist = $self.dist[0]; + min_idx = 0; + for (i, &dist) in $self.dist.iter().enumerate().skip(1) { + if dist < min_dist { + min_dist = dist; + min_idx = i; + if dist <= DIST_THRESH { + break; + } + } + } + if min_dist <= DIST_THRESH || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range * 4 || $self.point[min_idx].y.abs() >= $mv_est.mv_range * 4 { + break; + } + best_dist = min_dist; + $self.update($self.steps[min_idx]); + } + best_dist = min_dist; + best_mv = $self.point[min_idx]; + if best_dist <= DIST_THRESH { + return (best_mv, best_dist); + } + for &step in REFINEMENT.iter() { + let mv = best_mv + step; + let dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, mv, MAX_DIST); + if best_dist > dist { + best_dist = dist; + best_mv = mv; + } + } + (best_mv, best_dist) + }) +} + +macro_rules! pattern_search { + ($struct_name: ident, $patterns: expr) => { + pub struct $struct_name { + point: [MV; $patterns.len()], + dist: [u32; $patterns.len()], + steps: &'static [MV; $patterns.len()], + } + + impl $struct_name { + pub fn new() -> Self { + Self { + point: $patterns, + dist: [MAX_DIST; $patterns.len()], + steps: &$patterns, + } + } + fn reset(&mut self) { + self.point = $patterns; + self.dist = [MAX_DIST; $patterns.len()]; + } + fn set_new_point(&mut self, start: MV, dist: u32) { + for (dst, &src) in self.point.iter_mut().zip(self.steps.iter()) { + *dst = src + start; + } + self.dist = [MAX_DIST; $patterns.len()]; + self.dist[0] = dist; + } + fn update(&mut self, step: MV) { + let mut new_point = self.point; + let mut new_dist = [MAX_DIST; $patterns.len()]; + + for point in new_point.iter_mut() { + *point += step; + } + + for (new_point, new_dist) in new_point.iter_mut().zip(new_dist.iter_mut()) { + for (&old_point, &old_dist) in self.point.iter().zip(self.dist.iter()) { + if *new_point == old_point { + *new_dist = old_dist; + break; + } + } + } + self.point = new_point; + self.dist = new_dist; + } + } + + impl MVSearch for $struct_name { + fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32) { + search_template!(self, mv_est, cur_blk, mb_x, mb_y, sad_mb) + } + fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32) { + search_template!(self, mv_est, cur_blk, xpos, ypos, sad_blk) + } + } + } +} + +pattern_search!(DiaSearch, DIA_PATTERN); +pattern_search!(HexSearch, HEX_PATTERN); + +pub struct MVEstimator { + pub ref_blk: [[u8; 64]; 6], + mc_buf: NAVideoBufferRef, + ref_frame: NAVideoBufferRef, + adv_profile: bool, + bicubic: bool, + autosel_pm: bool, + mv_thresh: u8, + var_thresh: u16, + filter_alpha: usize, + loop_thr: i16, + mv_range: i16, +pub count: usize, +pub count2: usize, +} + +impl MVEstimator { + pub fn new(ref_frame: NAVideoBufferRef, mc_buf: NAVideoBufferRef, loop_thr: i16, mv_range: i16) -> Self { + Self { + ref_blk: [[0; 64]; 6], + ref_frame, mc_buf, + adv_profile: false, + bicubic: false, + autosel_pm: false, + mv_thresh: 0, + var_thresh: 0, + filter_alpha: 0, + loop_thr, + mv_range, +count: 0, +count2: 0, + } + } + pub fn mc_block(&mut self, dst_idx: usize, plane: usize, x: usize, y: usize, mv: MV) { + let is_luma = (plane != 1) && (plane != 2); + let (sx, sy, mx, my, msx, msy) = if is_luma { + (mv.x >> 2, mv.y >> 2, (mv.x & 3) << 1, (mv.y & 3) << 1, mv.x / 4, mv.y / 4) + } else { + (mv.x >> 3, mv.y >> 3, mv.x & 7, mv.y & 7, mv.x / 8, mv.y / 8) + }; + let tmp_blk = self.mc_buf.get_data_mut().unwrap(); + get_block(tmp_blk, 16, self.ref_frame.clone(), plane, x, y, sx, sy); + if (msx & 7) != 0 { + let foff = (8 - (sx & 7)) as usize; + let off = 2 + foff; + vp31_loop_filter(tmp_blk, off, 1, 16, 12, self.loop_thr); + } + if (msy & 7) != 0 { + let foff = (8 - (sy & 7)) as usize; + let off = (2 + foff) * 16; + vp31_loop_filter(tmp_blk, off, 16, 1, 12, self.loop_thr); + } + let copy_mode = (mx == 0) && (my == 0); + let mut bicubic = !copy_mode && is_luma && self.bicubic; + if is_luma && !copy_mode && self.adv_profile { + if !self.autosel_pm { + bicubic = true; + } else { + let mv_limit = 1 << (self.mv_thresh + 1); + if (mv.x.abs() <= mv_limit) && (mv.y.abs() <= mv_limit) { + let mut var_off = 16 * 2 + 2; + if mv.x < 0 { var_off += 1; } + if mv.y < 0 { var_off += 16; } + let var = calc_variance(&tmp_blk[var_off..], 16); + if var >= self.var_thresh { + bicubic = true; + } + } + } + } + let dst = &mut self.ref_blk[dst_idx]; + if copy_mode { + let src = &tmp_blk[2 * 16 + 2..]; + for (dline, sline) in dst.chunks_mut(8).zip(src.chunks(16)).take(8) { + dline.copy_from_slice(&sline[..8]); + } + } else if bicubic { + let coeff_h = &VP6_BICUBIC_COEFFS[self.filter_alpha][mx as usize]; + let coeff_v = &VP6_BICUBIC_COEFFS[self.filter_alpha][my as usize]; + mc_bicubic(dst, 8, tmp_blk, 16 * 2 + 2, 16, coeff_h, coeff_v); + } else { + mc_bilinear(dst, 8, tmp_blk, 16 * 2 + 2, 16, mx as u16, my as u16); + } + } + fn sad_mb(&mut self, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize, cur_mv: MV, best_dist: u32) -> u32 { + let mut dist = 0; + for i in 0..4 { + self.mc_block(i, 0, mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >> 1) * 8, cur_mv); + dist += sad(&cur_blk[i], &self.ref_blk[i]); +self.count2 += 1; + if dist > best_dist { + break; + } + } + if dist <= best_dist { + for plane in 1..3 { + self.mc_block(plane + 3, plane, mb_x * 8, mb_y * 8, cur_mv); + dist += sad(&cur_blk[plane + 3], &self.ref_blk[plane + 3]); +self.count2 += 1; + if dist > best_dist { + break; + } + } + } +self.count += 1; + dist + } + fn sad_blk(&mut self, cur_blk: &[u8; 64], xpos: usize, ypos: usize, cur_mv: MV, _: u32) -> u32 { + self.mc_block(0, 0, xpos, ypos, cur_mv); + sad(cur_blk, &self.ref_blk[0]) + } +} + +fn sad(src1: &[u8; 64], src2: &[u8; 64]) -> u32 { + let mut sum = 0; + for (&p1, &p2) in src1.iter().zip(src2.iter()) { + sum += (i32::from(p1) - i32::from(p2)).abs() as u32; + } + sum +} + +pub fn sub_blk(dst: &mut [i16; 64], src1: &[u8; 64], src2: &[u8; 64]) { + for (dst, (&p1, &p2)) in dst.iter_mut().zip(src1.iter().zip(src2.iter())) { + *dst = i16::from(p1) - i16::from(p2); + } +} + +pub fn calc_mb_dist(mb1: &ResidueMB, mb2: &ResidueMB) -> u32 { + let mut sum = 0; + for (blk1, blk2) in mb1.coeffs.iter().zip(mb2.coeffs.iter()) { + for (&c1, &c2) in blk1.iter().zip(blk2.iter()) { + sum += (i32::from(c1) - i32::from(c2)).abs() as u32; + } + } + sum +}