| 1 | use nihav_core::frame::*; |
| 2 | use nihav_codec_support::codecs::{MV, ZERO_MV}; |
| 3 | use super::super::vpcommon::*; |
| 4 | use super::super::vp6dsp::*; |
| 5 | use super::super::vp6data::*; |
| 6 | use super::ResidueMB; |
| 7 | use crate::codecs::vpenc::motion_est::*; |
| 8 | pub use crate::codecs::vpenc::motion_est::MVSearchMode; |
| 9 | |
| 10 | |
// Forward DCT multipliers: round(cos(k * pi / 16) * 65536) for k = 1..=7.
// The name `CkSj` reflects that cos(k * pi / 16) == sin(j * pi / 16) when k + j == 8.

/// cos(1 * pi / 16) scaled by 65536.
const C1S7: i32 = 64_277;
/// cos(2 * pi / 16) scaled by 65536.
const C2S6: i32 = 60_547;
/// cos(3 * pi / 16) scaled by 65536.
const C3S5: i32 = 54_491;
/// cos(4 * pi / 16) scaled by 65536.
const C4S4: i32 = 46_341;
/// cos(5 * pi / 16) scaled by 65536.
const C5S3: i32 = 36_410;
/// cos(6 * pi / 16) scaled by 65536.
const C6S2: i32 = 25_080;
/// cos(7 * pi / 16) scaled by 65536.
const C7S1: i32 = 12_785;
| 18 | |
/// Multiplies `a` by `b` and discards the 16 low (fractional) bits of the
/// product, rounding towards zero.
///
/// The product is formed in 64 bits so that a 16-bit fixed-point constant
/// multiplied by an operand larger than about 2^15 no longer overflows `i32`
/// (which panics in debug builds and wraps in release builds). A quotient that
/// still does not fit into `i32` is saturated.
fn mul16(a: i32, b: i32) -> i32 {
    let product = i64::from(a) * i64::from(b);
    // Integer division truncates towards zero, which is exactly what the
    // biased arithmetic shift `(p + (p < 0 ? 0xFFFF : 0)) >> 16` computes.
    let scaled = product / 65536;
    scaled.max(i64::from(i32::MIN)).min(i64::from(i32::MAX)) as i32
}
| 23 | |
/// One 8-point forward DCT pass.
///
/// The eight `$s*` expressions are read and widened to `i32`, and the eight
/// results are stored into the `$d*` places after saturation to the `i16`
/// range. Every input is read before the first output is written, so the same
/// places may be passed as both sources and destinations.
macro_rules! fdct_step {
    ($s0:expr, $s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr,
     $d0:expr, $d1:expr, $d2:expr, $d3:expr, $d4:expr, $d5:expr, $d6:expr, $d7:expr) => {
        // Saturates an intermediate value to the 16-bit coefficient range.
        let sat16 = |val: i32| val.max(-32768).min(32767) as i16;

        // Fetch all inputs up front (sources may alias destinations).
        let (in0, in1, in2, in3) = (i32::from($s0), i32::from($s1), i32::from($s2), i32::from($s3));
        let (in4, in5, in6, in7) = (i32::from($s4), i32::from($s5), i32::from($s6), i32::from($s7));

        // First butterfly stage over the pairs (0,7), (1,2), (3,4) and (5,6).
        let sum07 = in0 + in7;
        let dif07 = in0 - in7;
        let sum12 = in1 + in2;
        let dif12 = in1 - in2;
        let sum34 = in3 + in4;
        let dif34 = in3 - in4;
        let sum56 = in5 + in6;
        let dif56 = in5 - in6;

        // Even-indexed outputs.
        let even_a = sum07 - sum34;
        let even_b = dif12 - dif56;
        $d2 = sat16(mul16(C2S6, even_a) + mul16(C6S2, even_b));
        $d6 = sat16(mul16(C6S2, even_a) - mul16(C2S6, even_b));
        let outer = sum07 + sum34;
        let inner = sum12 + sum56;
        $d0 = sat16(mul16(C4S4, outer + inner));
        $d4 = sat16(mul16(C4S4, outer - inner));

        // Odd-indexed outputs.
        let rot_a = mul16(C4S4, sum12 - sum56);
        let rot_b = mul16(C4S4, dif12 + dif56);
        let odd_a = dif07 + rot_a;
        let odd_c = dif07 - rot_a;
        let odd_b = dif34 + rot_b;
        let odd_d = dif34 - rot_b;
        $d3 = sat16(mul16(C3S5, odd_c) - mul16(C5S3, odd_d));
        $d5 = sat16(mul16(C5S3, odd_c) + mul16(C3S5, odd_d));
        $d1 = sat16(mul16(C1S7, odd_a) + mul16(C7S1, odd_b));
        $d7 = sat16(mul16(C7S1, odd_a) - mul16(C1S7, odd_b));
    }
}
| 57 | |
| 58 | #[allow(clippy::erasing_op)] |
| 59 | pub fn vp_fdct(blk: &mut [i16; 64]) { |
| 60 | for row in blk.chunks_mut(8) { |
| 61 | fdct_step!(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], |
| 62 | row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7]); |
| 63 | } |
| 64 | for i in 0..8 { |
| 65 | fdct_step!(blk[0 * 8 + i], blk[1 * 8 + i], blk[2 * 8 + i], blk[3 * 8 + i], |
| 66 | blk[4 * 8 + i], blk[5 * 8 + i], blk[6 * 8 + i], blk[7 * 8 + i], |
| 67 | blk[0 * 8 + i], blk[1 * 8 + i], blk[2 * 8 + i], blk[3 * 8 + i], |
| 68 | blk[4 * 8 + i], blk[5 * 8 + i], blk[6 * 8 + i], blk[7 * 8 + i]); |
| 69 | } |
| 70 | } |
| 71 | |
/// Distance value meaning "not evaluated yet" / "worse than anything real".
const MAX_DIST: u32 = u32::MAX;
/// Distance threshold; not referenced directly in this file's own functions
/// (presumably consumed by the `search_template!` expansion -- verify there).
const DIST_THRESH: u32 = 256;
| 74 | |
| 75 | trait FromPixels { |
| 76 | fn from_pixels(self) -> Self; |
| 77 | } |
| 78 | |
| 79 | impl FromPixels for MV { |
| 80 | fn from_pixels(self) -> MV { |
| 81 | MV { x: self.x * 4, y: self.y * 4 } |
| 82 | } |
| 83 | } |
| 84 | |
| 85 | pub trait MVSearch { |
| 86 | fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32); |
| 87 | fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32); |
| 88 | } |
| 89 | |
/// Stateless motion search strategy; see its `MVSearch` implementation for
/// the set of candidates it evaluates.
#[derive(Default)]
pub struct FullMVSearch {}

impl FullMVSearch {
    /// Creates a new searcher (equivalent to `FullMVSearch::default()`).
    pub fn new() -> Self {
        Self::default()
    }
}
| 95 | |
| 96 | impl MVSearch for FullMVSearch { |
| 97 | fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32) { |
| 98 | let mut best_dist = MAX_DIST; |
| 99 | let mut best_mv = ZERO_MV; |
| 100 | |
| 101 | let mut cur_mv = ZERO_MV; |
| 102 | for ytry in 0..mv_est.mv_range * 2 + 1 { |
| 103 | let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) }; |
| 104 | cur_mv.y = dy * 4; |
| 105 | for xtry in 0..mv_est.mv_range * 2 + 1 { |
| 106 | let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) }; |
| 107 | cur_mv.x = dx * 4; |
| 108 | |
| 109 | let dist = mv_est.sad_mb(cur_blk, mb_x, mb_y, cur_mv, best_dist); |
| 110 | |
| 111 | if dist < best_dist { |
| 112 | best_dist = dist; |
| 113 | best_mv = cur_mv; |
| 114 | } |
| 115 | } |
| 116 | } |
| 117 | (best_mv, best_dist) |
| 118 | } |
| 119 | fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32) { |
| 120 | let mut best_dist = MAX_DIST; |
| 121 | let mut best_mv = ZERO_MV; |
| 122 | |
| 123 | let mut cur_mv = ZERO_MV; |
| 124 | for ytry in 0..mv_est.mv_range * 2 + 1 { |
| 125 | let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) }; |
| 126 | cur_mv.y = dy * 4; |
| 127 | for xtry in 0..mv_est.mv_range * 2 + 1 { |
| 128 | let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) }; |
| 129 | cur_mv.x = dx * 4; |
| 130 | |
| 131 | let dist = mv_est.sad_blk(cur_blk, xpos, ypos, cur_mv, best_dist); |
| 132 | |
| 133 | if dist < best_dist { |
| 134 | best_dist = dist; |
| 135 | best_mv = cur_mv; |
| 136 | } |
| 137 | } |
| 138 | } |
| 139 | (best_mv, best_dist) |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | macro_rules! pattern_search { |
| 144 | ($struct_name: ident, $patterns: expr) => { |
| 145 | pub struct $struct_name { |
| 146 | point: [MV; $patterns.len()], |
| 147 | dist: [u32; $patterns.len()], |
| 148 | steps: &'static [MV; $patterns.len()], |
| 149 | } |
| 150 | |
| 151 | impl $struct_name { |
| 152 | pub fn new() -> Self { |
| 153 | Self { |
| 154 | point: $patterns, |
| 155 | dist: [MAX_DIST; $patterns.len()], |
| 156 | steps: &$patterns, |
| 157 | } |
| 158 | } |
| 159 | fn reset(&mut self) { |
| 160 | self.point = $patterns; |
| 161 | self.dist = [MAX_DIST; $patterns.len()]; |
| 162 | } |
| 163 | fn set_new_point(&mut self, start: MV, dist: u32) { |
| 164 | for (dst, &src) in self.point.iter_mut().zip(self.steps.iter()) { |
| 165 | *dst = src + start; |
| 166 | } |
| 167 | self.dist = [MAX_DIST; $patterns.len()]; |
| 168 | self.dist[0] = dist; |
| 169 | } |
| 170 | fn update(&mut self, step: MV) { |
| 171 | let mut new_point = self.point; |
| 172 | let mut new_dist = [MAX_DIST; $patterns.len()]; |
| 173 | |
| 174 | for point in new_point.iter_mut() { |
| 175 | *point += step; |
| 176 | } |
| 177 | |
| 178 | for (new_point, new_dist) in new_point.iter_mut().zip(new_dist.iter_mut()) { |
| 179 | for (&old_point, &old_dist) in self.point.iter().zip(self.dist.iter()) { |
| 180 | if *new_point == old_point { |
| 181 | *new_dist = old_dist; |
| 182 | break; |
| 183 | } |
| 184 | } |
| 185 | } |
| 186 | self.point = new_point; |
| 187 | self.dist = new_dist; |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | impl MVSearch for $struct_name { |
| 192 | fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32) { |
| 193 | search_template!(self, mv_est, cur_blk, mb_x, mb_y, sad_mb) |
| 194 | } |
| 195 | fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32) { |
| 196 | search_template!(self, mv_est, cur_blk, xpos, ypos, sad_blk) |
| 197 | } |
| 198 | } |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | pattern_search!(DiaSearch, DIA_PATTERN); |
| 203 | pattern_search!(HexSearch, HEX_PATTERN); |
| 204 | |
| 205 | pub struct MVEstimator { |
| 206 | pub ref_blk: [[u8; 64]; 6], |
| 207 | mc_buf: NAVideoBufferRef<u8>, |
| 208 | ref_frame: NAVideoBufferRef<u8>, |
| 209 | adv_profile: bool, |
| 210 | bicubic: bool, |
| 211 | autosel_pm: bool, |
| 212 | mv_thresh: u8, |
| 213 | var_thresh: u16, |
| 214 | filter_alpha: usize, |
| 215 | loop_thr: i16, |
| 216 | mv_range: i16, |
| 217 | } |
| 218 | |
| 219 | impl MVEstimator { |
| 220 | pub fn new(ref_frame: NAVideoBufferRef<u8>, mc_buf: NAVideoBufferRef<u8>, loop_thr: i16, mv_range: i16) -> Self { |
| 221 | Self { |
| 222 | ref_blk: [[0; 64]; 6], |
| 223 | ref_frame, mc_buf, |
| 224 | adv_profile: false, |
| 225 | bicubic: false, |
| 226 | autosel_pm: false, |
| 227 | mv_thresh: 0, |
| 228 | var_thresh: 0, |
| 229 | filter_alpha: 0, |
| 230 | loop_thr, |
| 231 | mv_range, |
| 232 | } |
| 233 | } |
| 234 | pub fn mc_block(&mut self, dst_idx: usize, plane: usize, x: usize, y: usize, mv: MV) { |
| 235 | let is_luma = (plane != 1) && (plane != 2); |
| 236 | let (sx, sy, mx, my, msx, msy) = if is_luma { |
| 237 | (mv.x >> 2, mv.y >> 2, (mv.x & 3) << 1, (mv.y & 3) << 1, mv.x / 4, mv.y / 4) |
| 238 | } else { |
| 239 | (mv.x >> 3, mv.y >> 3, mv.x & 7, mv.y & 7, mv.x / 8, mv.y / 8) |
| 240 | }; |
| 241 | let tmp_blk = self.mc_buf.get_data_mut().unwrap(); |
| 242 | get_block(tmp_blk, 16, self.ref_frame.clone(), plane, x, y, sx, sy); |
| 243 | if (msx & 7) != 0 { |
| 244 | let foff = (8 - (sx & 7)) as usize; |
| 245 | let off = 2 + foff; |
| 246 | vp31_loop_filter(tmp_blk, off, 1, 16, 12, self.loop_thr); |
| 247 | } |
| 248 | if (msy & 7) != 0 { |
| 249 | let foff = (8 - (sy & 7)) as usize; |
| 250 | let off = (2 + foff) * 16; |
| 251 | vp31_loop_filter(tmp_blk, off, 16, 1, 12, self.loop_thr); |
| 252 | } |
| 253 | let copy_mode = (mx == 0) && (my == 0); |
| 254 | let mut bicubic = !copy_mode && is_luma && self.bicubic; |
| 255 | if is_luma && !copy_mode && self.adv_profile { |
| 256 | if !self.autosel_pm { |
| 257 | bicubic = true; |
| 258 | } else { |
| 259 | let mv_limit = 1 << (self.mv_thresh + 1); |
| 260 | if (mv.x.abs() <= mv_limit) && (mv.y.abs() <= mv_limit) { |
| 261 | let mut var_off = 16 * 2 + 2; |
| 262 | if mv.x < 0 { var_off += 1; } |
| 263 | if mv.y < 0 { var_off += 16; } |
| 264 | let var = calc_variance(&tmp_blk[var_off..], 16); |
| 265 | if var >= self.var_thresh { |
| 266 | bicubic = true; |
| 267 | } |
| 268 | } |
| 269 | } |
| 270 | } |
| 271 | let dst = &mut self.ref_blk[dst_idx]; |
| 272 | if copy_mode { |
| 273 | let src = &tmp_blk[2 * 16 + 2..]; |
| 274 | for (dline, sline) in dst.chunks_mut(8).zip(src.chunks(16)).take(8) { |
| 275 | dline.copy_from_slice(&sline[..8]); |
| 276 | } |
| 277 | } else if bicubic { |
| 278 | let coeff_h = &VP6_BICUBIC_COEFFS[self.filter_alpha][mx as usize]; |
| 279 | let coeff_v = &VP6_BICUBIC_COEFFS[self.filter_alpha][my as usize]; |
| 280 | mc_bicubic(dst, 8, tmp_blk, 16 * 2 + 2, 16, coeff_h, coeff_v); |
| 281 | } else { |
| 282 | mc_bilinear(dst, 8, tmp_blk, 16 * 2 + 2, 16, mx as u16, my as u16); |
| 283 | } |
| 284 | } |
| 285 | fn sad_mb(&mut self, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize, cur_mv: MV, best_dist: u32) -> u32 { |
| 286 | let mut dist = 0; |
| 287 | for i in 0..4 { |
| 288 | self.mc_block(i, 0, mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >> 1) * 8, cur_mv); |
| 289 | dist += sad(&cur_blk[i], &self.ref_blk[i]); |
| 290 | if dist > best_dist { |
| 291 | break; |
| 292 | } |
| 293 | } |
| 294 | if dist <= best_dist { |
| 295 | for plane in 1..3 { |
| 296 | self.mc_block(plane + 3, plane, mb_x * 8, mb_y * 8, cur_mv); |
| 297 | dist += sad(&cur_blk[plane + 3], &self.ref_blk[plane + 3]); |
| 298 | if dist > best_dist { |
| 299 | break; |
| 300 | } |
| 301 | } |
| 302 | } |
| 303 | dist |
| 304 | } |
| 305 | fn sad_blk(&mut self, cur_blk: &[u8; 64], xpos: usize, ypos: usize, cur_mv: MV, _: u32) -> u32 { |
| 306 | self.mc_block(0, 0, xpos, ypos, cur_mv); |
| 307 | sad(cur_blk, &self.ref_blk[0]) |
| 308 | } |
| 309 | } |
| 310 | |
/// Returns the sum of absolute differences between two 64-sample blocks.
fn sad(src1: &[u8; 64], src2: &[u8; 64]) -> u32 {
    src1.iter()
        .zip(src2.iter())
        .map(|(&a, &b)| u32::from(if a > b { a - b } else { b - a }))
        .sum()
}
| 318 | |
/// Stores the per-sample difference `src1 - src2` into `dst`.
pub fn sub_blk(dst: &mut [i16; 64], src1: &[u8; 64], src2: &[u8; 64]) {
    let diffs = src1
        .iter()
        .zip(src2.iter())
        .map(|(&a, &b)| i16::from(a) - i16::from(b));
    for (out, diff) in dst.iter_mut().zip(diffs) {
        *out = diff;
    }
}
| 324 | |
| 325 | pub fn calc_mb_dist(mb1: &ResidueMB, mb2: &ResidueMB) -> u32 { |
| 326 | let mut sum = 0; |
| 327 | for (blk1, blk2) in mb1.coeffs.iter().zip(mb2.coeffs.iter()) { |
| 328 | for (&c1, &c2) in blk1.iter().zip(blk2.iter()) { |
| 329 | sum += (i32::from(c1) - i32::from(c2)).abs() as u32; |
| 330 | } |
| 331 | } |
| 332 | sum |
| 333 | } |