41b72cf849c0c3081bd878a8d2cf5f73284388e8
[nihav.git] / nihav-duck / src / codecs / vp6enc / dsp.rs
1 use nihav_core::frame::*;
2 use nihav_codec_support::codecs::{MV, ZERO_MV};
3 use super::super::vpcommon::*;
4 use super::super::vp6dsp::*;
5 use super::super::vp6data::*;
6 use super::ResidueMB;
7
8 use std::str::FromStr;
9
/// Motion vector search strategy.
///
/// The textual names accepted by `FromStr` and produced by `Display` are
/// `"full"`, `"dia"` and `"hex"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MVSearchMode {
    /// Exhaustive search (`"full"`).
    Full,
    /// Diamond pattern search (`"dia"`).
    Diamond,
    /// Hexagon pattern search (`"hex"`); this is the default mode.
    Hexagon,
}

impl Default for MVSearchMode {
    /// Returns [`MVSearchMode::Hexagon`].
    fn default() -> Self { MVSearchMode::Hexagon }
}

/// Error returned when a string does not name a known [`MVSearchMode`].
///
/// It implements `Debug`, `Display` and `std::error::Error` so that parse
/// results can be unwrapped, printed and propagated like any other error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseError{}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_str("unknown motion search mode (expected \"full\", \"dia\" or \"hex\")")
    }
}

impl std::error::Error for ParseError {}

impl FromStr for MVSearchMode {
    type Err = ParseError;

    /// Parses one of the mode names `"full"`, `"dia"` or `"hex"`.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError`] for any other input; matching is exact and
    /// case-sensitive.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "full"  => Ok(MVSearchMode::Full),
            "dia"   => Ok(MVSearchMode::Diamond),
            "hex"   => Ok(MVSearchMode::Hexagon),
            _       => Err(ParseError{}),
        }
    }
}

impl std::fmt::Display for MVSearchMode {
    /// Writes the same name that `from_str` accepts for this mode.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match *self {
            MVSearchMode::Full      => write!(f, "full"),
            MVSearchMode::Diamond   => write!(f, "dia"),
            MVSearchMode::Hexagon   => write!(f, "hex"),
        }
    }
}
46
47
// Fixed-point DCT constants: CkSj == round(cos(k * pi / 16) * 65536),
// which equals sin(j * pi / 16) scaled the same way (k + j == 8).
const C1S7: i32 = 64277;
const C2S6: i32 = 60547;
const C3S5: i32 = 54491;
const C4S4: i32 = 46341;
const C5S3: i32 = 36410;
const C6S2: i32 = 25080;
const C7S1: i32 = 12785;

/// Multiplies `a` by `b` and drops 16 fractional bits, truncating towards zero.
///
/// The product is formed in 64 bits so that large operands (for example
/// saturated coefficients entering the second transform pass) cannot overflow;
/// for every input whose product fits into `i32` the result is unchanged.
fn mul16(a: i32, b: i32) -> i32 {
    let res = i64::from(a) * i64::from(b);
    // Adding 0xFFFF before the arithmetic shift turns floor into truncation
    // for negative products.
    // The narrowing cast is lossless for the operand ranges used by
    // `fdct_step` (a 17-bit constant times a sum of a few 16-bit values).
    ((res + if res < 0 { 0xFFFF } else { 0 }) >> 16) as i32
}

// One eight-point forward DCT pass.
//
// `$s0..$s7` are the inputs and `$d0..$d7` the outputs. All inputs are read
// into temporaries before the first output is written, so the same places may
// be passed as sources and destinations. Every output is saturated to the
// `i16` range.
macro_rules! fdct_step {
    ($s0:expr, $s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr,
     $d0:expr, $d1:expr, $d2:expr, $d3:expr, $d4:expr, $d5:expr, $d6:expr, $d7:expr) => {
        // first butterfly stage: sums and differences of input pairs
        let t_g  = i32::from($s0) + i32::from($s7);
        let t_c  = i32::from($s0) - i32::from($s7);
        let t_a  = i32::from($s1) + i32::from($s2);
        let t_h  = i32::from($s1) - i32::from($s2);
        let t_e1 = i32::from($s3) + i32::from($s4);
        let t_d  = i32::from($s3) - i32::from($s4);
        let t_f  = i32::from($s5) + i32::from($s6);
        let t_b  = i32::from($s5) - i32::from($s6);

        // second butterfly stage
        let t_b1 = t_h + t_b;
        let t_h  = t_h - t_b;
        let t_a1 = t_a - t_f;
        let t_f  = t_a + t_f;
        let t_e  = t_g + t_e1;
        let t_g  = t_g - t_e1;

        // even-numbered outputs
        $d2 = (mul16(C2S6, t_g) + mul16(C6S2, t_h)).max(-32768).min(32767) as i16;
        $d6 = (mul16(C6S2, t_g) - mul16(C2S6, t_h)).max(-32768).min(32767) as i16;
        $d0 = mul16(C4S4, t_e + t_f).max(-32768).min(32767) as i16;
        $d4 = mul16(C4S4, t_e - t_f).max(-32768).min(32767) as i16;
        // odd-numbered outputs
        let t_a = t_c + mul16(C4S4, t_a1);
        let t_c = t_c - mul16(C4S4, t_a1);
        let t_b = t_d + mul16(C4S4, t_b1);
        let t_d = t_d - mul16(C4S4, t_b1);
        $d3 = (mul16(C3S5, t_c) - mul16(C5S3, t_d)).max(-32768).min(32767) as i16;
        $d5 = (mul16(C5S3, t_c) + mul16(C3S5, t_d)).max(-32768).min(32767) as i16;
        $d1 = (mul16(C1S7, t_a) + mul16(C7S1, t_b)).max(-32768).min(32767) as i16;
        $d7 = (mul16(C7S1, t_a) - mul16(C1S7, t_b)).max(-32768).min(32767) as i16;
    }
}

/// Performs an in-place forward 8x8 DCT on `blk` (stored row by row).
///
/// The transform is separable: `fdct_step` is first applied to each of the
/// eight rows and then to each of the eight columns. Results are saturated to
/// the `i16` range after every pass.
// `0 * 8 + i` is spelled out to keep the column indices visually uniform.
#[allow(clippy::erasing_op)]
pub fn vp_fdct(blk: &mut [i16; 64]) {
    for row in blk.chunks_exact_mut(8) {
        fdct_step!(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7],
                   row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7]);
    }
    for i in 0..8 {
        fdct_step!(blk[0 * 8 + i], blk[1 * 8 + i], blk[2 * 8 + i], blk[3 * 8 + i],
                   blk[4 * 8 + i], blk[5 * 8 + i], blk[6 * 8 + i], blk[7 * 8 + i],
                   blk[0 * 8 + i], blk[1 * 8 + i], blk[2 * 8 + i], blk[3 * 8 + i],
                   blk[4 * 8 + i], blk[5 * 8 + i], blk[6 * 8 + i], blk[7 * 8 + i]);
    }
}
108
/// Sentinel distance: the initial "no candidate yet" value of the best
/// distance, and the marker for pattern points whose SAD has not been
/// evaluated.
const MAX_DIST: u32 = std::u32::MAX;
/// SAD value at or below which the pattern searches accept a candidate
/// immediately and stop.
const DIST_THRESH: u32 = 256;
111
112 trait FromPixels {
113 fn from_pixels(self) -> Self;
114 }
115
116 impl FromPixels for MV {
117 fn from_pixels(self) -> MV {
118 MV { x: self.x * 4, y: self.y * 4 }
119 }
120 }
121
122 pub trait MVSearch {
123 fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32);
124 fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32);
125 }
126
/// Exhaustive motion search; the type carries no state of its own.
pub struct FullMVSearch {}

impl FullMVSearch {
    /// Creates a new exhaustive searcher.
    pub fn new() -> Self {
        FullMVSearch {}
    }
}
132
133 impl MVSearch for FullMVSearch {
134 fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32) {
135 let mut best_dist = MAX_DIST;
136 let mut best_mv = ZERO_MV;
137
138 let mut cur_mv = ZERO_MV;
139 for ytry in 0..mv_est.mv_range * 2 + 1 {
140 let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) };
141 cur_mv.y = dy * 4;
142 for xtry in 0..mv_est.mv_range * 2 + 1 {
143 let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) };
144 cur_mv.x = dx * 4;
145
146 let dist = mv_est.sad_mb(cur_blk, mb_x, mb_y, cur_mv, best_dist);
147
148 if dist < best_dist {
149 best_dist = dist;
150 best_mv = cur_mv;
151 }
152 }
153 }
154 (best_mv, best_dist)
155 }
156 fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32) {
157 let mut best_dist = MAX_DIST;
158 let mut best_mv = ZERO_MV;
159
160 let mut cur_mv = ZERO_MV;
161 for ytry in 0..mv_est.mv_range * 2 + 1 {
162 let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) };
163 cur_mv.y = dy * 4;
164 for xtry in 0..mv_est.mv_range * 2 + 1 {
165 let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) };
166 cur_mv.x = dx * 4;
167
168 let dist = mv_est.sad_blk(cur_blk, xpos, ypos, cur_mv, best_dist);
169
170 if dist < best_dist {
171 best_dist = dist;
172 best_mv = cur_mv;
173 }
174 }
175 }
176 (best_mv, best_dist)
177 }
178 }
179
/// Diamond search pattern: the centre point followed by the eight offsets
/// for which `|x| + |y| == 2`.
const DIA_PATTERN: [MV; 9] = [
    ZERO_MV,
    MV {x: -2, y:  0},
    MV {x: -1, y:  1},
    MV {x:  0, y:  2},
    MV {x:  1, y:  1},
    MV {x:  2, y:  0},
    MV {x:  1, y: -1},
    MV {x:  0, y: -2},
    MV {x: -1, y: -1}
];
191
/// Hexagon search pattern: the centre point followed by six surrounding
/// offsets.
const HEX_PATTERN: [MV; 7] = [
    ZERO_MV,
    MV {x: -2, y:  0},
    MV {x: -1, y:  2},
    MV {x:  1, y:  2},
    MV {x:  2, y:  0},
    MV {x:  1, y: -2},
    MV {x: -1, y: -2}
];
201
/// The four unit-step neighbours (left, down, right, up in table order) that
/// are probed around the best pattern point once a pattern search has
/// finished.
const REFINEMENT: [MV; 4] = [
    MV {x: -1, y:  0},
    MV {x:  0, y:  1},
    MV {x:  1, y:  0},
    MV {x:  0, y: -1}
];
208
// Shared body of the pattern-based searches.
//
// `$self` is the pattern state (it must provide `point`, `dist`, `steps`,
// `reset()`, `set_new_point()` and `update()`), `$mv_est` the estimator and
// `$sad_func` the name of the estimator method used to measure a candidate.
// The macro expands to an expression of type `(MV, u32)` and also contains
// early `return`s, so it must be used as the body of a function returning
// that type.
//
// The search runs twice with the same logic: first with the pattern expressed
// in whole pixels (points are passed through `from_pixels()` before being
// measured), then restarted around the best result with the pattern offsets
// applied directly in motion vector units. Each stage is followed by a probe
// of the four `REFINEMENT` neighbours. Any distance at or below `DIST_THRESH`
// ends the search immediately.
macro_rules! search_template {
    ($self: expr, $mv_est: expr, $cur_blk: expr, $mb_x: expr, $mb_y: expr, $sad_func: ident) => ({
        let mut best_dist = MAX_DIST;
        let mut best_mv;

        let mut min_dist;
        let mut min_idx;

        // whole-pixel stage: start with the pattern centred on the zero vector
        $self.reset();
        loop {
            let mut cur_best_dist = best_dist;
            // measure only the points that have no distance yet
            for (dist, &point) in $self.dist.iter_mut().zip($self.point.iter()) {
                if *dist == MAX_DIST {
                    *dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, point.from_pixels(), cur_best_dist);
                    cur_best_dist = cur_best_dist.min(*dist);
                    if *dist <= DIST_THRESH {
                        break;
                    }
                }
            }
            // pick the best point; ties keep the earlier entry (the centre is entry 0)
            min_dist = $self.dist[0];
            min_idx = 0;
            for (i, &dist) in $self.dist.iter().enumerate().skip(1) {
                if dist < min_dist {
                    min_dist = dist;
                    min_idx = i;
                    if dist <= DIST_THRESH {
                        break;
                    }
                }
            }
            // stop when the result is good enough, the centre is the best point,
            // no progress was made, or the best point has reached the search range
            if min_dist <= DIST_THRESH || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range || $self.point[min_idx].y.abs() >= $mv_est.mv_range {
                break;
            }
            best_dist = min_dist;
            // re-centre the pattern on the best point
            $self.update($self.steps[min_idx]);
        }
        best_dist = min_dist;
        best_mv = $self.point[min_idx];
        if best_dist <= DIST_THRESH {
            return (best_mv.from_pixels(), best_dist);
        }
        // probe the four direct whole-pixel neighbours of the best point
        for &step in REFINEMENT.iter() {
            let mv = best_mv + step;
            let dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, mv.from_pixels(), MAX_DIST);
            if best_dist > dist {
                best_dist = dist;
                best_mv = mv;
            }
        }
        // from here on `best_mv` is kept in the units expected by the SAD function
        best_mv = best_mv.from_pixels();
        if best_dist <= DIST_THRESH {
            return (best_mv, best_dist);
        }

        // subpel refinement: same procedure, but pattern offsets are now
        // applied without the whole-pixel scaling
        $self.set_new_point(best_mv, best_dist);
        loop {
            let mut cur_best_dist = best_dist;
            for (dist, &point) in $self.dist.iter_mut().zip($self.point.iter()) {
                if *dist == MAX_DIST {
                    *dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, point, cur_best_dist);
                    cur_best_dist = cur_best_dist.min(*dist);
                    if *dist <= DIST_THRESH {
                        break;
                    }
                }
            }
            min_dist = $self.dist[0];
            min_idx = 0;
            for (i, &dist) in $self.dist.iter().enumerate().skip(1) {
                if dist < min_dist {
                    min_dist = dist;
                    min_idx = i;
                    if dist <= DIST_THRESH {
                        break;
                    }
                }
            }
            // the range limit is scaled by four to match the finer units
            if min_dist <= DIST_THRESH || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range * 4 || $self.point[min_idx].y.abs() >= $mv_est.mv_range * 4 {
                break;
            }
            best_dist = min_dist;
            $self.update($self.steps[min_idx]);
        }
        best_dist = min_dist;
        best_mv = $self.point[min_idx];
        if best_dist <= DIST_THRESH {
            return (best_mv, best_dist);
        }
        // final probe of the four nearest neighbours in the finer units
        for &step in REFINEMENT.iter() {
            let mv = best_mv + step;
            let dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, mv, MAX_DIST);
            if best_dist > dist {
                best_dist = dist;
                best_mv = mv;
            }
        }
        (best_mv, best_dist)
    })
}
310
// Generates a pattern-based motion searcher named `$struct_name` from the
// constant offset table `$patterns` (entry 0 is expected to be the centre).
//
// The generated type keeps the current absolute position of every pattern
// point together with its measured distance (`MAX_DIST` meaning "not measured
// yet") and implements `MVSearch` by means of `search_template!`.
macro_rules! pattern_search {
    ($struct_name: ident, $patterns: expr) => {
        pub struct $struct_name {
            // current position of every pattern point
            point: [MV; $patterns.len()],
            // distance measured at the corresponding point
            dist: [u32; $patterns.len()],
            // the constant pattern offsets
            steps: &'static [MV; $patterns.len()],
        }

        impl $struct_name {
            /// Creates a searcher with the pattern centred on the zero vector
            /// and no distances measured.
            pub fn new() -> Self {
                let unmeasured = [MAX_DIST; $patterns.len()];
                Self {
                    steps: &$patterns,
                    point: $patterns,
                    dist: unmeasured,
                }
            }
            // Puts the pattern back at the origin and forgets all distances.
            fn reset(&mut self) {
                self.dist = [MAX_DIST; $patterns.len()];
                self.point = $patterns;
            }
            // Centres the pattern on `start`, whose distance is already known.
            fn set_new_point(&mut self, start: MV, dist: u32) {
                for (idx, &offset) in self.steps.iter().enumerate() {
                    self.point[idx] = offset + start;
                }
                self.dist = [MAX_DIST; $patterns.len()];
                self.dist[0] = dist;
            }
            // Moves the whole pattern by `step`. A moved point that coincides
            // with one of the previous points inherits that point's distance;
            // all other points become unmeasured.
            fn update(&mut self, step: MV) {
                let old_point = self.point;
                let old_dist = self.dist;

                for (point, dist) in self.point.iter_mut().zip(self.dist.iter_mut()) {
                    *point += step;
                    *dist = old_point
                        .iter()
                        .position(|&prev| *point == prev)
                        .map_or(MAX_DIST, |found| old_dist[found]);
                }
            }
        }

        impl MVSearch for $struct_name {
            fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32) {
                search_template!(self, mv_est, cur_blk, mb_x, mb_y, sad_mb)
            }
            fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32) {
                search_template!(self, mv_est, cur_blk, xpos, ypos, sad_blk)
            }
        }
    }
}
369
// Diamond pattern searcher built on `DIA_PATTERN`.
pattern_search!(DiaSearch, DIA_PATTERN);
// Hexagon pattern searcher built on `HEX_PATTERN`.
pattern_search!(HexSearch, HEX_PATTERN);
372
/// Produces motion-compensated reference blocks and measures how well they
/// match the blocks being coded.
///
/// NOTE(review): `adv_profile`, `bicubic`, `autosel_pm`, `mv_thresh`,
/// `var_thresh` and `filter_alpha` are private and only ever initialised to
/// `false`/`0` by `new()` in the code visible here, so with this section
/// alone interpolation is always bilinear. Confirm whether setters exist
/// elsewhere.
pub struct MVEstimator {
    /// Output of `mc_block()`: one 8x8 prediction per destination index.
    pub ref_blk:    [[u8; 64]; 6],
    // scratch buffer that receives the source area fetched by `get_block`
    // (addressed with a stride of 16)
    mc_buf:         NAVideoBufferRef<u8>,
    // frame the predictions are taken from
    ref_frame:      NAVideoBufferRef<u8>,
    // when set, luma interpolation may use the bicubic filter (see `mc_block`)
    adv_profile:    bool,
    // forces bicubic interpolation for fractional luma vectors
    bicubic:        bool,
    // with `adv_profile`: choose bicubic per block from `mv_thresh`/`var_thresh`
    autosel_pm:     bool,
    // vector magnitude limit for the automatic selection is `1 << (mv_thresh + 1)`
    mv_thresh:      u8,
    // minimum variance for the automatic selection to pick bicubic
    var_thresh:     u16,
    // first index into `VP6_BICUBIC_COEFFS`
    filter_alpha:   usize,
    // threshold handed to `vp31_loop_filter`
    loop_thr:       i16,
    // search range used by the `MVSearch` implementations
    mv_range:       i16,
}
386
387 impl MVEstimator {
388 pub fn new(ref_frame: NAVideoBufferRef<u8>, mc_buf: NAVideoBufferRef<u8>, loop_thr: i16, mv_range: i16) -> Self {
389 Self {
390 ref_blk: [[0; 64]; 6],
391 ref_frame, mc_buf,
392 adv_profile: false,
393 bicubic: false,
394 autosel_pm: false,
395 mv_thresh: 0,
396 var_thresh: 0,
397 filter_alpha: 0,
398 loop_thr,
399 mv_range,
400 }
401 }
/// Builds the 8x8 prediction for the block at `x`, `y` of `plane` displaced
/// by `mv` and stores it in `self.ref_blk[dst_idx]`.
///
/// Planes 1 and 2 are treated as chroma, every other plane as luma. For luma
/// the low two bits of each vector component are the fractional part, for
/// chroma the low three bits.
///
/// # Panics
///
/// Panics if the scratch buffer data cannot be borrowed mutably
/// (`get_data_mut()` returns `None`).
pub fn mc_block(&mut self, dst_idx: usize, plane: usize, x: usize, y: usize, mv: MV) {
    let is_luma = (plane != 1) && (plane != 2);
    // sx/sy: whole-sample displacement (arithmetic shift),
    // mx/my: fractional position in eighths of a sample,
    // msx/msy: whole-sample displacement rounded towards zero (division)
    let (sx, sy, mx, my, msx, msy) = if is_luma {
            (mv.x >> 2, mv.y >> 2, (mv.x & 3) << 1, (mv.y & 3) << 1, mv.x / 4, mv.y / 4)
        } else {
            (mv.x >> 3, mv.y >> 3, mv.x & 7, mv.y & 7, mv.x / 8, mv.y / 8)
        };
    let tmp_blk = self.mc_buf.get_data_mut().unwrap();
    // fetch the displaced source area into the scratch buffer (stride 16);
    // the block proper starts at row 2, column 2 (see the copy branch below)
    get_block(tmp_blk, 16, self.ref_frame.clone(), plane, x, y, sx, sy);
    // NOTE(review): presumably a displacement that is not a multiple of eight
    // means the fetched block straddles a block edge of the reference frame,
    // which is then smoothed with the loop filter - confirm against the decoder
    if (msx & 7) != 0 {
        let foff = (8 - (sx & 7)) as usize;
        let off = 2 + foff;
        vp31_loop_filter(tmp_blk, off, 1, 16, 12, self.loop_thr);
    }
    if (msy & 7) != 0 {
        let foff = (8 - (sy & 7)) as usize;
        let off = (2 + foff) * 16;
        vp31_loop_filter(tmp_blk, off, 16, 1, 12, self.loop_thr);
    }
    // no fractional part in either direction: plain copy
    let copy_mode = (mx == 0) && (my == 0);
    let mut bicubic = !copy_mode && is_luma && self.bicubic;
    if is_luma && !copy_mode && self.adv_profile {
        if !self.autosel_pm {
            bicubic = true;
        } else {
            // automatic selection: bicubic only for short vectors over
            // sufficiently varied content
            let mv_limit = 1 << (self.mv_thresh + 1);
            if (mv.x.abs() <= mv_limit) && (mv.y.abs() <= mv_limit) {
                let mut var_off = 16 * 2 + 2;
                if mv.x < 0 { var_off += 1; }
                if mv.y < 0 { var_off += 16; }
                let var = calc_variance(&tmp_blk[var_off..], 16);
                if var >= self.var_thresh {
                    bicubic = true;
                }
            }
        }
    }
    let dst = &mut self.ref_blk[dst_idx];
    if copy_mode {
        let src = &tmp_blk[2 * 16 + 2..];
        // copy eight rows of eight samples from the stride-16 scratch buffer
        for (dline, sline) in dst.chunks_mut(8).zip(src.chunks(16)).take(8) {
            dline.copy_from_slice(&sline[..8]);
        }
    } else if bicubic {
        let coeff_h = &VP6_BICUBIC_COEFFS[self.filter_alpha][mx as usize];
        let coeff_v = &VP6_BICUBIC_COEFFS[self.filter_alpha][my as usize];
        mc_bicubic(dst, 8, tmp_blk, 16 * 2 + 2, 16, coeff_h, coeff_v);
    } else {
        mc_bilinear(dst, 8, tmp_blk, 16 * 2 + 2, 16, mx as u16, my as u16);
    }
}
453 fn sad_mb(&mut self, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize, cur_mv: MV, best_dist: u32) -> u32 {
454 let mut dist = 0;
455 for i in 0..4 {
456 self.mc_block(i, 0, mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >> 1) * 8, cur_mv);
457 dist += sad(&cur_blk[i], &self.ref_blk[i]);
458 if dist > best_dist {
459 break;
460 }
461 }
462 if dist <= best_dist {
463 for plane in 1..3 {
464 self.mc_block(plane + 3, plane, mb_x * 8, mb_y * 8, cur_mv);
465 dist += sad(&cur_blk[plane + 3], &self.ref_blk[plane + 3]);
466 if dist > best_dist {
467 break;
468 }
469 }
470 }
471 dist
472 }
473 fn sad_blk(&mut self, cur_blk: &[u8; 64], xpos: usize, ypos: usize, cur_mv: MV, _: u32) -> u32 {
474 self.mc_block(0, 0, xpos, ypos, cur_mv);
475 sad(cur_blk, &self.ref_blk[0])
476 }
477 }
478
/// Sum of absolute differences between two 8x8 blocks of samples.
fn sad(src1: &[u8; 64], src2: &[u8; 64]) -> u32 {
    src1.iter()
        .zip(src2.iter())
        .fold(0u32, |acc, (&a, &b)| acc + u32::from(a.max(b) - a.min(b)))
}
486
/// Stores the sample-wise difference `src1 - src2` of two 8x8 blocks in `dst`.
pub fn sub_blk(dst: &mut [i16; 64], src1: &[u8; 64], src2: &[u8; 64]) {
    for idx in 0..64 {
        let minuend = i16::from(src1[idx]);
        let subtrahend = i16::from(src2[idx]);
        dst[idx] = minuend - subtrahend;
    }
}
492
493 pub fn calc_mb_dist(mb1: &ResidueMB, mb2: &ResidueMB) -> u32 {
494 let mut sum = 0;
495 for (blk1, blk2) in mb1.coeffs.iter().zip(mb2.coeffs.iter()) {
496 for (&c1, &c2) in blk1.iter().zip(blk2.iter()) {
497 sum += (i32::from(c1) - i32::from(c2)).abs() as u32;
498 }
499 }
500 sum
501 }