VP7 encoder
[nihav.git] / nihav-duck / src / codecs / vp6enc / dsp.rs
CommitLineData
3952bfd9
KS
1use nihav_core::frame::*;
2use nihav_codec_support::codecs::{MV, ZERO_MV};
3use super::super::vpcommon::*;
4use super::super::vp6dsp::*;
5use super::super::vp6data::*;
6use super::ResidueMB;
19cfcd2f
KS
7use crate::codecs::vpenc::motion_est::*;
8pub use crate::codecs::vpenc::motion_est::MVSearchMode;
3952bfd9
KS
9
10
// Cosine table for the forward DCT in 16.16 fixed point:
// each `CkSj` equals round(cos(k * PI / 16) * 65536).
/// cos(1 * PI / 16) scaled by 65536.
const C1S7: i32 = 64277;
/// cos(2 * PI / 16) scaled by 65536.
const C2S6: i32 = 60547;
/// cos(3 * PI / 16) scaled by 65536.
const C3S5: i32 = 54491;
/// cos(4 * PI / 16) scaled by 65536.
const C4S4: i32 = 46341;
/// cos(5 * PI / 16) scaled by 65536.
const C5S3: i32 = 36410;
/// cos(6 * PI / 16) scaled by 65536.
const C6S2: i32 = 25080;
/// cos(7 * PI / 16) scaled by 65536.
const C7S1: i32 = 12785;
18
/// Multiplies `a` by `b` and divides the product by 65536, rounding towards zero.
///
/// The product is formed in 64 bits so that it cannot overflow (an `i32`
/// product panics in debug builds and wraps in release builds once the
/// operands get large enough). The scaled result is saturated to the `i32`
/// range; when either operand has a magnitude below 65536 the result always
/// fits and no saturation takes place.
fn mul16(a: i32, b: i32) -> i32 {
    let res = i64::from(a) * i64::from(b);
    // An arithmetic shift rounds towards negative infinity, so negative
    // products are biased first to get rounding towards zero instead.
    let scaled = (res + if res < 0 { 0xFFFF } else { 0 }) >> 16;
    scaled
        .max(i64::from(std::i32::MIN))
        .min(i64::from(std::i32::MAX)) as i32
}
23
// One 8-point forward DCT pass.
//
// Reads the eight `$s*` inputs, then writes the eight `$d*` outputs, each
// clipped to the `i16` range. All inputs are read before any output is
// written, so the same places may be given as both sources and destinations.
macro_rules! fdct_step {
    ($s0:expr, $s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr,
     $d0:expr, $d1:expr, $d2:expr, $d3:expr, $d4:expr, $d5:expr, $d6:expr, $d7:expr) => {
        let clip = |val: i32| -> i16 { val.max(-32768).min(32767) as i16 };

        // first butterfly stage on the input pairs
        let sum07 = i32::from($s0) + i32::from($s7);
        let dif07 = i32::from($s0) - i32::from($s7);
        let sum12 = i32::from($s1) + i32::from($s2);
        let dif12 = i32::from($s1) - i32::from($s2);
        let sum34 = i32::from($s3) + i32::from($s4);
        let dif34 = i32::from($s3) - i32::from($s4);
        let sum56 = i32::from($s5) + i32::from($s6);
        let dif56 = i32::from($s5) - i32::from($s6);

        // second butterfly stage
        let odd_sum = dif12 + dif56;
        let odd_dif = dif12 - dif56;
        let mid_dif = sum12 - sum56;
        let mid_sum = sum12 + sum56;
        let outer_sum = sum07 + sum34;
        let outer_dif = sum07 - sum34;

        // even-indexed outputs
        $d2 = clip(mul16(C2S6, outer_dif) + mul16(C6S2, odd_dif));
        $d6 = clip(mul16(C6S2, outer_dif) - mul16(C2S6, odd_dif));
        $d0 = clip(mul16(C4S4, outer_sum + mid_sum));
        $d4 = clip(mul16(C4S4, outer_sum - mid_sum));

        // odd-indexed outputs
        let rot_mid = mul16(C4S4, mid_dif);
        let rot_odd = mul16(C4S4, odd_sum);
        let hi_a = dif07 + rot_mid;
        let hi_c = dif07 - rot_mid;
        let hi_b = dif34 + rot_odd;
        let hi_d = dif34 - rot_odd;
        $d3 = clip(mul16(C3S5, hi_c) - mul16(C5S3, hi_d));
        $d5 = clip(mul16(C5S3, hi_c) + mul16(C3S5, hi_d));
        $d1 = clip(mul16(C1S7, hi_a) + mul16(C7S1, hi_b));
        $d7 = clip(mul16(C7S1, hi_a) - mul16(C1S7, hi_b));
    }
}
57
58#[allow(clippy::erasing_op)]
59pub fn vp_fdct(blk: &mut [i16; 64]) {
60 for row in blk.chunks_mut(8) {
61 fdct_step!(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7],
62 row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7]);
63 }
64 for i in 0..8 {
65 fdct_step!(blk[0 * 8 + i], blk[1 * 8 + i], blk[2 * 8 + i], blk[3 * 8 + i],
66 blk[4 * 8 + i], blk[5 * 8 + i], blk[6 * 8 + i], blk[7 * 8 + i],
67 blk[0 * 8 + i], blk[1 * 8 + i], blk[2 * 8 + i], blk[3 * 8 + i],
68 blk[4 * 8 + i], blk[5 * 8 + i], blk[6 * 8 + i], blk[7 * 8 + i]);
69 }
70}
71
c5d5793c
KS
72pub trait MVSearchModeCreate {
73 fn create_search(&self) -> Box<dyn MVSearch + Send>;
74}
75
76impl MVSearchModeCreate for MVSearchMode {
77 fn create_search(&self) -> Box<dyn MVSearch + Send> {
78 match *self {
79 MVSearchMode::Full => Box::new(FullMVSearch::new()),
80 MVSearchMode::Diamond => Box::new(DiaSearch::new()),
81 MVSearchMode::Hexagon => Box::new(HexSearch::new()),
82 _ => unreachable!(),
83 }
84 }
85}
86
3952bfd9
KS
/// Initial "no candidate evaluated yet" distance; any computed distance is at most this.
const MAX_DIST: u32 = std::u32::MAX;
/// Distance threshold handed to `search_template!` by the pattern searches.
const DIST_THRESH: u32 = 256;
89
90trait FromPixels {
91 fn from_pixels(self) -> Self;
92}
93
94impl FromPixels for MV {
95 fn from_pixels(self) -> MV {
96 MV { x: self.x * 4, y: self.y * 4 }
97 }
98}
99
100pub trait MVSearch {
101 fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32);
102 fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32);
103}
104
/// Exhaustive motion search that tries every whole-pixel offset inside the
/// estimator's search range.
///
/// The type carries no state, so it also implements `Default`.
#[derive(Default)]
pub struct FullMVSearch {}

impl FullMVSearch {
    /// Creates a new searcher; equivalent to `FullMVSearch::default()`.
    pub fn new() -> Self {
        Self::default()
    }
}
110
111impl MVSearch for FullMVSearch {
112 fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32) {
113 let mut best_dist = MAX_DIST;
114 let mut best_mv = ZERO_MV;
115
116 let mut cur_mv = ZERO_MV;
117 for ytry in 0..mv_est.mv_range * 2 + 1 {
118 let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) };
119 cur_mv.y = dy * 4;
120 for xtry in 0..mv_est.mv_range * 2 + 1 {
121 let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) };
122 cur_mv.x = dx * 4;
123
124 let dist = mv_est.sad_mb(cur_blk, mb_x, mb_y, cur_mv, best_dist);
125
126 if dist < best_dist {
127 best_dist = dist;
128 best_mv = cur_mv;
129 }
130 }
131 }
132 (best_mv, best_dist)
133 }
134 fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32) {
135 let mut best_dist = MAX_DIST;
136 let mut best_mv = ZERO_MV;
137
138 let mut cur_mv = ZERO_MV;
139 for ytry in 0..mv_est.mv_range * 2 + 1 {
140 let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) };
141 cur_mv.y = dy * 4;
142 for xtry in 0..mv_est.mv_range * 2 + 1 {
143 let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) };
144 cur_mv.x = dx * 4;
145
146 let dist = mv_est.sad_blk(cur_blk, xpos, ypos, cur_mv, best_dist);
147
148 if dist < best_dist {
149 best_dist = dist;
150 best_mv = cur_mv;
151 }
152 }
153 }
154 (best_mv, best_dist)
155 }
156}
157
3952bfd9
KS
158macro_rules! pattern_search {
159 ($struct_name: ident, $patterns: expr) => {
160 pub struct $struct_name {
161 point: [MV; $patterns.len()],
162 dist: [u32; $patterns.len()],
163 steps: &'static [MV; $patterns.len()],
164 }
165
166 impl $struct_name {
167 pub fn new() -> Self {
168 Self {
169 point: $patterns,
170 dist: [MAX_DIST; $patterns.len()],
171 steps: &$patterns,
172 }
173 }
174 fn reset(&mut self) {
175 self.point = $patterns;
176 self.dist = [MAX_DIST; $patterns.len()];
177 }
178 fn set_new_point(&mut self, start: MV, dist: u32) {
179 for (dst, &src) in self.point.iter_mut().zip(self.steps.iter()) {
180 *dst = src + start;
181 }
182 self.dist = [MAX_DIST; $patterns.len()];
183 self.dist[0] = dist;
184 }
185 fn update(&mut self, step: MV) {
186 let mut new_point = self.point;
187 let mut new_dist = [MAX_DIST; $patterns.len()];
188
189 for point in new_point.iter_mut() {
190 *point += step;
191 }
192
193 for (new_point, new_dist) in new_point.iter_mut().zip(new_dist.iter_mut()) {
194 for (&old_point, &old_dist) in self.point.iter().zip(self.dist.iter()) {
195 if *new_point == old_point {
196 *new_dist = old_dist;
197 break;
198 }
199 }
200 }
201 self.point = new_point;
202 self.dist = new_dist;
203 }
204 }
205
206 impl MVSearch for $struct_name {
207 fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32) {
c5d5793c 208 search_template!(self, mv_est, cur_blk, mb_x, mb_y, sad_mb, DIST_THRESH)
3952bfd9
KS
209 }
210 fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32) {
c5d5793c 211 search_template!(self, mv_est, cur_blk, xpos, ypos, sad_blk, DIST_THRESH)
3952bfd9
KS
212 }
213 }
214 }
215}
216
217pattern_search!(DiaSearch, DIA_PATTERN);
218pattern_search!(HexSearch, HEX_PATTERN);
219
220pub struct MVEstimator {
221 pub ref_blk: [[u8; 64]; 6],
222 mc_buf: NAVideoBufferRef<u8>,
223 ref_frame: NAVideoBufferRef<u8>,
224 adv_profile: bool,
225 bicubic: bool,
226 autosel_pm: bool,
227 mv_thresh: u8,
228 var_thresh: u16,
229 filter_alpha: usize,
230 loop_thr: i16,
231 mv_range: i16,
3952bfd9
KS
232}
233
234impl MVEstimator {
235 pub fn new(ref_frame: NAVideoBufferRef<u8>, mc_buf: NAVideoBufferRef<u8>, loop_thr: i16, mv_range: i16) -> Self {
236 Self {
237 ref_blk: [[0; 64]; 6],
238 ref_frame, mc_buf,
239 adv_profile: false,
240 bicubic: false,
241 autosel_pm: false,
242 mv_thresh: 0,
243 var_thresh: 0,
244 filter_alpha: 0,
245 loop_thr,
246 mv_range,
3952bfd9
KS
247 }
248 }
249 pub fn mc_block(&mut self, dst_idx: usize, plane: usize, x: usize, y: usize, mv: MV) {
250 let is_luma = (plane != 1) && (plane != 2);
251 let (sx, sy, mx, my, msx, msy) = if is_luma {
252 (mv.x >> 2, mv.y >> 2, (mv.x & 3) << 1, (mv.y & 3) << 1, mv.x / 4, mv.y / 4)
253 } else {
254 (mv.x >> 3, mv.y >> 3, mv.x & 7, mv.y & 7, mv.x / 8, mv.y / 8)
255 };
256 let tmp_blk = self.mc_buf.get_data_mut().unwrap();
257 get_block(tmp_blk, 16, self.ref_frame.clone(), plane, x, y, sx, sy);
258 if (msx & 7) != 0 {
259 let foff = (8 - (sx & 7)) as usize;
260 let off = 2 + foff;
261 vp31_loop_filter(tmp_blk, off, 1, 16, 12, self.loop_thr);
262 }
263 if (msy & 7) != 0 {
264 let foff = (8 - (sy & 7)) as usize;
265 let off = (2 + foff) * 16;
266 vp31_loop_filter(tmp_blk, off, 16, 1, 12, self.loop_thr);
267 }
268 let copy_mode = (mx == 0) && (my == 0);
269 let mut bicubic = !copy_mode && is_luma && self.bicubic;
270 if is_luma && !copy_mode && self.adv_profile {
271 if !self.autosel_pm {
272 bicubic = true;
273 } else {
274 let mv_limit = 1 << (self.mv_thresh + 1);
275 if (mv.x.abs() <= mv_limit) && (mv.y.abs() <= mv_limit) {
276 let mut var_off = 16 * 2 + 2;
277 if mv.x < 0 { var_off += 1; }
278 if mv.y < 0 { var_off += 16; }
279 let var = calc_variance(&tmp_blk[var_off..], 16);
280 if var >= self.var_thresh {
281 bicubic = true;
282 }
283 }
284 }
285 }
286 let dst = &mut self.ref_blk[dst_idx];
287 if copy_mode {
288 let src = &tmp_blk[2 * 16 + 2..];
289 for (dline, sline) in dst.chunks_mut(8).zip(src.chunks(16)).take(8) {
290 dline.copy_from_slice(&sline[..8]);
291 }
292 } else if bicubic {
293 let coeff_h = &VP6_BICUBIC_COEFFS[self.filter_alpha][mx as usize];
294 let coeff_v = &VP6_BICUBIC_COEFFS[self.filter_alpha][my as usize];
295 mc_bicubic(dst, 8, tmp_blk, 16 * 2 + 2, 16, coeff_h, coeff_v);
296 } else {
297 mc_bilinear(dst, 8, tmp_blk, 16 * 2 + 2, 16, mx as u16, my as u16);
298 }
299 }
300 fn sad_mb(&mut self, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize, cur_mv: MV, best_dist: u32) -> u32 {
301 let mut dist = 0;
302 for i in 0..4 {
303 self.mc_block(i, 0, mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >> 1) * 8, cur_mv);
304 dist += sad(&cur_blk[i], &self.ref_blk[i]);
3952bfd9
KS
305 if dist > best_dist {
306 break;
307 }
308 }
309 if dist <= best_dist {
310 for plane in 1..3 {
311 self.mc_block(plane + 3, plane, mb_x * 8, mb_y * 8, cur_mv);
312 dist += sad(&cur_blk[plane + 3], &self.ref_blk[plane + 3]);
3952bfd9
KS
313 if dist > best_dist {
314 break;
315 }
316 }
317 }
3952bfd9
KS
318 dist
319 }
320 fn sad_blk(&mut self, cur_blk: &[u8; 64], xpos: usize, ypos: usize, cur_mv: MV, _: u32) -> u32 {
321 self.mc_block(0, 0, xpos, ypos, cur_mv);
322 sad(cur_blk, &self.ref_blk[0])
323 }
324}
325
/// Returns the sum of absolute differences between two 8x8 blocks.
fn sad(src1: &[u8; 64], src2: &[u8; 64]) -> u32 {
    src1.iter()
        .zip(src2.iter())
        .map(|(&a, &b)| (i32::from(a) - i32::from(b)).abs() as u32)
        .sum()
}
333
/// Stores the per-sample difference `src1 - src2` of two 8x8 blocks in `dst`.
pub fn sub_blk(dst: &mut [i16; 64], src1: &[u8; 64], src2: &[u8; 64]) {
    dst.iter_mut()
        .zip(src1.iter())
        .zip(src2.iter())
        .for_each(|((diff, &a), &b)| *diff = i16::from(a) - i16::from(b));
}
339
340pub fn calc_mb_dist(mb1: &ResidueMB, mb2: &ResidueMB) -> u32 {
341 let mut sum = 0;
342 for (blk1, blk2) in mb1.coeffs.iter().zip(mb2.coeffs.iter()) {
343 for (&c1, &c2) in blk1.iter().zip(blk2.iter()) {
344 sum += (i32::from(c1) - i32::from(c2)).abs() as u32;
345 }
346 }
347 sum
348}