]>
Commit | Line | Data |
---|---|---|
3952bfd9 KS |
1 | use nihav_core::frame::*; |
2 | use nihav_codec_support::codecs::{MV, ZERO_MV}; | |
3 | use super::super::vpcommon::*; | |
4 | use super::super::vp6dsp::*; | |
5 | use super::super::vp6data::*; | |
6 | use super::ResidueMB; | |
19cfcd2f KS |
7 | use crate::codecs::vpenc::motion_est::*; |
8 | pub use crate::codecs::vpenc::motion_est::MVSearchMode; | |
3952bfd9 KS |
9 | |
10 | ||
// Fixed-point cosine table used by the forward transform below.
// `CkSj` (with k + j = 8) holds cos(k * PI / 16) = sin(j * PI / 16),
// scaled by 65536 and rounded to the nearest integer, so a product with one
// of these values has to be shifted right by 16 bits afterwards (see `mul16`).
const C1S7: i32 = 64277;
const C2S6: i32 = 60547;
const C3S5: i32 = 54491;
const C4S4: i32 = 46341;
const C5S3: i32 = 36410;
const C6S2: i32 = 25080;
const C7S1: i32 = 12785;
18 | ||
/// Multiplies `a` by `b` and divides the product by 65536, rounding toward zero.
///
/// The product is formed in 64 bits, so large operands (for example the sum of
/// eight clamped 16-bit coefficients multiplied by a 16-bit fixed-point
/// constant) can no longer overflow the intermediate value. For every input
/// whose product fits into `i32` the result is identical to the previous
/// 32-bit implementation.
fn mul16(a: i32, b: i32) -> i32 {
    let res = i64::from(a) * i64::from(b);
    // An arithmetic shift rounds toward negative infinity; biasing negative
    // products by 0xFFFF first turns that into rounding toward zero.
    let bias = if res < 0 { 0xFFFF } else { 0 };
    // The quotient fits into `i32` whenever one factor has a magnitude of at
    // most 65536, which holds for all fixed-point constants used in this file,
    // so the narrowing cast does not lose information there.
    ((res + bias) >> 16) as i32
}
23 | ||
// One eight-point pass of the forward transform used by `vp_fdct`.
//
// `$s0..$s7` are the eight input samples and `$d0..$d7` the eight places the
// results are stored to. All inputs are read into temporaries before the first
// output is written, so the same expressions may be passed as sources and
// destinations (which is how `vp_fdct` uses it). Every output is clamped to
// the `i16` range before being stored.
macro_rules! fdct_step {
    ($s0:expr, $s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr,
     $d0:expr, $d1:expr, $d2:expr, $d3:expr, $d4:expr, $d5:expr, $d6:expr, $d7:expr) => {
        // First stage: sums and differences of the input pairs
        // (0,7), (1,2), (3,4) and (5,6), widened to 32 bits.
        let t_g  = i32::from($s0) + i32::from($s7);
        let t_c  = i32::from($s0) - i32::from($s7);
        let t_a  = i32::from($s1) + i32::from($s2);
        let t_h  = i32::from($s1) - i32::from($s2);
        let t_e1 = i32::from($s3) + i32::from($s4);
        let t_d  = i32::from($s3) - i32::from($s4);
        let t_f  = i32::from($s5) + i32::from($s6);
        let t_b  = i32::from($s5) - i32::from($s6);

        // Second stage: combine the first-stage terms. Several names are
        // deliberately shadowed here, so the statement order matters.
        let t_b1 = t_h + t_b;
        let t_h  = t_h - t_b;
        let t_a1 = t_a - t_f;
        let t_f  = t_a + t_f;
        let t_e  = t_g + t_e1;
        let t_g  = t_g - t_e1;

        // Even-indexed outputs.
        $d2 = (mul16(C2S6, t_g) + mul16(C6S2, t_h)).max(-32768).min(32767) as i16;
        $d6 = (mul16(C6S2, t_g) - mul16(C2S6, t_h)).max(-32768).min(32767) as i16;
        $d0 = mul16(C4S4, t_e + t_f).max(-32768).min(32767) as i16;
        $d4 = mul16(C4S4, t_e - t_f).max(-32768).min(32767) as i16;
        // Intermediate terms for the odd-indexed outputs (again shadowing
        // the earlier t_a/t_c/t_b/t_d values).
        let t_a = t_c + mul16(C4S4, t_a1);
        let t_c = t_c - mul16(C4S4, t_a1);
        let t_b = t_d + mul16(C4S4, t_b1);
        let t_d = t_d - mul16(C4S4, t_b1);
        // Odd-indexed outputs.
        $d3 = (mul16(C3S5, t_c) - mul16(C5S3, t_d)).max(-32768).min(32767) as i16;
        $d5 = (mul16(C5S3, t_c) + mul16(C3S5, t_d)).max(-32768).min(32767) as i16;
        $d1 = (mul16(C1S7, t_a) + mul16(C7S1, t_b)).max(-32768).min(32767) as i16;
        $d7 = (mul16(C7S1, t_a) - mul16(C1S7, t_b)).max(-32768).min(32767) as i16;
    }
}
57 | ||
58 | #[allow(clippy::erasing_op)] | |
59 | pub fn vp_fdct(blk: &mut [i16; 64]) { | |
60 | for row in blk.chunks_mut(8) { | |
61 | fdct_step!(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], | |
62 | row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7]); | |
63 | } | |
64 | for i in 0..8 { | |
65 | fdct_step!(blk[0 * 8 + i], blk[1 * 8 + i], blk[2 * 8 + i], blk[3 * 8 + i], | |
66 | blk[4 * 8 + i], blk[5 * 8 + i], blk[6 * 8 + i], blk[7 * 8 + i], | |
67 | blk[0 * 8 + i], blk[1 * 8 + i], blk[2 * 8 + i], blk[3 * 8 + i], | |
68 | blk[4 * 8 + i], blk[5 * 8 + i], blk[6 * 8 + i], blk[7 * 8 + i]); | |
69 | } | |
70 | } | |
71 | ||
c5d5793c KS |
72 | pub trait MVSearchModeCreate { |
73 | fn create_search(&self) -> Box<dyn MVSearch + Send>; | |
74 | } | |
75 | ||
76 | impl MVSearchModeCreate for MVSearchMode { | |
77 | fn create_search(&self) -> Box<dyn MVSearch + Send> { | |
78 | match *self { | |
79 | MVSearchMode::Full => Box::new(FullMVSearch::new()), | |
80 | MVSearchMode::Diamond => Box::new(DiaSearch::new()), | |
81 | MVSearchMode::Hexagon => Box::new(HexSearch::new()), | |
82 | _ => unreachable!(), | |
83 | } | |
84 | } | |
85 | } | |
86 | ||
3952bfd9 KS |
/// Distance value used as "worse than any real candidate" when a search starts.
// `u32::MAX` is the associated constant that supersedes the legacy
// `std::u32::MAX` module constant; the value is the same.
const MAX_DIST: u32 = u32::MAX;
/// Threshold handed to `search_template!` by the pattern-based searchers.
const DIST_THRESH: u32 = 256;
89 | ||
20b5a55f | 90 | #[allow(clippy::wrong_self_convention)] |
3952bfd9 KS |
91 | trait FromPixels { |
92 | fn from_pixels(self) -> Self; | |
93 | } | |
94 | ||
95 | impl FromPixels for MV { | |
96 | fn from_pixels(self) -> MV { | |
97 | MV { x: self.x * 4, y: self.y * 4 } | |
98 | } | |
99 | } | |
100 | ||
/// Common interface of the motion search strategies in this file.
///
/// Both methods return the best motion vector found together with its
/// distance (sum of absolute differences as computed by the estimator).
pub trait MVSearch {
    /// Searches a motion vector for a whole macroblock.
    ///
    /// `cur_blk` holds the six 8x8 blocks of the current macroblock and
    /// `mb_x`/`mb_y` are its coordinates in macroblock units.
    fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32);
    /// Searches a motion vector for a single 8x8 block.
    ///
    /// `xpos`/`ypos` are handed to the estimator unchanged as the block position.
    fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32);
}
105 | ||
106 | pub struct FullMVSearch {} | |
107 | ||
108 | impl FullMVSearch { | |
109 | pub fn new() -> Self { Self{} } | |
110 | } | |
111 | ||
112 | impl MVSearch for FullMVSearch { | |
113 | fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32) { | |
114 | let mut best_dist = MAX_DIST; | |
115 | let mut best_mv = ZERO_MV; | |
116 | ||
117 | let mut cur_mv = ZERO_MV; | |
118 | for ytry in 0..mv_est.mv_range * 2 + 1 { | |
119 | let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) }; | |
120 | cur_mv.y = dy * 4; | |
121 | for xtry in 0..mv_est.mv_range * 2 + 1 { | |
122 | let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) }; | |
123 | cur_mv.x = dx * 4; | |
124 | ||
125 | let dist = mv_est.sad_mb(cur_blk, mb_x, mb_y, cur_mv, best_dist); | |
126 | ||
127 | if dist < best_dist { | |
128 | best_dist = dist; | |
129 | best_mv = cur_mv; | |
130 | } | |
131 | } | |
132 | } | |
133 | (best_mv, best_dist) | |
134 | } | |
135 | fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32) { | |
136 | let mut best_dist = MAX_DIST; | |
137 | let mut best_mv = ZERO_MV; | |
138 | ||
139 | let mut cur_mv = ZERO_MV; | |
140 | for ytry in 0..mv_est.mv_range * 2 + 1 { | |
141 | let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) }; | |
142 | cur_mv.y = dy * 4; | |
143 | for xtry in 0..mv_est.mv_range * 2 + 1 { | |
144 | let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) }; | |
145 | cur_mv.x = dx * 4; | |
146 | ||
147 | let dist = mv_est.sad_blk(cur_blk, xpos, ypos, cur_mv, best_dist); | |
148 | ||
149 | if dist < best_dist { | |
150 | best_dist = dist; | |
151 | best_mv = cur_mv; | |
152 | } | |
153 | } | |
154 | } | |
155 | (best_mv, best_dist) | |
156 | } | |
157 | } | |
158 | ||
3952bfd9 KS |
159 | macro_rules! pattern_search { |
160 | ($struct_name: ident, $patterns: expr) => { | |
161 | pub struct $struct_name { | |
162 | point: [MV; $patterns.len()], | |
163 | dist: [u32; $patterns.len()], | |
164 | steps: &'static [MV; $patterns.len()], | |
165 | } | |
166 | ||
167 | impl $struct_name { | |
168 | pub fn new() -> Self { | |
169 | Self { | |
170 | point: $patterns, | |
171 | dist: [MAX_DIST; $patterns.len()], | |
172 | steps: &$patterns, | |
173 | } | |
174 | } | |
175 | fn reset(&mut self) { | |
176 | self.point = $patterns; | |
177 | self.dist = [MAX_DIST; $patterns.len()]; | |
178 | } | |
179 | fn set_new_point(&mut self, start: MV, dist: u32) { | |
180 | for (dst, &src) in self.point.iter_mut().zip(self.steps.iter()) { | |
181 | *dst = src + start; | |
182 | } | |
183 | self.dist = [MAX_DIST; $patterns.len()]; | |
184 | self.dist[0] = dist; | |
185 | } | |
186 | fn update(&mut self, step: MV) { | |
187 | let mut new_point = self.point; | |
188 | let mut new_dist = [MAX_DIST; $patterns.len()]; | |
189 | ||
190 | for point in new_point.iter_mut() { | |
191 | *point += step; | |
192 | } | |
193 | ||
194 | for (new_point, new_dist) in new_point.iter_mut().zip(new_dist.iter_mut()) { | |
195 | for (&old_point, &old_dist) in self.point.iter().zip(self.dist.iter()) { | |
196 | if *new_point == old_point { | |
197 | *new_dist = old_dist; | |
198 | break; | |
199 | } | |
200 | } | |
201 | } | |
202 | self.point = new_point; | |
203 | self.dist = new_dist; | |
204 | } | |
205 | } | |
206 | ||
207 | impl MVSearch for $struct_name { | |
208 | fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32) { | |
c5d5793c | 209 | search_template!(self, mv_est, cur_blk, mb_x, mb_y, sad_mb, DIST_THRESH) |
3952bfd9 KS |
210 | } |
211 | fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32) { | |
c5d5793c | 212 | search_template!(self, mv_est, cur_blk, xpos, ypos, sad_blk, DIST_THRESH) |
3952bfd9 KS |
213 | } |
214 | } | |
215 | } | |
216 | } | |
217 | ||
218 | pattern_search!(DiaSearch, DIA_PATTERN); | |
219 | pattern_search!(HexSearch, HEX_PATTERN); | |
220 | ||
221 | pub struct MVEstimator { | |
222 | pub ref_blk: [[u8; 64]; 6], | |
223 | mc_buf: NAVideoBufferRef<u8>, | |
224 | ref_frame: NAVideoBufferRef<u8>, | |
225 | adv_profile: bool, | |
226 | bicubic: bool, | |
227 | autosel_pm: bool, | |
228 | mv_thresh: u8, | |
229 | var_thresh: u16, | |
230 | filter_alpha: usize, | |
231 | loop_thr: i16, | |
232 | mv_range: i16, | |
3952bfd9 KS |
233 | } |
234 | ||
235 | impl MVEstimator { | |
236 | pub fn new(ref_frame: NAVideoBufferRef<u8>, mc_buf: NAVideoBufferRef<u8>, loop_thr: i16, mv_range: i16) -> Self { | |
237 | Self { | |
238 | ref_blk: [[0; 64]; 6], | |
239 | ref_frame, mc_buf, | |
240 | adv_profile: false, | |
241 | bicubic: false, | |
242 | autosel_pm: false, | |
243 | mv_thresh: 0, | |
244 | var_thresh: 0, | |
245 | filter_alpha: 0, | |
246 | loop_thr, | |
247 | mv_range, | |
3952bfd9 KS |
248 | } |
249 | } | |
250 | pub fn mc_block(&mut self, dst_idx: usize, plane: usize, x: usize, y: usize, mv: MV) { | |
251 | let is_luma = (plane != 1) && (plane != 2); | |
252 | let (sx, sy, mx, my, msx, msy) = if is_luma { | |
253 | (mv.x >> 2, mv.y >> 2, (mv.x & 3) << 1, (mv.y & 3) << 1, mv.x / 4, mv.y / 4) | |
254 | } else { | |
255 | (mv.x >> 3, mv.y >> 3, mv.x & 7, mv.y & 7, mv.x / 8, mv.y / 8) | |
256 | }; | |
257 | let tmp_blk = self.mc_buf.get_data_mut().unwrap(); | |
258 | get_block(tmp_blk, 16, self.ref_frame.clone(), plane, x, y, sx, sy); | |
259 | if (msx & 7) != 0 { | |
260 | let foff = (8 - (sx & 7)) as usize; | |
261 | let off = 2 + foff; | |
262 | vp31_loop_filter(tmp_blk, off, 1, 16, 12, self.loop_thr); | |
263 | } | |
264 | if (msy & 7) != 0 { | |
265 | let foff = (8 - (sy & 7)) as usize; | |
266 | let off = (2 + foff) * 16; | |
267 | vp31_loop_filter(tmp_blk, off, 16, 1, 12, self.loop_thr); | |
268 | } | |
269 | let copy_mode = (mx == 0) && (my == 0); | |
270 | let mut bicubic = !copy_mode && is_luma && self.bicubic; | |
271 | if is_luma && !copy_mode && self.adv_profile { | |
272 | if !self.autosel_pm { | |
273 | bicubic = true; | |
274 | } else { | |
275 | let mv_limit = 1 << (self.mv_thresh + 1); | |
276 | if (mv.x.abs() <= mv_limit) && (mv.y.abs() <= mv_limit) { | |
277 | let mut var_off = 16 * 2 + 2; | |
278 | if mv.x < 0 { var_off += 1; } | |
279 | if mv.y < 0 { var_off += 16; } | |
280 | let var = calc_variance(&tmp_blk[var_off..], 16); | |
281 | if var >= self.var_thresh { | |
282 | bicubic = true; | |
283 | } | |
284 | } | |
285 | } | |
286 | } | |
287 | let dst = &mut self.ref_blk[dst_idx]; | |
288 | if copy_mode { | |
289 | let src = &tmp_blk[2 * 16 + 2..]; | |
290 | for (dline, sline) in dst.chunks_mut(8).zip(src.chunks(16)).take(8) { | |
291 | dline.copy_from_slice(&sline[..8]); | |
292 | } | |
293 | } else if bicubic { | |
294 | let coeff_h = &VP6_BICUBIC_COEFFS[self.filter_alpha][mx as usize]; | |
295 | let coeff_v = &VP6_BICUBIC_COEFFS[self.filter_alpha][my as usize]; | |
296 | mc_bicubic(dst, 8, tmp_blk, 16 * 2 + 2, 16, coeff_h, coeff_v); | |
297 | } else { | |
298 | mc_bilinear(dst, 8, tmp_blk, 16 * 2 + 2, 16, mx as u16, my as u16); | |
299 | } | |
300 | } | |
301 | fn sad_mb(&mut self, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize, cur_mv: MV, best_dist: u32) -> u32 { | |
302 | let mut dist = 0; | |
303 | for i in 0..4 { | |
304 | self.mc_block(i, 0, mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >> 1) * 8, cur_mv); | |
305 | dist += sad(&cur_blk[i], &self.ref_blk[i]); | |
3952bfd9 KS |
306 | if dist > best_dist { |
307 | break; | |
308 | } | |
309 | } | |
310 | if dist <= best_dist { | |
311 | for plane in 1..3 { | |
312 | self.mc_block(plane + 3, plane, mb_x * 8, mb_y * 8, cur_mv); | |
313 | dist += sad(&cur_blk[plane + 3], &self.ref_blk[plane + 3]); | |
3952bfd9 KS |
314 | if dist > best_dist { |
315 | break; | |
316 | } | |
317 | } | |
318 | } | |
3952bfd9 KS |
319 | dist |
320 | } | |
321 | fn sad_blk(&mut self, cur_blk: &[u8; 64], xpos: usize, ypos: usize, cur_mv: MV, _: u32) -> u32 { | |
322 | self.mc_block(0, 0, xpos, ypos, cur_mv); | |
323 | sad(cur_blk, &self.ref_blk[0]) | |
324 | } | |
325 | } | |
326 | ||
/// Returns the sum of absolute differences between two 8x8 blocks.
fn sad(src1: &[u8; 64], src2: &[u8; 64]) -> u32 {
    src1.iter()
        .zip(src2.iter())
        .map(|(&a, &b)| (i32::from(a) - i32::from(b)).unsigned_abs())
        .sum()
}
334 | ||
/// Stores the element-wise difference `src1 - src2` of two 8x8 blocks in `dst`.
pub fn sub_blk(dst: &mut [i16; 64], src1: &[u8; 64], src2: &[u8; 64]) {
    let diffs = src1
        .iter()
        .zip(src2.iter())
        .map(|(&a, &b)| i16::from(a) - i16::from(b));
    for (out, diff) in dst.iter_mut().zip(diffs) {
        *out = diff;
    }
}
340 | ||
341 | pub fn calc_mb_dist(mb1: &ResidueMB, mb2: &ResidueMB) -> u32 { | |
342 | let mut sum = 0; | |
343 | for (blk1, blk2) in mb1.coeffs.iter().zip(mb2.coeffs.iter()) { | |
344 | for (&c1, &c2) in blk1.iter().zip(blk2.iter()) { | |
e6aaad5c | 345 | sum += (i32::from(c1) - i32::from(c2)).unsigned_abs(); |
3952bfd9 KS |
346 | } |
347 | } | |
348 | sum | |
349 | } |