vp6enc: add fast(er) encoding mode
[nihav.git] / nihav-duck / src / codecs / vp6enc / mb.rs
1 use nihav_core::frame::*;
2 use nihav_codec_support::codecs::{MV, ZERO_MV};
3 use super::super::vpcommon::*;
4 use super::VP56DCPred;
5 use super::dsp::*;
6 use super::rdo::*;
7
8 /*#[cfg(debug_assertions)]
9 use std::io::Write;
10 #[cfg(debug_assertions)]
11 use std::fs::File;
12 #[cfg(debug_assertions)]
13 pub fn dump_pgm(vbuf: &NAVideoBuffer<u8>, name: &str) {
14 let dst = vbuf.get_data();
15 let (w, h) = vbuf.get_dimensions(0);
16 let mut file = File::create(name).unwrap();
17 file.write_all(format!("P5\n{} {}\n255\n", w, h * 3 / 2).as_bytes()).unwrap();
18 for row in dst[vbuf.get_offset(0)..].chunks(vbuf.get_stride(0)).take(h).rev() {
19 file.write_all(row).unwrap();
20 }
21 for (row1, row2) in dst[vbuf.get_offset(1)..].chunks(vbuf.get_stride(1)).take(h / 2).zip(dst[vbuf.get_offset(2)..].chunks(vbuf.get_stride(2))).rev() {
22 file.write_all(row1).unwrap();
23 file.write_all(row2).unwrap();
24 }
25 }*/
26
/// Coefficient storage for one macroblock: four 8x8 luma blocks followed by
/// one chroma U and one chroma V block, each holding 64 16-bit values.
pub type Coeffs = [[i16; 64]; 6];
28
/// Transform coefficients (or raw pixel data/differences) for one macroblock.
#[derive(Clone)]
pub struct ResidueMB {
    /// Six 8x8 blocks: four luma, then chroma U and V.
    pub coeffs: Coeffs,
}
33
34 impl ResidueMB {
35 fn new() -> Self {
36 Self {
37 coeffs: [[0; 64]; 6],
38 }
39 }
40 fn fdct(&mut self) {
41 for blk in self.coeffs.iter_mut() {
42 vp_fdct(blk);
43 }
44 }
45 fn idct(&mut self) {
46 for blk in self.coeffs.iter_mut() {
47 vp_idct(blk);
48 }
49 }
50 fn quant(&mut self, q: usize) {
51 for blk in self.coeffs.iter_mut() {
52 if blk[0] != 0 {
53 blk[0] /= VP56_DC_QUANTS[q] * 4;
54 }
55 for coef in blk[1..].iter_mut() {
56 if *coef != 0 {
57 *coef /= VP56_AC_QUANTS[q] * 4;
58 }
59 }
60 }
61 }
62 fn dequant(&mut self, q: usize) {
63 for blk in self.coeffs.iter_mut() {
64 if blk[0] != 0 {
65 blk[0] *= VP56_DC_QUANTS[q] * 4;
66 }
67 for coef in blk[1..].iter_mut() {
68 if *coef != 0 {
69 *coef *= VP56_AC_QUANTS[q] * 4;
70 }
71 }
72 }
73 }
74 fn dequant_from(&mut self, src: &Self, q: usize) {
75 for (dblk, sblk) in self.coeffs.iter_mut().zip(src.coeffs.iter()) {
76 dblk[0] = if sblk[0] != 0 { sblk[0] * VP56_DC_QUANTS[q] * 4 } else { 0 };
77 for (dcoef, &scoef) in dblk[1..].iter_mut().zip(sblk[1..].iter()) {
78 *dcoef = if scoef != 0 { scoef * VP56_AC_QUANTS[q] * 4 } else { 0 };
79 }
80 }
81 }
82 fn fill(&self, dst: &mut [[u8; 64]; 6]) {
83 for (dblk, sblk) in dst.iter_mut().zip(self.coeffs.iter()) {
84 for (dcoef, &scoef) in dblk.iter_mut().zip(sblk.iter()) {
85 *dcoef = scoef as u8;
86 }
87 }
88 }
89 }
90
/// Data for an inter-coded macroblock candidate.
#[derive(Clone)]
pub struct InterMB {
    /// Difference between the source and the motion-compensated prediction.
    pub residue: ResidueMB,
    /// Motion-compensated prediction samples (widened to 16 bits).
    pub reference: Coeffs,
    /// Per-block motion vectors; `mv[3]` also serves as the whole-MB vector.
    pub mv: [MV; 4],
}
97
98 impl InterMB {
99 fn new() -> Self {
100 Self {
101 residue: ResidueMB::new(),
102 reference: [[0; 64]; 6],
103 mv: [ZERO_MV; 4],
104 }
105 }
106 }
107
/// DC quantiser scale per quantiser index; the effective divisor/multiplier
/// is this value times four (see `ResidueMB::quant()`/`dequant()`).
const VP56_DC_QUANTS: [i16; 64] = [
    47, 47, 47, 47, 45, 43, 43, 43,
    43, 43, 42, 41, 41, 40, 40, 40,
    40, 35, 35, 35, 35, 33, 33, 33,
    33, 32, 32, 32, 27, 27, 26, 26,
    25, 25, 24, 24, 23, 23, 19, 19,
    19, 19, 18, 18, 17, 16, 16, 16,
    16, 16, 15, 11, 11, 11, 10, 10,
    9, 8, 7, 5, 3, 3, 2, 2
];
/// AC quantiser scale per quantiser index; the effective divisor/multiplier
/// is this value times four (see `ResidueMB::quant()`/`dequant()`).
const VP56_AC_QUANTS: [i16; 64] = [
    94, 92, 90, 88, 86, 82, 78, 74,
    70, 66, 62, 58, 54, 53, 52, 51,
    50, 49, 48, 47, 46, 45, 44, 43,
    42, 40, 39, 37, 36, 35, 34, 33,
    32, 31, 30, 29, 28, 27, 26, 25,
    24, 23, 22, 21, 20, 19, 18, 17,
    16, 15, 14, 13, 12, 11, 10, 9,
    8, 7, 6, 5, 4, 3, 2, 1
];
128
/// Loop-filter strength threshold per quantiser index, passed to the motion
/// estimator as `loop_thr` during motion compensation.
const VP56_FILTER_LIMITS: [u8; 64] = [
    14, 14, 13, 13, 12, 12, 10, 10,
    10, 10,  8,  8,  8,  8,  8,  8,
     8,  8,  8,  8,  8,  8,  8,  8,
     8,  8,  8,  8,  8,  8,  8,  8,
     8,  8,  8,  8,  7,  7,  7,  7,
     7,  7,  6,  6,  6,  6,  6,  6,
     5,  5,  5,  5,  4,  4,  4,  4,
     4,  4,  4,  3,  3,  3,  3,  2
];
139
/// Per-frame encoding state: source data, all candidate codings per
/// macroblock and the final mode/vector decisions.
#[derive(Default)]
pub struct FrameEncoder {
    /// Current quantiser index (indexes the 64-entry quant tables).
    pub quant: usize,
    /// Source macroblocks in pixel form (widened to 16 bits).
    pub src_mbs: Vec<ResidueMB>,
    /// Intra candidates (quantised transform coefficients).
    pub intra_mbs: Vec<ResidueMB>,
    /// Single-MV inter candidates against the previous frame.
    pub inter_mbs: Vec<InterMB>,
    /// Four-MV inter candidates.
    pub fourmv_mbs: Vec<InterMB>,
    /// Candidates against the golden frame.
    pub golden_mbs: Vec<InterMB>,

    /// Selected coding mode per macroblock.
    pub mb_types: Vec<VPMBType>,
    /// Number of MV predictor candidates found per macroblock (see `predict_mvs()`).
    pub num_mv: Vec<u8>,
    /// Motion vectors as they will be coded (differences from the predictor).
    pub coded_mv: Vec<[MV; 4]>,
    /// Per-sub-block coding modes for four-MV macroblocks.
    pub fmv_sub: Vec<[VPMBType; 4]>,

    /// Frame width in macroblocks.
    pub mb_w: usize,
    /// Frame height in macroblocks.
    pub mb_h: usize,

    /// Motion search algorithm selector.
    pub me_mode: MVSearchMode,
    /// Motion search range (passed to the estimator).
    pub me_range: i16,
}
160
/// Reads an 8x8 block of bytes from `$src` (row stride `$stride`) into an
/// 8x8 block of 16-bit values in `$dst`.
macro_rules! read_block {
    ($dst: expr, $src: expr, $stride: expr) => {
        for (drow, srow) in $dst.chunks_mut(8).zip($src.chunks($stride).take(8)) {
            for (dst, &src) in drow.iter_mut().zip(srow.iter()) {
                *dst = i16::from(src);
            }
        }
    }
}
170
/// Writes an 8x8 block of bytes from `$src` into the destination plane slice
/// `$dst` using row stride `$stride`.
macro_rules! write_block {
    ($dst: expr, $src: expr, $stride: expr) => {
        for (drow, srow) in $dst.chunks_mut($stride).take(8).zip($src.chunks(8)) {
            drow[..8].copy_from_slice(srow);
        }
    }
}
178
179 impl FrameEncoder {
    /// Creates an empty frame encoder; state is set up later via `resize()`,
    /// `set_quant()` and `read_mbs()`.
    pub fn new() -> Self { Self::default() }
181 pub fn resize(&mut self, mb_w: usize, mb_h: usize) {
182 self.mb_w = mb_w;
183 self.mb_h = mb_h;
184
185 let num_mbs = self.mb_w * self.mb_h;
186 self.src_mbs.clear();
187 self.src_mbs.reserve(num_mbs);
188 self.intra_mbs.clear();
189 self.intra_mbs.reserve(num_mbs);
190 self.inter_mbs.clear();
191 self.inter_mbs.reserve(num_mbs);
192 self.fourmv_mbs.clear();
193 self.fourmv_mbs.reserve(num_mbs);
194 self.golden_mbs.clear();
195 self.golden_mbs.reserve(num_mbs);
196
197 self.mb_types.clear();
198 self.mb_types.reserve(num_mbs);
199 self.num_mv.clear();
200 self.num_mv.reserve(num_mbs);
201 self.coded_mv.clear();
202 self.coded_mv.reserve(num_mbs);
203 self.fmv_sub.clear();
204 self.fmv_sub.reserve(num_mbs);
205 }
    /// Sets the quantiser index used by all subsequent operations.
    pub fn set_quant(&mut self, quant: usize) { self.quant = quant; }
    /// Splits the source frame into 16x16 macroblocks (with 8x8 chroma) and
    /// stores the pixels, widened to 16 bits, in `src_mbs` in raster order.
    pub fn read_mbs(&mut self, vbuf: &NAVideoBuffer<u8>) {
        let src = vbuf.get_data();
        let y = &src[vbuf.get_offset(0)..];
        let ystride = vbuf.get_stride(0);
        let u = &src[vbuf.get_offset(1)..];
        let ustride = vbuf.get_stride(1);
        let v = &src[vbuf.get_offset(2)..];
        let vstride = vbuf.get_stride(2);
        let (w, _) = vbuf.get_dimensions(0);

        self.src_mbs.clear();
        // walk the luma plane 16 rows at a time together with the matching
        // 8-row chroma strips
        for (ys, (us, vs)) in y.chunks(ystride * 16).zip(u.chunks(ustride * 8).zip(v.chunks(vstride * 8))) {
            for x in (0..w).step_by(16) {
                let mut mb = ResidueMB::new();
                // four luma blocks in 2x2 order
                for (i, blk) in mb.coeffs[..4].iter_mut().enumerate() {
                    read_block!(blk, ys[x + (i & 1) * 8 + (i >> 1) * 8 * ystride..], ystride);
                }
                // chroma planes are at half horizontal resolution
                read_block!(mb.coeffs[4], us[x/2..], ustride);
                read_block!(mb.coeffs[5], vs[x/2..], vstride);
                self.src_mbs.push(mb);
            }
        }
    }
    /// Reconstructs the frame into `vbuf` from the selected per-macroblock
    /// candidates: reverses DC prediction, dequantises and inverse-transforms
    /// the coefficients, then adds either the 128 bias (intra) or the stored
    /// motion-compensated prediction (inter/golden).
    pub fn reconstruct_frame(&mut self, dc_pred: &mut VP56DCPred, mut vbuf: NAVideoBufferRef<u8>) {
        let mut blocks = [[0u8; 64]; 6];

        let mut yoff = vbuf.get_offset(0);
        let mut uoff = vbuf.get_offset(1);
        let mut voff = vbuf.get_offset(2);
        let ystride = vbuf.get_stride(0);
        let ustride = vbuf.get_stride(1);
        let vstride = vbuf.get_stride(2);
        let dst = vbuf.get_data_mut().unwrap();

        dc_pred.reset();

        let quant = self.quant;
        let mut mb_pos = 0;
        for _mb_y in 0..self.mb_h {
            for mb_x in 0..self.mb_w {
                let mb_type = self.mb_types[mb_pos];
                let mb = self.get_mb_mut(mb_pos);
                // restore the actual DC values from the predicted residues
                for (i, blk) in mb.coeffs.iter_mut().enumerate() {
                    dc_pred.predict_dc(mb_type, i, blk, false);
                }
                mb.dequant(quant);
                mb.idct();
                let mb = self.get_mb(mb_pos);
                if mb_type.is_intra() {
                    // intra blocks are coded around zero; shift back to 0..255
                    for (dblk, sblk) in blocks.iter_mut().zip(mb.coeffs.iter()) {
                        for (dcoef, &scoef) in dblk.iter_mut().zip(sblk.iter()) {
                            *dcoef = (scoef + 128).max(0).min(255) as u8;
                        }
                    }
                } else {
                    // inter: pick the prediction stored for the winning candidate
                    let res_mb = match mb_type.get_ref_id() {
                        0 => unreachable!(), // intra handled above
                        1 => if mb_type != VPMBType::InterFourMV {
                                &self.inter_mbs[mb_pos].reference
                            } else {
                                &self.fourmv_mbs[mb_pos].reference
                            },
                        _ => &self.golden_mbs[mb_pos].reference,
                    };

                    // add the residue back onto the prediction samples
                    for (dblk, (sblk1, sblk2)) in blocks.iter_mut().zip(mb.coeffs.iter().zip(res_mb.iter())) {
                        for (dcoef, (&scoef1, &scoef2)) in dblk.iter_mut().zip(sblk1.iter().zip(sblk2.iter())) {
                            *dcoef = (scoef1 + scoef2).max(0).min(255) as u8;
                        }
                    }
                }

                // store the reconstructed blocks into the output planes
                for i in 0..4 {
                    write_block!(&mut dst[yoff + mb_x * 16 + (i & 1) * 8 + (i >> 1) * 8 * ystride..],
                                 blocks[i], ystride);
                }
                write_block!(&mut dst[uoff + mb_x * 8..], blocks[4], ustride);
                write_block!(&mut dst[voff + mb_x * 8..], blocks[5], vstride);

                dc_pred.next_mb();
                mb_pos += 1;
            }
            yoff += ystride * 16;
            uoff += ustride * 8;
            voff += vstride * 8;
            dc_pred.update_row();
        }
        /*#[cfg(debug_assertions)]
        dump_pgm(&vbuf, "/home/kst/devel/NihAV-rust/assets/test_out/debug.pgm");*/
    }
297 pub fn get_mb(&self, mb_pos: usize) -> &ResidueMB {
298 let mb_type = self.mb_types[mb_pos];
299 match mb_type.get_ref_id() {
300 0 => &self.intra_mbs[mb_pos],
301 1 => if mb_type != VPMBType::InterFourMV {
302 &self.inter_mbs[mb_pos].residue
303 } else {
304 &self.fourmv_mbs[mb_pos].residue
305 },
306 _ => &self.golden_mbs[mb_pos].residue,
307 }
308 }
309 fn get_mb_mut(&mut self, mb_pos: usize) -> &mut ResidueMB {
310 let mb_type = self.mb_types[mb_pos];
311 match mb_type.get_ref_id() {
312 0 => &mut self.intra_mbs[mb_pos],
313 1 => if mb_type != VPMBType::InterFourMV {
314 &mut self.inter_mbs[mb_pos].residue
315 } else {
316 &mut self.fourmv_mbs[mb_pos].residue
317 },
318 _ => &mut self.golden_mbs[mb_pos].residue,
319 }
320 }
321 pub fn prepare_intra_blocks(&mut self) {
322 self.intra_mbs.clear();
323 self.mb_types.clear();
324 for smb in self.src_mbs.iter() {
325 let mut dmb = smb.clone();
326 dmb.fdct();
327 for blk in dmb.coeffs.iter_mut() {
328 blk[0] -= 4096;
329 }
330 dmb.quant(self.quant);
331 self.mb_types.push(VPMBType::Intra);
332 self.intra_mbs.push(dmb);
333 }
334 }
335 pub fn prepare_inter_blocks(&mut self, golden: bool) {
336 let inter_mbs = if !golden { &mut self.inter_mbs } else { &mut self.golden_mbs };
337 for (mb_idx, mb) in inter_mbs.iter_mut().enumerate() {
338 mb.residue.fdct();
339 mb.residue.quant(self.quant);
340 self.mb_types[mb_idx] = VPMBType::InterMV;
341 }
342 }
    /// Motion-estimates every macroblock against `ref_frame`, storing the
    /// best vector, the residue and the prediction samples into `inter_mbs`
    /// (or `golden_mbs` when `golden` is set).
    pub fn estimate_mvs(&mut self, ref_frame: NAVideoBufferRef<u8>, mc_buf: NAVideoBufferRef<u8>, golden: bool) {
        let loop_thr = i16::from(VP56_FILTER_LIMITS[self.quant as usize]);

        let inter_mbs = if !golden { &mut self.inter_mbs } else { &mut self.golden_mbs };

        // lazily allocate candidate storage on first use
        if inter_mbs.is_empty() {
            for _ in 0..self.mb_w * self.mb_h {
                inter_mbs.push(InterMB::new());
            }
        }

        let mut cur_blk = [[0u8; 64]; 6];

        let mut mv_est = MVEstimator::new(ref_frame, mc_buf, loop_thr, self.me_range);

        let mut mv_search = self.me_mode.create_search();

        let mut mb_pos = 0;
        for (mb_y, row) in inter_mbs.chunks_mut(self.mb_w).enumerate() {
            for (mb_x, mb) in row.iter_mut().enumerate() {
                self.src_mbs[mb_pos].fill(&mut cur_blk);

                // one vector for the whole macroblock
                let (best_mv, _best_dist) = mv_search.search_mb(&mut mv_est, &cur_blk, mb_x, mb_y);
                mb.mv[3] = best_mv;

                // luma: motion-compensate each 8x8 block, subtract from source
                for i in 0..4 {
                    mv_est.mc_block(i, 0, mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >> 1) * 8, best_mv);
                    sub_blk(&mut mb.residue.coeffs[i], &cur_blk[i], &mv_est.ref_blk[i]);
                }
                // chroma planes at halved block coordinates
                for plane in 1..3 {
                    mv_est.mc_block(plane + 3, plane, mb_x * 8, mb_y * 8, best_mv);
                    sub_blk(&mut mb.residue.coeffs[plane + 3], &cur_blk[plane + 3], &mv_est.ref_blk[plane + 3]);
                }

                // keep the prediction so reconstruction can add the residue back
                for (dblk, sblk) in mb.reference.iter_mut().zip(mv_est.ref_blk.iter()) {
                    for (dst, &src) in dblk.iter_mut().zip(sblk.iter()) {
                        *dst = i16::from(src);
                    }
                }
                mb_pos += 1;
            }
        }
    }
    /// Estimates one motion vector per 8x8 luma block (four-MV mode) for the
    /// given macroblock, computes the residue and stores the prediction.
    /// Returns `true` when the four vectors are not all identical.
    fn estimate_fourmv(&mut self, ref_frame: NAVideoBufferRef<u8>, mc_buf: NAVideoBufferRef<u8>, mb_x: usize, mb_y: usize) -> bool {
        let loop_thr = i16::from(VP56_FILTER_LIMITS[self.quant as usize]);

        // lazily allocate the four-MV candidate storage on first use
        if self.fourmv_mbs.is_empty() {
            for _ in 0..self.mb_w * self.mb_h {
                self.fourmv_mbs.push(InterMB::new());
            }
        }
        if self.fmv_sub.is_empty() {
            self.fmv_sub.resize(self.mb_w * self.mb_h, [VPMBType::Intra; 4]);
        }

        let mb_pos = mb_x + mb_y * self.mb_w;
        let mb = &mut self.fourmv_mbs[mb_pos];

        let mut cur_blk = [[0u8; 64]; 6];
        self.src_mbs[mb_pos].fill(&mut cur_blk);

        let mut mv_est = MVEstimator::new(ref_frame, mc_buf, loop_thr, self.me_range);

        let mut mv_search = self.me_mode.create_search();

        // search an independent vector for each luma block
        for i in 0..4 {
            let xpos = mb_x * 16 + (i & 1) * 8;
            let ypos = mb_y * 16 + (i >> 1) * 8;
            let (best_mv, _best_dist) = mv_search.search_blk(&mut mv_est, &cur_blk[i], xpos, ypos);
            mb.mv[i] = best_mv;
        }
        // chroma uses the average of the four luma vectors
        let mvsum = mb.mv[0] + mb.mv[1] + mb.mv[2] + mb.mv[3];
        let chroma_mv = MV{ x: mvsum.x / 4, y: mvsum.y / 4};

        for (i, blk) in mb.residue.coeffs[..4].iter_mut().enumerate() {
            let xpos = mb_x * 16 + (i & 1) * 8;
            let ypos = mb_y * 16 + (i >> 1) * 8;
            mv_est.mc_block(i, 0, xpos, ypos, mb.mv[i]);
            sub_blk(blk, &cur_blk[i], &mv_est.ref_blk[i]);
        }
        for plane in 1..3 {
            mv_est.mc_block(plane + 3, plane, mb_x * 8, mb_y * 8, chroma_mv);
            sub_blk(&mut mb.residue.coeffs[plane + 3], &cur_blk[plane + 3], &mv_est.ref_blk[plane + 3]);
        }

        // keep the prediction samples for reconstruction and RD estimation
        for (dblk, sblk) in mb.reference.iter_mut().zip(mv_est.ref_blk.iter()) {
            for (dst, &src) in dblk.iter_mut().zip(sblk.iter()) {
                *dst = i16::from(src);
            }
        }

        (mb.mv[0] != mb.mv[1]) || (mb.mv[0] != mb.mv[2]) || (mb.mv[0] != mb.mv[3])
    }
    /// Chooses the best coding mode for every macroblock by rate-distortion
    /// comparison of the intra, inter, four-MV and (optionally) golden-frame
    /// candidates; `lambda` weighs estimated bit cost against distortion.
    pub fn select_inter_blocks(&mut self, ref_frame: NAVideoBufferRef<u8>, mc_buf: NAVideoBufferRef<u8>, has_golden_frame: bool, lambda: f32) {
        let mut tmp_mb = ResidueMB::new();
        for mb_idx in 0..self.mb_w * self.mb_h {
            // intra candidate: reconstruct, then measure distortion and bits
            tmp_mb.dequant_from(&self.intra_mbs[mb_idx], self.quant);
            tmp_mb.idct();
            for blk in tmp_mb.coeffs.iter_mut() {
                for coef in blk.iter_mut() {
                    *coef = (*coef + 128).max(0).min(255);
                }
            }
            let intra_dist = calc_mb_dist(&self.src_mbs[mb_idx], &tmp_mb);
            let intra_nits = estimate_intra_mb_nits(&self.intra_mbs[mb_idx].coeffs, self.quant);
            let intra_cost = (intra_dist as f32) + lambda * (intra_nits as f32);

            // inter candidate: residue added back onto the stored prediction
            tmp_mb.dequant_from(&self.inter_mbs[mb_idx].residue, self.quant);
            tmp_mb.idct();
            for (blk, res) in tmp_mb.coeffs.iter_mut().zip(self.inter_mbs[mb_idx].reference.iter()) {
                for (coef, add) in blk.iter_mut().zip(res.iter()) {
                    *coef = (*coef + add).max(0).min(255);
                }
            }
            let inter_dist = calc_mb_dist(&self.src_mbs[mb_idx], &tmp_mb);
            let mut inter_nits = estimate_inter_mb_nits(&self.inter_mbs[mb_idx], self.quant, false);
            if self.inter_mbs[mb_idx].mv[3] != ZERO_MV {
                inter_nits += estimate_mv_nits(self.inter_mbs[mb_idx].mv[3]);
            }
            let mut inter_cost = (inter_dist as f32) + lambda * (inter_nits as f32);

            if inter_cost < intra_cost {
                self.mb_types[mb_idx] = VPMBType::InterMV;

                // try splitting into four vectors only when the single-MV
                // candidate still has noticeable distortion
                if inter_dist > 512 {
                    self.estimate_fourmv(ref_frame.clone(), mc_buf.clone(), mb_idx % self.mb_w, mb_idx / self.mb_w);
                    self.fourmv_mbs[mb_idx].residue.fdct();
                    self.fourmv_mbs[mb_idx].residue.quant(self.quant);

                    tmp_mb.dequant_from(&self.fourmv_mbs[mb_idx].residue, self.quant);
                    tmp_mb.idct();
                    for (blk, res) in tmp_mb.coeffs.iter_mut().zip(self.fourmv_mbs[mb_idx].reference.iter()) {
                        for (coef, add) in blk.iter_mut().zip(res.iter()) {
                            *coef = (*coef + add).max(0).min(255);
                        }
                    }
                    let fourmv_dist = calc_mb_dist(&self.src_mbs[mb_idx], &tmp_mb);
                    let fourmv_nits = estimate_inter_mb_nits(&self.fourmv_mbs[mb_idx], self.quant, true);
                    let fourmv_cost = (fourmv_dist as f32) + lambda * (fourmv_nits as f32);
                    if fourmv_cost < inter_cost {
                        self.mb_types[mb_idx] = VPMBType::InterFourMV;
                        inter_cost = fourmv_cost;
                    }
                }
            }

            if has_golden_frame {
                // golden candidate competes against whatever won above
                tmp_mb.dequant_from(&self.golden_mbs[mb_idx].residue, self.quant);
                tmp_mb.idct();
                for (blk, res) in tmp_mb.coeffs.iter_mut().zip(self.golden_mbs[mb_idx].reference.iter()) {
                    for (coef, add) in blk.iter_mut().zip(res.iter()) {
                        *coef = (*coef + add).max(0).min(255);
                    }
                }
                let golden_dist = calc_mb_dist(&self.src_mbs[mb_idx], &tmp_mb);
                let golden_nits = estimate_inter_mb_nits(&self.golden_mbs[mb_idx], self.quant, false);
                let golden_cost = (golden_dist as f32) + lambda * (golden_nits as f32);

                if (self.mb_types[mb_idx].is_intra() && golden_cost < intra_cost) ||
                    (!self.mb_types[mb_idx].is_intra() && golden_cost < inter_cost) {
                    self.mb_types[mb_idx] = VPMBType::GoldenMV;
                }
            }
        }
    }
508 fn motion_est_mb(src_mb: &ResidueMB, cur_blk: &mut [[u8; 64]; 6], mb: &mut InterMB, mv_search: &mut Box<dyn MVSearch+Send>, mv_est: &mut MVEstimator, mb_x: usize, mb_y: usize) {
509 src_mb.fill(cur_blk);
510 let (best_mv, _best_dist) = mv_search.search_mb(mv_est, &cur_blk, mb_x, mb_y);
511 mb.mv[3] = best_mv;
512
513 for i in 0..4 {
514 mv_est.mc_block(i, 0, mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >> 1) * 8, best_mv);
515 sub_blk(&mut mb.residue.coeffs[i], &cur_blk[i], &mv_est.ref_blk[i]);
516 }
517 for plane in 1..3 {
518 mv_est.mc_block(plane + 3, plane, mb_x * 8, mb_y * 8, best_mv);
519 sub_blk(&mut mb.residue.coeffs[plane + 3], &cur_blk[plane + 3], &mv_est.ref_blk[plane + 3]);
520 }
521
522 for (dblk, sblk) in mb.reference.iter_mut().zip(mv_est.ref_blk.iter()) {
523 for (dst, &src) in dblk.iter_mut().zip(sblk.iter()) {
524 *dst = i16::from(src);
525 }
526 }
527 }
    /// Fast mode decision: always evaluates the single-MV inter candidate and
    /// only falls back to four-MV, golden and finally intra coding while the
    /// best distortion so far stays above a threshold (512).
    pub fn select_inter_blocks_fast(&mut self, ref_frame: NAVideoBufferRef<u8>, gold_frame: Option<NAVideoBufferRef<u8>>, mc_buf: NAVideoBufferRef<u8>, lambda: f32) {
        let loop_thr = i16::from(VP56_FILTER_LIMITS[self.quant as usize]);

        // lazily allocate candidate storage on first use
        if self.inter_mbs.is_empty() {
            for _ in 0..self.mb_w * self.mb_h {
                self.inter_mbs.push(InterMB::new());
            }
        }
        if self.golden_mbs.is_empty() {
            for _ in 0..self.mb_w * self.mb_h {
                self.golden_mbs.push(InterMB::new());
            }
        }

        let mut cur_blk = [[0u8; 64]; 6];

        let mut mv_est = MVEstimator::new(ref_frame.clone(), mc_buf.clone(), loop_thr, self.me_range);
        // separate estimator for the golden frame when one is available
        let mut mv_est_g = if let Some(gold_frm) = gold_frame {
            Some(MVEstimator::new(gold_frm, mc_buf.clone(), loop_thr, self.me_range))
        } else {
            None
        };

        let mut mv_search = self.me_mode.create_search();

        let mut tmp_mb = ResidueMB::new();

        let mut mb_idx = 0;
        for mb_y in 0..self.mb_h {
            for mb_x in 0..self.mb_w {
                let smb = &self.src_mbs[mb_idx];

                // single-MV inter candidate is always computed
                let inter_mb = &mut self.inter_mbs[mb_idx];
                Self::motion_est_mb(smb, &mut cur_blk, inter_mb, &mut mv_search, &mut mv_est, mb_x, mb_y);
                inter_mb.residue.fdct();
                inter_mb.residue.quant(self.quant);
                self.mb_types[mb_idx] = VPMBType::InterMV;

                // reconstruct the candidate to measure its distortion
                tmp_mb.dequant_from(&inter_mb.residue, self.quant);
                tmp_mb.idct();
                for (blk, res) in tmp_mb.coeffs.iter_mut().zip(inter_mb.reference.iter()) {
                    for (coef, add) in blk.iter_mut().zip(res.iter()) {
                        *coef = (*coef + add).max(0).min(255);
                    }
                }
                let mut best_dist = calc_mb_dist(smb, &tmp_mb);
                let mut inter_nits = estimate_inter_mb_nits(inter_mb, self.quant, false);
                if inter_mb.mv[3] != ZERO_MV {
                    inter_nits += estimate_mv_nits(inter_mb.mv[3]);
                }
                let mut best_cost = (best_dist as f32) + lambda * (inter_nits as f32);
                // try four-MV only when single-MV is still poor
                if best_dist > 512 {
                    self.estimate_fourmv(ref_frame.clone(), mc_buf.clone(), mb_idx % self.mb_w, mb_idx / self.mb_w);
                    self.fourmv_mbs[mb_idx].residue.fdct();
                    self.fourmv_mbs[mb_idx].residue.quant(self.quant);

                    let smb = &self.src_mbs[mb_idx];
                    tmp_mb.dequant_from(&self.fourmv_mbs[mb_idx].residue, self.quant);
                    tmp_mb.idct();
                    for (blk, res) in tmp_mb.coeffs.iter_mut().zip(self.fourmv_mbs[mb_idx].reference.iter()) {
                        for (coef, add) in blk.iter_mut().zip(res.iter()) {
                            *coef = (*coef + add).max(0).min(255);
                        }
                    }
                    let fourmv_dist = calc_mb_dist(smb, &tmp_mb);
                    let fourmv_nits = estimate_inter_mb_nits(&self.fourmv_mbs[mb_idx], self.quant, true);
                    let fourmv_cost = (fourmv_dist as f32) + lambda * (fourmv_nits as f32);
                    if fourmv_cost < best_cost {
                        self.mb_types[mb_idx] = VPMBType::InterFourMV;
                        best_cost = fourmv_cost;
                        best_dist = fourmv_dist;
                    }
                }
                let smb = &self.src_mbs[mb_idx];
                // try the golden frame only when the result is still poor
                if best_dist > 512 {
                    if let Some(ref mut mve_gold) = mv_est_g {
                        let gold_mb = &mut self.golden_mbs[mb_idx];
                        Self::motion_est_mb(smb, &mut cur_blk, gold_mb, &mut mv_search, mve_gold, mb_x, mb_y);
                        gold_mb.residue.fdct();
                        gold_mb.residue.quant(self.quant);

                        tmp_mb.dequant_from(&gold_mb.residue, self.quant);
                        tmp_mb.idct();
                        for (blk, res) in tmp_mb.coeffs.iter_mut().zip(gold_mb.reference.iter()) {
                            for (coef, add) in blk.iter_mut().zip(res.iter()) {
                                *coef = (*coef + add).max(0).min(255);
                            }
                        }
                        let golden_dist = calc_mb_dist(smb, &tmp_mb);
                        let golden_nits = estimate_inter_mb_nits(gold_mb, self.quant, false);
                        let golden_cost = (golden_dist as f32) + lambda * (golden_nits as f32);
                        if golden_cost < best_cost {
                            self.mb_types[mb_idx] = VPMBType::GoldenMV;
                            best_cost = golden_cost;
                            best_dist = golden_dist;
                        }
                    }
                }
                // final fallback: intra coding
                if best_dist > 512 {
                    let intra_mb = &mut self.intra_mbs[mb_idx];
                    *intra_mb = smb.clone();
                    intra_mb.fdct();
                    for blk in intra_mb.coeffs.iter_mut() {
                        blk[0] -= 4096;
                    }
                    intra_mb.quant(self.quant);

                    tmp_mb.dequant_from(intra_mb, self.quant);
                    tmp_mb.idct();
                    for blk in tmp_mb.coeffs.iter_mut() {
                        for coef in blk.iter_mut() {
                            *coef = (*coef + 128).max(0).min(255);
                        }
                    }
                    let intra_dist = calc_mb_dist(smb, &tmp_mb);
                    let intra_nits = estimate_intra_mb_nits(&intra_mb.coeffs, self.quant);
                    let intra_cost = (intra_dist as f32) + lambda * (intra_nits as f32);
                    if intra_cost < best_cost {
                        self.mb_types[mb_idx] = VPMBType::Intra;
                    }
                }

                mb_idx += 1;
            }
        }
    }
654 pub fn decide_frame_type(&self) -> (bool, bool) {
655 let mut intra_count = 0usize;
656 let mut non_intra = 0usize;
657 for mb_type in self.mb_types.iter() {
658 if mb_type.is_intra() {
659 intra_count += 1;
660 } else {
661 non_intra += 1;
662 }
663 }
664 (intra_count > non_intra * 3, intra_count > non_intra)
665 }
    /// Scans neighbouring macroblocks for motion vector predictor candidates
    /// with the given reference ID. Returns the number of distinct candidates
    /// found (up to 2), the nearest and second-nearest candidate vectors, and
    /// the vector used as the coding predictor (taken only from the two
    /// closest neighbour positions).
    fn find_mv_pred(&self, mb_x: usize, mb_y: usize, ref_id: u8) -> (usize, MV, MV, MV) {
        // neighbour offsets as (row delta, column delta), ordered by distance
        const CAND_POS: [(i8, i8); 12] = [
            (-1, 0), ( 0, -1),
            (-1, -1), (-1, 1),
            (-2, 0), ( 0, -2),
            (-1, -2), (-2, -1),
            (-2, 1), (-1, 2),
            (-2, -2), (-2, 2)
        ];

        let mut nearest_mv = ZERO_MV;
        let mut near_mv = ZERO_MV;
        let mut pred_mv = ZERO_MV;
        let mut num_mv: usize = 0;

        for (i, (yoff, xoff)) in CAND_POS.iter().enumerate() {
            let cx = (mb_x as isize) + (*xoff as isize);
            let cy = (mb_y as isize) + (*yoff as isize);
            // skip candidates outside the frame
            if (cx < 0) || (cy < 0) {
                continue;
            }
            let cx = cx as usize;
            let cy = cy as usize;
            if (cx >= self.mb_w) || (cy >= self.mb_h) {
                continue;
            }
            let mb_pos = cx + cy * self.mb_w;
            // whole-MB vector of the neighbour's selected candidate
            let mv = match self.mb_types[mb_pos].get_ref_id() {
                0 => ZERO_MV,
                1 => if self.mb_types[mb_pos] != VPMBType::InterFourMV {
                        self.inter_mbs[mb_pos].mv[3]
                    } else {
                        self.fourmv_mbs[mb_pos].mv[3]
                    },
                _ => self.golden_mbs[mb_pos].mv[3],
            };
            // only non-zero vectors with the matching reference count
            if (self.mb_types[mb_pos].get_ref_id() != ref_id) || (mv == ZERO_MV) {
                continue;
            }
            if num_mv == 0 {
                nearest_mv = mv;
                num_mv += 1;
                // prediction comes only from the immediate left/top neighbours
                if i < 2 {
                    pred_mv = mv;
                }
            } else if mv != nearest_mv {
                near_mv = mv;
                num_mv += 1;
                break;
            }
        }

        (num_mv, nearest_mv, near_mv, pred_mv)
    }
    /// Converts the chosen macroblock types and raw motion vectors into
    /// codable form: finds the MV predictors for each macroblock, rewrites
    /// the types into NoMV/Nearest/Near/MV variants and stores the vector
    /// differences to code in `coded_mv`. Also records the predictor count
    /// per macroblock in `num_mv`.
    pub fn predict_mvs(&mut self) {
        let mut mb_idx = 0;
        self.num_mv.clear();
        if self.coded_mv.is_empty() {
            self.coded_mv.resize(self.mb_w * self.mb_h, [ZERO_MV; 4]);
        }
        for mb_y in 0..self.mb_h {
            for mb_x in 0..self.mb_w {
                let (num_mv, nearest_mv, near_mv, pred_mv) = self.find_mv_pred(mb_x, mb_y, VP_REF_INTER);
                let mb_type = self.mb_types[mb_idx];
                self.num_mv.push(num_mv as u8);
                let golden = mb_type.get_ref_id() == VP_REF_GOLDEN;
                let mv = if !golden { self.inter_mbs[mb_idx].mv[3] } else { self.golden_mbs[mb_idx].mv[3] };

                let mb_type = if mb_type == VPMBType::Intra {
                    VPMBType::Intra
                } else if mb_type == VPMBType::InterFourMV {
                    // classify each sub-block vector against the predictors
                    for i in 0..4 {
                        let mv = self.fourmv_mbs[mb_idx].mv[i];
                        self.coded_mv[mb_idx][i] = ZERO_MV;
                        if mv == ZERO_MV {
                            self.fmv_sub[mb_idx][i] = VPMBType::InterNoMV;
                        } else {
                            self.fmv_sub[mb_idx][i] = match num_mv {
                                0 => {
                                    self.coded_mv[mb_idx][i] = mv - pred_mv;
                                    VPMBType::InterMV
                                },
                                1 => {
                                    if nearest_mv == mv {
                                        VPMBType::InterNearest
                                    } else {
                                        self.coded_mv[mb_idx][i] = mv - pred_mv;
                                        VPMBType::InterMV
                                    }
                                },
                                _ => {
                                    if nearest_mv == mv {
                                        VPMBType::InterNearest
                                    } else if near_mv == mv {
                                        VPMBType::InterNear
                                    } else {
                                        self.coded_mv[mb_idx][i] = mv - pred_mv;
                                        VPMBType::InterMV
                                    }
                                },
                            };
                        }
                    }
                    VPMBType::InterFourMV
                } else if mv == ZERO_MV {
                    if !golden {
                        VPMBType::InterNoMV
                    } else {
                        VPMBType::GoldenNoMV
                    }
                } else if mb_type.get_ref_id() == VP_REF_INTER {
                    self.coded_mv[mb_idx][3] = mv;
                    match num_mv {
                        0 => VPMBType::InterMV, // pred_mv is zero here, so raw MV is coded
                        1 => {
                            if nearest_mv == mv {
                                VPMBType::InterNearest
                            } else {
                                self.coded_mv[mb_idx][3] = mv - pred_mv;
                                VPMBType::InterMV
                            }
                        },
                        _ => {
                            if nearest_mv == mv {
                                VPMBType::InterNearest
                            } else if near_mv == mv {
                                VPMBType::InterNear
                            } else {
                                self.coded_mv[mb_idx][3] = mv - pred_mv;
                                VPMBType::InterMV
                            }
                        },
                    }
                } else {
                    // golden MBs use predictors from golden-referencing neighbours
                    let (num_mv, nearest_mv, near_mv, pred_mv) = self.find_mv_pred(mb_x, mb_y, VP_REF_GOLDEN);
                    self.coded_mv[mb_idx][3] = ZERO_MV;
                    match num_mv {
                        0 => {
                            self.coded_mv[mb_idx][3] = mv - pred_mv;
                            VPMBType::GoldenMV
                        },
                        1 => {
                            if nearest_mv == mv {
                                VPMBType::GoldenNearest
                            } else {
                                self.coded_mv[mb_idx][3] = mv - pred_mv;
                                VPMBType::GoldenMV
                            }
                        },
                        _ => {
                            if nearest_mv == mv {
                                VPMBType::GoldenNearest
                            } else if near_mv == mv {
                                VPMBType::GoldenNear
                            } else {
                                self.coded_mv[mb_idx][3] = mv - pred_mv;
                                VPMBType::GoldenMV
                            }
                        },
                    }
                };
                self.mb_types[mb_idx] = mb_type;
                mb_idx += 1;
            }
        }
    }
832 pub fn apply_dc_prediction(&mut self, dc_pred: &mut VP56DCPred) {
833 dc_pred.reset();
834
835 let mut mb_idx = 0;
836 for _mb_y in 0..self.mb_h {
837 for _mb_x in 0..self.mb_w {
838 let mb_type = self.mb_types[mb_idx];
839 let mb = self.get_mb_mut(mb_idx);
840 for (i, blk) in mb.coeffs.iter_mut().enumerate() {
841 dc_pred.predict_dc(mb_type, i, blk, true);
842 }
843 dc_pred.next_mb();
844 mb_idx += 1;
845 }
846 dc_pred.update_row();
847 }
848 }
849 }