// RealVideo 4 encoder — macroblock mode decision
// nihav-realmedia/src/codecs/rv40enc/mb_coding.rs
1use nihav_codec_support::codecs::ZERO_MV;
2
3use super::super::rv40data::*;
4
5use super::*;
6use super::dsp::*;
7use super::motion_est::MotionEstimator;
8
// 16x16 intra prediction modes tried for Intra16x16 macroblocks
// when both top and left neighbours are available.
const PRED_TYPES8: [PredType8x8; 4] = [
    PredType8x8::DC, PredType8x8::Hor, PredType8x8::Ver, PredType8x8::Plane
];
12
// Sum of squared differences between two strided pixel areas of the
// given width and height.
fn calc_dist(src1: &[u8], stride1: usize, src2: &[u8], stride2: usize, width: usize, height: usize) -> u32 {
    let mut total = 0u32;
    let rows = src1.chunks(stride1).zip(src2.chunks(stride2)).take(height);
    for (row_a, row_b) in rows {
        for (&a, &b) in row_a.iter().take(width).zip(row_b.iter()) {
            let delta = i32::from(a) - i32::from(b);
            total += (delta * delta) as u32;
        }
    }
    total
}
21
// Working buffers for evaluating the coding of one macroblock.
struct SingleMacroblock {
    // candidate reconstruction for the mode currently being evaluated
    cand_blk: RefMBData,
    // prediction (motion-compensated or intra) for the current mode
    pred_blk: RefMBData,
    // source macroblock the candidates are compared against
    ref_blk: RefMBData,

    // scratch predictions for the two references of bidirectional modes
    wblk1: RefMBData,
    wblk2: RefMBData,

    // scratch coefficient blocks (16 luma + 8 chroma + 1 luma DC block)
    tmpc: [Block; 25],

    // forward/backward blending weights for B-frames (14-bit fixed point,
    // set via MacroblockDecider::set_b_distance())
    ratio1: u32,
    ratio2: u32,

    // quantised coefficients of the last evaluated mode, kept for the caller
    tmp_tx: [Block; 25],
}
37
impl SingleMacroblock {
    /// Creates a fresh workspace with default-initialised buffers and
    /// zero blending weights.
    fn new() -> Self {
        Self {
            cand_blk: RefMBData::new(),
            pred_blk: RefMBData::new(),
            ref_blk: RefMBData::new(),
            wblk1: RefMBData::new(),
            wblk2: RefMBData::new(),
            tmpc: [Block::new(); 25],
            ratio1: 0,
            ratio2: 0,
            tmp_tx: [Block::new(); 25],
        }
    }
52 fn load(&mut self, src: &[u8], offsets: [usize; 3], strides: [usize; 3]) {
53 for (dst, src) in self.ref_blk.y.chunks_mut(16).zip(src[offsets[0]..].chunks(strides[0])) {
54 dst.copy_from_slice(&src[..16]);
55 }
56 for (dst, src) in self.ref_blk.u.chunks_mut(8).zip(src[offsets[1]..].chunks(strides[1])) {
57 dst.copy_from_slice(&src[..8]);
58 }
59 for (dst, src) in self.ref_blk.v.chunks_mut(8).zip(src[offsets[2]..].chunks(strides[2])) {
60 dst.copy_from_slice(&src[..8]);
61 }
62 }
63 fn recon_pred_part(&mut self, mbt: MacroblockType, ref_p: &NAVideoBuffer<u8>, ref_n: &NAVideoBuffer<u8>, mb_x: usize, mb_y: usize) {
64 let (xpos, ypos) = (mb_x * 16, mb_y * 16);
65
66 match mbt {
67 MacroblockType::Intra16x16(_) => unreachable!(),
68 MacroblockType::Intra4x4(_) => unreachable!(),
69 MacroblockType::Inter16x16(mv) |
70 MacroblockType::InterMix(mv) |
71 MacroblockType::Backward(mv) => {
72 luma_mc(&mut self.pred_blk.y, 16, ref_n, xpos, ypos, mv, true);
73 chroma_mc(&mut self.pred_blk.u, 8, ref_n, xpos / 2, ypos / 2, 1, mv, true);
74 chroma_mc(&mut self.pred_blk.v, 8, ref_n, xpos / 2, ypos / 2, 2, mv, true);
75 },
76 MacroblockType::PSkip => {
77 luma_mc(&mut self.pred_blk.y, 16, ref_n, xpos, ypos, ZERO_MV, true);
78 chroma_mc(&mut self.pred_blk.u, 8, ref_n, xpos / 2, ypos / 2, 1, ZERO_MV, true);
79 chroma_mc(&mut self.pred_blk.v, 8, ref_n, xpos / 2, ypos / 2, 2, ZERO_MV, true);
80 },
81 MacroblockType::Inter16x8(mvs) => {
82 let mvs = [mvs[0], mvs[0], mvs[1], mvs[1]];
83 for (i, &mv) in mvs.iter().enumerate() {
84 let xadd = i & 1;
85 let yadd = i >> 1;
86 luma_mc(&mut self.pred_blk.y[xadd * 8 + yadd * 8 * 16..], 16, ref_n, xpos + xadd * 8, ypos + yadd * 8, mv, false);
87 chroma_mc(&mut self.pred_blk.u[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, mv, false);
88 chroma_mc(&mut self.pred_blk.v[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, mv, false);
89 }
90 },
91 MacroblockType::Inter8x16(mvs) => {
92 let mvs = [mvs[0], mvs[1], mvs[0], mvs[1]];
93 for (i, &mv) in mvs.iter().enumerate() {
94 let xadd = i & 1;
95 let yadd = i >> 1;
96 luma_mc(&mut self.pred_blk.y[xadd * 8 + yadd * 8 * 16..], 16, ref_n, xpos + xadd * 8, ypos + yadd * 8, mv, false);
97 chroma_mc(&mut self.pred_blk.u[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, mv, false);
98 chroma_mc(&mut self.pred_blk.v[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, mv, false);
99 }
100 },
101 MacroblockType::Inter8x8(mvs) => {
102 for (i, &mv) in mvs.iter().enumerate() {
103 let xadd = i & 1;
104 let yadd = i >> 1;
105 luma_mc(&mut self.pred_blk.y[xadd * 8 + yadd * 8 * 16..], 16, ref_n, xpos + xadd * 8, ypos + yadd * 8, mv, false);
106 chroma_mc(&mut self.pred_blk.u[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, mv, false);
107 chroma_mc(&mut self.pred_blk.v[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, mv, false);
108 }
109 },
110 MacroblockType::Forward(mv) => {
111 luma_mc(&mut self.pred_blk.y, 16, ref_p, xpos, ypos, mv, true);
112 chroma_mc(&mut self.pred_blk.u, 8, ref_p, xpos / 2, ypos / 2, 1, mv, true);
113 chroma_mc(&mut self.pred_blk.v, 8, ref_p, xpos / 2, ypos / 2, 2, mv, true);
114 },
115 MacroblockType::Bidir(fmv, bmv) => {
116 luma_mc(&mut self.wblk1.y, 16, ref_p, xpos, ypos, fmv, true);
117 chroma_mc(&mut self.wblk1.u, 8, ref_p, xpos / 2, ypos / 2, 1, fmv, true);
118 chroma_mc(&mut self.wblk1.v, 8, ref_p, xpos / 2, ypos / 2, 2, fmv, true);
119 luma_mc(&mut self.wblk2.y, 16, ref_n, xpos, ypos, bmv, true);
120 chroma_mc(&mut self.wblk2.u, 8, ref_n, xpos / 2, ypos / 2, 1, bmv, true);
121 chroma_mc(&mut self.wblk2.v, 8, ref_n, xpos / 2, ypos / 2, 2, bmv, true);
122 self.pred_blk.avg(&self.wblk1, self.ratio1, &self.wblk2, self.ratio2);
123 },
124 MacroblockType::BSkip(fmvs, bmvs) => {
125 for (i, (&fmv, &bmv)) in fmvs.iter().zip(bmvs.iter()).enumerate() {
126 let xadd = i & 1;
127 let yadd = i >> 1;
128 luma_mc(&mut self.wblk1.y[xadd * 8 + yadd * 8 * 16..], 16, ref_p, xpos + xadd * 8, ypos + yadd * 8, fmv, false);
129 chroma_mc(&mut self.wblk1.u[xadd * 4 + yadd * 4 * 8..], 8, ref_p, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, fmv, false);
130 chroma_mc(&mut self.wblk1.v[xadd * 4 + yadd * 4 * 8..], 8, ref_p, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, fmv, false);
131 luma_mc(&mut self.wblk2.y[xadd * 8 + yadd * 8 * 16..], 16, ref_n, xpos + xadd * 8, ypos + yadd * 8, bmv, false);
132 chroma_mc(&mut self.wblk2.u[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, bmv, false);
133 chroma_mc(&mut self.wblk2.v[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, bmv, false);
134 }
135 self.pred_blk.avg(&self.wblk1, self.ratio1, &self.wblk2, self.ratio2);
136 },
137 };
138 }
    /// Quantises the prediction residue, estimates the coded bits,
    /// reconstructs the candidate into `cand_blk` and returns
    /// `(metric, bits)`. Aborts early with `(metric, 0)` as soon as the
    /// running metric exceeds `best_m`.
    fn get_diff_metric(&mut self, be: &mut BitsEstimator, rdm: &RateDistMetric, best_m: u32, q_dc: usize, q_ac: usize, is16: bool, mut bits: u32) -> (u32, u32) {
        self.pred_blk.calc_coeffs(&self.ref_blk, &mut self.tmpc, q_dc, q_ac, is16);
        // keep the quantised (pre-dequant) coefficients for the caller
        self.tmp_tx.copy_from_slice(&self.tmpc);
        if is16 {
            // block 24 holds the separate luma DC block in 16x16 mode
            bits += be.block_bits(&self.tmpc[24], 24);
        }
        for blk in self.tmpc[..16].iter() {
            bits += be.block_bits(blk, 0);
        }
        for blk in self.tmpc[16..24].iter() {
            bits += be.block_bits(blk, 16);
        }
        // bail out if the rate alone is already worse than the best metric
        let cdist = rdm.get_metric(bits, 0);
        if cdist > best_m {
            return (cdist, 0);
        }

        for blk in self.tmpc[..16].iter_mut() {
            blk.dequant(q_ac, q_ac);
        }
        let (cq_dc, cq_ac) = chroma_quants(q_ac);
        for blk in self.tmpc[16..24].iter_mut() {
            blk.dequant(cq_dc, cq_ac);
        }
        if is16 {
            // restore DCs from the dedicated DC block into the 4x4 luma blocks
            let (blocks, dc_blk) = self.tmpc.split_at_mut(24);
            dc_blk[0].dequant_dcs(q_dc, q_ac);
            dc_blk[0].itransform_dcs();
            for (blk, &dc) in blocks.iter_mut().zip(dc_blk[0].coeffs.iter()) {
                blk.coeffs[0] = dc;
            }
        }

        self.cand_blk.copy_from(&self.pred_blk);
        let mut dist = 0;
        // reconstruct the 16 luma 4x4 blocks, accumulating SSD with early exit
        for (i, blk) in self.tmpc[..16].iter_mut().enumerate() {
            let off = (i & 3) * 4 + (i >> 2) * 4 * 16;
            if !blk.is_empty() {
                blk.itransform_4x4();
                blk.add_to(&mut self.cand_blk.y[off..], 16);
            }
            dist += calc_dist(&self.cand_blk.y[off..], 16, &self.ref_blk.y[off..], 16, 4, 4);
            let cdist = rdm.get_metric(bits, dist);
            if cdist > best_m {
                return (cdist, 0);
            }
        }
        // reconstruct chroma (U and V in lockstep) the same way
        let (_, cpart) = self.tmpc.split_at_mut(16);
        let (upart, vpart) = cpart.split_at_mut(4);
        for (i, (ublk, vblk)) in upart.iter_mut().zip(vpart.iter_mut()).enumerate() {
            let off = (i & 1) * 4 + (i >> 1) * 4 * 8;
            ublk.itransform_4x4();
            vblk.itransform_4x4();
            ublk.add_to(&mut self.cand_blk.u[off..], 8);
            vblk.add_to(&mut self.cand_blk.v[off..], 8);
            dist += calc_dist(&self.cand_blk.u[off..], 8, &self.ref_blk.u[off..], 8, 4, 4);
            dist += calc_dist(&self.cand_blk.v[off..], 8, &self.ref_blk.v[off..], 8, 4, 4);

            let cdist = rdm.get_metric(bits, dist);
            if cdist > best_m {
                return (cdist, 0);
            }
        }

        (rdm.get_metric(bits, dist), bits)
    }
205 fn get_skip_metric(&self, rdm: &RateDistMetric, best_m: u32) -> (u32, u32) {
206 let bits = 1;
207 let mut dist = calc_dist(&self.pred_blk.y, 16, &self.ref_blk.y, 16, 16, 16);
208 let cdist = rdm.get_metric(bits, dist);
209 if cdist > best_m {
210 return (cdist, 0);
211 }
212 dist += calc_dist(&self.pred_blk.u, 8, &self.ref_blk.u, 8, 8, 8);
213 let cdist = rdm.get_metric(bits, dist);
214 if cdist > best_m {
215 return (cdist, 0);
216 }
217 dist += calc_dist(&self.pred_blk.v, 8, &self.ref_blk.v, 8, 8, 8);
218
219 (rdm.get_metric(bits, dist), bits)
220 }
221 fn put_mb(dst: &mut NASimpleVideoFrame<u8>, cblk: &RefMBData, mb_x: usize, mb_y: usize) {
222 for (dline, sline) in dst.data[dst.offset[0] + mb_x * 16 + mb_y * 16 * dst.stride[0]..].chunks_mut(dst.stride[0]).zip(cblk.y.chunks(16)) {
223 dline[..16].copy_from_slice(sline);
224 }
225 for (dline, sline) in dst.data[dst.offset[1] + mb_x * 8 + mb_y * 8 * dst.stride[1]..].chunks_mut(dst.stride[1]).zip(cblk.u.chunks(8)) {
226 dline[..8].copy_from_slice(sline);
227 }
228 for (dline, sline) in dst.data[dst.offset[2] + mb_x * 8 + mb_y * 8 * dst.stride[2]..].chunks_mut(dst.stride[2]).zip(cblk.v.chunks(8)) {
229 dline[..8].copy_from_slice(sline);
230 }
231 }
232}
233
// Per-macroblock mode decision state for the RV40 encoder.
pub struct MacroblockDecider {
    // current quantiser index
    pub q: usize,
    // neighbour availability flags for the current macroblock
    has_top: bool,
    has_left: bool,
    has_tl: bool,
    has_tr: bool,
    // current macroblock coordinates
    mb_x: usize,
    mb_y: usize,
    // best mode found so far with its metric and estimated bit cost
    best_mbt: MacroblockType,
    best_dist: u32,
    best_bits: u32,
    // intra predictors holding the top/left border samples per plane
    ipred_y: IntraPred16x16,
    ipred_u: IntraPred16x16,
    ipred_v: IntraPred16x16,
    // cached bottom rows of the previous macroblock row, per plane
    top_y: Vec<u8>,
    top_u: Vec<u8>,
    top_v: Vec<u8>,
    // temporal distances used for direct-mode MV scaling in B-frames
    tr_d: u32,
    tr_b: u32,
    // working buffers for candidate evaluation
    mb: SingleMacroblock,
    // coefficients and reconstruction of the best candidate
    best_coef: [Block; 25],
    best_blk: RefMBData,
}
257
impl MacroblockDecider {
    /// Creates a decider with empty state; the top-row caches are
    /// allocated later by resize().
    pub fn new() -> Self {
        Self {
            q: 0,
            has_top: false,
            has_left: false,
            has_tl: false,
            has_tr: false,
            mb_x: 0,
            mb_y: 0,
            ipred_y: IntraPred16x16::new(),
            ipred_u: IntraPred16x16::new(),
            ipred_v: IntraPred16x16::new(),
            top_y: Vec::new(),
            top_u: Vec::new(),
            top_v: Vec::new(),
            tr_b: 0,
            tr_d: 0,
            best_mbt: MacroblockType::default(),
            best_dist: 0,
            best_bits: 0,
            mb: SingleMacroblock::new(),
            best_coef: [Block::new(); 25],
            best_blk: RefMBData::new(),
        }
    }
284 pub fn resize(&mut self, mb_w: usize) {
285 self.top_y.resize((mb_w + 1) * 16 + 1, 0);
286 self.top_u.resize((mb_w + 1) * 8 + 1, 0);
287 self.top_v.resize((mb_w + 1) * 8 + 1, 0);
288 }
289 pub fn set_b_distance(&mut self, tr_b: u32, tr_d: u32) {
290 let (ratio1, ratio2) = if tr_d != 0 {
291 (((tr_d - tr_b) << 14) / tr_d, (tr_b << 14) / tr_d)
292 } else { (1 << 13, 1 << 13) };
293 self.tr_b = tr_b;
294 self.tr_d = tr_d;
295 self.mb.ratio1 = ratio1;
296 self.mb.ratio2 = ratio2;
297 }
    /// Prepares for deciding a new macroblock: caches the neighbour
    /// availability flags, loads the intra prediction top borders from the
    /// cached rows, copies the source macroblock and resets the best-mode
    /// state.
    pub fn load_mb(&mut self, src: &[u8], offsets: [usize; 3], strides: [usize; 3], sstate: &SliceState) {
        self.has_top = sstate.has_t;
        self.has_left = sstate.has_l;
        self.has_tl = sstate.has_tl;
        self.has_tr = sstate.has_tr;
        self.mb_x = sstate.mb_x;
        self.mb_y = sstate.mb_y;

        // top[0] is the corner sample, top[1..] the row above this MB
        self.ipred_y.top[1..].copy_from_slice(&self.top_y[self.mb_x * 16 + 1..][..16]);
        self.ipred_u.top[1..9].copy_from_slice(&self.top_u[self.mb_x * 8 + 1..][..8]);
        self.ipred_v.top[1..9].copy_from_slice(&self.top_v[self.mb_x * 8 + 1..][..8]);

        self.mb.load(src, offsets, strides);

        self.best_mbt = MacroblockType::default();
        self.best_dist = std::u32::MAX;
        self.best_bits = 0;
    }
    /// Evaluates coding modes for a macroblock in a B-frame: direct (skip),
    /// forward, backward and bidirectional prediction. Updates the best-mode
    /// state whenever a candidate improves the rate-distortion metric and
    /// returns early once a candidate is below `rdm.good_enough`.
    pub fn try_b_coding(&mut self, ref_p: &NAVideoBuffer<u8>, ref_n: &NAVideoBuffer<u8>, be: &mut BitsEstimator, me: &mut MotionEstimator, rdm: &RateDistMetric, mbstate: &MBState, refine: bool) {
        let q_dc = usize::from(RV40_QUANT_DC[1][self.q]);

        let blk8_idx = mbstate.get_blk8_idx(self.mb_x, self.mb_y);
        // direct (B skip) candidate: per-8x8 forward/backward MVs derived by
        // scaling the co-located MVs with the temporal distances
        let mut smb_f = [ZERO_MV; 4];
        let mut smb_b = [ZERO_MV; 4];
        for (i, (fwd, bwd)) in smb_f.iter_mut().zip(smb_b.iter_mut()).enumerate() {
            let ref_mv = mbstate.ref_mv[blk8_idx + (i & 1) + (i >> 1) * mbstate.blk8_stride];
            let (fm, bm) = ref_mv.scale(self.tr_d, self.tr_b);
            *fwd = fm;
            *bwd = bm;
        }
        self.mb.recon_pred_part(MacroblockType::BSkip(smb_f, smb_b), ref_p, ref_n, self.mb_x, self.mb_y);
        be.set_mb_type(MBType::Skip);
        let (cur_dist, cur_bits) = self.mb.get_skip_metric(rdm, self.best_dist);
        if cur_dist < self.best_dist {
            self.best_dist = cur_dist;
            self.best_bits = cur_bits;
            self.best_mbt = MacroblockType::BSkip(smb_f, smb_b);
            self.best_blk.copy_from(&self.mb.pred_blk);
            if self.best_dist < rdm.good_enough {
                return;
            }
        }

        // forward prediction: motion search against the previous reference
        let fwd_cand = [
            -mbstate.ref_mv[blk8_idx],
            mbstate.fwd_mv[blk8_idx - 1],
            mbstate.fwd_mv[blk8_idx - 1 - mbstate.blk8_stride],
            mbstate.fwd_mv[blk8_idx - mbstate.blk8_stride],
            mbstate.fwd_mv[blk8_idx + 2 - mbstate.blk8_stride]
        ];
        let (fmv, _fdist) = me.search_mb_p(ref_p, &self.mb.ref_blk, self.mb_x, self.mb_y, &fwd_cand);
        be.set_mb_type(MBType::Forward);
        let bcost = be.estimate_mb_hdr(&[fmv]);
        self.mb.recon_pred_part(MacroblockType::Forward(fmv), ref_p, ref_n, self.mb_x, self.mb_y);
        let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, bcost);
        if cur_dist < self.best_dist {
            self.best_dist = cur_dist;
            self.best_bits = cur_bits;
            self.best_mbt = MacroblockType::Forward(fmv);
            self.best_coef.copy_from_slice(&self.mb.tmp_tx);
            self.best_blk.copy_from(&self.mb.cand_blk);
            if self.best_dist < rdm.good_enough {
                return;
            }
        }

        // backward prediction: motion search against the next reference
        let bwd_cand = [
            mbstate.ref_mv[blk8_idx],
            mbstate.bwd_mv[blk8_idx - 1],
            mbstate.bwd_mv[blk8_idx - 1 - mbstate.blk8_stride],
            mbstate.bwd_mv[blk8_idx - mbstate.blk8_stride],
            mbstate.bwd_mv[blk8_idx + 2 - mbstate.blk8_stride]
        ];
        let (bmv, _bdist) = me.search_mb_p(ref_n, &self.mb.ref_blk, self.mb_x, self.mb_y, &bwd_cand);
        be.set_mb_type(MBType::Backward);
        let bcost = be.estimate_mb_hdr(&[bmv]);
        self.mb.recon_pred_part(MacroblockType::Backward(bmv), ref_p, ref_n, self.mb_x, self.mb_y);
        let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, bcost);
        if cur_dist < self.best_dist {
            self.best_dist = cur_dist;
            self.best_bits = cur_bits;
            self.best_mbt = MacroblockType::Backward(bmv);
            self.best_coef.copy_from_slice(&self.mb.tmp_tx);
            self.best_blk.copy_from(&self.mb.cand_blk);
            if self.best_dist < rdm.good_enough {
                return;
            }
        }

        // bidirectional prediction, optionally refining the MV pair jointly
        be.set_mb_type(MBType::Bidir);
        let (i_fmv, i_bmv) = if !refine {
            (fmv, bmv)
        } else {
            let mut b_searcher = SearchB::new(ref_p, ref_n, self.mb_x, self.mb_y, [self.mb.ratio1, self.mb.ratio2]);
            b_searcher.search_mb(&self.mb.ref_blk, [fmv, bmv])
        };

        let bcost = be.estimate_mb_hdr(&[i_fmv, i_bmv]);
        self.mb.recon_pred_part(MacroblockType::Bidir(i_fmv, i_bmv), ref_p, ref_n, self.mb_x, self.mb_y);
        let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, bcost);
        if cur_dist < self.best_dist {
            self.best_dist = cur_dist;
            self.best_bits = cur_bits;
            self.best_coef.copy_from_slice(&self.mb.tmp_tx);
            self.best_blk.copy_from(&self.mb.cand_blk);
            self.best_mbt = MacroblockType::Bidir(i_fmv, i_bmv);
        }
    }
    /// Evaluates coding modes for a macroblock in a P-frame: skip, 16x16
    /// inter (plain and "mixed" transform), and — if the 16x16 match is
    /// poor — 8x8/16x8/8x16 partitioned inter. Updates the best-mode state
    /// whenever a candidate improves the rate-distortion metric.
    pub fn try_p_coding(&mut self, ref_pic: &NAVideoBuffer<u8>, be: &mut BitsEstimator, me: &mut MotionEstimator, rdm: &RateDistMetric, mbstate: &MBState) {
        let q_dc = usize::from(RV40_QUANT_DC[1][self.q]);

        // skip candidate: zero-MV prediction from the reference frame
        self.mb.recon_pred_part(MacroblockType::Inter16x16(ZERO_MV), ref_pic, ref_pic, self.mb_x, self.mb_y);
        be.set_mb_type(MBType::Skip);
        let (cur_dist, cur_bits) = self.mb.get_skip_metric(rdm, self.best_dist);
        if cur_dist < self.best_dist {
            self.best_dist = cur_dist;
            self.best_bits = cur_bits;
            self.best_mbt = MacroblockType::PSkip;
            self.best_blk.copy_from(&self.mb.pred_blk);
            if self.best_dist < rdm.good_enough {
                return;
            }
        }

        let blk8_idx = mbstate.get_blk8_idx(self.mb_x, self.mb_y);
        // MV predictors taken from the neighbouring 8x8 blocks
        let mv_cand = [
            mbstate.fwd_mv[blk8_idx - 1],
            mbstate.fwd_mv[blk8_idx - 1 - mbstate.blk8_stride],
            mbstate.fwd_mv[blk8_idx - mbstate.blk8_stride],
            mbstate.fwd_mv[blk8_idx + 2 - mbstate.blk8_stride]
        ];
        let (mv, pdist) = me.search_mb_p(ref_pic, &self.mb.ref_blk, self.mb_x, self.mb_y, &mv_cand);

        self.mb.recon_pred_part(MacroblockType::Inter16x16(mv), ref_pic, ref_pic, self.mb_x, self.mb_y);

        // plain 16x16 inter
        be.set_mb_type(MBType::P16x16);
        let pcost = be.estimate_mb_hdr(&[mv]);
        let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, pcost);
        if cur_dist < self.best_dist {
            self.best_mbt = MacroblockType::Inter16x16(mv);
            self.best_dist = cur_dist;
            self.best_bits = cur_bits;
            self.best_coef.copy_from_slice(&self.mb.tmp_tx);
            self.best_blk.copy_from(&self.mb.cand_blk);
        }
        // 16x16 inter with the DC-split luma transform ("mixed" mode)
        be.set_mb_type(MBType::P16x16Mix);
        let p16cost = be.estimate_mb_hdr(&[mv]);
        let (cur_dist16, cur_bits16) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, true, p16cost);
        if cur_dist16 < self.best_dist {
            self.best_mbt = MacroblockType::InterMix(mv);
            self.best_dist = cur_dist16;
            self.best_bits = cur_bits16;
            self.best_coef.copy_from_slice(&self.mb.tmp_tx);
            self.best_blk.copy_from(&self.mb.cand_blk);
        }

        // try splitting into four 8x8 partitions only for poor 16x16 matches
        if pdist > rdm.p_split_thr {
            let xpos = self.mb_x * 16;
            let ypos = self.mb_y * 16;

            let mv_cand = [
                mv,
                mbstate.fwd_mv[blk8_idx - 1],
                mbstate.fwd_mv[blk8_idx - 1 - mbstate.blk8_stride],
                mbstate.fwd_mv[blk8_idx - mbstate.blk8_stride],
                mbstate.fwd_mv[blk8_idx - 1 + mbstate.blk8_stride],
                mbstate.fwd_mv[blk8_idx + 2 - mbstate.blk8_stride],
                mbstate.fwd_mv[blk8_idx + 1 - mbstate.blk8_stride]
            ];

            let (mv0, pdist0) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos, ypos, &mv_cand);
            let (mv1, pdist1) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos + 8, ypos, &mv_cand);
            let (mv2, pdist2) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos, ypos + 8, &mv_cand);
            let (mv3, pdist3) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos + 8, ypos + 8, &mv_cand);
            // accept the split only when it is at least 25% better
            if pdist0 + pdist1 + pdist2 + pdist3 < pdist - pdist / 4 {
                let mvs = [mv0, mv1, mv2, mv3];
                // collapse to 16x8 / 8x16 when the MV pairs coincide
                let (cand_mbt, cand_mbtype) = if mv0 == mv1 && mv2 == mv3 {
                    (MBType::P16x8, MacroblockType::Inter16x8([mv0, mv2]))
                } else if mv0 == mv2 && mv1 == mv3 {
                    (MBType::P8x16, MacroblockType::Inter8x16([mv0, mv1]))
                } else {
                    (MBType::P8x8, MacroblockType::Inter8x8(mvs))
                };
                be.set_mb_type(cand_mbt);
                let pcost = be.estimate_mb_hdr(&mvs);

                self.mb.recon_pred_part(MacroblockType::Inter8x8(mvs), ref_pic, ref_pic, self.mb_x, self.mb_y);
                let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, pcost);
                if cur_dist < self.best_dist {
                    self.best_dist = cur_dist;
                    self.best_mbt = cand_mbtype;
                    self.best_bits = cur_bits;
                    self.best_coef.copy_from_slice(&self.mb.tmp_tx);
                    self.best_blk.copy_from(&self.mb.cand_blk);
                }
            }
        }
    }
    /// Fills pred_blk with the given intra prediction: 16x16 for luma,
    /// 8x8 for each chroma plane.
    fn recon_intra_16_pred(&mut self, ptype: PredType8x8) {
        self.ipred_y.apply16(ptype, &mut self.mb.pred_blk.y, 16);
        self.ipred_u.apply8(ptype, &mut self.mb.pred_blk.u, 8);
        self.ipred_v.apply8(ptype, &mut self.mb.pred_blk.v, 8);
    }
    /// Tries the 16x16 intra prediction modes allowed by the available
    /// neighbours and keeps the best one if it improves on the current mode.
    pub fn try_intra_16_pred(&mut self, be: &mut BitsEstimator, rdm: &RateDistMetric) {
        if self.best_dist < rdm.good_enough {
            return;
        }
        // restrict the candidate modes when neighbours are missing
        let pred_types_try: &[PredType8x8] = match (self.has_top, self.has_left) {
            (false, false) => &[PredType8x8::DC128],
            (true, false) => &[PredType8x8::TopDC],
            (false, true) => &[PredType8x8::LeftDC],
            _ => &PRED_TYPES8,
        };

        be.set_mb_type(MBType::Intra16);
        let hdr_cost = be.estimate_mb_hdr(&[]);
        for &ptype in pred_types_try.iter() {
            // plane prediction needs the top-left neighbour sample
            if !self.has_tl && matches!(ptype, PredType8x8::Plane) {
                continue;
            }
            self.recon_intra_16_pred(ptype);
            let q_dc = usize::from(RV40_QUANT_DC[0][self.q]);
            let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, true, hdr_cost);
            if cur_dist < self.best_dist {
                self.best_mbt = MacroblockType::Intra16x16(ptype);
                self.best_dist = cur_dist;
                self.best_bits = cur_bits;
                self.best_coef.copy_from_slice(&self.mb.tmp_tx);
                self.best_blk.copy_from(&self.mb.cand_blk);
                if cur_dist < rdm.good_enough {
                    break;
                }
            }
        }
    }
    /// Tries 4x4 intra prediction for the whole macroblock. Each 4x4 luma
    /// block picks its mode by a local rate-distortion metric; chroma is
    /// coded alongside every even-(x, y) luma block. The total cost,
    /// including the AIC mode signalling estimate, is then compared against
    /// the current best mode.
    pub fn try_intra_4x4_pred(&mut self, be: &mut BitsEstimator, rdm: &RateDistMetric, mbstate: &mut MBState) {
        // candidate mode lists depending on neighbour availability
        const PRED4_DEF: &[PredType4x4] = &[ PredType4x4::DC128 ];
        const PRED4_NO_TOP: &[PredType4x4] = &[ PredType4x4::Hor, PredType4x4::LeftDC ];
        const PRED4_NO_LEFT: &[PredType4x4] = &[ PredType4x4::Ver, PredType4x4::TopDC ];
        const PRED4_FULL: &[PredType4x4] = &[
            PredType4x4::Ver, PredType4x4::Hor, PredType4x4::DC,
            PredType4x4::DiagDownLeft, PredType4x4::DiagDownRight,
            PredType4x4::VerRight, PredType4x4::HorDown,
            PredType4x4::VerLeft, PredType4x4::HorUp
        ];
        // variants used when the left-down neighbour is unavailable
        const PRED4_FULL_NO_LD: &[PredType4x4] = &[
            PredType4x4::Ver, PredType4x4::Hor, PredType4x4::DC,
            PredType4x4::DiagDownLeftNoDown, PredType4x4::DiagDownRight,
            PredType4x4::VerRight, PredType4x4::HorDown,
            PredType4x4::VerLeftNoDown, PredType4x4::HorUpNoDown
        ];

        if self.best_dist < rdm.good_enough {
            return;
        }
        be.set_mb_type(MBType::Intra);

        // top-right border samples: real pixels when available, otherwise
        // the rightmost top sample replicated
        let (tr_y, tr_u, tr_v) = if self.has_tr {
            let mut tr_y = [0; 4];
            let mut tr_u = [0; 4];
            let mut tr_v = [0; 4];
            tr_y.copy_from_slice(&self.top_y[self.mb_x * 16 + 16 + 1..][..4]);
            tr_u.copy_from_slice(&self.top_u[self.mb_x * 8 + 8 + 1..][..4]);
            tr_v.copy_from_slice(&self.top_v[self.mb_x * 8 + 8 + 1..][..4]);
            (tr_y, tr_u, tr_v)
        } else {
            ([self.ipred_y.top[16]; 4], [self.ipred_u.top[8]; 4], [self.ipred_v.top[8]; 4])
        };
        let mut ipred4 = BlockIntra4Pred::new(&self.ipred_y, &self.ipred_u, &self.ipred_v, tr_y, tr_u, tr_v, self.has_left);

        let q_ac = self.q;
        let (cq_dc, cq_ac) = chroma_quants(self.q);
        let mut tot_dist = 0;
        let mut tot_bits = be.estimate_mb_hdr(&[]);
        let mut modes = [PredType4x4::DC; 16];
        let mut tblk = Block::new();
        let mut has_t = self.has_top;

        for y in 0..4 {
            let mut has_l = self.has_left;
            // left-down is only usable above the bottom row of the macroblock
            let mut has_ld = has_l && y != 3;
            for x in 0..4 {
                let list = match (has_l, has_t) {
                    (true, true) if has_ld => PRED4_FULL,
                    (true, true) => PRED4_FULL_NO_LD,
                    (false, true) => PRED4_NO_LEFT,
                    (true, false) => PRED4_NO_TOP,
                    _ => PRED4_DEF,
                };

                // chroma is coded once per 8x8 area, i.e. for even (x, y)
                let do_chroma = ((x & 1) == 0) && ((y & 1) == 0);

                // pick the best mode for this 4x4 block by local RD metric
                let mut best_mode = PRED4_DEF[0];
                let mut best_cdist = std::u32::MAX;
                let mut best_dist = 0;
                let mut best_bits = 0;
                for &try_mode in list.iter() {
                    ipred4.pred_block(&mut self.mb.cand_blk, x, y, try_mode);
                    let off = x * 4 + y * 4 * 16;
                    let (mut cur_dist, mut cur_bits) = Self::blk4_diff(&self.mb.cand_blk.y[off..], &self.mb.ref_blk.y[off..], 16, q_ac, q_ac, be);
                    if do_chroma {
                        let off = x * 2 + y * 2 * 8;
                        let (du, bu) = Self::blk4_diff(&self.mb.cand_blk.u[off..], &self.mb.ref_blk.u[off..], 8, cq_dc, cq_ac, be);
                        let (dv, bv) = Self::blk4_diff(&self.mb.cand_blk.v[off..], &self.mb.ref_blk.v[off..], 8, cq_dc, cq_ac, be);
                        cur_dist += du + dv;
                        cur_bits += bu + bv;
                    }

                    let cand_dist = rdm.get_metric(cur_bits, cur_dist);
                    if cand_dist < best_cdist {
                        best_cdist = cand_dist;
                        best_mode = try_mode;
                        best_dist = cur_dist;
                        best_bits = cur_bits;
                    }
                }

                // redo the prediction with the winning mode and reconstruct
                ipred4.pred_block(&mut self.mb.cand_blk, x, y, best_mode);

                let off = x * 4 + y * 4 * 16;
                tblk.from_diff(&self.mb.ref_blk.y[off..], &self.mb.cand_blk.y[off..], 16);
                tblk.transform_4x4();
                tblk.quant(q_ac, q_ac);
                self.mb.tmp_tx[x + y * 4] = tblk;
                if !tblk.is_empty() {
                    tblk.dequant(q_ac, q_ac);
                    tblk.itransform_4x4();
                    tblk.add_to(&mut self.mb.cand_blk.y[off..], 16);
                }
                if do_chroma {
                    let off = x * 2 + y * 2 * 8;
                    let mut dests = [&mut self.mb.cand_blk.u[off..], &mut self.mb.cand_blk.v[off..]];
                    let sources = [&self.mb.ref_blk.u[off..], &self.mb.ref_blk.v[off..]];
                    for (comp, (dblk, &sblk)) in dests.iter_mut().zip(sources.iter()).enumerate() {
                        tblk.from_diff(sblk, dblk, 8);
                        tblk.transform_4x4();
                        tblk.quant(cq_dc, cq_ac);
                        // x and y are even here, so x / 2 + y enumerates the
                        // four chroma blocks (0..3) of each component
                        self.mb.tmp_tx[16 + comp * 4 + x / 2 + y] = tblk;
                        if !tblk.is_empty() {
                            tblk.dequant(cq_dc, cq_ac);
                            tblk.itransform_4x4();
                            tblk.add_to(dblk, 8);
                        }
                    }
                }

                // feed the reconstructed pixels back for the next predictions
                ipred4.update_from(&self.mb.cand_blk, x, y);

                tot_dist += best_dist;
                tot_bits += best_bits;

                // abandon 4x4 intra once it cannot beat the best mode so far
                let cand_dist = rdm.get_metric(tot_bits, tot_dist);
                if cand_dist > self.best_dist {
                    return;
                }

                modes[x + y * 4] = best_mode;

                has_l = true;
                has_ld = false;
            }
            has_t = true;
        }

        mbstate.set_ipred4x4(self.mb_x, self.mb_y, &modes);

        // estimate the cost of signalling the prediction modes (AIC)
        if !self.has_top {
            let mut code = 0usize;
            for &el in modes[..4].iter() {
                code = code * 2 + if el.to_index() == 0 { 0 } else { 1 };
            }
            tot_bits += u32::from(RV40_AIC_TOP_BITS[code]);
        }

        let ystart = if self.has_top { 0 } else { 1 };
        for y in ystart..4 {
            let mut x = 0;
            while x < 4 {
                let (lctx, tctx, trctx) = mbstate.get_ipred4x4_ctx(self.mb_x, self.mb_y, x, y);
                let ctx_word = if x < 3 {
                    ((trctx & 0xF) as u16) + (((tctx & 0xF) as u16) << 4) + (((lctx & 0xF) as u16) << 8)
                } else { 0xFFF };
                if let Some(idx) = RV40_AIC_PATTERNS.iter().position(|&x| x == ctx_word) {
                    // known context pattern: two modes coded with one code
                    let code = modes[x + y * 4].to_index() * 9 + modes[x + y * 4 + 1].to_index();
                    tot_bits += u32::from(RV40_AIC_MODE2_BITS[idx][code as usize]);
                    x += 2;
                } else if tctx != -1 && lctx != -1 {
                    // both neighbour modes known: single-mode code
                    let idx = (tctx + lctx * 10) as usize;
                    let code = modes[x + y * 4].to_index() as usize;
                    tot_bits += u32::from(RV40_AIC_MODE1_BITS[idx][code]);
                    x += 1;
                } else {
                    // restricted contexts: some combinations cost one flag bit
                    match lctx {
                        -1 if tctx < 2 => tot_bits += 1,
                        0 | 2 => tot_bits += 1,
                        _ => {},
                    };
                    x += 1;
                }
            }
        }

        let cand_dist = rdm.get_metric(tot_bits, tot_dist);
        if cand_dist < self.best_dist {
            self.best_dist = cand_dist;
            self.best_mbt = MacroblockType::Intra4x4(modes);
            self.best_bits = tot_bits;
            self.best_coef.copy_from_slice(&self.mb.tmp_tx);
            self.best_blk.copy_from(&self.mb.cand_blk);
        }
    }
    /// Returns the estimated bit cost of the currently selected mode.
    pub fn get_est_bits(&self) -> u32 { self.best_bits }
710 pub fn get_macroblock(&mut self) -> Macroblock {
711 let mut coeffs = [Block::new(); 25];
712 if !self.best_mbt.is_skip() {
713 coeffs.copy_from_slice(&self.best_coef);
714 }
715 Macroblock {
716 mb_type: self.best_mbt.clone(),
717 coeffs,
718 }
719 }
    /// Writes the reconstruction of the chosen mode into the output frame
    /// and updates the cached top rows / left columns used for intra
    /// prediction of the following macroblocks.
    pub fn recon_mb(&mut self, dst: &mut NASimpleVideoFrame<u8>) {
        let src_mb = &self.best_blk;
        SingleMacroblock::put_mb(dst, src_mb, self.mb_x, self.mb_y);

        // cache the bottom rows for the macroblock row below
        self.top_y[self.mb_x * 16 + 1..][..16].copy_from_slice(&src_mb.y[15 * 16..]);
        self.top_u[self.mb_x * 8 + 1..][..8].copy_from_slice(&src_mb.u[7 * 8..]);
        self.top_v[self.mb_x * 8 + 1..][..8].copy_from_slice(&src_mb.v[7 * 8..]);

        // carry the rightmost top sample over as the corner sample for the
        // next macroblock's predictors
        self.ipred_y.top[0] = self.ipred_y.top[16];
        self.ipred_y.left[0] = self.ipred_y.top[0];
        self.ipred_u.top[0] = self.ipred_u.top[8];
        self.ipred_u.left[0] = self.ipred_u.top[0];
        self.ipred_v.top[0] = self.ipred_v.top[8];
        self.ipred_v.left[0] = self.ipred_v.top[0];

        // the reconstructed right column becomes the next MB's left border
        for (left, src) in self.ipred_y.left[1..].iter_mut().zip(src_mb.y.chunks_exact(16)) {
            *left = src[15];
        }
        for (left, src) in self.ipred_u.left[1..9].iter_mut().zip(src_mb.u.chunks_exact(8)) {
            *left = src[7];
        }
        for (left, src) in self.ipred_v.left[1..9].iter_mut().zip(src_mb.v.chunks_exact(8)) {
            *left = src[7];
        }
    }
    /// Codes a single 4x4 difference block (transform + quantisation) and
    /// returns the resulting (SSD distortion, estimated bits) without
    /// modifying the prediction buffer.
    fn blk4_diff(pred: &[u8], refsrc: &[u8], stride: usize, q_dc: usize, q_ac: usize, be: &mut BitsEstimator) -> (u32, u32) {
        let mut blk = Block::new();
        blk.from_diff(refsrc, pred, stride);
        blk.transform_4x4();
        blk.quant(q_dc, q_ac);
        let bits = be.block_bits(&blk, 0);
        if !blk.is_empty() {
            blk.dequant(q_dc, q_ac);
            blk.itransform_4x4();
        }
        // distortion of the would-be reconstruction: prediction plus decoded
        // residue, clipped to the 0..255 pixel range
        let mut dist = 0u32;
        for (diffs, (pred, refsrc)) in blk.coeffs.chunks(4).zip(pred.chunks(stride).zip(refsrc.chunks(stride))) {
            for (&diff, (&p, &r)) in diffs.iter().zip(pred.iter().zip(refsrc.iter())) {
                let new = (i32::from(p) + i32::from(diff)).max(0).min(255);
                let expected = i32::from(r);
                dist += ((new - expected) * (new - expected)) as u32;
            }
        }
        (dist, bits)
    }
}