]>
Commit | Line | Data |
---|---|---|
4965a5e5 KS |
1 | use nihav_codec_support::codecs::ZERO_MV; |
2 | ||
3 | use super::super::rv40data::*; | |
4 | ||
5 | use super::*; | |
6 | use super::dsp::*; | |
7 | use super::motion_est::MotionEstimator; | |
8 | ||
/// 16x16 intra prediction modes tried when both top and left neighbours are available
/// (see `try_intra_16_pred`; restricted neighbour cases use shorter DC-only lists).
const PRED_TYPES8: [PredType8x8; 4] = [
    PredType8x8::DC, PredType8x8::Hor, PredType8x8::Ver, PredType8x8::Plane
];
12 | ||
/// Sum of squared differences between two pixel rectangles.
///
/// `src1`/`src2` are indexed with their own strides; only `width` columns of
/// the first `height` rows are compared.
fn calc_dist(src1: &[u8], stride1: usize, src2: &[u8], stride2: usize, width: usize, height: usize) -> u32 {
    let rows = src1.chunks(stride1).zip(src2.chunks(stride2)).take(height);
    let mut total = 0u32;
    for (row_a, row_b) in rows {
        for (&a, &b) in row_a[..width].iter().zip(row_b.iter()) {
            // Pixels are 8-bit, so the signed difference fits easily in i32.
            let d = i32::from(a) - i32::from(b);
            total += (d * d) as u32;
        }
    }
    total
}
21 | ||
/// Scratch buffers used while evaluating coding choices for a single macroblock.
struct SingleMacroblock {
    cand_blk: RefMBData,    // candidate reconstruction (prediction + dequantised residual)
    pred_blk: RefMBData,    // motion-compensated or intra prediction
    ref_blk: RefMBData,     // source pixels of the macroblock being coded

    wblk1: RefMBData,       // forward-reference prediction for bidirectional averaging
    wblk2: RefMBData,       // backward-reference prediction for bidirectional averaging

    tmpc: [Block; 25],      // 16 luma + 8 chroma + 1 DC coefficient blocks (work copy)

    ratio1: u32,            // forward weight for B-frame averaging (14-bit fixed point)
    ratio2: u32,            // backward weight for B-frame averaging (14-bit fixed point)

    tmp_tx: [Block; 25],    // quantised coefficients preserved before in-place dequantisation
}
37 | ||
38 | impl SingleMacroblock { | |
    /// Creates a fresh, zero-initialised set of scratch buffers.
    fn new() -> Self {
        Self {
            cand_blk: RefMBData::new(),
            pred_blk: RefMBData::new(),
            ref_blk: RefMBData::new(),
            wblk1: RefMBData::new(),
            wblk2: RefMBData::new(),
            tmpc: [Block::new(); 25],
            ratio1: 0,
            ratio2: 0,
            tmp_tx: [Block::new(); 25],
        }
    }
52 | fn load(&mut self, src: &[u8], offsets: [usize; 3], strides: [usize; 3]) { | |
53 | for (dst, src) in self.ref_blk.y.chunks_mut(16).zip(src[offsets[0]..].chunks(strides[0])) { | |
54 | dst.copy_from_slice(&src[..16]); | |
55 | } | |
56 | for (dst, src) in self.ref_blk.u.chunks_mut(8).zip(src[offsets[1]..].chunks(strides[1])) { | |
57 | dst.copy_from_slice(&src[..8]); | |
58 | } | |
59 | for (dst, src) in self.ref_blk.v.chunks_mut(8).zip(src[offsets[2]..].chunks(strides[2])) { | |
60 | dst.copy_from_slice(&src[..8]); | |
61 | } | |
62 | } | |
63 | fn recon_pred_part(&mut self, mbt: MacroblockType, ref_p: &NAVideoBuffer<u8>, ref_n: &NAVideoBuffer<u8>, mb_x: usize, mb_y: usize) { | |
64 | let (xpos, ypos) = (mb_x * 16, mb_y * 16); | |
65 | ||
66 | match mbt { | |
67 | MacroblockType::Intra16x16(_) => unreachable!(), | |
68 | MacroblockType::Intra4x4(_) => unreachable!(), | |
69 | MacroblockType::Inter16x16(mv) | | |
70 | MacroblockType::InterMix(mv) | | |
71 | MacroblockType::Backward(mv) => { | |
72 | luma_mc(&mut self.pred_blk.y, 16, ref_n, xpos, ypos, mv, true); | |
73 | chroma_mc(&mut self.pred_blk.u, 8, ref_n, xpos / 2, ypos / 2, 1, mv, true); | |
74 | chroma_mc(&mut self.pred_blk.v, 8, ref_n, xpos / 2, ypos / 2, 2, mv, true); | |
75 | }, | |
76 | MacroblockType::PSkip => { | |
77 | luma_mc(&mut self.pred_blk.y, 16, ref_n, xpos, ypos, ZERO_MV, true); | |
78 | chroma_mc(&mut self.pred_blk.u, 8, ref_n, xpos / 2, ypos / 2, 1, ZERO_MV, true); | |
79 | chroma_mc(&mut self.pred_blk.v, 8, ref_n, xpos / 2, ypos / 2, 2, ZERO_MV, true); | |
80 | }, | |
81 | MacroblockType::Inter16x8(mvs) => { | |
82 | let mvs = [mvs[0], mvs[0], mvs[1], mvs[1]]; | |
83 | for (i, &mv) in mvs.iter().enumerate() { | |
84 | let xadd = i & 1; | |
85 | let yadd = i >> 1; | |
86 | luma_mc(&mut self.pred_blk.y[xadd * 8 + yadd * 8 * 16..], 16, ref_n, xpos + xadd * 8, ypos + yadd * 8, mv, false); | |
87 | chroma_mc(&mut self.pred_blk.u[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, mv, false); | |
88 | chroma_mc(&mut self.pred_blk.v[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, mv, false); | |
89 | } | |
90 | }, | |
91 | MacroblockType::Inter8x16(mvs) => { | |
92 | let mvs = [mvs[0], mvs[1], mvs[0], mvs[1]]; | |
93 | for (i, &mv) in mvs.iter().enumerate() { | |
94 | let xadd = i & 1; | |
95 | let yadd = i >> 1; | |
96 | luma_mc(&mut self.pred_blk.y[xadd * 8 + yadd * 8 * 16..], 16, ref_n, xpos + xadd * 8, ypos + yadd * 8, mv, false); | |
97 | chroma_mc(&mut self.pred_blk.u[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, mv, false); | |
98 | chroma_mc(&mut self.pred_blk.v[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, mv, false); | |
99 | } | |
100 | }, | |
101 | MacroblockType::Inter8x8(mvs) => { | |
102 | for (i, &mv) in mvs.iter().enumerate() { | |
103 | let xadd = i & 1; | |
104 | let yadd = i >> 1; | |
105 | luma_mc(&mut self.pred_blk.y[xadd * 8 + yadd * 8 * 16..], 16, ref_n, xpos + xadd * 8, ypos + yadd * 8, mv, false); | |
106 | chroma_mc(&mut self.pred_blk.u[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, mv, false); | |
107 | chroma_mc(&mut self.pred_blk.v[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, mv, false); | |
108 | } | |
109 | }, | |
110 | MacroblockType::Forward(mv) => { | |
111 | luma_mc(&mut self.pred_blk.y, 16, ref_p, xpos, ypos, mv, true); | |
112 | chroma_mc(&mut self.pred_blk.u, 8, ref_p, xpos / 2, ypos / 2, 1, mv, true); | |
113 | chroma_mc(&mut self.pred_blk.v, 8, ref_p, xpos / 2, ypos / 2, 2, mv, true); | |
114 | }, | |
115 | MacroblockType::Bidir(fmv, bmv) => { | |
116 | luma_mc(&mut self.wblk1.y, 16, ref_p, xpos, ypos, fmv, true); | |
117 | chroma_mc(&mut self.wblk1.u, 8, ref_p, xpos / 2, ypos / 2, 1, fmv, true); | |
118 | chroma_mc(&mut self.wblk1.v, 8, ref_p, xpos / 2, ypos / 2, 2, fmv, true); | |
119 | luma_mc(&mut self.wblk2.y, 16, ref_n, xpos, ypos, bmv, true); | |
120 | chroma_mc(&mut self.wblk2.u, 8, ref_n, xpos / 2, ypos / 2, 1, bmv, true); | |
121 | chroma_mc(&mut self.wblk2.v, 8, ref_n, xpos / 2, ypos / 2, 2, bmv, true); | |
122 | self.pred_blk.avg(&self.wblk1, self.ratio1, &self.wblk2, self.ratio2); | |
123 | }, | |
124 | MacroblockType::BSkip(fmvs, bmvs) => { | |
125 | for (i, (&fmv, &bmv)) in fmvs.iter().zip(bmvs.iter()).enumerate() { | |
126 | let xadd = i & 1; | |
127 | let yadd = i >> 1; | |
128 | luma_mc(&mut self.wblk1.y[xadd * 8 + yadd * 8 * 16..], 16, ref_p, xpos + xadd * 8, ypos + yadd * 8, fmv, false); | |
129 | chroma_mc(&mut self.wblk1.u[xadd * 4 + yadd * 4 * 8..], 8, ref_p, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, fmv, false); | |
130 | chroma_mc(&mut self.wblk1.v[xadd * 4 + yadd * 4 * 8..], 8, ref_p, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, fmv, false); | |
131 | luma_mc(&mut self.wblk2.y[xadd * 8 + yadd * 8 * 16..], 16, ref_n, xpos + xadd * 8, ypos + yadd * 8, bmv, false); | |
132 | chroma_mc(&mut self.wblk2.u[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 1, bmv, false); | |
133 | chroma_mc(&mut self.wblk2.v[xadd * 4 + yadd * 4 * 8..], 8, ref_n, xpos / 2 + xadd * 4, ypos / 2 + xadd * 4, 2, bmv, false); | |
134 | } | |
135 | self.pred_blk.avg(&self.wblk1, self.ratio1, &self.wblk2, self.ratio2); | |
136 | }, | |
137 | }; | |
138 | } | |
    /// Computes the rate-distortion metric for coding `pred_blk` against the
    /// source `ref_blk` with the given quantisers.
    ///
    /// Returns `(metric, bits)`; whenever the running metric exceeds `best_m`
    /// the candidate is abandoned early and `(metric, 0)` is returned.
    /// On full evaluation `tmp_tx` holds the quantised coefficients and
    /// `cand_blk` the matching reconstruction for the caller to keep.
    fn get_diff_metric(&mut self, be: &mut BitsEstimator, rdm: &RateDistMetric, best_m: u32, q_dc: usize, q_ac: usize, is16: bool, mut bits: u32) -> (u32, u32) {
        // Transform + quantise the residual; keep an untouched copy of the
        // quantised coefficients (tmpc itself gets dequantised in place below).
        self.pred_blk.calc_coeffs(&self.ref_blk, &mut self.tmpc, q_dc, q_ac, is16);
        self.tmp_tx.copy_from_slice(&self.tmpc);
        // Bit cost: optional luma DC block (index 24), 16 luma 4x4 blocks,
        // then 8 chroma 4x4 blocks.
        if is16 {
            bits += be.block_bits(&self.tmpc[24], 24);
        }
        for blk in self.tmpc[..16].iter() {
            bits += be.block_bits(blk, 0);
        }
        for blk in self.tmpc[16..24].iter() {
            bits += be.block_bits(blk, 16);
        }
        // Early exit on bit cost alone (no distortion accumulated yet).
        let cdist = rdm.get_metric(bits, 0);
        if cdist > best_m {
            return (cdist, 0);
        }

        // Dequantise in place for reconstruction.
        for blk in self.tmpc[..16].iter_mut() {
            blk.dequant(q_ac, q_ac);
        }
        let (cq_dc, cq_ac) = chroma_quants(q_ac);
        for blk in self.tmpc[16..24].iter_mut() {
            blk.dequant(cq_dc, cq_ac);
        }
        // For 16x16 mode the luma DC values live in the extra block; restore
        // them into each 4x4 block after the inverse DC transform.
        if is16 {
            let (blocks, dc_blk) = self.tmpc.split_at_mut(24);
            dc_blk[0].dequant_dcs(q_dc, q_ac);
            dc_blk[0].itransform_dcs();
            for (blk, &dc) in blocks.iter_mut().zip(dc_blk[0].coeffs.iter()) {
                blk.coeffs[0] = dc;
            }
        }

        // Reconstruct the candidate and accumulate luma distortion one 4x4
        // block at a time, aborting as soon as the metric exceeds the best.
        self.cand_blk.copy_from(&self.pred_blk);
        let mut dist = 0;
        for (i, blk) in self.tmpc[..16].iter_mut().enumerate() {
            let off = (i & 3) * 4 + (i >> 2) * 4 * 16;
            if !blk.is_empty() {
                blk.itransform_4x4();
                blk.add_to(&mut self.cand_blk.y[off..], 16);
            }
            dist += calc_dist(&self.cand_blk.y[off..], 16, &self.ref_blk.y[off..], 16, 4, 4);
            let cdist = rdm.get_metric(bits, dist);
            if cdist > best_m {
                return (cdist, 0);
            }
        }
        // Chroma planes: blocks 16..20 are U, 20..24 are V.
        let (_, cpart) = self.tmpc.split_at_mut(16);
        let (upart, vpart) = cpart.split_at_mut(4);
        for (i, (ublk, vblk)) in upart.iter_mut().zip(vpart.iter_mut()).enumerate() {
            let off = (i & 1) * 4 + (i >> 1) * 4 * 8;
            ublk.itransform_4x4();
            vblk.itransform_4x4();
            ublk.add_to(&mut self.cand_blk.u[off..], 8);
            vblk.add_to(&mut self.cand_blk.v[off..], 8);
            dist += calc_dist(&self.cand_blk.u[off..], 8, &self.ref_blk.u[off..], 8, 4, 4);
            dist += calc_dist(&self.cand_blk.v[off..], 8, &self.ref_blk.v[off..], 8, 4, 4);

            let cdist = rdm.get_metric(bits, dist);
            if cdist > best_m {
                return (cdist, 0);
            }
        }

        (rdm.get_metric(bits, dist), bits)
    }
205 | fn get_skip_metric(&self, rdm: &RateDistMetric, best_m: u32) -> (u32, u32) { | |
206 | let bits = 1; | |
207 | let mut dist = calc_dist(&self.pred_blk.y, 16, &self.ref_blk.y, 16, 16, 16); | |
208 | let cdist = rdm.get_metric(bits, dist); | |
209 | if cdist > best_m { | |
210 | return (cdist, 0); | |
211 | } | |
212 | dist += calc_dist(&self.pred_blk.u, 8, &self.ref_blk.u, 8, 8, 8); | |
213 | let cdist = rdm.get_metric(bits, dist); | |
214 | if cdist > best_m { | |
215 | return (cdist, 0); | |
216 | } | |
217 | dist += calc_dist(&self.pred_blk.v, 8, &self.ref_blk.v, 8, 8, 8); | |
218 | ||
219 | (rdm.get_metric(bits, dist), bits) | |
220 | } | |
221 | fn put_mb(dst: &mut NASimpleVideoFrame<u8>, cblk: &RefMBData, mb_x: usize, mb_y: usize) { | |
222 | for (dline, sline) in dst.data[dst.offset[0] + mb_x * 16 + mb_y * 16 * dst.stride[0]..].chunks_mut(dst.stride[0]).zip(cblk.y.chunks(16)) { | |
223 | dline[..16].copy_from_slice(sline); | |
224 | } | |
225 | for (dline, sline) in dst.data[dst.offset[1] + mb_x * 8 + mb_y * 8 * dst.stride[1]..].chunks_mut(dst.stride[1]).zip(cblk.u.chunks(8)) { | |
226 | dline[..8].copy_from_slice(sline); | |
227 | } | |
228 | for (dline, sline) in dst.data[dst.offset[2] + mb_x * 8 + mb_y * 8 * dst.stride[2]..].chunks_mut(dst.stride[2]).zip(cblk.v.chunks(8)) { | |
229 | dline[..8].copy_from_slice(sline); | |
230 | } | |
231 | } | |
232 | } | |
233 | ||
/// Rate-distortion decider that picks the best coding mode for each macroblock.
pub struct MacroblockDecider {
    pub q: usize,               // current quantiser index
    // neighbour availability flags for the current macroblock
    has_top: bool,
    has_left: bool,
    has_tl: bool,
    has_tr: bool,
    // current macroblock coordinates
    mb_x: usize,
    mb_y: usize,
    // best decision found so far for the current macroblock
    best_mbt: MacroblockType,
    best_dist: u32,
    best_bits: u32,
    // intra predictors holding reconstructed neighbour pixels per plane
    ipred_y: IntraPred16x16,
    ipred_u: IntraPred16x16,
    ipred_v: IntraPred16x16,
    // reconstructed bottom rows of the previous macroblock row, per plane
    top_y: Vec<u8>,
    top_u: Vec<u8>,
    top_v: Vec<u8>,
    // temporal distances used for B-frame MV scaling and prediction weighting
    tr_d: u32,
    tr_b: u32,
    mb: SingleMacroblock,       // per-macroblock scratch buffers
    best_coef: [Block; 25],     // quantised coefficients of the best candidate
    best_blk: RefMBData,        // reconstructed pixels of the best candidate
}
257 | ||
258 | impl MacroblockDecider { | |
    /// Creates a new decider; `resize()` must be called before use to size
    /// the top-row caches.
    pub fn new() -> Self {
        Self {
            q: 0,
            has_top: false,
            has_left: false,
            has_tl: false,
            has_tr: false,
            mb_x: 0,
            mb_y: 0,
            ipred_y: IntraPred16x16::new(),
            ipred_u: IntraPred16x16::new(),
            ipred_v: IntraPred16x16::new(),
            top_y: Vec::new(),
            top_u: Vec::new(),
            top_v: Vec::new(),
            tr_b: 0,
            tr_d: 0,
            best_mbt: MacroblockType::default(),
            best_dist: 0,
            best_bits: 0,
            mb: SingleMacroblock::new(),
            best_coef: [Block::new(); 25],
            best_blk: RefMBData::new(),
        }
    }
284 | pub fn resize(&mut self, mb_w: usize) { | |
285 | self.top_y.resize((mb_w + 1) * 16 + 1, 0); | |
286 | self.top_u.resize((mb_w + 1) * 8 + 1, 0); | |
287 | self.top_v.resize((mb_w + 1) * 8 + 1, 0); | |
288 | } | |
289 | pub fn set_b_distance(&mut self, tr_b: u32, tr_d: u32) { | |
290 | let (ratio1, ratio2) = if tr_d != 0 { | |
291 | (((tr_d - tr_b) << 14) / tr_d, (tr_b << 14) / tr_d) | |
292 | } else { (1 << 13, 1 << 13) }; | |
293 | self.tr_b = tr_b; | |
294 | self.tr_d = tr_d; | |
295 | self.mb.ratio1 = ratio1; | |
296 | self.mb.ratio2 = ratio2; | |
297 | } | |
    /// Loads the source macroblock and the neighbourhood state from the slice
    /// state, primes the intra predictors with the cached top row, and resets
    /// the best-decision tracking for the new macroblock.
    pub fn load_mb(&mut self, src: &[u8], offsets: [usize; 3], strides: [usize; 3], sstate: &SliceState) {
        self.has_top = sstate.has_t;
        self.has_left = sstate.has_l;
        self.has_tl = sstate.has_tl;
        self.has_tr = sstate.has_tr;
        self.mb_x = sstate.mb_x;
        self.mb_y = sstate.mb_y;

        // Pull the reconstructed row above this MB into the intra predictors;
        // element 0 of each `top` array is the corner sample maintained by recon_mb().
        self.ipred_y.top[1..].copy_from_slice(&self.top_y[self.mb_x * 16 + 1..][..16]);
        self.ipred_u.top[1..9].copy_from_slice(&self.top_u[self.mb_x * 8 + 1..][..8]);
        self.ipred_v.top[1..9].copy_from_slice(&self.top_v[self.mb_x * 8 + 1..][..8]);

        self.mb.load(src, offsets, strides);

        // Start the mode decision from scratch.
        self.best_mbt = MacroblockType::default();
        self.best_dist = std::u32::MAX;
        self.best_bits = 0;
    }
    /// Evaluates B-frame coding modes — skip, forward, backward and
    /// bidirectional — keeping the best one. Each stage returns early once
    /// the metric drops below `rdm.good_enough`.
    pub fn try_b_coding(&mut self, ref_p: &NAVideoBuffer<u8>, ref_n: &NAVideoBuffer<u8>, be: &mut BitsEstimator, me: &mut MotionEstimator, rdm: &RateDistMetric, mbstate: &MBState, refine: bool) {
        let q_dc = usize::from(RV40_QUANT_DC[1][self.q]);

        // B-skip: derive per-8x8 forward/backward MVs by scaling the
        // co-located reference motion by the temporal distances.
        let blk8_idx = mbstate.get_blk8_idx(self.mb_x, self.mb_y);
        let mut smb_f = [ZERO_MV; 4];
        let mut smb_b = [ZERO_MV; 4];
        for (i, (fwd, bwd)) in smb_f.iter_mut().zip(smb_b.iter_mut()).enumerate() {
            let ref_mv = mbstate.ref_mv[blk8_idx + (i & 1) + (i >> 1) * mbstate.blk8_stride];
            let (fm, bm) = ref_mv.scale(self.tr_d, self.tr_b);
            *fwd = fm;
            *bwd = bm;
        }
        self.mb.recon_pred_part(MacroblockType::BSkip(smb_f, smb_b), ref_p, ref_n, self.mb_x, self.mb_y);
        be.set_mb_type(MBType::Skip);
        let (cur_dist, cur_bits) = self.mb.get_skip_metric(rdm, self.best_dist);
        if cur_dist < self.best_dist {
            self.best_dist = cur_dist;
            self.best_bits = cur_bits;
            self.best_mbt = MacroblockType::BSkip(smb_f, smb_b);
            self.best_blk.copy_from(&self.mb.pred_blk);
            if self.best_dist < rdm.good_enough {
                return;
            }
        }

        // Forward prediction: search around candidate MVs from the neighbours
        // (the negated co-located reference MV seeds the search too).
        let fwd_cand = [
            -mbstate.ref_mv[blk8_idx],
            mbstate.fwd_mv[blk8_idx - 1],
            mbstate.fwd_mv[blk8_idx - 1 - mbstate.blk8_stride],
            mbstate.fwd_mv[blk8_idx - mbstate.blk8_stride],
            mbstate.fwd_mv[blk8_idx + 2 - mbstate.blk8_stride]
        ];
        let (fmv, _fdist) = me.search_mb_p(ref_p, &self.mb.ref_blk, self.mb_x, self.mb_y, &fwd_cand);
        be.set_mb_type(MBType::Forward);
        let bcost = be.estimate_mb_hdr(&[fmv]);
        self.mb.recon_pred_part(MacroblockType::Forward(fmv), ref_p, ref_n, self.mb_x, self.mb_y);
        let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, bcost);
        if cur_dist < self.best_dist {
            self.best_dist = cur_dist;
            self.best_bits = cur_bits;
            self.best_mbt = MacroblockType::Forward(fmv);
            self.best_coef.copy_from_slice(&self.mb.tmp_tx);
            self.best_blk.copy_from(&self.mb.cand_blk);
            if self.best_dist < rdm.good_enough {
                return;
            }
        }

        // Backward prediction: same search against the next reference.
        let bwd_cand = [
            mbstate.ref_mv[blk8_idx],
            mbstate.bwd_mv[blk8_idx - 1],
            mbstate.bwd_mv[blk8_idx - 1 - mbstate.blk8_stride],
            mbstate.bwd_mv[blk8_idx - mbstate.blk8_stride],
            mbstate.bwd_mv[blk8_idx + 2 - mbstate.blk8_stride]
        ];
        let (bmv, _bdist) = me.search_mb_p(ref_n, &self.mb.ref_blk, self.mb_x, self.mb_y, &bwd_cand);
        be.set_mb_type(MBType::Backward);
        let bcost = be.estimate_mb_hdr(&[bmv]);
        self.mb.recon_pred_part(MacroblockType::Backward(bmv), ref_p, ref_n, self.mb_x, self.mb_y);
        let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, bcost);
        if cur_dist < self.best_dist {
            self.best_dist = cur_dist;
            self.best_bits = cur_bits;
            self.best_mbt = MacroblockType::Backward(bmv);
            self.best_coef.copy_from_slice(&self.mb.tmp_tx);
            self.best_blk.copy_from(&self.mb.cand_blk);
            if self.best_dist < rdm.good_enough {
                return;
            }
        }

        // Bidirectional: reuse the unidirectional MVs, optionally refining
        // them jointly with a dedicated B-frame search.
        be.set_mb_type(MBType::Bidir);
        let (i_fmv, i_bmv) = if !refine {
            (fmv, bmv)
        } else {
            let mut b_searcher = SearchB::new(ref_p, ref_n, self.mb_x, self.mb_y, [self.mb.ratio1, self.mb.ratio2]);
            b_searcher.search_mb(&self.mb.ref_blk, [fmv, bmv])
        };

        let bcost = be.estimate_mb_hdr(&[i_fmv, i_bmv]);
        self.mb.recon_pred_part(MacroblockType::Bidir(i_fmv, i_bmv), ref_p, ref_n, self.mb_x, self.mb_y);
        let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, bcost);
        if cur_dist < self.best_dist {
            self.best_dist = cur_dist;
            self.best_bits = cur_bits;
            self.best_coef.copy_from_slice(&self.mb.tmp_tx);
            self.best_blk.copy_from(&self.mb.cand_blk);
            self.best_mbt = MacroblockType::Bidir(i_fmv, i_bmv);
        }
    }
    /// Evaluates P-frame coding modes: skip, 16x16 (plain and mixed) and,
    /// when the 16x16 distortion is high enough, a split into 8x8 partitions
    /// (collapsed back to 16x8/8x16 when the MVs pair up).
    pub fn try_p_coding(&mut self, ref_pic: &NAVideoBuffer<u8>, be: &mut BitsEstimator, me: &mut MotionEstimator, rdm: &RateDistMetric, mbstate: &MBState) {
        let q_dc = usize::from(RV40_QUANT_DC[1][self.q]);

        // P-skip: zero-MV prediction from the reference, no residual.
        self.mb.recon_pred_part(MacroblockType::Inter16x16(ZERO_MV), ref_pic, ref_pic, self.mb_x, self.mb_y);
        be.set_mb_type(MBType::Skip);
        let (cur_dist, cur_bits) = self.mb.get_skip_metric(rdm, self.best_dist);
        if cur_dist < self.best_dist {
            self.best_dist = cur_dist;
            self.best_bits = cur_bits;
            self.best_mbt = MacroblockType::PSkip;
            self.best_blk.copy_from(&self.mb.pred_blk);
            if self.best_dist < rdm.good_enough {
                return;
            }
        }

        // Full 16x16 motion search seeded with neighbour MVs.
        let blk8_idx = mbstate.get_blk8_idx(self.mb_x, self.mb_y);
        let mv_cand = [
            mbstate.fwd_mv[blk8_idx - 1],
            mbstate.fwd_mv[blk8_idx - 1 - mbstate.blk8_stride],
            mbstate.fwd_mv[blk8_idx - mbstate.blk8_stride],
            mbstate.fwd_mv[blk8_idx + 2 - mbstate.blk8_stride]
        ];
        let (mv, pdist) = me.search_mb_p(ref_pic, &self.mb.ref_blk, self.mb_x, self.mb_y, &mv_cand);

        self.mb.recon_pred_part(MacroblockType::Inter16x16(mv), ref_pic, ref_pic, self.mb_x, self.mb_y);

        // Same prediction costed both as plain P16x16 and as the mixed
        // (16x16 transform) variant.
        be.set_mb_type(MBType::P16x16);
        let pcost = be.estimate_mb_hdr(&[mv]);
        let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, pcost);
        if cur_dist < self.best_dist {
            self.best_mbt = MacroblockType::Inter16x16(mv);
            self.best_dist = cur_dist;
            self.best_bits = cur_bits;
            self.best_coef.copy_from_slice(&self.mb.tmp_tx);
            self.best_blk.copy_from(&self.mb.cand_blk);
        }
        be.set_mb_type(MBType::P16x16Mix);
        let p16cost = be.estimate_mb_hdr(&[mv]);
        let (cur_dist16, cur_bits16) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, true, p16cost);
        if cur_dist16 < self.best_dist {
            self.best_mbt = MacroblockType::InterMix(mv);
            self.best_dist = cur_dist16;
            self.best_bits = cur_bits16;
            self.best_coef.copy_from_slice(&self.mb.tmp_tx);
            self.best_blk.copy_from(&self.mb.cand_blk);
        }

        // Try splitting into four 8x8 partitions only when the 16x16 search
        // left enough distortion to make it worthwhile.
        if pdist > rdm.p_split_thr {
            let xpos = self.mb_x * 16;
            let ypos = self.mb_y * 16;

            let mv_cand = [
                mv,
                mbstate.fwd_mv[blk8_idx - 1],
                mbstate.fwd_mv[blk8_idx - 1 - mbstate.blk8_stride],
                mbstate.fwd_mv[blk8_idx - mbstate.blk8_stride],
                mbstate.fwd_mv[blk8_idx - 1 + mbstate.blk8_stride],
                mbstate.fwd_mv[blk8_idx + 2 - mbstate.blk8_stride],
                mbstate.fwd_mv[blk8_idx + 1 - mbstate.blk8_stride]
            ];

            let (mv0, pdist0) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos, ypos, &mv_cand);
            let (mv1, pdist1) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos + 8, ypos, &mv_cand);
            let (mv2, pdist2) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos, ypos + 8, &mv_cand);
            let (mv3, pdist3) = me.search_blk8(ref_pic, &self.mb.ref_blk, xpos + 8, ypos + 8, &mv_cand);
            // Only accept the split if it cuts the distortion by at least 25%.
            if pdist0 + pdist1 + pdist2 + pdist3 < pdist - pdist / 4 {
                let mvs = [mv0, mv1, mv2, mv3];
                // Collapse to 16x8 / 8x16 when the MVs pair up row- or column-wise.
                let (cand_mbt, cand_mbtype) = if mv0 == mv1 && mv2 == mv3 {
                    (MBType::P16x8, MacroblockType::Inter16x8([mv0, mv2]))
                } else if mv0 == mv2 && mv1 == mv3 {
                    (MBType::P8x16, MacroblockType::Inter8x16([mv0, mv1]))
                } else {
                    (MBType::P8x8, MacroblockType::Inter8x8(mvs))
                };
                be.set_mb_type(cand_mbt);
                let pcost = be.estimate_mb_hdr(&mvs);

                self.mb.recon_pred_part(MacroblockType::Inter8x8(mvs), ref_pic, ref_pic, self.mb_x, self.mb_y);
                let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, false, pcost);
                if cur_dist < self.best_dist {
                    self.best_dist = cur_dist;
                    self.best_mbt = cand_mbtype;
                    self.best_bits = cur_bits;
                    self.best_coef.copy_from_slice(&self.mb.tmp_tx);
                    self.best_blk.copy_from(&self.mb.cand_blk);
                }
            }
        }
    }
    /// Applies the same 16x16 intra prediction mode to all three planes of
    /// the prediction block (8x8 variants for chroma).
    fn recon_intra_16_pred(&mut self, ptype: PredType8x8) {
        self.ipred_y.apply16(ptype, &mut self.mb.pred_blk.y, 16);
        self.ipred_u.apply8(ptype, &mut self.mb.pred_blk.u, 8);
        self.ipred_v.apply8(ptype, &mut self.mb.pred_blk.v, 8);
    }
501 | pub fn try_intra_16_pred(&mut self, be: &mut BitsEstimator, rdm: &RateDistMetric) { | |
502 | if self.best_dist < rdm.good_enough { | |
503 | return; | |
504 | } | |
505 | let pred_types_try: &[PredType8x8] = match (self.has_top, self.has_left) { | |
506 | (false, false) => &[PredType8x8::DC128], | |
507 | (true, false) => &[PredType8x8::TopDC], | |
508 | (false, true) => &[PredType8x8::LeftDC], | |
509 | _ => &PRED_TYPES8, | |
510 | }; | |
511 | ||
512 | be.set_mb_type(MBType::Intra16); | |
513 | let hdr_cost = be.estimate_mb_hdr(&[]); | |
514 | for &ptype in pred_types_try.iter() { | |
515 | if !self.has_tl && matches!(ptype, PredType8x8::Plane) { | |
516 | continue; | |
517 | } | |
518 | self.recon_intra_16_pred(ptype); | |
519 | let q_dc = usize::from(RV40_QUANT_DC[0][self.q]); | |
520 | let (cur_dist, cur_bits) = self.mb.get_diff_metric(be, rdm, self.best_dist, q_dc, self.q, true, hdr_cost); | |
521 | if cur_dist < self.best_dist { | |
522 | self.best_mbt = MacroblockType::Intra16x16(ptype); | |
523 | self.best_dist = cur_dist; | |
524 | self.best_bits = cur_bits; | |
525 | self.best_coef.copy_from_slice(&self.mb.tmp_tx); | |
526 | self.best_blk.copy_from(&self.mb.cand_blk); | |
527 | if cur_dist < rdm.good_enough { | |
528 | break; | |
529 | } | |
530 | } | |
531 | } | |
532 | } | |
    /// Tries 4x4 intra prediction: greedily picks the best mode per 4x4 block
    /// (feeding each reconstruction back into the predictor for the following
    /// blocks), then adds the estimated cost of signalling the modes and
    /// compares the total against the current best macroblock decision.
    pub fn try_intra_4x4_pred(&mut self, be: &mut BitsEstimator, rdm: &RateDistMetric, mbstate: &mut MBState) {
        // Candidate mode lists depending on which neighbours are available.
        const PRED4_DEF: &[PredType4x4] = &[ PredType4x4::DC128 ];
        const PRED4_NO_TOP: &[PredType4x4] = &[ PredType4x4::Hor, PredType4x4::LeftDC ];
        const PRED4_NO_LEFT: &[PredType4x4] = &[ PredType4x4::Ver, PredType4x4::TopDC ];
        const PRED4_FULL: &[PredType4x4] = &[
            PredType4x4::Ver, PredType4x4::Hor, PredType4x4::DC,
            PredType4x4::DiagDownLeft, PredType4x4::DiagDownRight,
            PredType4x4::VerRight, PredType4x4::HorDown,
            PredType4x4::VerLeft, PredType4x4::HorUp
        ];
        const PRED4_FULL_NO_LD: &[PredType4x4] = &[
            PredType4x4::Ver, PredType4x4::Hor, PredType4x4::DC,
            PredType4x4::DiagDownLeftNoDown, PredType4x4::DiagDownRight,
            PredType4x4::VerRight, PredType4x4::HorDown,
            PredType4x4::VerLeftNoDown, PredType4x4::HorUpNoDown
        ];

        if self.best_dist < rdm.good_enough {
            return;
        }
        be.set_mb_type(MBType::Intra);

        // Top-right neighbour samples: real pixels from the cached row when
        // available, otherwise the rightmost known top sample replicated.
        let (tr_y, tr_u, tr_v) = if self.has_tr {
            let mut tr_y = [0; 4];
            let mut tr_u = [0; 4];
            let mut tr_v = [0; 4];
            tr_y.copy_from_slice(&self.top_y[self.mb_x * 16 + 16 + 1..][..4]);
            tr_u.copy_from_slice(&self.top_u[self.mb_x * 8 + 8 + 1..][..4]);
            tr_v.copy_from_slice(&self.top_v[self.mb_x * 8 + 8 + 1..][..4]);
            (tr_y, tr_u, tr_v)
        } else {
            ([self.ipred_y.top[16]; 4], [self.ipred_u.top[8]; 4], [self.ipred_v.top[8]; 4])
        };
        let mut ipred4 = BlockIntra4Pred::new(&self.ipred_y, &self.ipred_u, &self.ipred_v, tr_y, tr_u, tr_v, self.has_left);

        let q_ac = self.q;
        let (cq_dc, cq_ac) = chroma_quants(self.q);
        let mut tot_dist = 0;
        let mut tot_bits = be.estimate_mb_hdr(&[]);
        let mut modes = [PredType4x4::DC; 16];
        let mut tblk = Block::new();
        let mut has_t = self.has_top;

        for y in 0..4 {
            let mut has_l = self.has_left;
            let mut has_ld = has_l && y != 3;
            for x in 0..4 {
                let list = match (has_l, has_t) {
                    (true, true) if has_ld => PRED4_FULL,
                    (true, true) => PRED4_FULL_NO_LD,
                    (false, true) => PRED4_NO_LEFT,
                    (true, false) => PRED4_NO_TOP,
                    _ => PRED4_DEF,
                };

                // One 4x4 chroma block covers an 8x8 luma area, so chroma is
                // processed only at even luma block positions.
                let do_chroma = ((x & 1) == 0) && ((y & 1) == 0);

                // Pick the mode with the best local rate-distortion metric.
                let mut best_mode = PRED4_DEF[0];
                let mut best_cdist = std::u32::MAX;
                let mut best_dist = 0;
                let mut best_bits = 0;
                for &try_mode in list.iter() {
                    ipred4.pred_block(&mut self.mb.cand_blk, x, y, try_mode);
                    let off = x * 4 + y * 4 * 16;
                    let (mut cur_dist, mut cur_bits) = Self::blk4_diff(&self.mb.cand_blk.y[off..], &self.mb.ref_blk.y[off..], 16, q_ac, q_ac, be);
                    if do_chroma {
                        let off = x * 2 + y * 2 * 8;
                        let (du, bu) = Self::blk4_diff(&self.mb.cand_blk.u[off..], &self.mb.ref_blk.u[off..], 8, cq_dc, cq_ac, be);
                        let (dv, bv) = Self::blk4_diff(&self.mb.cand_blk.v[off..], &self.mb.ref_blk.v[off..], 8, cq_dc, cq_ac, be);
                        cur_dist += du + dv;
                        cur_bits += bu + bv;
                    }

                    let cand_dist = rdm.get_metric(cur_bits, cur_dist);
                    if cand_dist < best_cdist {
                        best_cdist = cand_dist;
                        best_mode = try_mode;
                        best_dist = cur_dist;
                        best_bits = cur_bits;
                    }
                }

                // Redo the prediction with the winning mode and actually code
                // the residual so the reconstruction can feed later blocks.
                ipred4.pred_block(&mut self.mb.cand_blk, x, y, best_mode);

                let off = x * 4 + y * 4 * 16;
                tblk.from_diff(&self.mb.ref_blk.y[off..], &self.mb.cand_blk.y[off..], 16);
                tblk.transform_4x4();
                tblk.quant(q_ac, q_ac);
                self.mb.tmp_tx[x + y * 4] = tblk;
                if !tblk.is_empty() {
                    tblk.dequant(q_ac, q_ac);
                    tblk.itransform_4x4();
                    tblk.add_to(&mut self.mb.cand_blk.y[off..], 16);
                }
                if do_chroma {
                    let off = x * 2 + y * 2 * 8;
                    let mut dests = [&mut self.mb.cand_blk.u[off..], &mut self.mb.cand_blk.v[off..]];
                    let sources = [&self.mb.ref_blk.u[off..], &self.mb.ref_blk.v[off..]];
                    for (comp, (dblk, &sblk)) in dests.iter_mut().zip(sources.iter()).enumerate() {
                        tblk.from_diff(sblk, dblk, 8);
                        tblk.transform_4x4();
                        tblk.quant(cq_dc, cq_ac);
                        // x and y are both even here, so x / 2 + y enumerates
                        // the four chroma blocks of this plane as 0..4.
                        self.mb.tmp_tx[16 + comp * 4 + x / 2 + y] = tblk;
                        if !tblk.is_empty() {
                            tblk.dequant(cq_dc, cq_ac);
                            tblk.itransform_4x4();
                            tblk.add_to(dblk, 8);
                        }
                    }
                }

                ipred4.update_from(&self.mb.cand_blk, x, y);

                tot_dist += best_dist;
                tot_bits += best_bits;

                // Abandon 4x4 intra as soon as it cannot beat the current best.
                let cand_dist = rdm.get_metric(tot_bits, tot_dist);
                if cand_dist > self.best_dist {
                    return;
                }

                modes[x + y * 4] = best_mode;

                // Inside the macroblock the left neighbour is always present
                // for the following blocks; left-down never is.
                has_l = true;
                has_ld = false;
            }
            has_t = true;
        }

        mbstate.set_ipred4x4(self.mb_x, self.mb_y, &modes);

        // Estimate the cost of signalling the chosen modes themselves.
        if !self.has_top {
            let mut code = 0usize;
            for &el in modes[..4].iter() {
                code = code * 2 + if el.to_index() == 0 { 0 } else { 1 };
            }
            tot_bits += u32::from(RV40_AIC_TOP_BITS[code]);
        }

        let ystart = if self.has_top { 0 } else { 1 };
        for y in ystart..4 {
            let mut x = 0;
            while x < 4 {
                let (lctx, tctx, trctx) = mbstate.get_ipred4x4_ctx(self.mb_x, self.mb_y, x, y);
                let ctx_word = if x < 3 {
                    ((trctx & 0xF) as u16) + (((tctx & 0xF) as u16) << 4) + (((lctx & 0xF) as u16) << 8)
                } else { 0xFFF };
                if let Some(idx) = RV40_AIC_PATTERNS.iter().position(|&x| x == ctx_word) {
                    // Known context pattern: two modes are coded as one pair.
                    let code = modes[x + y * 4].to_index() * 9 + modes[x + y * 4 + 1].to_index();
                    tot_bits += u32::from(RV40_AIC_MODE2_BITS[idx][code as usize]);
                    x += 2;
                } else if tctx != -1 && lctx != -1 {
                    // Both neighbour modes known: single mode with full context.
                    let idx = (tctx + lctx * 10) as usize;
                    let code = modes[x + y * 4].to_index() as usize;
                    tot_bits += u32::from(RV40_AIC_MODE1_BITS[idx][code]);
                    x += 1;
                } else {
                    // Partial context: costs at most one extra bit.
                    match lctx {
                        -1 if tctx < 2 => tot_bits += 1,
                        0 | 2 => tot_bits += 1,
                        _ => {},
                    };
                    x += 1;
                }
            }
        }

        let cand_dist = rdm.get_metric(tot_bits, tot_dist);
        if cand_dist < self.best_dist {
            self.best_dist = cand_dist;
            self.best_mbt = MacroblockType::Intra4x4(modes);
            self.best_bits = tot_bits;
            self.best_coef.copy_from_slice(&self.mb.tmp_tx);
            self.best_blk.copy_from(&self.mb.cand_blk);
        }
    }
    /// Returns the estimated bit cost of the best macroblock decision so far.
    pub fn get_est_bits(&self) -> u32 { self.best_bits }
710 | pub fn get_macroblock(&mut self) -> Macroblock { | |
711 | let mut coeffs = [Block::new(); 25]; | |
712 | if !self.best_mbt.is_skip() { | |
713 | coeffs.copy_from_slice(&self.best_coef); | |
714 | } | |
715 | Macroblock { | |
716 | mb_type: self.best_mbt.clone(), | |
717 | coeffs, | |
718 | } | |
719 | } | |
    /// Writes the best reconstruction into the output frame and updates the
    /// top-row and left-column predictor caches for subsequent macroblocks.
    pub fn recon_mb(&mut self, dst: &mut NASimpleVideoFrame<u8>) {
        let src_mb = &self.best_blk;
        SingleMacroblock::put_mb(dst, src_mb, self.mb_x, self.mb_y);

        // Bottom row of this MB becomes the "top" row for the MB below.
        self.top_y[self.mb_x * 16 + 1..][..16].copy_from_slice(&src_mb.y[15 * 16..]);
        self.top_u[self.mb_x * 8 + 1..][..8].copy_from_slice(&src_mb.u[7 * 8..]);
        self.top_v[self.mb_x * 8 + 1..][..8].copy_from_slice(&src_mb.v[7 * 8..]);

        // The rightmost old top sample becomes the new corner (top-left)
        // sample for the next macroblock to the right.
        self.ipred_y.top[0] = self.ipred_y.top[16];
        self.ipred_y.left[0] = self.ipred_y.top[0];
        self.ipred_u.top[0] = self.ipred_u.top[8];
        self.ipred_u.left[0] = self.ipred_u.top[0];
        self.ipred_v.top[0] = self.ipred_v.top[8];
        self.ipred_v.left[0] = self.ipred_v.top[0];

        // Rightmost column of the reconstruction feeds the left predictor.
        for (left, src) in self.ipred_y.left[1..].iter_mut().zip(src_mb.y.chunks_exact(16)) {
            *left = src[15];
        }
        for (left, src) in self.ipred_u.left[1..9].iter_mut().zip(src_mb.u.chunks_exact(8)) {
            *left = src[7];
        }
        for (left, src) in self.ipred_v.left[1..9].iter_mut().zip(src_mb.v.chunks_exact(8)) {
            *left = src[7];
        }
    }
    /// Codes one 4x4 block (transform, quantise, optionally reconstruct the
    /// residual) and returns `(squared-error distortion, bit cost)` for it.
    fn blk4_diff(pred: &[u8], refsrc: &[u8], stride: usize, q_dc: usize, q_ac: usize, be: &mut BitsEstimator) -> (u32, u32) {
        let mut blk = Block::new();
        blk.from_diff(refsrc, pred, stride);
        blk.transform_4x4();
        blk.quant(q_dc, q_ac);
        let bits = be.block_bits(&blk, 0);
        if !blk.is_empty() {
            blk.dequant(q_dc, q_ac);
            blk.itransform_4x4();
        }
        // After the inverse transform blk.coeffs holds the reconstructed
        // residual (all zeros for an empty block), so prediction + residual,
        // clamped to 8 bits, gives the reconstructed pixel value.
        let mut dist = 0u32;
        for (diffs, (pred, refsrc)) in blk.coeffs.chunks(4).zip(pred.chunks(stride).zip(refsrc.chunks(stride))) {
            for (&diff, (&p, &r)) in diffs.iter().zip(pred.iter().zip(refsrc.iter())) {
                let new = (i32::from(p) + i32::from(diff)).max(0).min(255);
                let expected = i32::from(r);
                dist += ((new - expected) * (new - expected)) as u32;
            }
        }
        (dist, bits)
    }
765 | } |