]>
Commit | Line | Data |
---|---|---|
c5d5793c KS |
1 | use nihav_core::frame::*; |
2 | use nihav_codec_support::codecs::{MV, ZERO_MV}; | |
3 | use nihav_codec_support::data::GenericCache; | |
4 | use super::super::vp78::{PredMode, MVSplitMode, SubMVRef}; | |
5 | use super::super::vp78data::*; | |
6 | use super::super::vp78dsp::*; | |
7 | use super::super::vp7data::*; | |
8 | use super::super::vp7dsp::*; | |
9 | ||
10 | #[derive(Clone,Copy)] | |
11 | pub enum MBType { | |
12 | Intra(PredMode, PredMode), | |
13 | Intra4x4([PredMode; 16], [u8; 16], PredMode), | |
14 | InterNoMV(bool, [u8; 4]), | |
15 | InterNearest(bool, [u8; 4]), | |
16 | InterNear(bool, [u8; 4]), | |
17 | InterMV(bool, [u8; 4], MV), | |
18 | InterSplitMV(bool, [u8; 4], MVSplitMode, [SubMVRef; 16], [MV; 16]), | |
19 | } | |
20 | ||
21 | impl MBType { | |
22 | pub fn is_intra(&self) -> bool { | |
6f263099 | 23 | matches!(*self, MBType::Intra(_, _) | MBType::Intra4x4(_, _, _)) |
c5d5793c KS |
24 | } |
25 | pub fn get_last(&self) -> bool { | |
26 | match *self { | |
27 | MBType::InterNoMV(last, _) | | |
28 | MBType::InterNearest(last, _) | | |
29 | MBType::InterNear(last, _) | | |
30 | MBType::InterMV(last, _, _) | | |
31 | MBType::InterSplitMV(last, _, _, _, _) => last, | |
32 | _ => false, | |
33 | } | |
34 | } | |
35 | } | |
36 | ||
37 | impl Default for MBType { | |
38 | fn default() -> Self { MBType::Intra(PredMode::DCPred, PredMode::DCPred) } | |
39 | } | |
40 | ||
/// Stores the element-wise difference `src1[i] - src2[i]` into `dst`.
///
/// Both inputs are widened to `i16` before subtracting, so each result lies
/// in `-255..=255`.
pub fn get_block_difference(dst: &mut [i16; 16], src1: &[u8; 16], src2: &[u8; 16]) {
    for (idx, out) in dst.iter_mut().enumerate() {
        let minuend = i16::from(src1[idx]);
        let subtrahend = i16::from(src2[idx]);
        *out = minuend - subtrahend;
    }
}
/// Computes the sum of squared errors between `old` and the reconstruction of
/// `new + diff`.
///
/// For every position the reconstructed value is `new[i] + diff[i]` clamped to
/// `0..=255`; the squared distance of that value to `old[i]` is accumulated.
///
/// The arithmetic is carried out in `i32` so that a large residue cannot
/// overflow the intermediate sum before it is clamped.
pub fn get_difference_dist(old: &[u8; 16], new: &[u8; 16], diff: &[i16; 16]) -> u32 {
    let mut dist = 0u32;
    for ((&old, &new), &diff) in old.iter().zip(new.iter()).zip(diff.iter()) {
        // Widen first: `255 + i16::MAX` does not fit into `i16`.
        let nval = (i32::from(new) + i32::from(diff)).max(0).min(255);
        let delta = nval - i32::from(old);
        // `delta` is within -255..=255, so the square always fits.
        dist += (delta * delta) as u32;
    }
    dist
}
55 | ||
56 | pub fn requant_y2_dc(val: &mut i16, q: usize) { | |
57 | *val = *val / Y2_DC_QUANTS[q] * Y2_DC_QUANTS[q]; | |
58 | } | |
59 | ||
/// Transform and quantisation operations performed in place on a coefficient block.
///
/// The implementation in this file is for `[i16; 16]`.
pub trait DCTBlock {
    /// Reports whether the block holds at least one non-zero element.
    fn has_nz(&self) -> bool;
    /// Applies the forward transform in place.
    fn fdct(&mut self);
    /// Applies the inverse transform in place.
    fn idct(&mut self);
    /// Quantises and immediately dequantises the block with the luma tables
    /// selected by index `q` (see the `[i16; 16]` implementation).
    fn requant_y(&mut self, q: usize);
    /// Divides the coefficients by the quantiser selected by `q` and `ctype`.
    ///
    /// In the `[i16; 16]` implementation `ctype` 0 and 3 pick the Y tables,
    /// 2 picks the UV tables and any other value picks the Y2 tables.
    fn quant(&mut self, q: usize, ctype: usize);
    /// Multiplies the coefficients by the quantiser selected by `q` and `ctype`
    /// (same table selection as `quant`).
    fn dequant(&mut self, q: usize, ctype: usize);
}
68 | ||
69 | impl DCTBlock for [i16; 16] { | |
70 | fn has_nz(&self) -> bool { | |
71 | for &el in self.iter() { | |
72 | if el != 0 { | |
73 | return true; | |
74 | } | |
75 | } | |
76 | false | |
77 | } | |
78 | #[allow(clippy::erasing_op)] | |
79 | #[allow(clippy::identity_op)] | |
80 | fn fdct(&mut self) { | |
81 | let mut tmp = [0i16; 16]; | |
82 | for i in 0..4 { | |
83 | let s0 = i32::from(self[i + 4 * 0]); | |
84 | let s1 = i32::from(self[i + 4 * 1]); | |
85 | let s2 = i32::from(self[i + 4 * 2]); | |
86 | let s3 = i32::from(self[i + 4 * 3]); | |
87 | ||
88 | let t0 = (s0 + s3).wrapping_mul(23170) + 0x2000; | |
89 | let t1 = (s1 + s2).wrapping_mul(23170); | |
90 | let t2 = s0 - s3; | |
91 | let t3 = s1 - s2; | |
92 | let t4 = t2.wrapping_mul(30274) + t3.wrapping_mul(12540) + 0x2000; | |
93 | let t5 = t2.wrapping_mul(12540) - t3.wrapping_mul(30274) + 0x2000; | |
94 | ||
95 | tmp[i + 0 * 4] = ((t0 + t1) >> 14) as i16; | |
96 | tmp[i + 1 * 4] = ( t4 >> 14) as i16; | |
97 | tmp[i + 2 * 4] = ((t0 - t1) >> 14) as i16; | |
98 | tmp[i + 3 * 4] = ( t5 >> 14) as i16; | |
99 | } | |
100 | for (src, dst) in tmp.chunks(4).zip(self.chunks_mut(4)) { | |
101 | let s0 = i32::from(src[0]); | |
102 | let s1 = i32::from(src[1]); | |
103 | let s2 = i32::from(src[2]); | |
104 | let s3 = i32::from(src[3]); | |
105 | ||
106 | let t0 = (s0 + s3).wrapping_mul(23170) + 0x8000; | |
107 | let t1 = (s1 + s2).wrapping_mul(23170); | |
108 | let t2 = s0 - s3; | |
109 | let t3 = s1 - s2; | |
110 | let t4 = t2.wrapping_mul(30274) + t3.wrapping_mul(12540) + 0x8000; | |
111 | let t5 = t2.wrapping_mul(12540) - t3.wrapping_mul(30274) + 0x8000; | |
112 | ||
113 | dst[0] = ((t0 + t1) >> 16) as i16; | |
114 | dst[1] = ( t4 >> 16) as i16; | |
115 | dst[2] = ((t0 - t1) >> 16) as i16; | |
116 | dst[3] = ( t5 >> 16) as i16; | |
117 | } | |
118 | } | |
119 | fn idct(&mut self) { idct4x4(self) } | |
120 | fn requant_y(&mut self, q: usize) { | |
121 | self[0] = self[0] / Y_DC_QUANTS[q] * Y_DC_QUANTS[q]; | |
122 | for el in self[1..].iter_mut() { | |
123 | *el = *el / Y_AC_QUANTS[q] * Y_AC_QUANTS[q]; | |
124 | } | |
125 | } | |
126 | fn quant(&mut self, q: usize, ctype: usize) { | |
127 | let (q_dc, q_ac) = match ctype { | |
128 | 0 | 3 => (Y_DC_QUANTS[q], Y_AC_QUANTS[q]), | |
129 | 2 => (UV_DC_QUANTS[q], UV_AC_QUANTS[q]), | |
130 | _ => (Y2_DC_QUANTS[q], Y2_AC_QUANTS[q]), | |
131 | }; | |
132 | self[0] /= q_dc; | |
133 | for el in self[1..].iter_mut() { | |
134 | *el /= q_ac; | |
135 | } | |
136 | } | |
137 | fn dequant(&mut self, q: usize, ctype: usize) { | |
138 | let (q_dc, q_ac) = match ctype { | |
139 | 0 | 3 => (Y_DC_QUANTS[q], Y_AC_QUANTS[q]), | |
140 | 2 => (UV_DC_QUANTS[q], UV_AC_QUANTS[q]), | |
141 | _ => (Y2_DC_QUANTS[q], Y2_AC_QUANTS[q]), | |
142 | }; | |
143 | self[0] *= q_dc; | |
144 | for el in self[1..].iter_mut() { | |
145 | *el *= q_ac; | |
146 | } | |
147 | } | |
148 | } | |
149 | ||
/// Intra prediction into a 16x16 pixel buffer.
pub trait IPredBlock16 {
    /// Fills `self` with the prediction for `mode`, using the neighbour data in `ipred`.
    /// `stride` is forwarded unchanged to the prediction routine.
    fn ipred16(&mut self, stride: usize, mode: PredMode, ipred: &IPredContext);
}
/// Intra prediction into an 8x8 pixel buffer.
pub trait IPredBlock8 {
    /// Fills `self` with the prediction for `mode`, using the neighbour data in `ipred`.
    /// `stride` is forwarded unchanged to the prediction routine.
    fn ipred8 (&mut self, stride: usize, mode: PredMode, ipred: &IPredContext);
}
/// Intra prediction into a 4x4 pixel buffer.
pub trait IPredBlock4 {
    /// Fills `self` with the prediction for `mode`, using the neighbour data in `ipred`.
    /// `stride` is forwarded unchanged to the prediction routine.
    fn ipred4 (&mut self, stride: usize, mode: PredMode, ipred: &IPredContext);
}
159 | ||
160 | impl IPredBlock16 for [u8; 256] { | |
161 | fn ipred16(&mut self, stride: usize, mode: PredMode, ipred: &IPredContext) { | |
162 | match mode { | |
163 | PredMode::DCPred => IPred16x16::ipred_dc(self, 0, stride, ipred), | |
164 | PredMode::HPred => IPred16x16::ipred_h (self, 0, stride, ipred), | |
165 | PredMode::VPred => IPred16x16::ipred_v (self, 0, stride, ipred), | |
166 | PredMode::TMPred => IPred16x16::ipred_tm(self, 0, stride, ipred), | |
167 | _ => {}, | |
168 | } | |
169 | } | |
170 | } | |
171 | impl IPredBlock8 for [u8; 64] { | |
172 | fn ipred8(&mut self, stride: usize, mode: PredMode, ipred: &IPredContext) { | |
173 | match mode { | |
174 | PredMode::DCPred => IPred8x8::ipred_dc(self, 0, stride, ipred), | |
175 | PredMode::HPred => IPred8x8::ipred_h (self, 0, stride, ipred), | |
176 | PredMode::VPred => IPred8x8::ipred_v (self, 0, stride, ipred), | |
177 | PredMode::TMPred => IPred8x8::ipred_tm(self, 0, stride, ipred), | |
178 | _ => {}, | |
179 | } | |
180 | } | |
181 | } | |
182 | impl IPredBlock4 for &mut [u8] { | |
183 | fn ipred4(&mut self, stride: usize, mode: PredMode, ipred: &IPredContext) { | |
184 | match mode { | |
185 | PredMode::DCPred => IPred4x4::ipred_dc(self, 0, stride, ipred), | |
186 | PredMode::HPred => IPred4x4::ipred_he(self, 0, stride, ipred), | |
187 | PredMode::VPred => IPred4x4::ipred_ve(self, 0, stride, ipred), | |
188 | PredMode::TMPred => IPred4x4::ipred_tm(self, 0, stride, ipred), | |
189 | PredMode::LDPred => IPred4x4::ipred_ld(self, 0, stride, ipred), | |
190 | PredMode::RDPred => IPred4x4::ipred_rd(self, 0, stride, ipred), | |
191 | PredMode::VRPred => IPred4x4::ipred_vr(self, 0, stride, ipred), | |
192 | PredMode::VLPred => IPred4x4::ipred_vl(self, 0, stride, ipred), | |
193 | PredMode::HDPred => IPred4x4::ipred_hd(self, 0, stride, ipred), | |
194 | PredMode::HUPred => IPred4x4::ipred_hu(self, 0, stride, ipred), | |
195 | _ => {}, | |
196 | } | |
197 | } | |
198 | } | |
199 | impl IPredBlock4 for [u8; 16] { | |
200 | fn ipred4(&mut self, stride: usize, mode: PredMode, ipred: &IPredContext) { | |
201 | (self as &mut [u8]).ipred4(stride, mode, ipred); | |
202 | } | |
203 | } | |
204 | ||
/// Iterator over the sixteen 4x4 sub-blocks of a 16x16 luma plane, in raster
/// order (left to right, then top to bottom).
pub struct LumaIterator<'a> {
    luma: &'a [u8; 256],
    blkno: usize,
}

impl<'a> Iterator for LumaIterator<'a> {
    type Item = [u8; 16];
    /// Copies the next 4x4 sub-block out of the plane, or returns `None` once
    /// all sixteen have been produced.
    fn next(&mut self) -> Option<Self::Item> {
        if self.blkno >= 16 {
            return None;
        }
        let col = self.blkno & 3;
        let row = self.blkno >> 2;
        // top-left pixel of the sub-block inside the 16-pixel-wide plane
        let origin = col * 4 + row * 16 * 4;

        let mut blk = [0; 16];
        for (line, dst) in blk.chunks_exact_mut(4).enumerate() {
            let start = origin + line * 16;
            dst.copy_from_slice(&self.luma[start..start + 4]);
        }
        self.blkno += 1;
        Some(blk)
    }
}
226 | ||
/// Iterator over the four 4x4 sub-blocks of an 8x8 chroma plane, in raster
/// order (left to right, then top to bottom).
pub struct ChromaIterator<'a> {
    chroma: &'a [u8; 64],
    blkno: usize,
}

impl<'a> Iterator for ChromaIterator<'a> {
    type Item = [u8; 16];
    /// Copies the next 4x4 sub-block out of the plane, or returns `None` once
    /// all four have been produced.
    fn next(&mut self) -> Option<Self::Item> {
        if self.blkno >= 4 {
            return None;
        }
        let col = self.blkno & 1;
        let row = self.blkno >> 1;
        // top-left pixel of the sub-block inside the 8-pixel-wide plane
        let origin = col * 4 + row * 8 * 4;

        let mut blk = [0; 16];
        for (line, dst) in blk.chunks_exact_mut(4).enumerate() {
            let start = origin + line * 8;
            dst.copy_from_slice(&self.chroma[start..start + 4]);
        }
        self.blkno += 1;
        Some(blk)
    }
}
248 | ||
/// Pixel data of one macroblock: a 16x16 luma plane and two 8x8 chroma planes,
/// each stored row by row.
pub struct SrcBlock {
    /// 16x16 luma samples.
    pub luma: [u8; 256],
    /// Two 8x8 chroma planes (index 0 and 1).
    pub chroma: [[u8; 64]; 2],
}

impl Default for SrcBlock {
    /// Creates a block with every sample set to zero.
    fn default() -> Self {
        // Plain array literals give the same all-zero value as the previous
        // `mem::zeroed()` call without needing `unsafe`.
        Self {
            luma: [0; 256],
            chroma: [[0; 64]; 2],
        }
    }
}
259 | ||
260 | impl SrcBlock { | |
261 | pub fn new() -> Self { Self::default() } | |
262 | pub fn is_flat(&self) -> bool { | |
263 | let y0 = self.luma[0]; | |
264 | for &el in self.luma[1..].iter() { | |
265 | if el != y0 { | |
266 | return false; | |
267 | } | |
268 | } | |
269 | true | |
270 | } | |
271 | pub fn apply_ipred_luma(&self, mode: PredMode, ipred: &IPredContext, dst: &mut Residue) { | |
272 | let mut tmp = [0; 256]; | |
e6aaad5c | 273 | tmp.ipred16(16, mode, ipred); |
c5d5793c KS |
274 | dst.set_luma_from_diff(&self.luma, &tmp); |
275 | } | |
276 | pub fn fill_ipred_luma(&mut self, mode: PredMode, ipred: &IPredContext) { | |
277 | self.luma.ipred16(16, mode, ipred); | |
278 | } | |
279 | pub fn apply_ipred_chroma(&self, mode: PredMode, ipred_u: &IPredContext, ipred_v: &IPredContext, dst: &mut Residue) { | |
280 | let mut tmp = [[0u8; 64]; 2]; | |
281 | tmp[0].ipred8(8, mode, ipred_u); | |
282 | tmp[1].ipred8(8, mode, ipred_v); | |
283 | dst.set_chroma_from_diff(&self.chroma, &tmp); | |
284 | } | |
285 | pub fn fill_ipred_chroma(&mut self, mode: PredMode, ipred_u: &IPredContext, ipred_v: &IPredContext) { | |
286 | self.chroma[0].ipred8(8, mode, ipred_u); | |
287 | self.chroma[1].ipred8(8, mode, ipred_v); | |
288 | } | |
289 | ||
290 | pub fn luma_blocks(&self) -> LumaIterator { | |
291 | LumaIterator{ luma: &self.luma, blkno: 0 } | |
292 | } | |
293 | pub fn chroma_blocks(&self, plane: usize) -> ChromaIterator { | |
294 | ChromaIterator{ chroma: &self.chroma[plane], blkno: 0 } | |
295 | } | |
296 | } | |
297 | ||
/// Residue (difference signal or its coefficients) of one macroblock.
#[derive(Clone)]
pub struct Residue {
    /// Sixteen 4x4 luma blocks in raster order.
    pub luma: [[i16; 16]; 16],
    /// Separate block holding the sixteen luma DC values; only meaningful
    /// while `has_dc` is set.
    pub dcs: [i16; 16],
    /// Four 4x4 blocks for each of the two chroma planes.
    pub chroma: [[[i16; 16]; 4]; 2],
    /// Whether the luma DCs have been moved into `dcs`.
    pub has_dc: bool,
    /// Quantiser index recorded by the last quantisation call.
    pub q: u8,
}

impl Default for Residue {
    /// Creates a residue with all coefficients zero, `has_dc` cleared and `q == 0`.
    fn default() -> Self {
        // Spelled out field by field; this yields the same value as the
        // previous `mem::zeroed()` call without needing `unsafe`.
        Self {
            luma: [[0; 16]; 16],
            dcs: [0; 16],
            chroma: [[[0; 16]; 4]; 2],
            has_dc: false,
            q: 0,
        }
    }
}
312 | ||
313 | impl Residue { | |
314 | pub fn new() -> Self { Self::default() } | |
315 | pub fn reset(&mut self) { | |
316 | self.has_dc = false; | |
317 | self.q = 242; | |
318 | } | |
319 | pub fn add_residue(&mut self, dst: &mut SrcBlock) { | |
320 | self.dequant(); | |
321 | self.idct(); | |
322 | ||
323 | for (dst, src) in dst.luma.chunks_mut(16 * 4).zip(self.luma.chunks(4)) { | |
324 | for (x, blk) in src.iter().enumerate() { | |
325 | for (drow, srow) in dst[x * 4..].chunks_mut(16).zip(blk.chunks(4)) { | |
326 | for (del, &sel) in drow.iter_mut().zip(srow.iter()) { | |
327 | *del = (i16::from(*del) + sel).max(0).min(255) as u8; | |
328 | } | |
329 | } | |
330 | } | |
331 | } | |
332 | for (dchroma, schroma) in dst.chroma.iter_mut().zip(self.chroma.iter()) { | |
333 | for (dst, src) in dchroma.chunks_mut(8 * 4).zip(schroma.chunks(2)) { | |
334 | for (x, blk) in src.iter().enumerate() { | |
335 | for (drow, srow) in dst[x * 4..].chunks_mut(8).zip(blk.chunks(4)) { | |
336 | for (del, &sel) in drow.iter_mut().zip(srow.iter()) { | |
337 | *del = (i16::from(*del) + sel).max(0).min(255) as u8; | |
338 | } | |
339 | } | |
340 | } | |
341 | } | |
342 | } | |
343 | } | |
344 | pub fn add_residue_chroma(&mut self, dst: &mut SrcBlock) { | |
345 | let q = self.q as usize; | |
346 | for (dchroma, schroma) in dst.chroma.iter_mut().zip(self.chroma.iter_mut()) { | |
347 | for (dst, src) in dchroma.chunks_mut(8 * 4).zip(schroma.chunks_mut(2)) { | |
348 | for (x, blk) in src.iter_mut().enumerate() { | |
349 | blk[0] *= UV_DC_QUANTS[q]; | |
350 | for el in blk[1..].iter_mut() { | |
351 | if *el != 0 { | |
352 | *el *= UV_AC_QUANTS[q]; | |
353 | } | |
354 | } | |
355 | blk.idct(); | |
356 | for (drow, srow) in dst[x * 4..].chunks_mut(8).zip(blk.chunks(4)) { | |
357 | for (del, &sel) in drow.iter_mut().zip(srow.iter()) { | |
358 | *del = (i16::from(*del) + sel).max(0).min(255) as u8; | |
359 | } | |
360 | } | |
361 | } | |
362 | } | |
363 | } | |
364 | } | |
365 | pub fn set_luma_from_diff(&mut self, blk1: &[u8; 256], blk2: &[u8; 256]) { | |
366 | for (dst, (src1, src2)) in self.luma.chunks_mut(4).zip(blk1.chunks(16 * 4).zip(blk2.chunks(16 * 4))) { | |
367 | for (x, blk) in dst.iter_mut().enumerate() { | |
368 | for (dst, (row1, row2)) in blk.chunks_mut(4).zip(src1[x * 4..].chunks(16).zip(src2[x * 4..].chunks(16))) { | |
369 | for (dst, (&a, &b)) in dst.iter_mut().zip(row1.iter().zip(row2.iter())) { | |
370 | *dst = i16::from(a) - i16::from(b); | |
371 | } | |
372 | } | |
373 | } | |
374 | } | |
375 | } | |
376 | pub fn set_chroma_from_diff(&mut self, blk1: &[[u8; 64]; 2], blk2: &[[u8; 64]; 2]) { | |
377 | for (chroma, (src1, src2)) in self.chroma.iter_mut().zip(blk1.iter().zip(blk2.iter())) { | |
378 | for (dst, (src1, src2)) in chroma.chunks_mut(2).zip(src1.chunks(8 * 4).zip(src2.chunks(8 * 4))) { | |
379 | for (x, blk) in dst.iter_mut().enumerate() { | |
380 | for (dst, (row1, row2)) in blk.chunks_mut(4).zip(src1[x * 4..].chunks(8).zip(src2[x * 4..].chunks(8))) { | |
381 | for (dst, (&a, &b)) in dst.iter_mut().zip(row1.iter().zip(row2.iter())) { | |
382 | *dst = i16::from(a) - i16::from(b); | |
383 | } | |
384 | } | |
385 | } | |
386 | } | |
387 | } | |
388 | } | |
389 | pub fn fdct(&mut self) { | |
390 | self.fdct_luma(); | |
391 | self.fdct_chroma(); | |
392 | } | |
393 | pub fn fdct_luma(&mut self) { | |
394 | for blk in self.luma.iter_mut() { | |
395 | blk.fdct(); | |
396 | } | |
397 | } | |
398 | pub fn fdct_chroma(&mut self) { | |
399 | for chroma in self.chroma.iter_mut() { | |
400 | for blk in chroma.iter_mut() { | |
401 | blk.fdct(); | |
402 | } | |
403 | } | |
404 | } | |
405 | pub fn fdct_dc_block(&mut self) { | |
406 | for (dc, blk) in self.dcs.iter_mut().zip(self.luma.iter_mut()) { | |
407 | *dc = blk[0]; | |
408 | blk[0] = 0; | |
409 | } | |
410 | self.dcs.fdct(); | |
411 | self.has_dc = true; | |
412 | } | |
413 | pub fn idct(&mut self) { | |
414 | self.idct_luma(); | |
415 | self.idct_chroma(); | |
416 | } | |
417 | pub fn idct_luma(&mut self) { | |
418 | if self.has_dc { | |
419 | self.dcs.idct(); | |
420 | for (&dc, blk) in self.dcs.iter().zip(self.luma.iter_mut()) { | |
421 | blk[0] = dc; | |
422 | } | |
423 | } | |
424 | for blk in self.luma.iter_mut() { | |
425 | blk.idct(); | |
426 | } | |
427 | } | |
428 | pub fn idct_chroma(&mut self) { | |
429 | for chroma in self.chroma.iter_mut() { | |
430 | for blk in chroma.iter_mut() { | |
431 | blk.idct(); | |
432 | } | |
433 | } | |
434 | } | |
435 | pub fn quant(&mut self, q: usize) { | |
436 | self.quant_luma(q); | |
437 | self.quant_chroma(q); | |
438 | self.q = q as u8; | |
439 | } | |
440 | pub fn quant_luma(&mut self, q: usize) { | |
441 | if self.has_dc { | |
442 | self.dcs[0] /= Y2_DC_QUANTS[q]; | |
443 | for el in self.dcs[1..].iter_mut() { | |
444 | if *el != 0 { | |
445 | *el /= Y2_AC_QUANTS[q]; | |
446 | } | |
447 | } | |
448 | } | |
449 | for blk in self.luma.iter_mut() { | |
450 | blk[0] /= Y_DC_QUANTS[q]; | |
451 | for el in blk[1..].iter_mut() { | |
452 | if *el != 0 { | |
453 | *el /= Y_AC_QUANTS[q]; | |
454 | } | |
455 | } | |
456 | } | |
457 | self.q = q as u8; | |
458 | } | |
459 | pub fn quant_chroma(&mut self, q: usize) { | |
460 | for chroma in self.chroma.iter_mut() { | |
461 | for blk in chroma.iter_mut() { | |
462 | blk[0] /= UV_DC_QUANTS[q]; | |
463 | for el in blk[1..].iter_mut() { | |
464 | if *el != 0 { | |
465 | *el /= UV_AC_QUANTS[q]; | |
466 | } | |
467 | } | |
468 | } | |
469 | } | |
470 | self.q = q as u8; | |
471 | } | |
472 | pub fn dequant(&mut self) { | |
473 | self.dequant_luma(); | |
474 | self.dequant_chroma(); | |
475 | } | |
476 | pub fn dequant_luma(&mut self) { | |
477 | let q = self.q as usize; | |
478 | if self.has_dc { | |
479 | self.dcs[0] *= Y2_DC_QUANTS[q]; | |
480 | for el in self.dcs[1..].iter_mut() { | |
481 | if *el != 0 { | |
482 | *el *= Y2_AC_QUANTS[q]; | |
483 | } | |
484 | } | |
485 | } | |
486 | for blk in self.luma.iter_mut() { | |
487 | blk[0] *= Y_DC_QUANTS[q]; | |
488 | for el in blk[1..].iter_mut() { | |
489 | if *el != 0 { | |
490 | *el *= Y_AC_QUANTS[q]; | |
491 | } | |
492 | } | |
493 | } | |
494 | } | |
495 | pub fn dequant_chroma(&mut self) { | |
496 | let q = self.q as usize; | |
497 | for chroma in self.chroma.iter_mut() { | |
498 | for blk in chroma.iter_mut() { | |
499 | blk[0] *= UV_DC_QUANTS[q]; | |
500 | for el in blk[1..].iter_mut() { | |
501 | if *el != 0 { | |
502 | *el *= UV_AC_QUANTS[q]; | |
503 | } | |
504 | } | |
505 | } | |
506 | } | |
507 | } | |
508 | } | |
509 | ||
510 | pub fn load_blocks(src: &NAVideoBuffer<u8>, sblocks: &mut Vec<SrcBlock>) { | |
511 | let data = src.get_data(); | |
512 | let y = &data[src.get_offset(0)..]; | |
513 | let u = &data[src.get_offset(1)..]; | |
514 | let v = &data[src.get_offset(2)..]; | |
515 | let ystride = src.get_stride(0); | |
516 | let ustride = src.get_stride(1); | |
517 | let vstride = src.get_stride(2); | |
518 | let (width, height) = src.get_dimensions(0); | |
519 | ||
520 | sblocks.clear(); | |
521 | for (ystrip, (ustrip, vstrip)) in y.chunks(ystride * 16).take((height + 15) / 16).zip(u.chunks(ustride * 8).zip(v.chunks(vstride * 8))) { | |
522 | for x in (0..width).step_by(16) { | |
523 | let mut sblk = SrcBlock::default(); | |
524 | ||
525 | for (dst, src) in sblk.luma.chunks_mut(16).zip(ystrip[x..].chunks(ystride)) { | |
526 | dst.copy_from_slice(&src[..16]); | |
527 | } | |
528 | for (dst, src) in sblk.chroma[0].chunks_mut(8).zip(ustrip[x / 2..].chunks(ustride)) { | |
529 | dst.copy_from_slice(&src[..8]); | |
530 | } | |
531 | for (dst, src) in sblk.chroma[1].chunks_mut(8).zip(vstrip[x / 2..].chunks(vstride)) { | |
532 | dst.copy_from_slice(&src[..8]); | |
533 | } | |
534 | sblocks.push(sblk); | |
535 | } | |
536 | } | |
537 | } | |
538 | ||
539 | pub struct YModePred { | |
540 | pub cache: GenericCache<PredMode>, | |
541 | } | |
542 | ||
543 | impl YModePred { | |
544 | fn resize(&mut self, mb_w: usize) { | |
545 | self.cache = GenericCache::new(4, mb_w * 4 + 1, PredMode::DCPred); | |
546 | } | |
547 | pub fn set_mode(&mut self, mb_x: usize, mode: PredMode) { | |
548 | for row in self.cache.data[self.cache.xpos + mb_x * 4..].chunks_mut(self.cache.stride).take(4) { | |
549 | for el in row[..4].iter_mut() { | |
550 | *el = mode.to_b_mode(); | |
551 | } | |
552 | } | |
553 | } | |
554 | pub fn set_modes4x4(&mut self, mb_x: usize, imodes: &[PredMode; 16], ctx: &mut [u8; 16]) { | |
555 | let mut off = self.cache.xpos + mb_x * 4; | |
556 | for y in 0..4 { | |
557 | for x in 0..4 { | |
558 | let top_idx = self.cache.data[off + x - self.cache.stride].to_b_index(); | |
559 | let left_idx = self.cache.data[off + x - 1].to_b_index(); | |
560 | self.cache.data[off + x] = imodes[x + y * 4]; | |
561 | ctx[x + y * 4] = ((top_idx * 10) + left_idx) as u8; | |
562 | } | |
563 | off += self.cache.stride; | |
564 | } | |
565 | } | |
566 | } | |
567 | ||
568 | impl Default for YModePred { | |
569 | fn default() -> Self { | |
570 | Self { | |
571 | cache: GenericCache::new(0, 0, PredMode::DCPred) | |
572 | } | |
573 | } | |
574 | } | |
575 | ||
/// Snapshot of the neighbouring "has non-zero coefficients" flags for one
/// macroblock, as filled in by `PredContext::fill_pctx`.
#[derive(Default)]
pub struct BlockPCtx {
    /// Number of neighbours (left, top) whose Y2 flag is set: 0, 1 or 2.
    pub nz_y2: u8,
    /// Flags of the four luma blocks above the macroblock.
    pub nz_y_top: [bool; 4],
    /// Flags of the four luma blocks to the left of the macroblock.
    pub nz_y_left: [bool; 4],
    /// Flags of the two chroma blocks above, per chroma plane.
    pub nz_c_top: [[bool; 2]; 2],
    /// Flags of the two chroma blocks to the left, per chroma plane.
    pub nz_c_left: [[bool; 2]; 2],
}
584 | ||
/// Prediction state carried from macroblock to macroblock: neighbouring pixels
/// for intra prediction, DC prediction history, non-zero flags, 4x4 prediction
/// modes and motion vectors.
#[derive(Default)]
pub struct PredContext {
    /// Frame width in macroblocks (set by `resize`).
    pub mb_w: usize,
    /// Frame height in macroblocks (set by `resize`).
    pub mb_h: usize,

    /// Bottom pixel row of the macroblocks above, one leading element plus
    /// 16 luma samples per macroblock.
    pub top_line_y: Vec<u8>,
    /// Same as `top_line_y` for the first chroma plane (8 samples per macroblock).
    pub top_line_u: Vec<u8>,
    /// Same as `top_line_y` for the second chroma plane (8 samples per macroblock).
    pub top_line_v: Vec<u8>,
    /// Top-left neighbour pixel for luma.
    pub tl_y: u8,
    /// Top-left neighbour pixel for the first chroma plane.
    pub tl_u: u8,
    /// Top-left neighbour pixel for the second chroma plane.
    pub tl_v: u8,

    /// Rightmost luma column of the previous macroblock.
    pub left_y: [u8; 16],
    /// Rightmost column of the previous macroblock, first chroma plane
    /// (`update_mb` writes only the first 8 entries).
    pub left_u: [u8; 16],
    /// Rightmost column of the previous macroblock, second chroma plane
    /// (`update_mb` writes only the first 8 entries).
    pub left_v: [u8; 16],

    /// Last Y2 DC value, indexed by `!last as usize`.
    pub dc_last: [i16; 2],
    /// Length of the current run of matching DC values, same indexing as `dc_last`.
    pub dc_count: [usize; 2],
    // copies made by `save_dc_pred` and put back by `restore_dc_pred`
    dc_last_saved: [i16; 2],
    dc_count_saved: [usize; 2],
    /// Y2 non-zero flag per macroblock column of the row above.
    pub nz_y2_top: Vec<bool>,
    /// Y2 non-zero flag of the macroblock to the left.
    pub nz_y2_left: bool,
    /// Luma non-zero flags of the row above, four per macroblock.
    pub nz_y_top: Vec<bool>,
    /// Luma non-zero flags of the macroblock to the left.
    pub nz_y_left: [bool; 4],
    /// Chroma non-zero flags of the row above, two per macroblock and plane.
    pub nz_c_top: [Vec<bool>; 2],
    /// Chroma non-zero flags of the macroblock to the left, per plane.
    pub nz_c_left: [[bool; 2]; 2],

    /// Cache of 4x4 luma prediction modes.
    pub ymodes: YModePred,

    /// Motion vectors, one per 4x4 block of the whole frame.
    pub mvs: Vec<MV>,
    /// Number of motion vectors per row of 4x4 blocks (`mb_w * 4` after `resize`).
    pub mv_stride: usize,
    /// Codec version; `find_mv_pred` treats version 0 specially.
    pub version: u8,
}
618 | ||
619 | impl PredContext { | |
620 | pub fn new() -> Self { Self::default() } | |
621 | pub fn resize(&mut self, mb_w: usize, mb_h: usize) { | |
622 | self.mb_w = mb_w; | |
623 | self.mb_h = mb_h; | |
624 | ||
625 | self.top_line_y.resize(mb_w * 16 + 1, 0); | |
626 | self.top_line_u.resize(mb_w * 8 + 1, 0); | |
627 | self.top_line_v.resize(mb_w * 8 + 1, 0); | |
628 | ||
629 | self.nz_y2_top.resize(mb_w, false); | |
630 | self.nz_y_top.resize(mb_w * 4, false); | |
631 | self.nz_c_top[0].resize(mb_w * 2, false); | |
632 | self.nz_c_top[1].resize(mb_w * 2, false); | |
633 | ||
634 | self.ymodes.resize(mb_w); | |
635 | ||
636 | self.mv_stride = mb_w * 4; | |
637 | self.mvs.resize(self.mv_stride * mb_h * 4, ZERO_MV); | |
638 | } | |
639 | ||
640 | pub fn reset(&mut self) { | |
641 | for el in self.top_line_y.iter_mut() { *el = 0x80; } | |
642 | for el in self.top_line_u.iter_mut() { *el = 0x80; } | |
643 | for el in self.top_line_v.iter_mut() { *el = 0x80; } | |
644 | self.left_y = [0x80; 16]; | |
645 | self.left_u = [0x80; 16]; | |
646 | self.left_v = [0x80; 16]; | |
647 | self.tl_y = 0x80; | |
648 | self.tl_u = 0x80; | |
649 | self.tl_v = 0x80; | |
650 | ||
651 | for el in self.nz_y_top.iter_mut() { *el = false; } | |
652 | self.nz_y_left = [false; 4]; | |
653 | for el in self.nz_y2_top.iter_mut() { *el = false; } | |
654 | self.nz_y2_left = false; | |
655 | for el in self.nz_c_top[0].iter_mut() { *el = false; } | |
656 | for el in self.nz_c_top[1].iter_mut() { *el = false; } | |
657 | self.nz_c_left = [[false; 2]; 2]; | |
658 | ||
659 | self.ymodes.cache.reset(); | |
660 | ||
661 | for mv in self.mvs.iter_mut() { *mv = ZERO_MV; } | |
662 | } | |
663 | pub fn reset_intra(&mut self) { | |
664 | self.dc_last = [0; 2]; | |
665 | self.dc_count = [0; 2]; | |
666 | self.dc_last_saved = [0; 2]; | |
667 | self.dc_count_saved = [0; 2]; | |
668 | } | |
669 | pub fn save_dc_pred(&mut self) { | |
670 | self.dc_last_saved = self.dc_last; | |
671 | self.dc_count_saved = self.dc_count; | |
672 | } | |
673 | #[allow(dead_code)] | |
674 | pub fn restore_dc_pred(&mut self) { | |
675 | self.dc_last = self.dc_last_saved; | |
676 | self.dc_count = self.dc_count_saved; | |
677 | } | |
678 | pub fn update_mb_row(&mut self) { | |
679 | self.left_y = [0x80; 16]; | |
680 | self.left_u = [0x80; 16]; | |
681 | self.left_v = [0x80; 16]; | |
682 | self.tl_y = 0x80; | |
683 | self.tl_u = 0x80; | |
684 | self.tl_v = 0x80; | |
685 | self.ymodes.cache.update_row(); | |
686 | } | |
687 | pub fn update_mb(&mut self, sblk: &SrcBlock, mb_x: usize) { | |
688 | for (dst, src) in self.left_y.iter_mut().zip(sblk.luma.chunks_exact(16)) { | |
689 | *dst = src[15]; | |
690 | } | |
691 | self.tl_y = self.top_line_y[mb_x * 16 + 16]; | |
692 | self.top_line_y[mb_x * 16 + 1..][..16].copy_from_slice(&sblk.luma[15 * 16..]); | |
693 | ||
694 | for (dst, src) in self.left_u.iter_mut().zip(sblk.chroma[0].chunks_exact(8)) { | |
695 | *dst = src[7]; | |
696 | } | |
697 | self.tl_u = self.top_line_u[mb_x * 8 + 8]; | |
698 | self.top_line_u[mb_x * 8 + 1..][..8].copy_from_slice(&sblk.chroma[0][7 * 8..]); | |
699 | ||
700 | for (dst, src) in self.left_v.iter_mut().zip(sblk.chroma[1].chunks_exact(8)) { | |
701 | *dst = src[7]; | |
702 | } | |
703 | self.tl_v = self.top_line_v[mb_x * 8 + 8]; | |
704 | self.top_line_v[mb_x * 8 + 1..][..8].copy_from_slice(&sblk.chroma[1][7 * 8..]); | |
705 | } | |
706 | pub fn fill_ipred(&mut self, plane: usize, mb_x: usize, ipred: &mut IPredContext) { | |
707 | match plane { | |
708 | 0 => { | |
709 | if ipred.has_top { | |
710 | ipred.top.copy_from_slice(&self.top_line_y[mb_x * 16 + 1..][..16]); | |
711 | ipred.tl = self.tl_y; | |
712 | } | |
713 | ipred.left.copy_from_slice(&self.left_y); | |
714 | ipred.has_left = mb_x > 0; | |
715 | }, | |
716 | 1 => { | |
717 | if ipred.has_top { | |
718 | ipred.top[..8].copy_from_slice(&self.top_line_u[mb_x * 8 + 1..][..8]); | |
719 | ipred.tl = self.tl_u; | |
720 | } | |
721 | ipred.left.copy_from_slice(&self.left_u); | |
722 | ipred.has_left = mb_x > 0; | |
723 | }, | |
724 | _ => { | |
725 | if ipred.has_top { | |
726 | ipred.top[..8].copy_from_slice(&self.top_line_v[mb_x * 8 + 1..][..8]); | |
727 | ipred.tl = self.tl_v; | |
728 | } | |
729 | ipred.left.copy_from_slice(&self.left_v); | |
730 | ipred.has_left = mb_x > 0; | |
731 | }, | |
732 | } | |
733 | } | |
734 | pub fn get_ipred_tr(&self, mb_x: usize) -> [u8; 4] { | |
735 | if mb_x < self.mb_w - 1 { | |
736 | let mut tr = [0; 4]; | |
737 | tr.copy_from_slice(&self.top_line_y[mb_x * 16 + 1 + 16..][..4]); | |
738 | tr | |
739 | } else { | |
740 | [0x80; 4] | |
741 | } | |
742 | } | |
743 | pub fn fill_pctx(&self, mb_x: usize, pctx: &mut BlockPCtx) { | |
744 | pctx.nz_y2 = (self.nz_y2_left as u8) + (self.nz_y2_top[mb_x] as u8); | |
745 | pctx.nz_y_left = self.nz_y_left; | |
746 | pctx.nz_y_top.copy_from_slice(&self.nz_y_top[mb_x * 4..][..4]); | |
747 | pctx.nz_c_left = self.nz_c_left; | |
748 | pctx.nz_c_top = [[self.nz_c_top[0][mb_x * 2], self.nz_c_top[0][mb_x * 2 + 1]], | |
749 | [self.nz_c_top[1][mb_x * 2], self.nz_c_top[1][mb_x * 2 + 1]]]; | |
750 | } | |
751 | pub fn set_nz(&mut self, mb_x: usize, blk: &Residue) { | |
752 | if blk.has_dc { | |
753 | let has_nz = blk.dcs.has_nz(); | |
754 | self.nz_y2_left = has_nz; | |
755 | self.nz_y2_top[mb_x] = has_nz; | |
756 | } | |
757 | for (y, blk_row) in blk.luma.chunks(4).enumerate() { | |
758 | for (x, blk) in blk_row.iter().enumerate() { | |
759 | let has_nz = blk.has_nz(); | |
760 | self.nz_y_left[y] = has_nz; | |
761 | self.nz_y_top[mb_x * 4 + x] = has_nz; | |
762 | } | |
763 | } | |
764 | for (c, chroma) in blk.chroma.iter().enumerate() { | |
765 | for (y, blk_row) in chroma.chunks(2).enumerate() { | |
766 | for (x, blk) in blk_row.iter().enumerate() { | |
767 | let has_nz = blk.has_nz(); | |
768 | self.nz_c_left[c][y] = has_nz; | |
769 | self.nz_c_top[c][mb_x * 2 + x] = has_nz; | |
770 | } | |
771 | } | |
772 | } | |
773 | } | |
774 | ||
775 | pub fn get_y2_dc_pred(&self, last: bool) -> i16 { | |
776 | let ref_id = !last as usize; | |
777 | if self.dc_count[ref_id] > 3 { | |
778 | self.dc_last[ref_id] | |
779 | } else { | |
780 | 0 | |
781 | } | |
782 | } | |
783 | pub fn predict_y2_dc(&mut self, dc: &mut i16, last: bool) { | |
784 | let ref_id = !last as usize; | |
785 | let pdc = self.dc_last[ref_id]; | |
786 | let orig_dc = *dc; | |
787 | ||
788 | if self.dc_count[ref_id] > 3 { | |
789 | *dc -= pdc; | |
790 | } | |
791 | ||
792 | if (pdc == 0) || (orig_dc == 0) || ((pdc ^ orig_dc) < 0) { | |
793 | self.dc_count[ref_id] = 0; | |
794 | } else if pdc == orig_dc { | |
795 | self.dc_count[ref_id] += 1; | |
796 | } | |
797 | self.dc_last[ref_id] = orig_dc; | |
798 | } | |
799 | ||
800 | pub fn fill_mv(&mut self, mb_x: usize, mb_y: usize, mv: MV) { | |
801 | let mut iidx = mb_x * 4 + mb_y * 4 * self.mv_stride; | |
802 | for _ in 0..4 { | |
803 | for x in 0..4 { | |
804 | self.mvs[iidx + x] = mv; | |
805 | } | |
806 | iidx += self.mb_w * 4; | |
807 | } | |
808 | } | |
809 | pub fn find_mv_pred(&self, mb_x: usize, mb_y: usize) -> ([u8; 4], MV, MV, MV) { | |
810 | let mut nearest_mv = ZERO_MV; | |
811 | let mut near_mv = ZERO_MV; | |
812 | ||
813 | let mut ct: [u8; 4] = [0; 4]; | |
814 | ||
815 | let start = if self.version == 0 { 1 } else { 0 }; | |
816 | let mvwrap = (self.mb_w as isize) + 1; | |
817 | for (yoff, xoff, weight, blk_no) in CAND_POS.iter() { | |
818 | let cx = (mb_x as isize) + (*xoff as isize); | |
819 | let cy = (mb_y as isize) + (*yoff as isize); | |
820 | let mvpos = cx + cy * mvwrap; | |
821 | if (mvpos < start) || ((mvpos % mvwrap) == (mvwrap - 1)) { | |
822 | ct[0] += weight; | |
823 | continue; | |
824 | } | |
825 | let cx = (mvpos % mvwrap) as usize; | |
826 | let cy = (mvpos / mvwrap) as usize; | |
827 | let bx = (*blk_no as usize) & 3; | |
828 | let by = (*blk_no as usize) >> 2; | |
829 | let blk_pos = cx * 4 + bx + (cy * 4 + by) * self.mv_stride; | |
830 | let mv = self.mvs[blk_pos]; | |
831 | if mv == ZERO_MV { | |
832 | ct[0] += weight; | |
833 | continue; | |
834 | } | |
835 | let idx; | |
836 | if (nearest_mv == ZERO_MV) || (nearest_mv == mv) { | |
837 | nearest_mv = mv; | |
838 | idx = 1; | |
839 | } else if near_mv == ZERO_MV { | |
840 | near_mv = mv; | |
841 | idx = 2; | |
842 | } else { | |
843 | idx = if mv == near_mv { 2 } else { 3 }; | |
844 | } | |
845 | ct[idx] += weight; | |
846 | } | |
847 | let pred_mv = if ct[1] > ct[2] { | |
848 | if ct[1] >= ct[0] { nearest_mv } else { ZERO_MV } | |
849 | } else { | |
850 | if ct[2] >= ct[0] { near_mv } else { ZERO_MV } | |
851 | }; | |
852 | ||
853 | let mvprobs = [INTER_MODE_PROBS[ct[0] as usize][0], | |
854 | INTER_MODE_PROBS[ct[1] as usize][1], | |
855 | INTER_MODE_PROBS[ct[2] as usize][2], | |
856 | INTER_MODE_PROBS[ct[2] as usize][3]]; | |
857 | ||
858 | (mvprobs, nearest_mv, near_mv, pred_mv) | |
859 | } | |
860 | } |