rv6: change loop filter to a version used in newer binary specifications
[nihav.git] / nihav-realmedia / src / codecs / rv60dsp.rs
CommitLineData
cd830591 1use nihav_core::frame::{NAVideoBuffer, NASimpleVideoFrame};
b4d5b851
KS
2use nihav_codec_support::codecs::MV;
3use nihav_codec_support::codecs::blockdsp::edge_emu;
52aad9fe
KS
4
/// Saturates a signed 16-bit value to the `0..=255` range and returns it as a byte.
fn clip8(val: i16) -> u8 {
    if val < 0 {
        0
    } else if val > 255 {
        255
    } else {
        val as u8
    }
}
6
/// Reads element `$idx` of the indexable `$buf` and converts it to `i16` with `as`.
macro_rules! el {
    ($buf: ident, $idx: expr) => {
        $buf[$idx] as i16
    };
}
10
/// Six-tap interpolation filter evaluated at position `$o` of `$s`, with
/// neighbouring samples taken `$step` elements apart.
///
/// The leading token selects the tap set:
/// * `01` - taps (1, -5, 52, 20, -5, 1), rounding 32, shift 6;
/// * `02` - taps (1, -5, 20, 20, -5, 1), rounding 16, shift 5;
/// * `03` - taps (1, -5, 20, 52, -5, 1), rounding 32, shift 6.
///
/// The result is saturated to a byte with `clip8`.
macro_rules! filter {
    // Internal rule: only the two central taps, the rounding term and the shift vary.
    (@taps $s: ident, $o: expr, $step: expr, $c0: expr, $c1: expr, $rnd: expr, $sh: expr) => (
        clip8(((      el!($s, $o - 2 * $step)
                - 5 * el!($s, $o - 1 * $step)
              + $c0 * el!($s, $o)
              + $c1 * el!($s, $o + 1 * $step)
                - 5 * el!($s, $o + 2 * $step)
                    + el!($s, $o + 3 * $step) + $rnd) >> $sh) as i16)
    );
    (01; $s: ident, $o: expr, $step: expr) => (
        filter!(@taps $s, $o, $step, 52, 20, 32, 6)
    );
    (02; $s: ident, $o: expr, $step: expr) => (
        filter!(@taps $s, $o, $step, 20, 20, 16, 5)
    );
    (03; $s: ident, $o: expr, $step: expr) => (
        filter!(@taps $s, $o, $step, 20, 52, 32, 6)
    );
}
37
/// Filters `$size` consecutive samples with `filter!`, reading from `$s`
/// starting at `$so` and writing to `$d` starting at `$do`.
///
/// `$mode` (1, 2 or 3) selects the tap set; any other value leaves `$d` untouched.
macro_rules! filter_row {
    ($d: ident, $do: expr, $s: ident, $so: expr, $step: expr, $size: expr, $mode: expr) => {{
        match $mode {
            1 => for x in 0..$size { $d[$do + x] = filter!(01; $s, $so + x, $step); },
            2 => for x in 0..$size { $d[$do + x] = filter!(02; $s, $so + x, $step); },
            3 => for x in 0..$size { $d[$do + x] = filter!(03; $s, $so + x, $step); },
            _ => {}
        }
    }};
}
60
b7c882c1 61#[allow(clippy::cognitive_complexity)]
52aad9fe
KS
62fn luma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, w: usize, h: usize, cx: usize, cy: usize) {
63 if (cx == 0) && (cy == 0) {
64 for _ in 0..h {
fa57381e 65 dst[didx..][..w].copy_from_slice(&src[sidx..][..w]);
52aad9fe
KS
66 didx += dstride;
67 sidx += sstride;
68 }
69 } else if cy == 0 {
70 for _ in 0..h {
71 filter_row!(dst, didx, src, sidx, 1, w, cx);
72 didx += dstride;
73 sidx += sstride;
74 }
75 } else if cx == 0 {
76 for _ in 0..h {
77 filter_row!(dst, didx, src, sidx, sstride, w, cy);
78 didx += dstride;
79 sidx += sstride;
80 }
81 } else if (cx != 3) || (cy != 3) {
82 let mut tmp: [u8; 70 * 64] = [0; 70 * 64];
83 for y in 0..h+5 {
84 filter_row!(tmp, y * 64, src, sidx - sstride * 2, 1, w, cx);
85 sidx += sstride;
86 }
87 for y in 0..h {
88 filter_row!(dst, didx, tmp, (y + 2) * 64, 64, w, cy);
89 didx += dstride;
90 }
91 } else {
92 for _ in 0..h {
93 for x in 0..w {
94 dst[didx + x] = ((el!(src, sidx + x) + el!(src, sidx + x + 1) +
95 el!(src, sidx + x + sstride) + el!(src, sidx + x + 1 + sstride) + 2) >> 2) as u8;
96 }
97 didx += dstride;
98 sidx += sstride;
99 }
100 }
101}
102
/// Chroma motion compensation for a `w`x`h` block using bilinear weights.
///
/// `x`/`y` are the fractional offsets (weights are built from `4 - x`, `x`,
/// `4 - y`, `y`, so they sum to 16). With both zero the block is copied;
/// with exactly one non-zero a two-sample blend along that axis is used;
/// otherwise four neighbouring samples are blended.
fn chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, w: usize, h: usize, x: usize, y: usize) {
    let px = |pos: usize| u16::from(src[pos]);

    match (x, y) {
        (0, 0) => {
            for _ in 0..h {
                dst[didx..didx + w].copy_from_slice(&src[sidx..sidx + w]);
                didx += dstride;
                sidx += sstride;
            }
        }
        (0, _) | (_, 0) => {
            let near = ((4 - x) * (4 - y)) as u16;
            let far = (x * (4 - y) + (4 - x) * y) as u16;
            // the second sample lies below for vertical offsets, to the right otherwise
            let step = if y > 0 { sstride } else { 1 };
            for _ in 0..h {
                for i in 0..w {
                    let p = sidx + i;
                    dst[didx + i] = ((near * px(p) + far * px(p + step) + 8) >> 4) as u8;
                }
                didx += dstride;
                sidx += sstride;
            }
        }
        _ => {
            // 3,3 case is the same as 3,2 for some reason
            let fy = if (x == 3) && (y == 3) { 2 } else { y };
            let w_tl = ((4 - x) * (4 - fy)) as u16;
            let w_tr = (x * (4 - fy)) as u16;
            let w_bl = ((4 - x) * fy) as u16;
            let w_br = (x * fy) as u16;
            for _ in 0..h {
                for i in 0..w {
                    let p = sidx + i;
                    let acc = w_tl * px(p)
                            + w_tr * px(p + 1)
                            + w_bl * px(p + sstride)
                            + w_br * px(p + 1 + sstride) + 8;
                    dst[didx + i] = (acc >> 4) as u8;
                }
                didx += dstride;
                sidx += sstride;
            }
        }
    }
}
143
/// Tests whether a `cw`x`ch` block at (`x`, `y`) displaced by (`dx`, `dy`),
/// extended by `e0`/`e1` samples to the left/right and `e2`/`e3` samples
/// above/below, lies completely inside a `w`x`h` area.
fn check_pos(x: usize, y: usize, cw: usize, ch: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let new_x = (x as isize) + (dx as isize);
    let new_y = (y as isize) + (dy as isize);

    let left = new_x - e0;
    let right = new_x + (cw as isize) + e1;
    let top = new_y - e2;
    let bottom = new_y + (ch as isize) + e3;

    left >= 0 && right <= (w as isize) && top >= 0 && bottom <= (h as isize)
}
150
/// Signed difference `$src[$a] - $src[$b]`, computed in `i16`.
macro_rules! diff {
    ($src: ident, $a: expr, $b: expr) => {
        ($src[$a] as i16) - ($src[$b] as i16)
    };
}
/// Yields 3 when the magnitude of `$el` is strictly below `$lim`, and 1 otherwise.
macro_rules! strength {
    ($el: expr, $lim: expr) => {
        match $el.abs() < $lim {
            true => 3,
            false => 1,
        }
    };
}
/// Limits `val` to the symmetric range `-lim..=lim`.
fn clip_symm(val: i16, lim: i16) -> i16 {
    let lower = -lim;
    let raised = if val < lower { lower } else { val };
    if raised > lim { lim } else { raised }
}
160
161fn filter_luma_edge(dst: &mut [u8], mut offset: usize, step: usize, stride: usize, mode1: u8, mode2: u8, lim1: i16, lim2: i16) {
162 let mut diff_q1q0: [i16; 4] = [0; 4];
163 let mut diff_p1p0: [i16; 4] = [0; 4];
164 for i in 0..4 {
165 let off = offset + i * stride;
166 diff_q1q0[i] = diff!(dst, off - 2 * step, off - step);
167 diff_p1p0[i] = diff!(dst, off + step, off);
168 }
169 let str_p = strength!(diff_q1q0[0] + diff_q1q0[1] + diff_q1q0[2] + diff_q1q0[3], lim2);
170 let str_q = strength!(diff_p1p0[0] + diff_p1p0[1] + diff_p1p0[2] + diff_p1p0[3], lim2);
171 if str_p + str_q > 2 {
172 let msum = ((mode1 + mode2 + str_q + str_p) >> 1) as i16;
9e4a63b2 173 let (maxprod, weak) = if (str_q == 1) || (str_p == 1) { (384, true) } else { (256, false) };
52aad9fe
KS
174 for y in 0..4 {
175 let diff_p0q0 = diff!(dst, offset, offset - step);
9e4a63b2 176 if (diff_p0q0 != 0) && (((lim1 * diff_p0q0.abs()) & !0x7F) <= maxprod) {
52aad9fe
KS
177 let diff_q1q2 = diff!(dst, offset - 2 * step, offset - 3 * step);
178 let diff_p1p2 = diff!(dst, offset + step, offset + 2 * step);
179 let delta = if weak {
180 clip_symm((diff_p0q0 + 1) >> 1, msum >> 1)
181 } else {
182 let diff_strg = (diff!(dst, offset - 2 * step, offset + step) + 4 * diff_p0q0 + 4) >> 3;
183 clip_symm(diff_strg, msum)
184 };
185 dst[offset - step] = clip8((dst[offset - step] as i16) + delta);
186 dst[offset] = clip8((dst[offset] as i16) - delta);
d649acc9 187 if (str_q != 1) && (diff_q1q2.abs() <= (lim2 >> 2)) {
52aad9fe
KS
188 let diff = (diff_q1q0[y] + diff_q1q2 - delta) >> 1;
189 let delta_q1 = if weak {
190 clip_symm(diff, (mode1 >> 1) as i16)
191 } else {
192 clip_symm(diff, mode1 as i16)
193 };
194 dst[offset - 2 * step] = clip8((dst[offset - 2 * step] as i16) - delta_q1);
195 }
d649acc9 196 if (str_p != 1) && (diff_p1p2.abs() <= (lim2 >> 2)) {
52aad9fe
KS
197 let diff = (diff_p1p0[y] + diff_p1p2 + delta) >> 1;
198 let delta_p1 = if weak {
199 clip_symm(diff, (mode2 >> 1) as i16)
200 } else {
201 clip_symm(diff, mode2 as i16)
202 };
203 dst[offset + step] = clip8((dst[offset + step] as i16) - delta_p1);
204 }
205 }
206 offset += stride;
207 }
208 }
209}
210fn filter_chroma_edge(dst: &mut [u8], mut offset: usize, step: usize, stride: usize, mode1: u8, mode2: u8, lim1: i16, lim2: i16) {
211 let diff_q = 4 * diff!(dst, offset - 2 * step, offset - step).abs();
212 let diff_p = 4 * diff!(dst, offset + step, offset ).abs();
213 let str_q = strength!(diff_q, lim2);
214 let str_p = strength!(diff_p, lim2);
215 if str_p + str_q > 2 {
216 let msum = ((mode1 + mode2 + str_q + str_p) >> 1) as i16;
9e4a63b2 217 let (maxprod, weak) = if (str_q == 1) || (str_p == 1) { (384, true) } else { (256, false) };
52aad9fe
KS
218 for _ in 0..2 {
219 let diff_pq = diff!(dst, offset, offset - step);
9e4a63b2 220 if (diff_pq != 0) && (((lim1 * diff_pq.abs()) & !0x7F) <= maxprod) {
52aad9fe
KS
221 let delta = if weak {
222 clip_symm((diff_pq + 1) >> 1, msum >> 1)
223 } else {
224 let diff_strg = (diff!(dst, offset - 2 * step, offset + step) + 4 * diff_pq + 4) >> 3;
225 clip_symm(diff_strg, msum)
226 };
227 dst[offset - step] = clip8((dst[offset - step] as i16) + delta);
228 dst[offset] = clip8((dst[offset] as i16) - delta);
229 }
230 offset += stride;
231 }
232 }
233}
234
/// Parameters shared by the deblocking routines.
pub struct RV60DeblockParams {
    /// Enables filtering of the chroma planes in addition to luma.
    pub deblock_chroma: bool,
    /// Width compared against `xpos + 8` to detect the last block column.
    pub width:          usize,
    /// Height compared against `ypos + 8` to detect the last block row.
    pub height:         usize,
    /// Distance between rows in the deblocking strength arrays.
    pub dblkstride:     usize,
}
241
/// Stateless collection of the DSP routines (transforms, motion compensation, deblocking).
pub struct RV60DSP {}
243/*pub fn rv6_transform4x4_dc(coeffs: &mut [i16]) {
244 let dc = (((coeffs[0] * 13 + 0x10) >> 5) * 13 + 0x10) >> 5;
245 for el in coeffs.iter_mut().take(16) {
246 *el = dc;
247 }
248}*/
249
250impl RV60DSP {
251 pub fn new() -> Self { Self{} }
252 pub fn transform4x4(&self, blk: &mut [i16]) {
253 let mut tmp: [i32; 4 * 4] = [0; 4 * 4];
254
255 for i in 0..4 {
256 let a = blk[i + 0 * 4] as i32;
257 let b = blk[i + 1 * 4] as i32;
258 let c = blk[i + 2 * 4] as i32;
259 let d = blk[i + 3 * 4] as i32;
260
261 let t0 = 13 * (a + c);
262 let t1 = 13 * (a - c);
263 let t2 = 7 * b - 17 * d;
264 let t3 = 7 * d + 17 * b;
265 tmp[i + 0 * 4] = (t0 + t3 + 0x10) >> 5;
266 tmp[i + 1 * 4] = (t1 + t2 + 0x10) >> 5;
267 tmp[i + 2 * 4] = (t1 - t2 + 0x10) >> 5;
268 tmp[i + 3 * 4] = (t0 - t3 + 0x10) >> 5;
269 }
270 for (dst, src) in blk.chunks_mut(4).zip(tmp.chunks(4)) {
271 let a = src[0];
272 let b = src[1];
273 let c = src[2];
274 let d = src[3];
275
276 let t0 = 13 * (a + c);
277 let t1 = 13 * (a - c);
278 let t2 = 7 * b - 17 * d;
279 let t3 = 7 * d + 17 * b;
280 dst[0] = ((t0 + t3 + 0x10) >> 5) as i16;
281 dst[1] = ((t1 + t2 + 0x10) >> 5) as i16;
282 dst[2] = ((t1 - t2 + 0x10) >> 5) as i16;
283 dst[3] = ((t0 - t3 + 0x10) >> 5) as i16;
284 }
285 }
286 /*pub fn transform8x8_dc(&self, blk: &mut [i16]) {
287 assert!(blk.len() >= 8 * 8);
288 let dc = (((coeffs[0] * 37 + 0x40) >> 7) * 37 + 0x40) >> 7;
289 for el in coeffs.iter_mut().take(8 * 8) {
290 *el = dc;
291 }
292 }*/
293 pub fn transform8x8(&self, blk: &mut [i16]) {
294 assert!(blk.len() >= 8 * 8);
295 let mut tmp: [i32; 8 * 8] = [0; 8 * 8];
296 for i in 0..8 {
297 let s0 = blk[i + 0 * 8] as i32;
298 let s1 = blk[i + 1 * 8] as i32;
299 let s2 = blk[i + 2 * 8] as i32;
300 let s3 = blk[i + 3 * 8] as i32;
301 let s4 = blk[i + 4 * 8] as i32;
302 let s5 = blk[i + 5 * 8] as i32;
303 let s6 = blk[i + 6 * 8] as i32;
304 let s7 = blk[i + 7 * 8] as i32;
305
306 let t0 = 37 * (s0 + s4);
307 let t1 = 37 * (s0 - s4);
308 let t2 = 48 * s2 + 20 * s6;
309 let t3 = 20 * s2 - 48 * s6;
310 let t4 = t0 + t2;
311 let t5 = t0 - t2;
312 let t6 = t1 + t3;
313 let t7 = t1 - t3;
314 let t8 = 51 * s1 + 43 * s3 + 29 * s5 + 10 * s7;
315 let t9 = 43 * s1 - 10 * s3 - 51 * s5 - 29 * s7;
316 let ta = 29 * s1 - 51 * s3 + 10 * s5 + 43 * s7;
317 let tb = 10 * s1 - 29 * s3 + 43 * s5 - 51 * s7;
318 tmp[i + 0 * 8] = (t4 + t8 + 0x40) >> 7;
319 tmp[i + 1 * 8] = (t6 + t9 + 0x40) >> 7;
320 tmp[i + 2 * 8] = (t7 + ta + 0x40) >> 7;
321 tmp[i + 3 * 8] = (t5 + tb + 0x40) >> 7;
322 tmp[i + 4 * 8] = (t5 - tb + 0x40) >> 7;
323 tmp[i + 5 * 8] = (t7 - ta + 0x40) >> 7;
324 tmp[i + 6 * 8] = (t6 - t9 + 0x40) >> 7;
325 tmp[i + 7 * 8] = (t4 - t8 + 0x40) >> 7;
326 }
327 for (dst, src) in blk.chunks_mut(8).zip(tmp.chunks(8)) {
328 let s0 = src[0];
329 let s1 = src[1];
330 let s2 = src[2];
331 let s3 = src[3];
332 let s4 = src[4];
333 let s5 = src[5];
334 let s6 = src[6];
335 let s7 = src[7];
336
337 let t0 = 37 * (s0 + s4);
338 let t1 = 37 * (s0 - s4);
339 let t2 = 48 * s2 + 20 * s6;
340 let t3 = 20 * s2 - 48 * s6;
341 let t4 = t0 + t2;
342 let t5 = t0 - t2;
343 let t6 = t1 + t3;
344 let t7 = t1 - t3;
345 let t8 = 51 * s1 + 43 * s3 + 29 * s5 + 10 * s7;
346 let t9 = 43 * s1 - 10 * s3 - 51 * s5 - 29 * s7;
347 let ta = 29 * s1 - 51 * s3 + 10 * s5 + 43 * s7;
348 let tb = 10 * s1 - 29 * s3 + 43 * s5 - 51 * s7;
349 dst[0] = ((t4 + t8 + 0x40) >> 7) as i16;
350 dst[1] = ((t6 + t9 + 0x40) >> 7) as i16;
351 dst[2] = ((t7 + ta + 0x40) >> 7) as i16;
352 dst[3] = ((t5 + tb + 0x40) >> 7) as i16;
353 dst[4] = ((t5 - tb + 0x40) >> 7) as i16;
354 dst[5] = ((t7 - ta + 0x40) >> 7) as i16;
355 dst[6] = ((t6 - t9 + 0x40) >> 7) as i16;
356 dst[7] = ((t4 - t8 + 0x40) >> 7) as i16;
357 }
358 }
359 /*pub fn transform16x16_dc(&self, blk: &mut [i16; 16 * 16]) {
360 let dc = (((coeffs[0] * 26 + 0x40) >> 7) * 26 + 0x40) >> 7;
361 for el in coeffs.iter_mut() {
362 *el = dc;
363 }
364 }*/
/// One-dimensional 16-point inverse transform, performed in place on the
/// sixteen coefficients `blk[off + k * step]`, `k = 0..16`.
///
/// Results are rounded with `+ 64` and shifted right by 7.
fn transform16(blk: &mut [i16; 16 * 16], off: usize, step: usize) {
    // Weights applied to the odd-indexed inputs (1, 3, ..., 15);
    // row k contributes to outputs k and 15 - k.
    const ODD: [[i32; 8]; 8] = [
        [ 37,  35,  32,  28,  23,  17,  11,   4 ],
        [ 35,  23,   4, -17, -32, -37, -28, -11 ],
        [ 32,   4, -28, -35, -11,  23,  37,  17 ],
        [ 28, -17, -35,   4,  37,  11, -32, -23 ],
        [ 23, -32, -11,  37,  -4, -35,  17,  28 ],
        [ 17, -37,  23,  11, -35,  28,   4, -32 ],
        [ 11, -28,  37, -32,  17,   4, -23,  35 ],
        [  4, -11,  17, -23,  28, -32,  35, -37 ],
    ];

    let mut s = [0i32; 16];
    for (k, v) in s.iter_mut().enumerate() {
        *v = i32::from(blk[off + k * step]);
    }

    // inputs 0, 4, 8, 12
    let t0 = 26 * (s[0] + s[8]);
    let t1 = 26 * (s[0] - s[8]);
    let t2 = 14 * s[4] - 34 * s[12];
    let t3 = 34 * s[4] + 14 * s[12];
    let t4 = t0 + t3;
    let t5 = t0 - t3;
    let t6 = t1 + t2;
    let t7 = t1 - t2;
    // inputs 2, 6, 10, 14
    let tmp00 = 31 * s[2] -  7 * s[6] - 36 * s[10] - 20 * s[14];
    let tmp01 = 36 * s[2] + 31 * s[6] + 20 * s[10] +  7 * s[14];
    let tmp02 = 20 * s[2] - 36 * s[6] +  7 * s[10] + 31 * s[14];
    let tmp03 =  7 * s[2] - 20 * s[6] + 31 * s[10] - 36 * s[14];
    // even-part terms listed in output order 0..8
    let even = [
        t4 + tmp01,
        t6 + tmp00,
        t7 + tmp02,
        t5 + tmp03,
        t5 - tmp03,
        t7 - tmp02,
        t6 - tmp00,
        t4 - tmp01,
    ];

    for k in 0..8 {
        let odd: i32 = ODD[k].iter().enumerate().map(|(j, &c)| c * s[2 * j + 1]).sum();
        blk[off + k * step]        = ((even[k] + odd + 64) >> 7) as i16;
        blk[off + (15 - k) * step] = ((even[k] - odd + 64) >> 7) as i16;
    }
}
428 pub fn transform16x16(&self, blk: &mut [i16; 16 * 16]) {
429 for i in 0..16 {
430 Self::transform16(blk, i, 16);
431 }
432 for i in 0..16 {
433 Self::transform16(blk, i * 16, 1);
434 }
435 }
436
437 pub fn add_block(&self, dst: &mut [u8], mut doff: usize, dstride: usize, blk: &[i16], size: usize) {
438 for y in 0..size {
439 for x in 0..size {
440 dst[doff + x] = clip8((dst[doff + x] as i16) + blk[x + y * size]);
441 }
442 doff += dstride;
443 }
444 }
445 fn avg(&self, dst: &mut [u8], mut didx: usize, dstride: usize,
446 src: &[u8], mut sidx: usize, sstride: usize,
447 w: usize, h: usize) {
448 for _ in 0..h {
449 for x in 0..w {
450 dst[didx + x] = (((dst[didx + x] as u16) + (src[sidx + x] as u16)) >> 1) as u8;
451 }
452 didx += dstride;
453 sidx += sstride;
454 }
455 }
cd830591 456 pub fn do_avg(&self, frame: &mut NASimpleVideoFrame<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, w: usize, h: usize) {
52aad9fe 457 for comp in 0..3 {
cd830591 458 let dstride = frame.stride[comp];
52aad9fe 459 let sstride = prev_frame.get_stride(comp);
cd830591 460 let doff = if comp == 0 { x + y * dstride } else { frame.offset[comp] + (x >> 1) + (y >> 1) * dstride };
52aad9fe 461 let soff = prev_frame.get_offset(comp);
cd830591 462 let dst = &mut frame.data;
52aad9fe
KS
463 let sdata = prev_frame.get_data();
464 let src: &[u8] = sdata.as_slice();
465
466 if comp == 0 {
467 self.avg(dst, doff, dstride, src, soff, sstride, w, h);
468 } else {
469 self.avg(dst, doff, dstride, src, soff, sstride, w >> 1, h >> 1);
470 }
471 }
472 }
cd830591 473 pub fn do_mc(&self, frame: &mut NASimpleVideoFrame<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, w: usize, h: usize, mv: MV, avg: bool) {
52aad9fe 474 { // luma
cd830591
KS
475 let dstride = frame.stride[0];
476 let doffset = frame.offset[0] + (if !avg { x + y * dstride } else { 0 });
477 let dst = &mut frame.data;
52aad9fe
KS
478
479 let (w_, h_) = prev_frame.get_dimensions(0);
480 let fw = (w_ + 15) & !15;
481 let fh = (h_ + 15) & !15;
482
483 let dx = mv.x >> 2;
484 let cx = (mv.x & 3) as usize;
485 let dy = mv.y >> 2;
486 let cy = (mv.y & 3) as usize;
487
488 if check_pos(x, y, w, h, fw, fh, dx, dy, RV60_EDGE1[cx], RV60_EDGE2[cx], RV60_EDGE1[cy], RV60_EDGE2[cy]) {
489 let sstride = prev_frame.get_stride(0);
490 let mut soffset = prev_frame.get_offset(0) + x + y * sstride;
491 let data = prev_frame.get_data();
492 let src: &[u8] = data.as_slice();
493 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
494 luma_mc(dst, doffset, dstride, src, soffset, sstride, w, h, cx, cy);
495 } else {
496 let mut ebuf: [u8; 70*70] = [0; 70*70];
70d30944 497 edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, w+5, h+5, &mut ebuf, 70, 0, 4);
52aad9fe
KS
498 luma_mc(dst, doffset, dstride, &ebuf, 70*2 + 2, 70, w, h, cx, cy);
499 }
500 }
501 let (w_, h_) = prev_frame.get_dimensions(1);
502 let fw = (w_ + 7) & !7;
503 let fh = (h_ + 7) & !7;
504 let mvx = mv.x / 2;
505 let mvy = mv.y / 2;
506 let dx = mvx >> 2;
507 let cx = (mvx & 3) as usize;
508 let dy = mvy >> 2;
509 let cy = (mvy & 3) as usize;
510 let cw = w >> 1;
511 let ch = h >> 1;
512
513 for comp in 1..3 { // chroma
cd830591
KS
514 let dstride = frame.stride[comp];
515 let doffset = frame.offset[comp] + (if !avg { (x >> 1) + (y >> 1) * dstride } else { 0 });
52aad9fe
KS
516 if check_pos(x >> 1, y >> 1, cw, ch, fw, fh, dx, dy, 0, 1, 0, 1) {
517 let sstride = prev_frame.get_stride(comp);
518 let mut soffset = prev_frame.get_offset(comp) + (x >> 1) + (y >> 1) * sstride;
519 let data = prev_frame.get_data();
520 let src: &[u8] = data.as_slice();
521 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
cd830591 522 chroma_mc(frame.data, doffset, dstride, src, soffset, sstride, cw, ch, cx, cy);
52aad9fe
KS
523 } else {
524 let mut ebuf: [u8; 40*40] = [0; 40*40];
70d30944 525 edge_emu(prev_frame, ((x >> 1) as isize) + (dx as isize), ((y >> 1) as isize) + (dy as isize), cw+1, ch+1, &mut ebuf, 40, comp, 3);
cd830591 526 chroma_mc(frame.data, doffset, dstride, &ebuf, 0, 40, cw, ch, cx, cy);
52aad9fe
KS
527 }
528 }
529 }
cd830591 530 fn deblock_edge4_ver(&self, frame: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize,
52aad9fe
KS
531 dblk_l: u8, dblk_r: u8, deblock_chroma: bool) {
532 let qp_l = dblk_l >> 2;
533 let str_l = dblk_l & 3;
534 let qp_r = dblk_r >> 2;
535 let str_r = dblk_r & 3;
536 let dl_l = &RV60_DEB_LIMITS[qp_l as usize];
537 let dl_r = &RV60_DEB_LIMITS[qp_r as usize];
538 let mode_l = if str_l != 0 { dl_l[(str_l - 1) as usize] } else { 0 };
539 let mode_r = if str_r != 0 { dl_r[(str_r - 1) as usize] } else { 0 };
540 let lim1 = dl_r[2] as i16;
541 let lim2 = (dl_r[3] * 4) as i16;
542 {
cd830591
KS
543 let stride = frame.stride[0];
544 let offset = frame.offset[0] + xpos + ypos * stride;
545 filter_luma_edge(frame.data, offset, 1, stride, mode_l, mode_r, lim1, lim2);
52aad9fe
KS
546 }
547 if ((str_l | str_r) >= 2) && deblock_chroma {
a15d97ad 548 for comp in 1..3 {
cd830591
KS
549 let stride = frame.stride[comp];
550 let offset = frame.offset[comp] + (xpos >> 1) + (ypos >> 1) * stride;
551 filter_chroma_edge(frame.data, offset, 1, stride, mode_l, mode_r, lim1, lim2);
52aad9fe
KS
552 }
553 }
554 }
cd830591 555 fn deblock_edge4_hor(&self, frame: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize,
52aad9fe
KS
556 dblk_t: u8, dblk_d: u8, deblock_chroma: bool) {
557 let qp_t = dblk_t >> 2;
558 let str_t = dblk_t & 3;
559 let qp_d = dblk_d >> 2;
560 let str_d = dblk_d & 3;
561 let dl_t = &RV60_DEB_LIMITS[qp_t as usize];
562 let dl_d = &RV60_DEB_LIMITS[qp_d as usize];
563 let mode_t = if str_t != 0 { dl_t[(str_t - 1) as usize] } else { 0 };
564 let mode_d = if str_d != 0 { dl_d[(str_d - 1) as usize] } else { 0 };
565 let lim1 = dl_d[2] as i16;
566 let lim2 = (dl_d[3] * 4) as i16;
567 {
cd830591
KS
568 let stride = frame.stride[0];
569 let offset = frame.offset[0] + xpos + ypos * stride;
570 filter_luma_edge(frame.data, offset, stride, 1, mode_t, mode_d, lim1, lim2);
52aad9fe
KS
571 }
572 if ((str_t | str_d) >= 2) && deblock_chroma {
a15d97ad 573 for comp in 1..3 {
cd830591
KS
574 let stride = frame.stride[comp];
575 let offset = frame.offset[comp] + (xpos >> 1) + (ypos >> 1) * stride;
576 filter_chroma_edge(frame.data, offset, stride, 1, mode_t, mode_d, lim1, lim2);
52aad9fe
KS
577 }
578 }
579 }
cd830591 580 fn deblock8x8(&self, dparams: &RV60DeblockParams, frame: &mut NASimpleVideoFrame<u8>,
52aad9fe
KS
581 xpos: usize, ypos: usize, top_str: &[u8], left_str: &[u8], dblkpos: usize) {
582 if xpos > 0 {
583 if ypos > 0 {
0091a508
KS
584 let str_l = left_str[dblkpos - dparams.dblkstride - 1];
585 let str_r = left_str[dblkpos - dparams.dblkstride];
586 if ((str_l | str_r) & 3) != 0 {
52aad9fe
KS
587 self.deblock_edge4_ver(frame, xpos, ypos - 4, str_l, str_r, dparams.deblock_chroma);
588 }
589 }
590 {
0091a508
KS
591 let str_l = left_str[dblkpos - 1];
592 let str_r = left_str[dblkpos];
593 if ((str_l | str_r) & 3) != 0 {
52aad9fe
KS
594 self.deblock_edge4_ver(frame, xpos, ypos + 0, str_l, str_r, dparams.deblock_chroma);
595 }
596 }
0091a508
KS
597 if ypos + 8 >= dparams.height {
598 let str_l = left_str[dblkpos + dparams.dblkstride - 1];
599 let str_r = left_str[dblkpos + dparams.dblkstride];
600 if ((str_l | str_r) & 3) != 0 {
52aad9fe
KS
601 self.deblock_edge4_ver(frame, xpos, ypos + 4, str_l, str_r, dparams.deblock_chroma);
602 }
603 }
604 }
605 if ypos > 0 {
606 if xpos > 0 {
0091a508
KS
607 let str_t = top_str[dblkpos - dparams.dblkstride - 1];
608 let str_d = top_str[dblkpos - 1];
609 if ((str_t | str_d) & 3) != 0 {
52aad9fe
KS
610 self.deblock_edge4_hor(frame, xpos - 4, ypos, str_t, str_d, dparams.deblock_chroma);
611 }
612 }
613 {
0091a508
KS
614 let str_t = top_str[dblkpos - dparams.dblkstride];
615 let str_d = top_str[dblkpos];
616 if ((str_t | str_d) & 3) != 0 {
52aad9fe
KS
617 self.deblock_edge4_hor(frame, xpos + 0, ypos, str_t, str_d, dparams.deblock_chroma);
618 }
619 }
0091a508
KS
620 if xpos + 8 >= dparams.width {
621 let str_t = top_str[dblkpos - dparams.dblkstride + 1];
622 let str_d = top_str[dblkpos + 1];
623 if ((str_t | str_d) & 3) != 0 {
52aad9fe
KS
624 self.deblock_edge4_hor(frame, xpos + 4, ypos, str_t, str_d, dparams.deblock_chroma);
625 }
626 }
627 }
628 }
cd830591 629 pub fn do_deblock(&self, dparams: &RV60DeblockParams, frame: &mut NASimpleVideoFrame<u8>,
52aad9fe
KS
630 xpos: usize, ypos: usize, size: usize, top_str: &[u8], left_str: &[u8], dpos: usize) {
631 for x in 0..(size >> 3) {
632 self.deblock8x8(dparams, frame, xpos + x * 8, ypos,
633 top_str, left_str, dpos + x * 2);
634 }
635 for y in 1..(size >> 3) {
636 self.deblock8x8(dparams, frame, xpos, ypos + y * 8,
637 top_str, left_str, dpos + y * 2 * dparams.dblkstride);
638 }
639 }
640}
641
642const RV60_DEB_LIMITS: [[u8; 4]; 32] = [
643 [ 0, 0, 128, 0 ], [ 0, 0, 128, 0 ], [ 0, 0, 128, 0 ], [ 0, 0, 128, 0 ],
644 [ 0, 0, 128, 0 ], [ 0, 0, 128, 0 ], [ 0, 0, 128, 0 ], [ 0, 0, 128, 0 ],
645 [ 0, 0, 128, 3 ], [ 0, 1, 128, 3 ], [ 0, 1, 122, 3 ], [ 1, 1, 96, 4 ],
646 [ 1, 1, 75, 4 ], [ 1, 1, 59, 4 ], [ 1, 1, 47, 6 ], [ 1, 1, 37, 6 ],
647 [ 1, 1, 29, 6 ], [ 1, 2, 23, 7 ], [ 1, 2, 18, 8 ], [ 1, 2, 15, 8 ],
648 [ 1, 2, 13, 9 ], [ 2, 3, 11, 9 ], [ 2, 3, 10, 10 ], [ 2, 3, 9, 10 ],
649 [ 2, 4, 8, 11 ], [ 3, 4, 7, 11 ], [ 3, 5, 6, 12 ], [ 3, 5, 5, 13 ],
650 [ 3, 5, 4, 14 ], [ 4, 7, 3, 15 ], [ 5, 8, 2, 16 ], [ 5, 9, 1, 17 ]
651];
652
/// Neighbouring samples and availability flags used for intra prediction.
#[derive(Clone)]
pub struct IntraPredContext {
    /// Top neighbours: 0 - TL or 0x80, two block sizes or replicated last val from block0.
    pub t:      [u8; 129],
    /// Left neighbours, indexed the same way as `t` by the prediction routines.
    pub l:      [u8; 129],
    /// Top neighbours are available.
    pub has_t:  bool,
    /// Presumably marks availability of the top-right neighbours; not read in this file section.
    pub has_tr: bool,
    /// Left neighbours are available.
    pub has_l:  bool,
    /// Presumably marks availability of the left-down neighbours; not read in this file section.
    pub has_ld: bool,
}
662
663impl IntraPredContext {
664 pub fn new() -> Self {
665 Self {
666 t: [0x80; 129], l: [0x80; 129], has_t: false, has_tr: false, has_l: false, has_ld: false,
667 }
668 }
669 pub fn pred_dc(&self, dst: &mut [u8], mut doff: usize, dstride: usize, size: usize, filter: bool) {
670 let dc;
671 if !self.has_t && !self.has_l {
672 dc = 0x80;
673 } else {
674 let mut sum = 0;
675 if self.has_t {
676 for x in 0..size { sum += self.t[x + 1] as u16; }
677 }
678 if self.has_l {
679 for y in 0..size { sum += self.l[y + 1] as u16; }
680 }
681 if self.has_t && self.has_l {
682 dc = ((sum + (size as u16)) / ((size as u16) * 2)) as u8;
683 } else {
684 dc = ((sum + ((size >> 1) as u16)) / (size as u16)) as u8;
685 }
686 }
687 for _ in 0..size {
688 for x in 0..size { dst[doff + x] = dc; }
689 doff += dstride;
690 }
691 if filter && self.has_t && self.has_l {
692 doff -= dstride * size;
693 dst[doff] = (((self.t[1] as u16) + (self.l[1] as u16) + 2 * (dst[doff] as u16) + 2) >> 2) as u8;
694 for x in 1..size {
695 dst[doff + x] = (((self.t[x + 1] as u16) + 3 * (dst[doff + x] as u16) + 2) >> 2) as u8;
696 }
697 for y in 1..size {
698 doff += dstride;
699 dst[doff] = (((self.l[y + 1] as u16) + 3 * (dst[doff] as u16) + 2) >> 2) as u8;
700 }
701 }
702 }
703 pub fn pred_plane(&self, dst: &mut [u8], mut doff: usize, dstride: usize, size: usize) {
704 let lastl = self.l[size + 1] as i32;
705 let lastt = self.t[size + 1] as i32;
706 let mut tmp1: [i32; 64] = [0; 64];
707 let mut tmp2: [i32; 64] = [0; 64];
708 for i in 0..size {
709 tmp1[i] = lastl - (self.t[i + 1] as i32);
710 tmp2[i] = lastt - (self.l[i + 1] as i32);
711 }
712 let shift = match size {
713 4 => 3,
714 8 => 4,
715 16 => 5,
716 32 => 6,
717 _ => 7,
718 };
719 let mut top_ref: [i32; 64] = [0; 64];
720 let mut left_ref:[i32; 64] = [0; 64];
721 for i in 0..size {
722 top_ref [i] = (self.t[i + 1] as i32) << (shift - 1);
723 left_ref[i] = (self.l[i + 1] as i32) << (shift - 1);
724 }
725 for y in 0..size {
726 let add = tmp2[y];
727 let mut sum = left_ref[y] + (size as i32);
728 for x in 0..size {
729 let v = tmp1[x] + top_ref[x];
730 sum += add;
731 top_ref[x] = v;
732 dst[doff + x] = ((sum + v) >> shift) as u8;
733 }
734 doff += dstride;
735 }
736 }
/// Angular prediction that fills the block column by column.
///
/// For each column the accumulated `weight` (in 1/32 sample units) selects a
/// starting position in `src` relative to index 32; a fractional remainder
/// blends two adjacent reference samples.
fn pred_hor_angle(dst: &mut [u8], doff: usize, dstride: usize, size: usize, weight: i16, src: &[u8]) {
    let mut acc: i16 = 0;
    for x in 0..size {
        acc += weight;
        let base = ((acc >> 5) + 32) as usize;
        let frac = (acc & 0x1F) as u16;
        let column = doff + x;
        for y in 0..size {
            let near = src[base + y];
            dst[column + y * dstride] = if frac == 0 {
                near
            } else {
                let a = u16::from(near);
                let b = u16::from(src[base + y + 1]);
                (((32 - frac) * a + frac * b + 0x10) >> 5) as u8
            };
        }
    }
}
/// Angular prediction that fills the block row by row.
///
/// For each row the accumulated `weight` (in 1/32 sample units) selects a
/// starting position in `src` relative to index 32; a fractional remainder
/// blends two adjacent reference samples.
fn pred_ver_angle(dst: &mut [u8], mut doff: usize, dstride: usize, size: usize, weight: i16, src: &[u8]) {
    let mut acc: i16 = 0;
    for _ in 0..size {
        acc += weight;
        let base = ((acc >> 5) + 32) as usize;
        let frac = (acc & 0x1F) as u16;
        let row = &mut dst[doff..doff + size];
        if frac == 0 {
            row.copy_from_slice(&src[base..base + size]);
        } else {
            for (x, pix) in row.iter_mut().enumerate() {
                let a = u16::from(src[base + x]);
                let b = u16::from(src[base + x + 1]);
                *pix = (((32 - frac) * a + frac * b + 0x10) >> 5) as u8;
            }
        }
        doff += dstride;
    }
}
/// Smooths the first `size` samples of `src` into `dst` with a (1, 2, 1)/4
/// kernel; the first and the last sample are copied unchanged.
fn filter_weak(dst: &mut [u8], src: &[u8], size: usize) {
    let tap = |pos: usize| u16::from(src[pos]);

    dst[0] = src[0];
    for i in 1..size - 1 {
        let smoothed = (tap(i - 1) + 2 * tap(i) + tap(i + 1) + 2) >> 2;
        dst[i] = smoothed as u8;
    }
    dst[size - 1] = src[size - 1];
}
/// Writes `size` samples of a linear ramp that starts at `v0` and advances
/// by `(v1 - v0) / 32` per sample (5-bit fixed point, rounded).
fn filter_bilin32(dst: &mut [u8], v0: u8, v1: u8, size: usize) {
    let slope = i16::from(v1) - i16::from(v0);
    let mut acc = (i16::from(v0) << 5) + (1 << (5 - 1));
    for pix in dst[..size].iter_mut() {
        *pix = (acc >> 5) as u8;
        acc += slope;
    }
}
b7c882c1 789 #[allow(clippy::cognitive_complexity)]
52aad9fe
KS
790 pub fn pred_angle(&self, dst: &mut [u8], mut doff: usize, dstride: usize, size: usize, angle: usize, filter: bool) {
791 let mut filtered1: [u8; 96] = [0; 96];
792 let mut filtered2: [u8; 96] = [0; 96];
793 if angle == 0 {
794 self.pred_plane(dst, doff, dstride, size);
795 } else if angle == 1 {
796 self.pred_dc(dst, doff, dstride, size, filter);
797 } else if angle <= 9 {
798 let ang_weight = RV60_IPRED_ANGLE[10 - angle];
799 let add_size = (size * (ang_weight as usize) + 31) >> 5;
800 if size <= 16 {
801 Self::filter_weak(&mut filtered1[32..], &self.l[1..], size + add_size);
802 } else {
803 Self::filter_bilin32(&mut filtered1[32..], self.l[1], self.l[33], 32);
804 Self::filter_bilin32(&mut filtered1[64..], self.l[32], self.l[64], add_size);
805 }
806 Self::pred_hor_angle(dst, doff, dstride, size, ang_weight as i16, &filtered1);
807 } else if angle == 10 {
808 if size <= 16 {
809 Self::filter_weak(&mut filtered1[32..], &self.l[1..], size);
810 } else {
811 Self::filter_bilin32(&mut filtered1[32..], self.l[1], self.l[33], 32);
812 }
813 for y in 0..size {
814 for x in 0..size {
815 dst[doff + x] = filtered1[32 + y];
816 }
817 doff += dstride;
818 }
819 if filter {
820 doff -= dstride * size;
821 let tl = self.t[0] as i16;
822 for x in 0..size {
823 dst[doff + x] = clip8((dst[doff + x] as i16) + (((self.t[x + 1] as i16) - tl) >> 1));
824 }
825 }
826 } else if angle <= 17 {
827 let ang_weight = RV60_IPRED_ANGLE [angle - 10];
828 let inv_angle = RV60_IPRED_INV_ANGLE[angle - 10];
829 let add_size = (size * (ang_weight as usize) + 31) >> 5;
830 if size <= 16 {
e07387c7 831 for i in 0..=size {
52aad9fe
KS
832 filtered1[32-1 + i] = self.l[i];
833 }
e07387c7 834 for i in 0..=size {
52aad9fe
KS
835 filtered2[32-1 + i] = self.t[i];
836 }
837 } else {
838 filtered1[32-1] = self.l[0];
839 Self::filter_bilin32(&mut filtered1[32..], self.l[0], self.l[32], 32);
840 filtered2[32-1] = self.t[0];
841 Self::filter_bilin32(&mut filtered2[32..], self.t[0], self.t[32], 32);
842 }
843 if add_size > 1 {
844 let mut sum = 0x80;
845 for i in 1..add_size {
846 sum += inv_angle;
847 let pos = ((sum >> 8) + 32 - 1) as usize;
848 filtered1[32 - 1 - i] = filtered2[pos];
849 }
850 }
851 Self::pred_hor_angle(dst, doff, dstride, size, -(ang_weight as i16), &filtered1);
852 } else if angle <= 25 {
853 let ang_weight = RV60_IPRED_ANGLE[26 - angle];
854 let inv_angle = RV60_IPRED_INV_ANGLE[26 - angle];
855 let add_size = (size * (ang_weight as usize) + 31) >> 5;
856 if size <= 16 {
e07387c7 857 for i in 0..=size {
52aad9fe
KS
858 filtered1[32-1 + i] = self.t[i];
859 }
e07387c7 860 for i in 0..=size {
52aad9fe
KS
861 filtered2[32-1 + i] = self.l[i];
862 }
863 } else {
864 filtered1[32-1] = self.t[0];
865 Self::filter_bilin32(&mut filtered1[32..], self.t[0], self.t[32], 32);
866 filtered2[32-1] = self.l[0];
867 Self::filter_bilin32(&mut filtered2[32..], self.l[0], self.l[32], 32);
868 }
869 if add_size > 1 {
870 let mut sum = 0x80;
871 for i in 1..add_size {
872 sum += inv_angle;
873 let pos = ((sum >> 8) + 32 - 1) as usize;
874 filtered1[32 - 1 - i] = filtered2[pos];
875 }
876 }
877 Self::pred_ver_angle(dst, doff, dstride, size, -(ang_weight as i16), &filtered1);
878 } else if angle == 26 {
879 if size <= 16 {
880 Self::filter_weak(&mut filtered1[32..], &self.t[1..], size);
881 } else {
882 Self::filter_bilin32(&mut filtered1[32..], self.t[1], self.t[33], 32);
883 }
884 for _ in 0..size {
fa57381e 885 dst[doff..][..size].copy_from_slice(&filtered1[32..][..size]);
52aad9fe
KS
886 doff += dstride;
887 }
888 if filter {
889 doff -= dstride * size;
890 let tl = self.l[0] as i16;
891 for y in 0..size {
892 dst[doff] = clip8((dst[doff] as i16) + (((self.l[y + 1] as i16) - tl) >> 1));
893 doff += dstride;
894 }
895 }
896 } else if angle <= 34 {
897 let ang_weight = RV60_IPRED_ANGLE[angle - 26];
898 let add_size = (size * (ang_weight as usize) + 31) >> 5;
899 if size <= 16 {
900 Self::filter_weak(&mut filtered1[32..], &self.t[1..], size + add_size);
901 } else {
902 Self::filter_bilin32(&mut filtered1[32..], self.t[1], self.t[33], 32);
903 Self::filter_bilin32(&mut filtered1[64..], self.t[32], self.t[64], add_size);
904 }
905 Self::pred_ver_angle(dst, doff, dstride, size, ang_weight as i16, &filtered1);
906 } else {
907 unreachable!();
908 }
909 }
910}
911
/// Per-line displacement (in 1/32 sample units) for the angular intra prediction modes.
const RV60_IPRED_ANGLE: [u8; 9] = [
    0, 2, 5, 9, 13, 17, 21, 26, 32,
];
/// Step (8-bit fixed point) used to project side reference samples for negative angles;
/// indexed like `RV60_IPRED_ANGLE`.
const RV60_IPRED_INV_ANGLE: [i16; 9] = [
    0, 4096, 1638, 910, 630, 482, 390, 315, 256,
];
/// Extra samples required before a luma block, indexed by the fractional motion vector part.
const RV60_EDGE1: [isize; 4] = [
    0, 2, 2, 2,
];
/// Extra samples required after a luma block, indexed by the fractional motion vector part.
const RV60_EDGE2: [isize; 4] = [
    0, 3, 3, 3,
];
916