]> git.nihav.org Git - nihav.git/blame - nihav-realmedia/src/codecs/rv40dsp.rs
avimux: do not record palette change chunks in OpenDML index
[nihav.git] / nihav-realmedia / src / codecs / rv40dsp.rs
CommitLineData
5641dccf 1use nihav_core::frame::{FrameType, NAVideoBuffer};
b4d5b851
KS
2use nihav_codec_support::codecs::MV;
3use nihav_codec_support::codecs::blockdsp::edge_emu;
47527732
KS
4use super::rv3040::{RV34DSP, RV34MBInfo};
5
/// Saturates a signed 16-bit intermediate value to the `0..=255` range of an
/// 8-bit sample.
fn clip8(a: i16) -> u8 {
    // After the max/min the value is guaranteed to fit, so the cast is lossless.
    a.max(0).min(255) as u8
}
11
// Reads element `$o` of the indexable `$s` and widens it to `i16` so that
// filter arithmetic can go negative without wrapping.
macro_rules! el {
    ($s: ident, $o: expr) => ( $s[$o] as i16 )
}
15
// Interpolation kernels used by the luma motion compensation functions.
//
// `filter!(01|02|03; src, pos, step)` evaluates a 6-tap filter on the samples
// `src[pos - 2*step] ..= src[pos + 3*step]` and returns the clipped `u8` result:
//   * `01` weights the sample at `pos` most (1, -5, 52, 20, -5, 1; rounded, >> 6),
//   * `02` is symmetric (1, -5, 20, 20, -5, 1; rounded, >> 5),
//   * `03` weights the sample at `pos + step` most (1, -5, 20, 52, -5, 1; rounded, >> 6).
// `filter!(33; src, pos, stride)` is the rounded average of the 2x2 block whose
// top-left sample is `src[pos]`.
//
// The expansion relies on `el!` and `clip8` being in scope at the call site.
macro_rules! filter {
    (01; $s: ident, $o: expr, $step: expr) => (
            clip8((       el!($s, $o - 2 * $step)
                    - 5 * el!($s, $o - 1 * $step)
                   + 52 * el!($s, $o - 0 * $step)
                   + 20 * el!($s, $o + 1 * $step)
                    - 5 * el!($s, $o + 2 * $step)
                    +     el!($s, $o + 3 * $step) + 32) >> 6)
        );
    (02; $s: ident, $o: expr, $step: expr) => (
            clip8((       el!($s, $o - 2 * $step)
                    - 5 * el!($s, $o - 1 * $step)
                   + 20 * el!($s, $o - 0 * $step)
                   + 20 * el!($s, $o + 1 * $step)
                    - 5 * el!($s, $o + 2 * $step)
                    +     el!($s, $o + 3 * $step) + 16) >> 5)
        );
    (03; $s: ident, $o: expr, $step: expr) => (
            clip8((       el!($s, $o - 2 * $step)
                    - 5 * el!($s, $o - 1 * $step)
                   + 20 * el!($s, $o - 0 * $step)
                   + 52 * el!($s, $o + 1 * $step)
                    - 5 * el!($s, $o + 2 * $step)
                    +     el!($s, $o + 3 * $step) + 32) >> 6)
        );
    (33; $s: ident, $o: expr, $stride: expr) => (
            clip8((  el!($s, $o)
                   + el!($s, $o + 1)
                   + el!($s, $o + $stride)
                   + el!($s, $o + 1 + $stride) + 2) >> 2)
        );
}
48
// Generates one square-block motion compensation function.
//
// Every generated function has the signature
// `fn(dst, didx, dstride, src, sidx, sstride)`: it writes a `$size` x `$size`
// block into `dst` starting at `didx` (rows `dstride` apart), reading from
// `src` starting at `sidx` (rows `sstride` apart).
//
// Arms:
//   * `copy`          - plain block copy;
//   * `mc01/02/03`    - one pass of `filter!(01/02/03)`, vertical when `$ver`
//                       is true and horizontal otherwise;
//   * `cm01/02/03`    - horizontal `filter!(01/02/03)` into a temporary buffer
//                       followed by the vertical function `$ofilt`;
//   * `mc33`          - 2x2 averaging via `filter!(33)`.
macro_rules! mc_func {
    (copy; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                dst[didx..][..$size].copy_from_slice(&src[sidx..][..$size]);
                didx += dstride;
                sidx += sstride;
            }
        }
        );
    (mc01; $name: ident, $size: expr, $ver: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            // Distance between filter taps: one row when filtering vertically.
            let step = if $ver { sstride } else { 1 };
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = filter!(01; src, sidx + x, step);
                }
                sidx += sstride;
                didx += dstride;
            }
        }
        );
    (mc02; $name: ident, $size: expr, $ver: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            let step = if $ver { sstride } else { 1 };
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = filter!(02; src, sidx + x, step);
                }
                sidx += sstride;
                didx += dstride;
            }
        }
        );
    (mc03; $name: ident, $size: expr, $ver: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            let step = if $ver { sstride } else { 1 };
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = filter!(03; src, sidx + x, step);
                }
                sidx += sstride;
                didx += dstride;
            }
        }
        );
    (cm01; $name: ident, $size: expr, $ofilt: ident) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            // The vertical pass reads two rows above and three rows below the
            // block, so filter `$size + 5` rows horizontally, starting two rows up.
            let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
            let mut bidx = 0;
            let bstride = $size;
            sidx -= sstride * 2;
            for _ in 0..($size + 5) {
                for x in 0..$size { buf[bidx + x] = filter!(01; src, sidx + x, 1); }
                bidx += bstride;
                sidx += sstride;
            }
            // Row 2 of the temporary buffer corresponds to the first block row.
            $ofilt(dst, didx, dstride, &buf, 2*bstride, $size);
        }
        );
    (cm02; $name: ident, $size: expr, $ofilt: ident) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
            let mut bidx = 0;
            let bstride = $size;
            sidx -= sstride * 2;
            for _ in 0..($size + 5) {
                for x in 0..$size { buf[bidx + x] = filter!(02; src, sidx + x, 1); }
                bidx += bstride;
                sidx += sstride;
            }
            $ofilt(dst, didx, dstride, &buf, 2*bstride, $size);
        }
        );
    (cm03; $name: ident, $size: expr, $ofilt: ident) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
            let mut bidx = 0;
            let bstride = $size;
            sidx -= sstride * 2;
            for _ in 0..($size + 5) {
                for x in 0..$size { buf[bidx + x] = filter!(03; src, sidx + x, 1); }
                bidx += bstride;
                sidx += sstride;
            }
            $ofilt(dst, didx, dstride, &buf, 2*bstride, $size);
        }
        );
    (mc33; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                for x in 0..$size { dst[didx + x] = filter!(33; src, sidx + x, sstride); }
                sidx += sstride;
                didx += dstride;
            }
        }
        );
}
149mc_func!(copy; copy_16, 16);
150mc_func!(copy; copy_8, 8);
151mc_func!(mc01; luma_mc_10_16, 16, false);
152mc_func!(mc01; luma_mc_10_8, 8, false);
153mc_func!(mc02; luma_mc_20_16, 16, false);
154mc_func!(mc02; luma_mc_20_8, 8, false);
155mc_func!(mc03; luma_mc_30_16, 16, false);
156mc_func!(mc03; luma_mc_30_8, 8, false);
157mc_func!(mc01; luma_mc_01_16, 16, true);
158mc_func!(mc01; luma_mc_01_8, 8, true);
159mc_func!(mc02; luma_mc_02_16, 16, true);
160mc_func!(mc02; luma_mc_02_8, 8, true);
161mc_func!(mc03; luma_mc_03_16, 16, true);
162mc_func!(mc03; luma_mc_03_8, 8, true);
163mc_func!(cm01; luma_mc_11_16, 16, luma_mc_01_16);
164mc_func!(cm01; luma_mc_11_8, 8, luma_mc_01_8);
165mc_func!(cm01; luma_mc_12_16, 16, luma_mc_02_16);
166mc_func!(cm01; luma_mc_12_8, 8, luma_mc_02_8);
167mc_func!(cm01; luma_mc_13_16, 16, luma_mc_03_16);
168mc_func!(cm01; luma_mc_13_8, 8, luma_mc_03_8);
169mc_func!(cm02; luma_mc_21_16, 16, luma_mc_01_16);
170mc_func!(cm02; luma_mc_21_8, 8, luma_mc_01_8);
171mc_func!(cm02; luma_mc_22_16, 16, luma_mc_02_16);
172mc_func!(cm02; luma_mc_22_8, 8, luma_mc_02_8);
173mc_func!(cm02; luma_mc_23_16, 16, luma_mc_03_16);
174mc_func!(cm02; luma_mc_23_8, 8, luma_mc_03_8);
175mc_func!(cm03; luma_mc_31_16, 16, luma_mc_01_16);
176mc_func!(cm03; luma_mc_31_8, 8, luma_mc_01_8);
177mc_func!(cm03; luma_mc_32_16, 16, luma_mc_02_16);
178mc_func!(cm03; luma_mc_32_8, 8, luma_mc_02_8);
8877a319
KS
179mc_func!(mc33; luma_mc_33_16, 16);
180mc_func!(mc33; luma_mc_33_8, 8);
47527732
KS
181
/// Rounding bias for chroma interpolation, indexed as `[y >> 1][x >> 1]`
/// with `x`/`y` being the quarter-sample fractions.
const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [
    [ 0, 4, 8, 4 ],
    [ 8, 7, 8, 7 ],
    [ 0, 8, 4, 8 ],
    [ 8, 7, 8, 7 ]
];

/// Chroma motion compensation for a `size` x `size` block.
///
/// * `dst`, `didx`, `dstride` - destination buffer, start index and row pitch.
/// * `src`, `sidx`, `sstride` - source buffer, start index and row pitch.
/// * `x`, `y` - fractional position in quarter samples (weights are built
///   from `4 - x` and `4 - y`, so callers pass values in `0..4`).
///
/// With both fractions zero the block is copied; with both non-zero a
/// bilinear blend of the four neighbouring samples is used; otherwise only
/// the two neighbours along the fractional axis are blended.
fn rv40_chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
    if (x == 0) && (y == 0) {
        for _ in 0..size {
            dst[didx..][..size].copy_from_slice(&src[sidx..][..size]);
            didx += dstride;
            sidx += sstride;
        }
        return;
    }
    let bias = RV40_CHROMA_BIAS[y >> 1][x >> 1];
    if (x > 0) && (y > 0) {
        // Bilinear weights; they always sum to 16, matching the `>> 4` below.
        let a = ((4 - x) * (4 - y)) as u16;
        let b = ((    x) * (4 - y)) as u16;
        let c = ((4 - x) * (    y)) as u16;
        let d = ((    x) * (    y)) as u16;
        for _ in 0..size {
            for i in 0..size {
                dst[didx + i] = ((a * (src[sidx + i] as u16)
                                + b * (src[sidx + i + 1] as u16)
                                + c * (src[sidx + i + sstride] as u16)
                                + d * (src[sidx + i + 1 + sstride] as u16) + bias) >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    } else {
        // Exactly one of `x`/`y` is non-zero here, so one term of `e` vanishes
        // and only the neighbour along that axis contributes.
        let a = ((4 - x) * (4 - y)) as u16;
        let e = ((    x) * (4 - y) + (4 - x) * (    y)) as u16;
        let step = if y > 0 { sstride } else { 1 };
        for _ in 0..size {
            for i in 0..size {
                dst[didx + i] = ((a * (src[sidx + i] as u16)
                                + e * (src[sidx + i + step] as u16) + bias) >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    }
}
228
/// Table of luma motion compensation routines.
///
/// `luma_mc[0]` holds the 16x16 functions and `luma_mc[1]` the 8x8 ones.
/// Within each row the entry is selected by `cx + cy * 4`, where `cx`/`cy`
/// are the quarter-sample fractions of the motion vector.
#[allow(clippy::type_complexity)]
pub struct RV40DSP {
    luma_mc: [[fn (&mut [u8], usize, usize, &[u8], usize, usize); 16]; 2],
}
233
234impl RV40DSP {
235 pub fn new() -> Self {
236 RV40DSP {
237 luma_mc: [
1a151e53 238 [ copy_16, luma_mc_10_16, luma_mc_20_16, luma_mc_30_16,
47527732
KS
239 luma_mc_01_16, luma_mc_11_16, luma_mc_21_16, luma_mc_31_16,
240 luma_mc_02_16, luma_mc_12_16, luma_mc_22_16, luma_mc_32_16,
241 luma_mc_03_16, luma_mc_13_16, luma_mc_23_16, luma_mc_33_16 ],
242 [ copy_8, luma_mc_10_8, luma_mc_20_8, luma_mc_30_8,
243 luma_mc_01_8, luma_mc_11_8, luma_mc_21_8, luma_mc_31_8,
244 luma_mc_02_8, luma_mc_12_8, luma_mc_22_8, luma_mc_32_8,
245 luma_mc_03_8, luma_mc_13_8, luma_mc_23_8, luma_mc_33_8 ] ],
246 }
247 }
248}
249
// Reads element `$o` of `$src` widened to `i16`. This re-declares the `el!`
// macro defined near the top of the file with the same expansion; the later
// definition is the one seen by the loop filter code below.
macro_rules! el {
    ($src: ident, $o: expr) => ($src[$o] as i16);
}
253
/// Clips `a` to the symmetric range `-lim..=lim`.
fn clip_symm(a: i16, lim: i16) -> i16 {
    if a < -lim {
        -lim
    } else if a > lim {
        lim
    } else {
        a
    }
}
263
264fn rv40_weak_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
265 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
266 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
267 for _ in 0..4 {
268 let p0 = el!(pix, off - step);
269 let q0 = el!(pix, off);
270
271 let t = q0 - p0;
272 if t == 0 {
273 off += stride;
274 continue;
275 }
276
277 let u = (alpha * t.wrapping_abs()) >> 7;
278 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
279 off += stride;
280 continue;
281 }
282
283 let p2 = el!(pix, off - 3*step);
284 let p1 = el!(pix, off - 2*step);
285 let q1 = el!(pix, off + step);
286 let q2 = el!(pix, off + 2*step);
287
d92111a8
KS
288 let strength = if filter_p1 && filter_q1 {
289 (t << 2) + (p1 - q1)
290 } else { t << 2 };
47527732 291
817e4872 292 let diff = clip_symm((strength + 4) >> 3, lim_p0q0);
47527732
KS
293 pix[off - step] = clip8(p0 + diff);
294 pix[off ] = clip8(q0 - diff);
295
bb0e22f7 296 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
47527732
KS
297 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
298 pix[off - 2*step] = clip8(p1 - clip_symm(p1_diff, lim_p1));
299 }
300
bb0e22f7 301 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
47527732
KS
302 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
303 pix[off + step] = clip8(q1 - clip_symm(q1_diff, lim_q1));
304 }
305
306 off += stride;
307 }
308}
309
310fn rv40_weak_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
311 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
312 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
313 rv40_weak_loop_filter4(pix, off, stride, 1, filter_p1, filter_q1, alpha, beta, lim_p0q0, lim_p1, lim_q1);
314}
315fn rv40_weak_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
316 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
317 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
318 let src = &mut pix[off - 3..][..stride * 3 + 3 + 3];
319 for ch in src.chunks_mut(stride).take(4) {
320 assert!(ch.len() >= 3 + 3);
321 let p0 = el!(ch, 3 - 1);
322 let q0 = el!(ch, 3);
323
324 let t = q0 - p0;
325 if t == 0 {
326 continue;
327 }
328
329 let u = (alpha * t.wrapping_abs()) >> 7;
330 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
331 continue;
332 }
333
334 let p2 = el!(ch, 3 - 3);
335 let p1 = el!(ch, 3 - 2);
336 let q1 = el!(ch, 3 + 1);
337 let q2 = el!(ch, 3 + 2);
338
d92111a8
KS
339 let strength = if filter_p1 && filter_q1 {
340 (t << 2) + (p1 - q1)
341 } else { t << 2 };
47527732 342
817e4872 343 let diff = clip_symm((strength + 4) >> 3, lim_p0q0);
47527732
KS
344 ch[3 - 1] = clip8(p0 + diff);
345 ch[3 ] = clip8(q0 - diff);
346
bb0e22f7 347 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
47527732
KS
348 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
349 ch[3 - 2] = clip8(p1 - clip_symm(p1_diff, lim_p1));
350 }
351
bb0e22f7 352 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
47527732
KS
353 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
354 ch[3 + 1] = clip8(q1 - clip_symm(q1_diff, lim_q1));
355 }
356 }
357}
358
359
360const RV40_DITHER_L: [i16; 16] = [
361 0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
362 0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
363];
364const RV40_DITHER_R: [i16; 16] = [
365 0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
366 0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
367];
368
/// Five-tap smoothing kernel of the strong loop filter.
///
/// Computes `(25 * (a + e) + 26 * (b + c + d) + dither) >> 7`. When `clip`
/// is set the result is additionally limited to `c - lims ..= c + lims`,
/// i.e. it may not move further than `lims` from the centre sample `c`.
fn sfilter(a: i16, b: i16, c: i16, d: i16, e: i16, dither: i16, clip: bool, lims: i16) -> i16 {
    let val = (25 * (a + e) + 26 * (b + c + d) + dither) >> 7;
    if !clip {
        return val;
    }
    if val < c - lims {
        c - lims
    } else if val > c + lims {
        c + lims
    } else {
        val
    }
}
383
384fn rv40_strong_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
385 alpha: i16, lims: i16, dmode: usize, chroma: bool) {
386 for i in 0..4 {
387 let p0 = el!(pix, off - step);
388 let q0 = el!(pix, off);
389
390 let t = q0 - p0;
391 if t == 0 {
392 off += stride;
393 continue;
394 }
395
396 let fmode = (alpha * t.wrapping_abs()) >> 7;
397 if fmode > 1 {
398 off += stride;
399 continue;
400 }
401
402 let p3 = el!(pix, off - 4*step);
403 let p2 = el!(pix, off - 3*step);
404 let p1 = el!(pix, off - 2*step);
405 let q1 = el!(pix, off + step);
406 let q2 = el!(pix, off + 2*step);
407 let q3 = el!(pix, off + 3*step);
408
409 let np0 = sfilter(p2, p1, p0, q0, q1, RV40_DITHER_L[dmode + i], fmode != 0, lims);
bb0e22f7 410 let nq0 = sfilter( p1, p0, q0, q1, q2, RV40_DITHER_R[dmode + i], fmode != 0, lims);
47527732
KS
411
412 let np1 = sfilter(p3, p2, p1, np0, q0, RV40_DITHER_L[dmode + i], fmode != 0, lims);
413 let nq1 = sfilter( p0, nq0, q1, q2, q3, RV40_DITHER_R[dmode + i], fmode != 0, lims);
414
415 pix[off - 2*step] = np1 as u8;
416 pix[off - step] = np0 as u8;
417 pix[off] = nq0 as u8;
418 pix[off + step] = nq1 as u8;
419
420 if !chroma {
bb0e22f7
KS
421 let np2 = sfilter(np0, np1, p2, p3, p2, 64, false, 0);
422 let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0);
47527732
KS
423 pix[off - 3*step] = np2 as u8;
424 pix[off + 2*step] = nq2 as u8;
425 }
426
427 off += stride;
428 }
429}
430
431fn rv40_loop_strength(pix: &[u8], off: usize, step: usize, stride: usize,
432 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
433 let mut sum_p1p0 = 0;
434 let mut sum_q1q0 = 0;
435
436 let mut off1 = off;
437 for _ in 0..4 {
438 sum_p1p0 += el!(pix, off1 - 2 * step) - el!(pix, off1 - step);
439 sum_q1q0 += el!(pix, off1 + step) - el!(pix, off1);
440 off1 += stride;
441 }
442
443 let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
444 let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;
445
446 if (!filter_p1 || !filter_q1) || !edge {
447 return (false, filter_p1, filter_q1);
448 }
449
450 let mut sum_p1p2 = 0;
451 let mut sum_q1q2 = 0;
452
453 let mut off1 = off;
454 for _ in 0..4 {
455 sum_p1p2 += el!(pix, off1 - 2 * step) - el!(pix, off1 - 3 * step);
456 sum_q1q2 += el!(pix, off1 + step) - el!(pix, off1 + 2 * step);
457 off1 += stride;
458 }
459
460 let strong = (sum_p1p2.wrapping_abs() < beta2) && (sum_q1q2.wrapping_abs() < beta2);
461
462 (strong, filter_p1, filter_q1)
463}
464
465fn rv40_loop_strength_h(pix: &[u8], off: usize, stride: usize,
466 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
467 rv40_loop_strength(pix, off, stride, 1, beta, beta2, edge)
468}
469
470fn rv40_loop_strength_v(pix: &[u8], off: usize, stride: usize,
471 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
472 let src = &pix[off - 3..][..stride * 3 + 3 + 3];
473 let mut sum_p1p0 = 0;
474 let mut sum_q1q0 = 0;
475
476 for ch in src.chunks(stride).take(4) {
477 assert!(ch.len() >= 3 + 3);
478 sum_p1p0 += el!(ch, 3 - 2) - el!(ch, 3 - 1);
479 sum_q1q0 += el!(ch, 3 + 1) - el!(ch, 3);
480 }
481
482 let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
483 let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;
484
485 if (!filter_p1 || !filter_q1) || !edge {
486 return (false, filter_p1, filter_q1);
487 }
488
489 let mut sum_p1p2 = 0;
490 let mut sum_q1q2 = 0;
491
492 for ch in src.chunks(stride).take(4) {
493 assert!(ch.len() >= 3 + 3);
494 sum_p1p2 += el!(ch, 3 - 2) - el!(ch, 3 - 3);
495 sum_q1q2 += el!(ch, 3 + 1) - el!(ch, 3 + 2);
496 }
497
498 let strong = (sum_p1p2.wrapping_abs() < beta2) && (sum_q1q2.wrapping_abs() < beta2);
499
500 (strong, filter_p1, filter_q1)
501}
502
503fn rv40_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
504 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
505 chroma: bool, edge: bool) {
506 let (strong, filter_p1, filter_q1) = rv40_loop_strength_h(pix, off, stride, beta, beta2, edge);
507 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
508
509 if strong {
510 rv40_strong_loop_filter4(pix, off, stride, 1, alpha, lims, dmode, chroma);
511 } else if filter_p1 && filter_q1 {
512 rv40_weak_loop_filter4_h(pix, off, stride, true, true, alpha, beta,
513 lims, lim_p1, lim_q1);
514 } else if filter_p1 || filter_q1 {
515 rv40_weak_loop_filter4_h(pix, off, stride, filter_p1, filter_q1, alpha, beta,
516 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
517 }
518}
519
520fn rv40_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
521 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
522 chroma: bool, edge: bool) {
523 let (strong, filter_p1, filter_q1) = rv40_loop_strength_v(pix, off, stride, beta, beta2, edge);
524 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
525
526 if strong {
527 rv40_strong_loop_filter4(pix, off, 1, stride, alpha, lims, dmode, chroma);
528 } else if filter_p1 && filter_q1 {
529 rv40_weak_loop_filter4_v(pix, off, stride, true, true, alpha, beta,
530 lims, lim_p1, lim_q1);
531 } else if filter_p1 || filter_q1 {
532 rv40_weak_loop_filter4_v(pix, off, stride, filter_p1, filter_q1, alpha, beta,
533 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
534 }
535}
536
/// Loop filter `alpha` threshold, indexed by the macroblock quantiser.
const RV40_ALPHA_TAB: [i16; 32] = [
    128, 128, 128, 128, 128, 128, 128, 128,
    128, 128, 122,  96,  75,  59,  47,  37,
     29,  23,  18,  15,  13,  11,  10,   9,
      8,   7,   6,   5,   4,   3,   2,   1
];

/// Loop filter `beta` threshold, indexed by the macroblock quantiser.
const RV40_BETA_TAB: [i16; 32] = [
     0,  0,  0,  0,  0,  0,  0,  0,  3,  3,  3,  4,  4,  4,  6,  6,
     6,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 13, 14, 15, 16, 17
];

/// Loop filter correction limits, indexed as `[strength][quantiser]` with
/// strength 0 (none), 1 or 2.
const RV40_FILTER_CLIP_TBL: [[i16; 32]; 3] = [
  [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  ], [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5
  ], [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 7, 8, 9
  ]
];
561
// Evaluates to `true` when bit `$x` (counted from the least significant bit)
// of `$pat` is set.
macro_rules! test_bit {
    ($pat: expr, $x: expr) => ( (($pat >> $x) & 1) != 0 )
}
565
/// Checks whether a motion-compensated block lies fully inside the picture.
///
/// The block of `size` x `size` samples at (`x`, `y`) is displaced by
/// (`dx`, `dy`); `e0`/`e1` are the extra samples required to the left/right
/// of it and `e2`/`e3` those required above/below. Returns `true` when the
/// whole extended area is within `0..w` horizontally and `0..h` vertically.
fn check_pos(x: usize, y: usize, size: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let xn = (x as isize) + (dx as isize);
    let yn = (y as isize) + (dy as isize);
    let size = size as isize;

    let fits_x = (xn - e0 >= 0) && (xn + size + e1 <= (w as isize));
    let fits_y = (yn - e2 >= 0) && (yn + size + e3 <= (h as isize));

    fits_x && fits_y
}
572
/// Extra samples needed before the block (left/top) for each luma
/// quarter-sample fraction.
const RV40_EDGE1: [isize; 4] = [ 0, 2, 2, 2 ];
/// Extra samples needed after the block (right/bottom) for each luma
/// quarter-sample fraction.
const RV40_EDGE2: [isize; 4] = [ 0, 3, 3, 3 ];

// Bit masks selecting one row or column of 4x4 blocks in the 16-bit luma
// pattern (bit `x + y * 4`).
const Y_TOP_ROW_MASK:   u32 = 0x000F;
const Y_BOT_ROW_MASK:   u32 = 0xF000;
const Y_LEFT_COL_MASK:  u32 = 0x1111;
const Y_RIGHT_COL_MASK: u32 = 0x8888;
// The same for the 4-bit chroma pattern (bit `x + y * 2`).
const C_TOP_ROW_MASK:   u32 = 0x3;
const C_BOT_ROW_MASK:   u32 = 0xC;
const C_LEFT_COL_MASK:  u32 = 0x5;
const C_RIGHT_COL_MASK: u32 = 0xA;
584
47527732 585impl RV34DSP for RV40DSP {
b7c882c1 586 #[allow(clippy::cognitive_complexity)]
16cbd8c0 587 fn loop_filter(&self, frame: &mut NAVideoBuffer<u8>, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, mb_h: usize, row: usize) {
47527732
KS
588 // todo proper B-frame filtering?
589 let mut offs: [usize; 3] = [0; 3];
590 let mut stride: [usize; 3] = [0; 3];
591 let (w, h) = frame.get_dimensions(0);
592 let small_frame = w * h <= 176*144;
593
594 for comp in 0..3 {
595 stride[comp] = frame.get_stride(comp);
596 let start = if comp == 0 { row * 16 } else { row * 8 };
597 offs[comp] = frame.get_offset(comp) + start * stride[comp];
598 }
599
1a967e6b 600 let data = frame.get_data_mut().unwrap();
47527732
KS
601 let dst: &mut [u8] = data.as_mut_slice();
602
d14e5288
KS
603 let is_last_row = row == mb_h - 1;
604
47527732
KS
605 let mut mb_pos: usize = row * mb_w;
606 let mut left_q: usize = 0;
d14e5288
KS
607 let mut left_cbp = 0;
608 let mut left_dbk = 0;
47527732
KS
609 for mb_x in 0..mb_w {
610 let q = mbinfo[mb_pos].q as usize;
611 let alpha = RV40_ALPHA_TAB[q];
612 let beta = RV40_BETA_TAB[q];
613 let beta_y = if small_frame { beta * 4 } else { beta * 3 };
614 let beta_c = beta * 3;
615
47527732 616 let is_strong = mbinfo[mb_pos].mbtype.is_intra_or_16();
d14e5288
KS
617 let top_is_strong = row > 0 && mbinfo[mb_pos - mb_w].mbtype.is_intra_or_16();
618 let left_is_strong = mb_x > 0 && mbinfo[mb_pos - 1].mbtype.is_intra_or_16();
619 let bot_is_strong = !is_last_row && mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16();
620
621 let cur_dbk = mbinfo[mb_pos].deblock;
622 let cur_cbp = if is_strong { 0xFFFFFF } else { mbinfo[mb_pos].cbp };
623
624 let (top_cbp, top_dbk) = if row > 0 {
625 (if top_is_strong { 0xFFFFFF } else { mbinfo[mb_pos - mb_w].cbp }, mbinfo[mb_pos - mb_w].deblock)
626 } else {
627 (0, 0)
628 };
629 let (bot_cbp, bot_dbk) = if !is_last_row {
630 (mbinfo[mb_pos + mb_w].cbp, mbinfo[mb_pos + mb_w].deblock)
631 } else {
632 (0, 0)
633 };
634
635 let y_cbp = cur_cbp & 0xFFFF;
636 let y_to_deblock = (cur_dbk as u32) | ((bot_dbk as u32) << 16);
637 let mut y_h_deblock = y_to_deblock | ((y_cbp << 4) & !Y_TOP_ROW_MASK) | ((top_cbp & Y_BOT_ROW_MASK) >> 12);
638 let mut y_v_deblock = y_to_deblock | ((y_cbp << 1) & !Y_LEFT_COL_MASK) | ((left_cbp & Y_RIGHT_COL_MASK) >> 3);
639
640 if mb_x == 0 {
641 y_v_deblock &= !Y_LEFT_COL_MASK;
642 }
643 if row == 0 {
644 y_h_deblock &= !Y_TOP_ROW_MASK;
645 }
646 if is_last_row || is_strong || bot_is_strong {
647 y_h_deblock &= !(Y_TOP_ROW_MASK << 16);
648 }
47527732
KS
649
650 for y in 0..4 {
651 let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
652 for x in 0..4 {
653 let bpos = x + y * 4;
d14e5288 654 let ver_strong = (x == 0) && (mb_x > 0) && (is_strong || left_is_strong);
47527732
KS
655
656 let cur_strength: usize;
657 if is_strong {
658 cur_strength = 2;
659 } else if test_bit!(cur_dbk, bpos) {
660 cur_strength = 1;
661 } else {
662 cur_strength = 0;
663 }
664
665 let left_strength: usize;
666 if x > 0 {
667 if is_strong {
668 left_strength = 2;
669 } else if test_bit!(cur_dbk, bpos - 1) {
670 left_strength = 1;
671 } else {
672 left_strength = 0;
673 }
674 } else if mb_x > 0 {
675 if left_is_strong {
676 left_strength = 2;
d14e5288 677 } else if test_bit!(left_dbk, bpos + 3) {
47527732
KS
678 left_strength = 1;
679 } else {
680 left_strength = 0;
681 }
682 } else {
683 left_strength = 0;
684 }
685
686 let bot_strength: usize;
687 if y < 3 {
688 if is_strong {
689 bot_strength = 2;
690 } else if test_bit!(cur_dbk, bpos + 4) {
691 bot_strength = 1;
692 } else {
693 bot_strength = 0;
694 }
d14e5288
KS
695 } else if !is_last_row {
696 if mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16() {
697 bot_strength = 2;
698 } else if test_bit!(bot_dbk, x) {
699 bot_strength = 1;
700 } else {
701 bot_strength = 0;
702 }
47527732
KS
703 } else {
704 bot_strength = 0;
705 }
706
707 let top_strength: usize;
708 if y > 0 {
709 if is_strong {
710 top_strength = 2;
711 } else if test_bit!(cur_dbk, bpos - 4) {
712 top_strength = 1;
713 } else {
714 top_strength = 0;
715 }
716 } else if row > 0 {
717 if top_is_strong {
718 top_strength = 2;
d14e5288 719 } else if test_bit!(top_dbk, bpos + 12) {
47527732
KS
720 top_strength = 1;
721 } else {
722 top_strength = 0;
723 }
724 } else {
725 top_strength = 0;
726 }
727
728 let l_q = if x > 0 { q } else { left_q };
729 let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };
730
731 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
732 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
733 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
734 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
735
736 let dmode = if y > 0 { x + y * 4 } else { x * 4 };
737
d14e5288 738 if test_bit!(y_h_deblock, bpos + 4) {
47527732
KS
739 rv40_loop_filter4_h(dst, yoff + 4 * stride[0] + x * 4, stride[0],
740 dmode, lim_cur, lim_bottom, alpha, beta, beta_y, false, false);
741 }
d14e5288 742 if test_bit!(y_v_deblock, bpos) && !ver_strong {
47527732
KS
743 rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
744 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, false);
745 }
d14e5288 746 if (y == 0) && test_bit!(y_h_deblock, bpos) && (is_strong || top_is_strong) {
47527732
KS
747 rv40_loop_filter4_h(dst, yoff + x * 4, stride[0],
748 dmode, lim_top, lim_cur, alpha, beta, beta_y, false, true);
749 }
d14e5288 750 if test_bit!(y_v_deblock, bpos) && ver_strong {
47527732
KS
751 rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
752 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, true);
753 }
754 }
755 }
756
757 for comp in 1..3 {
d14e5288
KS
758 let cshift = 16 - 4 + comp * 4;
759 let c_cur_cbp = (cur_cbp >> cshift) & 0xF;
760 let c_top_cbp = (top_cbp >> cshift) & 0xF;
761 let c_left_cbp = (left_cbp >> cshift) & 0xF;
762 let c_bot_cbp = (bot_cbp >> cshift) & 0xF;
763
764 let c_deblock = c_cur_cbp | (c_bot_cbp << 4);
765 let mut c_v_deblock = c_deblock | ((c_cur_cbp << 1) & !C_LEFT_COL_MASK) | ((c_left_cbp & C_RIGHT_COL_MASK) >> 1);
766 let mut c_h_deblock = c_deblock | ((c_cur_cbp & C_TOP_ROW_MASK) << 2) | ((c_top_cbp & C_BOT_ROW_MASK) >> 2);
767 if mb_x == 0 {
768 c_v_deblock &= !C_LEFT_COL_MASK;
769 }
770 if row == 0 {
771 c_h_deblock &= !C_TOP_ROW_MASK;
772 }
773 if is_last_row || is_strong || bot_is_strong {
774 c_h_deblock &= !(C_TOP_ROW_MASK << 4);
775 }
776
47527732
KS
777 for y in 0..2 {
778 let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
779 for x in 0..2 {
d14e5288 780 let bpos = x + y * 2;
47527732 781
d14e5288 782 let ver_strong = (x == 0) && (is_strong || left_is_strong);
47527732
KS
783
784 let cur_strength: usize;
785 if is_strong {
786 cur_strength = 2;
d14e5288 787 } else if test_bit!(c_cur_cbp, bpos) {
47527732
KS
788 cur_strength = 1;
789 } else {
790 cur_strength = 0;
791 }
792
793 let left_strength: usize;
794 if x > 0 {
795 if is_strong {
796 left_strength = 2;
d14e5288 797 } else if test_bit!(c_cur_cbp, bpos - 1) {
47527732
KS
798 left_strength = 1;
799 } else {
800 left_strength = 0;
801 }
802 } else if mb_x > 0 {
803 if left_is_strong {
804 left_strength = 2;
d14e5288 805 } else if test_bit!(c_left_cbp, bpos + 1) {
47527732
KS
806 left_strength = 1;
807 } else {
808 left_strength = 0;
809 }
810 } else {
811 left_strength = 0;
812 }
813
814 let bot_strength: usize;
d14e5288 815 if y != 3 {
47527732
KS
816 if is_strong {
817 bot_strength = 2;
d14e5288
KS
818 } else if test_bit!(c_cur_cbp, bpos + 2) {
819 bot_strength = 1;
820 } else {
821 bot_strength = 0;
822 }
823 } else if !is_last_row {
824 if mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16() {
825 bot_strength = 2;
826 } else if test_bit!(c_bot_cbp, x) {
47527732
KS
827 bot_strength = 1;
828 } else {
829 bot_strength = 0;
830 }
831 } else {
832 bot_strength = 0;
833 }
834
835 let top_strength: usize;
836 if y > 0 {
837 if is_strong {
838 top_strength = 2;
d14e5288 839 } else if test_bit!(c_cur_cbp, bpos - 2) {
47527732
KS
840 top_strength = 1;
841 } else {
842 top_strength = 0;
843 }
844 } else if row > 0 {
845 if top_is_strong {
846 top_strength = 2;
d14e5288 847 } else if test_bit!(c_top_cbp, bpos + 2) {
47527732
KS
848 top_strength = 1;
849 } else {
850 top_strength = 0;
851 }
852 } else {
853 top_strength = 0;
854 }
855
856 let l_q = if x > 0 { q } else { left_q };
857 let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };
858
859 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
860 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
861 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
862 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
863
d14e5288 864 if test_bit!(c_h_deblock, bpos + 2) {
47527732
KS
865 rv40_loop_filter4_h(dst, coff + 4 * stride[comp] + x * 4, stride[comp],
866 x * 8, lim_cur, lim_bottom, alpha, beta, beta_c, true, false);
867 }
d14e5288 868 if test_bit!(c_v_deblock, bpos) && !ver_strong {
47527732
KS
869 rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
870 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, false);
871 }
d14e5288 872 if (y == 0) && test_bit!(c_h_deblock, bpos) && (is_strong || top_is_strong) {
47527732
KS
873 rv40_loop_filter4_h(dst, coff + x * 4, stride[comp],
874 x * 8, lim_top, lim_cur, alpha, beta, beta_c, true, true);
875 }
d14e5288 876 if test_bit!(c_v_deblock, bpos) && ver_strong {
47527732
KS
877 rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
878 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, true);
879 }
880 }
881 }
882 }
883
884 left_q = q;
d14e5288
KS
885 left_dbk = cur_dbk;
886 left_cbp = cur_cbp;
47527732
KS
887
888 mb_pos += 1;
889 }
890 }
891 fn do_luma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, mv: MV, use16: bool, avg: bool) {
892 let size: usize = if use16 { 16 } else { 8 };
893 let dstride = frame.get_stride(0);
894 let doffset = frame.get_offset(0) + (if !avg { x + y * dstride } else { 0 });
1a967e6b 895 let data = frame.get_data_mut().unwrap();
47527732
KS
896 let dst: &mut [u8] = data.as_mut_slice();
897
898 let (w_, h_) = prev_frame.get_dimensions(0);
899 let w = (w_ + 15) & !15;
900 let h = (h_ + 15) & !15;
901
902 let dx = mv.x >> 2;
903 let cx = (mv.x & 3) as usize;
904 let dy = mv.y >> 2;
905 let cy = (mv.y & 3) as usize;
906 let mode = cx + cy * 4;
907
908 if check_pos(x, y, size, w, h, dx, dy, RV40_EDGE1[cx], RV40_EDGE2[cx], RV40_EDGE1[cy], RV40_EDGE2[cy]) {
909 let sstride = prev_frame.get_stride(0);
910 let mut soffset = prev_frame.get_offset(0) + x + y * sstride;
911 let data = prev_frame.get_data();
912 let src: &[u8] = data.as_slice();
913 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
914 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, src, soffset, sstride);
915 } else {
916 let mut ebuf: [u8; 32*22] = [0; 32*22];
86081fed 917 edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0, 4);
47527732
KS
918 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, &ebuf, 32*2 + 2, 32);
919 }
920 }
921 fn do_chroma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, comp: usize, mv: MV, use8: bool, avg: bool) {
922 let size: usize = if use8 { 8 } else { 4 };
923 let dstride = frame.get_stride(comp);
924 let doffset = frame.get_offset(comp) + (if !avg { x + y * dstride } else { 0 });
1a967e6b 925 let data = frame.get_data_mut().unwrap();
47527732
KS
926 let dst: &mut [u8] = data.as_mut_slice();
927
928 let (w_, h_) = prev_frame.get_dimensions(comp);
929 let w = (w_ + 7) & !7;
930 let h = (h_ + 7) & !7;
931
932 let mvx = mv.x / 2;
933 let mvy = mv.y / 2;
934 let dx = mvx >> 2;
935 let mut cx = (mvx & 3) as usize;
936 let dy = mvy >> 2;
937 let mut cy = (mvy & 3) as usize;
938
939 if (cx == 3) && (cy == 3) {
940 cx = 2;
941 cy = 2;
942 }
943
944 if check_pos(x, y, size, w, h, dx, dy, 0, 1, 0, 1) {
945 let sstride = prev_frame.get_stride(comp);
946 let mut soffset = prev_frame.get_offset(comp) + x + y * sstride;
947 let data = prev_frame.get_data();
948 let src: &[u8] = data.as_slice();
949 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
950 rv40_chroma_mc(dst, doffset, dstride, src, soffset, sstride, size, cx, cy);
951 } else {
952 let mut ebuf: [u8; 16*10] = [0; 16*10];
86081fed 953 edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp, 4);
47527732
KS
954 rv40_chroma_mc(dst, doffset, dstride, &ebuf, 0, 16, size, cx, cy);
955 }
956 }
957}