]> git.nihav.org Git - nihav.git/blob - nihav-realmedia/src/codecs/rv40dsp.rs
466a88d412f1418524c80d440fd3373726164075
[nihav.git] / nihav-realmedia / src / codecs / rv40dsp.rs
1 use nihav_core::frame::{FrameType, NAVideoBuffer};
2 use nihav_codec_support::codecs::MV;
3 use nihav_codec_support::codecs::blockdsp::edge_emu;
4 use super::rv3040::{RV34DSP, RV34MBInfo};
5
/// Saturates a signed 16-bit sample to the 8-bit pixel range `0..=255`.
fn clip8(a: i16) -> u8 {
    a.max(0).min(255) as u8
}
11
/// Reads element `$idx` of the byte buffer `$buf` widened to `i16`, so that
/// filter arithmetic can go negative without wrapping.
macro_rules! el {
    ($buf: ident, $idx: expr) => {
        i16::from($buf[$idx])
    };
}
15
/// Luma interpolation kernels, selected by the leading tag.
///
/// * `01` - six taps `(1, -5, 52, 20, -5, 1)`, rounded and divided by 64;
/// * `02` - six taps `(1, -5, 20, 20, -5, 1)`, rounded and divided by 32;
/// * `03` - six taps `(1, -5, 20, 52, -5, 1)`, rounded and divided by 64;
/// * `33` - rounded average of the 2x2 neighbourhood starting at `$pos`.
///
/// For the six-tap variants the taps are read from `$pos - 2 * $step` up to
/// `$pos + 3 * $step`; the result is saturated to `u8` with `clip8`.
macro_rules! filter {
    (01; $src: ident, $pos: expr, $step: expr) => (
        clip8((       el!($src, $pos - 2 * $step)
               -  5 * el!($src, $pos - $step)
               + 52 * el!($src, $pos)
               + 20 * el!($src, $pos + $step)
               -  5 * el!($src, $pos + 2 * $step)
               +      el!($src, $pos + 3 * $step) + 32) >> 6)
    );
    (02; $src: ident, $pos: expr, $step: expr) => (
        clip8((       el!($src, $pos - 2 * $step)
               -  5 * el!($src, $pos - $step)
               + 20 * el!($src, $pos)
               + 20 * el!($src, $pos + $step)
               -  5 * el!($src, $pos + 2 * $step)
               +      el!($src, $pos + 3 * $step) + 16) >> 5)
    );
    (03; $src: ident, $pos: expr, $step: expr) => (
        clip8((       el!($src, $pos - 2 * $step)
               -  5 * el!($src, $pos - $step)
               + 20 * el!($src, $pos)
               + 52 * el!($src, $pos + $step)
               -  5 * el!($src, $pos + 2 * $step)
               +      el!($src, $pos + 3 * $step) + 32) >> 6)
    );
    (33; $src: ident, $pos: expr, $stride: expr) => (
        clip8((  el!($src, $pos)
               + el!($src, $pos + 1)
               + el!($src, $pos + $stride)
               + el!($src, $pos + 1 + $stride) + 2) >> 2)
    );
}
48
/// Generates one luma motion compensation function.
///
/// Every generated function has the signature
/// `fn(dst, didx, dstride, src, sidx, sstride)` and produces a `$size` x `$size`
/// block at `dst[didx..]` from the reference samples around `src[sidx..]`.
///
/// Public forms:
/// * `copy; name, size` - plain block copy;
/// * `mc01|mc02|mc03; name, size, vertical` - one pass of the matching
///   `filter!` kernel, vertical when the flag is `true`, horizontal otherwise;
/// * `cm01|cm02|cm03; name, size, second_pass` - horizontal pass of the matching
///   kernel into a temporary buffer, followed by `second_pass` on that buffer;
/// * `mc33; name, size` - 2x2 averaging kernel.
///
/// The arms starting with `@` are implementation details shared by the
/// public forms.
macro_rules! mc_func {
    (copy; $name: ident, $size: expr) => (
        fn $name(dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            for row in 0..$size {
                let out = &mut dst[didx + row * dstride..][..$size];
                let inp = &src[sidx + row * sstride..][..$size];
                out.copy_from_slice(inp);
            }
        }
    );
    (mc01; $name: ident, $size: expr, $ver: expr) => ( mc_func!(@dir 01; $name, $size, $ver); );
    (mc02; $name: ident, $size: expr, $ver: expr) => ( mc_func!(@dir 02; $name, $size, $ver); );
    (mc03; $name: ident, $size: expr, $ver: expr) => ( mc_func!(@dir 03; $name, $size, $ver); );
    (cm01; $name: ident, $size: expr, $ofilt: ident) => ( mc_func!(@hv 01; $name, $size, $ofilt); );
    (cm02; $name: ident, $size: expr, $ofilt: ident) => ( mc_func!(@hv 02; $name, $size, $ofilt); );
    (cm03; $name: ident, $size: expr, $ofilt: ident) => ( mc_func!(@hv 03; $name, $size, $ofilt); );
    (mc33; $name: ident, $size: expr) => (
        fn $name(dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            for row in 0..$size {
                let dpos = didx + row * dstride;
                let spos = sidx + row * sstride;
                for x in 0..$size {
                    dst[dpos + x] = filter!(33; src, spos + x, sstride);
                }
            }
        }
    );
    // Single-direction pass with kernel `$kind`.
    (@dir $kind: tt; $name: ident, $size: expr, $ver: expr) => (
        fn $name(dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            // Distance between neighbouring taps: one row or one pixel.
            let step = if $ver { sstride } else { 1 };
            for row in 0..$size {
                let dpos = didx + row * dstride;
                let spos = sidx + row * sstride;
                for x in 0..$size {
                    dst[dpos + x] = filter!($kind; src, spos + x, step);
                }
            }
        }
    );
    // Horizontal pass with kernel `$kind` into a scratch buffer, then `$ofilt`.
    (@hv $kind: tt; $name: ident, $size: expr, $ofilt: ident) => (
        fn $name(dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            // Two extra rows above and three below feed the second pass.
            let mut tmp = [0u8; ($size + 5) * $size];
            let first_row = sidx - 2 * sstride;
            for (row, line) in tmp.chunks_mut($size).enumerate() {
                let spos = first_row + row * sstride;
                for (x, out) in line.iter_mut().enumerate() {
                    *out = filter!($kind; src, spos + x, 1);
                }
            }
            $ofilt(dst, didx, dstride, &tmp, 2 * $size, $size);
        }
    );
}
149 mc_func!(copy; copy_16, 16);
150 mc_func!(copy; copy_8, 8);
151 mc_func!(mc01; luma_mc_10_16, 16, false);
152 mc_func!(mc01; luma_mc_10_8, 8, false);
153 mc_func!(mc02; luma_mc_20_16, 16, false);
154 mc_func!(mc02; luma_mc_20_8, 8, false);
155 mc_func!(mc03; luma_mc_30_16, 16, false);
156 mc_func!(mc03; luma_mc_30_8, 8, false);
157 mc_func!(mc01; luma_mc_01_16, 16, true);
158 mc_func!(mc01; luma_mc_01_8, 8, true);
159 mc_func!(mc02; luma_mc_02_16, 16, true);
160 mc_func!(mc02; luma_mc_02_8, 8, true);
161 mc_func!(mc03; luma_mc_03_16, 16, true);
162 mc_func!(mc03; luma_mc_03_8, 8, true);
163 mc_func!(cm01; luma_mc_11_16, 16, luma_mc_01_16);
164 mc_func!(cm01; luma_mc_11_8, 8, luma_mc_01_8);
165 mc_func!(cm01; luma_mc_12_16, 16, luma_mc_02_16);
166 mc_func!(cm01; luma_mc_12_8, 8, luma_mc_02_8);
167 mc_func!(cm01; luma_mc_13_16, 16, luma_mc_03_16);
168 mc_func!(cm01; luma_mc_13_8, 8, luma_mc_03_8);
169 mc_func!(cm02; luma_mc_21_16, 16, luma_mc_01_16);
170 mc_func!(cm02; luma_mc_21_8, 8, luma_mc_01_8);
171 mc_func!(cm02; luma_mc_22_16, 16, luma_mc_02_16);
172 mc_func!(cm02; luma_mc_22_8, 8, luma_mc_02_8);
173 mc_func!(cm02; luma_mc_23_16, 16, luma_mc_03_16);
174 mc_func!(cm02; luma_mc_23_8, 8, luma_mc_03_8);
175 mc_func!(cm03; luma_mc_31_16, 16, luma_mc_01_16);
176 mc_func!(cm03; luma_mc_31_8, 8, luma_mc_01_8);
177 mc_func!(cm03; luma_mc_32_16, 16, luma_mc_02_16);
178 mc_func!(cm03; luma_mc_32_8, 8, luma_mc_02_8);
179 mc_func!(mc33; luma_mc_33_16, 16);
180 mc_func!(mc33; luma_mc_33_8, 8);
181
/// Rounding bias of the chroma interpolation, indexed as `[y][x]` by the
/// quarter-pel fraction of the motion vector.
const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [
    [ 0, 4, 8, 4 ],
    [ 8, 7, 8, 7 ],
    [ 0, 8, 4, 8 ],
    [ 8, 7, 8, 7 ]
];

/// Chroma motion compensation for a `size` x `size` block.
///
/// * `dst`, `didx`, `dstride` - destination buffer, offset of the first output
///   pixel and distance between output rows;
/// * `src`, `sidx`, `sstride` - the same for the reference samples;
/// * `x`, `y` - quarter-pel fractions of the motion vector, each in `0..=3`.
///
/// A zero fraction copies the block. A fraction in one direction only
/// blends two neighbouring samples, so no extra row (or column) of the
/// reference is read. Otherwise the four surrounding samples are blended with
/// bilinear weights. All weighted sums add the position-dependent bias and are
/// divided by 16.
///
/// # Panics
/// Panics if `x` or `y` exceeds 3 or if a buffer is too short for the block.
fn rv40_chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
    if x == 0 && y == 0 {
        for _ in 0..size {
            dst[didx..][..size].copy_from_slice(&src[sidx..][..size]);
            didx += dstride;
            sidx += sstride;
        }
        return;
    }
    // `x` and `y` are already quarter-pel fractions, so they address the 4x4
    // table directly. Halving them first would leave most of the table
    // unreachable and pick the wrong rounding for most positions.
    let bias = RV40_CHROMA_BIAS[y][x];
    if x > 0 && y > 0 {
        let a = ((4 - x) * (4 - y)) as u16;
        let b = (x * (4 - y)) as u16;
        let c = ((4 - x) * y) as u16;
        let d = (x * y) as u16;
        for _ in 0..size {
            for i in 0..size {
                let p = sidx + i;
                let sum = a * u16::from(src[p])
                        + b * u16::from(src[p + 1])
                        + c * u16::from(src[p + sstride])
                        + d * u16::from(src[p + 1 + sstride])
                        + bias;
                dst[didx + i] = (sum >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    } else {
        // Exactly one of `x`/`y` is zero here, so the blend has two taps.
        let a = ((4 - x) * (4 - y)) as u16;
        let e = (x * (4 - y) + (4 - x) * y) as u16;
        let step = if y > 0 { sstride } else { 1 };
        for _ in 0..size {
            for i in 0..size {
                let p = sidx + i;
                let sum = a * u16::from(src[p]) + e * u16::from(src[p + step]) + bias;
                dst[didx + i] = (sum >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    }
}
228
229 #[allow(clippy::type_complexity)]
230 pub struct RV40DSP {
231 luma_mc: [[fn (&mut [u8], usize, usize, &[u8], usize, usize); 16]; 2],
232 }
233
234 impl RV40DSP {
235 pub fn new() -> Self {
236 RV40DSP {
237 luma_mc: [
238 [ copy_16, luma_mc_10_16, luma_mc_20_16, luma_mc_30_16,
239 luma_mc_01_16, luma_mc_11_16, luma_mc_21_16, luma_mc_31_16,
240 luma_mc_02_16, luma_mc_12_16, luma_mc_22_16, luma_mc_32_16,
241 luma_mc_03_16, luma_mc_13_16, luma_mc_23_16, luma_mc_33_16 ],
242 [ copy_8, luma_mc_10_8, luma_mc_20_8, luma_mc_30_8,
243 luma_mc_01_8, luma_mc_11_8, luma_mc_21_8, luma_mc_31_8,
244 luma_mc_02_8, luma_mc_12_8, luma_mc_22_8, luma_mc_32_8,
245 luma_mc_03_8, luma_mc_13_8, luma_mc_23_8, luma_mc_33_8 ] ],
246 }
247 }
248 }
249
/// Reads element `$idx` of the byte buffer `$buf` widened to `i16`; used by
/// the loop-filter routines for signed pixel differences.
macro_rules! el {
    ($buf: ident, $idx: expr) => {
        i16::from($buf[$idx])
    };
}
253
/// Limits `a` to the symmetric range `-lim..=lim`.
fn clip_symm(a: i16, lim: i16) -> i16 {
    match a {
        v if v < -lim => -lim,
        v if v > lim => lim,
        v => v,
    }
}
263
264 fn rv40_weak_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
265 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
266 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
267 for _ in 0..4 {
268 let p0 = el!(pix, off - step);
269 let q0 = el!(pix, off);
270
271 let t = q0 - p0;
272 if t == 0 {
273 off += stride;
274 continue;
275 }
276
277 let u = (alpha * t.wrapping_abs()) >> 7;
278 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
279 off += stride;
280 continue;
281 }
282
283 let p2 = el!(pix, off - 3*step);
284 let p1 = el!(pix, off - 2*step);
285 let q1 = el!(pix, off + step);
286 let q2 = el!(pix, off + 2*step);
287
288 let strength = if filter_p1 && filter_q1 {
289 (t << 2) + (p1 - q1)
290 } else { t << 2 };
291
292 let diff = clip_symm((strength + 4) >> 3, lim_p0q0);
293 pix[off - step] = clip8(p0 + diff);
294 pix[off ] = clip8(q0 - diff);
295
296 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
297 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
298 pix[off - 2*step] = clip8(p1 - clip_symm(p1_diff, lim_p1));
299 }
300
301 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
302 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
303 pix[off + step] = clip8(q1 - clip_symm(q1_diff, lim_q1));
304 }
305
306 off += stride;
307 }
308 }
309
310 fn rv40_weak_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
311 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
312 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
313 rv40_weak_loop_filter4(pix, off, stride, 1, filter_p1, filter_q1, alpha, beta, lim_p0q0, lim_p1, lim_q1);
314 }
315 fn rv40_weak_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
316 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
317 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
318 let src = &mut pix[off - 3..][..stride * 3 + 3 + 3];
319 for ch in src.chunks_mut(stride).take(4) {
320 assert!(ch.len() >= 3 + 3);
321 let p0 = el!(ch, 3 - 1);
322 let q0 = el!(ch, 3);
323
324 let t = q0 - p0;
325 if t == 0 {
326 continue;
327 }
328
329 let u = (alpha * t.wrapping_abs()) >> 7;
330 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
331 continue;
332 }
333
334 let p2 = el!(ch, 3 - 3);
335 let p1 = el!(ch, 3 - 2);
336 let q1 = el!(ch, 3 + 1);
337 let q2 = el!(ch, 3 + 2);
338
339 let strength = if filter_p1 && filter_q1 {
340 (t << 2) + (p1 - q1)
341 } else { t << 2 };
342
343 let diff = clip_symm((strength + 4) >> 3, lim_p0q0);
344 ch[3 - 1] = clip8(p0 + diff);
345 ch[3 ] = clip8(q0 - diff);
346
347 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
348 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
349 ch[3 - 2] = clip8(p1 - clip_symm(p1_diff, lim_p1));
350 }
351
352 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
353 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
354 ch[3 + 1] = clip8(q1 - clip_symm(q1_diff, lim_q1));
355 }
356 }
357 }
358
359
/// Dither added by the strong loop filter when smoothing the `p` side of an
/// edge; the filter indexes it with `dmode + line`.
const RV40_DITHER_L: [i16; 16] = [
    0x40, 0x50, 0x20, 0x60,
    0x30, 0x50, 0x40, 0x30,
    0x50, 0x40, 0x50, 0x30,
    0x60, 0x20, 0x50, 0x40,
];
/// Dither added by the strong loop filter when smoothing the `q` side of an
/// edge; the filter indexes it with `dmode + line`.
const RV40_DITHER_R: [i16; 16] = [
    0x40, 0x30, 0x60, 0x20,
    0x50, 0x30, 0x30, 0x40,
    0x40, 0x40, 0x50, 0x30,
    0x20, 0x60, 0x30, 0x40,
];
368
/// Five-tap smoothing kernel of the strong loop filter.
///
/// Computes `(25 * (a + e) + 26 * (b + c + d) + dither) >> 7`. When `clip` is
/// set the result is additionally kept within `lims` of the centre sample `c`.
fn sfilter(a: i16, b: i16, c: i16, d: i16, e: i16, dither: i16, clip: bool, lims: i16) -> i16 {
    let val = (25 * (a + e) + 26 * (b + c + d) + dither) >> 7;
    if !clip {
        return val;
    }
    match val {
        v if v < c - lims => c - lims,
        v if v > c + lims => c + lims,
        v => v,
    }
}
383
384 fn rv40_strong_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
385 alpha: i16, lims: i16, dmode: usize, chroma: bool) {
386 for i in 0..4 {
387 let p0 = el!(pix, off - step);
388 let q0 = el!(pix, off);
389
390 let t = q0 - p0;
391 if t == 0 {
392 off += stride;
393 continue;
394 }
395
396 let fmode = (alpha * t.wrapping_abs()) >> 7;
397 if fmode > 1 {
398 off += stride;
399 continue;
400 }
401
402 let p3 = el!(pix, off - 4*step);
403 let p2 = el!(pix, off - 3*step);
404 let p1 = el!(pix, off - 2*step);
405 let q1 = el!(pix, off + step);
406 let q2 = el!(pix, off + 2*step);
407 let q3 = el!(pix, off + 3*step);
408
409 let np0 = sfilter(p2, p1, p0, q0, q1, RV40_DITHER_L[dmode + i], fmode != 0, lims);
410 let nq0 = sfilter( p1, p0, q0, q1, q2, RV40_DITHER_R[dmode + i], fmode != 0, lims);
411
412 let np1 = sfilter(p3, p2, p1, np0, q0, RV40_DITHER_L[dmode + i], fmode != 0, lims);
413 let nq1 = sfilter( p0, nq0, q1, q2, q3, RV40_DITHER_R[dmode + i], fmode != 0, lims);
414
415 pix[off - 2*step] = np1 as u8;
416 pix[off - step] = np0 as u8;
417 pix[off] = nq0 as u8;
418 pix[off + step] = nq1 as u8;
419
420 if !chroma {
421 let np2 = sfilter(np0, np1, p2, p3, p2, 64, false, 0);
422 let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0);
423 pix[off - 3*step] = np2 as u8;
424 pix[off + 2*step] = nq2 as u8;
425 }
426
427 off += stride;
428 }
429 }
430
431 fn rv40_loop_strength(pix: &[u8], off: usize, step: usize, stride: usize,
432 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
433 let mut sum_p1p0 = 0;
434 let mut sum_q1q0 = 0;
435
436 let mut off1 = off;
437 for _ in 0..4 {
438 sum_p1p0 += el!(pix, off1 - 2 * step) - el!(pix, off1 - step);
439 sum_q1q0 += el!(pix, off1 + step) - el!(pix, off1);
440 off1 += stride;
441 }
442
443 let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
444 let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;
445
446 if (!filter_p1 || !filter_q1) || !edge {
447 return (false, filter_p1, filter_q1);
448 }
449
450 let mut sum_p1p2 = 0;
451 let mut sum_q1q2 = 0;
452
453 let mut off1 = off;
454 for _ in 0..4 {
455 sum_p1p2 += el!(pix, off1 - 2 * step) - el!(pix, off1 - 3 * step);
456 sum_q1q2 += el!(pix, off1 + step) - el!(pix, off1 + 2 * step);
457 off1 += stride;
458 }
459
460 let strong = (sum_p1p2.wrapping_abs() < beta2) && (sum_q1q2.wrapping_abs() < beta2);
461
462 (strong, filter_p1, filter_q1)
463 }
464
465 fn rv40_loop_strength_h(pix: &[u8], off: usize, stride: usize,
466 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
467 rv40_loop_strength(pix, off, stride, 1, beta, beta2, edge)
468 }
469
470 fn rv40_loop_strength_v(pix: &[u8], off: usize, stride: usize,
471 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
472 let src = &pix[off - 3..][..stride * 3 + 3 + 3];
473 let mut sum_p1p0 = 0;
474 let mut sum_q1q0 = 0;
475
476 for ch in src.chunks(stride).take(4) {
477 assert!(ch.len() >= 3 + 3);
478 sum_p1p0 += el!(ch, 3 - 2) - el!(ch, 3 - 1);
479 sum_q1q0 += el!(ch, 3 + 1) - el!(ch, 3);
480 }
481
482 let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
483 let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;
484
485 if (!filter_p1 || !filter_q1) || !edge {
486 return (false, filter_p1, filter_q1);
487 }
488
489 let mut sum_p1p2 = 0;
490 let mut sum_q1q2 = 0;
491
492 for ch in src.chunks(stride).take(4) {
493 assert!(ch.len() >= 3 + 3);
494 sum_p1p2 += el!(ch, 3 - 2) - el!(ch, 3 - 3);
495 sum_q1q2 += el!(ch, 3 + 1) - el!(ch, 3 + 2);
496 }
497
498 let strong = (sum_p1p2.wrapping_abs() < beta2) && (sum_q1q2.wrapping_abs() < beta2);
499
500 (strong, filter_p1, filter_q1)
501 }
502
503 fn rv40_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
504 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
505 chroma: bool, edge: bool) {
506 let (strong, filter_p1, filter_q1) = rv40_loop_strength_h(pix, off, stride, beta, beta2, edge);
507 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
508
509 if strong {
510 rv40_strong_loop_filter4(pix, off, stride, 1, alpha, lims, dmode, chroma);
511 } else if filter_p1 && filter_q1 {
512 rv40_weak_loop_filter4_h(pix, off, stride, true, true, alpha, beta,
513 lims, lim_p1, lim_q1);
514 } else if filter_p1 || filter_q1 {
515 rv40_weak_loop_filter4_h(pix, off, stride, filter_p1, filter_q1, alpha, beta,
516 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
517 }
518 }
519
520 fn rv40_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
521 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
522 chroma: bool, edge: bool) {
523 let (strong, filter_p1, filter_q1) = rv40_loop_strength_v(pix, off, stride, beta, beta2, edge);
524 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
525
526 if strong {
527 rv40_strong_loop_filter4(pix, off, 1, stride, alpha, lims, dmode, chroma);
528 } else if filter_p1 && filter_q1 {
529 rv40_weak_loop_filter4_v(pix, off, stride, true, true, alpha, beta,
530 lims, lim_p1, lim_q1);
531 } else if filter_p1 || filter_q1 {
532 rv40_weak_loop_filter4_v(pix, off, stride, filter_p1, filter_q1, alpha, beta,
533 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
534 }
535 }
536
/// Loop filter `alpha` threshold for each quantiser value.
const RV40_ALPHA_TAB: [i16; 32] = [
    128, 128, 128, 128, 128, 128, 128, 128,
    128, 128, 122,  96,  75,  59,  47,  37,
     29,  23,  18,  15,  13,  11,  10,   9,
      8,   7,   6,   5,   4,   3,   2,   1,
];

/// Loop filter `beta` threshold for each quantiser value.
const RV40_BETA_TAB: [i16; 32] = [
     0,  0,  0,  0,  0,  0,  0,  0,
     3,  3,  3,  4,  4,  4,  6,  6,
     6,  7,  8,  8,  9,  9, 10, 10,
    11, 11, 12, 13, 14, 15, 16, 17,
];

/// Loop filter clipping limits, indexed by `[block strength][quantiser]`.
const RV40_FILTER_CLIP_TBL: [[i16; 32]; 3] = [
    // strength 0: no correction allowed
    [
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
    ],
    // strength 1
    [
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 2, 2, 2,
        2, 3, 3, 3, 3, 4, 5, 5,
    ],
    // strength 2
    [
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1,
        1, 2, 2, 2, 2, 3, 3, 3,
        4, 4, 5, 5, 5, 7, 8, 9,
    ],
];
561
/// Evaluates to `true` when bit number `$bit` of `$mask` is set.
macro_rules! test_bit {
    ($mask: expr, $bit: expr) => {
        ((($mask >> $bit) & 1) == 1)
    };
}
565
/// Tells whether a displaced block, including its filter margins, lies
/// completely inside a `w` x `h` picture.
///
/// The block is `size` x `size`, located at (`x`, `y`) and displaced by
/// (`dx`, `dy`). `e0`/`e1` are the extra pixels needed to the left/right of
/// the block, `e2`/`e3` the extra pixels needed above/below it.
fn check_pos(x: usize, y: usize, size: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let left = (x as isize) + isize::from(dx);
    let top = (y as isize) + isize::from(dy);

    if left - e0 < 0 {
        return false;
    }
    if left + (size as isize) + e1 > (w as isize) {
        return false;
    }
    if top - e2 < 0 {
        return false;
    }
    top + (size as isize) + e3 <= (h as isize)
}
572
/// Extra reference pixels needed before a block (left/top), indexed by the
/// quarter-pel fraction of the motion vector.
const RV40_EDGE1: [isize; 4] = [0, 2, 2, 2];
/// Extra reference pixels needed after a block (right/bottom), indexed by the
/// quarter-pel fraction of the motion vector.
const RV40_EDGE2: [isize; 4] = [0, 3, 3, 3];

// Luma block patterns hold a 4x4 grid of flags, bit `x + y * 4` per block.
const Y_TOP_ROW_MASK:   u32 = 0b0000_0000_0000_1111;
const Y_BOT_ROW_MASK:   u32 = 0b1111_0000_0000_0000;
const Y_LEFT_COL_MASK:  u32 = 0b0001_0001_0001_0001;
const Y_RIGHT_COL_MASK: u32 = 0b1000_1000_1000_1000;
// Chroma block patterns hold a 2x2 grid of flags, bit `x + y * 2` per block.
const C_TOP_ROW_MASK:   u32 = 0b0011;
const C_BOT_ROW_MASK:   u32 = 0b1100;
const C_LEFT_COL_MASK:  u32 = 0b0101;
const C_RIGHT_COL_MASK: u32 = 0b1010;
584
585 impl RV34DSP for RV40DSP {
586 #[allow(clippy::cognitive_complexity)]
587 fn loop_filter(&self, frame: &mut NAVideoBuffer<u8>, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, mb_h: usize, row: usize) {
588 // todo proper B-frame filtering?
589 let mut offs: [usize; 3] = [0; 3];
590 let mut stride: [usize; 3] = [0; 3];
591 let (w, h) = frame.get_dimensions(0);
592 let small_frame = w * h <= 176*144;
593
594 for comp in 0..3 {
595 stride[comp] = frame.get_stride(comp);
596 let start = if comp == 0 { row * 16 } else { row * 8 };
597 offs[comp] = frame.get_offset(comp) + start * stride[comp];
598 }
599
600 let data = frame.get_data_mut().unwrap();
601 let dst: &mut [u8] = data.as_mut_slice();
602
603 let is_last_row = row == mb_h - 1;
604
605 let mut mb_pos: usize = row * mb_w;
606 let mut left_q: usize = 0;
607 let mut left_cbp = 0;
608 let mut left_dbk = 0;
609 for mb_x in 0..mb_w {
610 let q = mbinfo[mb_pos].q as usize;
611 let alpha = RV40_ALPHA_TAB[q];
612 let beta = RV40_BETA_TAB[q];
613 let beta_y = if small_frame { beta * 4 } else { beta * 3 };
614 let beta_c = beta * 3;
615
616 let is_strong = mbinfo[mb_pos].mbtype.is_intra_or_16();
617 let top_is_strong = row > 0 && mbinfo[mb_pos - mb_w].mbtype.is_intra_or_16();
618 let left_is_strong = mb_x > 0 && mbinfo[mb_pos - 1].mbtype.is_intra_or_16();
619 let bot_is_strong = !is_last_row && mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16();
620
621 let cur_dbk = mbinfo[mb_pos].deblock;
622 let cur_cbp = if is_strong { 0xFFFFFF } else { mbinfo[mb_pos].cbp };
623
624 let (top_cbp, top_dbk) = if row > 0 {
625 (if top_is_strong { 0xFFFFFF } else { mbinfo[mb_pos - mb_w].cbp }, mbinfo[mb_pos - mb_w].deblock)
626 } else {
627 (0, 0)
628 };
629 let (bot_cbp, bot_dbk) = if !is_last_row {
630 (mbinfo[mb_pos + mb_w].cbp, mbinfo[mb_pos + mb_w].deblock)
631 } else {
632 (0, 0)
633 };
634
635 let y_cbp = cur_cbp & 0xFFFF;
636 let y_to_deblock = (cur_dbk as u32) | ((bot_dbk as u32) << 16);
637 let mut y_h_deblock = y_to_deblock | ((y_cbp << 4) & !Y_TOP_ROW_MASK) | ((top_cbp & Y_BOT_ROW_MASK) >> 12);
638 let mut y_v_deblock = y_to_deblock | ((y_cbp << 1) & !Y_LEFT_COL_MASK) | ((left_cbp & Y_RIGHT_COL_MASK) >> 3);
639
640 if mb_x == 0 {
641 y_v_deblock &= !Y_LEFT_COL_MASK;
642 }
643 if row == 0 {
644 y_h_deblock &= !Y_TOP_ROW_MASK;
645 }
646 if is_last_row || is_strong || bot_is_strong {
647 y_h_deblock &= !(Y_TOP_ROW_MASK << 16);
648 }
649
650 for y in 0..4 {
651 let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
652 for x in 0..4 {
653 let bpos = x + y * 4;
654 let ver_strong = (x == 0) && (mb_x > 0) && (is_strong || left_is_strong);
655
656 let cur_strength: usize;
657 if is_strong {
658 cur_strength = 2;
659 } else if test_bit!(cur_dbk, bpos) {
660 cur_strength = 1;
661 } else {
662 cur_strength = 0;
663 }
664
665 let left_strength: usize;
666 if x > 0 {
667 if is_strong {
668 left_strength = 2;
669 } else if test_bit!(cur_dbk, bpos - 1) {
670 left_strength = 1;
671 } else {
672 left_strength = 0;
673 }
674 } else if mb_x > 0 {
675 if left_is_strong {
676 left_strength = 2;
677 } else if test_bit!(left_dbk, bpos + 3) {
678 left_strength = 1;
679 } else {
680 left_strength = 0;
681 }
682 } else {
683 left_strength = 0;
684 }
685
686 let bot_strength: usize;
687 if y < 3 {
688 if is_strong {
689 bot_strength = 2;
690 } else if test_bit!(cur_dbk, bpos + 4) {
691 bot_strength = 1;
692 } else {
693 bot_strength = 0;
694 }
695 } else if !is_last_row {
696 if mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16() {
697 bot_strength = 2;
698 } else if test_bit!(bot_dbk, x) {
699 bot_strength = 1;
700 } else {
701 bot_strength = 0;
702 }
703 } else {
704 bot_strength = 0;
705 }
706
707 let top_strength: usize;
708 if y > 0 {
709 if is_strong {
710 top_strength = 2;
711 } else if test_bit!(cur_dbk, bpos - 4) {
712 top_strength = 1;
713 } else {
714 top_strength = 0;
715 }
716 } else if row > 0 {
717 if top_is_strong {
718 top_strength = 2;
719 } else if test_bit!(top_dbk, bpos + 12) {
720 top_strength = 1;
721 } else {
722 top_strength = 0;
723 }
724 } else {
725 top_strength = 0;
726 }
727
728 let l_q = if x > 0 { q } else { left_q };
729 let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };
730
731 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
732 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
733 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
734 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
735
736 let dmode = if y > 0 { x + y * 4 } else { x * 4 };
737
738 if test_bit!(y_h_deblock, bpos + 4) {
739 rv40_loop_filter4_h(dst, yoff + 4 * stride[0] + x * 4, stride[0],
740 dmode, lim_cur, lim_bottom, alpha, beta, beta_y, false, false);
741 }
742 if test_bit!(y_v_deblock, bpos) && !ver_strong {
743 rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
744 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, false);
745 }
746 if (y == 0) && test_bit!(y_h_deblock, bpos) && (is_strong || top_is_strong) {
747 rv40_loop_filter4_h(dst, yoff + x * 4, stride[0],
748 dmode, lim_top, lim_cur, alpha, beta, beta_y, false, true);
749 }
750 if test_bit!(y_v_deblock, bpos) && ver_strong {
751 rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
752 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, true);
753 }
754 }
755 }
756
757 for comp in 1..3 {
758 let cshift = 16 - 4 + comp * 4;
759 let c_cur_cbp = (cur_cbp >> cshift) & 0xF;
760 let c_top_cbp = (top_cbp >> cshift) & 0xF;
761 let c_left_cbp = (left_cbp >> cshift) & 0xF;
762 let c_bot_cbp = (bot_cbp >> cshift) & 0xF;
763
764 let c_deblock = c_cur_cbp | (c_bot_cbp << 4);
765 let mut c_v_deblock = c_deblock | ((c_cur_cbp << 1) & !C_LEFT_COL_MASK) | ((c_left_cbp & C_RIGHT_COL_MASK) >> 1);
766 let mut c_h_deblock = c_deblock | ((c_cur_cbp & C_TOP_ROW_MASK) << 2) | ((c_top_cbp & C_BOT_ROW_MASK) >> 2);
767 if mb_x == 0 {
768 c_v_deblock &= !C_LEFT_COL_MASK;
769 }
770 if row == 0 {
771 c_h_deblock &= !C_TOP_ROW_MASK;
772 }
773 if is_last_row || is_strong || bot_is_strong {
774 c_h_deblock &= !(C_TOP_ROW_MASK << 4);
775 }
776
777 for y in 0..2 {
778 let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
779 for x in 0..2 {
780 let bpos = x + y * 2;
781
782 let ver_strong = (x == 0) && (is_strong || left_is_strong);
783
784 let cur_strength: usize;
785 if is_strong {
786 cur_strength = 2;
787 } else if test_bit!(c_cur_cbp, bpos) {
788 cur_strength = 1;
789 } else {
790 cur_strength = 0;
791 }
792
793 let left_strength: usize;
794 if x > 0 {
795 if is_strong {
796 left_strength = 2;
797 } else if test_bit!(c_cur_cbp, bpos - 1) {
798 left_strength = 1;
799 } else {
800 left_strength = 0;
801 }
802 } else if mb_x > 0 {
803 if left_is_strong {
804 left_strength = 2;
805 } else if test_bit!(c_left_cbp, bpos + 1) {
806 left_strength = 1;
807 } else {
808 left_strength = 0;
809 }
810 } else {
811 left_strength = 0;
812 }
813
814 let bot_strength: usize;
815 if y != 3 {
816 if is_strong {
817 bot_strength = 2;
818 } else if test_bit!(c_cur_cbp, bpos + 2) {
819 bot_strength = 1;
820 } else {
821 bot_strength = 0;
822 }
823 } else if !is_last_row {
824 if mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16() {
825 bot_strength = 2;
826 } else if test_bit!(c_bot_cbp, x) {
827 bot_strength = 1;
828 } else {
829 bot_strength = 0;
830 }
831 } else {
832 bot_strength = 0;
833 }
834
835 let top_strength: usize;
836 if y > 0 {
837 if is_strong {
838 top_strength = 2;
839 } else if test_bit!(c_cur_cbp, bpos - 2) {
840 top_strength = 1;
841 } else {
842 top_strength = 0;
843 }
844 } else if row > 0 {
845 if top_is_strong {
846 top_strength = 2;
847 } else if test_bit!(c_top_cbp, bpos + 2) {
848 top_strength = 1;
849 } else {
850 top_strength = 0;
851 }
852 } else {
853 top_strength = 0;
854 }
855
856 let l_q = if x > 0 { q } else { left_q };
857 let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };
858
859 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
860 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
861 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
862 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
863
864 if test_bit!(c_h_deblock, bpos + 2) {
865 rv40_loop_filter4_h(dst, coff + 4 * stride[comp] + x * 4, stride[comp],
866 x * 8, lim_cur, lim_bottom, alpha, beta, beta_c, true, false);
867 }
868 if test_bit!(c_v_deblock, bpos) && !ver_strong {
869 rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
870 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, false);
871 }
872 if (y == 0) && test_bit!(c_h_deblock, bpos) && (is_strong || top_is_strong) {
873 rv40_loop_filter4_h(dst, coff + x * 4, stride[comp],
874 x * 8, lim_top, lim_cur, alpha, beta, beta_c, true, true);
875 }
876 if test_bit!(c_v_deblock, bpos) && ver_strong {
877 rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
878 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, true);
879 }
880 }
881 }
882 }
883
884 left_q = q;
885 left_dbk = cur_dbk;
886 left_cbp = cur_cbp;
887
888 mb_pos += 1;
889 }
890 }
891 fn do_luma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, mv: MV, use16: bool, avg: bool) {
892 let size: usize = if use16 { 16 } else { 8 };
893 let dstride = frame.get_stride(0);
894 let doffset = frame.get_offset(0) + (if !avg { x + y * dstride } else { 0 });
895 let data = frame.get_data_mut().unwrap();
896 let dst: &mut [u8] = data.as_mut_slice();
897
898 let (w_, h_) = prev_frame.get_dimensions(0);
899 let w = (w_ + 15) & !15;
900 let h = (h_ + 15) & !15;
901
902 let dx = mv.x >> 2;
903 let cx = (mv.x & 3) as usize;
904 let dy = mv.y >> 2;
905 let cy = (mv.y & 3) as usize;
906 let mode = cx + cy * 4;
907
908 if check_pos(x, y, size, w, h, dx, dy, RV40_EDGE1[cx], RV40_EDGE2[cx], RV40_EDGE1[cy], RV40_EDGE2[cy]) {
909 let sstride = prev_frame.get_stride(0);
910 let mut soffset = prev_frame.get_offset(0) + x + y * sstride;
911 let data = prev_frame.get_data();
912 let src: &[u8] = data.as_slice();
913 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
914 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, src, soffset, sstride);
915 } else {
916 let mut ebuf: [u8; 32*22] = [0; 32*22];
917 edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0, 4);
918 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, &ebuf, 32*2 + 2, 32);
919 }
920 }
921 fn do_chroma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, comp: usize, mv: MV, use8: bool, avg: bool) {
922 let size: usize = if use8 { 8 } else { 4 };
923 let dstride = frame.get_stride(comp);
924 let doffset = frame.get_offset(comp) + (if !avg { x + y * dstride } else { 0 });
925 let data = frame.get_data_mut().unwrap();
926 let dst: &mut [u8] = data.as_mut_slice();
927
928 let (w_, h_) = prev_frame.get_dimensions(comp);
929 let w = (w_ + 7) & !7;
930 let h = (h_ + 7) & !7;
931
932 let mvx = mv.x / 2;
933 let mvy = mv.y / 2;
934 let dx = mvx >> 2;
935 let mut cx = (mvx & 3) as usize;
936 let dy = mvy >> 2;
937 let mut cy = (mvy & 3) as usize;
938
939 if (cx == 3) && (cy == 3) {
940 cx = 2;
941 cy = 2;
942 }
943
944 if check_pos(x, y, size, w, h, dx, dy, 0, 1, 0, 1) {
945 let sstride = prev_frame.get_stride(comp);
946 let mut soffset = prev_frame.get_offset(comp) + x + y * sstride;
947 let data = prev_frame.get_data();
948 let src: &[u8] = data.as_slice();
949 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
950 rv40_chroma_mc(dst, doffset, dstride, src, soffset, sstride, size, cx, cy);
951 } else {
952 let mut ebuf: [u8; 16*10] = [0; 16*10];
953 edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp, 4);
954 rv40_chroma_mc(dst, doffset, dstride, &ebuf, 0, 16, size, cx, cy);
955 }
956 }
957 }