// Source: git.nihav.org Git - nihav.git/blob - nihav-realmedia/src/codecs/rv40dsp.rs
// Object id: 299c39505b1dc78106e2d7adab74e5d648b1de53
// Path: [nihav.git] / nihav-realmedia / src / codecs / rv40dsp.rs
1 use nihav_core::frame::{FrameType, NAVideoBuffer};
2 use nihav_codec_support::codecs::MV;
3 use nihav_codec_support::codecs::blockdsp::edge_emu;
4 use super::rv3040::{RV34DSP, RV34MBInfo};
5
/// Saturates a signed 16-bit value to the `0..=255` range and returns it as a byte.
fn clip8(a: i16) -> u8 {
    match a {
        v if v < 0 => 0,
        v if v > 255 => 255,
        v => v as u8,
    }
}
11
/// Reads element `$idx` of the indexable `$buf` and converts it to `i16`.
macro_rules! el {
    ($buf: ident, $idx: expr) => {
        $buf[$idx] as i16
    };
}
15
/// Interpolation kernels used by the luma motion compensation routines.
///
/// Every arm takes the source buffer identifier, the position of the current
/// sample and a step, and evaluates to the filtered sample as `u8`:
/// * `01` - taps (1, -5, 52, 20, -5, 1), result is `(sum + 32) >> 6`;
/// * `02` - taps (1, -5, 20, 20, -5, 1), result is `(sum + 16) >> 5`;
/// * `03` - taps (1, -5, 20, 52, -5, 1), result is `(sum + 32) >> 6`;
/// * `33` - `(sum + 2) >> 2` over the 2x2 square starting at the position
///   (the last argument is the line stride for this arm).
///
/// The six-tap arms read samples from `pos - 2 * step` up to `pos + 3 * step`.
/// The `@sixtap` arm is an implementation detail shared by `01`, `02` and `03`.
macro_rules! filter {
    (01; $src: ident, $pos: expr, $step: expr) => (
        filter!(@sixtap $src, $pos, $step, 52, 20, 32, 6)
    );
    (02; $src: ident, $pos: expr, $step: expr) => (
        filter!(@sixtap $src, $pos, $step, 20, 20, 16, 5)
    );
    (03; $src: ident, $pos: expr, $step: expr) => (
        filter!(@sixtap $src, $pos, $step, 20, 52, 32, 6)
    );
    (33; $src: ident, $pos: expr, $stride: expr) => ({
        let base = $pos;
        let below = base + $stride;
        let sum = el!($src, base)
                + el!($src, base + 1)
                + el!($src, below)
                + el!($src, below + 1)
                + 2;
        clip8(sum >> 2)
    });
    (@sixtap $src: ident, $pos: expr, $step: expr, $cur: expr, $next: expr, $round: expr, $shift: expr) => ({
        let centre = $pos;
        let delta = $step;
        let sum =         el!($src, centre - 2 * delta)
                -     5 * el!($src, centre - delta)
                +  $cur * el!($src, centre)
                + $next * el!($src, centre + delta)
                -     5 * el!($src, centre + 2 * delta)
                +         el!($src, centre + 3 * delta)
                + $round;
        clip8(sum >> $shift)
    });
}
48
/// Generates a block motion compensation function called `$name` for
/// `$size` x `$size` blocks.
///
/// All generated functions share the signature
/// `(dst, didx, dstride, src, sidx, sstride)`: they read the block starting at
/// `src[sidx]` (line pitch `sstride`) and write it to `dst[didx]` (line pitch
/// `dstride`).
///
/// Variants:
/// * `copy` - plain copy;
/// * `mc01`/`mc02`/`mc03` - a single pass of the matching `filter!` kernel,
///   stepping by `sstride` when `$ver` is true and by one sample otherwise;
/// * `cm01`/`cm02`/`cm03` - the matching `filter!` kernel applied with step 1
///   into a temporary `($size + 5)`-line buffer that starts two source lines
///   above the block, followed by a call to `$ofilt` on that buffer;
/// * `mc33` - the `filter!(33; ...)` kernel.
///
/// The `@single` and `@double` arms are implementation details.
macro_rules! mc_func {
    (copy; $name: ident, $size: expr) => (
        fn $name(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                dst[didx..][..$size].copy_from_slice(&src[sidx..][..$size]);
                didx += dstride;
                sidx += sstride;
            }
        }
    );
    (mc01; $name: ident, $size: expr, $ver: expr) => (
        mc_func!(@single 01; $name, $size, $ver);
    );
    (mc02; $name: ident, $size: expr, $ver: expr) => (
        mc_func!(@single 02; $name, $size, $ver);
    );
    (mc03; $name: ident, $size: expr, $ver: expr) => (
        mc_func!(@single 03; $name, $size, $ver);
    );
    (cm01; $name: ident, $size: expr, $ofilt: ident) => (
        mc_func!(@double 01; $name, $size, $ofilt);
    );
    (cm02; $name: ident, $size: expr, $ofilt: ident) => (
        mc_func!(@double 02; $name, $size, $ofilt);
    );
    (cm03; $name: ident, $size: expr, $ofilt: ident) => (
        mc_func!(@double 03; $name, $size, $ofilt);
    );
    (mc33; $name: ident, $size: expr) => (
        fn $name(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = filter!(33; src, sidx + x, sstride);
                }
                sidx += sstride;
                didx += dstride;
            }
        }
    );
    // one filter pass, along the line or across lines depending on `$ver`
    (@single $kind: tt; $name: ident, $size: expr, $ver: expr) => (
        fn $name(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            let step = if $ver { sstride } else { 1 };
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = filter!($kind; src, sidx + x, step);
                }
                sidx += sstride;
                didx += dstride;
            }
        }
    );
    // filter along the lines into a scratch buffer, then let `$ofilt` finish the job
    (@double $kind: tt; $name: ident, $size: expr, $ofilt: ident) => (
        fn $name(dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
            let bstride = $size;
            // the second pass needs two lines above and three lines below the block
            sidx -= sstride * 2;
            for line in buf.chunks_mut(bstride) {
                for (x, out) in line.iter_mut().enumerate() {
                    *out = filter!($kind; src, sidx + x, 1);
                }
                sidx += sstride;
            }
            $ofilt(dst, didx, dstride, &buf, 2 * bstride, $size);
        }
    );
}
149 mc_func!(copy; copy_16, 16);
150 mc_func!(copy; copy_8, 8);
151 mc_func!(mc01; luma_mc_10_16, 16, false);
152 mc_func!(mc01; luma_mc_10_8, 8, false);
153 mc_func!(mc02; luma_mc_20_16, 16, false);
154 mc_func!(mc02; luma_mc_20_8, 8, false);
155 mc_func!(mc03; luma_mc_30_16, 16, false);
156 mc_func!(mc03; luma_mc_30_8, 8, false);
157 mc_func!(mc01; luma_mc_01_16, 16, true);
158 mc_func!(mc01; luma_mc_01_8, 8, true);
159 mc_func!(mc02; luma_mc_02_16, 16, true);
160 mc_func!(mc02; luma_mc_02_8, 8, true);
161 mc_func!(mc03; luma_mc_03_16, 16, true);
162 mc_func!(mc03; luma_mc_03_8, 8, true);
163 mc_func!(cm01; luma_mc_11_16, 16, luma_mc_01_16);
164 mc_func!(cm01; luma_mc_11_8, 8, luma_mc_01_8);
165 mc_func!(cm01; luma_mc_12_16, 16, luma_mc_02_16);
166 mc_func!(cm01; luma_mc_12_8, 8, luma_mc_02_8);
167 mc_func!(cm01; luma_mc_13_16, 16, luma_mc_03_16);
168 mc_func!(cm01; luma_mc_13_8, 8, luma_mc_03_8);
169 mc_func!(cm02; luma_mc_21_16, 16, luma_mc_01_16);
170 mc_func!(cm02; luma_mc_21_8, 8, luma_mc_01_8);
171 mc_func!(cm02; luma_mc_22_16, 16, luma_mc_02_16);
172 mc_func!(cm02; luma_mc_22_8, 8, luma_mc_02_8);
173 mc_func!(cm02; luma_mc_23_16, 16, luma_mc_03_16);
174 mc_func!(cm02; luma_mc_23_8, 8, luma_mc_03_8);
175 mc_func!(cm03; luma_mc_31_16, 16, luma_mc_01_16);
176 mc_func!(cm03; luma_mc_31_8, 8, luma_mc_01_8);
177 mc_func!(cm03; luma_mc_32_16, 16, luma_mc_02_16);
178 mc_func!(cm03; luma_mc_32_8, 8, luma_mc_02_8);
179 mc_func!(mc33; luma_mc_33_16, 16);
180 mc_func!(mc33; luma_mc_33_8, 8);
181
/// Rounding biases for chroma motion compensation, indexed as `[y][x]` with
/// the fractional parts (`0..=3`) of the chroma motion vector.
const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [
    [ 0, 4, 8, 4 ],
    [ 8, 7, 8, 7 ],
    [ 0, 8, 4, 8 ],
    [ 8, 7, 8, 7 ]
];

/// Performs chroma motion compensation for a `size` x `size` block.
///
/// * `dst`, `didx`, `dstride` - destination buffer, offset of the block inside
///   it and destination line pitch;
/// * `src`, `sidx`, `sstride` - the same for the reference data;
/// * `x`, `y` - fractional parts of the motion vector, each in `0..=3`.
///
/// With both fractions zero the block is copied. Otherwise each output sample
/// is a weighted sum of the source sample and its right and/or bottom
/// neighbours with weights adding up to 16, plus the bias from
/// `RV40_CHROMA_BIAS`, shifted right by 4. Unless both fractions are zero, the
/// source must provide one extra column (`x > 0`) and/or one extra line
/// (`y > 0`).
fn rv40_chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
    if (x == 0) && (y == 0) {
        for _ in 0..size {
            dst[didx..][..size].copy_from_slice(&src[sidx..][..size]);
            didx += dstride;
            sidx += sstride;
        }
        return;
    }
    // `x` and `y` are already 0..=3 here, so they select the table entry
    // directly. Halving them first (as was done before) could only ever reach
    // the top-left 2x2 corner of the 4x4 table and picked the wrong bias.
    let bias = RV40_CHROMA_BIAS[y][x];
    if (x > 0) && (y > 0) {
        // bilinear interpolation between four neighbouring samples
        let a = ((4 - x) * (4 - y)) as u16;
        let b = ((    x) * (4 - y)) as u16;
        let c = ((4 - x) * (    y)) as u16;
        let d = ((    x) * (    y)) as u16;
        for _ in 0..size {
            for x in 0..size {
                dst[didx + x] = ((a * (src[sidx + x] as u16)
                                + b * (src[sidx + x + 1] as u16)
                                + c * (src[sidx + x + sstride] as u16)
                                + d * (src[sidx + x + 1 + sstride] as u16) + bias) >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    } else {
        // only one fraction is non-zero: interpolate between two samples,
        // either along the line (`y == 0`) or across lines (`x == 0`)
        let a = ((4 - x) * (4 - y)) as u16;
        let e = ((    x) * (4 - y) + (4 - x) * (    y)) as u16;
        let step = if y > 0 { sstride } else { 1 };
        for _ in 0..size {
            for x in 0..size {
                dst[didx + x] = ((a * (src[sidx + x] as u16)
                                + e * (src[sidx + x + step] as u16) + bias) >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    }
}
228
/// Signature of a luma motion compensation routine:
/// `(dst, dst_offset, dst_stride, src, src_offset, src_stride)`.
type LumaMcFn = fn(&mut [u8], usize, usize, &[u8], usize, usize);

/// DSP routines holder for RV40.
pub struct RV40DSP {
    /// Luma motion compensation functions, two sets of 16 entries each.
    luma_mc: [[LumaMcFn; 16]; 2],
}
233
234 impl RV40DSP {
235 pub fn new() -> Self {
236 RV40DSP {
237 luma_mc: [
238 [ copy_16, luma_mc_10_16, luma_mc_20_16, luma_mc_30_16,
239 luma_mc_01_16, luma_mc_11_16, luma_mc_21_16, luma_mc_31_16,
240 luma_mc_02_16, luma_mc_12_16, luma_mc_22_16, luma_mc_32_16,
241 luma_mc_03_16, luma_mc_13_16, luma_mc_23_16, luma_mc_33_16 ],
242 [ copy_8, luma_mc_10_8, luma_mc_20_8, luma_mc_30_8,
243 luma_mc_01_8, luma_mc_11_8, luma_mc_21_8, luma_mc_31_8,
244 luma_mc_02_8, luma_mc_12_8, luma_mc_22_8, luma_mc_32_8,
245 luma_mc_03_8, luma_mc_13_8, luma_mc_23_8, luma_mc_33_8 ] ],
246 }
247 }
248 }
249
/// Reads element `$idx` of the indexable `$buf` and converts it to `i16`.
///
/// This repeats the definition given near the top of the file.
macro_rules! el {
    ($buf: ident, $idx: expr) => {
        $buf[$idx] as i16
    };
}
253
/// Limits `a` to the symmetric range `-lim..=lim`.
fn clip_symm(a: i16, lim: i16) -> i16 {
    let floor = -lim;
    match a {
        v if v < floor => floor,
        v if v > lim => lim,
        v => v,
    }
}
263
264 fn rv40_weak_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
265 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
266 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
267 for _ in 0..4 {
268 let p0 = el!(pix, off - step);
269 let q0 = el!(pix, off);
270
271 let t = q0 - p0;
272 if t == 0 {
273 off += stride;
274 continue;
275 }
276
277 let u = (alpha * t.wrapping_abs()) >> 7;
278 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
279 off += stride;
280 continue;
281 }
282
283 let p2 = el!(pix, off - 3*step);
284 let p1 = el!(pix, off - 2*step);
285 let q1 = el!(pix, off + step);
286 let q2 = el!(pix, off + 2*step);
287
288 let str;
289 if filter_p1 && filter_q1 {
290 str = (t << 2) + (p1 - q1);
291 } else {
292 str = t << 2;
293 }
294
295 let diff = clip_symm((str + 4) >> 3, lim_p0q0);
296 pix[off - step] = clip8(p0 + diff);
297 pix[off ] = clip8(q0 - diff);
298
299 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
300 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
301 pix[off - 2*step] = clip8(p1 - clip_symm(p1_diff, lim_p1));
302 }
303
304 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
305 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
306 pix[off + step] = clip8(q1 - clip_symm(q1_diff, lim_q1));
307 }
308
309 off += stride;
310 }
311 }
312
313 fn rv40_weak_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
314 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
315 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
316 rv40_weak_loop_filter4(pix, off, stride, 1, filter_p1, filter_q1, alpha, beta, lim_p0q0, lim_p1, lim_q1);
317 }
318 #[allow(clippy::eq_op)]
319 fn rv40_weak_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
320 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
321 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
322 let src = &mut pix[off - 3..][..stride * 3 + 3 + 3];
323 for ch in src.chunks_mut(stride).take(4) {
324 assert!(ch.len() >= 3 + 3);
325 let p0 = el!(ch, 3 - 1);
326 let q0 = el!(ch, 3);
327
328 let t = q0 - p0;
329 if t == 0 {
330 continue;
331 }
332
333 let u = (alpha * t.wrapping_abs()) >> 7;
334 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
335 continue;
336 }
337
338 let p2 = el!(ch, 3 - 3);
339 let p1 = el!(ch, 3 - 2);
340 let q1 = el!(ch, 3 + 1);
341 let q2 = el!(ch, 3 + 2);
342
343 let str;
344 if filter_p1 && filter_q1 {
345 str = (t << 2) + (p1 - q1);
346 } else {
347 str = t << 2;
348 }
349
350 let diff = clip_symm((str + 4) >> 3, lim_p0q0);
351 ch[3 - 1] = clip8(p0 + diff);
352 ch[3 ] = clip8(q0 - diff);
353
354 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
355 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
356 ch[3 - 2] = clip8(p1 - clip_symm(p1_diff, lim_p1));
357 }
358
359 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
360 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
361 ch[3 + 1] = clip8(q1 - clip_symm(q1_diff, lim_q1));
362 }
363 }
364 }
365
366
/// Dither values added by the strong loop filter when it recomputes the
/// samples before the edge; indexed with `dmode + i`.
const RV40_DITHER_L: [i16; 16] = [
    0x40, 0x50, 0x20, 0x60,
    0x30, 0x50, 0x40, 0x30,
    0x50, 0x40, 0x50, 0x30,
    0x60, 0x20, 0x50, 0x40,
];
/// Dither values added by the strong loop filter when it recomputes the
/// samples after the edge; indexed with `dmode + i`.
const RV40_DITHER_R: [i16; 16] = [
    0x40, 0x30, 0x60, 0x20,
    0x50, 0x30, 0x30, 0x40,
    0x40, 0x40, 0x50, 0x30,
    0x20, 0x60, 0x30, 0x40,
];
375
/// Five-tap smoothing kernel of the strong loop filter.
///
/// Computes `(25 * (a + e) + 26 * (b + c + d) + dither) >> 7`. When `clip` is
/// set the result is additionally kept within `c - lims ..= c + lims`.
fn sfilter(a: i16, b: i16, c: i16, d: i16, e: i16, dither: i16, clip: bool, lims: i16) -> i16 {
    let smoothed = (25 * (a + e) + 26 * (b + c + d) + dither) >> 7;
    if !clip {
        return smoothed;
    }
    match smoothed {
        v if v < c - lims => c - lims,
        v if v > c + lims => c + lims,
        v => v,
    }
}
390
391 fn rv40_strong_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
392 alpha: i16, lims: i16, dmode: usize, chroma: bool) {
393 for i in 0..4 {
394 let p0 = el!(pix, off - step);
395 let q0 = el!(pix, off);
396
397 let t = q0 - p0;
398 if t == 0 {
399 off += stride;
400 continue;
401 }
402
403 let fmode = (alpha * t.wrapping_abs()) >> 7;
404 if fmode > 1 {
405 off += stride;
406 continue;
407 }
408
409 let p3 = el!(pix, off - 4*step);
410 let p2 = el!(pix, off - 3*step);
411 let p1 = el!(pix, off - 2*step);
412 let q1 = el!(pix, off + step);
413 let q2 = el!(pix, off + 2*step);
414 let q3 = el!(pix, off + 3*step);
415
416 let np0 = sfilter(p2, p1, p0, q0, q1, RV40_DITHER_L[dmode + i], fmode != 0, lims);
417 let nq0 = sfilter( p1, p0, q0, q1, q2, RV40_DITHER_R[dmode + i], fmode != 0, lims);
418
419 let np1 = sfilter(p3, p2, p1, np0, q0, RV40_DITHER_L[dmode + i], fmode != 0, lims);
420 let nq1 = sfilter( p0, nq0, q1, q2, q3, RV40_DITHER_R[dmode + i], fmode != 0, lims);
421
422 pix[off - 2*step] = np1 as u8;
423 pix[off - step] = np0 as u8;
424 pix[off] = nq0 as u8;
425 pix[off + step] = nq1 as u8;
426
427 if !chroma {
428 let np2 = sfilter(np0, np1, p2, p3, p2, 64, false, 0);
429 let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0);
430 pix[off - 3*step] = np2 as u8;
431 pix[off + 2*step] = nq2 as u8;
432 }
433
434 off += stride;
435 }
436 }
437
438 fn rv40_loop_strength(pix: &[u8], off: usize, step: usize, stride: usize,
439 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
440 let mut sum_p1p0 = 0;
441 let mut sum_q1q0 = 0;
442
443 let mut off1 = off;
444 for _ in 0..4 {
445 sum_p1p0 += el!(pix, off1 - 2 * step) - el!(pix, off1 - step);
446 sum_q1q0 += el!(pix, off1 + step) - el!(pix, off1);
447 off1 += stride;
448 }
449
450 let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
451 let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;
452
453 if (!filter_p1 || !filter_q1) || !edge {
454 return (false, filter_p1, filter_q1);
455 }
456
457 let mut sum_p1p2 = 0;
458 let mut sum_q1q2 = 0;
459
460 let mut off1 = off;
461 for _ in 0..4 {
462 sum_p1p2 += el!(pix, off1 - 2 * step) - el!(pix, off1 - 3 * step);
463 sum_q1q2 += el!(pix, off1 + step) - el!(pix, off1 + 2 * step);
464 off1 += stride;
465 }
466
467 let strong = (sum_p1p2.wrapping_abs() < beta2) && (sum_q1q2.wrapping_abs() < beta2);
468
469 (strong, filter_p1, filter_q1)
470 }
471
472 fn rv40_loop_strength_h(pix: &[u8], off: usize, stride: usize,
473 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
474 rv40_loop_strength(pix, off, stride, 1, beta, beta2, edge)
475 }
476
477 #[allow(clippy::eq_op)]
478 fn rv40_loop_strength_v(pix: &[u8], off: usize, stride: usize,
479 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
480 let src = &pix[off - 3..][..stride * 3 + 3 + 3];
481 let mut sum_p1p0 = 0;
482 let mut sum_q1q0 = 0;
483
484 for ch in src.chunks(stride).take(4) {
485 assert!(ch.len() >= 3 + 3);
486 sum_p1p0 += el!(ch, 3 - 2) - el!(ch, 3 - 1);
487 sum_q1q0 += el!(ch, 3 + 1) - el!(ch, 3);
488 }
489
490 let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
491 let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;
492
493 if (!filter_p1 || !filter_q1) || !edge {
494 return (false, filter_p1, filter_q1);
495 }
496
497 let mut sum_p1p2 = 0;
498 let mut sum_q1q2 = 0;
499
500 for ch in src.chunks(stride).take(4) {
501 assert!(ch.len() >= 3 + 3);
502 sum_p1p2 += el!(ch, 3 - 2) - el!(ch, 3 - 3);
503 sum_q1q2 += el!(ch, 3 + 1) - el!(ch, 3 + 2);
504 }
505
506 let strong = (sum_p1p2.wrapping_abs() < beta2) && (sum_q1q2.wrapping_abs() < beta2);
507
508 (strong, filter_p1, filter_q1)
509 }
510
511 fn rv40_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
512 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
513 chroma: bool, edge: bool) {
514 let (strong, filter_p1, filter_q1) = rv40_loop_strength_h(pix, off, stride, beta, beta2, edge);
515 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
516
517 if strong {
518 rv40_strong_loop_filter4(pix, off, stride, 1, alpha, lims, dmode, chroma);
519 } else if filter_p1 && filter_q1 {
520 rv40_weak_loop_filter4_h(pix, off, stride, true, true, alpha, beta,
521 lims, lim_p1, lim_q1);
522 } else if filter_p1 || filter_q1 {
523 rv40_weak_loop_filter4_h(pix, off, stride, filter_p1, filter_q1, alpha, beta,
524 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
525 }
526 }
527
528 fn rv40_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
529 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
530 chroma: bool, edge: bool) {
531 let (strong, filter_p1, filter_q1) = rv40_loop_strength_v(pix, off, stride, beta, beta2, edge);
532 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
533
534 if strong {
535 rv40_strong_loop_filter4(pix, off, 1, stride, alpha, lims, dmode, chroma);
536 } else if filter_p1 && filter_q1 {
537 rv40_weak_loop_filter4_v(pix, off, stride, true, true, alpha, beta,
538 lims, lim_p1, lim_q1);
539 } else if filter_p1 || filter_q1 {
540 rv40_weak_loop_filter4_v(pix, off, stride, filter_p1, filter_q1, alpha, beta,
541 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
542 }
543 }
544
/// Loop filter `alpha` threshold, indexed by the macroblock quantiser.
const RV40_ALPHA_TAB: [i16; 32] = [
    128, 128, 128, 128,
    128, 128, 128, 128,
    128, 128, 122,  96,
     75,  59,  47,  37,
     29,  23,  18,  15,
     13,  11,  10,   9,
      8,   7,   6,   5,
      4,   3,   2,   1,
];
551
/// Loop filter `beta` threshold, indexed by the macroblock quantiser.
const RV40_BETA_TAB: [i16; 32] = [
     0,  0,  0,  0,  0,  0,  0,  0,
     3,  3,  3,  4,  4,  4,  6,  6,
     6,  7,  8,  8,  9,  9, 10, 10,
    11, 11, 12, 13, 14, 15, 16, 17,
];
556
/// Loop filter limits, indexed by block strength (0, 1 or 2) and then by the
/// quantiser.
const RV40_FILTER_CLIP_TBL: [[i16; 32]; 3] = [
    // strength 0
    [
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
    ],
    // strength 1
    [
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 2, 2, 2,
        2, 3, 3, 3, 3, 4, 5, 5,
    ],
    // strength 2
    [
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1,
        1, 2, 2, 2, 2, 3, 3, 3,
        4, 4, 5, 5, 5, 7, 8, 9,
    ],
];
569
/// Evaluates to `true` when bit number `$bit` of `$mask` is set.
macro_rules! test_bit {
    ($mask: expr, $bit: expr) => {
        (($mask >> $bit) & 1) == 1
    };
}
573
/// Checks whether a displaced block together with its margins lies inside the
/// picture.
///
/// The block of `size` x `size` samples at (`x`, `y`) is moved by (`dx`, `dy`)
/// and extended by `e0` samples to the left, `e1` to the right, `e2` above and
/// `e3` below. Returns `true` when the extended block fits into `w` x `h`.
fn check_pos(x: usize, y: usize, size: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let left   = (x as isize) + (dx as isize);
    let top    = (y as isize) + (dy as isize);
    let right  = left + (size as isize);
    let bottom = top  + (size as isize);

    let fits_hor = (left - e0 >= 0) && (right  + e1 <= (w as isize));
    let fits_ver = (top  - e2 >= 0) && (bottom + e3 <= (h as isize));

    fits_hor && fits_ver
}
580
/// Number of extra samples needed before a block for luma interpolation,
/// indexed by the fractional part of the motion vector component.
const RV40_EDGE1: [isize; 4] = [0, 2, 2, 2];
/// Number of extra samples needed after a block for luma interpolation,
/// indexed by the fractional part of the motion vector component.
const RV40_EDGE2: [isize; 4] = [0, 3, 3, 3];
583
// Bit masks selecting parts of a luma block pattern: 4x4 blocks, one bit per
// block, bit index `x + y * 4`.
const Y_TOP_ROW_MASK:   u32 = 0x000F;
const Y_BOT_ROW_MASK:   u32 = 0xF000;
const Y_LEFT_COL_MASK:  u32 = 0x1111;
const Y_RIGHT_COL_MASK: u32 = 0x8888;
// The same for a chroma block pattern: 2x2 blocks, bit index `x + y * 2`.
const C_TOP_ROW_MASK:   u32 = 0x3;
const C_BOT_ROW_MASK:   u32 = 0xC;
const C_LEFT_COL_MASK:  u32 = 0x5;
const C_RIGHT_COL_MASK: u32 = 0xA;
592
593 impl RV34DSP for RV40DSP {
594 #[allow(clippy::cyclomatic_complexity)]
595 fn loop_filter(&self, frame: &mut NAVideoBuffer<u8>, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, mb_h: usize, row: usize) {
596 // todo proper B-frame filtering?
597 let mut offs: [usize; 3] = [0; 3];
598 let mut stride: [usize; 3] = [0; 3];
599 let (w, h) = frame.get_dimensions(0);
600 let small_frame = w * h <= 176*144;
601
602 for comp in 0..3 {
603 stride[comp] = frame.get_stride(comp);
604 let start = if comp == 0 { row * 16 } else { row * 8 };
605 offs[comp] = frame.get_offset(comp) + start * stride[comp];
606 }
607
608 let data = frame.get_data_mut().unwrap();
609 let dst: &mut [u8] = data.as_mut_slice();
610
611 let is_last_row = row == mb_h - 1;
612
613 let mut mb_pos: usize = row * mb_w;
614 let mut left_q: usize = 0;
615 let mut left_cbp = 0;
616 let mut left_dbk = 0;
617 for mb_x in 0..mb_w {
618 let q = mbinfo[mb_pos].q as usize;
619 let alpha = RV40_ALPHA_TAB[q];
620 let beta = RV40_BETA_TAB[q];
621 let beta_y = if small_frame { beta * 4 } else { beta * 3 };
622 let beta_c = beta * 3;
623
624 let is_strong = mbinfo[mb_pos].mbtype.is_intra_or_16();
625 let top_is_strong = row > 0 && mbinfo[mb_pos - mb_w].mbtype.is_intra_or_16();
626 let left_is_strong = mb_x > 0 && mbinfo[mb_pos - 1].mbtype.is_intra_or_16();
627 let bot_is_strong = !is_last_row && mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16();
628
629 let cur_dbk = mbinfo[mb_pos].deblock;
630 let cur_cbp = if is_strong { 0xFFFFFF } else { mbinfo[mb_pos].cbp };
631
632 let (top_cbp, top_dbk) = if row > 0 {
633 (if top_is_strong { 0xFFFFFF } else { mbinfo[mb_pos - mb_w].cbp }, mbinfo[mb_pos - mb_w].deblock)
634 } else {
635 (0, 0)
636 };
637 let (bot_cbp, bot_dbk) = if !is_last_row {
638 (mbinfo[mb_pos + mb_w].cbp, mbinfo[mb_pos + mb_w].deblock)
639 } else {
640 (0, 0)
641 };
642
643 let y_cbp = cur_cbp & 0xFFFF;
644 let y_to_deblock = (cur_dbk as u32) | ((bot_dbk as u32) << 16);
645 let mut y_h_deblock = y_to_deblock | ((y_cbp << 4) & !Y_TOP_ROW_MASK) | ((top_cbp & Y_BOT_ROW_MASK) >> 12);
646 let mut y_v_deblock = y_to_deblock | ((y_cbp << 1) & !Y_LEFT_COL_MASK) | ((left_cbp & Y_RIGHT_COL_MASK) >> 3);
647
648 if mb_x == 0 {
649 y_v_deblock &= !Y_LEFT_COL_MASK;
650 }
651 if row == 0 {
652 y_h_deblock &= !Y_TOP_ROW_MASK;
653 }
654 if is_last_row || is_strong || bot_is_strong {
655 y_h_deblock &= !(Y_TOP_ROW_MASK << 16);
656 }
657
658 for y in 0..4 {
659 let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
660 for x in 0..4 {
661 let bpos = x + y * 4;
662 let ver_strong = (x == 0) && (mb_x > 0) && (is_strong || left_is_strong);
663
664 let cur_strength: usize;
665 if is_strong {
666 cur_strength = 2;
667 } else if test_bit!(cur_dbk, bpos) {
668 cur_strength = 1;
669 } else {
670 cur_strength = 0;
671 }
672
673 let left_strength: usize;
674 if x > 0 {
675 if is_strong {
676 left_strength = 2;
677 } else if test_bit!(cur_dbk, bpos - 1) {
678 left_strength = 1;
679 } else {
680 left_strength = 0;
681 }
682 } else if mb_x > 0 {
683 if left_is_strong {
684 left_strength = 2;
685 } else if test_bit!(left_dbk, bpos + 3) {
686 left_strength = 1;
687 } else {
688 left_strength = 0;
689 }
690 } else {
691 left_strength = 0;
692 }
693
694 let bot_strength: usize;
695 if y < 3 {
696 if is_strong {
697 bot_strength = 2;
698 } else if test_bit!(cur_dbk, bpos + 4) {
699 bot_strength = 1;
700 } else {
701 bot_strength = 0;
702 }
703 } else if !is_last_row {
704 if mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16() {
705 bot_strength = 2;
706 } else if test_bit!(bot_dbk, x) {
707 bot_strength = 1;
708 } else {
709 bot_strength = 0;
710 }
711 } else {
712 bot_strength = 0;
713 }
714
715 let top_strength: usize;
716 if y > 0 {
717 if is_strong {
718 top_strength = 2;
719 } else if test_bit!(cur_dbk, bpos - 4) {
720 top_strength = 1;
721 } else {
722 top_strength = 0;
723 }
724 } else if row > 0 {
725 if top_is_strong {
726 top_strength = 2;
727 } else if test_bit!(top_dbk, bpos + 12) {
728 top_strength = 1;
729 } else {
730 top_strength = 0;
731 }
732 } else {
733 top_strength = 0;
734 }
735
736 let l_q = if x > 0 { q } else { left_q };
737 let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };
738
739 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
740 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
741 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
742 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
743
744 let dmode = if y > 0 { x + y * 4 } else { x * 4 };
745
746 if test_bit!(y_h_deblock, bpos + 4) {
747 rv40_loop_filter4_h(dst, yoff + 4 * stride[0] + x * 4, stride[0],
748 dmode, lim_cur, lim_bottom, alpha, beta, beta_y, false, false);
749 }
750 if test_bit!(y_v_deblock, bpos) && !ver_strong {
751 rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
752 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, false);
753 }
754 if (y == 0) && test_bit!(y_h_deblock, bpos) && (is_strong || top_is_strong) {
755 rv40_loop_filter4_h(dst, yoff + x * 4, stride[0],
756 dmode, lim_top, lim_cur, alpha, beta, beta_y, false, true);
757 }
758 if test_bit!(y_v_deblock, bpos) && ver_strong {
759 rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
760 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, true);
761 }
762 }
763 }
764
765 for comp in 1..3 {
766 let cshift = 16 - 4 + comp * 4;
767 let c_cur_cbp = (cur_cbp >> cshift) & 0xF;
768 let c_top_cbp = (top_cbp >> cshift) & 0xF;
769 let c_left_cbp = (left_cbp >> cshift) & 0xF;
770 let c_bot_cbp = (bot_cbp >> cshift) & 0xF;
771
772 let c_deblock = c_cur_cbp | (c_bot_cbp << 4);
773 let mut c_v_deblock = c_deblock | ((c_cur_cbp << 1) & !C_LEFT_COL_MASK) | ((c_left_cbp & C_RIGHT_COL_MASK) >> 1);
774 let mut c_h_deblock = c_deblock | ((c_cur_cbp & C_TOP_ROW_MASK) << 2) | ((c_top_cbp & C_BOT_ROW_MASK) >> 2);
775 if mb_x == 0 {
776 c_v_deblock &= !C_LEFT_COL_MASK;
777 }
778 if row == 0 {
779 c_h_deblock &= !C_TOP_ROW_MASK;
780 }
781 if is_last_row || is_strong || bot_is_strong {
782 c_h_deblock &= !(C_TOP_ROW_MASK << 4);
783 }
784
785 for y in 0..2 {
786 let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
787 for x in 0..2 {
788 let bpos = x + y * 2;
789
790 let ver_strong = (x == 0) && (is_strong || left_is_strong);
791
792 let cur_strength: usize;
793 if is_strong {
794 cur_strength = 2;
795 } else if test_bit!(c_cur_cbp, bpos) {
796 cur_strength = 1;
797 } else {
798 cur_strength = 0;
799 }
800
801 let left_strength: usize;
802 if x > 0 {
803 if is_strong {
804 left_strength = 2;
805 } else if test_bit!(c_cur_cbp, bpos - 1) {
806 left_strength = 1;
807 } else {
808 left_strength = 0;
809 }
810 } else if mb_x > 0 {
811 if left_is_strong {
812 left_strength = 2;
813 } else if test_bit!(c_left_cbp, bpos + 1) {
814 left_strength = 1;
815 } else {
816 left_strength = 0;
817 }
818 } else {
819 left_strength = 0;
820 }
821
822 let bot_strength: usize;
823 if y != 3 {
824 if is_strong {
825 bot_strength = 2;
826 } else if test_bit!(c_cur_cbp, bpos + 2) {
827 bot_strength = 1;
828 } else {
829 bot_strength = 0;
830 }
831 } else if !is_last_row {
832 if mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16() {
833 bot_strength = 2;
834 } else if test_bit!(c_bot_cbp, x) {
835 bot_strength = 1;
836 } else {
837 bot_strength = 0;
838 }
839 } else {
840 bot_strength = 0;
841 }
842
843 let top_strength: usize;
844 if y > 0 {
845 if is_strong {
846 top_strength = 2;
847 } else if test_bit!(c_cur_cbp, bpos - 2) {
848 top_strength = 1;
849 } else {
850 top_strength = 0;
851 }
852 } else if row > 0 {
853 if top_is_strong {
854 top_strength = 2;
855 } else if test_bit!(c_top_cbp, bpos + 2) {
856 top_strength = 1;
857 } else {
858 top_strength = 0;
859 }
860 } else {
861 top_strength = 0;
862 }
863
864 let l_q = if x > 0 { q } else { left_q };
865 let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };
866
867 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
868 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
869 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
870 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
871
872 if test_bit!(c_h_deblock, bpos + 2) {
873 rv40_loop_filter4_h(dst, coff + 4 * stride[comp] + x * 4, stride[comp],
874 x * 8, lim_cur, lim_bottom, alpha, beta, beta_c, true, false);
875 }
876 if test_bit!(c_v_deblock, bpos) && !ver_strong {
877 rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
878 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, false);
879 }
880 if (y == 0) && test_bit!(c_h_deblock, bpos) && (is_strong || top_is_strong) {
881 rv40_loop_filter4_h(dst, coff + x * 4, stride[comp],
882 x * 8, lim_top, lim_cur, alpha, beta, beta_c, true, true);
883 }
884 if test_bit!(c_v_deblock, bpos) && ver_strong {
885 rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
886 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, true);
887 }
888 }
889 }
890 }
891
892 left_q = q;
893 left_dbk = cur_dbk;
894 left_cbp = cur_cbp;
895
896 mb_pos += 1;
897 }
898 }
899 fn do_luma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, mv: MV, use16: bool, avg: bool) {
900 let size: usize = if use16 { 16 } else { 8 };
901 let dstride = frame.get_stride(0);
902 let doffset = frame.get_offset(0) + (if !avg { x + y * dstride } else { 0 });
903 let data = frame.get_data_mut().unwrap();
904 let dst: &mut [u8] = data.as_mut_slice();
905
906 let (w_, h_) = prev_frame.get_dimensions(0);
907 let w = (w_ + 15) & !15;
908 let h = (h_ + 15) & !15;
909
910 let dx = mv.x >> 2;
911 let cx = (mv.x & 3) as usize;
912 let dy = mv.y >> 2;
913 let cy = (mv.y & 3) as usize;
914 let mode = cx + cy * 4;
915
916 if check_pos(x, y, size, w, h, dx, dy, RV40_EDGE1[cx], RV40_EDGE2[cx], RV40_EDGE1[cy], RV40_EDGE2[cy]) {
917 let sstride = prev_frame.get_stride(0);
918 let mut soffset = prev_frame.get_offset(0) + x + y * sstride;
919 let data = prev_frame.get_data();
920 let src: &[u8] = data.as_slice();
921 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
922 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, src, soffset, sstride);
923 } else {
924 let mut ebuf: [u8; 32*22] = [0; 32*22];
925 edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0, 4);
926 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, &ebuf, 32*2 + 2, 32);
927 }
928 }
929 fn do_chroma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, comp: usize, mv: MV, use8: bool, avg: bool) {
930 let size: usize = if use8 { 8 } else { 4 };
931 let dstride = frame.get_stride(comp);
932 let doffset = frame.get_offset(comp) + (if !avg { x + y * dstride } else { 0 });
933 let data = frame.get_data_mut().unwrap();
934 let dst: &mut [u8] = data.as_mut_slice();
935
936 let (w_, h_) = prev_frame.get_dimensions(comp);
937 let w = (w_ + 7) & !7;
938 let h = (h_ + 7) & !7;
939
940 let mvx = mv.x / 2;
941 let mvy = mv.y / 2;
942 let dx = mvx >> 2;
943 let mut cx = (mvx & 3) as usize;
944 let dy = mvy >> 2;
945 let mut cy = (mvy & 3) as usize;
946
947 if (cx == 3) && (cy == 3) {
948 cx = 2;
949 cy = 2;
950 }
951
952 if check_pos(x, y, size, w, h, dx, dy, 0, 1, 0, 1) {
953 let sstride = prev_frame.get_stride(comp);
954 let mut soffset = prev_frame.get_offset(comp) + x + y * sstride;
955 let data = prev_frame.get_data();
956 let src: &[u8] = data.as_slice();
957 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
958 rv40_chroma_mc(dst, doffset, dstride, src, soffset, sstride, size, cx, cy);
959 } else {
960 let mut ebuf: [u8; 16*10] = [0; 16*10];
961 edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp, 4);
962 rv40_chroma_mc(dst, doffset, dstride, &ebuf, 0, 16, size, cx, cy);
963 }
964 }
965 }