b40ef532a427b852652bad5249d198e998570f10
[nihav.git] / nihav-realmedia / src / codecs / rv40dsp.rs
1 use nihav_core::frame::{FrameType, NAVideoBuffer};
2 use nihav_codec_support::codecs::MV;
3 use nihav_codec_support::codecs::blockdsp::edge_emu;
4 use super::rv3040::{RV34DSP, RV34MBInfo};
5
/// Saturates a signed 16-bit intermediate value to the 8-bit pixel range.
///
/// Negative inputs yield 0, inputs above 255 yield 255, everything else is
/// returned unchanged.
fn clip8(a: i16) -> u8 {
    match a {
        v if v < 0 => 0,
        v if v > 255 => 255,
        v => v as u8,
    }
}
11
/// Reads element `$o` of the indexable value `$s` and converts it to `i16`
/// so that the filters can do signed arithmetic on pixel values.
macro_rules! el {
    ($s: ident, $o: expr) => ( $s[$o] as i16 )
}
15
/// Six-tap interpolation filters used by the luma motion compensation.
///
/// Each arm reads the samples at `$o - 2*$step ..= $o + 3*$step` from `$s`,
/// forms a weighted sum, adds the rounding term, shifts and saturates the
/// result to `u8` with `clip8`:
///
/// * `01` - taps `(1, -5, 52, 20, -5, 1)`, rounding 32, shift 6;
/// * `02` - taps `(1, -5, 20, 20, -5, 1)`, rounding 16, shift 5;
/// * `03` - taps `(1, -5, 20, 52, -5, 1)`, rounding 32, shift 6.
///
/// Presumably these are the 1/4, 1/2 and 3/4 sample positions - confirm
/// against the codec description.
macro_rules! filter {
    (01; $s: ident, $o: expr, $step: expr) => (
            clip8(((      el!($s, $o - 2 * $step)
                     -5 * el!($s, $o - 1 * $step)
                    +52 * el!($s, $o - 0 * $step)
                    +20 * el!($s, $o + 1 * $step)
                     -5 * el!($s, $o + 2 * $step)
                        + el!($s, $o + 3 * $step) + 32) >> 6) as i16)
        );
    (02; $s: ident, $o: expr, $step: expr) => (
            clip8(((      el!($s, $o - 2 * $step)
                     -5 * el!($s, $o - 1 * $step)
                    +20 * el!($s, $o - 0 * $step)
                    +20 * el!($s, $o + 1 * $step)
                     -5 * el!($s, $o + 2 * $step)
                        + el!($s, $o + 3 * $step) + 16) >> 5) as i16)
        );
    (03; $s: ident, $o: expr, $step: expr) => (
            clip8(((      el!($s, $o - 2 * $step)
                     -5 * el!($s, $o - 1 * $step)
                    +20 * el!($s, $o - 0 * $step)
                    +52 * el!($s, $o + 1 * $step)
                     -5 * el!($s, $o + 2 * $step)
                        + el!($s, $o + 3 * $step) + 32) >> 6) as i16)
        );
}
42
/// Generates one luma motion-compensation function per invocation.
///
/// Every generated function has the signature
/// `fn(dst, didx, dstride, src, sidx, sstride)` and writes a
/// `$size` x `$size` block into `dst` starting at index `didx`, reading the
/// reference samples from `src` around index `sidx`.
///
/// Arms:
/// * `copy` - plain block copy;
/// * `mc01` / `mc02` / `mc03` - a single pass of the matching `filter!`
///   variant, applied along columns when `$ver` is true and along rows
///   otherwise;
/// * `cm01` / `cm02` / `cm03` - two passes: the matching `filter!` variant is
///   applied along rows into a temporary buffer, then `$ofilt` (one of the
///   column-filtering functions) produces the final block from that buffer.
macro_rules! mc_func {
    (copy; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                // taking fixed-length subslices first bounds-checks the whole row once
                let d = &mut dst[didx..][..$size];
                let s = &src[sidx..][..$size];
                for x in 0..$size { d[x] = s[x]; }
                didx += dstride;
                sidx += sstride;
            }
        }
        );
    (mc01; $name: ident, $size: expr, $ver: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            // distance between filter taps: one line for vertical, one sample for horizontal
            let step = if $ver { sstride } else { 1 };
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = filter!(01; src, sidx + x, step);
                }
                sidx += sstride;
                didx += dstride;
            }
        }
        );
    (mc02; $name: ident, $size: expr, $ver: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            // distance between filter taps: one line for vertical, one sample for horizontal
            let step = if $ver { sstride } else { 1 };
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = filter!(02; src, sidx + x, step);
                }
                sidx += sstride;
                didx += dstride;
            }
        }
        );
    (mc03; $name: ident, $size: expr, $ver: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            // distance between filter taps: one line for vertical, one sample for horizontal
            let step = if $ver { sstride } else { 1 };
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = filter!(03; src, sidx + x, step);
                }
                sidx += sstride;
                didx += dstride;
            }
        }
        );
    (cm01; $name: ident, $size: expr, $ofilt: ident) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            // temporary block with 5 extra rows: the column filter reads
            // two rows above and three rows below each output row
            let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
            let mut bidx = 0;
            let bstride = $size;
            // start two rows above the block
            sidx -= sstride * 2;
            for _ in 0..$size+5 {
                for x in 0..$size { buf[bidx + x] = filter!(01; src, sidx + x, 1); }
                bidx += bstride;
                sidx += sstride;
            }
            // the block proper begins at the third row of the temporary buffer
            $ofilt(dst, didx, dstride, &buf, 2*bstride, $size);
        }
        );
    (cm02; $name: ident, $size: expr, $ofilt: ident) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            // temporary block with 5 extra rows: the column filter reads
            // two rows above and three rows below each output row
            let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
            let mut bidx = 0;
            let bstride = $size;
            // start two rows above the block
            sidx -= sstride * 2;
            for _ in 0..$size+5 {
                for x in 0..$size { buf[bidx + x] = filter!(02; src, sidx + x, 1); }
                bidx += bstride;
                sidx += sstride;
            }
            // the block proper begins at the third row of the temporary buffer
            $ofilt(dst, didx, dstride, &buf, 2*bstride, $size);
        }
        );
    (cm03; $name: ident, $size: expr, $ofilt: ident) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            // temporary block with 5 extra rows: the column filter reads
            // two rows above and three rows below each output row
            let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
            let mut bidx = 0;
            let bstride = $size;
            // start two rows above the block
            sidx -= sstride * 2;
            for _ in 0..$size+5 {
                for x in 0..$size { buf[bidx + x] = filter!(03; src, sidx + x, 1); }
                bidx += bstride;
                sidx += sstride;
            }
            // the block proper begins at the third row of the temporary buffer
            $ofilt(dst, didx, dstride, &buf, 2*bstride, $size);
        }
        );
}
// Instantiate the luma motion-compensation functions.
//
// Naming scheme of `luma_mc_XY_N`:
//  * `X` is the filter variant applied along rows (0 = none),
//  * `Y` is the filter variant applied along columns (0 = none),
//  * `N` is the block size (16 or 8).
// Functions with both digits non-zero run the row filter `X` first and then
// hand the intermediate block to the column-only function `luma_mc_0Y_N`.
mc_func!(copy; copy_16, 16);
mc_func!(copy; copy_8, 8);
mc_func!(mc01; luma_mc_10_16, 16, false);
mc_func!(mc01; luma_mc_10_8, 8, false);
mc_func!(mc02; luma_mc_20_16, 16, false);
mc_func!(mc02; luma_mc_20_8, 8, false);
mc_func!(mc03; luma_mc_30_16, 16, false);
mc_func!(mc03; luma_mc_30_8, 8, false);
mc_func!(mc01; luma_mc_01_16, 16, true);
mc_func!(mc01; luma_mc_01_8, 8, true);
mc_func!(mc02; luma_mc_02_16, 16, true);
mc_func!(mc02; luma_mc_02_8, 8, true);
mc_func!(mc03; luma_mc_03_16, 16, true);
mc_func!(mc03; luma_mc_03_8, 8, true);
mc_func!(cm01; luma_mc_11_16, 16, luma_mc_01_16);
mc_func!(cm01; luma_mc_11_8, 8, luma_mc_01_8);
mc_func!(cm01; luma_mc_12_16, 16, luma_mc_02_16);
mc_func!(cm01; luma_mc_12_8, 8, luma_mc_02_8);
mc_func!(cm01; luma_mc_13_16, 16, luma_mc_03_16);
mc_func!(cm01; luma_mc_13_8, 8, luma_mc_03_8);
mc_func!(cm02; luma_mc_21_16, 16, luma_mc_01_16);
mc_func!(cm02; luma_mc_21_8, 8, luma_mc_01_8);
mc_func!(cm02; luma_mc_22_16, 16, luma_mc_02_16);
mc_func!(cm02; luma_mc_22_8, 8, luma_mc_02_8);
mc_func!(cm02; luma_mc_23_16, 16, luma_mc_03_16);
mc_func!(cm02; luma_mc_23_8, 8, luma_mc_03_8);
mc_func!(cm03; luma_mc_31_16, 16, luma_mc_01_16);
mc_func!(cm03; luma_mc_31_8, 8, luma_mc_01_8);
mc_func!(cm03; luma_mc_32_16, 16, luma_mc_02_16);
mc_func!(cm03; luma_mc_32_8, 8, luma_mc_02_8);
mc_func!(cm03; luma_mc_33_16, 16, luma_mc_03_16);
mc_func!(cm03; luma_mc_33_8, 8, luma_mc_03_8);
166
/// Rounding terms added before the final `>> 4` in `rv40_chroma_mc`.
///
/// The table is indexed as `[row][column]` by values derived from the
/// vertical and horizontal fractional offsets.
const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [
    [ 0, 4, 8, 4 ],
    [ 8, 7, 8, 7 ],
    [ 0, 8, 4, 8 ],
    [ 8, 7, 8, 7 ]
];
173
174 fn rv40_chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
175 if (x == 0) && (y == 0) {
176 for _ in 0..size {
177 for x in 0..size { dst[didx + x] = src[sidx + x]; }
178 didx += dstride;
179 sidx += sstride;
180 }
181 return;
182 }
183 let bias = RV40_CHROMA_BIAS[y >> 1][x >> 1];
184 if (x > 0) && (y > 0) {
185 let a = ((4 - x) * (4 - y)) as u16;
186 let b = (( x) * (4 - y)) as u16;
187 let c = ((4 - x) * ( y)) as u16;
188 let d = (( x) * ( y)) as u16;
189 for _ in 0..size {
190 for x in 0..size {
191 dst[didx + x] = ((a * (src[sidx + x] as u16)
192 + b * (src[sidx + x + 1] as u16)
193 + c * (src[sidx + x + sstride] as u16)
194 + d * (src[sidx + x + 1 + sstride] as u16) + bias) >> 4) as u8;
195 }
196 didx += dstride;
197 sidx += sstride;
198 }
199 } else {
200 let a = ((4 - x) * (4 - y)) as u16;
201 let e = (( x) * (4 - y) + (4 - x) * ( y)) as u16;
202 let step = if y > 0 { sstride } else { 1 };
203 for _ in 0..size {
204 for x in 0..size {
205 dst[didx + x] = ((a * (src[sidx + x] as u16)
206 + e * (src[sidx + x + step] as u16) + bias) >> 4) as u8;
207 }
208 didx += dstride;
209 sidx += sstride;
210 }
211 }
212 }
213
/// DSP routines for RealVideo 4.
pub struct RV40DSP {
    // Luma motion-compensation functions: the first index selects the block
    // size (0 for 16x16, 1 for 8x8), the second one is `cx + cy * 4` where
    // `cx`/`cy` are the fractional parts of the motion vector.
    luma_mc: [[fn (&mut [u8], usize, usize, &[u8], usize, usize); 16]; 2],
}
217
218 impl RV40DSP {
219 pub fn new() -> Self {
220 RV40DSP {
221 luma_mc: [
222 [ copy_16, luma_mc_10_16, luma_mc_20_16, luma_mc_30_16,
223 luma_mc_01_16, luma_mc_11_16, luma_mc_21_16, luma_mc_31_16,
224 luma_mc_02_16, luma_mc_12_16, luma_mc_22_16, luma_mc_32_16,
225 luma_mc_03_16, luma_mc_13_16, luma_mc_23_16, luma_mc_33_16 ],
226 [ copy_8, luma_mc_10_8, luma_mc_20_8, luma_mc_30_8,
227 luma_mc_01_8, luma_mc_11_8, luma_mc_21_8, luma_mc_31_8,
228 luma_mc_02_8, luma_mc_12_8, luma_mc_22_8, luma_mc_32_8,
229 luma_mc_03_8, luma_mc_13_8, luma_mc_23_8, luma_mc_33_8 ] ],
230 }
231 }
232 }
233
/// Reads element `$o` of `$src` as `i16`; this redefinition is the one in
/// effect for the loop filter code that follows.
macro_rules! el {
    ($src: ident, $o: expr) => ($src[$o] as i16);
}
237
/// Limits `a` to the symmetric range `-lim..=lim`.
fn clip_symm(a: i16, lim: i16) -> i16 {
    let low = -lim;
    match a {
        v if v < low => low,
        v if v > lim => lim,
        v => v,
    }
}
247
/// Weak deblocking filter applied to four consecutive lines across an edge.
///
/// * `pix`, `off` - pixel buffer and index of the first sample after the edge
///   (`q0`) on the first line;
/// * `step` - distance between neighbouring samples across the edge;
/// * `stride` - distance between the consecutive lines being filtered;
/// * `filter_p1`, `filter_q1` - whether the second sample on the `p`/`q` side
///   may be modified too;
/// * `alpha`, `beta` - thresholds for the edge activity tests;
/// * `lim_p0q0`, `lim_p1`, `lim_q1` - clipping limits for the corrections.
fn rv40_weak_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
                          filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
                          lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
    for _ in 0..4 {
        let p0 = el!(pix, off - step);
        let q0 = el!(pix, off);

        // no step across the edge - nothing to smooth
        let t = q0 - p0;
        if t == 0 {
            off += stride;
            continue;
        }

        // skip lines where the scaled step is too large
        let u = (alpha * t.wrapping_abs()) >> 7;
        if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
            off += stride;
            continue;
        }

        let p2 = el!(pix, off - 3*step);
        let p1 = el!(pix, off - 2*step);
        let q1 = el!(pix, off + step);
        let q2 = el!(pix, off + 2*step);

        let str;
        if filter_p1 && filter_q1 {
            str = (t << 2) + (p1 - q1);
        } else {
            str = t << 2;
        }

        // move p0 and q0 towards each other by the clipped correction
        let diff = clip_symm((str + 4) >> 3, lim_p0q0);
        pix[off - step] = clip8(p0 + diff);
        pix[off       ] = clip8(q0 - diff);

        // NOTE(review): the gates below test |p1 - p0| and |q1 - q0| against
        // beta; FFmpeg's RV40 weak filter appears to test |p1 - p2| and
        // |q1 - q2| instead - confirm which one is intended.
        if filter_p1 && ((p1 - p0).wrapping_abs() <= beta) {
            let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
            pix[off - 2*step] = clip8(p1 - clip_symm(p1_diff, lim_p1));
        }

        if filter_q1 && ((q1 - q0).wrapping_abs() <= beta) {
            let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
            pix[off + step] = clip8(q1 - clip_symm(q1_diff, lim_q1));
        }

        off += stride;
    }
}
296
/// Weak filter for a horizontal edge: samples across the edge are `stride`
/// apart and the four filtered lines are adjacent columns.
fn rv40_weak_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
                            filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
                            lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
    rv40_weak_loop_filter4(pix, off, stride, 1, filter_p1, filter_q1, alpha, beta, lim_p0q0, lim_p1, lim_q1);
}
/// Weak filter for a vertical edge, operating on four consecutive rows.
///
/// Same arithmetic as `rv40_weak_loop_filter4` with a sample step of 1, but
/// each row is accessed through a fixed-size chunk so that index 3 is the
/// first sample after the edge (`q0`).
// `3 - 3` is written out on purpose to keep the sample offsets readable
#[allow(clippy::eq_op)]
fn rv40_weak_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
                            filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
                            lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
    // window starting three samples before the edge on the first row and
    // ending three samples after the edge on the fourth row
    let src = &mut pix[off - 3..][..stride * 3 + 3 + 3];
    for ch in src.chunks_mut(stride).take(4) {
        assert!(ch.len() >= 3 + 3);
        let p0 = el!(ch, 3 - 1);
        let q0 = el!(ch, 3);

        // no step across the edge - nothing to smooth
        let t = q0 - p0;
        if t == 0 {
            continue;
        }

        // skip rows where the scaled step is too large
        let u = (alpha * t.wrapping_abs()) >> 7;
        if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
            continue;
        }

        let p2 = el!(ch, 3 - 3);
        let p1 = el!(ch, 3 - 2);
        let q1 = el!(ch, 3 + 1);
        let q2 = el!(ch, 3 + 2);

        let str;
        if filter_p1 && filter_q1 {
            str = (t << 2) + (p1 - q1);
        } else {
            str = t << 2;
        }

        // move p0 and q0 towards each other by the clipped correction
        let diff = clip_symm((str + 4) >> 3, lim_p0q0);
        ch[3 - 1] = clip8(p0 + diff);
        ch[3    ] = clip8(q0 - diff);

        // NOTE(review): the gates below test |p1 - p0| and |q1 - q0| against
        // beta; FFmpeg's RV40 weak filter appears to test |p1 - p2| and
        // |q1 - q2| instead - confirm which one is intended.
        if filter_p1 && ((p1 - p0).wrapping_abs() <= beta) {
            let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
            ch[3 - 2] = clip8(p1 - clip_symm(p1_diff, lim_p1));
        }

        if filter_q1 && ((q1 - q0).wrapping_abs() <= beta) {
            let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
            ch[3 + 1] = clip8(q1 - clip_symm(q1_diff, lim_q1));
        }
    }
}
349
350
/// Rounding (dither) terms used by the strong loop filter for the samples
/// before the edge; indexed by `dmode` plus the line number.
const RV40_DITHER_L: [i16; 16] = [
    0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
    0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
];
/// Rounding (dither) terms used by the strong loop filter for the samples
/// after the edge; indexed by `dmode` plus the line number.
const RV40_DITHER_R: [i16; 16] = [
    0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
    0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
];
359
/// Five-tap smoothing filter used by the strong loop filter.
///
/// Computes `(25 * (a + e) + 26 * (b + c + d) + dither) >> 7`, where `c` is
/// the sample being replaced and `a`, `b`, `d`, `e` are its neighbours.
///
/// When `clip` is set the filtered value is additionally limited to
/// `c - lims ..= c + lims`, so the sample never moves further than `lims`
/// away from its original value. When `clip` is not set the filtered value
/// is returned as is and `lims` is ignored.
fn sfilter(a: i16, b: i16, c: i16, d: i16, e: i16, dither: i16, clip: bool, lims: i16) -> i16 {
    let val = (25 * (a + e) + 26 * (b + c + d) + dither) >> 7;
    if !clip {
        return val;
    }
    if val < c - lims {
        c - lims
    } else if val > c + lims {
        c + lims
    } else {
        // already within the allowed range: keep the filtered value
        // (returning `c` here would discard the filtering altogether)
        val
    }
}
374
375 fn rv40_strong_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
376 alpha: i16, lims: i16, dmode: usize, chroma: bool) {
377 for i in 0..4 {
378 let p0 = el!(pix, off - step);
379 let q0 = el!(pix, off);
380
381 let t = q0 - p0;
382 if t == 0 {
383 off += stride;
384 continue;
385 }
386
387 let fmode = (alpha * t.wrapping_abs()) >> 7;
388 if fmode > 1 {
389 off += stride;
390 continue;
391 }
392
393 let p3 = el!(pix, off - 4*step);
394 let p2 = el!(pix, off - 3*step);
395 let p1 = el!(pix, off - 2*step);
396 let q1 = el!(pix, off + step);
397 let q2 = el!(pix, off + 2*step);
398 let q3 = el!(pix, off + 3*step);
399
400 let np0 = sfilter(p2, p1, p0, q0, q1, RV40_DITHER_L[dmode + i], fmode != 0, lims);
401 let nq0 = sfilter( p1, p0, q0, q1, q0, RV40_DITHER_R[dmode + i], fmode != 0, lims);
402
403 let np1 = sfilter(p3, p2, p1, np0, q0, RV40_DITHER_L[dmode + i], fmode != 0, lims);
404 let nq1 = sfilter( p0, nq0, q1, q2, q3, RV40_DITHER_R[dmode + i], fmode != 0, lims);
405
406 pix[off - 2*step] = np1 as u8;
407 pix[off - step] = np0 as u8;
408 pix[off] = nq0 as u8;
409 pix[off + step] = nq1 as u8;
410
411 if !chroma {
412 let np2 = sfilter(np0, np1, p2, p3, np1, 64, false, 0);
413 let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0);
414 pix[off - 3*step] = np2 as u8;
415 pix[off + 2*step] = nq2 as u8;
416 }
417
418 off += stride;
419 }
420 }
421
/// Measures the activity on both sides of an edge over four lines and
/// decides which filtering is allowed.
///
/// * `pix`, `off` - pixel buffer and index of the first sample after the edge
///   on the first line;
/// * `step` - distance between neighbouring samples across the edge;
/// * `stride` - distance between the consecutive lines;
/// * `beta`, `beta2` - thresholds for the first and the second test;
/// * `edge` - whether strong filtering may be selected at all.
///
/// Returns `(strong, filter_p1, filter_q1)`. `filter_p1`/`filter_q1` tell
/// whether the summed `p1 - p0` / `q1 - q0` differences stay below `4 * beta`.
/// `strong` is set only when both flags are set, `edge` is true and the
/// summed `p1 - p2` and `q1 - q2` differences both stay below `beta2`.
fn rv40_loop_strength(pix: &[u8], off: usize, step: usize, stride: usize,
                      beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
    let sample = |idx: usize| i16::from(pix[idx]);
    let line_starts = || (0..4).map(|line| off + line * stride);

    let sum_p1p0: i16 = line_starts().map(|pos| sample(pos - 2 * step) - sample(pos - step)).sum();
    let sum_q1q0: i16 = line_starts().map(|pos| sample(pos + step) - sample(pos)).sum();

    let threshold = beta * 4;
    let filter_p1 = sum_p1p0.wrapping_abs() < threshold;
    let filter_q1 = sum_q1q0.wrapping_abs() < threshold;

    // strong filtering needs both sides to be smooth and an eligible edge
    if !(filter_p1 && filter_q1 && edge) {
        return (false, filter_p1, filter_q1);
    }

    let sum_p1p2: i16 = line_starts().map(|pos| sample(pos - 2 * step) - sample(pos - 3 * step)).sum();
    let sum_q1q2: i16 = line_starts().map(|pos| sample(pos + step) - sample(pos + 2 * step)).sum();

    let strong = sum_p1p2.wrapping_abs() < beta2 && sum_q1q2.wrapping_abs() < beta2;

    (strong, filter_p1, filter_q1)
}
455
/// Edge strength estimation for a horizontal edge: samples across the edge
/// are `stride` apart and the four measured lines are adjacent columns.
fn rv40_loop_strength_h(pix: &[u8], off: usize, stride: usize,
                        beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
    rv40_loop_strength(pix, off, stride, 1, beta, beta2, edge)
}
460
/// Edge strength estimation for a vertical edge over four consecutive rows.
///
/// `off` is the index of the first sample after the edge on the first row
/// and `stride` is the distance between rows. Returns
/// `(strong, filter_p1, filter_q1)`: the two flags tell whether the summed
/// `p1 - p0` / `q1 - q0` differences stay below `4 * beta`, and `strong` is
/// set only when both flags are set, `edge` is true and the summed `p1 - p2`
/// and `q1 - q2` differences both stay below `beta2`.
fn rv40_loop_strength_v(pix: &[u8], off: usize, stride: usize,
                        beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
    // position of q0 inside each row chunk
    const Q0: usize = 3;

    // window starting three samples before the edge on the first row and
    // ending three samples after the edge on the fourth row
    let window = &pix[off - 3..][..stride * 3 + 3 + 3];
    let rows = || window.chunks(stride).take(4).map(|row| {
        assert!(row.len() >= 3 + 3);
        row
    });
    let at = |row: &[u8], idx: usize| i16::from(row[idx]);

    let (sum_p1p0, sum_q1q0) = rows().fold((0i16, 0i16), |(p, q), row| {
        (p + (at(row, Q0 - 2) - at(row, Q0 - 1)),
         q + (at(row, Q0 + 1) - at(row, Q0)))
    });

    let threshold = beta * 4;
    let filter_p1 = sum_p1p0.wrapping_abs() < threshold;
    let filter_q1 = sum_q1q0.wrapping_abs() < threshold;

    // strong filtering needs both sides to be smooth and an eligible edge
    if !(filter_p1 && filter_q1 && edge) {
        return (false, filter_p1, filter_q1);
    }

    let (sum_p1p2, sum_q1q2) = rows().fold((0i16, 0i16), |(p, q), row| {
        (p + (at(row, Q0 - 2) - at(row, Q0 - 3)),
         q + (at(row, Q0 + 1) - at(row, Q0 + 2)))
    });

    let strong = sum_p1p2.wrapping_abs() < beta2 && sum_q1q2.wrapping_abs() < beta2;

    (strong, filter_p1, filter_q1)
}
494
495 fn rv40_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
496 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
497 chroma: bool, edge: bool) {
498 let (strong, filter_p1, filter_q1) = rv40_loop_strength_h(pix, off, stride, beta, beta2, edge);
499 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
500
501 if strong {
502 rv40_strong_loop_filter4(pix, off, stride, 1, alpha, lims, dmode, chroma);
503 } else if filter_p1 && filter_q1 {
504 rv40_weak_loop_filter4_h(pix, off, stride, true, true, alpha, beta,
505 lims, lim_p1, lim_q1);
506 } else if filter_p1 || filter_q1 {
507 rv40_weak_loop_filter4_h(pix, off, stride, filter_p1, filter_q1, alpha, beta,
508 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
509 }
510 }
511
512 fn rv40_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
513 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
514 chroma: bool, edge: bool) {
515 let (strong, filter_p1, filter_q1) = rv40_loop_strength_v(pix, off, stride, beta, beta2, edge);
516 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
517
518 if strong {
519 rv40_strong_loop_filter4(pix, off, 1, stride, alpha, lims, dmode, chroma);
520 } else if filter_p1 && filter_q1 {
521 rv40_weak_loop_filter4_v(pix, off, stride, true, true, alpha, beta,
522 lims, lim_p1, lim_q1);
523 } else if filter_p1 || filter_q1 {
524 rv40_weak_loop_filter4_v(pix, off, stride, filter_p1, filter_q1, alpha, beta,
525 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
526 }
527 }
528
/// Loop filter `alpha` parameter, indexed by the macroblock quantiser.
const RV40_ALPHA_TAB: [i16; 32] = [
    128, 128, 128, 128, 128, 128, 128, 128,
    128, 128, 122,  96,  75,  59,  47,  37,
     29,  23,  18,  15,  13,  11,  10,   9,
      8,   7,   6,   5,   4,   3,   2,   1
];
535
/// Loop filter `beta` parameter, indexed by the macroblock quantiser.
const RV40_BETA_TAB: [i16; 32] = [
     0,  0,  0,  0,  0,  0,  0,  0,  3,  3,  3,  4,  4,  4,  6,  6,
     6,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 13, 14, 15, 16, 17
];
540
/// Loop filter clipping limits, indexed by block filtering strength (0..=2)
/// and then by quantiser.
const RV40_FILTER_CLIP_TBL: [[i16; 32]; 3] = [
  [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  ], [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5
  ], [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 7, 8, 9
  ]
];
553
/// Evaluates to `true` when bit number `$x` of the pattern `$pat` is set.
macro_rules! test_bit {
    ($pat: expr, $x: expr) => ( (($pat >> $x) & 1) != 0 )
}
557
/// Checks whether a motion-compensated block, together with the extra
/// samples its interpolation needs, lies fully inside the reference picture.
///
/// * `x`, `y` - block position; `dx`, `dy` - integer motion vector offset;
/// * `size` - block width and height;
/// * `w`, `h` - picture dimensions;
/// * `e0`, `e1` - extra samples required to the left and to the right;
/// * `e2`, `e3` - extra samples required above and below.
fn check_pos(x: usize, y: usize, size: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let extent = size as isize;
    let left = (x as isize) + isize::from(dx);
    let top = (y as isize) + isize::from(dy);

    let fits_left = left - e0 >= 0;
    let fits_right = left + extent + e1 <= (w as isize);
    let fits_top = top - e2 >= 0;
    let fits_bottom = top + extent + e3 <= (h as isize);

    fits_left && fits_right && fits_top && fits_bottom
}
564
/// Extra reference samples needed before the block for each fractional
/// motion vector position (none for the integer position, two otherwise).
const RV40_EDGE1: [isize; 4] = [ 0, 2, 2, 2 ];
/// Extra reference samples needed after the block for each fractional
/// motion vector position (none for the integer position, three otherwise).
const RV40_EDGE2: [isize; 4] = [ 0, 3, 3, 3 ];
567
/// RealVideo 4 implementation of the DSP interface shared by the RV3/RV4
/// decoder: in-loop deblocking and luma/chroma motion compensation.
impl RV34DSP for RV40DSP {
    /// Deblocks one macroblock row of `frame`.
    ///
    /// * `mbinfo` - per-macroblock information (type, quantiser, deblocking
    ///   and chroma coded-block patterns) for the whole picture;
    /// * `mb_w` - picture width in macroblocks;
    /// * `row` - index of the macroblock row to filter.
    ///
    /// `_ftype` and `mb_h` are not used by this implementation.
    fn loop_filter(&self, frame: &mut NAVideoBuffer<u8>, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, mb_h: usize, row: usize) {
        // todo proper B-frame filtering?
        let mut offs: [usize; 3] = [0; 3];
        let mut stride: [usize; 3] = [0; 3];
        let (w, h) = frame.get_dimensions(0);
        // small pictures use a larger second threshold for luma (see beta_y)
        let small_frame = w * h <= 176*144;

        // start of the current macroblock row in every plane
        for comp in 0..3 {
            stride[comp] = frame.get_stride(comp);
            let start = if comp == 0 { row * 16 } else { row * 8 };
            offs[comp] = frame.get_offset(comp) + start * stride[comp];
        }

        let data = frame.get_data_mut().unwrap();
        let dst: &mut [u8] = data.as_mut_slice();

        let mut mb_pos: usize = row * mb_w;
        // quantiser of the macroblock to the left (0 for the first one)
        let mut left_q: usize = 0;
        for mb_x in 0..mb_w {
            let q = mbinfo[mb_pos].q as usize;
            let alpha = RV40_ALPHA_TAB[q];
            let beta = RV40_BETA_TAB[q];
            let beta_y = if small_frame { beta * 4 } else { beta * 3 };
            let beta_c = beta * 3;

            let cur_dbk = mbinfo[mb_pos].deblock;
            let cur_cbp = mbinfo[mb_pos].cbp_c;

            // an edge is treated as strong when either macroblock sharing it
            // reports `is_intra_or_16()`
            let is_strong = mbinfo[mb_pos].mbtype.is_intra_or_16();
            let top_is_strong = is_strong || (row > 0 && mbinfo[mb_pos - mb_w].mbtype.is_intra_or_16());
            let left_is_strong = is_strong || (mb_x > 0 && mbinfo[mb_pos - 1].mbtype.is_intra_or_16());

            // luma: sixteen 4x4 blocks
            for y in 0..4 {
                let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
                for x in 0..4 {
                    let bpos = x + y * 4;
                    let filter_hor_down = (y != 3) && !is_strong;
                    let filter_ver = (x > 0) || (mb_x > 0);
                    // NOTE(review): this gates the top-edge pass on `x == 0`
                    // (left block column) and not on `y == 0` (top block
                    // row) - confirm that this is intended.
                    let filter_hor_up = (row > 0) && (x == 0) && top_is_strong;
                    let ver_strong = (x == 0) && (mb_x > 0) && left_is_strong;

                    // strength of the current block: 2 for strong
                    // macroblocks, 1 when its deblock bit is set, 0 otherwise
                    let cur_strength: usize;
                    if is_strong {
                        cur_strength = 2;
                    } else if test_bit!(cur_dbk, bpos) {
                        cur_strength = 1;
                    } else {
                        cur_strength = 0;
                    }

                    // strength of the block to the left, which may belong to
                    // the previous macroblock (its rightmost column)
                    let left_strength: usize;
                    if x > 0 {
                        if is_strong {
                            left_strength = 2;
                        } else if test_bit!(cur_dbk, bpos - 1) {
                            left_strength = 1;
                        } else {
                            left_strength = 0;
                        }
                    } else if mb_x > 0 {
                        if left_is_strong {
                            left_strength = 2;
                        } else if test_bit!(mbinfo[mb_pos - 1].deblock, bpos + 3) {
                            left_strength = 1;
                        } else {
                            left_strength = 0;
                        }
                    } else {
                        left_strength = 0;
                    }

                    // strength of the block below (inside this macroblock only)
                    let bot_strength: usize;
                    if y < 3 {
                        if is_strong {
                            bot_strength = 2;
                        } else if test_bit!(cur_dbk, bpos + 4) {
                            bot_strength = 1;
                        } else {
                            bot_strength = 0;
                        }
                    } else {
                        bot_strength = 0;
                    }

                    // strength of the block above, which may belong to the
                    // macroblock in the previous row (its bottom row)
                    let top_strength: usize;
                    if y > 0 {
                        if is_strong {
                            top_strength = 2;
                        } else if test_bit!(cur_dbk, bpos - 4) {
                            top_strength = 1;
                        } else {
                            top_strength = 0;
                        }
                    } else if row > 0 {
                        if top_is_strong {
                            top_strength = 2;
                        } else if test_bit!(mbinfo[mb_pos - mb_w].deblock, bpos + 12) {
                            top_strength = 1;
                        } else {
                            top_strength = 0;
                        }
                    } else {
                        top_strength = 0;
                    }

                    let l_q = if x > 0 { q } else { left_q };
                    let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };

                    let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
                    let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
                    let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
                    let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];

                    // base index into the dither tables
                    let dmode = if y > 0 { x + y * 4 } else { x * 4 };

                    // bottom edge of the block (top edge of the one below)
                    if filter_hor_down {
                        rv40_loop_filter4_h(dst, yoff + 4 * stride[0] + x * 4, stride[0],
                                            dmode, lim_cur, lim_bottom, alpha, beta, beta_y, false, false);
                    }
                    // left edge without strong filtering allowed
                    if filter_ver && !ver_strong {
                        rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
                                            dmode, lim_left, lim_cur, alpha, beta, beta_y, false, false);
                    }
                    // top edge with strong filtering allowed
                    if filter_hor_up {
                        rv40_loop_filter4_h(dst, yoff + x * 4, stride[0],
                                            dmode, lim_top, lim_cur, alpha, beta, beta_y, false, true);
                    }
                    // left edge with strong filtering allowed
                    if filter_ver && ver_strong {
                        rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
                                            dmode, lim_left, lim_cur, alpha, beta, beta_y, false, true);
                    }
                }
            }

            // chroma: four 4x4 blocks per plane, strengths taken from cbp_c
            for comp in 1..3 {
                for y in 0..2 {
                    let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
                    for x in 0..2 {
                        // bits 0..=3 are used for the first chroma plane and
                        // bits 4..=7 for the second one
                        let bpos = x + y * 2 + (comp - 1) * 4;

                        let filter_hor_down = (y != 1) && !is_strong;
                        let filter_ver = (x > 0) || (mb_x > 0);
                        // NOTE(review): same `x == 0` gate as for luma -
                        // confirm that `y == 0` was not intended.
                        let filter_hor_up = (row > 0) && (x == 0) && top_is_strong;
                        let ver_strong = (x == 0) && (mb_x > 0) && left_is_strong;

                        let cur_strength: usize;
                        if is_strong {
                            cur_strength = 2;
                        } else if test_bit!(cur_cbp, bpos) {
                            cur_strength = 1;
                        } else {
                            cur_strength = 0;
                        }

                        let left_strength: usize;
                        if x > 0 {
                            if is_strong {
                                left_strength = 2;
                            } else if test_bit!(cur_cbp, bpos - 1) {
                                left_strength = 1;
                            } else {
                                left_strength = 0;
                            }
                        } else if mb_x > 0 {
                            if left_is_strong {
                                left_strength = 2;
                            } else if test_bit!(mbinfo[mb_pos - 1].cbp_c, bpos + 1) {
                                left_strength = 1;
                            } else {
                                left_strength = 0;
                            }
                        } else {
                            left_strength = 0;
                        }

                        let bot_strength: usize;
                        if y == 0 {
                            if is_strong {
                                bot_strength = 2;
                            } else if test_bit!(cur_cbp, bpos + 2) {
                                bot_strength = 1;
                            } else {
                                bot_strength = 0;
                            }
                        } else {
                            bot_strength = 0;
                        }

                        let top_strength: usize;
                        if y > 0 {
                            if is_strong {
                                top_strength = 2;
                            } else if test_bit!(cur_cbp, bpos - 2) {
                                top_strength = 1;
                            } else {
                                top_strength = 0;
                            }
                        } else if row > 0 {
                            if top_is_strong {
                                top_strength = 2;
                            } else if test_bit!(mbinfo[mb_pos - mb_w].cbp_c, bpos + 2) {
                                top_strength = 1;
                            } else {
                                top_strength = 0;
                            }
                        } else {
                            top_strength = 0;
                        }

                        let l_q = if x > 0 { q } else { left_q };
                        let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };

                        let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
                        let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
                        let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
                        let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];

                        // same four passes as for luma, with chroma flag set
                        if filter_hor_down {
                            rv40_loop_filter4_h(dst, coff + 4 * stride[comp] + x * 4, stride[comp],
                                                x * 8, lim_cur, lim_bottom, alpha, beta, beta_c, true, false);
                        }
                        if filter_ver && !ver_strong {
                            rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
                                                y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, false);
                        }
                        if filter_hor_up {
                            rv40_loop_filter4_h(dst, coff + x * 4, stride[comp],
                                                x * 8, lim_top, lim_cur, alpha, beta, beta_c, true, true);
                        }
                        if filter_ver && ver_strong {
                            rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
                                                y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, true);
                        }
                    }
                }
            }

            left_q = q;

            mb_pos += 1;
        }
    }
    /// Luma motion compensation for a 16x16 (`use16`) or 8x8 block.
    ///
    /// `x`, `y` give the block position and `mv` the motion vector in
    /// quarter-sample units. When `avg` is set the block is written at the
    /// start of the destination plane instead of at (`x`, `y`).
    fn do_luma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, mv: MV, use16: bool, avg: bool) {
        let size: usize = if use16 { 16 } else { 8 };
        let dstride = frame.get_stride(0);
        let doffset = frame.get_offset(0) + (if !avg { x + y * dstride } else { 0 });
        let data = frame.get_data_mut().unwrap();
        let dst: &mut [u8] = data.as_mut_slice();

        // picture dimensions rounded up to whole macroblocks
        let (w_, h_) = prev_frame.get_dimensions(0);
        let w = (w_ + 15) & !15;
        let h = (h_ + 15) & !15;

        // split the motion vector into integer and fractional parts
        let dx = mv.x >> 2;
        let cx = (mv.x & 3) as usize;
        let dy = mv.y >> 2;
        let cy = (mv.y & 3) as usize;
        let mode = cx + cy * 4;

        if check_pos(x, y, size, w, h, dx, dy, RV40_EDGE1[cx], RV40_EDGE2[cx], RV40_EDGE1[cy], RV40_EDGE2[cy]) {
            // everything the filter reads is inside the picture
            let sstride = prev_frame.get_stride(0);
            let mut soffset = prev_frame.get_offset(0) + x + y * sstride;
            let data = prev_frame.get_data();
            let src: &[u8] = data.as_slice();
            soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
            self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, src, soffset, sstride);
        } else {
            // build a 21x21 patch (stride 32) starting two samples up and to
            // the left of the block, then interpolate from inside that patch;
            // `edge_emu` presumably replicates samples for positions outside
            // the picture - confirm against its definition
            let mut ebuf: [u8; 32*22] = [0; 32*22];
            edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0);
            self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, &ebuf, 32*2 + 2, 32);
        }
    }
    /// Chroma motion compensation for an 8x8 (`use8`) or 4x4 block of plane
    /// `comp`.
    ///
    /// `x`, `y` give the block position in the chroma plane and `mv` the
    /// luma motion vector, which is halved here. When `avg` is set the block
    /// is written at the start of the destination plane instead of at
    /// (`x`, `y`).
    fn do_chroma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, comp: usize, mv: MV, use8: bool, avg: bool) {
        let size: usize = if use8 { 8 } else { 4 };
        let dstride = frame.get_stride(comp);
        let doffset = frame.get_offset(comp) + (if !avg { x + y * dstride } else { 0 });
        let data = frame.get_data_mut().unwrap();
        let dst: &mut [u8] = data.as_mut_slice();

        // plane dimensions rounded up to a multiple of 8
        let (w_, h_) = prev_frame.get_dimensions(comp);
        let w = (w_ + 7) & !7;
        let h = (h_ + 7) & !7;

        // halve the vector, then split into integer and fractional parts
        let mvx = mv.x / 2;
        let mvy = mv.y / 2;
        let dx = mvx >> 2;
        let mut cx = (mvx & 3) as usize;
        let dy = mvy >> 2;
        let mut cy = (mvy & 3) as usize;

        // the (3, 3) fractional position is handled as (2, 2)
        if (cx == 3) && (cy == 3) {
            cx = 2;
            cy = 2;
        }

        if check_pos(x, y, size, w, h, dx, dy, 0, 1, 0, 1) {
            // the block plus one extra column and row is inside the plane
            let sstride = prev_frame.get_stride(comp);
            let mut soffset = prev_frame.get_offset(comp) + x + y * sstride;
            let data = prev_frame.get_data();
            let src: &[u8] = data.as_slice();
            soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
            rv40_chroma_mc(dst, doffset, dstride, src, soffset, sstride, size, cx, cy);
        } else {
            // build a 9x9 patch (stride 16) and interpolate from it;
            // `edge_emu` presumably replicates samples for positions outside
            // the plane - confirm against its definition
            let mut ebuf: [u8; 16*10] = [0; 16*10];
            edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp);
            rv40_chroma_mc(dst, doffset, dstride, &ebuf, 0, 16, size, cx, cy);
        }
    }
}