]> git.nihav.org Git - nihav.git/blob - nihav-realmedia/src/codecs/rv40dsp.rs
c2841dfc1ec71d2f638e39e371aeec15a04ea27c
[nihav.git] / nihav-realmedia / src / codecs / rv40dsp.rs
1 use nihav_core::frame::{FrameType, NAVideoBuffer};
2 use nihav_core::codecs::MV;
3 use nihav_core::codecs::blockdsp::edge_emu;
4 use super::rv3040::{RV34DSP, RV34MBInfo};
5
/// Saturates a signed 16-bit value into the `0..=255` range and returns it as a byte.
fn clip8(a: i16) -> u8 {
    a.max(0).min(255) as u8
}
11
// Reads element `$o` of the indexable `$s` and widens it to `i16` with an `as` cast,
// so that filter arithmetic can go negative without wrapping.
12 macro_rules! el {
13 ($s: ident, $o: expr) => ( $s[$o] as i16 )
14 }
15
// Six-tap interpolation filters evaluated at `$s[$o]`, with neighbouring samples taken
// `$step` elements apart (taps cover offsets -2*$step ..= +3*$step).
// Each arm sums the weighted taps in `i16`, adds a rounding term, shifts, and saturates
// the result to a byte with `clip8`.
16 macro_rules! filter {
// Variant 01: weights (1, -5, 52, 20, -5, 1), rounding 32, shift 6.
17 (01; $s: ident, $o: expr, $step: expr) => (
18 clip8((( el!($s, $o - 2 * $step)
19 -5 * el!($s, $o - 1 * $step)
20 +52 * el!($s, $o - 0 * $step)
21 +20 * el!($s, $o + 1 * $step)
22 -5 * el!($s, $o + 2 * $step)
23 + el!($s, $o + 3 * $step) + 32) >> 6) as i16)
24 );
// Variant 02: symmetric weights (1, -5, 20, 20, -5, 1), rounding 16, shift 5.
25 (02; $s: ident, $o: expr, $step: expr) => (
26 clip8((( el!($s, $o - 2 * $step)
27 -5 * el!($s, $o - 1 * $step)
28 +20 * el!($s, $o - 0 * $step)
29 +20 * el!($s, $o + 1 * $step)
30 -5 * el!($s, $o + 2 * $step)
31 + el!($s, $o + 3 * $step) + 16) >> 5) as i16)
32 );
// Variant 03: mirror of variant 01, weights (1, -5, 20, 52, -5, 1), rounding 32, shift 6.
33 (03; $s: ident, $o: expr, $step: expr) => (
34 clip8((( el!($s, $o - 2 * $step)
35 -5 * el!($s, $o - 1 * $step)
36 +20 * el!($s, $o - 0 * $step)
37 +52 * el!($s, $o + 1 * $step)
38 -5 * el!($s, $o + 2 * $step)
39 + el!($s, $o + 3 * $step) + 32) >> 6) as i16)
40 );
41 }
42
// Generates a motion-compensation function named `$name` working on `$size` x `$size`
// blocks. Every generated function has the signature
// `(dst, didx, dstride, src, sidx, sstride)`: it reads from `src` starting at `sidx`
// and writes to `dst` starting at `didx`, advancing by the respective stride per row.
43 macro_rules! mc_func {
// `copy`: plain block copy, no filtering.
44 (copy; $name: ident, $size: expr) => (
45 fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
46 for _ in 0..$size {
47 let d = &mut dst[didx..][..$size];
48 let s = &src[sidx..][..$size];
49 for x in 0..$size { d[x] = s[x]; }
50 didx += dstride;
51 sidx += sstride;
52 }
53 }
54 );
// `mc01`/`mc02`/`mc03`: one-dimensional filtering with the matching `filter!` variant.
// `$ver` selects the tap spacing: `sstride` (taps in different rows) when true,
// 1 (taps in the same row) when false.
55 (mc01; $name: ident, $size: expr, $ver: expr) => (
56 fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
57 let step = if $ver { sstride } else { 1 };
58 for _ in 0..$size {
59 for x in 0..$size {
60 dst[didx + x] = filter!(01; src, sidx + x, step);
61 }
62 sidx += sstride;
63 didx += dstride;
64 }
65 }
66 );
67 (mc02; $name: ident, $size: expr, $ver: expr) => (
68 fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
69 let step = if $ver { sstride } else { 1 };
70 for _ in 0..$size {
71 for x in 0..$size {
72 dst[didx + x] = filter!(02; src, sidx + x, step);
73 }
74 sidx += sstride;
75 didx += dstride;
76 }
77 }
78 );
79 (mc03; $name: ident, $size: expr, $ver: expr) => (
80 fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
81 let step = if $ver { sstride } else { 1 };
82 for _ in 0..$size {
83 for x in 0..$size {
84 dst[didx + x] = filter!(03; src, sidx + x, step);
85 }
86 sidx += sstride;
87 didx += dstride;
88 }
89 }
90 );
// `cm01`/`cm02`/`cm03`: two-pass filtering. The first pass applies the matching
// `filter!` variant along rows into a temporary buffer of `$size + 5` rows, starting
// two source rows above the block; the second pass hands that buffer to `$ofilt`,
// starting at its third row (`2*bstride`) with the buffer width as stride.
// The five extra rows are the ones the six-tap second pass reaches (-2 ..= +3).
91 (cm01; $name: ident, $size: expr, $ofilt: ident) => (
92 fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
93 let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
94 let mut bidx = 0;
95 let bstride = $size;
96 sidx -= sstride * 2;
97 for _ in 0..$size+5 {
98 for x in 0..$size { buf[bidx + x] = filter!(01; src, sidx + x, 1); }
99 bidx += bstride;
100 sidx += sstride;
101 }
102 $ofilt(dst, didx, dstride, &buf, 2*bstride, $size);
103 }
104 );
105 (cm02; $name: ident, $size: expr, $ofilt: ident) => (
106 fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
107 let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
108 let mut bidx = 0;
109 let bstride = $size;
110 sidx -= sstride * 2;
111 for _ in 0..$size+5 {
112 for x in 0..$size { buf[bidx + x] = filter!(02; src, sidx + x, 1); }
113 bidx += bstride;
114 sidx += sstride;
115 }
116 $ofilt(dst, didx, dstride, &buf, 2*bstride, $size);
117 }
118 );
119 (cm03; $name: ident, $size: expr, $ofilt: ident) => (
120 fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
121 let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
122 let mut bidx = 0;
123 let bstride = $size;
124 sidx -= sstride * 2;
125 for _ in 0..$size+5 {
126 for x in 0..$size { buf[bidx + x] = filter!(03; src, sidx + x, 1); }
127 bidx += bstride;
128 sidx += sstride;
129 }
130 $ofilt(dst, didx, dstride, &buf, 2*bstride, $size);
131 }
132 );
133 }
// Instantiations of `mc_func!` for 16x16 and 8x8 blocks.
// Naming: `luma_mc_XY_N`, where X is the filter variant applied with tap spacing 1 and
// Y the variant applied with tap spacing `sstride` (0 = none); N is the block size.
// Plain copies.
134 mc_func!(copy; copy_16, 16);
135 mc_func!(copy; copy_8, 8);
// Single-pass filters with tap spacing 1 (`$ver` = false).
136 mc_func!(mc01; luma_mc_10_16, 16, false);
137 mc_func!(mc01; luma_mc_10_8, 8, false);
138 mc_func!(mc02; luma_mc_20_16, 16, false);
139 mc_func!(mc02; luma_mc_20_8, 8, false);
140 mc_func!(mc03; luma_mc_30_16, 16, false);
141 mc_func!(mc03; luma_mc_30_8, 8, false);
// Single-pass filters with tap spacing `sstride` (`$ver` = true).
142 mc_func!(mc01; luma_mc_01_16, 16, true);
143 mc_func!(mc01; luma_mc_01_8, 8, true);
144 mc_func!(mc02; luma_mc_02_16, 16, true);
145 mc_func!(mc02; luma_mc_02_8, 8, true);
146 mc_func!(mc03; luma_mc_03_16, 16, true);
147 mc_func!(mc03; luma_mc_03_8, 8, true);
// Two-pass filters: first pass is the `cmXX` variant, second pass is the named
// single-pass function.
148 mc_func!(cm01; luma_mc_11_16, 16, luma_mc_01_16);
149 mc_func!(cm01; luma_mc_11_8, 8, luma_mc_01_8);
150 mc_func!(cm01; luma_mc_12_16, 16, luma_mc_02_16);
151 mc_func!(cm01; luma_mc_12_8, 8, luma_mc_02_8);
152 mc_func!(cm01; luma_mc_13_16, 16, luma_mc_03_16);
153 mc_func!(cm01; luma_mc_13_8, 8, luma_mc_03_8);
154 mc_func!(cm02; luma_mc_21_16, 16, luma_mc_01_16);
155 mc_func!(cm02; luma_mc_21_8, 8, luma_mc_01_8);
156 mc_func!(cm02; luma_mc_22_16, 16, luma_mc_02_16);
157 mc_func!(cm02; luma_mc_22_8, 8, luma_mc_02_8);
158 mc_func!(cm02; luma_mc_23_16, 16, luma_mc_03_16);
159 mc_func!(cm02; luma_mc_23_8, 8, luma_mc_03_8);
160 mc_func!(cm03; luma_mc_31_16, 16, luma_mc_01_16);
161 mc_func!(cm03; luma_mc_31_8, 8, luma_mc_01_8);
162 mc_func!(cm03; luma_mc_32_16, 16, luma_mc_02_16);
163 mc_func!(cm03; luma_mc_32_8, 8, luma_mc_02_8);
164 mc_func!(cm03; luma_mc_33_16, 16, luma_mc_03_16);
165 mc_func!(cm03; luma_mc_33_8, 8, luma_mc_03_8);
166
/// Rounding bias added before the final `>> 4` in chroma interpolation,
/// indexed as `[y][x]` by the quarter-pel fraction (each in `0..=3`).
const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [
    [ 0, 4, 8, 4 ],
    [ 8, 7, 8, 7 ],
    [ 0, 8, 4, 8 ],
    [ 8, 7, 8, 7 ]
];

/// Chroma motion compensation of a `size` x `size` block with quarter-pel
/// fractions `x` and `y`.
///
/// * `dst`, `didx`, `dstride` - destination buffer, start index and line stride.
/// * `src`, `sidx`, `sstride` - source buffer, start index and line stride.
/// * `x`, `y` - fractional position, each in `0..=3`.
///
/// A zero fraction copies the block; a fraction in one direction only blends two
/// samples; otherwise four samples are blended bilinearly. Weights always sum to 16.
///
/// # Panics
///
/// Panics if `x` or `y` exceeds 3 (bias lookup) or if any accessed index is
/// outside `dst`/`src`.
fn rv40_chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
    if (x == 0) && (y == 0) {
        for _ in 0..size {
            dst[didx..didx + size].copy_from_slice(&src[sidx..sidx + size]);
            didx += dstride;
            sidx += sstride;
        }
        return;
    }
    // The fractions are already in quarter-pel units, so they index the 4x4 table
    // directly. Halving them (as the eighth-pel formulation of this filter does)
    // would leave three quarters of the table unreachable and pick the wrong bias.
    let bias = RV40_CHROMA_BIAS[y][x];
    if (x > 0) && (y > 0) {
        let a = ((4 - x) * (4 - y)) as u16;
        let b = (x * (4 - y)) as u16;
        let c = ((4 - x) * y) as u16;
        let d = (x * y) as u16;
        for _ in 0..size {
            for col in 0..size {
                let top = sidx + col;
                let bottom = top + sstride;
                let acc = a * u16::from(src[top])
                        + b * u16::from(src[top + 1])
                        + c * u16::from(src[bottom])
                        + d * u16::from(src[bottom + 1])
                        + bias;
                dst[didx + col] = (acc >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    } else {
        // Exactly one fraction is non-zero: blend each sample with its neighbour in
        // that direction only, so no sample outside the needed row/column is read.
        let a = ((4 - x) * (4 - y)) as u16;
        let e = (x * (4 - y) + (4 - x) * y) as u16;
        let step = if y > 0 { sstride } else { 1 };
        for _ in 0..size {
            for col in 0..size {
                let cur = sidx + col;
                let acc = a * u16::from(src[cur]) + e * u16::from(src[cur + step]) + bias;
                dst[didx + col] = (acc >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    }
}
213
/// DSP context holding the luma motion-compensation function tables.
214 pub struct RV40DSP {
// Outer index: 0 = 16x16 functions, 1 = 8x8 functions.
// Inner index: `cx + cy * 4`, as computed from the motion vector fraction in `do_luma_mc`.
215 luma_mc: [[fn (&mut [u8], usize, usize, &[u8], usize, usize); 16]; 2],
216 }
217
218 impl RV40DSP {
219 pub fn new() -> Self {
220 RV40DSP {
221 luma_mc: [
222 [ copy_16, luma_mc_10_16, luma_mc_20_16, luma_mc_30_16,
223 luma_mc_01_16, luma_mc_11_16, luma_mc_21_16, luma_mc_31_16,
224 luma_mc_02_16, luma_mc_12_16, luma_mc_22_16, luma_mc_32_16,
225 luma_mc_03_16, luma_mc_13_16, luma_mc_23_16, luma_mc_33_16 ],
226 [ copy_8, luma_mc_10_8, luma_mc_20_8, luma_mc_30_8,
227 luma_mc_01_8, luma_mc_11_8, luma_mc_21_8, luma_mc_31_8,
228 luma_mc_02_8, luma_mc_12_8, luma_mc_22_8, luma_mc_32_8,
229 luma_mc_03_8, luma_mc_13_8, luma_mc_23_8, luma_mc_33_8 ] ],
230 }
231 }
232 }
233
// Second definition of `el!`; it shadows the earlier one from here on and expands to the
// same expression: element `$o` of `$src`, cast to `i16`.
234 macro_rules! el {
235 ($src: ident, $o: expr) => ($src[$o] as i16);
236 }
237
/// Limits `a` to the symmetric range `-lim..=lim`.
///
/// The lower bound is tested first, so it wins if `lim` is negative.
fn clip_symm(a: i16, lim: i16) -> i16 {
    match a {
        v if v < -lim => -lim,
        v if v > lim => lim,
        v => v,
    }
}
247
248 fn rv40_weak_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
249 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
250 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
251 for _ in 0..4 {
252 let p0 = el!(pix, off - step);
253 let q0 = el!(pix, off);
254
255 let t = q0 - p0;
256 if t == 0 {
257 off += stride;
258 continue;
259 }
260
261 let u = (alpha * t.wrapping_abs()) >> 7;
262 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
263 off += stride;
264 continue;
265 }
266
267 let p2 = el!(pix, off - 3*step);
268 let p1 = el!(pix, off - 2*step);
269 let q1 = el!(pix, off + step);
270 let q2 = el!(pix, off + 2*step);
271
272 let str;
273 if filter_p1 && filter_q1 {
274 str = (t << 2) + (p1 - q1);
275 } else {
276 str = t << 2;
277 }
278
279 let diff = clip_symm((str + 4) >> 3, lim_p0q0);
280 pix[off - step] = clip8(p0 + diff);
281 pix[off ] = clip8(q0 - diff);
282
283 if filter_p1 && ((p1 - p0).wrapping_abs() <= beta) {
284 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
285 pix[off - 2*step] = clip8(p1 - clip_symm(p1_diff, lim_p1));
286 }
287
288 if filter_q1 && ((q1 - q0).wrapping_abs() <= beta) {
289 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
290 pix[off + step] = clip8(q1 - clip_symm(q1_diff, lim_q1));
291 }
292
293 off += stride;
294 }
295 }
296
297 fn rv40_weak_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
298 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
299 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
300 rv40_weak_loop_filter4(pix, off, stride, 1, filter_p1, filter_q1, alpha, beta, lim_p0q0, lim_p1, lim_q1);
301 }
302 fn rv40_weak_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
303 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
304 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
305 let src = &mut pix[off - 3..][..stride * 3 + 3 + 3];
306 for ch in src.chunks_mut(stride).take(4) {
307 assert!(ch.len() >= 3 + 3);
308 let p0 = el!(ch, 3 - 1);
309 let q0 = el!(ch, 3);
310
311 let t = q0 - p0;
312 if t == 0 {
313 continue;
314 }
315
316 let u = (alpha * t.wrapping_abs()) >> 7;
317 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
318 continue;
319 }
320
321 let p2 = el!(ch, 3 - 3);
322 let p1 = el!(ch, 3 - 2);
323 let q1 = el!(ch, 3 + 1);
324 let q2 = el!(ch, 3 + 2);
325
326 let str;
327 if filter_p1 && filter_q1 {
328 str = (t << 2) + (p1 - q1);
329 } else {
330 str = t << 2;
331 }
332
333 let diff = clip_symm((str + 4) >> 3, lim_p0q0);
334 ch[3 - 1] = clip8(p0 + diff);
335 ch[3 ] = clip8(q0 - diff);
336
337 if filter_p1 && ((p1 - p0).wrapping_abs() <= beta) {
338 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
339 ch[3 - 2] = clip8(p1 - clip_symm(p1_diff, lim_p1));
340 }
341
342 if filter_q1 && ((q1 - q0).wrapping_abs() <= beta) {
343 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
344 ch[3 + 1] = clip8(q1 - clip_symm(q1_diff, lim_q1));
345 }
346 }
347 }
348
349
/// Dither terms added to the p-side (`p0`, `p1`) sums of the strong filter,
/// indexed by `dmode + line`.
const RV40_DITHER_L: [i16; 16] = [
    0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
    0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
];
/// Dither terms added to the q-side (`q0`, `q1`) sums of the strong filter,
/// indexed by `dmode + line`.
const RV40_DITHER_R: [i16; 16] = [
    0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
    0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
];

/// Five-tap smoothing kernel `(25, 26, 26, 26, 25) / 128` centred on `c`.
///
/// `dither` is added before the `>> 7`. When `clip` is set, the smoothed value is
/// limited to `c - lims ..= c + lims`; a value already inside that window is returned
/// unchanged.
fn sfilter(a: i16, b: i16, c: i16, d: i16, e: i16, dither: i16, clip: bool, lims: i16) -> i16 {
    let val = (25 * (a + e) + 26 * (b + c + d) + dither) >> 7;
    if !clip {
        return val;
    }
    if val < c - lims {
        c - lims
    } else if val > c + lims {
        c + lims
    } else {
        // In range: keep the filtered value (returning `c` here would undo the filter).
        val
    }
}

/// Strong deblocking of four positions along one edge.
///
/// `off` addresses the first `q0` sample; `step` is the distance between samples
/// across the edge and `stride` the distance between consecutive filtered positions.
/// `p1, p0, q0, q1` are always rewritten; `p2` and `q2` are rewritten as well unless
/// `chroma` is set. `dmode` selects the starting entry of the dither tables, so
/// `dmode + 3` must be below 16.
fn rv40_strong_loop_filter4(pix: &mut [u8], off: usize, step: usize, stride: usize,
                            alpha: i16, lims: i16, dmode: usize, chroma: bool) {
    for i in 0..4 {
        let pos = off + i * stride;

        let p0 = i16::from(pix[pos - step]);
        let q0 = i16::from(pix[pos]);

        let t = q0 - p0;
        if t == 0 {
            continue;
        }

        // 0: filter without clipping, 1: filter with clipping, above: leave untouched.
        let fmode = (alpha * t.wrapping_abs()) >> 7;
        if fmode > 1 {
            continue;
        }
        let clip = fmode != 0;

        let p3 = i16::from(pix[pos - 4 * step]);
        let p2 = i16::from(pix[pos - 3 * step]);
        let p1 = i16::from(pix[pos - 2 * step]);
        let q1 = i16::from(pix[pos + step]);
        let q2 = i16::from(pix[pos + 2 * step]);
        let q3 = i16::from(pix[pos + 3 * step]);

        let dither_l = RV40_DITHER_L[dmode + i];
        let dither_r = RV40_DITHER_R[dmode + i];

        // Each kernel is centred on the sample it replaces; the q-side taps mirror
        // the p-side ones.
        let np0 = sfilter(p2, p1, p0, q0, q1, dither_l, clip, lims);
        let nq0 = sfilter(p1, p0, q0, q1, q2, dither_r, clip, lims);

        let np1 = sfilter(p3, p2, p1, np0, q0, dither_l, clip, lims);
        let nq1 = sfilter(p0, nq0, q1, q2, q3, dither_r, clip, lims);

        pix[pos - 2 * step] = np1 as u8;
        pix[pos - step] = np0 as u8;
        pix[pos] = nq0 as u8;
        pix[pos + step] = nq1 as u8;

        if !chroma {
            // Effective weights: 25 * x0 + 26 * x1 + 51 * x2 + 26 * x3 (x2 used twice).
            let np2 = sfilter(np0, np1, p2, p3, p2, 64, false, 0);
            let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0);
            pix[pos - 3 * step] = np2 as u8;
            pix[pos + 2 * step] = nq2 as u8;
        }
    }
}
420
/// Measures activity around an edge over four positions and decides how to filter it.
///
/// `off` addresses the first `q0` sample, `step` separates samples across the edge and
/// `stride` separates consecutive positions. Returns `(strong, filter_p1, filter_q1)`:
/// the side flags are set when the summed `p1 - p0` / `q1 - q0` differences stay below
/// `4 * beta`; `strong` additionally requires `edge`, both side flags, and the summed
/// `p1 - p2` / `q1 - q2` differences below `beta2`.
fn rv40_loop_strength(pix: &[u8], off: usize, step: usize, stride: usize,
                      beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
    let sample = |idx: usize| i16::from(pix[idx]);
    let positions = || (0..4).map(move |line| off + line * stride);

    let mut near_p: i16 = 0;
    let mut near_q: i16 = 0;
    for pos in positions() {
        near_p += sample(pos - 2 * step) - sample(pos - step);
        near_q += sample(pos + step) - sample(pos);
    }

    let filter_p1 = near_p.wrapping_abs() < beta * 4;
    let filter_q1 = near_q.wrapping_abs() < beta * 4;

    if !(edge && filter_p1 && filter_q1) {
        return (false, filter_p1, filter_q1);
    }

    // The outer samples are inspected only when strong filtering is still possible.
    let mut far_p: i16 = 0;
    let mut far_q: i16 = 0;
    for pos in positions() {
        far_p += sample(pos - 2 * step) - sample(pos - 3 * step);
        far_q += sample(pos + step) - sample(pos + 2 * step);
    }

    let strong = far_p.wrapping_abs() < beta2 && far_q.wrapping_abs() < beta2;
    (strong, filter_p1, filter_q1)
}
454
455 fn rv40_loop_strength_h(pix: &[u8], off: usize, stride: usize,
456 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
457 rv40_loop_strength(pix, off, stride, 1, beta, beta2, edge)
458 }
459
/// Strength decision for an edge lying along a column.
///
/// Works on four rows, each viewed as a slice whose element 3 is `q0`. Returns
/// `(strong, filter_p1, filter_q1)` with the same meaning as `rv40_loop_strength`.
fn rv40_loop_strength_v(pix: &[u8], off: usize, stride: usize,
                        beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
    let window = &pix[off - 3..][..stride * 3 + 3 + 3];
    let px = |row: &[u8], idx: usize| i16::from(row[idx]);

    let (near_p, near_q) = window.chunks(stride).take(4).fold((0i16, 0i16), |(p, q), row| {
        assert!(row.len() >= 3 + 3);
        (p + (px(row, 1) - px(row, 2)), q + (px(row, 4) - px(row, 3)))
    });

    let filter_p1 = near_p.wrapping_abs() < beta * 4;
    let filter_q1 = near_q.wrapping_abs() < beta * 4;

    if !(edge && filter_p1 && filter_q1) {
        return (false, filter_p1, filter_q1);
    }

    let (far_p, far_q) = window.chunks(stride).take(4).fold((0i16, 0i16), |(p, q), row| {
        assert!(row.len() >= 3 + 3);
        (p + (px(row, 1) - px(row, 0)), q + (px(row, 4) - px(row, 5)))
    });

    let strong = far_p.wrapping_abs() < beta2 && far_q.wrapping_abs() < beta2;
    (strong, filter_p1, filter_q1)
}
492
493 fn rv40_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
494 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
495 chroma: bool, edge: bool) {
496 let (strong, filter_p1, filter_q1) = rv40_loop_strength_h(pix, off, stride, beta, beta2, edge);
497 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
498
499 if strong {
500 rv40_strong_loop_filter4(pix, off, stride, 1, alpha, lims, dmode, chroma);
501 } else if filter_p1 && filter_q1 {
502 rv40_weak_loop_filter4_h(pix, off, stride, true, true, alpha, beta,
503 lims, lim_p1, lim_q1);
504 } else if filter_p1 || filter_q1 {
505 rv40_weak_loop_filter4_h(pix, off, stride, filter_p1, filter_q1, alpha, beta,
506 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
507 }
508 }
509
510 fn rv40_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
511 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
512 chroma: bool, edge: bool) {
513 let (strong, filter_p1, filter_q1) = rv40_loop_strength_v(pix, off, stride, beta, beta2, edge);
514 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
515
516 if strong {
517 rv40_strong_loop_filter4(pix, off, 1, stride, alpha, lims, dmode, chroma);
518 } else if filter_p1 && filter_q1 {
519 rv40_weak_loop_filter4_v(pix, off, stride, true, true, alpha, beta,
520 lims, lim_p1, lim_q1);
521 } else if filter_p1 || filter_q1 {
522 rv40_weak_loop_filter4_v(pix, off, stride, filter_p1, filter_q1, alpha, beta,
523 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
524 }
525 }
526
// Loop filter `alpha` parameter, looked up by the macroblock's `q` value in `loop_filter`.
527 const RV40_ALPHA_TAB: [i16; 32] = [
528 128, 128, 128, 128, 128, 128, 128, 128,
529 128, 128, 122, 96, 75, 59, 47, 37,
530 29, 23, 18, 15, 13, 11, 10, 9,
531 8, 7, 6, 5, 4, 3, 2, 1
532 ];
533
// Loop filter `beta` parameter, looked up by the macroblock's `q` value in `loop_filter`.
534 const RV40_BETA_TAB: [i16; 32] = [
535 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, 4, 4, 6, 6,
536 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 13, 14, 15, 16, 17
537 ];
538
// Clipping limits for the loop filter, indexed as `[strength][q]` where `strength`
// is the 0..=2 block strength computed in `loop_filter`. Strength 0 never allows
// any correction.
539 const RV40_FILTER_CLIP_TBL: [[i16; 32]; 3] = [
540 [
541 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
542 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
543 ], [
544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
545 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5
546 ], [
547 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
548 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 7, 8, 9
549 ]
550 ];
551
/// Evaluates to `true` when bit number `$x` of the integer `$pat` is set.
macro_rules! test_bit {
    ($pat: expr, $x: expr) => {
        ((($pat >> $x) & 1) == 1)
    };
}
555
/// Tells whether a `size` x `size` block at `(x + dx, y + dy)`, enlarged by the margins
/// `e0` (left), `e1` (right), `e2` (top) and `e3` (bottom), lies completely inside a
/// `w` x `h` picture.
fn check_pos(x: usize, y: usize, size: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let block = size as isize;
    let left = (x as isize) + isize::from(dx);
    let top = (y as isize) + isize::from(dy);

    let fits_horizontally = left >= e0 && left + block + e1 <= (w as isize);
    let fits_vertically = top >= e2 && top + block + e3 <= (h as isize);

    fits_horizontally && fits_vertically
}
562
// Extra samples required before the block (left/top margin passed to `check_pos`),
// indexed by the luma motion vector fraction: none for a zero fraction, two otherwise.
563 const RV40_EDGE1: [isize; 4] = [ 0, 2, 2, 2 ];
// Extra samples required after the block (right/bottom margin passed to `check_pos`),
// indexed by the luma motion vector fraction: none for a zero fraction, three otherwise.
564 const RV40_EDGE2: [isize; 4] = [ 0, 3, 3, 3 ];
565
566 impl RV34DSP for RV40DSP {
// Deblocks macroblock row `row` of `frame` in place.
// `mbinfo` holds per-macroblock data (`q`, `deblock`, `cbp_c`, `mbtype`) laid out with
// `mb_w` entries per row; `_ftype` is currently unused.
// For every 4x4 block of every macroblock the bottom, left and (conditionally) top
// edges are passed to `rv40_loop_filter4_h` / `rv40_loop_filter4_v`.
567 fn loop_filter(&self, frame: &mut NAVideoBuffer<u8>, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, row: usize) {
568 // todo proper B-frame filtering?
569 let mut offs: [usize; 3] = [0; 3];
570 let mut stride: [usize; 3] = [0; 3];
571 let (w, h) = frame.get_dimensions(0);
// Frames of at most 176x144 samples use a larger luma `beta2` threshold (see `beta_y`).
572 let small_frame = w * h <= 176*144;
573
// Per plane: stride and offset of the first line of this macroblock row
// (16 lines per row for plane 0, 8 for the other two).
574 for comp in 0..3 {
575 stride[comp] = frame.get_stride(comp);
576 let start = if comp == 0 { row * 16 } else { row * 8 };
577 offs[comp] = frame.get_offset(comp) + start * stride[comp];
578 }
579
580 let data = frame.get_data_mut().unwrap();
581 let dst: &mut [u8] = data.as_mut_slice();
582
583 let mut mb_pos: usize = row * mb_w;
// `q` of the previous macroblock in this row; 0 for the first one.
584 let mut left_q: usize = 0;
585 for mb_x in 0..mb_w {
586 let q = mbinfo[mb_pos].q as usize;
587 let alpha = RV40_ALPHA_TAB[q];
588 let beta = RV40_BETA_TAB[q];
589 let beta_y = if small_frame { beta * 4 } else { beta * 3 };
590 let beta_c = beta * 3;
591
592 let cur_dbk = mbinfo[mb_pos].deblock;
593 let cur_cbp = mbinfo[mb_pos].cbp_c;
594
// A macroblock edge counts as strong when either macroblock sharing it reports
// `is_intra_or_16()`.
595 let is_strong = mbinfo[mb_pos].mbtype.is_intra_or_16();
596 let top_is_strong = is_strong || (row > 0 && mbinfo[mb_pos - mb_w].mbtype.is_intra_or_16());
597 let left_is_strong = is_strong || (mb_x > 0 && mbinfo[mb_pos - 1].mbtype.is_intra_or_16());
598
// Luma: 4x4 grid of 4x4 blocks; `bpos` is the block's bit number in `deblock`.
599 for y in 0..4 {
600 let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
601 for x in 0..4 {
602 let bpos = x + y * 4;
603 let filter_hor_down = (y != 3) && !is_strong;
604 let filter_ver = (x > 0) || (mb_x > 0);
// NOTE(review): this gates a top (horizontal) edge on `x == 0`, the same test used for
// the left edge in `ver_strong`; confirm that `y == 0` was not intended.
605 let filter_hor_up = (row > 0) && (x == 0) && top_is_strong;
606 let ver_strong = (x == 0) && (mb_x > 0) && left_is_strong;
607
// Strength of a block: 2 for a strong macroblock, 1 when its bit is set, else 0.
// The value selects the row of `RV40_FILTER_CLIP_TBL`.
608 let cur_strength: usize;
609 if is_strong {
610 cur_strength = 2;
611 } else if test_bit!(cur_dbk, bpos) {
612 cur_strength = 1;
613 } else {
614 cur_strength = 0;
615 }
616
// Left neighbour: inside this macroblock for x > 0, otherwise the rightmost block of
// the same block row in the macroblock to the left (bit `bpos + 3`).
617 let left_strength: usize;
618 if x > 0 {
619 if is_strong {
620 left_strength = 2;
621 } else if test_bit!(cur_dbk, bpos - 1) {
622 left_strength = 1;
623 } else {
624 left_strength = 0;
625 }
626 } else if mb_x > 0 {
627 if left_is_strong {
628 left_strength = 2;
629 } else if test_bit!(mbinfo[mb_pos - 1].deblock, bpos + 3) {
630 left_strength = 1;
631 } else {
632 left_strength = 0;
633 }
634 } else {
635 left_strength = 0;
636 }
637
// Bottom neighbour: only blocks inside this macroblock are considered.
638 let bot_strength: usize;
639 if y < 3 {
640 if is_strong {
641 bot_strength = 2;
642 } else if test_bit!(cur_dbk, bpos + 4) {
643 bot_strength = 1;
644 } else {
645 bot_strength = 0;
646 }
647 } else {
648 bot_strength = 0;
649 }
650
// Top neighbour: inside this macroblock for y > 0, otherwise the bottom block of the
// same column in the macroblock above (bit `bpos + 12`).
651 let top_strength: usize;
652 if y > 0 {
653 if is_strong {
654 top_strength = 2;
655 } else if test_bit!(cur_dbk, bpos - 4) {
656 top_strength = 1;
657 } else {
658 top_strength = 0;
659 }
660 } else if row > 0 {
661 if top_is_strong {
662 top_strength = 2;
663 } else if test_bit!(mbinfo[mb_pos - mb_w].deblock, bpos + 12) {
664 top_strength = 1;
665 } else {
666 top_strength = 0;
667 }
668 } else {
669 top_strength = 0;
670 }
671
// Quantiser of the macroblock owning the left / top neighbour.
672 let l_q = if x > 0 { q } else { left_q };
673 let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };
674
675 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
676 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
677 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
678 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
679
// Starting index into the dither tables used by the strong filter.
680 let dmode = if y > 0 { x + y * 4 } else { x * 4 };
681
// Order: bottom edge, left edge (normal mode), top edge (edge mode),
// left edge (edge mode). The last argument is the `edge` flag.
682 if filter_hor_down {
683 rv40_loop_filter4_h(dst, yoff + 4 * stride[0] + x * 4, stride[0],
684 dmode, lim_cur, lim_bottom, alpha, beta, beta_y, false, false);
685 }
686 if filter_ver && !ver_strong {
687 rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
688 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, false);
689 }
690 if filter_hor_up {
691 rv40_loop_filter4_h(dst, yoff + x * 4, stride[0],
692 dmode, lim_top, lim_cur, alpha, beta, beta_y, false, true);
693 }
694 if filter_ver && ver_strong {
695 rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
696 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, true);
697 }
698 }
699 }
700
// Chroma: 2x2 grid of 4x4 blocks per plane. `bpos` addresses `cbp_c`, with the second
// chroma plane using bits 4..=7. Same edge order as for luma, with `chroma` = true.
701 for comp in 1..3 {
702 for y in 0..2 {
703 let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
704 for x in 0..2 {
705 let bpos = x + y * 2 + (comp - 1) * 4;
706
707 let filter_hor_down = (y != 1) && !is_strong;
708 let filter_ver = (x > 0) || (mb_x > 0);
709 let filter_hor_up = (row > 0) && (x == 0) && top_is_strong;
710 let ver_strong = (x == 0) && (mb_x > 0) && left_is_strong;
711
712 let cur_strength: usize;
713 if is_strong {
714 cur_strength = 2;
715 } else if test_bit!(cur_cbp, bpos) {
716 cur_strength = 1;
717 } else {
718 cur_strength = 0;
719 }
720
721 let left_strength: usize;
722 if x > 0 {
723 if is_strong {
724 left_strength = 2;
725 } else if test_bit!(cur_cbp, bpos - 1) {
726 left_strength = 1;
727 } else {
728 left_strength = 0;
729 }
730 } else if mb_x > 0 {
731 if left_is_strong {
732 left_strength = 2;
733 } else if test_bit!(mbinfo[mb_pos - 1].cbp_c, bpos + 1) {
734 left_strength = 1;
735 } else {
736 left_strength = 0;
737 }
738 } else {
739 left_strength = 0;
740 }
741
742 let bot_strength: usize;
743 if y == 0 {
744 if is_strong {
745 bot_strength = 2;
746 } else if test_bit!(cur_cbp, bpos + 2) {
747 bot_strength = 1;
748 } else {
749 bot_strength = 0;
750 }
751 } else {
752 bot_strength = 0;
753 }
754
755 let top_strength: usize;
756 if y > 0 {
757 if is_strong {
758 top_strength = 2;
759 } else if test_bit!(cur_cbp, bpos - 2) {
760 top_strength = 1;
761 } else {
762 top_strength = 0;
763 }
764 } else if row > 0 {
765 if top_is_strong {
766 top_strength = 2;
767 } else if test_bit!(mbinfo[mb_pos - mb_w].cbp_c, bpos + 2) {
768 top_strength = 1;
769 } else {
770 top_strength = 0;
771 }
772 } else {
773 top_strength = 0;
774 }
775
776 let l_q = if x > 0 { q } else { left_q };
777 let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };
778
779 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
780 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
781 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
782 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
783
// Chroma passes `x * 8` as the dither start for row-wise edges and `y * 8` for
// column-wise edges.
784 if filter_hor_down {
785 rv40_loop_filter4_h(dst, coff + 4 * stride[comp] + x * 4, stride[comp],
786 x * 8, lim_cur, lim_bottom, alpha, beta, beta_c, true, false);
787 }
788 if filter_ver && !ver_strong {
789 rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
790 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, false);
791 }
792 if filter_hor_up {
793 rv40_loop_filter4_h(dst, coff + x * 4, stride[comp],
794 x * 8, lim_top, lim_cur, alpha, beta, beta_c, true, true);
795 }
796 if filter_ver && ver_strong {
797 rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
798 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, true);
799 }
800 }
801 }
802 }
803
// Remember this macroblock's quantiser for the left edge of the next one.
804 left_q = q;
805
806 mb_pos += 1;
807 }
808 }
809 fn do_luma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, mv: MV, use16: bool, avg: bool) {
810 let size: usize = if use16 { 16 } else { 8 };
811 let dstride = frame.get_stride(0);
812 let doffset = frame.get_offset(0) + (if !avg { x + y * dstride } else { 0 });
813 let data = frame.get_data_mut().unwrap();
814 let dst: &mut [u8] = data.as_mut_slice();
815
816 let (w_, h_) = prev_frame.get_dimensions(0);
817 let w = (w_ + 15) & !15;
818 let h = (h_ + 15) & !15;
819
820 let dx = mv.x >> 2;
821 let cx = (mv.x & 3) as usize;
822 let dy = mv.y >> 2;
823 let cy = (mv.y & 3) as usize;
824 let mode = cx + cy * 4;
825
826 if check_pos(x, y, size, w, h, dx, dy, RV40_EDGE1[cx], RV40_EDGE2[cx], RV40_EDGE1[cy], RV40_EDGE2[cy]) {
827 let sstride = prev_frame.get_stride(0);
828 let mut soffset = prev_frame.get_offset(0) + x + y * sstride;
829 let data = prev_frame.get_data();
830 let src: &[u8] = data.as_slice();
831 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
832 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, src, soffset, sstride);
833 } else {
834 let mut ebuf: [u8; 32*22] = [0; 32*22];
835 edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0);
836 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, &ebuf, 32*2 + 2, 32);
837 }
838 }
839 fn do_chroma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, comp: usize, mv: MV, use8: bool, avg: bool) {
840 let size: usize = if use8 { 8 } else { 4 };
841 let dstride = frame.get_stride(comp);
842 let doffset = frame.get_offset(comp) + (if !avg { x + y * dstride } else { 0 });
843 let data = frame.get_data_mut().unwrap();
844 let dst: &mut [u8] = data.as_mut_slice();
845
846 let (w_, h_) = prev_frame.get_dimensions(comp);
847 let w = (w_ + 7) & !7;
848 let h = (h_ + 7) & !7;
849
850 let mvx = mv.x / 2;
851 let mvy = mv.y / 2;
852 let dx = mvx >> 2;
853 let mut cx = (mvx & 3) as usize;
854 let dy = mvy >> 2;
855 let mut cy = (mvy & 3) as usize;
856
857 if (cx == 3) && (cy == 3) {
858 cx = 2;
859 cy = 2;
860 }
861
862 if check_pos(x, y, size, w, h, dx, dy, 0, 1, 0, 1) {
863 let sstride = prev_frame.get_stride(comp);
864 let mut soffset = prev_frame.get_offset(comp) + x + y * sstride;
865 let data = prev_frame.get_data();
866 let src: &[u8] = data.as_slice();
867 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
868 rv40_chroma_mc(dst, doffset, dstride, src, soffset, sstride, size, cx, cy);
869 } else {
870 let mut ebuf: [u8; 16*10] = [0; 16*10];
871 edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp);
872 rv40_chroma_mc(dst, doffset, dstride, &ebuf, 0, 16, size, cx, cy);
873 }
874 }
875 }