8e080772fb7b19949251d5d7856ec21230095a66
[nihav.git] / nihav-realmedia / src / codecs / rv40dsp.rs
1 use nihav_core::frame::{FrameType, NAVideoBuffer};
2 use nihav_codec_support::codecs::MV;
3 use nihav_codec_support::codecs::blockdsp::edge_emu;
4 use super::rv3040::{RV34DSP, RV34MBInfo};
5
/// Saturates a signed 16-bit intermediate to the `0..=255` range of an 8-bit sample.
fn clip8(a: i16) -> u8 {
    // After clamping the value is guaranteed to fit, so the cast cannot truncate.
    a.max(0).min(255) as u8
}
11
/// Reads element `$idx` of the indexable `$buf` and widens it to `i16`
/// so that filter arithmetic can go negative without wrapping.
macro_rules! el {
    ($buf: ident, $idx: expr) => {
        $buf[$idx] as i16
    };
}
15
/// Six-tap (and 2x2 averaging) interpolation kernels used by the luma
/// motion-compensation functions.
///
/// * `01` - taps (1, -5, 52, 20, -5, 1), rounded and shifted by 6.
/// * `02` - taps (1, -5, 20, 20, -5, 1), rounded and shifted by 5.
/// * `03` - taps (1, -5, 20, 52, -5, 1), rounded and shifted by 6.
/// * `33` - rounded average of the 2x2 neighbourhood starting at `$pos`.
///
/// `$buf` is the sample buffer, `$pos` the index of the current sample and
/// `$d` the distance between consecutive taps (1 or the line stride).
/// All arithmetic is done in `i16` via `el!` and saturated with `clip8`.
macro_rules! filter {
    (01; $buf: ident, $pos: expr, $d: expr) => ({
        let outer = el!($buf, $pos - 2 * $d) + el!($buf, $pos + 3 * $d);
        let inner = el!($buf, $pos - 1 * $d) + el!($buf, $pos + 2 * $d);
        clip8((outer - 5 * inner + 52 * el!($buf, $pos) + 20 * el!($buf, $pos + 1 * $d) + 32) >> 6)
    });
    (02; $buf: ident, $pos: expr, $d: expr) => ({
        let outer = el!($buf, $pos - 2 * $d) + el!($buf, $pos + 3 * $d);
        let inner = el!($buf, $pos - 1 * $d) + el!($buf, $pos + 2 * $d);
        clip8((outer - 5 * inner + 20 * el!($buf, $pos) + 20 * el!($buf, $pos + 1 * $d) + 16) >> 5)
    });
    (03; $buf: ident, $pos: expr, $d: expr) => ({
        let outer = el!($buf, $pos - 2 * $d) + el!($buf, $pos + 3 * $d);
        let inner = el!($buf, $pos - 1 * $d) + el!($buf, $pos + 2 * $d);
        clip8((outer - 5 * inner + 20 * el!($buf, $pos) + 52 * el!($buf, $pos + 1 * $d) + 32) >> 6)
    });
    (33; $buf: ident, $pos: expr, $line: expr) => ({
        let upper = el!($buf, $pos) + el!($buf, $pos + 1);
        let lower = el!($buf, $pos + $line) + el!($buf, $pos + 1 + $line);
        clip8((upper + lower + 2) >> 2)
    });
}
48
/// Generates one luma motion-compensation kernel named `$name` for a
/// `$size` x `$size` block.  Every generated function has the signature
/// `(dst, didx, dstride, src, sidx, sstride)`: it reads from `src` starting at
/// `sidx` and writes `$size` rows of `$size` samples into `dst` starting at `didx`.
///
/// Arms:
/// * `copy`           - plain block copy.
/// * `mc01/mc02/mc03` - single-direction `filter!` kernel; `$ver` selects
///                      vertical (`true`) or horizontal (`false`) taps.
/// * `cm01/cm02/cm03` - horizontal `filter!` pass into a temporary buffer of
///                      `$size + 5` rows (two above, three below the block),
///                      followed by the vertical kernel `$ofilt`.
/// * `mc33`           - 2x2 averaging kernel.
macro_rules! mc_func {
    (copy; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            for row in 0..$size {
                let out = &mut dst[didx + row * dstride..][..$size];
                let inp = &src[sidx + row * sstride..][..$size];
                out.copy_from_slice(inp);
            }
        }
    );
    (mc01; $name: ident, $size: expr, $ver: expr) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            // Distance between filter taps: one line for vertical, one sample for horizontal.
            let step = if $ver { sstride } else { 1 };
            for row in 0..$size {
                let base = sidx + row * sstride;
                let out = &mut dst[didx + row * dstride..][..$size];
                for (x, pix) in out.iter_mut().enumerate() {
                    *pix = filter!(01; src, base + x, step);
                }
            }
        }
    );
    (mc02; $name: ident, $size: expr, $ver: expr) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            let step = if $ver { sstride } else { 1 };
            for row in 0..$size {
                let base = sidx + row * sstride;
                let out = &mut dst[didx + row * dstride..][..$size];
                for (x, pix) in out.iter_mut().enumerate() {
                    *pix = filter!(02; src, base + x, step);
                }
            }
        }
    );
    (mc03; $name: ident, $size: expr, $ver: expr) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            let step = if $ver { sstride } else { 1 };
            for row in 0..$size {
                let base = sidx + row * sstride;
                let out = &mut dst[didx + row * dstride..][..$size];
                for (x, pix) in out.iter_mut().enumerate() {
                    *pix = filter!(03; src, base + x, step);
                }
            }
        }
    );
    (cm01; $name: ident, $size: expr, $ofilt: ident) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            let mut buf = [0u8; ($size + 5) * $size];
            // The vertical pass needs two rows above the block, so start there.
            let top = sidx - sstride * 2;
            for (row, line) in buf.chunks_mut($size).enumerate() {
                let base = top + row * sstride;
                for (x, pix) in line.iter_mut().enumerate() {
                    *pix = filter!(01; src, base + x, 1);
                }
            }
            // Row 2 of the temporary buffer corresponds to the first block row.
            $ofilt(dst, didx, dstride, &buf, 2 * $size, $size);
        }
    );
    (cm02; $name: ident, $size: expr, $ofilt: ident) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            let mut buf = [0u8; ($size + 5) * $size];
            let top = sidx - sstride * 2;
            for (row, line) in buf.chunks_mut($size).enumerate() {
                let base = top + row * sstride;
                for (x, pix) in line.iter_mut().enumerate() {
                    *pix = filter!(02; src, base + x, 1);
                }
            }
            $ofilt(dst, didx, dstride, &buf, 2 * $size, $size);
        }
    );
    (cm03; $name: ident, $size: expr, $ofilt: ident) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            let mut buf = [0u8; ($size + 5) * $size];
            let top = sidx - sstride * 2;
            for (row, line) in buf.chunks_mut($size).enumerate() {
                let base = top + row * sstride;
                for (x, pix) in line.iter_mut().enumerate() {
                    *pix = filter!(03; src, base + x, 1);
                }
            }
            $ofilt(dst, didx, dstride, &buf, 2 * $size, $size);
        }
    );
    (mc33; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            for row in 0..$size {
                let base = sidx + row * sstride;
                let out = &mut dst[didx + row * dstride..][..$size];
                for (x, pix) in out.iter_mut().enumerate() {
                    *pix = filter!(33; src, base + x, sstride);
                }
            }
        }
    );
}
149 mc_func!(copy; copy_16, 16);
150 mc_func!(copy; copy_8, 8);
151 mc_func!(mc01; luma_mc_10_16, 16, false);
152 mc_func!(mc01; luma_mc_10_8, 8, false);
153 mc_func!(mc02; luma_mc_20_16, 16, false);
154 mc_func!(mc02; luma_mc_20_8, 8, false);
155 mc_func!(mc03; luma_mc_30_16, 16, false);
156 mc_func!(mc03; luma_mc_30_8, 8, false);
157 mc_func!(mc01; luma_mc_01_16, 16, true);
158 mc_func!(mc01; luma_mc_01_8, 8, true);
159 mc_func!(mc02; luma_mc_02_16, 16, true);
160 mc_func!(mc02; luma_mc_02_8, 8, true);
161 mc_func!(mc03; luma_mc_03_16, 16, true);
162 mc_func!(mc03; luma_mc_03_8, 8, true);
163 mc_func!(cm01; luma_mc_11_16, 16, luma_mc_01_16);
164 mc_func!(cm01; luma_mc_11_8, 8, luma_mc_01_8);
165 mc_func!(cm01; luma_mc_12_16, 16, luma_mc_02_16);
166 mc_func!(cm01; luma_mc_12_8, 8, luma_mc_02_8);
167 mc_func!(cm01; luma_mc_13_16, 16, luma_mc_03_16);
168 mc_func!(cm01; luma_mc_13_8, 8, luma_mc_03_8);
169 mc_func!(cm02; luma_mc_21_16, 16, luma_mc_01_16);
170 mc_func!(cm02; luma_mc_21_8, 8, luma_mc_01_8);
171 mc_func!(cm02; luma_mc_22_16, 16, luma_mc_02_16);
172 mc_func!(cm02; luma_mc_22_8, 8, luma_mc_02_8);
173 mc_func!(cm02; luma_mc_23_16, 16, luma_mc_03_16);
174 mc_func!(cm02; luma_mc_23_8, 8, luma_mc_03_8);
175 mc_func!(cm03; luma_mc_31_16, 16, luma_mc_01_16);
176 mc_func!(cm03; luma_mc_31_8, 8, luma_mc_01_8);
177 mc_func!(cm03; luma_mc_32_16, 16, luma_mc_02_16);
178 mc_func!(cm03; luma_mc_32_8, 8, luma_mc_02_8);
179 mc_func!(mc33; luma_mc_33_16, 16);
180 mc_func!(mc33; luma_mc_33_8, 8);
181
/// Rounding bias for bilinear chroma interpolation, indexed as `[y][x]` by the
/// quarter-pel fractions of the chroma motion vector (each in `0..=3`).
const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [
    [ 0, 4, 8, 4 ],
    [ 8, 7, 8, 7 ],
    [ 0, 8, 4, 8 ],
    [ 8, 7, 8, 7 ]
];

/// Bilinear chroma motion compensation for a `size` x `size` block.
///
/// * `dst`, `didx`, `dstride` - destination buffer, start index and line stride.
/// * `src`, `sidx`, `sstride` - source buffer, start index and line stride.
/// * `x`, `y` - quarter-pel fractions of the motion vector, each in `0..=3`.
///
/// The weights are `(4 - x) * (4 - y)` etc. and sum to 16, hence the shift by 4.
/// The bias table is indexed directly by `y` and `x`: the previous
/// `[y >> 1][x >> 1]` lookup could only ever reach the top-left 2x2 corner of
/// the 4x4 table.
/// NOTE(review): FFmpeg's RV40 code uses the `>> 1` form, but with fractions
/// pre-scaled to 0, 2, 4, 6 - confirm against reference output.
fn rv40_chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
    if (x == 0) && (y == 0) {
        // Full-pel position: plain copy.
        for _ in 0..size {
            dst[didx..][..size].copy_from_slice(&src[sidx..][..size]);
            didx += dstride;
            sidx += sstride;
        }
        return;
    }
    let bias = RV40_CHROMA_BIAS[y][x];
    if (x > 0) && (y > 0) {
        // Both fractions are non-zero: full 2x2 bilinear blend.
        let a = ((4 - x) * (4 - y)) as u16;
        let b = ((    x) * (4 - y)) as u16;
        let c = ((4 - x) * (    y)) as u16;
        let d = ((    x) * (    y)) as u16;
        for _ in 0..size {
            for x in 0..size {
                dst[didx + x] = ((a * (src[sidx + x] as u16)
                                + b * (src[sidx + x + 1] as u16)
                                + c * (src[sidx + x + sstride] as u16)
                                + d * (src[sidx + x + 1 + sstride] as u16) + bias) >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    } else {
        // Only one fraction is non-zero: two-tap blend along that direction.
        let a = ((4 - x) * (4 - y)) as u16;
        let e = ((    x) * (4 - y) + (4 - x) * (    y)) as u16;
        let step = if y > 0 { sstride } else { 1 };
        for _ in 0..size {
            for x in 0..size {
                dst[didx + x] = ((a * (src[sidx + x] as u16)
                                + e * (src[sidx + x + step] as u16) + bias) >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    }
}
228
/// Signature shared by all luma motion-compensation kernels:
/// `(dst, dst_index, dst_stride, src, src_index, src_stride)`.
type LumaMcFunc = fn(&mut [u8], usize, usize, &[u8], usize, usize);

/// DSP routines for the RealVideo 4 decoder.
pub struct RV40DSP {
    /// Luma kernels: the first index selects the block size (0 for 16x16,
    /// 1 for 8x8), the second is `cx + cy * 4` built from the quarter-pel phases.
    luma_mc: [[LumaMcFunc; 16]; 2],
}
232
233 impl RV40DSP {
234 pub fn new() -> Self {
235 RV40DSP {
236 luma_mc: [
237 [ copy_16, luma_mc_10_16, luma_mc_20_16, luma_mc_30_16,
238 luma_mc_01_16, luma_mc_11_16, luma_mc_21_16, luma_mc_31_16,
239 luma_mc_02_16, luma_mc_12_16, luma_mc_22_16, luma_mc_32_16,
240 luma_mc_03_16, luma_mc_13_16, luma_mc_23_16, luma_mc_33_16 ],
241 [ copy_8, luma_mc_10_8, luma_mc_20_8, luma_mc_30_8,
242 luma_mc_01_8, luma_mc_11_8, luma_mc_21_8, luma_mc_31_8,
243 luma_mc_02_8, luma_mc_12_8, luma_mc_22_8, luma_mc_32_8,
244 luma_mc_03_8, luma_mc_13_8, luma_mc_23_8, luma_mc_33_8 ] ],
245 }
246 }
247 }
248
/// Reads element `$idx` of the indexable `$buf` widened to `i16`
/// (used by the loop-filter code below).
macro_rules! el {
    ($buf: ident, $idx: expr) => {
        $buf[$idx] as i16
    };
}
252
/// Clamps `a` to the symmetric range `-lim..=lim`.
///
/// The lower bound is tested first, exactly as in the branch form, so the
/// result is the same even for a negative `lim`.
fn clip_symm(a: i16, lim: i16) -> i16 {
    match a {
        v if v < -lim => -lim,
        v if v > lim => lim,
        v => v,
    }
}
262
263 fn rv40_weak_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
264 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
265 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
266 for _ in 0..4 {
267 let p0 = el!(pix, off - step);
268 let q0 = el!(pix, off);
269
270 let t = q0 - p0;
271 if t == 0 {
272 off += stride;
273 continue;
274 }
275
276 let u = (alpha * t.wrapping_abs()) >> 7;
277 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
278 off += stride;
279 continue;
280 }
281
282 let p2 = el!(pix, off - 3*step);
283 let p1 = el!(pix, off - 2*step);
284 let q1 = el!(pix, off + step);
285 let q2 = el!(pix, off + 2*step);
286
287 let str;
288 if filter_p1 && filter_q1 {
289 str = (t << 2) + (p1 - q1);
290 } else {
291 str = t << 2;
292 }
293
294 let diff = clip_symm((str + 4) >> 3, lim_p0q0);
295 pix[off - step] = clip8(p0 + diff);
296 pix[off ] = clip8(q0 - diff);
297
298 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
299 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
300 pix[off - 2*step] = clip8(p1 - clip_symm(p1_diff, lim_p1));
301 }
302
303 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
304 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
305 pix[off + step] = clip8(q1 - clip_symm(q1_diff, lim_q1));
306 }
307
308 off += stride;
309 }
310 }
311
312 fn rv40_weak_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
313 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
314 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
315 rv40_weak_loop_filter4(pix, off, stride, 1, filter_p1, filter_q1, alpha, beta, lim_p0q0, lim_p1, lim_q1);
316 }
317 #[allow(clippy::eq_op)]
318 fn rv40_weak_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
319 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
320 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
321 let src = &mut pix[off - 3..][..stride * 3 + 3 + 3];
322 for ch in src.chunks_mut(stride).take(4) {
323 assert!(ch.len() >= 3 + 3);
324 let p0 = el!(ch, 3 - 1);
325 let q0 = el!(ch, 3);
326
327 let t = q0 - p0;
328 if t == 0 {
329 continue;
330 }
331
332 let u = (alpha * t.wrapping_abs()) >> 7;
333 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
334 continue;
335 }
336
337 let p2 = el!(ch, 3 - 3);
338 let p1 = el!(ch, 3 - 2);
339 let q1 = el!(ch, 3 + 1);
340 let q2 = el!(ch, 3 + 2);
341
342 let str;
343 if filter_p1 && filter_q1 {
344 str = (t << 2) + (p1 - q1);
345 } else {
346 str = t << 2;
347 }
348
349 let diff = clip_symm((str + 4) >> 3, lim_p0q0);
350 ch[3 - 1] = clip8(p0 + diff);
351 ch[3 ] = clip8(q0 - diff);
352
353 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
354 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
355 ch[3 - 2] = clip8(p1 - clip_symm(p1_diff, lim_p1));
356 }
357
358 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
359 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
360 ch[3 + 1] = clip8(q1 - clip_symm(q1_diff, lim_q1));
361 }
362 }
363 }
364
365
/// Dither values added by the strong loop filter when smoothing the "p" side
/// of an edge; indexed by `dmode + line`.
const RV40_DITHER_L: [i16; 16] = [
    0x40, 0x50, 0x20, 0x60,
    0x30, 0x50, 0x40, 0x30,
    0x50, 0x40, 0x50, 0x30,
    0x60, 0x20, 0x50, 0x40,
];
/// Dither values added by the strong loop filter when smoothing the "q" side
/// of an edge; indexed by `dmode + line`.
const RV40_DITHER_R: [i16; 16] = [
    0x40, 0x30, 0x60, 0x20,
    0x50, 0x30, 0x30, 0x40,
    0x40, 0x40, 0x50, 0x30,
    0x20, 0x60, 0x30, 0x40,
];
374
/// Five-tap smoothing kernel of the strong loop filter.
///
/// Computes `(25 * (a + e) + 26 * (b + c + d) + dither) >> 7`.  When `clip`
/// is set, the result is kept within `lims` of the centre sample `c`.
fn sfilter(a: i16, b: i16, c: i16, d: i16, e: i16, dither: i16, clip: bool, lims: i16) -> i16 {
    let val = (25 * (a + e) + 26 * (b + c + d) + dither) >> 7;
    if !clip {
        return val;
    }
    let (lo, hi) = (c - lims, c + lims);
    if val < lo {
        lo
    } else if val > hi {
        hi
    } else {
        val
    }
}
389
390 fn rv40_strong_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
391 alpha: i16, lims: i16, dmode: usize, chroma: bool) {
392 for i in 0..4 {
393 let p0 = el!(pix, off - step);
394 let q0 = el!(pix, off);
395
396 let t = q0 - p0;
397 if t == 0 {
398 off += stride;
399 continue;
400 }
401
402 let fmode = (alpha * t.wrapping_abs()) >> 7;
403 if fmode > 1 {
404 off += stride;
405 continue;
406 }
407
408 let p3 = el!(pix, off - 4*step);
409 let p2 = el!(pix, off - 3*step);
410 let p1 = el!(pix, off - 2*step);
411 let q1 = el!(pix, off + step);
412 let q2 = el!(pix, off + 2*step);
413 let q3 = el!(pix, off + 3*step);
414
415 let np0 = sfilter(p2, p1, p0, q0, q1, RV40_DITHER_L[dmode + i], fmode != 0, lims);
416 let nq0 = sfilter( p1, p0, q0, q1, q2, RV40_DITHER_R[dmode + i], fmode != 0, lims);
417
418 let np1 = sfilter(p3, p2, p1, np0, q0, RV40_DITHER_L[dmode + i], fmode != 0, lims);
419 let nq1 = sfilter( p0, nq0, q1, q2, q3, RV40_DITHER_R[dmode + i], fmode != 0, lims);
420
421 pix[off - 2*step] = np1 as u8;
422 pix[off - step] = np0 as u8;
423 pix[off] = nq0 as u8;
424 pix[off + step] = nq1 as u8;
425
426 if !chroma {
427 let np2 = sfilter(np0, np1, p2, p3, p2, 64, false, 0);
428 let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0);
429 pix[off - 3*step] = np2 as u8;
430 pix[off + 2*step] = nq2 as u8;
431 }
432
433 off += stride;
434 }
435 }
436
/// Measures the activity on both sides of a four-sample edge segment.
///
/// Returns `(strong, filter_p1, filter_q1)`:
/// * `filter_p1` / `filter_q1` - the summed `p1 - p0` / `q1 - q0` differences
///   are below `beta * 4` in magnitude;
/// * `strong` - only ever set when `edge` is true and both flags above hold;
///   it additionally requires the summed `p1 - p2` and `q1 - q2` differences
///   to be below `beta2` in magnitude.
///
/// `step` is the distance between samples across the edge, `stride` the
/// distance between positions along it.
fn rv40_loop_strength(pix: &[u8], off: usize, step: usize, stride: usize,
                      beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
    let px = |idx: usize| i16::from(pix[idx]);
    let lines = move || (0..4).map(move |n| off + n * stride);

    let sum_p1p0: i16 = lines().map(|o| px(o - 2 * step) - px(o - step)).sum();
    let sum_q1q0: i16 = lines().map(|o| px(o + step) - px(o)).sum();

    let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
    let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;

    if !(filter_p1 && filter_q1 && edge) {
        return (false, filter_p1, filter_q1);
    }

    let sum_p1p2: i16 = lines().map(|o| px(o - 2 * step) - px(o - 3 * step)).sum();
    let sum_q1q2: i16 = lines().map(|o| px(o + step) - px(o + 2 * step)).sum();

    let strong = sum_p1p2.wrapping_abs() < beta2 && sum_q1q2.wrapping_abs() < beta2;

    (strong, filter_p1, filter_q1)
}
470
471 fn rv40_loop_strength_h(pix: &[u8], off: usize, stride: usize,
472 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
473 rv40_loop_strength(pix, off, stride, 1, beta, beta2, edge)
474 }
475
/// Edge-strength measurement for an edge between columns: the samples across
/// the edge are adjacent in memory and the four positions are `stride` apart.
///
/// Returns `(strong, filter_p1, filter_q1)`; `strong` is only ever set when
/// `edge` is true and both side flags hold.  Works on a window starting three
/// samples before `off`, so indices 0..=2 are p2, p1, p0 and 3..=5 are q0, q1, q2.
fn rv40_loop_strength_v(pix: &[u8], off: usize, stride: usize,
                        beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
    // Three full lines plus the six samples needed from the fourth one.
    let window = &pix[off - 3..][..stride * 3 + 3 + 3];

    let (mut sum_p1p0, mut sum_q1q0) = (0i16, 0i16);
    for line in window.chunks(stride).take(4) {
        assert!(line.len() >= 6);
        sum_p1p0 += i16::from(line[1]) - i16::from(line[2]);
        sum_q1q0 += i16::from(line[4]) - i16::from(line[3]);
    }

    let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
    let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;

    if !(filter_p1 && filter_q1 && edge) {
        return (false, filter_p1, filter_q1);
    }

    let (mut sum_p1p2, mut sum_q1q2) = (0i16, 0i16);
    for line in window.chunks(stride).take(4) {
        assert!(line.len() >= 6);
        sum_p1p2 += i16::from(line[1]) - i16::from(line[0]);
        sum_q1q2 += i16::from(line[4]) - i16::from(line[5]);
    }

    let strong = sum_p1p2.wrapping_abs() < beta2 && sum_q1q2.wrapping_abs() < beta2;

    (strong, filter_p1, filter_q1)
}
509
510 fn rv40_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
511 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
512 chroma: bool, edge: bool) {
513 let (strong, filter_p1, filter_q1) = rv40_loop_strength_h(pix, off, stride, beta, beta2, edge);
514 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
515
516 if strong {
517 rv40_strong_loop_filter4(pix, off, stride, 1, alpha, lims, dmode, chroma);
518 } else if filter_p1 && filter_q1 {
519 rv40_weak_loop_filter4_h(pix, off, stride, true, true, alpha, beta,
520 lims, lim_p1, lim_q1);
521 } else if filter_p1 || filter_q1 {
522 rv40_weak_loop_filter4_h(pix, off, stride, filter_p1, filter_q1, alpha, beta,
523 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
524 }
525 }
526
527 fn rv40_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
528 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
529 chroma: bool, edge: bool) {
530 let (strong, filter_p1, filter_q1) = rv40_loop_strength_v(pix, off, stride, beta, beta2, edge);
531 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
532
533 if strong {
534 rv40_strong_loop_filter4(pix, off, 1, stride, alpha, lims, dmode, chroma);
535 } else if filter_p1 && filter_q1 {
536 rv40_weak_loop_filter4_v(pix, off, stride, true, true, alpha, beta,
537 lims, lim_p1, lim_q1);
538 } else if filter_p1 || filter_q1 {
539 rv40_weak_loop_filter4_v(pix, off, stride, filter_p1, filter_q1, alpha, beta,
540 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
541 }
542 }
543
/// Loop filter `alpha` threshold, indexed by the macroblock quantiser.
const RV40_ALPHA_TAB: [i16; 32] = [
    128, 128, 128, 128, 128, 128, 128, 128,
    128, 128, 122,  96,  75,  59,  47,  37,
     29,  23,  18,  15,  13,  11,  10,   9,
      8,   7,   6,   5,   4,   3,   2,   1,
];

/// Loop filter `beta` threshold, indexed by the macroblock quantiser.
const RV40_BETA_TAB: [i16; 32] = [
     0,  0,  0,  0,  0,  0,  0,  0,
     3,  3,  3,  4,  4,  4,  6,  6,
     6,  7,  8,  8,  9,  9, 10, 10,
    11, 11, 12, 13, 14, 15, 16, 17,
];

/// Loop filter clipping limits, indexed by filtering strength (0..=2) and
/// then by quantiser.  Strength 0 never modifies anything.
const RV40_FILTER_CLIP_TBL: [[i16; 32]; 3] = [
    [0; 32],
    [
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 2, 2, 2,
        2, 3, 3, 3, 3, 4, 5, 5,
    ],
    [
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1,
        1, 2, 2, 2, 2, 3, 3, 3,
        4, 4, 5, 5, 5, 7, 8, 9,
    ],
];
568
/// Evaluates to `true` when bit number `$bit` of `$mask` is set.
macro_rules! test_bit {
    ($mask: expr, $bit: expr) => {
        (($mask >> $bit) & 1) == 1
    };
}
572
/// Checks whether a `size` x `size` block at (`x + dx`, `y + dy`), extended by
/// `e0`/`e1` samples to the left/right and `e2`/`e3` samples above/below,
/// lies completely inside a `w` x `h` plane.
fn check_pos(x: usize, y: usize, size: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let side = size as isize;
    let xn = (x as isize) + isize::from(dx);
    let yn = (y as isize) + isize::from(dy);

    let fits_hor = (xn - e0 >= 0) && (xn + side + e1 <= (w as isize));
    let fits_ver = (yn - e2 >= 0) && (yn + side + e3 <= (h as isize));

    fits_hor && fits_ver
}
579
/// Extra samples needed before the block (left or above) for each
/// quarter-pel phase of the luma interpolation.
const RV40_EDGE1: [isize; 4] = [0, 2, 2, 2];
/// Extra samples needed after the block (right or below) for each
/// quarter-pel phase of the luma interpolation.
const RV40_EDGE2: [isize; 4] = [0, 3, 3, 3];
582
583 impl RV34DSP for RV40DSP {
584 fn loop_filter(&self, frame: &mut NAVideoBuffer<u8>, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, mb_h: usize, row: usize) {
585 // todo proper B-frame filtering?
586 let mut offs: [usize; 3] = [0; 3];
587 let mut stride: [usize; 3] = [0; 3];
588 let (w, h) = frame.get_dimensions(0);
589 let small_frame = w * h <= 176*144;
590
591 for comp in 0..3 {
592 stride[comp] = frame.get_stride(comp);
593 let start = if comp == 0 { row * 16 } else { row * 8 };
594 offs[comp] = frame.get_offset(comp) + start * stride[comp];
595 }
596
597 let data = frame.get_data_mut().unwrap();
598 let dst: &mut [u8] = data.as_mut_slice();
599
600 let mut mb_pos: usize = row * mb_w;
601 let mut left_q: usize = 0;
602 for mb_x in 0..mb_w {
603 let q = mbinfo[mb_pos].q as usize;
604 let alpha = RV40_ALPHA_TAB[q];
605 let beta = RV40_BETA_TAB[q];
606 let beta_y = if small_frame { beta * 4 } else { beta * 3 };
607 let beta_c = beta * 3;
608
609 let cur_dbk = mbinfo[mb_pos].deblock;
610 let cur_cbp = mbinfo[mb_pos].cbp_c;
611
612 let is_strong = mbinfo[mb_pos].mbtype.is_intra_or_16();
613 let top_is_strong = is_strong || (row > 0 && mbinfo[mb_pos - mb_w].mbtype.is_intra_or_16());
614 let left_is_strong = is_strong || (mb_x > 0 && mbinfo[mb_pos - 1].mbtype.is_intra_or_16());
615
616 for y in 0..4 {
617 let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
618 for x in 0..4 {
619 let bpos = x + y * 4;
620 let filter_hor_down = (y != 3) && !is_strong;
621 let filter_ver = (x > 0) || (mb_x > 0);
622 let filter_hor_up = (row > 0) && (x == 0) && top_is_strong;
623 let ver_strong = (x == 0) && (mb_x > 0) && left_is_strong;
624
625 let cur_strength: usize;
626 if is_strong {
627 cur_strength = 2;
628 } else if test_bit!(cur_dbk, bpos) {
629 cur_strength = 1;
630 } else {
631 cur_strength = 0;
632 }
633
634 let left_strength: usize;
635 if x > 0 {
636 if is_strong {
637 left_strength = 2;
638 } else if test_bit!(cur_dbk, bpos - 1) {
639 left_strength = 1;
640 } else {
641 left_strength = 0;
642 }
643 } else if mb_x > 0 {
644 if left_is_strong {
645 left_strength = 2;
646 } else if test_bit!(mbinfo[mb_pos - 1].deblock, bpos + 3) {
647 left_strength = 1;
648 } else {
649 left_strength = 0;
650 }
651 } else {
652 left_strength = 0;
653 }
654
655 let bot_strength: usize;
656 if y < 3 {
657 if is_strong {
658 bot_strength = 2;
659 } else if test_bit!(cur_dbk, bpos + 4) {
660 bot_strength = 1;
661 } else {
662 bot_strength = 0;
663 }
664 } else {
665 bot_strength = 0;
666 }
667
668 let top_strength: usize;
669 if y > 0 {
670 if is_strong {
671 top_strength = 2;
672 } else if test_bit!(cur_dbk, bpos - 4) {
673 top_strength = 1;
674 } else {
675 top_strength = 0;
676 }
677 } else if row > 0 {
678 if top_is_strong {
679 top_strength = 2;
680 } else if test_bit!(mbinfo[mb_pos - mb_w].deblock, bpos + 12) {
681 top_strength = 1;
682 } else {
683 top_strength = 0;
684 }
685 } else {
686 top_strength = 0;
687 }
688
689 let l_q = if x > 0 { q } else { left_q };
690 let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };
691
692 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
693 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
694 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
695 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
696
697 let dmode = if y > 0 { x + y * 4 } else { x * 4 };
698
699 if filter_hor_down {
700 rv40_loop_filter4_h(dst, yoff + 4 * stride[0] + x * 4, stride[0],
701 dmode, lim_cur, lim_bottom, alpha, beta, beta_y, false, false);
702 }
703 if filter_ver && !ver_strong {
704 rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
705 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, false);
706 }
707 if filter_hor_up {
708 rv40_loop_filter4_h(dst, yoff + x * 4, stride[0],
709 dmode, lim_top, lim_cur, alpha, beta, beta_y, false, true);
710 }
711 if filter_ver && ver_strong {
712 rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
713 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, true);
714 }
715 }
716 }
717
718 for comp in 1..3 {
719 for y in 0..2 {
720 let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
721 for x in 0..2 {
722 let bpos = x + y * 2 + (comp - 1) * 4;
723
724 let filter_hor_down = (y != 1) && !is_strong;
725 let filter_ver = (x > 0) || (mb_x > 0);
726 let filter_hor_up = (row > 0) && (x == 0) && top_is_strong;
727 let ver_strong = (x == 0) && (mb_x > 0) && left_is_strong;
728
729 let cur_strength: usize;
730 if is_strong {
731 cur_strength = 2;
732 } else if test_bit!(cur_cbp, bpos) {
733 cur_strength = 1;
734 } else {
735 cur_strength = 0;
736 }
737
738 let left_strength: usize;
739 if x > 0 {
740 if is_strong {
741 left_strength = 2;
742 } else if test_bit!(cur_cbp, bpos - 1) {
743 left_strength = 1;
744 } else {
745 left_strength = 0;
746 }
747 } else if mb_x > 0 {
748 if left_is_strong {
749 left_strength = 2;
750 } else if test_bit!(mbinfo[mb_pos - 1].cbp_c, bpos + 1) {
751 left_strength = 1;
752 } else {
753 left_strength = 0;
754 }
755 } else {
756 left_strength = 0;
757 }
758
759 let bot_strength: usize;
760 if y == 0 {
761 if is_strong {
762 bot_strength = 2;
763 } else if test_bit!(cur_cbp, bpos + 2) {
764 bot_strength = 1;
765 } else {
766 bot_strength = 0;
767 }
768 } else {
769 bot_strength = 0;
770 }
771
772 let top_strength: usize;
773 if y > 0 {
774 if is_strong {
775 top_strength = 2;
776 } else if test_bit!(cur_cbp, bpos - 2) {
777 top_strength = 1;
778 } else {
779 top_strength = 0;
780 }
781 } else if row > 0 {
782 if top_is_strong {
783 top_strength = 2;
784 } else if test_bit!(mbinfo[mb_pos - mb_w].cbp_c, bpos + 2) {
785 top_strength = 1;
786 } else {
787 top_strength = 0;
788 }
789 } else {
790 top_strength = 0;
791 }
792
793 let l_q = if x > 0 { q } else { left_q };
794 let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };
795
796 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
797 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
798 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
799 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
800
801 if filter_hor_down {
802 rv40_loop_filter4_h(dst, coff + 4 * stride[comp] + x * 4, stride[comp],
803 x * 8, lim_cur, lim_bottom, alpha, beta, beta_c, true, false);
804 }
805 if filter_ver && !ver_strong {
806 rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
807 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, false);
808 }
809 if filter_hor_up {
810 rv40_loop_filter4_h(dst, coff + x * 4, stride[comp],
811 x * 8, lim_top, lim_cur, alpha, beta, beta_c, true, true);
812 }
813 if filter_ver && ver_strong {
814 rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
815 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, true);
816 }
817 }
818 }
819 }
820
821 left_q = q;
822
823 mb_pos += 1;
824 }
825 }
826 fn do_luma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, mv: MV, use16: bool, avg: bool) {
827 let size: usize = if use16 { 16 } else { 8 };
828 let dstride = frame.get_stride(0);
829 let doffset = frame.get_offset(0) + (if !avg { x + y * dstride } else { 0 });
830 let data = frame.get_data_mut().unwrap();
831 let dst: &mut [u8] = data.as_mut_slice();
832
833 let (w_, h_) = prev_frame.get_dimensions(0);
834 let w = (w_ + 15) & !15;
835 let h = (h_ + 15) & !15;
836
837 let dx = mv.x >> 2;
838 let cx = (mv.x & 3) as usize;
839 let dy = mv.y >> 2;
840 let cy = (mv.y & 3) as usize;
841 let mode = cx + cy * 4;
842
843 if check_pos(x, y, size, w, h, dx, dy, RV40_EDGE1[cx], RV40_EDGE2[cx], RV40_EDGE1[cy], RV40_EDGE2[cy]) {
844 let sstride = prev_frame.get_stride(0);
845 let mut soffset = prev_frame.get_offset(0) + x + y * sstride;
846 let data = prev_frame.get_data();
847 let src: &[u8] = data.as_slice();
848 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
849 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, src, soffset, sstride);
850 } else {
851 let mut ebuf: [u8; 32*22] = [0; 32*22];
852 edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0, 4);
853 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, &ebuf, 32*2 + 2, 32);
854 }
855 }
856 fn do_chroma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, comp: usize, mv: MV, use8: bool, avg: bool) {
857 let size: usize = if use8 { 8 } else { 4 };
858 let dstride = frame.get_stride(comp);
859 let doffset = frame.get_offset(comp) + (if !avg { x + y * dstride } else { 0 });
860 let data = frame.get_data_mut().unwrap();
861 let dst: &mut [u8] = data.as_mut_slice();
862
863 let (w_, h_) = prev_frame.get_dimensions(comp);
864 let w = (w_ + 7) & !7;
865 let h = (h_ + 7) & !7;
866
867 let mvx = mv.x / 2;
868 let mvy = mv.y / 2;
869 let dx = mvx >> 2;
870 let mut cx = (mvx & 3) as usize;
871 let dy = mvy >> 2;
872 let mut cy = (mvy & 3) as usize;
873
874 if (cx == 3) && (cy == 3) {
875 cx = 2;
876 cy = 2;
877 }
878
879 if check_pos(x, y, size, w, h, dx, dy, 0, 1, 0, 1) {
880 let sstride = prev_frame.get_stride(comp);
881 let mut soffset = prev_frame.get_offset(comp) + x + y * sstride;
882 let data = prev_frame.get_data();
883 let src: &[u8] = data.as_slice();
884 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
885 rv40_chroma_mc(dst, doffset, dstride, src, soffset, sstride, size, cx, cy);
886 } else {
887 let mut ebuf: [u8; 16*10] = [0; 16*10];
888 edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp, 4);
889 rv40_chroma_mc(dst, doffset, dstride, &ebuf, 0, 16, size, cx, cy);
890 }
891 }
892 }