]> git.nihav.org Git - nihav.git/blame - nihav-realmedia/src/codecs/rv40dsp.rs
rv20: fix coefficient quantisation
[nihav.git] / nihav-realmedia / src / codecs / rv40dsp.rs
CommitLineData
5641dccf 1use nihav_core::frame::{FrameType, NAVideoBuffer};
b4d5b851
KS
2use nihav_codec_support::codecs::MV;
3use nihav_codec_support::codecs::blockdsp::edge_emu;
47527732
KS
4use super::rv3040::{RV34DSP, RV34MBInfo};
5
/// Saturates a signed 16-bit intermediate value to the `0..=255` range and
/// returns it as an 8-bit sample.
fn clip8(a: i16) -> u8 {
    a.max(0).min(255) as u8
}
11
/// Reads element `$o` of the indexable `$s` and converts it to `i16` with an
/// `as` cast, so sample arithmetic can go negative without wrapping.
macro_rules! el {
    ($s: ident, $o: expr) => ( $s[$o] as i16 )
}
15
/// Interpolation kernels used by the luma motion compensation routines.
///
/// Every arm reads samples of `$s` around index `$o` through `el!`, evaluates
/// the kernel in `i16` arithmetic and saturates the rounded result to `u8`
/// with `clip8`:
///
/// * `01` - six taps `(1, -5, 52, 20, -5, 1)`, rounding 32, shift 6;
/// * `02` - six taps `(1, -5, 20, 20, -5, 1)`, rounding 16, shift 5;
/// * `03` - six taps `(1, -5, 20, 52, -5, 1)`, rounding 32, shift 6;
/// * `33` - rounded average of the four samples at `$o`, `$o + 1`,
///   `$o + $stride` and `$o + 1 + $stride`.
///
/// For the six-tap arms `$step` is the index distance between consecutive
/// taps; the taps cover `$o - 2 * $step ..= $o + 3 * $step`.
/// The `@taps6` arm is an implementation detail shared by `01`/`02`/`03`.
macro_rules! filter {
    (@taps6; $s: ident, $o: expr, $step: expr, $c0: expr, $c1: expr, $round: expr, $shift: expr) => (
        clip8((        el!($s, $o - 2 * $step)
               -   5 * el!($s, $o - $step)
               + $c0 * el!($s, $o)
               + $c1 * el!($s, $o + $step)
               -   5 * el!($s, $o + 2 * $step)
               +       el!($s, $o + 3 * $step) + $round) >> $shift)
    );
    (01; $s: ident, $o: expr, $step: expr) => (
        filter!(@taps6; $s, $o, $step, 52, 20, 32, 6)
    );
    (02; $s: ident, $o: expr, $step: expr) => (
        filter!(@taps6; $s, $o, $step, 20, 20, 16, 5)
    );
    (03; $s: ident, $o: expr, $step: expr) => (
        filter!(@taps6; $s, $o, $step, 20, 52, 32, 6)
    );
    (33; $s: ident, $o: expr, $stride: expr) => (
        clip8((  el!($s, $o)
               + el!($s, $o + 1)
               + el!($s, $o + $stride)
               + el!($s, $o + 1 + $stride) + 2) >> 2)
    );
}
48
/// Generates a block motion compensation function called `$name` with the
/// signature `fn(dst, didx, dstride, src, sidx, sstride)`, producing a
/// `$size` x `$size` block at `dst[didx..]` from samples at `src[sidx..]`.
///
/// Public arms:
///
/// * `copy` - plain row-by-row copy;
/// * `mc01` / `mc02` / `mc03` - a single pass of `filter!(01|02|03)`; the
///   taps run along the column when `$ver` is true and along the row
///   otherwise;
/// * `cm01` / `cm02` / `cm03` - a horizontal pass of `filter!(01|02|03)` over
///   `$size + 5` rows (starting two rows above `sidx`) into a temporary
///   buffer, which is then handed to `$ofilt` as its source;
/// * `mc33` - `filter!(33)` applied to every sample.
///
/// The `@single` and `@two_pass` arms are implementation details that the
/// numbered arms forward to.
macro_rules! mc_func {
    (copy; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                dst[didx..][..$size].copy_from_slice(&src[sidx..][..$size]);
                didx += dstride;
                sidx += sstride;
            }
        }
    );
    (@single; $filt: tt; $name: ident, $size: expr, $ver: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            // distance between filter taps: one row down or one sample right
            let step = if $ver { sstride } else { 1 };
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = filter!($filt; src, sidx + x, step);
                }
                sidx += sstride;
                didx += dstride;
            }
        }
    );
    (@two_pass; $filt: tt; $name: ident, $size: expr, $ofilt: ident) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size];
            let mut bidx = 0;
            let bstride = $size;
            // the second pass reads two rows above and three rows below the block
            sidx -= sstride * 2;
            for _ in 0..$size+5 {
                for x in 0..$size { buf[bidx + x] = filter!($filt; src, sidx + x, 1); }
                bidx += bstride;
                sidx += sstride;
            }
            // skip the two extra top rows so that the block itself is addressed
            $ofilt(dst, didx, dstride, &buf, 2*bstride, $size);
        }
    );
    (mc01; $name: ident, $size: expr, $ver: expr) => (
        mc_func!(@single; 01; $name, $size, $ver);
    );
    (mc02; $name: ident, $size: expr, $ver: expr) => (
        mc_func!(@single; 02; $name, $size, $ver);
    );
    (mc03; $name: ident, $size: expr, $ver: expr) => (
        mc_func!(@single; 03; $name, $size, $ver);
    );
    (cm01; $name: ident, $size: expr, $ofilt: ident) => (
        mc_func!(@two_pass; 01; $name, $size, $ofilt);
    );
    (cm02; $name: ident, $size: expr, $ofilt: ident) => (
        mc_func!(@two_pass; 02; $name, $size, $ofilt);
    );
    (cm03; $name: ident, $size: expr, $ofilt: ident) => (
        mc_func!(@two_pass; 03; $name, $size, $ofilt);
    );
    (mc33; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                for x in 0..$size { dst[didx + x] = filter!(33; src, sidx + x, sstride); }
                sidx += sstride;
                didx += dstride;
            }
        }
    );
}
149mc_func!(copy; copy_16, 16);
150mc_func!(copy; copy_8, 8);
151mc_func!(mc01; luma_mc_10_16, 16, false);
152mc_func!(mc01; luma_mc_10_8, 8, false);
153mc_func!(mc02; luma_mc_20_16, 16, false);
154mc_func!(mc02; luma_mc_20_8, 8, false);
155mc_func!(mc03; luma_mc_30_16, 16, false);
156mc_func!(mc03; luma_mc_30_8, 8, false);
157mc_func!(mc01; luma_mc_01_16, 16, true);
158mc_func!(mc01; luma_mc_01_8, 8, true);
159mc_func!(mc02; luma_mc_02_16, 16, true);
160mc_func!(mc02; luma_mc_02_8, 8, true);
161mc_func!(mc03; luma_mc_03_16, 16, true);
162mc_func!(mc03; luma_mc_03_8, 8, true);
163mc_func!(cm01; luma_mc_11_16, 16, luma_mc_01_16);
164mc_func!(cm01; luma_mc_11_8, 8, luma_mc_01_8);
165mc_func!(cm01; luma_mc_12_16, 16, luma_mc_02_16);
166mc_func!(cm01; luma_mc_12_8, 8, luma_mc_02_8);
167mc_func!(cm01; luma_mc_13_16, 16, luma_mc_03_16);
168mc_func!(cm01; luma_mc_13_8, 8, luma_mc_03_8);
169mc_func!(cm02; luma_mc_21_16, 16, luma_mc_01_16);
170mc_func!(cm02; luma_mc_21_8, 8, luma_mc_01_8);
171mc_func!(cm02; luma_mc_22_16, 16, luma_mc_02_16);
172mc_func!(cm02; luma_mc_22_8, 8, luma_mc_02_8);
173mc_func!(cm02; luma_mc_23_16, 16, luma_mc_03_16);
174mc_func!(cm02; luma_mc_23_8, 8, luma_mc_03_8);
175mc_func!(cm03; luma_mc_31_16, 16, luma_mc_01_16);
176mc_func!(cm03; luma_mc_31_8, 8, luma_mc_01_8);
177mc_func!(cm03; luma_mc_32_16, 16, luma_mc_02_16);
178mc_func!(cm03; luma_mc_32_8, 8, luma_mc_02_8);
8877a319
KS
179mc_func!(mc33; luma_mc_33_16, 16);
180mc_func!(mc33; luma_mc_33_8, 8);
47527732
KS
181
/// Rounding bias for chroma interpolation, indexed as `[y >> 1][x >> 1]`
/// with the fractional position `(x, y)` given in quarter-sample units.
const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [
    [ 0, 4, 8, 4 ],
    [ 8, 7, 8, 7 ],
    [ 0, 8, 4, 8 ],
    [ 8, 7, 8, 7 ]
];

/// Chroma motion compensation for a `size` x `size` block.
///
/// `x` and `y` are the fractional parts of the motion vector in quarter
/// samples. The weights are built from `4 - x`, `x`, `4 - y` and `y`, so the
/// callers are expected to pass values in `0..=3`.
///
/// * `(0, 0)` copies the block unchanged.
/// * When both fractions are non-zero the four neighbouring samples are
///   blended with weights summing to 16.
/// * Otherwise only two samples are blended: the current one and its right
///   neighbour (`y == 0`) or the one below (`x == 0`).
///
/// The blended sum gets the `RV40_CHROMA_BIAS` entry added before being
/// shifted right by 4.
fn rv40_chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
    if (x == 0) && (y == 0) {
        for _ in 0..size {
            dst[didx..][..size].copy_from_slice(&src[sidx..][..size]);
            didx += dstride;
            sidx += sstride;
        }
        return;
    }

    let bias = RV40_CHROMA_BIAS[y >> 1][x >> 1];
    if (x > 0) && (y > 0) {
        // full bilinear interpolation between four samples
        let a = ((4 - x) * (4 - y)) as u16;
        let b = ((    x) * (4 - y)) as u16;
        let c = ((4 - x) * (    y)) as u16;
        let d = ((    x) * (    y)) as u16;
        for _ in 0..size {
            for col in 0..size {
                let pos = sidx + col;
                let sum = a * u16::from(src[pos])
                        + b * u16::from(src[pos + 1])
                        + c * u16::from(src[pos + sstride])
                        + d * u16::from(src[pos + 1 + sstride])
                        + bias;
                dst[didx + col] = (sum >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    } else {
        // exactly one of the fractions is zero so one of the two terms of `e`
        // vanishes and only two samples need to be read
        let a = ((4 - x) * (4 - y)) as u16;
        let e = ((    x) * (4 - y) + (4 - x) * (    y)) as u16;
        let step = if y > 0 { sstride } else { 1 };
        for _ in 0..size {
            for col in 0..size {
                let pos = sidx + col;
                let sum = a * u16::from(src[pos])
                        + e * u16::from(src[pos + step])
                        + bias;
                dst[didx + col] = (sum >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    }
}
228
229pub struct RV40DSP {
230 luma_mc: [[fn (&mut [u8], usize, usize, &[u8], usize, usize); 16]; 2],
231}
232
233impl RV40DSP {
234 pub fn new() -> Self {
235 RV40DSP {
236 luma_mc: [
1a151e53 237 [ copy_16, luma_mc_10_16, luma_mc_20_16, luma_mc_30_16,
47527732
KS
238 luma_mc_01_16, luma_mc_11_16, luma_mc_21_16, luma_mc_31_16,
239 luma_mc_02_16, luma_mc_12_16, luma_mc_22_16, luma_mc_32_16,
240 luma_mc_03_16, luma_mc_13_16, luma_mc_23_16, luma_mc_33_16 ],
241 [ copy_8, luma_mc_10_8, luma_mc_20_8, luma_mc_30_8,
242 luma_mc_01_8, luma_mc_11_8, luma_mc_21_8, luma_mc_31_8,
243 luma_mc_02_8, luma_mc_12_8, luma_mc_22_8, luma_mc_32_8,
244 luma_mc_03_8, luma_mc_13_8, luma_mc_23_8, luma_mc_33_8 ] ],
245 }
246 }
247}
248
/// Reads element `$o` of the indexable `$src` and converts it to `i16` with
/// an `as` cast. This repeats the `el!` definition found earlier in the file
/// and shadows it for the code that follows.
macro_rules! el {
    ($src: ident, $o: expr) => ($src[$o] as i16);
}
252
/// Limits `a` to the symmetric range `-lim..=lim`.
///
/// The lower bound is tested first, which only matters for a negative `lim`
/// (the range is then empty and the result is one of the two bounds).
fn clip_symm(a: i16, lim: i16) -> i16 {
    let lo = -lim;
    if a < lo {
        lo
    } else if a > lim {
        lim
    } else {
        a
    }
}
262
263fn rv40_weak_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
264 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
265 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
266 for _ in 0..4 {
267 let p0 = el!(pix, off - step);
268 let q0 = el!(pix, off);
269
270 let t = q0 - p0;
271 if t == 0 {
272 off += stride;
273 continue;
274 }
275
276 let u = (alpha * t.wrapping_abs()) >> 7;
277 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
278 off += stride;
279 continue;
280 }
281
282 let p2 = el!(pix, off - 3*step);
283 let p1 = el!(pix, off - 2*step);
284 let q1 = el!(pix, off + step);
285 let q2 = el!(pix, off + 2*step);
286
287 let str;
288 if filter_p1 && filter_q1 {
289 str = (t << 2) + (p1 - q1);
290 } else {
291 str = t << 2;
292 }
293
294 let diff = clip_symm((str + 4) >> 3, lim_p0q0);
295 pix[off - step] = clip8(p0 + diff);
296 pix[off ] = clip8(q0 - diff);
297
bb0e22f7 298 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
47527732
KS
299 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
300 pix[off - 2*step] = clip8(p1 - clip_symm(p1_diff, lim_p1));
301 }
302
bb0e22f7 303 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
47527732
KS
304 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
305 pix[off + step] = clip8(q1 - clip_symm(q1_diff, lim_q1));
306 }
307
308 off += stride;
309 }
310}
311
312fn rv40_weak_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
313 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
314 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
315 rv40_weak_loop_filter4(pix, off, stride, 1, filter_p1, filter_q1, alpha, beta, lim_p0q0, lim_p1, lim_q1);
316}
61d3e294 317#[allow(clippy::eq_op)]
47527732
KS
318fn rv40_weak_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
319 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
320 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
321 let src = &mut pix[off - 3..][..stride * 3 + 3 + 3];
322 for ch in src.chunks_mut(stride).take(4) {
323 assert!(ch.len() >= 3 + 3);
324 let p0 = el!(ch, 3 - 1);
325 let q0 = el!(ch, 3);
326
327 let t = q0 - p0;
328 if t == 0 {
329 continue;
330 }
331
332 let u = (alpha * t.wrapping_abs()) >> 7;
333 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
334 continue;
335 }
336
337 let p2 = el!(ch, 3 - 3);
338 let p1 = el!(ch, 3 - 2);
339 let q1 = el!(ch, 3 + 1);
340 let q2 = el!(ch, 3 + 2);
341
342 let str;
343 if filter_p1 && filter_q1 {
344 str = (t << 2) + (p1 - q1);
345 } else {
346 str = t << 2;
347 }
348
349 let diff = clip_symm((str + 4) >> 3, lim_p0q0);
350 ch[3 - 1] = clip8(p0 + diff);
351 ch[3 ] = clip8(q0 - diff);
352
bb0e22f7 353 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
47527732
KS
354 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
355 ch[3 - 2] = clip8(p1 - clip_symm(p1_diff, lim_p1));
356 }
357
bb0e22f7 358 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
47527732
KS
359 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
360 ch[3 + 1] = clip8(q1 - clip_symm(q1_diff, lim_q1));
361 }
362 }
363}
364
365
366const RV40_DITHER_L: [i16; 16] = [
367 0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
368 0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
369];
370const RV40_DITHER_R: [i16; 16] = [
371 0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
372 0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
373];
374
/// Five-tap smoothing kernel of the strong loop filter.
///
/// Computes `(25 * (a + e) + 26 * (b + c + d) + dither) >> 7`, where `c` is
/// the sample being replaced. When `clip` is set the result is additionally
/// limited to `c - lims ..= c + lims`.
fn sfilter(a: i16, b: i16, c: i16, d: i16, e: i16, dither: i16, clip: bool, lims: i16) -> i16 {
    let val = (25 * (a + e) + 26 * (b + c + d) + dither) >> 7;
    if !clip {
        return val;
    }
    let (lo, hi) = (c - lims, c + lims);
    if val < lo {
        lo
    } else if val > hi {
        hi
    } else {
        val
    }
}
389
390fn rv40_strong_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
391 alpha: i16, lims: i16, dmode: usize, chroma: bool) {
392 for i in 0..4 {
393 let p0 = el!(pix, off - step);
394 let q0 = el!(pix, off);
395
396 let t = q0 - p0;
397 if t == 0 {
398 off += stride;
399 continue;
400 }
401
402 let fmode = (alpha * t.wrapping_abs()) >> 7;
403 if fmode > 1 {
404 off += stride;
405 continue;
406 }
407
408 let p3 = el!(pix, off - 4*step);
409 let p2 = el!(pix, off - 3*step);
410 let p1 = el!(pix, off - 2*step);
411 let q1 = el!(pix, off + step);
412 let q2 = el!(pix, off + 2*step);
413 let q3 = el!(pix, off + 3*step);
414
415 let np0 = sfilter(p2, p1, p0, q0, q1, RV40_DITHER_L[dmode + i], fmode != 0, lims);
bb0e22f7 416 let nq0 = sfilter( p1, p0, q0, q1, q2, RV40_DITHER_R[dmode + i], fmode != 0, lims);
47527732
KS
417
418 let np1 = sfilter(p3, p2, p1, np0, q0, RV40_DITHER_L[dmode + i], fmode != 0, lims);
419 let nq1 = sfilter( p0, nq0, q1, q2, q3, RV40_DITHER_R[dmode + i], fmode != 0, lims);
420
421 pix[off - 2*step] = np1 as u8;
422 pix[off - step] = np0 as u8;
423 pix[off] = nq0 as u8;
424 pix[off + step] = nq1 as u8;
425
426 if !chroma {
bb0e22f7
KS
427 let np2 = sfilter(np0, np1, p2, p3, p2, 64, false, 0);
428 let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0);
47527732
KS
429 pix[off - 3*step] = np2 as u8;
430 pix[off + 2*step] = nq2 as u8;
431 }
432
433 off += stride;
434 }
435}
436
/// Decides how an edge of four positions should be filtered.
///
/// `off` addresses the first sample on the "q" side of the edge, `step` is
/// the distance between samples across the edge and `stride` the distance
/// between the four examined positions.
///
/// Returns `(strong, filter_p1, filter_q1)`:
///
/// * `filter_p1` / `filter_q1` are set when the absolute sum of
///   `p1 - p0` / `q1 - q0` over the four positions is below `beta * 4`;
/// * `strong` is set only when both flags are set, `edge` is true, and the
///   absolute sums of `p1 - p2` and `q1 - q2` are both below `beta2`.
fn rv40_loop_strength(pix: &[u8], off: usize, step: usize, stride: usize,
                      beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
    let sample = |idx: usize| i16::from(pix[idx]);
    let positions = || (0..4).map(|line| off + line * stride);

    let mut sum_p1p0 = 0;
    let mut sum_q1q0 = 0;
    for pos in positions() {
        sum_p1p0 += sample(pos - 2 * step) - sample(pos - step);
        sum_q1q0 += sample(pos + step)     - sample(pos);
    }

    let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
    let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;

    if !(filter_p1 && filter_q1 && edge) {
        return (false, filter_p1, filter_q1);
    }

    let mut sum_p1p2 = 0;
    let mut sum_q1q2 = 0;
    for pos in positions() {
        sum_p1p2 += sample(pos - 2 * step) - sample(pos - 3 * step);
        sum_q1q2 += sample(pos + step)     - sample(pos + 2 * step);
    }

    let strong = (sum_p1p2.wrapping_abs() < beta2) && (sum_q1q2.wrapping_abs() < beta2);

    (strong, filter_p1, filter_q1)
}
470
471fn rv40_loop_strength_h(pix: &[u8], off: usize, stride: usize,
472 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
473 rv40_loop_strength(pix, off, stride, 1, beta, beta2, edge)
474}
475
/// Strength decision for an edge whose samples across the edge are adjacent
/// in memory (`p2 p1 p0 | q0 q1 q2`) and whose four examined positions are
/// `stride` apart.
///
/// Computes the same `(strong, filter_p1, filter_q1)` result as
/// `rv40_loop_strength` called with a sample step of 1, but reads from one
/// bounds-checked window starting three samples before `off`, so each line
/// is addressed with the fixed indices 0..=5 (`p2` = 0, `p1` = 1, `p0` = 2,
/// `q0` = 3, `q1` = 4, `q2` = 5).
fn rv40_loop_strength_v(pix: &[u8], off: usize, stride: usize,
                        beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
    // three full lines plus the six samples used on the fourth one
    let window = &pix[off - 3..][..stride * 3 + 6];

    let mut sum_p1p0 = 0;
    let mut sum_q1q0 = 0;
    for line in window.chunks(stride).take(4) {
        assert!(line.len() >= 6);
        sum_p1p0 += i16::from(line[1]) - i16::from(line[2]);
        sum_q1q0 += i16::from(line[4]) - i16::from(line[3]);
    }

    let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
    let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;

    if !(filter_p1 && filter_q1 && edge) {
        return (false, filter_p1, filter_q1);
    }

    let mut sum_p1p2 = 0;
    let mut sum_q1q2 = 0;
    for line in window.chunks(stride).take(4) {
        assert!(line.len() >= 6);
        sum_p1p2 += i16::from(line[1]) - i16::from(line[0]);
        sum_q1q2 += i16::from(line[4]) - i16::from(line[5]);
    }

    let strong = (sum_p1p2.wrapping_abs() < beta2) && (sum_q1q2.wrapping_abs() < beta2);

    (strong, filter_p1, filter_q1)
}
509
510fn rv40_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
511 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
512 chroma: bool, edge: bool) {
513 let (strong, filter_p1, filter_q1) = rv40_loop_strength_h(pix, off, stride, beta, beta2, edge);
514 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
515
516 if strong {
517 rv40_strong_loop_filter4(pix, off, stride, 1, alpha, lims, dmode, chroma);
518 } else if filter_p1 && filter_q1 {
519 rv40_weak_loop_filter4_h(pix, off, stride, true, true, alpha, beta,
520 lims, lim_p1, lim_q1);
521 } else if filter_p1 || filter_q1 {
522 rv40_weak_loop_filter4_h(pix, off, stride, filter_p1, filter_q1, alpha, beta,
523 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
524 }
525}
526
527fn rv40_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
528 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
529 chroma: bool, edge: bool) {
530 let (strong, filter_p1, filter_q1) = rv40_loop_strength_v(pix, off, stride, beta, beta2, edge);
531 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
532
533 if strong {
534 rv40_strong_loop_filter4(pix, off, 1, stride, alpha, lims, dmode, chroma);
535 } else if filter_p1 && filter_q1 {
536 rv40_weak_loop_filter4_v(pix, off, stride, true, true, alpha, beta,
537 lims, lim_p1, lim_q1);
538 } else if filter_p1 || filter_q1 {
539 rv40_weak_loop_filter4_v(pix, off, stride, filter_p1, filter_q1, alpha, beta,
540 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
541 }
542}
543
/// Loop filter `alpha` parameter, indexed by the macroblock quantiser.
const RV40_ALPHA_TAB: [i16; 32] = [
    128, 128, 128, 128, 128, 128, 128, 128,
    128, 128, 122,  96,  75,  59,  47,  37,
     29,  23,  18,  15,  13,  11,  10,   9,
      8,   7,   6,   5,   4,   3,   2,   1
];

/// Loop filter `beta` parameter, indexed by the macroblock quantiser.
const RV40_BETA_TAB: [i16; 32] = [
     0,  0,  0,  0,  0,  0,  0,  0,  3,  3,  3,  4,  4,  4,  6,  6,
     6,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 13, 14, 15, 16, 17
];

/// Loop filter clipping limits, indexed as `[strength][quantiser]` where the
/// strength is 0, 1 or 2.
const RV40_FILTER_CLIP_TBL: [[i16; 32]; 3] = [
  [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  ], [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5
  ], [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 7, 8, 9
  ]
];
568
/// Evaluates to `true` when bit number `$x` (counting from the least
/// significant bit) of `$pat` is set.
macro_rules! test_bit {
    ($pat: expr, $x: expr) => ( (($pat >> $x) & 1) != 0 )
}
572
/// Checks whether a `size` x `size` block at `(x, y)` displaced by
/// `(dx, dy)` stays inside a `w` x `h` area once it is extended by `e0`
/// samples to the left, `e1` to the right, `e2` upwards and `e3` downwards.
fn check_pos(x: usize, y: usize, size: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let xn = (x as isize) + (dx as isize);
    let yn = (y as isize) + (dy as isize);
    let size = size as isize;

    let fits_hor = (xn - e0 >= 0) && (xn + size + e1 <= (w as isize));
    let fits_ver = (yn - e2 >= 0) && (yn + size + e3 <= (h as isize));

    fits_hor && fits_ver
}
579
/// Extra samples required before the block (left or top) for each
/// fractional motion vector position 0..=3.
const RV40_EDGE1: [isize; 4] = [ 0, 2, 2, 2 ];
/// Extra samples required after the block (right or bottom) for each
/// fractional motion vector position 0..=3.
const RV40_EDGE2: [isize; 4] = [ 0, 3, 3, 3 ];

// Bit masks over the 4x4 grid of luma blocks (bit `x + y * 4`).
const Y_TOP_ROW_MASK:   u32 = 0x000F;
const Y_BOT_ROW_MASK:   u32 = 0xF000;
const Y_LEFT_COL_MASK:  u32 = 0x1111;
const Y_RIGHT_COL_MASK: u32 = 0x8888;
// Bit masks over the 2x2 grid of chroma blocks (bit `x + y * 2`).
const C_TOP_ROW_MASK:   u32 = 0x3;
const C_BOT_ROW_MASK:   u32 = 0xC;
const C_LEFT_COL_MASK:  u32 = 0x5;
const C_RIGHT_COL_MASK: u32 = 0xA;
591
47527732 592impl RV34DSP for RV40DSP {
16cbd8c0 593 fn loop_filter(&self, frame: &mut NAVideoBuffer<u8>, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, mb_h: usize, row: usize) {
47527732
KS
594 // todo proper B-frame filtering?
595 let mut offs: [usize; 3] = [0; 3];
596 let mut stride: [usize; 3] = [0; 3];
597 let (w, h) = frame.get_dimensions(0);
598 let small_frame = w * h <= 176*144;
599
600 for comp in 0..3 {
601 stride[comp] = frame.get_stride(comp);
602 let start = if comp == 0 { row * 16 } else { row * 8 };
603 offs[comp] = frame.get_offset(comp) + start * stride[comp];
604 }
605
1a967e6b 606 let data = frame.get_data_mut().unwrap();
47527732
KS
607 let dst: &mut [u8] = data.as_mut_slice();
608
d14e5288
KS
609 let is_last_row = row == mb_h - 1;
610
47527732
KS
611 let mut mb_pos: usize = row * mb_w;
612 let mut left_q: usize = 0;
d14e5288
KS
613 let mut left_cbp = 0;
614 let mut left_dbk = 0;
47527732
KS
615 for mb_x in 0..mb_w {
616 let q = mbinfo[mb_pos].q as usize;
617 let alpha = RV40_ALPHA_TAB[q];
618 let beta = RV40_BETA_TAB[q];
619 let beta_y = if small_frame { beta * 4 } else { beta * 3 };
620 let beta_c = beta * 3;
621
47527732 622 let is_strong = mbinfo[mb_pos].mbtype.is_intra_or_16();
d14e5288
KS
623 let top_is_strong = row > 0 && mbinfo[mb_pos - mb_w].mbtype.is_intra_or_16();
624 let left_is_strong = mb_x > 0 && mbinfo[mb_pos - 1].mbtype.is_intra_or_16();
625 let bot_is_strong = !is_last_row && mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16();
626
627 let cur_dbk = mbinfo[mb_pos].deblock;
628 let cur_cbp = if is_strong { 0xFFFFFF } else { mbinfo[mb_pos].cbp };
629
630 let (top_cbp, top_dbk) = if row > 0 {
631 (if top_is_strong { 0xFFFFFF } else { mbinfo[mb_pos - mb_w].cbp }, mbinfo[mb_pos - mb_w].deblock)
632 } else {
633 (0, 0)
634 };
635 let (bot_cbp, bot_dbk) = if !is_last_row {
636 (mbinfo[mb_pos + mb_w].cbp, mbinfo[mb_pos + mb_w].deblock)
637 } else {
638 (0, 0)
639 };
640
641 let y_cbp = cur_cbp & 0xFFFF;
642 let y_to_deblock = (cur_dbk as u32) | ((bot_dbk as u32) << 16);
643 let mut y_h_deblock = y_to_deblock | ((y_cbp << 4) & !Y_TOP_ROW_MASK) | ((top_cbp & Y_BOT_ROW_MASK) >> 12);
644 let mut y_v_deblock = y_to_deblock | ((y_cbp << 1) & !Y_LEFT_COL_MASK) | ((left_cbp & Y_RIGHT_COL_MASK) >> 3);
645
646 if mb_x == 0 {
647 y_v_deblock &= !Y_LEFT_COL_MASK;
648 }
649 if row == 0 {
650 y_h_deblock &= !Y_TOP_ROW_MASK;
651 }
652 if is_last_row || is_strong || bot_is_strong {
653 y_h_deblock &= !(Y_TOP_ROW_MASK << 16);
654 }
47527732
KS
655
656 for y in 0..4 {
657 let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
658 for x in 0..4 {
659 let bpos = x + y * 4;
d14e5288 660 let ver_strong = (x == 0) && (mb_x > 0) && (is_strong || left_is_strong);
47527732
KS
661
662 let cur_strength: usize;
663 if is_strong {
664 cur_strength = 2;
665 } else if test_bit!(cur_dbk, bpos) {
666 cur_strength = 1;
667 } else {
668 cur_strength = 0;
669 }
670
671 let left_strength: usize;
672 if x > 0 {
673 if is_strong {
674 left_strength = 2;
675 } else if test_bit!(cur_dbk, bpos - 1) {
676 left_strength = 1;
677 } else {
678 left_strength = 0;
679 }
680 } else if mb_x > 0 {
681 if left_is_strong {
682 left_strength = 2;
d14e5288 683 } else if test_bit!(left_dbk, bpos + 3) {
47527732
KS
684 left_strength = 1;
685 } else {
686 left_strength = 0;
687 }
688 } else {
689 left_strength = 0;
690 }
691
692 let bot_strength: usize;
693 if y < 3 {
694 if is_strong {
695 bot_strength = 2;
696 } else if test_bit!(cur_dbk, bpos + 4) {
697 bot_strength = 1;
698 } else {
699 bot_strength = 0;
700 }
d14e5288
KS
701 } else if !is_last_row {
702 if mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16() {
703 bot_strength = 2;
704 } else if test_bit!(bot_dbk, x) {
705 bot_strength = 1;
706 } else {
707 bot_strength = 0;
708 }
47527732
KS
709 } else {
710 bot_strength = 0;
711 }
712
713 let top_strength: usize;
714 if y > 0 {
715 if is_strong {
716 top_strength = 2;
717 } else if test_bit!(cur_dbk, bpos - 4) {
718 top_strength = 1;
719 } else {
720 top_strength = 0;
721 }
722 } else if row > 0 {
723 if top_is_strong {
724 top_strength = 2;
d14e5288 725 } else if test_bit!(top_dbk, bpos + 12) {
47527732
KS
726 top_strength = 1;
727 } else {
728 top_strength = 0;
729 }
730 } else {
731 top_strength = 0;
732 }
733
734 let l_q = if x > 0 { q } else { left_q };
735 let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };
736
737 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
738 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
739 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
740 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
741
742 let dmode = if y > 0 { x + y * 4 } else { x * 4 };
743
d14e5288 744 if test_bit!(y_h_deblock, bpos + 4) {
47527732
KS
745 rv40_loop_filter4_h(dst, yoff + 4 * stride[0] + x * 4, stride[0],
746 dmode, lim_cur, lim_bottom, alpha, beta, beta_y, false, false);
747 }
d14e5288 748 if test_bit!(y_v_deblock, bpos) && !ver_strong {
47527732
KS
749 rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
750 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, false);
751 }
d14e5288 752 if (y == 0) && test_bit!(y_h_deblock, bpos) && (is_strong || top_is_strong) {
47527732
KS
753 rv40_loop_filter4_h(dst, yoff + x * 4, stride[0],
754 dmode, lim_top, lim_cur, alpha, beta, beta_y, false, true);
755 }
d14e5288 756 if test_bit!(y_v_deblock, bpos) && ver_strong {
47527732
KS
757 rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
758 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, true);
759 }
760 }
761 }
762
763 for comp in 1..3 {
d14e5288
KS
764 let cshift = 16 - 4 + comp * 4;
765 let c_cur_cbp = (cur_cbp >> cshift) & 0xF;
766 let c_top_cbp = (top_cbp >> cshift) & 0xF;
767 let c_left_cbp = (left_cbp >> cshift) & 0xF;
768 let c_bot_cbp = (bot_cbp >> cshift) & 0xF;
769
770 let c_deblock = c_cur_cbp | (c_bot_cbp << 4);
771 let mut c_v_deblock = c_deblock | ((c_cur_cbp << 1) & !C_LEFT_COL_MASK) | ((c_left_cbp & C_RIGHT_COL_MASK) >> 1);
772 let mut c_h_deblock = c_deblock | ((c_cur_cbp & C_TOP_ROW_MASK) << 2) | ((c_top_cbp & C_BOT_ROW_MASK) >> 2);
773 if mb_x == 0 {
774 c_v_deblock &= !C_LEFT_COL_MASK;
775 }
776 if row == 0 {
777 c_h_deblock &= !C_TOP_ROW_MASK;
778 }
779 if is_last_row || is_strong || bot_is_strong {
780 c_h_deblock &= !(C_TOP_ROW_MASK << 4);
781 }
782
47527732
KS
783 for y in 0..2 {
784 let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
785 for x in 0..2 {
d14e5288 786 let bpos = x + y * 2;
47527732 787
d14e5288 788 let ver_strong = (x == 0) && (is_strong || left_is_strong);
47527732
KS
789
790 let cur_strength: usize;
791 if is_strong {
792 cur_strength = 2;
d14e5288 793 } else if test_bit!(c_cur_cbp, bpos) {
47527732
KS
794 cur_strength = 1;
795 } else {
796 cur_strength = 0;
797 }
798
799 let left_strength: usize;
800 if x > 0 {
801 if is_strong {
802 left_strength = 2;
d14e5288 803 } else if test_bit!(c_cur_cbp, bpos - 1) {
47527732
KS
804 left_strength = 1;
805 } else {
806 left_strength = 0;
807 }
808 } else if mb_x > 0 {
809 if left_is_strong {
810 left_strength = 2;
d14e5288 811 } else if test_bit!(c_left_cbp, bpos + 1) {
47527732
KS
812 left_strength = 1;
813 } else {
814 left_strength = 0;
815 }
816 } else {
817 left_strength = 0;
818 }
819
820 let bot_strength: usize;
d14e5288 821 if y != 3 {
47527732
KS
822 if is_strong {
823 bot_strength = 2;
d14e5288
KS
824 } else if test_bit!(c_cur_cbp, bpos + 2) {
825 bot_strength = 1;
826 } else {
827 bot_strength = 0;
828 }
829 } else if !is_last_row {
830 if mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16() {
831 bot_strength = 2;
832 } else if test_bit!(c_bot_cbp, x) {
47527732
KS
833 bot_strength = 1;
834 } else {
835 bot_strength = 0;
836 }
837 } else {
838 bot_strength = 0;
839 }
840
841 let top_strength: usize;
842 if y > 0 {
843 if is_strong {
844 top_strength = 2;
d14e5288 845 } else if test_bit!(c_cur_cbp, bpos - 2) {
47527732
KS
846 top_strength = 1;
847 } else {
848 top_strength = 0;
849 }
850 } else if row > 0 {
851 if top_is_strong {
852 top_strength = 2;
d14e5288 853 } else if test_bit!(c_top_cbp, bpos + 2) {
47527732
KS
854 top_strength = 1;
855 } else {
856 top_strength = 0;
857 }
858 } else {
859 top_strength = 0;
860 }
861
862 let l_q = if x > 0 { q } else { left_q };
863 let top_q = if row > 0 { mbinfo[mb_pos - mb_w].q as usize } else { 0 };
864
865 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
866 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
867 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
868 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
869
d14e5288 870 if test_bit!(c_h_deblock, bpos + 2) {
47527732
KS
871 rv40_loop_filter4_h(dst, coff + 4 * stride[comp] + x * 4, stride[comp],
872 x * 8, lim_cur, lim_bottom, alpha, beta, beta_c, true, false);
873 }
d14e5288 874 if test_bit!(c_v_deblock, bpos) && !ver_strong {
47527732
KS
875 rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
876 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, false);
877 }
d14e5288 878 if (y == 0) && test_bit!(c_h_deblock, bpos) && (is_strong || top_is_strong) {
47527732
KS
879 rv40_loop_filter4_h(dst, coff + x * 4, stride[comp],
880 x * 8, lim_top, lim_cur, alpha, beta, beta_c, true, true);
881 }
d14e5288 882 if test_bit!(c_v_deblock, bpos) && ver_strong {
47527732
KS
883 rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
884 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, true);
885 }
886 }
887 }
888 }
889
890 left_q = q;
d14e5288
KS
891 left_dbk = cur_dbk;
892 left_cbp = cur_cbp;
47527732
KS
893
894 mb_pos += 1;
895 }
896 }
897 fn do_luma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, mv: MV, use16: bool, avg: bool) {
898 let size: usize = if use16 { 16 } else { 8 };
899 let dstride = frame.get_stride(0);
900 let doffset = frame.get_offset(0) + (if !avg { x + y * dstride } else { 0 });
1a967e6b 901 let data = frame.get_data_mut().unwrap();
47527732
KS
902 let dst: &mut [u8] = data.as_mut_slice();
903
904 let (w_, h_) = prev_frame.get_dimensions(0);
905 let w = (w_ + 15) & !15;
906 let h = (h_ + 15) & !15;
907
908 let dx = mv.x >> 2;
909 let cx = (mv.x & 3) as usize;
910 let dy = mv.y >> 2;
911 let cy = (mv.y & 3) as usize;
912 let mode = cx + cy * 4;
913
914 if check_pos(x, y, size, w, h, dx, dy, RV40_EDGE1[cx], RV40_EDGE2[cx], RV40_EDGE1[cy], RV40_EDGE2[cy]) {
915 let sstride = prev_frame.get_stride(0);
916 let mut soffset = prev_frame.get_offset(0) + x + y * sstride;
917 let data = prev_frame.get_data();
918 let src: &[u8] = data.as_slice();
919 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
920 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, src, soffset, sstride);
921 } else {
922 let mut ebuf: [u8; 32*22] = [0; 32*22];
86081fed 923 edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0, 4);
47527732
KS
924 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, &ebuf, 32*2 + 2, 32);
925 }
926 }
927 fn do_chroma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, comp: usize, mv: MV, use8: bool, avg: bool) {
928 let size: usize = if use8 { 8 } else { 4 };
929 let dstride = frame.get_stride(comp);
930 let doffset = frame.get_offset(comp) + (if !avg { x + y * dstride } else { 0 });
1a967e6b 931 let data = frame.get_data_mut().unwrap();
47527732
KS
932 let dst: &mut [u8] = data.as_mut_slice();
933
934 let (w_, h_) = prev_frame.get_dimensions(comp);
935 let w = (w_ + 7) & !7;
936 let h = (h_ + 7) & !7;
937
938 let mvx = mv.x / 2;
939 let mvy = mv.y / 2;
940 let dx = mvx >> 2;
941 let mut cx = (mvx & 3) as usize;
942 let dy = mvy >> 2;
943 let mut cy = (mvy & 3) as usize;
944
945 if (cx == 3) && (cy == 3) {
946 cx = 2;
947 cy = 2;
948 }
949
950 if check_pos(x, y, size, w, h, dx, dy, 0, 1, 0, 1) {
951 let sstride = prev_frame.get_stride(comp);
952 let mut soffset = prev_frame.get_offset(comp) + x + y * sstride;
953 let data = prev_frame.get_data();
954 let src: &[u8] = data.as_slice();
955 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
956 rv40_chroma_mc(dst, doffset, dstride, src, soffset, sstride, size, cx, cy);
957 } else {
958 let mut ebuf: [u8; 16*10] = [0; 16*10];
86081fed 959 edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp, 4);
47527732
KS
960 rv40_chroma_mc(dst, doffset, dstride, &ebuf, 0, 16, size, cx, cy);
961 }
962 }
963}