]> git.nihav.org Git - nihav.git/blob - nihav-realmedia/src/codecs/rv30dsp.rs
4ec44bd37dde010184d9aaa771617b0131f1b271
[nihav.git] / nihav-realmedia / src / codecs / rv30dsp.rs
1 use nihav_core::frame::{FrameType, NAVideoBuffer};
2 use nihav_codec_support::codecs::MV;
3 use nihav_codec_support::codecs::blockdsp::edge_emu;
4 use super::rv3040::{RV34DSP, RV34MBInfo};
5
/// Saturates a signed 16-bit intermediate value into the `0..=255` range of an 8-bit sample.
fn clip8(a: i16) -> u8 {
    // After clamping the value always fits, so the narrowing cast is lossless.
    a.max(0).min(255) as u8
}
11
12 #[allow(clippy::identity_op)]
13 fn rv3_filter_h(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, bsize: usize, c1: i16, c2: i16) {
14 for _ in 0..bsize {
15 for x in 0..bsize {
16 dst[didx + x] = clip8((-((src[sidx + x - 1] as i16) + (src[sidx + x + 2] as i16)) + (src[sidx + x + 0] as i16) * c1 + (src[sidx + x + 1] as i16) * c2 + 8) >> 4);
17 }
18 sidx += sstride;
19 didx += dstride;
20 }
21 }
22
23 #[allow(clippy::erasing_op)]
24 #[allow(clippy::identity_op)]
25 fn rv3_filter_v(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, bsize: usize, c1: i16, c2: i16) {
26 for _ in 0..bsize {
27 for x in 0..bsize {
28 dst[didx + x] = clip8((-((src[sidx + x - 1 * sstride] as i16) + (src[sidx + x + 2 * sstride] as i16)) + (src[sidx + x + 0 * sstride] as i16) * c1 + (src[sidx + x + 1 * sstride] as i16) * c2 + 8) >> 4);
29 }
30 sidx += sstride;
31 didx += dstride;
32 }
33 }
34
/// Weighted sample sums used by the two-dimensional luma interpolators.
///
/// * `(buf, pos, w)`: three-tap row sum
///   `w * (6 * buf[pos] + 9 * buf[pos + 1] + buf[pos + 2])`, evaluated in `i32`.
/// * `(buf, pos, w, t0, t1)`: four-tap row sum
///   `w * (-buf[pos - 1] + t0 * buf[pos] + t1 * buf[pos + 1] - buf[pos + 2])`, in `i32`.
/// * `(buf, pos, stride, v0, v1, t0, t1)`: four rows of the four-tap sum,
///   starting one row above `pos`, weighted `-1, v0, v1, -1`; the total is
///   rounded with `+ 128`, shifted right by 8 and cast to `i16`.
/// * `(m22; buf, pos, stride)`: three rows of the three-tap sum, starting at
///   `pos`, weighted `6, 9, 1`, with the same rounding, shift and cast.
macro_rules! mc_matrix {
    ($buf: ident, $pos: expr, $w: expr) => (
        ($w * 6) * ($buf[$pos] as i32)
            + ($w * 9) * ($buf[$pos + 1] as i32)
            + ($w) * ($buf[$pos + 2] as i32)
    );
    ($buf: ident, $pos: expr, $w: expr, $t0: expr, $t1: expr) => (
        -($w) * ($buf[$pos - 1] as i32)
            + ($w * $t0) * ($buf[$pos] as i32)
            + ($w * $t1) * ($buf[$pos + 1] as i32)
            + -($w) * ($buf[$pos + 2] as i32)
    );
    ($buf: ident, $pos: expr, $stride: expr, $v0: expr, $v1: expr, $t0: expr, $t1: expr) => (
        ((mc_matrix!($buf, $pos - $stride, -1, $t0, $t1)
            + mc_matrix!($buf, $pos, $v0, $t0, $t1)
            + mc_matrix!($buf, $pos + $stride, $v1, $t0, $t1)
            + mc_matrix!($buf, $pos + 2 * $stride, -1, $t0, $t1)
            + 128) >> 8) as i16
    );
    (m22; $buf: ident, $pos: expr, $stride: expr) => (
        ((mc_matrix!($buf, $pos + 0 * $stride, 6)
            + mc_matrix!($buf, $pos + 1 * $stride, 9)
            + mc_matrix!($buf, $pos + 2 * $stride, 1)
            + 128) >> 8) as i16
    );
}
54
/// Generates one motion-compensation kernel with the common signature
/// `fn(dst, didx, dstride, src, sidx, sstride)`.
///
/// * `copy; name, size`: plain `size` x `size` block copy.
/// * `hor; name, c1, c2, size`: forwards to `rv3_filter_h` with the given taps.
/// * `ver; name, c1, c2, size`: forwards to `rv3_filter_v` with the given taps.
/// * `m11` / `m12` / `m21; name, size`: two-dimensional four-tap interpolation
///   through `mc_matrix!` with the weight sets `(12, 6, 12, 6)`,
///   `(6, 12, 12, 6)` and `(12, 6, 6, 12)` respectively.
/// * `m22; name, size`: the three-tap `mc_matrix!(m22; ...)` interpolation.
///
/// The arms starting with `@` are implementation details shared by the public
/// arms and are not meant to be invoked directly.
macro_rules! mc_func {
    (copy; $name: ident, $size: expr) => (
        fn $name(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                dst[didx..][..$size].copy_from_slice(&src[sidx..][..$size]);
                didx += dstride;
                sidx += sstride;
            }
        }
    );
    (hor; $name: ident, $c1: expr, $c2: expr, $size: expr) => (
        mc_func!(@separable; rv3_filter_h, $name, $c1, $c2, $size);
    );
    (ver; $name: ident, $c1: expr, $c2: expr, $size: expr) => (
        mc_func!(@separable; rv3_filter_v, $name, $c1, $c2, $size);
    );
    (m11; $name: ident, $size: expr) => (
        mc_func!(@matrix; $name, $size, 12, 6, 12, 6);
    );
    (m12; $name: ident, $size: expr) => (
        mc_func!(@matrix; $name, $size, 6, 12, 12, 6);
    );
    (m21; $name: ident, $size: expr) => (
        mc_func!(@matrix; $name, $size, 12, 6, 6, 12);
    );
    (m22; $name: ident, $size: expr) => (
        fn $name(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = clip8(mc_matrix!(m22; src, sidx + x, sstride));
                }
                didx += dstride;
                sidx += sstride;
            }
        }
    );
    // One-dimensional kernels only differ in the filter routine they call.
    (@separable; $filter: ident, $name: ident, $c1: expr, $c2: expr, $size: expr) => (
        fn $name(dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            $filter(dst, didx, dstride, src, sidx, sstride, $size, $c1, $c2);
        }
    );
    // Two-dimensional four-tap kernels only differ in their weight set.
    (@matrix; $name: ident, $size: expr, $v0: expr, $v1: expr, $t0: expr, $t1: expr) => (
        fn $name(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = clip8(mc_matrix!(src, sidx + x, sstride, $v0, $v1, $t0, $t1));
                }
                didx += dstride;
                sidx += sstride;
            }
        }
    );
}
122 mc_func!(copy; copy_16, 16);
123 mc_func!(copy; copy_8, 8);
124 mc_func!(hor; luma_mc_10_16, 12, 6, 16);
125 mc_func!(hor; luma_mc_20_16, 6, 12, 16);
126 mc_func!(hor; luma_mc_10_8, 12, 6, 8);
127 mc_func!(hor; luma_mc_20_8, 6, 12, 8);
128 mc_func!(ver; luma_mc_01_16, 12, 6, 16);
129 mc_func!(ver; luma_mc_02_16, 6, 12, 16);
130 mc_func!(ver; luma_mc_01_8, 12, 6, 8);
131 mc_func!(ver; luma_mc_02_8, 6, 12, 8);
132 mc_func!(m11; luma_mc_11_16, 16);
133 mc_func!(m11; luma_mc_11_8, 8);
134 mc_func!(m21; luma_mc_21_16, 16);
135 mc_func!(m21; luma_mc_21_8, 8);
136 mc_func!(m12; luma_mc_12_16, 16);
137 mc_func!(m12; luma_mc_12_8, 8);
138 mc_func!(m22; luma_mc_22_16, 16);
139 mc_func!(m22; luma_mc_22_8, 8);
140
/// Weight (out of 8) of the sample at the current position, indexed by the
/// fractional offset along one axis.
const RV30_CHROMA_FRAC1: [u16; 3] = [8, 5, 3];
/// Weight (out of 8) of the next sample along the same axis, indexed by the
/// fractional offset. Each pair `FRAC1[i] + FRAC2[i]` adds up to 8.
const RV30_CHROMA_FRAC2: [u16; 3] = [0, 3, 5];

/// Chroma motion compensation for a `size` x `size` block.
///
/// `x` and `y` are the fractional offsets (`0..=2`) along each axis. When both
/// are zero the block is copied verbatim; otherwise every output sample is a
/// weighted average of the 2x2 source neighbourhood, with weights that sum to
/// 64, rounded with `+ 32` and shifted right by 6.
///
/// In the interpolating path one extra column and one extra row of `src`
/// beyond the block are read, even when the corresponding weight is zero.
fn rv30_chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
    if x == 0 && y == 0 {
        for _ in 0..size {
            dst[didx..][..size].copy_from_slice(&src[sidx..][..size]);
            didx += dstride;
            sidx += sstride;
        }
        return;
    }

    // Weights of the top-left, top-right, bottom-left and bottom-right samples.
    let w_tl = RV30_CHROMA_FRAC1[x] * RV30_CHROMA_FRAC1[y];
    let w_tr = RV30_CHROMA_FRAC2[x] * RV30_CHROMA_FRAC1[y];
    let w_bl = RV30_CHROMA_FRAC1[x] * RV30_CHROMA_FRAC2[y];
    let w_br = RV30_CHROMA_FRAC2[x] * RV30_CHROMA_FRAC2[y];

    for _ in 0..size {
        for col in 0..size {
            let pos = sidx + col;
            let acc = w_tl * u16::from(src[pos])
                + w_tr * u16::from(src[pos + 1])
                + w_bl * u16::from(src[pos + sstride])
                + w_br * u16::from(src[pos + 1 + sstride])
                + 32;
            dst[didx + col] = (acc >> 6) as u8;
        }
        didx += dstride;
        sidx += sstride;
    }
}
167
/// Signature shared by all luma motion-compensation kernels:
/// `(dst, dst_offset, dst_stride, src, src_offset, src_stride)`.
type LumaMcFn = fn(&mut [u8], usize, usize, &[u8], usize, usize);

/// DSP routines for RealVideo 3.
pub struct RV30DSP {
    /// Luma interpolation kernels: the first index selects the block size
    /// bank, the second the kernel for one of the nine fractional positions.
    luma_mc: [[LumaMcFn; 9]; 2],
}
172
173 impl RV30DSP {
174 pub fn new() -> Self {
175 RV30DSP {
176 luma_mc: [
177 [ copy_16, luma_mc_10_16, luma_mc_20_16,
178 luma_mc_01_16, luma_mc_11_16, luma_mc_21_16,
179 luma_mc_02_16, luma_mc_12_16, luma_mc_22_16 ],
180 [ copy_8, luma_mc_10_8, luma_mc_20_8,
181 luma_mc_01_8, luma_mc_11_8, luma_mc_21_8,
182 luma_mc_02_8, luma_mc_12_8, luma_mc_22_8 ] ],
183 }
184 }
185 }
186
/// Reads element `$idx` of `$buf` and widens it to `i16`.
macro_rules! el {
    ($buf: ident, $idx: expr) => {
        $buf[$idx] as i16
    };
}
190
/// Limits `a` to the symmetric range `-lim..=lim`.
///
/// The lower bound is checked first, then the upper one; values inside the
/// range are returned unchanged.
fn clip_symm(a: i16, lim: i16) -> i16 {
    let floor = -lim;
    match a {
        v if v < floor => floor,
        v if v > lim => lim,
        v => v,
    }
}
200
201 const RV30_LOOP_FILTER_STRENGTH: [i16; 32] = [
202 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
203 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5
204 ];
205
/// Evaluates to `true` when bit number `$bit` of `$mask` is set.
macro_rules! test_bit {
    ($mask: expr, $bit: expr) => {
        (($mask >> $bit) & 1) != 0
    };
}
209
210 fn rv30_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize, lim: i16) {
211 for _ in 0..4 {
212 let a = el!(pix, off - 2*step);
213 let b = el!(pix, off - step);
214 let c = el!(pix, off);
215 let d = el!(pix, off + step);
216 let diff0 = ((a - d) - (b - c) * 4) >> 3;
217 let diff = clip_symm(diff0, lim);
218 pix[off - step] = clip8(b + diff);
219 pix[off ] = clip8(c - diff);
220 off += stride;
221 }
222 }
223
/// Splits a motion vector component into its integer part and a fraction in
/// thirds, rounding the integer part towards negative infinity so that the
/// fractional part is always in `0..=2`.
fn rv30_div_mv(mv: i16) -> (i16, usize) {
    let whole = mv / 3;
    match mv % 3 {
        // Truncating division leaves a negative remainder for negative
        // inputs; step one unit down and wrap the remainder instead.
        rem if rem < 0 => (whole - 1, (rem + 3) as usize),
        rem => (whole, rem as usize),
    }
}
233
/// Tells whether a `size` x `size` block at `(x, y)` displaced by `(dx, dy)`,
/// together with `e0`/`e1` extra samples to its left/right and `e2`/`e3`
/// extra samples above/below, lies completely inside a `w` x `h` area.
fn check_pos(x: usize, y: usize, size: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let left = (x as isize) + (dx as isize);
    let top = (y as isize) + (dy as isize);

    if left - e0 < 0 {
        return false;
    }
    if left + (size as isize) + e1 > (w as isize) {
        return false;
    }
    if top - e2 < 0 {
        return false;
    }
    top + (size as isize) + e3 <= (h as isize)
}
240
241 const RV30_EDGE1: [isize; 3] = [ 0, 1, 1 ];
242 const RV30_EDGE2: [isize; 3] = [ 0, 2, 2 ];
243
244 impl RV34DSP for RV30DSP {
245 #[allow(clippy::cognitive_complexity)]
246 fn loop_filter(&self, frame: &mut NAVideoBuffer<u8>, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, _mb_h: usize, row: usize) {
247 let mut offs: [usize; 3] = [0; 3];
248 let mut stride: [usize; 3] = [0; 3];
249
250 for comp in 0..3 {
251 stride[comp] = frame.get_stride(comp);
252 let start = if comp == 0 { row * 16 } else { row * 8 };
253 offs[comp] = frame.get_offset(comp) + start * stride[comp];
254 }
255
256 let data = frame.get_data_mut().unwrap();
257 let dst: &mut [u8] = data.as_mut_slice();
258
259 // vertical filter
260 let mut left_cbp = 0;
261 let mut left_lim = 0;
262 let mut left_dbk = 0;
263 let mut mb_pos: usize = row * mb_w;
264 for mb_x in 0..mb_w {
265 let cur_lim = RV30_LOOP_FILTER_STRENGTH[mbinfo[mb_pos].q as usize];
266 let cur_dbk = mbinfo[mb_pos].deblock;
267 let cur_cbp = mbinfo[mb_pos].cbp_c;
268 let xstart = if mb_x == 0 { 1 } else { 0 };
269 for y in 0..4 {
270 let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
271 for x in xstart..4 {
272 let cs = x + y*4;
273 let loc_lim;
274
275 if test_bit!(cur_dbk, cs) {
276 loc_lim = cur_lim;
277 } else if (x == 0) && test_bit!(left_dbk, cs + 3) {
278 loc_lim = left_lim;
279 } else if (x != 0) && test_bit!(cur_dbk, cs - 1) {
280 loc_lim = cur_lim;
281 } else {
282 loc_lim = 0;
283 }
284 if loc_lim != 0 {
285 rv30_loop_filter4(dst, yoff + x * 4, 1, stride[0], loc_lim);
286 }
287 }
288 }
289
290 for comp in 1..3 {
291 for y in 0..2 {
292 let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
293 for x in xstart..2 {
294 let cs = x + y * 2 + (comp - 1) * 4;
295 let loc_lim;
296
297 if test_bit!(cur_cbp, cs) {
298 loc_lim = cur_lim;
299 } else if (x == 0) && test_bit!(left_cbp, cs + 1) {
300 loc_lim = left_lim;
301 } else if (x != 0) && test_bit!(cur_cbp, cs - 1) {
302 loc_lim = cur_lim;
303 } else {
304 loc_lim = 0;
305 }
306 if loc_lim != 0 {
307 rv30_loop_filter4(dst, coff + x * 4, 1, stride[comp], loc_lim);
308 }
309 }
310 }
311 }
312
313 left_lim = cur_lim;
314 left_dbk = cur_dbk;
315 left_cbp = cur_cbp;
316 mb_pos += 1;
317 }
318
319 // horizontal filter
320 let mut mb_pos: usize = row * mb_w;
321 for mb_x in 0..mb_w {
322 let cur_lim = RV30_LOOP_FILTER_STRENGTH[mbinfo[mb_pos].q as usize];
323 let cur_dbk = mbinfo[mb_pos].deblock;
324 let cur_cbp = mbinfo[mb_pos].cbp_c;
325 let ystart = if row == 0 { 1 } else { 0 };
326 let top_lim;
327 let top_dbk;
328 let top_cbp;
329 if row > 0 {
330 top_lim = RV30_LOOP_FILTER_STRENGTH[mbinfo[mb_pos - mb_w].q as usize];
331 top_dbk = mbinfo[mb_pos - mb_w].deblock;
332 top_cbp = mbinfo[mb_pos - mb_w].cbp_c;
333 } else {
334 top_lim = 0;
335 top_dbk = 0;
336 top_cbp = 0;
337 }
338 for y in ystart..4 {
339 let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
340 for x in 0..4 {
341 let cs = x + y*4;
342 let loc_lim;
343
344 if test_bit!(cur_dbk, cs) {
345 loc_lim = cur_lim;
346 } else if (y == 0) && test_bit!(top_dbk, cs + 12) {
347 loc_lim = top_lim;
348 } else if (y != 0) && test_bit!(cur_dbk, cs - 4) {
349 loc_lim = cur_lim;
350 } else {
351 loc_lim = 0;
352 }
353 if loc_lim != 0 {
354 rv30_loop_filter4(dst, yoff + x * 4, stride[0], 1, loc_lim);
355 }
356 }
357 }
358
359 for comp in 1..3 {
360 for y in ystart..2 {
361 let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
362 for x in 0..2 {
363 let cs = x + y * 2 + (comp - 1) * 4;
364 let loc_lim;
365
366 if test_bit!(cur_cbp, cs) {
367 loc_lim = cur_lim;
368 } else if (y == 0) && test_bit!(top_cbp, cs + 2) {
369 loc_lim = top_lim;
370 } else if (y != 0) && test_bit!(cur_cbp, cs - 2) {
371 loc_lim = cur_lim;
372 } else {
373 loc_lim = 0;
374 }
375 if loc_lim != 0 {
376 rv30_loop_filter4(dst, coff + x * 4, stride[comp], 1, loc_lim);
377 }
378 }
379 }
380 }
381
382 mb_pos += 1;
383 }
384 }
385 fn do_luma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, mv: MV, use16: bool, avg: bool) {
386 let size: usize = if use16 { 16 } else { 8 };
387 let dstride = frame.get_stride(0);
388 let doffset = frame.get_offset(0) + (if !avg { x + y * dstride } else { 0 });
389 let data = frame.get_data_mut().unwrap();
390 let dst: &mut [u8] = data.as_mut_slice();
391
392 let (w_, h_) = prev_frame.get_dimensions(0);
393 let w = (w_ + 15) & !15;
394 let h = (h_ + 15) & !15;
395
396 let (dx, cx) = rv30_div_mv(mv.x);
397 let (dy, cy) = rv30_div_mv(mv.y);
398 let mode = cx + cy * 3;
399
400 if check_pos(x, y, size, w, h, dx, dy, RV30_EDGE1[cx], RV30_EDGE2[cx], RV30_EDGE1[cy], RV30_EDGE2[cy]) {
401 let sstride = prev_frame.get_stride(0);
402 let mut soffset = prev_frame.get_offset(0) + x + y * sstride;
403 let data = prev_frame.get_data();
404 let src: &[u8] = data.as_slice();
405 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
406 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, src, soffset, sstride);
407 } else {
408 let mut ebuf: [u8; 32*20] = [0; 32*20];
409 edge_emu(prev_frame, (x as isize) + (dx as isize) - 1, (y as isize) + (dy as isize) - 1, 16+3, 16+3, &mut ebuf, 32, 0, 4);
410 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, &ebuf, 32 + 1, 32);
411 }
412 }
413 fn do_chroma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, comp: usize, mv: MV, use8: bool, avg: bool) {
414 let size: usize = if use8 { 8 } else { 4 };
415 let dstride = frame.get_stride(comp);
416 let doffset = frame.get_offset(comp) + (if !avg { x + y * dstride } else { 0 });
417 let data = frame.get_data_mut().unwrap();
418 let dst: &mut [u8] = data.as_mut_slice();
419
420 let (w_, h_) = prev_frame.get_dimensions(comp);
421 let w = (w_ + 7) & !7;
422 let h = (h_ + 7) & !7;
423
424 let (dx, cx) = rv30_div_mv(mv.x / 2);
425 let (dy, cy) = rv30_div_mv(mv.y / 2);
426
427 if check_pos(x, y, size, w, h, dx, dy, 0, 1, 0, 1) {
428 let sstride = prev_frame.get_stride(comp);
429 let mut soffset = prev_frame.get_offset(comp) + x + y * sstride;
430 let data = prev_frame.get_data();
431 let src: &[u8] = data.as_slice();
432 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
433 rv30_chroma_mc(dst, doffset, dstride, src, soffset, sstride, size, cx, cy);
434 } else {
435 let mut ebuf: [u8; 16*10] = [0; 16*10];
436 edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp, 4);
437 rv30_chroma_mc(dst, doffset, dstride, &ebuf, 0, 16, size, cx, cy);
438 }
439 }
440 }