6065f7ee7d19d04798059ab64fe4ff38b2380d32
[nihav.git] / nihav-realmedia / src / codecs / rv30dsp.rs
1 use nihav_core::frame::{FrameType, NAVideoBuffer};
2 use nihav_codec_support::codecs::MV;
3 use nihav_codec_support::codecs::blockdsp::edge_emu;
4 use super::rv3040::{RV34DSP, RV34MBInfo};
5
/// Saturates a signed 16-bit value into the `0..=255` range and returns it as a byte.
fn clip8(a: i16) -> u8 {
    // After clamping the value is known to fit in eight bits, so the cast is lossless.
    a.max(0).min(255) as u8
}
11
12 fn rv3_filter_h(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, bsize: usize, c1: i16, c2: i16) {
13 for _ in 0..bsize {
14 for x in 0..bsize {
15 dst[didx + x] = clip8((-((src[sidx + x - 1] as i16) + (src[sidx + x + 2] as i16)) + (src[sidx + x + 0] as i16) * c1 + (src[sidx + x + 1] as i16) * c2 + 8) >> 4);
16 }
17 sidx += sstride;
18 didx += dstride;
19 }
20 }
21
22 fn rv3_filter_v(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, bsize: usize, c1: i16, c2: i16) {
23 for _ in 0..bsize {
24 for x in 0..bsize {
25 dst[didx + x] = clip8((-((src[sidx + x - 1 * sstride] as i16) + (src[sidx + x + 2 * sstride] as i16)) + (src[sidx + x + 0 * sstride] as i16) * c1 + (src[sidx + x + 1 * sstride] as i16) * c2 + 8) >> 4);
26 }
27 sidx += sstride;
28 didx += dstride;
29 }
30 }
31
// Builds the weighted sums used by the two-dimensional luma interpolation filters.
//
// Arms:
// * `(src, pos, w)`                 - one row of the 3x3 kernel: taps 6, 9, 1 scaled by `w` (i32).
// * `(src, pos, w, d1, d2)`         - one row of the 4x4 kernel: taps -1, d1, d2, -1 scaled by `w` (i32).
// * `(src, pos, stride, c1, c2, d1, d2)` - full 4x4 kernel with row weights -1, c1, c2, -1;
//                                     the sum is rounded, shifted right by 8 and cast to i16.
// * `(m22; src, pos, stride)`       - full 3x3 kernel with row weights 6, 9, 1;
//                                     the sum is rounded, shifted right by 8 and cast to i16.
macro_rules! mc_matrix {
    ($src: ident, $pos: expr, $w: expr) => (
        ($w * 6) * i32::from($src[$pos])
            + ($w * 9) * i32::from($src[$pos + 1])
            + ($w) * i32::from($src[$pos + 2])
    );
    ($src: ident, $pos: expr, $w: expr, $d1: expr, $d2: expr) => (
        -($w) * i32::from($src[$pos - 1])
            + ($w * $d1) * i32::from($src[$pos])
            + ($w * $d2) * i32::from($src[$pos + 1])
            + -($w) * i32::from($src[$pos + 2])
    );
    ($src: ident, $pos: expr, $stride: expr, $c1: expr, $c2: expr, $d1: expr, $d2: expr) => (
        ((mc_matrix!($src, $pos - $stride, -1, $d1, $d2)
            + mc_matrix!($src, $pos, $c1, $d1, $d2)
            + mc_matrix!($src, $pos + $stride, $c2, $d1, $d2)
            + mc_matrix!($src, $pos + 2 * $stride, -1, $d1, $d2)
            + 128) >> 8) as i16
    );
    (m22; $src: ident, $pos: expr, $stride: expr) => (
        ((mc_matrix!($src, $pos + 0 * $stride, 6)
            + mc_matrix!($src, $pos + 1 * $stride, 9)
            + mc_matrix!($src, $pos + 2 * $stride, 1)
            + 128) >> 8) as i16
    );
}
51
// Generates a luma motion compensation function named `$name` for a `$size` x `$size` block.
//
// Every generated function has the signature
// `fn(dst, didx, dstride, src, sidx, sstride)` where `didx`/`sidx` are the offsets of the
// first pixel and `dstride`/`sstride` the row pitches.
//
// Arms:
// * `copy`              - plain block copy;
// * `hor` / `ver`       - one-dimensional filter with coefficients `$c1`, `$c2`;
// * `m11`, `m12`, `m21` - two-dimensional 4x4 filters differing in row/column coefficients;
// * `m22`               - two-dimensional 3x3 filter;
// * `@matrix`           - internal helper shared by `m11`, `m12` and `m21`.
macro_rules! mc_func {
    (@matrix $name: ident, $size: expr, $c1: expr, $c2: expr, $d1: expr, $d2: expr) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            for row in 0..$size {
                let out = didx + row * dstride;
                let inp = sidx + row * sstride;
                for col in 0..$size {
                    dst[out + col] = clip8(mc_matrix!(src, inp + col, sstride, $c1, $c2, $d1, $d2));
                }
            }
        }
    );
    (copy; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            for row in 0..$size {
                let out = didx + row * dstride;
                let inp = sidx + row * sstride;
                dst[out..][..$size].copy_from_slice(&src[inp..][..$size]);
            }
        }
    );
    (hor; $name: ident, $c1: expr, $c2: expr, $size: expr) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            rv3_filter_h(dst, didx, dstride, src, sidx, sstride, $size, $c1, $c2);
        }
    );
    (ver; $name: ident, $c1: expr, $c2: expr, $size: expr) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            rv3_filter_v(dst, didx, dstride, src, sidx, sstride, $size, $c1, $c2);
        }
    );
    (m11; $name: ident, $size: expr) => (
        mc_func!(@matrix $name, $size, 12, 6, 12, 6);
    );
    (m12; $name: ident, $size: expr) => (
        mc_func!(@matrix $name, $size, 6, 12, 12, 6);
    );
    (m21; $name: ident, $size: expr) => (
        mc_func!(@matrix $name, $size, 12, 6, 6, 12);
    );
    (m22; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            for row in 0..$size {
                let out = didx + row * dstride;
                let inp = sidx + row * sstride;
                for col in 0..$size {
                    dst[out + col] = clip8(mc_matrix!(m22; src, inp + col, sstride));
                }
            }
        }
    );
}
119 mc_func!(copy; copy_16, 16);
120 mc_func!(copy; copy_8, 8);
121 mc_func!(hor; luma_mc_10_16, 12, 6, 16);
122 mc_func!(hor; luma_mc_20_16, 6, 12, 16);
123 mc_func!(hor; luma_mc_10_8, 12, 6, 8);
124 mc_func!(hor; luma_mc_20_8, 6, 12, 8);
125 mc_func!(ver; luma_mc_01_16, 12, 6, 16);
126 mc_func!(ver; luma_mc_02_16, 6, 12, 16);
127 mc_func!(ver; luma_mc_01_8, 12, 6, 8);
128 mc_func!(ver; luma_mc_02_8, 6, 12, 8);
129 mc_func!(m11; luma_mc_11_16, 16);
130 mc_func!(m11; luma_mc_11_8, 8);
131 mc_func!(m21; luma_mc_21_16, 16);
132 mc_func!(m21; luma_mc_21_8, 8);
133 mc_func!(m12; luma_mc_12_16, 16);
134 mc_func!(m12; luma_mc_12_8, 8);
135 mc_func!(m22; luma_mc_22_16, 16);
136 mc_func!(m22; luma_mc_22_8, 8);
137
/// Weight (in eighths) of the current sample for each fraction index.
const RV30_CHROMA_FRAC1: [u16; 3] = [ 8, 5, 3 ];
/// Weight (in eighths) of the next sample for each fraction index.
const RV30_CHROMA_FRAC2: [u16; 3] = [ 0, 3, 5 ];

/// Chroma motion compensation for a `size` x `size` block using bilinear interpolation.
///
/// `x` and `y` are fraction indices (`0..3`) selecting the horizontal and vertical weights.
/// When both are zero the block is simply copied; otherwise each output pixel is the
/// weighted sum of a 2x2 source neighbourhood, rounded and shifted right by 6.
///
/// `didx`/`sidx` are the offsets of the first pixel in `dst`/`src` and
/// `dstride`/`sstride` are the row pitches. In the interpolating case one extra source
/// column and row beyond the block are read.
fn rv30_chroma_mc(dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
    if x == 0 && y == 0 {
        for row in 0..size {
            let out = didx + row * dstride;
            let inp = sidx + row * sstride;
            dst[out..out + size].copy_from_slice(&src[inp..inp + size]);
        }
        return;
    }

    let (wx_cur, wx_next) = (RV30_CHROMA_FRAC1[x], RV30_CHROMA_FRAC2[x]);
    let (wy_cur, wy_next) = (RV30_CHROMA_FRAC1[y], RV30_CHROMA_FRAC2[y]);
    // The four weights always add up to 64, hence the final shift by 6.
    let w_tl = wx_cur * wy_cur;
    let w_tr = wx_next * wy_cur;
    let w_bl = wx_cur * wy_next;
    let w_br = wx_next * wy_next;

    for row in 0..size {
        let out = didx + row * dstride;
        let top = sidx + row * sstride;
        for col in 0..size {
            let acc = w_tl * u16::from(src[top + col])
                + w_tr * u16::from(src[top + col + 1])
                + w_bl * u16::from(src[top + col + sstride])
                + w_br * u16::from(src[top + col + 1 + sstride])
                + 32;
            dst[out + col] = (acc >> 6) as u8;
        }
    }
}
164
165 pub struct RV30DSP {
166 luma_mc: [[fn (&mut [u8], usize, usize, &[u8], usize, usize); 9]; 2],
167 }
168
169 impl RV30DSP {
170 pub fn new() -> Self {
171 RV30DSP {
172 luma_mc: [
173 [ copy_16, luma_mc_10_16, luma_mc_20_16,
174 luma_mc_01_16, luma_mc_11_16, luma_mc_21_16,
175 luma_mc_02_16, luma_mc_12_16, luma_mc_22_16 ],
176 [ copy_8, luma_mc_10_8, luma_mc_20_8,
177 luma_mc_01_8, luma_mc_11_8, luma_mc_21_8,
178 luma_mc_02_8, luma_mc_12_8, luma_mc_22_8 ] ],
179 }
180 }
181 }
182
// Reads element `$idx` of the byte slice `$buf` widened to a signed 16-bit value.
macro_rules! el {
    ($buf: ident, $idx: expr) => (i16::from($buf[$idx]));
}
186
/// Limits `a` to the symmetric range `-lim..=lim`.
///
/// The lower bound is checked first, so for a negative `lim` the result is `-lim`
/// whenever `a < -lim`.
fn clip_symm(a: i16, lim: i16) -> i16 {
    let low = -lim;
    if a < low {
        return low;
    }
    if a > lim {
        return lim;
    }
    a
}
196
/// Loop filter limit for each of the 32 possible macroblock quantiser values.
///
/// A limit of zero (quantisers 0 to 10) means the edge is not filtered at all.
const RV30_LOOP_FILTER_STRENGTH: [i16; 32] = [
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 3, 3, 3,
    3, 4, 4, 4, 4, 5, 5, 5,
];
201
// Evaluates to `true` when bit number `$bit` of `$mask` is set.
macro_rules! test_bit {
    ($mask: expr, $bit: expr) => ( (($mask >> $bit) & 1) == 1 )
}
205
206 fn rv30_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize, lim: i16) {
207 for _ in 0..4 {
208 let a = el!(pix, off - 2*step);
209 let b = el!(pix, off - step);
210 let c = el!(pix, off);
211 let d = el!(pix, off + step);
212 let diff0 = ((a - d) - (b - c) * 4) >> 3;
213 let diff = clip_symm(diff0, lim);
214 pix[off - step] = clip8(b + diff);
215 pix[off ] = clip8(c - diff);
216 off += stride;
217 }
218 }
219
/// Splits a third-pel motion vector component into its integer and fractional parts.
///
/// The integer part is rounded towards negative infinity so that the returned
/// fraction is always in `0..3`.
fn rv30_div_mv(mv: i16) -> (i16, usize) {
    (mv.div_euclid(3), mv.rem_euclid(3) as usize)
}
229
/// Checks whether a displaced block together with its filter margins lies inside the picture.
///
/// The `size` x `size` block at (`x`, `y`) is moved by (`dx`, `dy`); `e0`, `e1`, `e2`
/// and `e3` are the extra pixels required to the left, right, top and bottom of it.
/// Returns `true` when the whole area fits into a `w` x `h` picture.
fn check_pos(x: usize, y: usize, size: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let block = size as isize;
    let new_x = (x as isize) + (dx as isize);
    let new_y = (y as isize) + (dy as isize);

    let left = new_x - e0;
    let right = new_x + block + e1;
    let top = new_y - e2;
    let bottom = new_y + block + e3;

    left >= 0 && right <= (w as isize) && top >= 0 && bottom <= (h as isize)
}
236
/// Extra source pixels needed before a block (left or top) for each luma fraction index.
const RV30_EDGE1: [isize; 3] = [0, 1, 1];
/// Extra source pixels needed after a block (right or bottom) for each luma fraction index.
const RV30_EDGE2: [isize; 3] = [0, 2, 2];
239
240 impl RV34DSP for RV30DSP {
241 fn loop_filter(&self, frame: &mut NAVideoBuffer<u8>, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, _mb_h: usize, row: usize) {
242 let mut offs: [usize; 3] = [0; 3];
243 let mut stride: [usize; 3] = [0; 3];
244
245 for comp in 0..3 {
246 stride[comp] = frame.get_stride(comp);
247 let start = if comp == 0 { row * 16 } else { row * 8 };
248 offs[comp] = frame.get_offset(comp) + start * stride[comp];
249 }
250
251 let data = frame.get_data_mut().unwrap();
252 let dst: &mut [u8] = data.as_mut_slice();
253
254 // vertical filter
255 let mut left_cbp = 0;
256 let mut left_lim = 0;
257 let mut left_dbk = 0;
258 let mut mb_pos: usize = row * mb_w;
259 for mb_x in 0..mb_w {
260 let cur_lim = RV30_LOOP_FILTER_STRENGTH[mbinfo[mb_pos].q as usize];
261 let cur_dbk = mbinfo[mb_pos].deblock;
262 let cur_cbp = mbinfo[mb_pos].cbp_c;
263 let xstart = if mb_x == 0 { 1 } else { 0 };
264 for y in 0..4 {
265 let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
266 for x in xstart..4 {
267 let cs = x + y*4;
268 let loc_lim;
269
270 if test_bit!(cur_dbk, cs) {
271 loc_lim = cur_lim;
272 } else if (x == 0) && test_bit!(left_dbk, cs + 3) {
273 loc_lim = left_lim;
274 } else if (x != 0) && test_bit!(cur_dbk, cs - 1) {
275 loc_lim = cur_lim;
276 } else {
277 loc_lim = 0;
278 }
279 if loc_lim != 0 {
280 rv30_loop_filter4(dst, yoff + x * 4, 1, stride[0], loc_lim);
281 }
282 }
283 }
284
285 for comp in 1..3 {
286 for y in 0..2 {
287 let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
288 for x in xstart..2 {
289 let cs = x + y * 2 + (comp - 1) * 4;
290 let loc_lim;
291
292 if test_bit!(cur_cbp, cs) {
293 loc_lim = cur_lim;
294 } else if (x == 0) && test_bit!(left_cbp, cs + 1) {
295 loc_lim = left_lim;
296 } else if (x != 0) && test_bit!(cur_cbp, cs - 1) {
297 loc_lim = cur_lim;
298 } else {
299 loc_lim = 0;
300 }
301 if loc_lim != 0 {
302 rv30_loop_filter4(dst, coff + x * 4, 1, stride[comp], loc_lim);
303 }
304 }
305 }
306 }
307
308 left_lim = cur_lim;
309 left_dbk = cur_dbk;
310 left_cbp = cur_cbp;
311 mb_pos += 1;
312 }
313
314 // horizontal filter
315 let mut mb_pos: usize = row * mb_w;
316 for mb_x in 0..mb_w {
317 let cur_lim = RV30_LOOP_FILTER_STRENGTH[mbinfo[mb_pos].q as usize];
318 let cur_dbk = mbinfo[mb_pos].deblock;
319 let cur_cbp = mbinfo[mb_pos].cbp_c;
320 let ystart = if row == 0 { 1 } else { 0 };
321 let top_lim;
322 let top_dbk;
323 let top_cbp;
324 if row > 0 {
325 top_lim = RV30_LOOP_FILTER_STRENGTH[mbinfo[mb_pos - mb_w].q as usize];
326 top_dbk = mbinfo[mb_pos - mb_w].deblock;
327 top_cbp = mbinfo[mb_pos - mb_w].cbp_c;
328 } else {
329 top_lim = 0;
330 top_dbk = 0;
331 top_cbp = 0;
332 }
333 for y in ystart..4 {
334 let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
335 for x in 0..4 {
336 let cs = x + y*4;
337 let loc_lim;
338
339 if test_bit!(cur_dbk, cs) {
340 loc_lim = cur_lim;
341 } else if (y == 0) && test_bit!(top_dbk, cs + 12) {
342 loc_lim = top_lim;
343 } else if (y != 0) && test_bit!(cur_dbk, cs - 4) {
344 loc_lim = cur_lim;
345 } else {
346 loc_lim = 0;
347 }
348 if loc_lim != 0 {
349 rv30_loop_filter4(dst, yoff + x * 4, stride[0], 1, loc_lim);
350 }
351 }
352 }
353
354 for comp in 1..3 {
355 for y in ystart..2 {
356 let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
357 for x in 0..2 {
358 let cs = x + y * 2 + (comp - 1) * 4;
359 let loc_lim;
360
361 if test_bit!(cur_cbp, cs) {
362 loc_lim = cur_lim;
363 } else if (y == 0) && test_bit!(top_cbp, cs + 2) {
364 loc_lim = top_lim;
365 } else if (y != 0) && test_bit!(cur_cbp, cs - 2) {
366 loc_lim = cur_lim;
367 } else {
368 loc_lim = 0;
369 }
370 if loc_lim != 0 {
371 rv30_loop_filter4(dst, coff + x * 4, stride[comp], 1, loc_lim);
372 }
373 }
374 }
375 }
376
377 mb_pos += 1;
378 }
379 }
380 fn do_luma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, mv: MV, use16: bool, avg: bool) {
381 let size: usize = if use16 { 16 } else { 8 };
382 let dstride = frame.get_stride(0);
383 let doffset = frame.get_offset(0) + (if !avg { x + y * dstride } else { 0 });
384 let data = frame.get_data_mut().unwrap();
385 let dst: &mut [u8] = data.as_mut_slice();
386
387 let (w_, h_) = prev_frame.get_dimensions(0);
388 let w = (w_ + 15) & !15;
389 let h = (h_ + 15) & !15;
390
391 let (dx, cx) = rv30_div_mv(mv.x);
392 let (dy, cy) = rv30_div_mv(mv.y);
393 let mode = cx + cy * 3;
394
395 if check_pos(x, y, size, w, h, dx, dy, RV30_EDGE1[cx], RV30_EDGE2[cx], RV30_EDGE1[cy], RV30_EDGE2[cy]) {
396 let sstride = prev_frame.get_stride(0);
397 let mut soffset = prev_frame.get_offset(0) + x + y * sstride;
398 let data = prev_frame.get_data();
399 let src: &[u8] = data.as_slice();
400 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
401 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, src, soffset, sstride);
402 } else {
403 let mut ebuf: [u8; 32*20] = [0; 32*20];
404 edge_emu(prev_frame, (x as isize) + (dx as isize) - 1, (y as isize) + (dy as isize) - 1, 16+3, 16+3, &mut ebuf, 32, 0, 4);
405 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, &ebuf, 32 + 1, 32);
406 }
407 }
408 fn do_chroma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, comp: usize, mv: MV, use8: bool, avg: bool) {
409 let size: usize = if use8 { 8 } else { 4 };
410 let dstride = frame.get_stride(comp);
411 let doffset = frame.get_offset(comp) + (if !avg { x + y * dstride } else { 0 });
412 let data = frame.get_data_mut().unwrap();
413 let dst: &mut [u8] = data.as_mut_slice();
414
415 let (w_, h_) = prev_frame.get_dimensions(comp);
416 let w = (w_ + 7) & !7;
417 let h = (h_ + 7) & !7;
418
419 let (dx, cx) = rv30_div_mv(mv.x / 2);
420 let (dy, cy) = rv30_div_mv(mv.y / 2);
421
422 if check_pos(x, y, size, w, h, dx, dy, 0, 1, 0, 1) {
423 let sstride = prev_frame.get_stride(comp);
424 let mut soffset = prev_frame.get_offset(comp) + x + y * sstride;
425 let data = prev_frame.get_data();
426 let src: &[u8] = data.as_slice();
427 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
428 rv30_chroma_mc(dst, doffset, dstride, src, soffset, sstride, size, cx, cy);
429 } else {
430 let mut ebuf: [u8; 16*10] = [0; 16*10];
431 edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp, 4);
432 rv30_chroma_mc(dst, doffset, dstride, &ebuf, 0, 16, size, cx, cy);
433 }
434 }
435 }