1 use nihav_core::frame::{FrameType, NAVideoBuffer};
2 use nihav_core::codecs::MV;
3 use nihav_core::codecs::blockdsp::edge_emu;
4 use super::rv3040::{RV34DSP, RV34MBInfo};
// Narrows a signed 16-bit intermediate to a `u8` pixel value.
// The visible branch clamps anything above 255 to 255.
// NOTE(review): the branches for negative and in-range values are on lines
// not shown in this excerpt — presumably 0 and a plain cast; confirm.
6 fn clip8(a: i16) -> u8 {
8 else if a > 255 { 255 }
// Horizontal 4-tap interpolation filter.
//
// Each output sample is built from four horizontally adjacent source samples
// with weights (-1, c1, c2, -1), rounded with +8, shifted right by 4 and
// passed through `clip8`.  The filter reads one sample to the left
// (`sidx + x - 1`) and two to the right (`sidx + x + 2`) of the current
// position, so the source must provide that margin.
//
// NOTE(review): the row/column loops and the per-row advance of `didx`/`sidx`
// are on lines not shown here; `bsize` is presumably the square block size.
12 fn rv3_filter_h(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, bsize: usize, c1: i16, c2: i16) {
15 dst[didx + x] = clip8((-((src[sidx + x - 1] as i16) + (src[sidx + x + 2] as i16)) + (src[sidx + x + 0] as i16) * c1 + (src[sidx + x + 1] as i16) * c2 + 8) >> 4);
// Vertical 4-tap interpolation filter.
//
// Same arithmetic as the horizontal variant, but the four taps are taken from
// the rows at -1, 0, +1 and +2 times `sstride` relative to the current
// sample: weights (-1, c1, c2, -1), +8 rounding, >> 4, then `clip8`.
// The source must therefore have one row above and two rows below available.
//
// NOTE(review): the row/column loops and the per-row advance of `didx`/`sidx`
// are on lines not shown here; `bsize` is presumably the square block size.
22 fn rv3_filter_v(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, bsize: usize, c1: i16, c2: i16) {
25 dst[didx + x] = clip8((-((src[sidx + x - 1 * sstride] as i16) + (src[sidx + x + 2 * sstride] as i16)) + (src[sidx + x + 0 * sstride] as i16) * c1 + (src[sidx + x + 1 * sstride] as i16) * c2 + 8) >> 4);
// Expression macro computing weighted sums of source samples for the
// two-dimensional (diagonal) interpolation cases.  All sums are done in i32.
32 macro_rules! mc_matrix {
// Three-sample row: weights (6, 9, 1) on samples $o, $o+1, $o+2, each
// multiplied by the row weight $c1.
33 ($s: ident, $o: expr, $c1: expr) => (
34 ($c1 * 6) * ($s[$o] as i32) + ($c1 * 9) * ($s[$o + 1] as i32) + ($c1) * ($s[$o + 2] as i32)
// Four-sample row: weights (-1, $d1, $d2, -1) on samples $o-1 .. $o+2, each
// multiplied by the row weight $c1.
36 ($s: ident, $o: expr, $c1: expr, $d1: expr, $d2: expr) => (
37 -($c1) * ($s[$o - 1] as i32) + ($c1 * $d1) * ($s[$o] as i32) + ($c1 * $d2) * ($s[$o + 1] as i32) + -($c1) * ($s[$o + 2] as i32)
// Full 4x4 kernel: four rows (one above, current, and two below, spaced by
// $ss) weighted (-1, $c1, $c2, -1), each row filtered with the four-sample
// arm above.  The total is rounded with +128, shifted right by 8 and
// truncated to i16.
39 ($s: ident, $o: expr, $ss: expr, $c1: expr, $c2: expr, $d1: expr, $d2: expr) => (
40 ((mc_matrix!($s, $o - $ss, -1, $d1, $d2) +
41 mc_matrix!($s, $o , $c1, $d1, $d2) +
42 mc_matrix!($s, $o + $ss, $c2, $d1, $d2) +
43 mc_matrix!($s, $o + 2 * $ss, -1, $d1, $d2) + 128) >> 8) as i16
// 3x3 kernel selected by the `m22` tag: rows 0, 1, 2 weighted (6, 9, 1), each
// row filtered with the three-sample arm; +128 rounding, >> 8, cast to i16.
45 (m22; $s: ident, $o: expr, $ss: expr) => (
46 ((mc_matrix!($s, $o + 0 * $ss, 6) +
47 mc_matrix!($s, $o + 1 * $ss, 9) +
48 mc_matrix!($s, $o + 2 * $ss, 1) + 128) >> 8) as i16
// Generates the fixed-size motion-compensation functions.  Every generated
// function has the same signature
// (dst, didx, dstride, src, sidx, sstride) so they can share one
// function-pointer table.
// NOTE(review): the per-row loops and offset updates inside the generated
// bodies are on lines not shown in this excerpt.
52 macro_rules! mc_func {
// `copy`: plain copy of $size samples per row, no interpolation.
53 (copy; $name: ident, $size: expr) => (
54 fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
// Sub-slicing to exactly $size elements bounds both rows up front.
56 let d = &mut dst[didx..][..$size];
57 let s = &src[sidx..][..$size];
58 for x in 0..$size { d[x] = s[x]; }
// `hor`: horizontal-only interpolation, delegating to rv3_filter_h with the
// given pair of centre coefficients.
64 (hor; $name: ident, $c1: expr, $c2: expr, $size: expr) => (
65 fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
66 rv3_filter_h(dst, didx, dstride, src, sidx, sstride, $size, $c1, $c2);
// `ver`: vertical-only interpolation, delegating to rv3_filter_v.
69 (ver; $name: ident, $c1: expr, $c2: expr, $size: expr) => (
70 fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
71 rv3_filter_v(dst, didx, dstride, src, sidx, sstride, $size, $c1, $c2);
// `m11`: 2-D kernel with row weights (12, 6) and column weights (12, 6).
74 (m11; $name: ident, $size: expr) => (
75 fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
78 dst[didx + x] = clip8(mc_matrix!(src, sidx + x, sstride, 12, 6, 12, 6));
// `m12`: 2-D kernel with row weights (6, 12) and column weights (12, 6).
85 (m12; $name: ident, $size: expr) => (
86 fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
89 dst[didx + x] = clip8(mc_matrix!(src, sidx + x, sstride, 6, 12, 12, 6));
// `m21`: 2-D kernel with row weights (12, 6) and column weights (6, 12).
96 (m21; $name: ident, $size: expr) => (
97 fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
100 dst[didx + x] = clip8(mc_matrix!(src, sidx + x, sstride, 12, 6, 6, 12));
// `m22`: uses the dedicated 3x3 kernel of mc_matrix! instead of the 4x4 one.
107 (m22; $name: ident, $size: expr) => (
108 fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
111 dst[didx + x] = clip8(mc_matrix!(m22; src, sidx + x, sstride));
// Instantiate the luma motion-compensation routines for 16- and 8-sample
// blocks.  In `luma_mc_XY_N`, X selects the horizontal variant and Y the
// vertical one: X/Y = 1 uses centre coefficients (12, 6), X/Y = 2 uses
// (6, 12), and 0 means no filtering along that axis.
119 mc_func!(copy; copy_16, 16);
120 mc_func!(copy; copy_8, 8);
// Horizontal-only cases.
121 mc_func!(hor; luma_mc_10_16, 12, 6, 16);
122 mc_func!(hor; luma_mc_20_16, 6, 12, 16);
123 mc_func!(hor; luma_mc_10_8, 12, 6, 8);
124 mc_func!(hor; luma_mc_20_8, 6, 12, 8);
// Vertical-only cases.
125 mc_func!(ver; luma_mc_01_16, 12, 6, 16);
126 mc_func!(ver; luma_mc_02_16, 6, 12, 16);
127 mc_func!(ver; luma_mc_01_8, 12, 6, 8);
128 mc_func!(ver; luma_mc_02_8, 6, 12, 8);
// Two-dimensional cases.
129 mc_func!(m11; luma_mc_11_16, 16);
130 mc_func!(m11; luma_mc_11_8, 8);
131 mc_func!(m21; luma_mc_21_16, 16);
132 mc_func!(m21; luma_mc_21_8, 8);
133 mc_func!(m12; luma_mc_12_16, 16);
134 mc_func!(m12; luma_mc_12_8, 8);
135 mc_func!(m22; luma_mc_22_16, 16);
136 mc_func!(m22; luma_mc_22_8, 8);
// Bilinear weights for chroma interpolation, indexed by the fractional
// position (0..=2).  FRAC1 weights the sample at the current position and
// FRAC2 the next one; each pair sums to 8, so the product of a horizontal and
// a vertical pair sums to 64 (matching the `>> 6` in rv30_chroma_mc).
138 const RV30_CHROMA_FRAC1: [u16; 3] = [ 8, 5, 3 ];
139 const RV30_CHROMA_FRAC2: [u16; 3] = [ 0, 3, 5 ];
// Chroma motion compensation with bilinear interpolation.
//
// `x` and `y` are the fractional positions used to index the
// RV30_CHROMA_FRAC tables (so they must be in 0..=2); `size` is the number of
// samples copied/interpolated per row.
// NOTE(review): the row loops, offset updates and the else-branch framing are
// on lines not shown in this excerpt.
140 fn rv30_chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
// Integer position: straight copy.
141 if (x == 0) && (y == 0) {
// The loop variable shadows the `x` parameter inside this loop.
143 for x in 0..size { dst[didx + x] = src[sidx + x]; }
// Weights for the four neighbours: a = current, b = right, c = below,
// d = below-right.  They always sum to 64.
149 let a = RV30_CHROMA_FRAC1[x] * RV30_CHROMA_FRAC1[y];
150 let b = RV30_CHROMA_FRAC2[x] * RV30_CHROMA_FRAC1[y];
151 let c = RV30_CHROMA_FRAC1[x] * RV30_CHROMA_FRAC2[y];
152 let d = RV30_CHROMA_FRAC2[x] * RV30_CHROMA_FRAC2[y];
// Weighted average with +32 rounding; with weights summing to 64 the largest
// intermediate is 64 * 255 + 32, which fits in u16.
// NOTE(review): `x` here is presumably a loop index over 0..size declared on
// an elided line, shadowing the parameter — confirm.
155 dst[didx + x] = ((a * (src[sidx + x] as u16)
156 + b * (src[sidx + x + 1] as u16)
157 + c * (src[sidx + x + sstride] as u16)
158 + d * (src[sidx + x + 1 + sstride] as u16) + 32) >> 6) as u8;
// Luma motion-compensation dispatch table: two groups of nine function
// pointers.  do_luma_mc indexes it as [0 for 16-sample blocks, 1 for
// 8-sample blocks][cx + cy * 3].
166 luma_mc: [[fn (&mut [u8], usize, usize, &[u8], usize, usize); 9]; 2],
// Builds the DSP context by filling the luma dispatch table.
// Entries are ordered so that index = horizontal fraction + 3 * vertical
// fraction (copy, 10, 20, 01, 11, 21, 02, 12, 22); the first group holds the
// 16-sample routines and the second the 8-sample ones.
170 pub fn new() -> Self {
173 [ copy_16, luma_mc_10_16, luma_mc_20_16,
174 luma_mc_01_16, luma_mc_11_16, luma_mc_21_16,
175 luma_mc_02_16, luma_mc_12_16, luma_mc_22_16 ],
176 [ copy_8, luma_mc_10_8, luma_mc_20_8,
177 luma_mc_01_8, luma_mc_11_8, luma_mc_21_8,
178 luma_mc_02_8, luma_mc_12_8, luma_mc_22_8 ] ],
// Macro arm that reads element $o of $src and widens it to i16 so the loop
// filter can do signed arithmetic on pixel values.
184 ($src: ident, $o: expr) => ($src[$o] as i16);
// NOTE(review): the body is not visible in this excerpt.  Judging by the name
// and its use in rv30_loop_filter4 it presumably clamps `a` to the symmetric
// range [-lim, lim] — confirm against the full source.
187 fn clip_symm(a: i16, lim: i16) -> i16 {
// Loop-filter clipping limit, looked up by the macroblock's `q` field in
// loop_filter.  The table has 32 entries, so `q` must stay below 32; values
// are non-decreasing from 0 to 5, and the first eleven are 0.
197 const RV30_LOOP_FILTER_STRENGTH: [i16; 32] = [
198 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
199 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5
// Evaluates to `true` when bit number $x (counting from the least significant
// bit) of $pat is set.
202 macro_rules! test_bit {
203 ($pat: expr, $x: expr) => ( (($pat >> $x) & 1) != 0 )
// Deblocks one block edge in place.
//
// `off` addresses the first sample on the far side of the edge, `step` is the
// distance between samples across the edge, and `lim` bounds the correction.
// NOTE(review): the enclosing loop is on an elided line; given `mut off` and
// the name, it presumably runs 4 times advancing `off` by `stride` — confirm.
206 fn rv30_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize, lim: i16) {
// Four samples straddling the edge: a, b on the near side, c, d on the far.
208 let a = el!(pix, off - 2*step);
209 let b = el!(pix, off - step);
210 let c = el!(pix, off);
211 let d = el!(pix, off + step);
// Correction term derived from the outer and inner differences, then limited
// by clip_symm before being applied with opposite signs to b and c.
212 let diff0 = ((a - d) - (b - c) * 4) >> 3;
213 let diff = clip_symm(diff0, lim);
214 pix[off - step] = clip8(b + diff);
215 pix[off ] = clip8(c - diff);
// Splits a motion-vector component into an integer sample offset and a
// fractional index.  Callers use the second value to index 3-entry tables, so
// it is expected to be in 0..=2.
// NOTE(review): the computation of `i` and `f` is on elided lines; presumably
// `i = mv / 3` with `f` the remainder, and the visible return handles a
// negative remainder by borrowing one from `i` — confirm.
220 fn rv30_div_mv(mv: i16) -> (i16, usize) {
224 (i - 1, (f + 3) as usize)
// Returns `true` when a `size` x `size` block at (x + dx, y + dy), extended
// by `e0` samples to the left, `e1` to the right, `e2` above and `e3` below,
// lies entirely inside a `w` x `h` plane.  Arithmetic is done in isize so that
// negative displaced positions are handled.
230 fn check_pos(x: usize, y: usize, size: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
231 let xn = (x as isize) + (dx as isize);
232 let yn = (y as isize) + (dy as isize);
234 (xn - e0 >= 0) && (xn + (size as isize) + e1 <= (w as isize)) && (yn - e2 >= 0) && (yn + (size as isize) + e3 <= (h as isize))
// Extra reference samples needed around a luma block, indexed by the
// fractional motion-vector component.  EDGE1 is the margin before the block
// (left/top) and EDGE2 the margin after it (right/bottom); the non-zero
// values match the -1 and +2 sample reach of the 4-tap filters.
237 const RV30_EDGE1: [isize; 3] = [ 0, 1, 1 ];
238 const RV30_EDGE2: [isize; 3] = [ 0, 2, 2 ];
// RV30 implementation of the shared RV34DSP interface: in-loop deblocking
// plus luma and chroma motion compensation.
// NOTE(review): many loop headers, local declarations and closing braces of
// this impl are on lines not shown in this excerpt.
240 impl RV34DSP for RV30DSP {
// Deblocks one macroblock row (`row`) of `frame` in place.  `mbinfo` holds
// per-macroblock data laid out row-major with `mb_w` entries per row.
241 fn loop_filter(&self, frame: &mut NAVideoBuffer<u8>, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, row: usize) {
242 let mut offs: [usize; 3] = [0; 3];
243 let mut stride: [usize; 3] = [0; 3];
// Per-plane stride and the offset of this macroblock row: 16 lines per row
// for plane 0, 8 lines for the other planes.
246 stride[comp] = frame.get_stride(comp);
247 let start = if comp == 0 { row * 16 } else { row * 8 };
248 offs[comp] = frame.get_offset(comp) + start * stride[comp];
251 let data = frame.get_data_mut().unwrap();
252 let dst: &mut [u8] = data.as_mut_slice();
// Pass 1: edges between horizontally adjacent 4x4 blocks.  State from the
// macroblock to the left is carried in the left_* variables.
255 let mut left_cbp = 0;
256 let mut left_lim = 0;
257 let mut left_dbk = 0;
258 let mut mb_pos: usize = row * mb_w;
259 for mb_x in 0..mb_w {
260 let cur_lim = RV30_LOOP_FILTER_STRENGTH[mbinfo[mb_pos].q as usize];
261 let cur_dbk = mbinfo[mb_pos].deblock;
262 let cur_cbp = mbinfo[mb_pos].cbp_c;
// The first macroblock has nothing to its left, so its leftmost edge is
// skipped.
263 let xstart = if mb_x == 0 { 1 } else { 0 };
265 let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
// Luma: an edge is filtered when the current block's deblock bit is set, or
// the bit of the block on the other side of the edge (in the left macroblock
// for x == 0, otherwise the previous block of this one).
// NOTE(review): `cs` for luma is defined on an elided line — presumably
// x + y * 4, which would make `cs + 3` the rightmost block of the same row.
270 if test_bit!(cur_dbk, cs) {
272 } else if (x == 0) && test_bit!(left_dbk, cs + 3) {
274 } else if (x != 0) && test_bit!(cur_dbk, cs - 1) {
// step = 1 sample across the edge, advancing by one line per iteration.
280 rv30_loop_filter4(dst, yoff + x * 4, 1, stride[0], loc_lim);
287 let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
// Chroma: 2x2 blocks per plane, with the second chroma plane's bits starting
// at bit 4 of the pattern.
289 let cs = x + y * 2 + (comp - 1) * 4;
292 if test_bit!(cur_cbp, cs) {
294 } else if (x == 0) && test_bit!(left_cbp, cs + 1) {
296 } else if (x != 0) && test_bit!(cur_cbp, cs - 1) {
302 rv30_loop_filter4(dst, coff + x * 4, 1, stride[comp], loc_lim);
// Pass 2: edges between vertically adjacent 4x4 blocks, using data from the
// macroblock row above.
315 let mut mb_pos: usize = row * mb_w;
316 for mb_x in 0..mb_w {
317 let cur_lim = RV30_LOOP_FILTER_STRENGTH[mbinfo[mb_pos].q as usize];
318 let cur_dbk = mbinfo[mb_pos].deblock;
319 let cur_cbp = mbinfo[mb_pos].cbp_c;
// The top macroblock row has nothing above it, so its top edge is skipped.
320 let ystart = if row == 0 { 1 } else { 0 };
// NOTE(review): `mb_pos - mb_w` is only valid for row > 0; the guard is
// presumably on an elided line — confirm.
325 top_lim = RV30_LOOP_FILTER_STRENGTH[mbinfo[mb_pos - mb_w].q as usize];
326 top_dbk = mbinfo[mb_pos - mb_w].deblock;
327 top_cbp = mbinfo[mb_pos - mb_w].cbp_c;
334 let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
// Luma: same decision as pass 1, but the neighbour is the block above (in the
// top macroblock for y == 0, otherwise one block row up in this one).
339 if test_bit!(cur_dbk, cs) {
341 } else if (y == 0) && test_bit!(top_dbk, cs + 12) {
343 } else if (y != 0) && test_bit!(cur_dbk, cs - 4) {
// step = one line across the edge, advancing by one sample per iteration.
349 rv30_loop_filter4(dst, yoff + x * 4, stride[0], 1, loc_lim);
356 let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
358 let cs = x + y * 2 + (comp - 1) * 4;
361 if test_bit!(cur_cbp, cs) {
363 } else if (y == 0) && test_bit!(top_cbp, cs + 2) {
365 } else if (y != 0) && test_bit!(cur_cbp, cs - 2) {
371 rv30_loop_filter4(dst, coff + x * 4, stride[comp], 1, loc_lim);
// Luma motion compensation of a 16x16 (`use16`) or 8x8 block at (x, y) from
// `prev_frame` displaced by `mv`, written into plane 0 of `frame`.
380 fn do_luma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, mv: MV, use16: bool, avg: bool) {
381 let size: usize = if use16 { 16 } else { 8 };
382 let dstride = frame.get_stride(0);
// When `avg` is set the block position is not added to the destination
// offset, so output starts at the beginning of the plane.
383 let doffset = frame.get_offset(0) + (if !avg { x + y * dstride } else { 0 });
384 let data = frame.get_data_mut().unwrap();
385 let dst: &mut [u8] = data.as_mut_slice();
// Reference dimensions rounded up to a multiple of 16.
387 let (w_, h_) = prev_frame.get_dimensions(0);
388 let w = (w_ + 15) & !15;
389 let h = (h_ + 15) & !15;
// Split each component into an integer offset and a fractional index; the
// two fractions select one of the nine interpolation routines.
391 let (dx, cx) = rv30_div_mv(mv.x);
392 let (dy, cy) = rv30_div_mv(mv.y);
393 let mode = cx + cy * 3;
// Fast path: the block plus its filter margins lies inside the reference
// plane, so it can be read directly.
395 if check_pos(x, y, size, w, h, dx, dy, RV30_EDGE1[cx], RV30_EDGE2[cx], RV30_EDGE1[cy], RV30_EDGE2[cy]) {
396 let sstride = prev_frame.get_stride(0);
397 let mut soffset = prev_frame.get_offset(0) + x + y * sstride;
398 let data = prev_frame.get_data();
399 let src: &[u8] = data.as_slice();
400 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
401 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, src, soffset, sstride);
// Slow path: build a padded copy of the reference region in a local buffer.
// NOTE(review): presumably edge_emu's arguments are the region origin (one
// sample up-left of the block), a 19x19 size, the destination buffer, its
// stride (32) and the plane index — confirm against edge_emu.
403 let mut ebuf: [u8; 32*20] = [0; 32*20];
404 edge_emu(prev_frame, (x as isize) + (dx as isize) - 1, (y as isize) + (dy as isize) - 1, 16+3, 16+3, &mut ebuf, 32, 0);
// Start one row and one column into the buffer to skip the leading margin.
405 self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, &ebuf, 32 + 1, 32);
// Chroma motion compensation of an 8x8 (`use8`) or 4x4 block of plane `comp`.
408 fn do_chroma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, comp: usize, mv: MV, use8: bool, avg: bool) {
409 let size: usize = if use8 { 8 } else { 4 };
410 let dstride = frame.get_stride(comp);
411 let doffset = frame.get_offset(comp) + (if !avg { x + y * dstride } else { 0 });
412 let data = frame.get_data_mut().unwrap();
413 let dst: &mut [u8] = data.as_mut_slice();
// Reference dimensions rounded up to a multiple of 8.
415 let (w_, h_) = prev_frame.get_dimensions(comp);
416 let w = (w_ + 7) & !7;
417 let h = (h_ + 7) & !7;
// The vector is halved before being split.
// NOTE(review): presumably because the chroma planes are subsampled — confirm.
419 let (dx, cx) = rv30_div_mv(mv.x / 2);
420 let (dy, cy) = rv30_div_mv(mv.y / 2);
// Bilinear interpolation reads one extra sample to the right and below.
422 if check_pos(x, y, size, w, h, dx, dy, 0, 1, 0, 1) {
423 let sstride = prev_frame.get_stride(comp);
424 let mut soffset = prev_frame.get_offset(comp) + x + y * sstride;
425 let data = prev_frame.get_data();
426 let src: &[u8] = data.as_slice();
427 soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
428 rv30_chroma_mc(dst, doffset, dstride, src, soffset, sstride, size, cx, cy);
// Slow path: padded copy of the reference region with a row stride of 16; no
// leading margin is needed, so reading starts at offset 0.
430 let mut ebuf: [u8; 16*10] = [0; 16*10];
431 edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp);
432 rv30_chroma_mc(dst, doffset, dstride, &ebuf, 0, 16, size, cx, cy);