X-Git-Url: https://git.nihav.org/?p=nihav.git;a=blobdiff_plain;f=nihav-realmedia%2Fsrc%2Fcodecs%2Frv40dsp.rs;h=a883b0b9e0e84e7943302e6fbf89341baaf9b2da;hp=0dfec4772a1285ea217092409e38aa4e3cf32c0a;hb=b7c882c1ce6f86c07c2340751200e3a060942826;hpb=b4d5b8515e75383b4fc59ea2813c90c615d59a96 diff --git a/nihav-realmedia/src/codecs/rv40dsp.rs b/nihav-realmedia/src/codecs/rv40dsp.rs index 0dfec47..a883b0b 100644 --- a/nihav-realmedia/src/codecs/rv40dsp.rs +++ b/nihav-realmedia/src/codecs/rv40dsp.rs @@ -38,6 +38,12 @@ macro_rules! filter { -5 * el!($s, $o + 2 * $step) + el!($s, $o + 3 * $step) + 32) >> 6) as i16) ); + (33; $s: ident, $o: expr, $stride: expr) => ( + clip8((( el!($s, $o) + + el!($s, $o + 1) + + el!($s, $o + $stride) + + el!($s, $o + 1 + $stride) + 2) >> 2) as i16) + ); } macro_rules! mc_func { @@ -130,6 +136,15 @@ macro_rules! mc_func { $ofilt(dst, didx, dstride, &buf, 2*bstride, $size); } ); + (mc33; $name: ident, $size: expr) => ( + fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) { + for _ in 0..$size { + for x in 0..$size { dst[didx + x] = filter!(33; src, sidx + x, sstride); } + sidx += sstride; + didx += dstride; + } + } + ); } mc_func!(copy; copy_16, 16); mc_func!(copy; copy_8, 8); @@ -161,8 +176,8 @@ mc_func!(cm03; luma_mc_31_16, 16, luma_mc_01_16); mc_func!(cm03; luma_mc_31_8, 8, luma_mc_01_8); mc_func!(cm03; luma_mc_32_16, 16, luma_mc_02_16); mc_func!(cm03; luma_mc_32_8, 8, luma_mc_02_8); -mc_func!(cm03; luma_mc_33_16, 16, luma_mc_03_16); -mc_func!(cm03; luma_mc_33_8, 8, luma_mc_03_8); +mc_func!(mc33; luma_mc_33_16, 16); +mc_func!(mc33; luma_mc_33_8, 8); const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [ [ 0, 4, 8, 4 ], @@ -174,7 +189,7 @@ const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [ fn rv40_chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) { if (x == 0) && (y == 0) { for _ in 0..size { - for x in 0..size { dst[didx + x] = src[sidx + x]; } + dst[didx..][..size].copy_from_slice(&src[sidx..][..size]); didx += dstride; sidx += sstride; } @@ -211,6 +226,7 @@ fn rv40_chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], m } } +#[allow(clippy::type_complexity)] pub struct RV40DSP { luma_mc: [[fn (&mut [u8], usize, usize, &[u8], usize, usize); 16]; 2], } @@ -280,12 +296,12 @@ fn rv40_weak_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: u pix[off - step] = clip8(p0 + diff); pix[off ] = clip8(q0 - diff); - if filter_p1 && ((p1 - p0).wrapping_abs() <= beta) { + if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) { let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1; pix[off - 2*step] = clip8(p1 - clip_symm(p1_diff, lim_p1)); } - if filter_q1 && ((q1 - q0).wrapping_abs() <= beta) { + if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) { let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1; pix[off + step] = clip8(q1 - clip_symm(q1_diff, lim_q1)); } @@ -335,12 +351,12 @@ fn rv40_weak_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize, ch[3 - 1] = clip8(p0 + diff); ch[3 ] = clip8(q0 - diff); - if filter_p1 && ((p1 - p0).wrapping_abs() <= beta) { + if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) { let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1; ch[3 - 2] = clip8(p1 - clip_symm(p1_diff, lim_p1)); } - if filter_q1 && ((q1 - q0).wrapping_abs() <= beta) { + if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) { let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1; ch[3 + 1] = clip8(q1 - clip_symm(q1_diff, lim_q1)); } @@ -365,7 +381,7 @@ fn sfilter(a: i16, b: i16, c: i16, d: i16, e: i16, dither: i16, clip: bool, lims } else if val > c + lims { c + lims } else { - c + val } } else { val @@ -398,7 +414,7 @@ fn rv40_strong_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: let q3 = el!(pix, off + 3*step); let np0 = sfilter(p2, p1, p0, q0, q1, RV40_DITHER_L[dmode + i], fmode != 0, lims); - let nq0 = sfilter( p1, p0, q0, q1, q0, RV40_DITHER_R[dmode + i], fmode != 0, lims); + let nq0 = sfilter( p1, p0, q0, q1, q2, RV40_DITHER_R[dmode + i], fmode != 0, lims); let np1 = sfilter(p3, p2, p1, np0, q0, RV40_DITHER_L[dmode + i], fmode != 0, lims); let nq1 = sfilter( p0, nq0, q1, q2, q3, RV40_DITHER_R[dmode + i], fmode != 0, lims); @@ -409,8 +425,8 @@ fn rv40_strong_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: pix[off + step] = nq1 as u8; if !chroma { - let np2 = sfilter(np0, np1, p2, p3, np1, 64, false, 0); - let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0); + let np2 = sfilter(np0, np1, p2, p3, p2, 64, false, 0); + let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0); pix[off - 3*step] = np2 as u8; pix[off + 2*step] = nq2 as u8; } @@ -565,8 +581,18 @@ fn check_pos(x: usize, y: usize, size: usize, w: usize, h: usize, dx: i16, dy: i const RV40_EDGE1: [isize; 4] = [ 0, 2, 2, 2 ]; const RV40_EDGE2: [isize; 4] = [ 0, 3, 3, 3 ]; +const Y_TOP_ROW_MASK: u32 = 0x000F; +const Y_BOT_ROW_MASK: u32 = 0xF000; +const Y_LEFT_COL_MASK: u32 = 0x1111; +const Y_RIGHT_COL_MASK: u32 = 0x8888; +const C_TOP_ROW_MASK: u32 = 0x3; +const C_BOT_ROW_MASK: u32 = 0xC; +const C_LEFT_COL_MASK: u32 = 0x5; +const C_RIGHT_COL_MASK: u32 = 0xA; + impl RV34DSP for RV40DSP { - fn loop_filter(&self, frame: &mut NAVideoBuffer, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, row: usize) { + #[allow(clippy::cognitive_complexity)] + fn loop_filter(&self, frame: &mut NAVideoBuffer, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, mb_h: usize, row: usize) { // todo proper B-frame filtering? let mut offs: [usize; 3] = [0; 3]; let mut stride: [usize; 3] = [0; 3]; @@ -582,8 +608,12 @@ impl RV34DSP for RV40DSP { let data = frame.get_data_mut().unwrap(); let dst: &mut [u8] = data.as_mut_slice(); + let is_last_row = row == mb_h - 1; + let mut mb_pos: usize = row * mb_w; let mut left_q: usize = 0; + let mut left_cbp = 0; + let mut left_dbk = 0; for mb_x in 0..mb_w { let q = mbinfo[mb_pos].q as usize; let alpha = RV40_ALPHA_TAB[q]; @@ -591,21 +621,45 @@ impl RV34DSP for RV40DSP { let beta_y = if small_frame { beta * 4 } else { beta * 3 }; let beta_c = beta * 3; - let cur_dbk = mbinfo[mb_pos].deblock; - let cur_cbp = mbinfo[mb_pos].cbp_c; - let is_strong = mbinfo[mb_pos].mbtype.is_intra_or_16(); - let top_is_strong = is_strong || (row > 0 && mbinfo[mb_pos - mb_w].mbtype.is_intra_or_16()); - let left_is_strong = is_strong || (mb_x > 0 && mbinfo[mb_pos - 1].mbtype.is_intra_or_16()); + let top_is_strong = row > 0 && mbinfo[mb_pos - mb_w].mbtype.is_intra_or_16(); + let left_is_strong = mb_x > 0 && mbinfo[mb_pos - 1].mbtype.is_intra_or_16(); + let bot_is_strong = !is_last_row && mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16(); + + let cur_dbk = mbinfo[mb_pos].deblock; + let cur_cbp = if is_strong { 0xFFFFFF } else { mbinfo[mb_pos].cbp }; + + let (top_cbp, top_dbk) = if row > 0 { + (if top_is_strong { 0xFFFFFF } else { mbinfo[mb_pos - mb_w].cbp }, mbinfo[mb_pos - mb_w].deblock) + } else { + (0, 0) + }; + let (bot_cbp, bot_dbk) = if !is_last_row { + (mbinfo[mb_pos + mb_w].cbp, mbinfo[mb_pos + mb_w].deblock) + } else { + (0, 0) + }; + + let y_cbp = cur_cbp & 0xFFFF; + let y_to_deblock = (cur_dbk as u32) | ((bot_dbk as u32) << 16); + let mut y_h_deblock = y_to_deblock | ((y_cbp << 4) & !Y_TOP_ROW_MASK) | ((top_cbp & Y_BOT_ROW_MASK) >> 12); + let mut y_v_deblock = y_to_deblock | ((y_cbp << 1) & !Y_LEFT_COL_MASK) | ((left_cbp & Y_RIGHT_COL_MASK) >> 3); + + if mb_x == 0 { + y_v_deblock &= !Y_LEFT_COL_MASK; + } + if row == 0 { + y_h_deblock &= !Y_TOP_ROW_MASK; + } + if is_last_row || is_strong || bot_is_strong { + y_h_deblock &= !(Y_TOP_ROW_MASK << 16); + } for y in 0..4 { let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0]; for x in 0..4 { let bpos = x + y * 4; - let filter_hor_down = (y != 3) && !is_strong; - let filter_ver = (x > 0) || (mb_x > 0); - let filter_hor_up = (row > 0) && (x == 0) && top_is_strong; - let ver_strong = (x == 0) && (mb_x > 0) && left_is_strong; + let ver_strong = (x == 0) && (mb_x > 0) && (is_strong || left_is_strong); let cur_strength: usize; if is_strong { @@ -628,7 +682,7 @@ impl RV34DSP for RV40DSP { } else if mb_x > 0 { if left_is_strong { left_strength = 2; - } else if test_bit!(mbinfo[mb_pos - 1].deblock, bpos + 3) { + } else if test_bit!(left_dbk, bpos + 3) { left_strength = 1; } else { left_strength = 0; @@ -646,6 +700,14 @@ impl RV34DSP for RV40DSP { } else { bot_strength = 0; } + } else if !is_last_row { + if mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16() { + bot_strength = 2; + } else if test_bit!(bot_dbk, x) { + bot_strength = 1; + } else { + bot_strength = 0; + } } else { bot_strength = 0; } @@ -662,7 +724,7 @@ impl RV34DSP for RV40DSP { } else if row > 0 { if top_is_strong { top_strength = 2; - } else if test_bit!(mbinfo[mb_pos - mb_w].deblock, bpos + 12) { + } else if test_bit!(top_dbk, bpos + 12) { top_strength = 1; } else { top_strength = 0; @@ -681,19 +743,19 @@ impl RV34DSP for RV40DSP { let dmode = if y > 0 { x + y * 4 } else { x * 4 }; - if filter_hor_down { + if test_bit!(y_h_deblock, bpos + 4) { rv40_loop_filter4_h(dst, yoff + 4 * stride[0] + x * 4, stride[0], dmode, lim_cur, lim_bottom, alpha, beta, beta_y, false, false); } - if filter_ver && !ver_strong { + if test_bit!(y_v_deblock, bpos) && !ver_strong { rv40_loop_filter4_v(dst, yoff + x * 4, stride[0], dmode, lim_left, lim_cur, alpha, beta, beta_y, false, false); } - if filter_hor_up { + if (y == 0) && test_bit!(y_h_deblock, bpos) && (is_strong || top_is_strong) { rv40_loop_filter4_h(dst, yoff + x * 4, stride[0], dmode, lim_top, lim_cur, alpha, beta, beta_y, false, true); } - if filter_ver && ver_strong { + if test_bit!(y_v_deblock, bpos) && ver_strong { rv40_loop_filter4_v(dst, yoff + x * 4, stride[0], dmode, lim_left, lim_cur, alpha, beta, beta_y, false, true); } @@ -701,20 +763,36 @@ impl RV34DSP for RV40DSP { } for comp in 1..3 { + let cshift = 16 - 4 + comp * 4; + let c_cur_cbp = (cur_cbp >> cshift) & 0xF; + let c_top_cbp = (top_cbp >> cshift) & 0xF; + let c_left_cbp = (left_cbp >> cshift) & 0xF; + let c_bot_cbp = (bot_cbp >> cshift) & 0xF; + + let c_deblock = c_cur_cbp | (c_bot_cbp << 4); + let mut c_v_deblock = c_deblock | ((c_cur_cbp << 1) & !C_LEFT_COL_MASK) | ((c_left_cbp & C_RIGHT_COL_MASK) >> 1); + let mut c_h_deblock = c_deblock | ((c_cur_cbp & C_TOP_ROW_MASK) << 2) | ((c_top_cbp & C_BOT_ROW_MASK) >> 2); + if mb_x == 0 { + c_v_deblock &= !C_LEFT_COL_MASK; + } + if row == 0 { + c_h_deblock &= !C_TOP_ROW_MASK; + } + if is_last_row || is_strong || bot_is_strong { + c_h_deblock &= !(C_TOP_ROW_MASK << 4); + } + for y in 0..2 { let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp]; for x in 0..2 { - let bpos = x + y * 2 + (comp - 1) * 4; + let bpos = x + y * 2; - let filter_hor_down = (y != 1) && !is_strong; - let filter_ver = (x > 0) || (mb_x > 0); - let filter_hor_up = (row > 0) && (x == 0) && top_is_strong; - let ver_strong = (x == 0) && (mb_x > 0) && left_is_strong; + let ver_strong = (x == 0) && (is_strong || left_is_strong); let cur_strength: usize; if is_strong { cur_strength = 2; - } else if test_bit!(cur_cbp, bpos) { + } else if test_bit!(c_cur_cbp, bpos) { cur_strength = 1; } else { cur_strength = 0; @@ -724,7 +802,7 @@ impl RV34DSP for RV40DSP { if x > 0 { if is_strong { left_strength = 2; - } else if test_bit!(cur_cbp, bpos - 1) { + } else if test_bit!(c_cur_cbp, bpos - 1) { left_strength = 1; } else { left_strength = 0; @@ -732,7 +810,7 @@ impl RV34DSP for RV40DSP { } else if mb_x > 0 { if left_is_strong { left_strength = 2; - } else if test_bit!(mbinfo[mb_pos - 1].cbp_c, bpos + 1) { + } else if test_bit!(c_left_cbp, bpos + 1) { left_strength = 1; } else { left_strength = 0; @@ -742,10 +820,18 @@ impl RV34DSP for RV40DSP { } let bot_strength: usize; - if y == 0 { + if y != 3 { if is_strong { bot_strength = 2; - } else if test_bit!(cur_cbp, bpos + 2) { + } else if test_bit!(c_cur_cbp, bpos + 2) { + bot_strength = 1; + } else { + bot_strength = 0; + } + } else if !is_last_row { + if mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16() { + bot_strength = 2; + } else if test_bit!(c_bot_cbp, x) { bot_strength = 1; } else { bot_strength = 0; @@ -758,7 +844,7 @@ impl RV34DSP for RV40DSP { if y > 0 { if is_strong { top_strength = 2; - } else if test_bit!(cur_cbp, bpos - 2) { + } else if test_bit!(c_cur_cbp, bpos - 2) { top_strength = 1; } else { top_strength = 0; @@ -766,7 +852,7 @@ impl RV34DSP for RV40DSP { } else if row > 0 { if top_is_strong { top_strength = 2; - } else if test_bit!(mbinfo[mb_pos - mb_w].cbp_c, bpos + 2) { + } else if test_bit!(c_top_cbp, bpos + 2) { top_strength = 1; } else { top_strength = 0; @@ -783,19 +869,19 @@ impl RV34DSP for RV40DSP { let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q]; let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q]; - if filter_hor_down { + if test_bit!(c_h_deblock, bpos + 2) { rv40_loop_filter4_h(dst, coff + 4 * stride[comp] + x * 4, stride[comp], x * 8, lim_cur, lim_bottom, alpha, beta, beta_c, true, false); } - if filter_ver && !ver_strong { + if test_bit!(c_v_deblock, bpos) && !ver_strong { rv40_loop_filter4_v(dst, coff + x * 4, stride[comp], y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, false); } - if filter_hor_up { + if (y == 0) && test_bit!(c_h_deblock, bpos) && (is_strong || top_is_strong) { rv40_loop_filter4_h(dst, coff + x * 4, stride[comp], x * 8, lim_top, lim_cur, alpha, beta, beta_c, true, true); } - if filter_ver && ver_strong { + if test_bit!(c_v_deblock, bpos) && ver_strong { rv40_loop_filter4_v(dst, coff + x * 4, stride[comp], y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, true); } @@ -804,6 +890,8 @@ impl RV34DSP for RV40DSP { } left_q = q; + left_dbk = cur_dbk; + left_cbp = cur_cbp; mb_pos += 1; } @@ -834,7 +922,7 @@ impl RV34DSP for RV40DSP { self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, src, soffset, sstride); } else { let mut ebuf: [u8; 32*22] = [0; 32*22]; - edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0); + edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0, 4); self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, &ebuf, 32*2 + 2, 32); } } @@ -870,7 +958,7 @@ impl RV34DSP for RV40DSP { rv40_chroma_mc(dst, doffset, dstride, src, soffset, sstride, size, cx, cy); } else { let mut ebuf: [u8; 16*10] = [0; 16*10]; - edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp); + edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp, 4); rv40_chroma_mc(dst, doffset, dstride, &ebuf, 0, 16, size, cx, cy); } }