use nihav_core::frame::{FrameType, NAVideoBuffer};
-use nihav_core::codecs::MV;
-use nihav_core::codecs::blockdsp::edge_emu;
+use nihav_codec_support::codecs::MV;
+use nihav_codec_support::codecs::blockdsp::edge_emu;
use super::rv3040::{RV34DSP, RV34MBInfo};
fn clip8(a: i16) -> u8 {
-5 * el!($s, $o + 2 * $step)
+ el!($s, $o + 3 * $step) + 32) >> 6) as i16)
);
+ (33; $s: ident, $o: expr, $stride: expr) => ( // arm selected by a leading `33` token; yields one clipped u8 value
+ clip8((( el!($s, $o) // sums the four el! values at $o, $o + 1, $o + $stride and $o + 1 + $stride
+ + el!($s, $o + 1)
+ + el!($s, $o + $stride)
+ + el!($s, $o + 1 + $stride) + 2) >> 2) as i16) // + 2 before >> 2: divide the sum by four with rounding
+ );
}
macro_rules! mc_func {
$ofilt(dst, didx, dstride, &buf, 2*bstride, $size);
}
);
+ (mc33; $name: ident, $size: expr) => ( // arm that emits a function $name filling a $size x $size area of dst
+ fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
+ for _ in 0..$size { // one destination row per iteration
+ for x in 0..$size { dst[didx + x] = filter!(33; src, sidx + x, sstride); } // the `33` filter arm also reads offsets + 1 and + sstride, so src is touched one column right and one row below
+ sidx += sstride; // advance the source cursor by one row
+ didx += dstride; // advance the destination cursor by one row
+ }
+ }
+ );
}
mc_func!(copy; copy_16, 16);
mc_func!(copy; copy_8, 8);
mc_func!(cm03; luma_mc_31_8, 8, luma_mc_01_8);
mc_func!(cm03; luma_mc_32_16, 16, luma_mc_02_16);
mc_func!(cm03; luma_mc_32_8, 8, luma_mc_02_8);
-mc_func!(cm03; luma_mc_33_16, 16, luma_mc_03_16);
-mc_func!(cm03; luma_mc_33_8, 8, luma_mc_03_8);
+mc_func!(mc33; luma_mc_33_16, 16); // same function name as the removed cm03 instantiation, now built from the mc33 arm
+mc_func!(mc33; luma_mc_33_8, 8); // 8-wide counterpart, likewise switched from cm03 to mc33
const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [
[ 0, 4, 8, 4 ],
pix[off - step] = clip8(p0 + diff);
pix[off ] = clip8(q0 - diff);
- if filter_p1 && ((p1 - p0).wrapping_abs() <= beta) {
+ if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
pix[off - 2*step] = clip8(p1 - clip_symm(p1_diff, lim_p1));
}
- if filter_q1 && ((q1 - q0).wrapping_abs() <= beta) {
+ if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
pix[off + step] = clip8(q1 - clip_symm(q1_diff, lim_q1));
}
ch[3 - 1] = clip8(p0 + diff);
ch[3 ] = clip8(q0 - diff);
- if filter_p1 && ((p1 - p0).wrapping_abs() <= beta) {
+ if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
ch[3 - 2] = clip8(p1 - clip_symm(p1_diff, lim_p1));
}
- if filter_q1 && ((q1 - q0).wrapping_abs() <= beta) {
+ if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
ch[3 + 1] = clip8(q1 - clip_symm(q1_diff, lim_q1));
}
} else if val > c + lims {
c + lims
} else {
- c
+ val
}
} else {
val
let q3 = el!(pix, off + 3*step);
let np0 = sfilter(p2, p1, p0, q0, q1, RV40_DITHER_L[dmode + i], fmode != 0, lims);
- let nq0 = sfilter( p1, p0, q0, q1, q0, RV40_DITHER_R[dmode + i], fmode != 0, lims);
+ let nq0 = sfilter( p1, p0, q0, q1, q2, RV40_DITHER_R[dmode + i], fmode != 0, lims);
let np1 = sfilter(p3, p2, p1, np0, q0, RV40_DITHER_L[dmode + i], fmode != 0, lims);
let nq1 = sfilter( p0, nq0, q1, q2, q3, RV40_DITHER_R[dmode + i], fmode != 0, lims);
pix[off + step] = nq1 as u8;
if !chroma {
- let np2 = sfilter(np0, np1, p2, p3, np1, 64, false, 0);
- let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0);
+ let np2 = sfilter(np0, np1, p2, p3, p2, 64, false, 0);
+ let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0);
pix[off - 3*step] = np2 as u8;
pix[off + 2*step] = nq2 as u8;
}
const RV40_EDGE1: [isize; 4] = [ 0, 2, 2, 2 ];
const RV40_EDGE2: [isize; 4] = [ 0, 3, 3, 3 ];
+const Y_TOP_ROW_MASK: u32 = 0x000F; // bits 0-3: luma positions with y == 0 under bpos = x + y * 4
+const Y_BOT_ROW_MASK: u32 = 0xF000; // bits 12-15: luma positions with y == 3
+const Y_LEFT_COL_MASK: u32 = 0x1111; // bits 0, 4, 8, 12: luma positions with x == 0
+const Y_RIGHT_COL_MASK: u32 = 0x8888; // bits 3, 7, 11, 15: luma positions with x == 3
+const C_TOP_ROW_MASK: u32 = 0x3; // bits 0-1: chroma positions with y == 0 under bpos = x + y * 2
+const C_BOT_ROW_MASK: u32 = 0xC; // bits 2-3: chroma positions with y == 1
+const C_LEFT_COL_MASK: u32 = 0x5; // bits 0 and 2: chroma positions with x == 0
+const C_RIGHT_COL_MASK: u32 = 0xA; // bits 1 and 3: chroma positions with x == 1
+
impl RV34DSP for RV40DSP {
- fn loop_filter(&self, frame: &mut NAVideoBuffer<u8>, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, row: usize) {
+ fn loop_filter(&self, frame: &mut NAVideoBuffer<u8>, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, mb_h: usize, row: usize) { // new mb_h parameter; in the lines shown it is only used to compute is_last_row
// todo proper B-frame filtering?
let mut offs: [usize; 3] = [0; 3];
let mut stride: [usize; 3] = [0; 3];
let data = frame.get_data_mut().unwrap();
let dst: &mut [u8] = data.as_mut_slice();
+ let is_last_row = row == mb_h - 1; // guards every mbinfo[mb_pos + mb_w] access below; NOTE(review): mb_h - 1 underflows if mb_h can be 0 - confirm callers never pass 0
+
let mut mb_pos: usize = row * mb_w;
let mut left_q: usize = 0;
+ let mut left_cbp = 0; // cur_cbp of the previous macroblock in this row (assigned at the end of each iteration)
+ let mut left_dbk = 0; // cur_dbk of the previous macroblock in this row (assigned at the end of each iteration)
for mb_x in 0..mb_w {
let q = mbinfo[mb_pos].q as usize;
let alpha = RV40_ALPHA_TAB[q];
let beta_y = if small_frame { beta * 4 } else { beta * 3 };
let beta_c = beta * 3;
- let cur_dbk = mbinfo[mb_pos].deblock;
- let cur_cbp = mbinfo[mb_pos].cbp_c;
-
let is_strong = mbinfo[mb_pos].mbtype.is_intra_or_16();
- let top_is_strong = is_strong || (row > 0 && mbinfo[mb_pos - mb_w].mbtype.is_intra_or_16());
- let left_is_strong = is_strong || (mb_x > 0 && mbinfo[mb_pos - 1].mbtype.is_intra_or_16());
+ let top_is_strong = row > 0 && mbinfo[mb_pos - mb_w].mbtype.is_intra_or_16(); // now describes the neighbour only; is_strong is ORed in explicitly where a use needs it
+ let left_is_strong = mb_x > 0 && mbinfo[mb_pos - 1].mbtype.is_intra_or_16(); // same change as for top_is_strong
+ let bot_is_strong = !is_last_row && mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16(); // same test applied to the macroblock one row down
+
+ let cur_dbk = mbinfo[mb_pos].deblock;
+ let cur_cbp = if is_strong { 0xFFFFFF } else { mbinfo[mb_pos].cbp }; // a strong macroblock is treated as having all 24 cbp bits set; reads .cbp where the removed line read .cbp_c
+
+ let (top_cbp, top_dbk) = if row > 0 { // values of the macroblock one row up, zeros on the first row
+ (if top_is_strong { 0xFFFFFF } else { mbinfo[mb_pos - mb_w].cbp }, mbinfo[mb_pos - mb_w].deblock)
+ } else {
+ (0, 0)
+ };
+ let (bot_cbp, bot_dbk) = if !is_last_row { // values of the macroblock one row down, zeros on the last row
+ (mbinfo[mb_pos + mb_w].cbp, mbinfo[mb_pos + mb_w].deblock) // not forced to all-ones for a strong neighbour; the mask bits derived from it are cleared below when bot_is_strong
+ } else {
+ (0, 0)
+ };
+
+ let y_cbp = cur_cbp & 0xFFFF; // low 16 bits are used for luma; chroma is taken from bits 16 and up further down
+ let y_to_deblock = (cur_dbk as u32) | ((bot_dbk as u32) << 16); // this macroblock's deblock flags in the low bits, those of the macroblock below from bit 16 up
+ let mut y_h_deblock = y_to_deblock | ((y_cbp << 4) & !Y_TOP_ROW_MASK) | ((top_cbp & Y_BOT_ROW_MASK) >> 12); // adds each y_cbp bit moved one block row down (<< 4) and top_cbp's bottom row moved into row 0 (>> 12)
+ let mut y_v_deblock = y_to_deblock | ((y_cbp << 1) & !Y_LEFT_COL_MASK) | ((left_cbp & Y_RIGHT_COL_MASK) >> 3); // adds each y_cbp bit moved one block column right (<< 1) and left_cbp's right column moved into column 0 (>> 3)
+
+ if mb_x == 0 {
+ y_v_deblock &= !Y_LEFT_COL_MASK; // first macroblock of the row: drop the column-0 bits
+ }
+ if row == 0 {
+ y_h_deblock &= !Y_TOP_ROW_MASK; // first macroblock row: drop the row-0 bits
+ }
+ if is_last_row || is_strong || bot_is_strong {
+ y_h_deblock &= !(Y_TOP_ROW_MASK << 16); // bits 16-19 are the ones read through bpos + 4 when y == 3 (assuming test_bit!(v, n) tests bit n of v)
+ }
for y in 0..4 {
let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0];
for x in 0..4 {
let bpos = x + y * 4;
- let filter_hor_down = (y != 3) && !is_strong;
- let filter_ver = (x > 0) || (mb_x > 0);
- let filter_hor_up = (row > 0) && (x == 0) && top_is_strong;
- let ver_strong = (x == 0) && (mb_x > 0) && left_is_strong;
+ let ver_strong = (x == 0) && (mb_x > 0) && (is_strong || left_is_strong); // is_strong is ORed in here because left_is_strong no longer includes it
let cur_strength: usize;
if is_strong {
} else if mb_x > 0 {
if left_is_strong {
left_strength = 2;
- } else if test_bit!(mbinfo[mb_pos - 1].deblock, bpos + 3) {
+ } else if test_bit!(left_dbk, bpos + 3) { // left_dbk holds the deblock value the removed line read from mbinfo[mb_pos - 1]
left_strength = 1;
} else {
left_strength = 0;
} else {
bot_strength = 0;
}
+ } else if !is_last_row {
+ if mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16() {
+ bot_strength = 2;
+ } else if test_bit!(bot_dbk, x) { // bit x: position (x, 0) of the macroblock below under bpos = x + y * 4
+ bot_strength = 1;
+ } else {
+ bot_strength = 0;
+ }
} else {
bot_strength = 0;
}
} else if row > 0 {
if top_is_strong {
top_strength = 2;
- } else if test_bit!(mbinfo[mb_pos - mb_w].deblock, bpos + 12) {
+ } else if test_bit!(top_dbk, bpos + 12) { // top_dbk holds the deblock value the removed line read from mbinfo[mb_pos - mb_w]
top_strength = 1;
} else {
top_strength = 0;
let dmode = if y > 0 { x + y * 4 } else { x * 4 };
- if filter_hor_down {
+ if test_bit!(y_h_deblock, bpos + 4) { // edge below this block (the call filters at yoff + 4 * stride[0])
rv40_loop_filter4_h(dst, yoff + 4 * stride[0] + x * 4, stride[0],
dmode, lim_cur, lim_bottom, alpha, beta, beta_y, false, false);
}
- if filter_ver && !ver_strong {
+ if test_bit!(y_v_deblock, bpos) && !ver_strong { // left edge of this block, last argument false
rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
dmode, lim_left, lim_cur, alpha, beta, beta_y, false, false);
}
- if filter_hor_up {
+ if (y == 0) && test_bit!(y_h_deblock, bpos) && (is_strong || top_is_strong) { // top edge of the macroblock when this or the upper macroblock is strong; tests y == 0 where the removed flag tested x == 0
rv40_loop_filter4_h(dst, yoff + x * 4, stride[0],
dmode, lim_top, lim_cur, alpha, beta, beta_y, false, true);
}
- if filter_ver && ver_strong {
+ if test_bit!(y_v_deblock, bpos) && ver_strong { // same left edge, last argument true
rv40_loop_filter4_v(dst, yoff + x * 4, stride[0],
dmode, lim_left, lim_cur, alpha, beta, beta_y, false, true);
}
}
for comp in 1..3 {
+ let cshift = 16 - 4 + comp * 4; // 16 for comp 1, 20 for comp 2
+ let c_cur_cbp = (cur_cbp >> cshift) & 0xF; // four cbp bits per chroma component
+ let c_top_cbp = (top_cbp >> cshift) & 0xF;
+ let c_left_cbp = (left_cbp >> cshift) & 0xF;
+ let c_bot_cbp = (bot_cbp >> cshift) & 0xF;
+
+ let c_deblock = c_cur_cbp | (c_bot_cbp << 4); // bits 0-3: this macroblock, bits 4-7: the macroblock below
+ let mut c_v_deblock = c_deblock | ((c_cur_cbp << 1) & !C_LEFT_COL_MASK) | ((c_left_cbp & C_RIGHT_COL_MASK) >> 1); // adds each bit moved one column right (<< 1) and c_left_cbp's right column moved into column 0 (>> 1)
+ let mut c_h_deblock = c_deblock | ((c_cur_cbp & C_TOP_ROW_MASK) << 2) | ((c_top_cbp & C_BOT_ROW_MASK) >> 2); // adds the top row moved into the bottom row (<< 2) and c_top_cbp's bottom row moved into row 0 (>> 2)
+ if mb_x == 0 {
+ c_v_deblock &= !C_LEFT_COL_MASK; // first macroblock of the row: drop the column-0 bits
+ }
+ if row == 0 {
+ c_h_deblock &= !C_TOP_ROW_MASK; // first macroblock row: drop the row-0 bits
+ }
+ if is_last_row || is_strong || bot_is_strong {
+ c_h_deblock &= !(C_TOP_ROW_MASK << 4); // bits 4-5 are the ones read through bpos + 2 when y == 1
+ }
+
for y in 0..2 {
let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp];
for x in 0..2 {
- let bpos = x + y * 2 + (comp - 1) * 4;
+ let bpos = x + y * 2; // index inside the 4-bit per-component pattern; the per-component offset is now applied through cshift
- let filter_hor_down = (y != 1) && !is_strong;
- let filter_ver = (x > 0) || (mb_x > 0);
- let filter_hor_up = (row > 0) && (x == 0) && top_is_strong;
- let ver_strong = (x == 0) && (mb_x > 0) && left_is_strong;
+ let ver_strong = (x == 0) && (is_strong || left_is_strong); // no mb_x > 0 term, unlike luma; c_v_deblock already has its column-0 bits cleared when mb_x == 0
let cur_strength: usize;
if is_strong {
cur_strength = 2;
- } else if test_bit!(cur_cbp, bpos) {
+ } else if test_bit!(c_cur_cbp, bpos) {
cur_strength = 1;
} else {
cur_strength = 0;
if x > 0 {
if is_strong {
left_strength = 2;
- } else if test_bit!(cur_cbp, bpos - 1) {
+ } else if test_bit!(c_cur_cbp, bpos - 1) {
left_strength = 1;
} else {
left_strength = 0;
} else if mb_x > 0 {
if left_is_strong {
left_strength = 2;
- } else if test_bit!(mbinfo[mb_pos - 1].cbp_c, bpos + 1) {
+ } else if test_bit!(c_left_cbp, bpos + 1) { // x == 0 in this branch, so bpos + 1 is the x == 1 position of the same row in the left macroblock
left_strength = 1;
} else {
left_strength = 0;
}
let bot_strength: usize;
- if y == 0 {
+ if y != 3 { // NOTE(review): y comes from `for y in 0..2` above, so this looks always true - confirm whether `y != 1` was intended
if is_strong {
bot_strength = 2;
- } else if test_bit!(cur_cbp, bpos + 2) {
+ } else if test_bit!(c_cur_cbp, bpos + 2) { // for y == 1 this indexes bit 4 or 5, which the & 0xF on c_cur_cbp has already cleared
+ bot_strength = 1;
+ } else {
+ bot_strength = 0;
+ }
+ } else if !is_last_row { // NOTE(review): only reachable if the `y != 3` test above can be false
+ if mbinfo[mb_pos + mb_w].mbtype.is_intra_or_16() {
+ bot_strength = 2;
+ } else if test_bit!(c_bot_cbp, x) { // bit x: position (x, 0) of the macroblock below under bpos = x + y * 2
bot_strength = 1;
} else {
bot_strength = 0;
if y > 0 {
if is_strong {
top_strength = 2;
- } else if test_bit!(cur_cbp, bpos - 2) {
+ } else if test_bit!(c_cur_cbp, bpos - 2) {
top_strength = 1;
} else {
top_strength = 0;
} else if row > 0 {
if top_is_strong {
top_strength = 2;
- } else if test_bit!(mbinfo[mb_pos - mb_w].cbp_c, bpos + 2) {
+ } else if test_bit!(c_top_cbp, bpos + 2) { // y == 0 in this branch, so bpos + 2 is the y == 1 position of the same column in the upper macroblock
top_strength = 1;
} else {
top_strength = 0;
let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
- if filter_hor_down {
+ if test_bit!(c_h_deblock, bpos + 2) { // edge below this block (the call filters at coff + 4 * stride[comp])
rv40_loop_filter4_h(dst, coff + 4 * stride[comp] + x * 4, stride[comp],
x * 8, lim_cur, lim_bottom, alpha, beta, beta_c, true, false);
}
- if filter_ver && !ver_strong {
+ if test_bit!(c_v_deblock, bpos) && !ver_strong { // left edge of this block, last argument false
rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, false);
}
- if filter_hor_up {
+ if (y == 0) && test_bit!(c_h_deblock, bpos) && (is_strong || top_is_strong) { // top edge of the macroblock when this or the upper macroblock is strong
rv40_loop_filter4_h(dst, coff + x * 4, stride[comp],
x * 8, lim_top, lim_cur, alpha, beta, beta_c, true, true);
}
- if filter_ver && ver_strong {
+ if test_bit!(c_v_deblock, bpos) && ver_strong { // same left edge, last argument true
rv40_loop_filter4_v(dst, coff + x * 4, stride[comp],
y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, true);
}
}
left_q = q;
+ left_dbk = cur_dbk; // becomes the left-neighbour deblock value for the next mb_x
+ left_cbp = cur_cbp; // keeps the 0xFFFFFF substitution made for strong macroblocks
mb_pos += 1;
}
self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, src, soffset, sstride);
} else {
let mut ebuf: [u8; 32*22] = [0; 32*22];
- edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0);
+ edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0, 4);
self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, &ebuf, 32*2 + 2, 32);
}
}
rv40_chroma_mc(dst, doffset, dstride, src, soffset, sstride, size, cx, cy);
} else {
let mut ebuf: [u8; 16*10] = [0; 16*10];
- edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp);
+ edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp, 4);
rv40_chroma_mc(dst, doffset, dstride, &ebuf, 0, 16, size, cx, cy);
}
}