From: Kostya Shishkov Date: Wed, 31 Aug 2022 14:14:47 +0000 (+0200) Subject: h264/dsp: split chroma_interp() by width X-Git-Url: https://git.nihav.org/?a=commitdiff_plain;h=82f1307ee4a5a34978694f132076e86a11b26967;p=nihav.git h264/dsp: split chroma_interp() by width --- diff --git a/nihav-itu/src/codecs/h264/dsp/mc/debug.rs b/nihav-itu/src/codecs/h264/dsp/mc/debug.rs index 34d59da..e180800 100644 --- a/nihav-itu/src/codecs/h264/dsp/mc/debug.rs +++ b/nihav-itu/src/codecs/h264/dsp/mc/debug.rs @@ -164,7 +164,7 @@ fn h264_mc33(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usiz } -pub fn chroma_interp(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) { +fn chroma_interp(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) { let a0 = 8 - dx; let a1 = dx; let b0 = 8 - dy; @@ -202,6 +202,18 @@ pub fn chroma_interp(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, } } +pub fn chroma_interp_8(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, h: usize) { + chroma_interp(dst, dstride, src, sstride, dx, dy, 8, h); +} + +pub fn chroma_interp_4(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, h: usize) { + chroma_interp(dst, dstride, src, sstride, dx, dy, 4, h); +} + +pub fn chroma_interp_2(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, h: usize) { + chroma_interp(dst, dstride, src, sstride, dx, dy, 2, h); +} + macro_rules! luma_mc { ($orig:ident, $func4:ident, $func8:ident, $func16:ident) => { fn $func4(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) { diff --git a/nihav-itu/src/codecs/h264/dsp/mc/mod.rs b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs index e2036cc..55b124e 100644 --- a/nihav-itu/src/codecs/h264/dsp/mc/mod.rs +++ b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs @@ -23,6 +23,7 @@ pub struct H264MC { avg_buf: NAVideoBufferRef, pub put_block_weighted: [fn (dst: &mut [u8], stride: usize, src: &[u8], h: usize, wparams: [i8; 3]); 4], pub put_block_weighted2: [fn (dst: &mut [u8], stride: usize, src0: &[u8], src1: &[u8], h: usize, wparams: [i8; 5]); 4], + pub chroma_interp: [fn (dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, h: usize); 3], } impl H264MC { @@ -31,6 +32,7 @@ impl H264MC { avg_buf, put_block_weighted: [put_blk_w_2, put_blk_w_4, put_blk_w_8, put_blk_w_16], put_block_weighted2: [put_blk_w2_2, put_blk_w2_4, put_blk_w2_8, put_blk_w2_16], + chroma_interp: [chroma_interp_2, chroma_interp_4, chroma_interp_8], }; obj.register_simd(); obj @@ -85,7 +87,7 @@ impl H264MC { }; for chroma in 1..3 { let off = frm.offset[chroma] + xpos / 2 + (ypos / 2) * frm.stride[chroma]; - chroma_interp(&mut frm.data[off..], frm.stride[chroma], csrc[chroma - 1], cstride[chroma - 1], dx, dy, cbw, cbh); + (self.chroma_interp[wmode])(&mut frm.data[off..], frm.stride[chroma], csrc[chroma - 1], cstride[chroma - 1], dx, dy, cbh); } } @@ -144,8 +146,8 @@ impl H264MC { &src[svoff + (src_x as usize) + (src_y as usize) * svstride..]], [sustride, svstride]) }; - chroma_interp(udst, 16, csrc[0], cstride[0], dx, dy, cbw, cbh); - chroma_interp(vdst, 16, csrc[1], cstride[1], dx, dy, cbw, cbh); + (self.chroma_interp[wmode])(udst, 16, csrc[0], cstride[0], dx, dy, cbh); + (self.chroma_interp[wmode])(vdst, 16, csrc[1], cstride[1], dx, dy, cbh); } pub fn avg(&mut self, dst: &mut [u8], dstride: usize, bw: usize, bh: usize, comp: usize) { diff --git a/nihav-itu/src/codecs/h264/dsp/mc/release.rs b/nihav-itu/src/codecs/h264/dsp/mc/release.rs index 0a521d0..e4e7e16 100644 --- a/nihav-itu/src/codecs/h264/dsp/mc/release.rs +++ b/nihav-itu/src/codecs/h264/dsp/mc/release.rs @@ -204,7 +204,7 @@ fn h264_mc33(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usiz } -pub fn chroma_interp(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) { +fn chroma_interp(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) { let a0 = 8 - dx; let a1 = dx; let b0 = 8 - dy; @@ -274,6 +274,18 @@ pub fn chroma_interp(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, } } +pub fn chroma_interp_8(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, h: usize) { + chroma_interp(dst, dstride, src, sstride, dx, dy, 8, h); +} + +pub fn chroma_interp_4(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, h: usize) { + chroma_interp(dst, dstride, src, sstride, dx, dy, 4, h); +} + +pub fn chroma_interp_2(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, h: usize) { + chroma_interp(dst, dstride, src, sstride, dx, dy, 2, h); +} + macro_rules! luma_mc { ($orig:ident, $func4:ident, $func8:ident, $func16:ident) => { fn $func4(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) {