From 834e7b28f36dbc5f46197730781b0b74c0c215f7 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Fri, 4 Aug 2023 18:55:18 +0200 Subject: [PATCH] h264: reduce number of arguments for mc_blocks() --- nihav-itu/src/codecs/h264/dsp/mc/mod.rs | 26 +++++++++++++++---- nihav-itu/src/codecs/h264/dsp/mod.rs | 2 +- nihav-itu/src/codecs/h264/mb_recon.rs | 34 ++++++++++--------------- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/nihav-itu/src/codecs/h264/dsp/mc/mod.rs b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs index 27bffe5..19f2f28 100644 --- a/nihav-itu/src/codecs/h264/dsp/mc/mod.rs +++ b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs @@ -36,6 +36,22 @@ trait RegisterSIMD { fn register_simd(&mut self); } +#[repr(align(16))] +pub struct McBlock { + pub y: [u8; 16 * 16], + pub u: [u8; 16 * 16], + pub v: [u8; 16 * 16], +} + +impl McBlock { + pub fn new() -> Self { + unsafe { + let blk = std::mem::MaybeUninit::uninit(); + blk.assume_init() + } + } +} + #[allow(clippy::type_complexity)] pub struct H264MC { avg_buf: NAVideoBufferRef, @@ -119,7 +135,7 @@ impl H264MC { } } - pub fn mc_blocks(&mut self, ydst: &mut [u8], udst: &mut [u8], vdst: &mut [u8], refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { + pub fn mc_blocks(&mut self, dst: &mut McBlock, refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize; let pre = if mode != 0 { 2 } else { 0 }; @@ -141,14 +157,14 @@ impl H264MC { let edge = (pre + post) as usize; edge_emu(&refpic, sx - pre, sy - pre, w + edge, h + edge, &mut ebuf, EBUF_STRIDE, 0, 0); - (H264_LUMA_INTERP[wmode][mode])(ydst, 16, &ebuf, EBUF_STRIDE, h); + (H264_LUMA_INTERP[wmode][mode])(&mut dst.y, 16, &ebuf, EBUF_STRIDE, h); } else { let sstride = refpic.get_stride(0); let soff = refpic.get_offset(0); let sdta = refpic.get_data(); let sbuf: &[u8] = sdta.as_slice(); let saddr = soff + ((sx - pre) as usize) + ((sy - pre) as usize) * sstride; - (H264_LUMA_INTERP[wmode][mode])(ydst, 16, &sbuf[saddr..], sstride, h); + (H264_LUMA_INTERP[wmode][mode])(&mut dst.y, 16, &sbuf[saddr..], sstride, h); } let (cw, ch) = (self.width >> 1, self.height >> 1); @@ -174,8 +190,8 @@ impl H264MC { &src[svoff + (src_x as usize) + (src_y as usize) * svstride..]], [sustride, svstride]) }; - (self.chroma_interp[wmode])(udst, 16, csrc[0], cstride[0], dx, dy, cbh); - (self.chroma_interp[wmode])(vdst, 16, csrc[1], cstride[1], dx, dy, cbh); + (self.chroma_interp[wmode])(&mut dst.u, 16, csrc[0], cstride[0], dx, dy, cbh); + (self.chroma_interp[wmode])(&mut dst.v, 16, csrc[1], cstride[1], dx, dy, cbh); } pub fn do_mc_avg(&mut self, frm: &mut NASimpleVideoFrame, refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { diff --git a/nihav-itu/src/codecs/h264/dsp/mod.rs b/nihav-itu/src/codecs/h264/dsp/mod.rs index 05b46e2..b07ffe8 100644 --- a/nihav-itu/src/codecs/h264/dsp/mod.rs +++ b/nihav-itu/src/codecs/h264/dsp/mod.rs @@ -1,5 +1,5 @@ mod mc; -pub use mc::H264MC; +pub use mc::{H264MC, McBlock}; #[cfg(target_arch="x86_64")] use std::arch::asm; diff --git a/nihav-itu/src/codecs/h264/mb_recon.rs b/nihav-itu/src/codecs/h264/mb_recon.rs index 0a63edf..6b0beed 100644 --- a/nihav-itu/src/codecs/h264/mb_recon.rs +++ b/nihav-itu/src/codecs/h264/mb_recon.rs @@ -210,10 +210,8 @@ fn do_p_mc(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, w: usize, if !weight.is_weighted() { mc_dsp.do_mc(frm, buf, xpos, ypos, w, h, mv); } else { - let mut ytmp = [0; 16 * 16]; - let mut utmp = [0; 16 * 16]; - let mut vtmp = [0; 16 * 16]; - mc_dsp.mc_blocks(&mut ytmp, &mut utmp, &mut vtmp, buf, xpos, ypos, w, h, mv); + let mut tmp = McBlock::new(); + mc_dsp.mc_blocks(&mut tmp, buf, xpos, ypos, w, h, mv); let yoff = frm.offset[0] + xpos + ypos * frm.stride[0]; let yw = if weight.luma_weighted { @@ -227,7 +225,7 @@ fn do_p_mc(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, w: usize, 8 => 2, _ => 3, }; - (mc_dsp.put_block_weighted[wmode])(&mut frm.data[yoff..], frm.stride[0], &ytmp, h, yw); + (mc_dsp.put_block_weighted[wmode])(&mut frm.data[yoff..], frm.stride[0], &tmp.y, h, yw); for chroma in 0..2 { let cstride = frm.stride[chroma + 1]; @@ -237,7 +235,7 @@ fn do_p_mc(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, w: usize, } else { [1, 0, 0] }; - let csrc = if chroma == 0 { &utmp } else { &vtmp }; + let csrc = if chroma == 0 { &tmp.u } else { &tmp.v }; (mc_dsp.put_block_weighted[wmode - 1])(&mut frm.data[coff..], cstride, csrc, h / 2, cw); } } @@ -289,16 +287,12 @@ fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usi }, }; } else { - let mut ytmp0 = [0x80; 16 * 16]; - let mut utmp0 = [0x80; 16 * 16]; - let mut vtmp0 = [0x80; 16 * 16]; - let mut ytmp1 = [0x80; 16 * 16]; - let mut utmp1 = [0x80; 16 * 16]; - let mut vtmp1 = [0x80; 16 * 16]; + let mut tmp0 = McBlock::new(); + let mut tmp1 = McBlock::new(); match (mode, ref_pic0, ref_pic1) { (BMode::L0, Some(buf), _) | (BMode::L1, _, Some(buf)) => { let (mv, weight) = if mode == BMode::L0 { (mv0, weight0) } else { (mv1, weight1) }; - mc_dsp.mc_blocks(&mut ytmp0, &mut utmp0, &mut vtmp0, buf, xpos, ypos, w, h, mv); + mc_dsp.mc_blocks(&mut tmp0, buf, xpos, ypos, w, h, mv); let yoff = frm.offset[0] + xpos + ypos * frm.stride[0]; let yw = if weight.luma_weighted { @@ -312,7 +306,7 @@ fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usi 8 => 2, _ => 3, }; - (mc_dsp.put_block_weighted[wmode])(&mut frm.data[yoff..], frm.stride[0], &ytmp0, h, yw); + (mc_dsp.put_block_weighted[wmode])(&mut frm.data[yoff..], frm.stride[0], &tmp0.y, h, yw); for chroma in 0..2 { let cstride = frm.stride[chroma + 1]; @@ -322,13 +316,13 @@ fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usi } else { [1, 0, 0] }; - let csrc = if chroma == 0 { &utmp0 } else { &vtmp0 }; + let csrc = if chroma == 0 { &tmp0.u } else { &tmp0.v }; (mc_dsp.put_block_weighted[wmode - 1])(&mut frm.data[coff..], cstride, csrc, h / 2, cw); } }, (BMode::Bi, Some(buf0), Some(buf1)) => { // do both and avg - mc_dsp.mc_blocks(&mut ytmp0, &mut utmp0, &mut vtmp0, buf0, xpos, ypos, w, h, mv0); - mc_dsp.mc_blocks(&mut ytmp1, &mut utmp1, &mut vtmp1, buf1, xpos, ypos, w, h, mv1); + mc_dsp.mc_blocks(&mut tmp0, buf0, xpos, ypos, w, h, mv0); + mc_dsp.mc_blocks(&mut tmp1, buf1, xpos, ypos, w, h, mv1); let yoff = frm.offset[0] + xpos + ypos * frm.stride[0]; let yw = match (weight0.luma_weighted, weight1.luma_weighted) { @@ -343,7 +337,7 @@ fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usi 8 => 2, _ => 3, }; - (mc_dsp.put_block_weighted2[wmode])(&mut frm.data[yoff..], frm.stride[0], &ytmp0, &ytmp1, h, yw); + (mc_dsp.put_block_weighted2[wmode])(&mut frm.data[yoff..], frm.stride[0], &tmp0.y, &tmp1.y, h, yw); for chroma in 0..2 { let cstride = frm.stride[chroma + 1]; @@ -358,8 +352,8 @@ fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usi (false, true) => [1 << weight1.luma_shift, 0, cw1, co1, weight1.luma_shift as i8], (false, false) => [1, 0, 1, 0, 0], }; - let csrc0 = if chroma == 0 { &utmp0 } else { &vtmp0 }; - let csrc1 = if chroma == 0 { &utmp1 } else { &vtmp1 }; + let csrc0 = if chroma == 0 { &tmp0.u } else { &tmp0.v }; + let csrc1 = if chroma == 0 { &tmp1.u } else { &tmp1.v }; (mc_dsp.put_block_weighted2[wmode - 1])(&mut frm.data[coff..], cstride, csrc0, csrc1, h / 2, cw); } }, -- 2.39.5