X-Git-Url: https://git.nihav.org/?a=blobdiff_plain;f=nihav-itu%2Fsrc%2Fcodecs%2Fh264%2Fdsp%2Fmc%2Fmod.rs;h=f55844194ad6972aabacfaad2c14e5c040b0b0a2;hb=754ab49a62c862e8c6e66ec88bb7ad626247140e;hp=27bffe5e96041dd27bbaa840bd7fab3c234787fc;hpb=42005e259dd77147b77c7a0057aa3cf033e331d0;p=nihav.git diff --git a/nihav-itu/src/codecs/h264/dsp/mc/mod.rs b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs index 27bffe5..f558441 100644 --- a/nihav-itu/src/codecs/h264/dsp/mc/mod.rs +++ b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs @@ -36,6 +36,22 @@ trait RegisterSIMD { fn register_simd(&mut self); } +#[repr(align(16))] +pub struct McBlock { + pub y: [u8; 16 * 16], + pub u: [u8; 16 * 16], + pub v: [u8; 16 * 16], +} + +impl McBlock { + pub fn new() -> Self { + unsafe { + let blk = std::mem::MaybeUninit::uninit(); + blk.assume_init() + } + } +} + #[allow(clippy::type_complexity)] pub struct H264MC { avg_buf: NAVideoBufferRef, @@ -119,7 +135,7 @@ impl H264MC { } } - pub fn mc_blocks(&mut self, ydst: &mut [u8], udst: &mut [u8], vdst: &mut [u8], refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { + pub fn mc_blocks(&mut self, dst: &mut McBlock, refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize; let pre = if mode != 0 { 2 } else { 0 }; @@ -141,14 +157,14 @@ impl H264MC { let edge = (pre + post) as usize; edge_emu(&refpic, sx - pre, sy - pre, w + edge, h + edge, &mut ebuf, EBUF_STRIDE, 0, 0); - (H264_LUMA_INTERP[wmode][mode])(ydst, 16, &ebuf, EBUF_STRIDE, h); + (H264_LUMA_INTERP[wmode][mode])(&mut dst.y, 16, &ebuf, EBUF_STRIDE, h); } else { let sstride = refpic.get_stride(0); let soff = refpic.get_offset(0); let sdta = refpic.get_data(); let sbuf: &[u8] = sdta.as_slice(); let saddr = soff + ((sx - pre) as usize) + ((sy - pre) as usize) * sstride; - (H264_LUMA_INTERP[wmode][mode])(ydst, 16, &sbuf[saddr..], sstride, h); + (H264_LUMA_INTERP[wmode][mode])(&mut dst.y, 16, &sbuf[saddr..], sstride, h); } let (cw, ch) = (self.width >> 1, self.height >> 1); @@ -174,8 +190,8 @@ impl H264MC { &src[svoff + (src_x as usize) + (src_y as usize) * svstride..]], [sustride, svstride]) }; - (self.chroma_interp[wmode])(udst, 16, csrc[0], cstride[0], dx, dy, cbh); - (self.chroma_interp[wmode])(vdst, 16, csrc[1], cstride[1], dx, dy, cbh); + (self.chroma_interp[wmode])(&mut dst.u, 16, csrc[0], cstride[0], dx, dy, cbh); + (self.chroma_interp[wmode])(&mut dst.v, 16, csrc[1], cstride[1], dx, dy, cbh); } pub fn do_mc_avg(&mut self, frm: &mut NASimpleVideoFrame, refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { @@ -257,7 +273,7 @@ fn put_block_weighted(dst: &mut [u8], stride: usize, src: &[u8], w: usize, h: us let wshift = wparams[2] as u8; let bias = (1 << wshift) >> 1; - for (drow, srow) in dst.chunks_mut(stride).zip(src.chunks(16)).take(h) { + for (drow, srow) in dst.chunks_mut(stride).zip(src.chunks_exact(16)).take(h) { for (dst, &src) in drow[..w].iter_mut().zip(srow.iter()) { *dst = clip_u8(((i16::from(src) * weight + bias) >> wshift) + offset); } @@ -286,7 +302,7 @@ fn put_block_weighted2(dst: &mut [u8], stride: usize, src0: &[u8], src1: &[u8], let offset = (offset0 + offset1 + 1) >> 1; let bias = (1 << wshift) >> 1; - for (drow, (srow0, srow1)) in dst.chunks_mut(stride).zip(src0.chunks(16).zip(src1.chunks(16))).take(h) { + for (drow, (srow0, srow1)) in dst.chunks_mut(stride).zip(src0.chunks_exact(16).zip(src1.chunks_exact(16))).take(h) { for (dst, (&src0, &src1)) in drow[..w].iter_mut().zip(srow0.iter().zip(srow1.iter())) { *dst = clip_u8(((i16::from(src0) * weight0 + i16::from(src1) * weight1 + bias) >> wshift) + offset); }