From 2f9923e6d1505270e2647ba3c3251dd6cfbc7c09 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Fri, 26 Aug 2022 18:15:50 +0200 Subject: [PATCH] h264: make a separate structure for motion compensation DSP --- .../src/codecs/h264/dsp/{ => mc}/debug.rs | 0 nihav-itu/src/codecs/h264/dsp/mc/mod.rs | 197 ++++++++++++++++++ .../src/codecs/h264/dsp/{ => mc}/release.rs | 0 nihav-itu/src/codecs/h264/dsp/mod.rs | 182 +--------------- nihav-itu/src/codecs/h264/mb_recon.rs | 88 ++++---- nihav-itu/src/codecs/h264/mod.rs | 6 +- 6 files changed, 246 insertions(+), 227 deletions(-) rename nihav-itu/src/codecs/h264/dsp/{ => mc}/debug.rs (100%) create mode 100644 nihav-itu/src/codecs/h264/dsp/mc/mod.rs rename nihav-itu/src/codecs/h264/dsp/{ => mc}/release.rs (100%) diff --git a/nihav-itu/src/codecs/h264/dsp/debug.rs b/nihav-itu/src/codecs/h264/dsp/mc/debug.rs similarity index 100% rename from nihav-itu/src/codecs/h264/dsp/debug.rs rename to nihav-itu/src/codecs/h264/dsp/mc/debug.rs diff --git a/nihav-itu/src/codecs/h264/dsp/mc/mod.rs b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs new file mode 100644 index 0000000..1f5ff20 --- /dev/null +++ b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs @@ -0,0 +1,197 @@ +use nihav_core::frame::*; +use nihav_codec_support::codecs::MV; +use nihav_codec_support::codecs::blockdsp::*; + +#[cfg(not(debug_assertions))] +mod release; +#[cfg(not(debug_assertions))] +use release::*; +#[cfg(debug_assertions)] +mod debug; +#[cfg(debug_assertions)] +use debug::*; + +fn clip_u8(val: i16) -> u8 { val.max(0).min(255) as u8 } + +pub struct H264MC { + avg_buf: NAVideoBufferRef, +} + +impl H264MC { + pub fn new(avg_buf: NAVideoBufferRef) -> Self { + Self { + avg_buf + } + } + pub fn do_mc(&mut self, frm: &mut NASimpleVideoFrame, refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { + let mut ebuf = [0u8; 22 * 22]; + let mvx = mv.x >> 2; + let mvy = mv.y >> 2; + let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize; + let pre = if mode != 0 { 2isize } else { 0 }; + let post = if mode != 0 { 3isize } else { 0 }; + let (yw, yh) = refpic.get_dimensions(0); + let src = refpic.get_data(); + let systride = refpic.get_stride(0); + let src_x = (xpos as isize) + (mvx as isize); + let src_y = (ypos as isize) + (mvy as isize); + let (ysrc, ystride) = if (src_x - pre < 0) || (src_x + (w as isize) + post > (yw as isize)) || (src_y - pre < 0) || (src_y + (h as isize) + post > (yh as isize)) { + let add = (pre + post) as usize; + edge_emu(&refpic, src_x - pre, src_y - pre, w + add, h + add, &mut ebuf, 22, 0, 0); + (ebuf.as_slice(), 22) + } else { + (&src[refpic.get_offset(0) + ((src_x - pre) as usize) + ((src_y - pre) as usize) * systride..], systride) + }; + (H264_LUMA_INTERP[mode])(&mut frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..], frm.stride[0], ysrc, ystride, w, h); + + let (cw, ch) = refpic.get_dimensions(1); + let mvx = mv.x >> 3; + let mvy = mv.y >> 3; + let dx = (mv.x & 7) as u16; + let dy = (mv.y & 7) as u16; + let src_x = ((xpos >> 1) as isize) + (mvx as isize); + let src_y = ((ypos >> 1) as isize) + (mvy as isize); + let suoff = refpic.get_offset(1); + let svoff = refpic.get_offset(2); + let sustride = refpic.get_stride(1); + let svstride = refpic.get_stride(2); + let cbw = w / 2; + let cbh = h / 2; + let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) || (src_y + (cbh as isize) + 1 > (ch as isize)) { + edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf, 18, 1, 4); + edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf[9..], 18, 2, 4); + ([&ebuf, &ebuf[9..]], [18, 18]) + } else { + ([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..], + &src[svoff + (src_x as usize) + (src_y as usize) * svstride..]], + [sustride, svstride]) + }; + for chroma in 1..3 { + let off = frm.offset[chroma] + xpos / 2 + (ypos / 2) * frm.stride[chroma]; + chroma_interp(&mut frm.data[off..], frm.stride[chroma], csrc[chroma - 1], cstride[chroma - 1], dx, dy, cbw, cbh); + } + } + + pub fn mc_blocks(&mut self, ydst: &mut [u8], udst: &mut [u8], vdst: &mut [u8], refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { + let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize; + + let pre = if mode != 0 { 2 } else { 0 }; + let post = if mode != 0 { 3 } else { 0 }; + let (width, height) = refpic.get_dimensions(0); + let sx = (xpos as isize) + ((mv.x >> 2) as isize); + let sy = (ypos as isize) + ((mv.y >> 2) as isize); + + const EBUF_STRIDE: usize = 32; + let mut ebuf = [0u8; EBUF_STRIDE * (16 + 2 + 3)]; + + if (sx - pre < 0) || (sx + (w as isize) + post > (width as isize)) || + (sy - pre < 0) || (sy + (h as isize) + post > (height as isize)) { + let edge = (pre + post) as usize; + edge_emu(&refpic, sx - pre, sy - pre, w + edge, h + edge, + &mut ebuf, EBUF_STRIDE, 0, 0); + (H264_LUMA_INTERP[mode])(ydst, 16, &ebuf, EBUF_STRIDE, w, h); + } else { + let sstride = refpic.get_stride(0); + let soff = refpic.get_offset(0); + let sdta = refpic.get_data(); + let sbuf: &[u8] = sdta.as_slice(); + let saddr = soff + ((sx - pre) as usize) + ((sy - pre) as usize) * sstride; + (H264_LUMA_INTERP[mode])(ydst, 16, &sbuf[saddr..], sstride, w, h); + } + + let (cw, ch) = refpic.get_dimensions(1); + let mvx = mv.x >> 3; + let mvy = mv.y >> 3; + let dx = (mv.x & 7) as u16; + let dy = (mv.y & 7) as u16; + let src_x = ((xpos >> 1) as isize) + (mvx as isize); + let src_y = ((ypos >> 1) as isize) + (mvy as isize); + let suoff = refpic.get_offset(1); + let svoff = refpic.get_offset(2); + let sustride = refpic.get_stride(1); + let svstride = refpic.get_stride(2); + let src = refpic.get_data(); + let cbw = w / 2; + let cbh = h / 2; + let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) || (src_y + (cbh as isize) + 1 > (ch as isize)) { + edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf, 18, 1, 4); + edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf[9..], 18, 2, 4); + ([&ebuf, &ebuf[9..]], [18, 18]) + } else { + ([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..], + &src[svoff + (src_x as usize) + (src_y as usize) * svstride..]], + [sustride, svstride]) + }; + chroma_interp(udst, 16, csrc[0], cstride[0], dx, dy, cbw, cbh); + chroma_interp(vdst, 16, csrc[1], cstride[1], dx, dy, cbw, cbh); + } + + pub fn avg(&mut self, dst: &mut [u8], dstride: usize, bw: usize, bh: usize, comp: usize) { + let afrm = NASimpleVideoFrame::from_video_buf(&mut self.avg_buf).unwrap(); + let src = &afrm.data[afrm.offset[comp]..]; + let sstride = afrm.stride[comp]; + for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) { + for (dst, src) in dline.iter_mut().zip(sline.iter()).take(bw) { + *dst = ((u16::from(*dst) + u16::from(*src) + 1) >> 1) as u8; + } + } + } + + pub fn do_mc_avg(&mut self, frm: &mut NASimpleVideoFrame, refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { + let mut abuf = self.avg_buf.clone(); + let mut afrm = NASimpleVideoFrame::from_video_buf(&mut abuf).unwrap(); + let amv = MV { x: mv.x + (xpos as i16) * 4, y: mv.y + (ypos as i16) * 4 }; + self.do_mc(&mut afrm, refpic, 0, 0, w, h, amv); + for comp in 0..3 { + let shift = if comp == 0 { 0 } else { 1 }; + self.avg(&mut frm.data[frm.offset[comp] + (xpos >> shift) + (ypos >> shift) * frm.stride[comp]..], frm.stride[comp], w >> shift, h >> shift, comp); + } + } + + pub fn put_block_weighted(&mut self, dst: &mut [u8], stride: usize, src: &[u8], w: usize, h: usize, wparams: [i8; 3]) { + let weight = i16::from(wparams[0]); + let offset = i16::from(wparams[1]); + let wshift = wparams[2] as u8; + let bias = (1 << wshift) >> 1; + + for (drow, srow) in dst.chunks_mut(stride).zip(src.chunks(16)).take(h) { + for (dst, &src) in drow[..w].iter_mut().zip(srow.iter()) { + *dst = clip_u8(((i16::from(src) * weight + bias) >> wshift) + offset); + } + } + } + + pub fn put_block_weighted2(&mut self, dst: &mut [u8], stride: usize, src0: &[u8], src1: &[u8], w: usize, h: usize, wparams: [i8; 5]) { + let weight0 = i16::from(wparams[0]); + let offset0 = i16::from(wparams[1]); + let weight1 = i16::from(wparams[2]); + let offset1 = i16::from(wparams[3]); + let wshift = (wparams[4] as u8) + 1; + let offset = (offset0 + offset1 + 1) >> 1; + let bias = (1 << wshift) >> 1; + + for (drow, (srow0, srow1)) in dst.chunks_mut(stride).zip(src0.chunks(16).zip(src1.chunks(16))).take(h) { + for (dst, (&src0, &src1)) in drow[..w].iter_mut().zip(srow0.iter().zip(srow1.iter())) { + *dst = clip_u8(((i16::from(src0) * weight0 + i16::from(src1) * weight1 + bias) >> wshift) + offset); + } + } + } + + pub fn gray_block(&mut self, frm: &mut NASimpleVideoFrame, x: usize, y: usize, w: usize, h: usize) { + let yoff = frm.offset[0] + x + y * frm.stride[0]; + let coff = [frm.offset[1] + x / 2 + y / 2 * frm.stride[1], + frm.offset[2] + x / 2 + y / 2 * frm.stride[2]]; + for row in frm.data[yoff..].chunks_mut(frm.stride[0]).take(h) { + for el in row[..w].iter_mut() { + *el = 128; + } + } + for chroma in 0..2 { + for row in frm.data[coff[chroma]..].chunks_mut(frm.stride[chroma + 1]).take(h / 2) { + for el in row[..w / 2].iter_mut() { + *el = 128; + } + } + } + } +} diff --git a/nihav-itu/src/codecs/h264/dsp/release.rs b/nihav-itu/src/codecs/h264/dsp/mc/release.rs similarity index 100% rename from nihav-itu/src/codecs/h264/dsp/release.rs rename to nihav-itu/src/codecs/h264/dsp/mc/release.rs diff --git a/nihav-itu/src/codecs/h264/dsp/mod.rs b/nihav-itu/src/codecs/h264/dsp/mod.rs index ef706be..2d98ddf 100644 --- a/nihav-itu/src/codecs/h264/dsp/mod.rs +++ b/nihav-itu/src/codecs/h264/dsp/mod.rs @@ -1,15 +1,5 @@ -use nihav_core::frame::*; -use nihav_codec_support::codecs::blockdsp::*; -use nihav_codec_support::codecs::MV; - -#[cfg(not(debug_assertions))] -mod release; -#[cfg(not(debug_assertions))] -use release::*; -#[cfg(debug_assertions)] -mod debug; -#[cfg(debug_assertions)] -use debug::*; +mod mc; +pub use mc::H264MC; pub const CHROMA_QUANTS: [u8; 52] = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, @@ -266,15 +256,6 @@ pub fn add_coeffs8(dst: &mut [u8], offset: usize, stride: usize, coeffs: &[i16; } } -pub fn avg(dst: &mut [u8], dstride: usize, - src: &[u8], sstride: usize, bw: usize, bh: usize) { - for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) { - for (dst, src) in dline.iter_mut().zip(sline.iter()).take(bw) { - *dst = ((u16::from(*dst) + u16::from(*src) + 1) >> 1) as u8; - } - } -} - fn clip8(val: i16) -> u8 { val.max(0).min(255) as u8 } fn ipred_dc128(buf: &mut [u8], stride: usize, bsize: usize) { @@ -907,165 +888,6 @@ pub const IPRED_FUNCS16X16: [IPred8x8Func; 7] = [ ipred_16x16_left_dc, ipred_16x16_top_dc, ipred_16x16_dc128 ]; -fn clip_u8(val: i16) -> u8 { val.max(0).min(255) as u8 } - -pub fn do_mc(frm: &mut NASimpleVideoFrame, refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { - let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize; - copy_block(frm, refpic.clone(), 0, xpos, ypos, mv.x >> 2, mv.y >> 2, w, h, 2, 3, mode, H264_LUMA_INTERP); - - let (cw, ch) = refpic.get_dimensions(1); - let mvx = mv.x >> 3; - let mvy = mv.y >> 3; - let dx = (mv.x & 7) as u16; - let dy = (mv.y & 7) as u16; - let mut ebuf = [0u8; 18 * 9]; - let src_x = ((xpos >> 1) as isize) + (mvx as isize); - let src_y = ((ypos >> 1) as isize) + (mvy as isize); - let suoff = refpic.get_offset(1); - let svoff = refpic.get_offset(2); - let sustride = refpic.get_stride(1); - let svstride = refpic.get_stride(2); - let src = refpic.get_data(); - let cbw = w / 2; - let cbh = h / 2; - let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) || (src_y + (cbh as isize) + 1 > (ch as isize)) { - edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf, 18, 1, 4); - edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf[9..], 18, 2, 4); - ([&ebuf, &ebuf[9..]], [18, 18]) - } else { - ([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..], - &src[svoff + (src_x as usize) + (src_y as usize) * svstride..]], - [sustride, svstride]) - }; - for chroma in 1..3 { - let off = frm.offset[chroma] + xpos / 2 + (ypos / 2) * frm.stride[chroma]; - chroma_interp(&mut frm.data[off..], frm.stride[chroma], csrc[chroma - 1], cstride[chroma - 1], dx, dy, cbw, cbh); - } -} - -pub fn mc_blocks(ydst: &mut [u8], udst: &mut [u8], vdst: &mut [u8], refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { - let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize; - - let pre = if mode != 0 { 2 } else { 0 }; - let post = if mode != 0 { 3 } else { 0 }; - let (width, height) = refpic.get_dimensions(0); - let sx = (xpos as isize) + ((mv.x >> 2) as isize); - let sy = (ypos as isize) + ((mv.y >> 2) as isize); - - const EBUF_STRIDE: usize = 32; - let mut ebuf = [0u8; EBUF_STRIDE * (16 + 2 + 3)]; - - if (sx - pre < 0) || (sx + (w as isize) + post > (width as isize)) || - (sy - pre < 0) || (sy + (h as isize) + post > (height as isize)) { - let edge = (pre + post) as usize; - edge_emu(&refpic, sx - pre, sy - pre, w + edge, h + edge, - &mut ebuf, EBUF_STRIDE, 0, 0); - (H264_LUMA_INTERP[mode])(ydst, 16, &ebuf, EBUF_STRIDE, w, h); - } else { - let sstride = refpic.get_stride(0); - let soff = refpic.get_offset(0); - let sdta = refpic.get_data(); - let sbuf: &[u8] = sdta.as_slice(); - let saddr = soff + ((sx - pre) as usize) + ((sy - pre) as usize) * sstride; - (H264_LUMA_INTERP[mode])(ydst, 16, &sbuf[saddr..], sstride, w, h); - } - - let (cw, ch) = refpic.get_dimensions(1); - let mvx = mv.x >> 3; - let mvy = mv.y >> 3; - let dx = (mv.x & 7) as u16; - let dy = (mv.y & 7) as u16; - let src_x = ((xpos >> 1) as isize) + (mvx as isize); - let src_y = ((ypos >> 1) as isize) + (mvy as isize); - let suoff = refpic.get_offset(1); - let svoff = refpic.get_offset(2); - let sustride = refpic.get_stride(1); - let svstride = refpic.get_stride(2); - let src = refpic.get_data(); - let cbw = w / 2; - let cbh = h / 2; - let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) || (src_y + (cbh as isize) + 1 > (ch as isize)) { - edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf, 18, 1, 4); - edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf[9..], 18, 2, 4); - ([&ebuf, &ebuf[9..]], [18, 18]) - } else { - ([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..], - &src[svoff + (src_x as usize) + (src_y as usize) * svstride..]], - [sustride, svstride]) - }; - chroma_interp(udst, 16, csrc[0], cstride[0], dx, dy, cbw, cbh); - chroma_interp(vdst, 16, csrc[1], cstride[1], dx, dy, cbw, cbh); -} - -pub fn gray_block(frm: &mut NASimpleVideoFrame, x: usize, y: usize, w: usize, h: usize) { - let yoff = frm.offset[0] + x + y * frm.stride[0]; - let coff = [frm.offset[1] + x / 2 + y / 2 * frm.stride[1], - frm.offset[2] + x / 2 + y / 2 * frm.stride[2]]; - if w == 16 && h == 16 { - IPRED_FUNCS16X16[IPRED8_DC128](&mut frm.data[yoff..], frm.stride[0], &[], &[]); - for chroma in 1..2 { - IPRED_FUNCS8X8_CHROMA[IPRED8_DC128](&mut frm.data[coff[chroma - 1]..], frm.stride[chroma], &[], &[]); - } - } else if w == 8 && h == 8 { - IPRED_FUNCS8X8_CHROMA[IPRED8_DC128](&mut frm.data[yoff..], frm.stride[0], &[], &[]); - for chroma in 1..2 { - IPRED_FUNCS4X4[IPRED4_DC128](&mut frm.data[coff[chroma - 1]..], frm.stride[chroma], &[128; 4], &[128; 4], &[128; 4]); - } - } else { - for row in frm.data[yoff..].chunks_mut(frm.stride[0]).take(h) { - for el in row[..w].iter_mut() { - *el = 128; - } - } - for chroma in 0..2 { - for row in frm.data[coff[chroma]..].chunks_mut(frm.stride[chroma + 1]).take(h / 2) { - for el in row[..w / 2].iter_mut() { - *el = 128; - } - } - } - } -} - -pub fn do_mc_avg(frm: &mut NASimpleVideoFrame, refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV, avg_buf: &mut NAVideoBufferRef) { - let mut afrm = NASimpleVideoFrame::from_video_buf(avg_buf).unwrap(); - let amv = MV { x: mv.x + (xpos as i16) * 4, y: mv.y + (ypos as i16) * 4 }; - do_mc(&mut afrm, refpic, 0, 0, w, h, amv); - for comp in 0..3 { - let shift = if comp == 0 { 0 } else { 1 }; - avg(&mut frm.data[frm.offset[comp] + (xpos >> shift) + (ypos >> shift) * frm.stride[comp]..], frm.stride[comp], &afrm.data[afrm.offset[comp]..], afrm.stride[comp], w >> shift, h >> shift); - } -} - -pub fn put_block_weighted(dst: &mut [u8], stride: usize, src: &[u8], w: usize, h: usize, wparams: [i8; 3]) { - let weight = i16::from(wparams[0]); - let offset = i16::from(wparams[1]); - let wshift = wparams[2] as u8; - let bias = (1 << wshift) >> 1; - - for (drow, srow) in dst.chunks_mut(stride).zip(src.chunks(16)).take(h) { - for (dst, &src) in drow[..w].iter_mut().zip(srow.iter()) { - *dst = clip8(((i16::from(src) * weight + bias) >> wshift) + offset); - } - } -} - -pub fn put_block_weighted2(dst: &mut [u8], stride: usize, src0: &[u8], src1: &[u8], w: usize, h: usize, wparams: [i8; 5]) { - let weight0 = i16::from(wparams[0]); - let offset0 = i16::from(wparams[1]); - let weight1 = i16::from(wparams[2]); - let offset1 = i16::from(wparams[3]); - let wshift = (wparams[4] as u8) + 1; - let offset = (offset0 + offset1 + 1) >> 1; - let bias = (1 << wshift) >> 1; - - for (drow, (srow0, srow1)) in dst.chunks_mut(stride).zip(src0.chunks(16).zip(src1.chunks(16))).take(h) { - for (dst, (&src0, &src1)) in drow[..w].iter_mut().zip(srow0.iter().zip(srow1.iter())) { - *dst = clip8(((i16::from(src0) * weight0 + i16::from(src1) * weight1 + bias) >> wshift) + offset); - } - } -} - macro_rules! loop_filter { (lumaedge; $buf: expr, $off: expr, $step: expr, $alpha: expr, $beta: expr) => { let p2 = i16::from($buf[$off - $step * 3]); diff --git a/nihav-itu/src/codecs/h264/mb_recon.rs b/nihav-itu/src/codecs/h264/mb_recon.rs index 4f3b7cd..97637a2 100644 --- a/nihav-itu/src/codecs/h264/mb_recon.rs +++ b/nihav-itu/src/codecs/h264/mb_recon.rs @@ -203,15 +203,15 @@ fn add_chroma(frm: &mut NASimpleVideoFrame, sstate: &SliceState, mb_info: &C } } -fn do_p_mc(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV, ref_pic: Option>, weight: &WeightInfo) { +fn do_p_mc(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV, ref_pic: Option>, weight: &WeightInfo, mc_dsp: &mut H264MC) { if let Some(buf) = ref_pic { if !weight.is_weighted() { - do_mc(frm, buf, xpos, ypos, w, h, mv); + mc_dsp.do_mc(frm, buf, xpos, ypos, w, h, mv); } else { let mut ytmp = [0; 16 * 16]; let mut utmp = [0; 16 * 16]; let mut vtmp = [0; 16 * 16]; - mc_blocks(&mut ytmp, &mut utmp, &mut vtmp, buf, xpos, ypos, w, h, mv); + mc_dsp.mc_blocks(&mut ytmp, &mut utmp, &mut vtmp, buf, xpos, ypos, w, h, mv); let yoff = frm.offset[0] + xpos + ypos * frm.stride[0]; let yw = if weight.luma_weighted { @@ -219,7 +219,7 @@ fn do_p_mc(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, w: usize, } else { [1, 0, 0] }; - put_block_weighted(&mut frm.data[yoff..], frm.stride[0], &ytmp, w, h, yw); + mc_dsp.put_block_weighted(&mut frm.data[yoff..], frm.stride[0], &ytmp, w, h, yw); for chroma in 0..2 { let cstride = frm.stride[chroma + 1]; @@ -230,15 +230,15 @@ fn do_p_mc(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, w: usize, [1, 0, 0] }; let csrc = if chroma == 0 { &utmp } else { &vtmp }; - put_block_weighted(&mut frm.data[coff..], cstride, csrc, w / 2, h / 2, cw); + mc_dsp.put_block_weighted(&mut frm.data[coff..], cstride, csrc, w / 2, h / 2, cw); } } } else { - gray_block(frm, xpos, ypos, w, h); + mc_dsp.gray_block(frm, xpos, ypos, w, h); } } -fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usize, w: usize, h: usize, mv0: MV, ref_pic0: Option>, weight0: &WeightInfo, mv1: MV, ref_pic1: Option>, weight1: &WeightInfo, avg_buf: &mut NAVideoBufferRef) { +fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usize, w: usize, h: usize, mv0: MV, ref_pic0: Option>, weight0: &WeightInfo, mv1: MV, ref_pic1: Option>, weight1: &WeightInfo, mc_dsp: &mut H264MC) { let do_weight = match (mode, weight0.is_weighted(), weight1.is_weighted()) { (BMode::L0, true, _) => true, (BMode::L1, _, true) => true, @@ -249,32 +249,32 @@ fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usi match mode { BMode::L0 => { if let Some(buf) = ref_pic0 { - do_mc(frm, buf, xpos, ypos, w, h, mv0); + mc_dsp.do_mc(frm, buf, xpos, ypos, w, h, mv0); } else { - gray_block(frm, xpos, ypos, w, h); + mc_dsp.gray_block(frm, xpos, ypos, w, h); } }, BMode::L1 => { if let Some(buf) = ref_pic1 { - do_mc(frm, buf, xpos, ypos, w, h, mv1); + mc_dsp.do_mc(frm, buf, xpos, ypos, w, h, mv1); } else { - gray_block(frm, xpos, ypos, w, h); + mc_dsp.gray_block(frm, xpos, ypos, w, h); } }, BMode::Bi => { match (ref_pic0, ref_pic1) { (Some(buf0), Some(buf1)) => { - do_mc(frm, buf0, xpos, ypos, w, h, mv0); - do_mc_avg(frm, buf1, xpos, ypos, w, h, mv1, avg_buf); + mc_dsp.do_mc(frm, buf0, xpos, ypos, w, h, mv0); + mc_dsp.do_mc_avg(frm, buf1, xpos, ypos, w, h, mv1); }, (Some(buf0), None) => { - do_mc(frm, buf0, xpos, ypos, w, h, mv0); + mc_dsp.do_mc(frm, buf0, xpos, ypos, w, h, mv0); }, (None, Some(buf1)) => { - do_mc(frm, buf1, xpos, ypos, w, h, mv1); + mc_dsp.do_mc(frm, buf1, xpos, ypos, w, h, mv1); }, (None, None) => { - gray_block(frm, xpos, ypos, w, h); + mc_dsp.gray_block(frm, xpos, ypos, w, h); }, }; }, @@ -289,7 +289,7 @@ fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usi match (mode, ref_pic0, ref_pic1) { (BMode::L0, Some(buf), _) | (BMode::L1, _, Some(buf)) => { let (mv, weight) = if mode == BMode::L0 { (mv0, weight0) } else { (mv1, weight1) }; - mc_blocks(&mut ytmp0, &mut utmp0, &mut vtmp0, buf, xpos, ypos, w, h, mv); + mc_dsp.mc_blocks(&mut ytmp0, &mut utmp0, &mut vtmp0, buf, xpos, ypos, w, h, mv); let yoff = frm.offset[0] + xpos + ypos * frm.stride[0]; let yw = if weight.luma_weighted { @@ -297,7 +297,7 @@ fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usi } else { [1, 0, 0] }; - put_block_weighted(&mut frm.data[yoff..], frm.stride[0], &ytmp0, w, h, yw); + mc_dsp.put_block_weighted(&mut frm.data[yoff..], frm.stride[0], &ytmp0, w, h, yw); for chroma in 0..2 { let cstride = frm.stride[chroma + 1]; @@ -308,12 +308,12 @@ fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usi [1, 0, 0] }; let csrc = if chroma == 0 { &utmp0 } else { &vtmp0 }; - put_block_weighted(&mut frm.data[coff..], cstride, csrc, w / 2, h / 2, cw); + mc_dsp.put_block_weighted(&mut frm.data[coff..], cstride, csrc, w / 2, h / 2, cw); } }, (BMode::Bi, Some(buf0), Some(buf1)) => { // do both and avg - mc_blocks(&mut ytmp0, &mut utmp0, &mut vtmp0, buf0, xpos, ypos, w, h, mv0); - mc_blocks(&mut ytmp1, &mut utmp1, &mut vtmp1, buf1, xpos, ypos, w, h, mv1); + mc_dsp.mc_blocks(&mut ytmp0, &mut utmp0, &mut vtmp0, buf0, xpos, ypos, w, h, mv0); + mc_dsp.mc_blocks(&mut ytmp1, &mut utmp1, &mut vtmp1, buf1, xpos, ypos, w, h, mv1); let yoff = frm.offset[0] + xpos + ypos * frm.stride[0]; let yw = match (weight0.luma_weighted, weight1.luma_weighted) { @@ -322,7 +322,7 @@ fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usi (false, true) => [1 << weight1.luma_shift, 0, weight1.luma_weight, weight1.luma_offset, weight1.luma_shift as i8], (false, false) => [1, 0, 1, 0, 0], }; - put_block_weighted2(&mut frm.data[yoff..], frm.stride[0], &ytmp0, &ytmp1, w, h, yw); + mc_dsp.put_block_weighted2(&mut frm.data[yoff..], frm.stride[0], &ytmp0, &ytmp1, w, h, yw); for chroma in 0..2 { let cstride = frm.stride[chroma + 1]; @@ -339,11 +339,11 @@ fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usi }; let csrc0 = if chroma == 0 { &utmp0 } else { &vtmp0 }; let csrc1 = if chroma == 0 { &utmp1 } else { &vtmp1 }; - put_block_weighted2(&mut frm.data[coff..], cstride, csrc0, csrc1, w / 2, h / 2, cw); + mc_dsp.put_block_weighted2(&mut frm.data[coff..], cstride, csrc0, csrc1, w / 2, h / 2, cw); } }, _ => { - gray_block(frm, xpos, ypos, w, h); + mc_dsp.gray_block(frm, xpos, ypos, w, h); }, }; } @@ -399,7 +399,7 @@ fn get_weights(slice_hdr: &SliceHeader, frame_refs: &FrameRefs, mode: BMode, wei } } -pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &FrameRefs, avg_buf: &mut NAVideoBufferRef, weight_mode: u8) { +pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &FrameRefs, mc_dsp: &mut H264MC, weight_mode: u8) { let xpos = sstate.mb_x * 16; let ypos = sstate.mb_y * 16; @@ -415,13 +415,13 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in let mv = sstate.get_cur_blk4(0).mv[0]; let rpic = frame_refs.select_ref_pic(0, 0); let weight = &slice_hdr.get_weight(0, 0); - do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight); + do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight, mc_dsp); }, MBType::P16x16 => { let mv = sstate.get_cur_blk4(0).mv[0]; let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index()); - do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight); + do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight, mc_dsp); }, MBType::P16x8 | MBType::P8x16 => { let (bw, bh, bx, by) = if mb_info.mb_type == MBType::P16x8 { @@ -432,11 +432,11 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in let mv = sstate.get_cur_blk4(0).mv[0]; let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index()); - do_p_mc(frm, xpos, ypos, bw, bh, mv, rpic, weight); + do_p_mc(frm, xpos, ypos, bw, bh, mv, rpic, weight, mc_dsp); let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0]; let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[1].index()); let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[1].index()); - do_p_mc(frm, xpos + bx, ypos + by, bw, bh, mv, rpic, weight); + do_p_mc(frm, xpos + bx, ypos + by, bw, bh, mv, rpic, weight, mc_dsp); }, MBType::P8x8 | MBType::P8x8Ref0 => { for part in 0..4 { @@ -448,17 +448,17 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in match mb_info.sub_mb_type[part] { SubMBType::P8x8 => { - do_p_mc(frm, xpos + bx, ypos + by, 8, 8, mv, rpic, weight); + do_p_mc(frm, xpos + bx, ypos + by, 8, 8, mv, rpic, weight, mc_dsp); }, SubMBType::P8x4 => { - do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic.clone(), weight); + do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic.clone(), weight, mc_dsp); let mv = sstate.get_cur_blk4(bx / 4 + by + 4).mv[0]; - do_p_mc(frm, xpos + bx, ypos + by + 4, 8, 4, mv, rpic, weight); + do_p_mc(frm, xpos + bx, ypos + by + 4, 8, 4, mv, rpic, weight, mc_dsp); }, SubMBType::P4x8 => { - do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic.clone(), weight); + do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic.clone(), weight, mc_dsp); let mv = sstate.get_cur_blk4(bx / 4 + by + 1).mv[0]; - do_p_mc(frm, xpos + bx + 4, ypos + by, 4, 8, mv, rpic, weight); + do_p_mc(frm, xpos + bx + 4, ypos + by, 4, 8, mv, rpic, weight, mc_dsp); }, SubMBType::P4x4 => { for sb_no in 0..4 { @@ -466,7 +466,7 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in let sypos = ypos + by + (sb_no & 2) * 2; let sblk_no = (bx / 4 + (sb_no & 1)) + ((by / 4) + (sb_no >> 1)) * 4; let mv = sstate.get_cur_blk4(sblk_no).mv[0]; - do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic.clone(), weight); + do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic.clone(), weight, mc_dsp); } }, _ => unreachable!(), @@ -479,7 +479,7 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in let mv1 = sstate.get_cur_blk4(0).mv[1]; let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[0].index()); let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, mb_info.ref_l0[0], mb_info.ref_l1[0]); - do_b_mc(frm, mode, xpos, ypos, 16, 16, mv0, rpic0, &weight0, mv1, rpic1, &weight1, avg_buf); + do_b_mc(frm, mode, xpos, ypos, 16, 16, mv0, rpic0, &weight0, mv1, rpic1, &weight1, mc_dsp); }, MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => { let (pw, ph) = mb_info.mb_type.size(); @@ -493,7 +493,7 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in let mv1 = sstate.get_cur_blk4(blk).mv[1]; let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[part].index()); let (weight0, weight1) = get_weights(slice_hdr, frame_refs, modes[part], weight_mode, mb_info.ref_l0[part], mb_info.ref_l1[part]); - do_b_mc(frm, modes[part], xpos + bx, ypos + by, pw, ph, mv0, rpic0, &weight0, mv1, rpic1, &weight1, avg_buf); + do_b_mc(frm, modes[part], xpos + bx, ypos + by, pw, ph, mv0, rpic0, &weight0, mv1, rpic1, &weight1, mc_dsp); bx += px; by += py; } @@ -508,7 +508,7 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); - do_b_mc(frm, BMode::Bi, xpos, ypos, 16, 16, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf); + do_b_mc(frm, BMode::Bi, xpos, ypos, 16, 16, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); } else { for blk4 in 0..16 { let mv = sstate.get_cur_blk4(blk4).mv; @@ -516,7 +516,7 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); - do_b_mc(frm, BMode::Bi, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf); + do_b_mc(frm, BMode::Bi, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); } } sstate.apply_to_blk8(|blk8| { blk8.ref_idx[0].set_direct(); blk8.ref_idx[1].set_direct(); }); @@ -538,7 +538,7 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); - do_b_mc(frm, BMode::Bi, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf); + do_b_mc(frm, BMode::Bi, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); bx += 4; if blk == 1 { bx -= 8; @@ -551,23 +551,23 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in SubMBType::B8x8(mode) => { let mv = sstate.get_cur_blk4(blk8).mv; let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); - do_b_mc(frm, mode, xpos + bx, ypos + by, 8, 8, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf); + do_b_mc(frm, mode, xpos + bx, ypos + by, 8, 8, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); }, SubMBType::B8x4(mode) | SubMBType::B4x8(mode) => { let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); let (pw, ph) = subtype.size(); let mv = sstate.get_cur_blk4(blk8).mv; - do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, avg_buf); + do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, mc_dsp); let addr2 = blk8 + (pw & 4) / 4 + (ph & 4); let mv = sstate.get_cur_blk4(addr2).mv; - do_b_mc(frm, mode, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf); + do_b_mc(frm, mode, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); }, SubMBType::B4x4(mode) => { let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); for i in 0..4 { let addr2 = blk8 + (i & 1) + (i & 2) * 2; let mv = sstate.get_cur_blk4(addr2).mv; - do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, avg_buf); + do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, mc_dsp); bx += 4; if i == 1 { bx -= 8; diff --git a/nihav-itu/src/codecs/h264/mod.rs b/nihav-itu/src/codecs/h264/mod.rs index df46760..20224d3 100644 --- a/nihav-itu/src/codecs/h264/mod.rs +++ b/nihav-itu/src/codecs/h264/mod.rs @@ -187,7 +187,7 @@ struct H264Decoder { ipcm_buf: [u8; 256 + 64 + 64], - avg_buf: NAVideoBufferRef, + mc_dsp: H264MC, transform_8x8_mode: bool, } @@ -254,7 +254,7 @@ impl H264Decoder { ipcm_buf: [0; 256 + 64 + 64], - avg_buf, + mc_dsp: H264MC::new(avg_buf), transform_8x8_mode: false, } @@ -593,7 +593,7 @@ println!("PAFF?"); } else { 0 }; - recon_mb(&mut frm, slice_hdr, &mb_info, &mut self.sstate, &self.frame_refs, &mut self.avg_buf, weight_mode); + recon_mb(&mut frm, slice_hdr, &mb_info, &mut self.sstate, &self.frame_refs, &mut self.mc_dsp, weight_mode); } else { for (dline, src) in frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..].chunks_mut(frm.stride[0]).take(16).zip(self.ipcm_buf.chunks(16)) { dline[..16].copy_from_slice(src); -- 2.30.2