From 5f223cdb5a7834fe58bf05d4dd0da36325f4f11c Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Tue, 8 Aug 2023 14:39:17 +0200 Subject: [PATCH] h264: prepare data references before decoding This speeds up decoding by eliminating the need for refcounted accesses. --- nihav-itu/src/codecs/h264/decoder_mt.rs | 14 +-- nihav-itu/src/codecs/h264/decoder_st.rs | 25 +++-- nihav-itu/src/codecs/h264/dsp/mc/mod.rs | 78 +++++++++----- nihav-itu/src/codecs/h264/mb_recon.rs | 38 +++---- nihav-itu/src/codecs/h264/pic_ref.rs | 134 ++++++++++++++++++++++++ nihav-itu/src/codecs/h264/types.rs | 34 ++++-- 6 files changed, 256 insertions(+), 67 deletions(-) diff --git a/nihav-itu/src/codecs/h264/decoder_mt.rs b/nihav-itu/src/codecs/h264/decoder_mt.rs index fac66c5..1824042 100644 --- a/nihav-itu/src/codecs/h264/decoder_mt.rs +++ b/nihav-itu/src/codecs/h264/decoder_mt.rs @@ -36,18 +36,20 @@ impl FrameDecoder { } validate!(full_size > 0); + let sslice_refs = SimplifiedSliceRefs::new(refs); + let mut br = BitReader::new(&nal[hdr_size / 8..], BitReaderMode::BE); if !self.pps.entropy_coding_mode { br.skip((hdr_size & 7) as u32)?; - self.decode_slice_cavlc(&mut br, full_size - (hdr_size & !7), hdr, refs) + self.decode_slice_cavlc(&mut br, full_size - (hdr_size & !7), hdr, &sslice_refs) } else { let csrc = &nal[(hdr_size + 7) / 8..]; validate!(csrc.len() >= 2); let mut cabac = CABAC::new(csrc, hdr.slice_type, hdr.slice_qp, hdr.cabac_init_idc as usize)?; - self.decode_slice_cabac(&mut cabac, hdr, refs) + self.decode_slice_cabac(&mut cabac, hdr, &sslice_refs) } } - fn decode_slice_cavlc(&mut self, br: &mut BitReader, full_size: usize, slice_hdr: &SliceHeader, refs: &SliceRefs) -> DecoderResult { + fn decode_slice_cavlc(&mut self, br: &mut BitReader, full_size: usize, slice_hdr: &SliceHeader, refs: &SimplifiedSliceRefs) -> DecoderResult { const INTRA_CBP: [u8; 48] = [ 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, @@ -152,7 +154,7 @@ impl FrameDecoder { } Ok(mb_idx) } - fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader, refs: &SliceRefs) -> DecoderResult { + fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader, refs: &SimplifiedSliceRefs) -> DecoderResult { let mut mb_idx = slice_hdr.first_mb_in_slice; let mut prev_mb_skipped = false; let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip }; @@ -264,7 +266,7 @@ impl FrameDecoder { Err(DecoderError::InvalidData) } #[allow(clippy::cognitive_complexity)] - fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, refs: &SliceRefs) -> DecoderResult<()> { + fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, refs: &SimplifiedSliceRefs) -> DecoderResult<()> { let qp_y = mb_info.qp_y; let qpr = ((qp_y as i8) + self.pps.chroma_qp_index_offset).max(0).min(51) as usize; let qp_u = CHROMA_QUANTS[qpr]; @@ -394,7 +396,7 @@ impl FrameDecoder { Ok(()) } - fn pred_mv(sstate: &mut SliceState, frame_refs: &SliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) { + fn pred_mv(sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) { let mb_type = mb_info.mb_type; if !mb_type.is_4x4() { let (pw, ph) = mb_type.size(); diff --git a/nihav-itu/src/codecs/h264/decoder_st.rs b/nihav-itu/src/codecs/h264/decoder_st.rs index 7e8b83a..b9b7308 100644 --- a/nihav-itu/src/codecs/h264/decoder_st.rs +++ b/nihav-itu/src/codecs/h264/decoder_st.rs @@ -277,7 +277,7 @@ println!("PAFF?"); Ok(()) } - fn pred_mv(sstate: &mut SliceState, frame_refs: &SliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) { + fn pred_mv(sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) { let mb_type = mb_info.mb_type; if !mb_type.is_4x4() { let (pw, ph) = mb_type.size(); @@ -337,7 +337,7 @@ println!("PAFF?"); } } #[allow(clippy::cognitive_complexity)] - fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo) { + fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, slice_refs: &SimplifiedSliceRefs) { let pps = &self.pps[self.cur_pps]; let qp_y = mb_info.qp_y; @@ -408,7 +408,7 @@ println!("PAFF?"); self.sstate.reset_mb_mv(); } if !mb_info.mb_type.is_intra() { - Self::pred_mv(&mut self.sstate, &self.frame_refs.cur_refs, mb_info, self.cur_id, self.temporal_mv, self.sps[self.cur_sps].direct_8x8_inference); + Self::pred_mv(&mut self.sstate, slice_refs, mb_info, self.cur_id, self.temporal_mv, self.sps[self.cur_sps].direct_8x8_inference); } if !pps.constrained_intra_pred && mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 { self.sstate.fill_ipred(IntraPredMode::DC); @@ -426,7 +426,7 @@ println!("PAFF?"); } else { 0 }; - recon_mb(&mut frm, slice_hdr, mb_info, &mut self.sstate, &self.frame_refs.cur_refs, &mut self.mc_dsp, weight_mode); + recon_mb(&mut frm, slice_hdr, mb_info, &mut self.sstate, slice_refs, &mut self.mc_dsp, weight_mode); } else { for (dline, src) in frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..].chunks_mut(frm.stride[0]).take(16).zip(self.ipcm_buf.chunks(16)) { dline[..16].copy_from_slice(src); @@ -460,13 +460,13 @@ _ => {}, mb.mv[blk4] = self.sstate.get_cur_blk4(blk4).mv; } for blk8 in 0..4 { - mb.ref_poc[blk8] = self.frame_refs.cur_refs.map_refs(self.sstate.get_cur_blk8(blk8).ref_idx); + mb.ref_poc[blk8] = slice_refs.map_refs(self.sstate.get_cur_blk8(blk8).ref_idx); mb.ref_idx[blk8] = self.sstate.get_cur_blk8(blk8).ref_idx; } mv_info.mbs[mb_pos] = mb; } if !self.deblock_skip && self.deblock_mode != 1 { - self.sstate.fill_deblock(&self.frame_refs.cur_refs, self.deblock_mode, self.is_s); + self.sstate.fill_deblock(slice_refs, self.deblock_mode, self.is_s); if let Some(ref mut pic) = self.cur_pic { let mut frm = NASimpleVideoFrame::from_video_buf(&mut pic.buf).unwrap(); loop_filter_mb(&mut frm, &self.sstate, self.lf_alpha, self.lf_beta); @@ -489,6 +489,10 @@ _ => {}, let mut mb_idx = slice_hdr.first_mb_in_slice; let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() }; let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip }; + + let slice_refs = self.frame_refs.cur_refs.clone(); + let sslice_refs = SimplifiedSliceRefs::new(&slice_refs); + while br.tell() < full_size && mb_idx < self.num_mbs { mb_info.coded = [false; 25]; mb_info.ref_l0 = [ZERO_REF; 4]; @@ -504,7 +508,7 @@ _ => {}, validate!(mb_idx + mb_skip_run <= self.num_mbs); mb_info.mb_type = skip_type; for _ in 0..mb_skip_run { - self.handle_macroblock(slice_hdr, &mut mb_info); + self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs); mb_idx += 1; } if mb_idx == self.num_mbs || br.tell() >= full_size { @@ -570,7 +574,7 @@ _ => {}, decode_residual_cavlc(br, &mut self.sstate, &mut mb_info, &self.cavlc_cb)?; } } - self.handle_macroblock(slice_hdr, &mut mb_info); + self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs); } mb_idx += 1; } @@ -587,6 +591,9 @@ _ => {}, let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() }; + let slice_refs = self.frame_refs.cur_refs.clone(); + let sslice_refs = SimplifiedSliceRefs::new(&slice_refs); + while mb_idx < self.num_mbs { mb_info.coded = [false; 25]; mb_info.ref_l0 = [ZERO_REF; 4]; @@ -675,7 +682,7 @@ _ => {}, mb_info.transform_size_8x8 = false; last_qp_diff = false; } - self.handle_macroblock(slice_hdr, &mut mb_info); + self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs); prev_mb_skipped = mb_skip; if !(self.is_mbaff && ((mb_idx & 1) == 0)) && cabac.decode_terminate() { if let Some(ref mut pic) = self.cur_pic { diff --git a/nihav-itu/src/codecs/h264/dsp/mc/mod.rs b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs index f558441..5845d92 100644 --- a/nihav-itu/src/codecs/h264/dsp/mc/mod.rs +++ b/nihav-itu/src/codecs/h264/dsp/mc/mod.rs @@ -1,6 +1,6 @@ use nihav_core::frame::*; use nihav_codec_support::codecs::MV; -use nihav_codec_support::codecs::blockdsp::*; +use super::super::SimpleFrame; macro_rules! module_selector { ($( ($cond:meta, $module:ident) ),*) => { @@ -81,7 +81,7 @@ impl H264MC { self.width = width; self.height = height; } - pub fn do_mc(&mut self, frm: &mut NASimpleVideoFrame, refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { + pub fn do_mc(&mut self, frm: &mut NASimpleVideoFrame, refpic: &SimpleFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { let mut ebuf = [0u8; 22 * 22]; let mvx = mv.x >> 2; let mvy = mv.y >> 2; @@ -89,16 +89,16 @@ impl H264MC { let pre = if mode != 0 { 2isize } else { 0 }; let post = if mode != 0 { 3isize } else { 0 }; let (yw, yh) = (self.width, self.height); - let src = refpic.get_data(); - let systride = refpic.get_stride(0); + let src = refpic.data; + let systride = refpic.stride[0]; let src_x = (xpos as isize) + (mvx as isize); let src_y = (ypos as isize) + (mvy as isize); let (ysrc, ystride) = if (src_x - pre < 0) || (src_x + (w as isize) + post > (yw as isize)) || (src_y - pre < 0) || (src_y + (h as isize) + post > (yh as isize)) { let add = (pre + post) as usize; - edge_emu(&refpic, src_x - pre, src_y - pre, w + add, h + add, &mut ebuf, 22, 0, 0); + edge_emu_sf(refpic, src_x - pre, src_y - pre, yw, yh, w + add, h + add, &mut ebuf, 22, 0); (&ebuf[..], 22) } else { - (&src[refpic.get_offset(0) + ((src_x - pre) as usize) + ((src_y - pre) as usize) * systride..], systride) + (&src[refpic.offset[0] + ((src_x - pre) as usize) + ((src_y - pre) as usize) * systride..], systride) }; let wmode = match w { 4 => 0, @@ -114,15 +114,17 @@ impl H264MC { let dy = (mv.y & 7) as u16; let src_x = ((xpos >> 1) as isize) + (mvx as isize); let src_y = ((ypos >> 1) as isize) + (mvy as isize); - let suoff = refpic.get_offset(1); - let svoff = refpic.get_offset(2); - let sustride = refpic.get_stride(1); - let svstride = refpic.get_stride(2); + let suoff = refpic.offset[1]; + let svoff = refpic.offset[2]; + let sustride = refpic.stride[1]; + let svstride = refpic.stride[2]; let cbw = w / 2; let cbh = h / 2; let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) || (src_y + (cbh as isize) + 1 > (ch as isize)) { - edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf, 18, 1, 4); - edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf[9..], 18, 2, 4); + let aw = (cw + 7) & !7; + let ah = (ch + 7) & !7; + edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf, 18, 1); + edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf[9..], 18, 2); ([&ebuf, &ebuf[9..]], [18, 18]) } else { ([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..], @@ -135,7 +137,7 @@ impl H264MC { } } - pub fn mc_blocks(&mut self, dst: &mut McBlock, refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { + pub fn mc_blocks(&mut self, dst: &mut McBlock, refpic: &SimpleFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize; let pre = if mode != 0 { 2 } else { 0 }; @@ -155,14 +157,13 @@ impl H264MC { if (sx - pre < 0) || (sx + (w as isize) + post > (width as isize)) || (sy - pre < 0) || (sy + (h as isize) + post > (height as isize)) { let edge = (pre + post) as usize; - edge_emu(&refpic, sx - pre, sy - pre, w + edge, h + edge, - &mut ebuf, EBUF_STRIDE, 0, 0); + edge_emu_sf(refpic, sx - pre, sy - pre, width, height, w + edge, h + edge, + &mut ebuf, EBUF_STRIDE, 0); (H264_LUMA_INTERP[wmode][mode])(&mut dst.y, 16, &ebuf, EBUF_STRIDE, h); } else { - let sstride = refpic.get_stride(0); - let soff = refpic.get_offset(0); - let sdta = refpic.get_data(); - let sbuf: &[u8] = sdta.as_slice(); + let sstride = refpic.stride[0]; + let soff = refpic.offset[0]; + let sbuf = refpic.data; let saddr = soff + ((sx - pre) as usize) + ((sy - pre) as usize) * sstride; (H264_LUMA_INTERP[wmode][mode])(&mut dst.y, 16, &sbuf[saddr..], sstride, h); } @@ -174,16 +175,18 @@ impl H264MC { let dy = (mv.y & 7) as u16; let src_x = ((xpos >> 1) as isize) + (mvx as isize); let src_y = ((ypos >> 1) as isize) + (mvy as isize); - let suoff = refpic.get_offset(1); - let svoff = refpic.get_offset(2); - let sustride = refpic.get_stride(1); - let svstride = refpic.get_stride(2); - let src = refpic.get_data(); + let suoff = refpic.offset[1]; + let svoff = refpic.offset[2]; + let sustride = refpic.stride[1]; + let svstride = refpic.stride[2]; + let src = refpic.data; let cbw = w / 2; let cbh = h / 2; let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) || (src_y + (cbh as isize) + 1 > (ch as isize)) { - edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf, 18, 1, 4); - edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf[9..], 18, 2, 4); + let aw = (cw + 7) & !7; + let ah = (ch + 7) & !7; + edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf, 18, 1); + edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf[9..], 18, 2); ([&ebuf, &ebuf[9..]], [18, 18]) } else { ([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..], @@ -194,7 +197,7 @@ impl H264MC { (self.chroma_interp[wmode])(&mut dst.v, 16, csrc[1], cstride[1], dx, dy, cbh); } - pub fn do_mc_avg(&mut self, frm: &mut NASimpleVideoFrame, refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { + pub fn do_mc_avg(&mut self, frm: &mut NASimpleVideoFrame, refpic: &SimpleFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { let mut abuf = self.avg_buf.clone(); let mut afrm = NASimpleVideoFrame::from_video_buf(&mut abuf).unwrap(); let amv = MV { x: mv.x + (xpos as i16) * 4, y: mv.y + (ypos as i16) * 4 }; @@ -233,6 +236,27 @@ impl H264MC { } } +fn edge_emu_sf(src: &SimpleFrame, xpos: isize, ypos: isize, w: usize, h: usize, bw: usize, bh: usize, dst: &mut [u8], dstride: usize, comp: usize) { + let stride = src.stride[comp]; + let offs = src.offset[comp]; + let framebuf = src.data; + + for y in 0..bh { + let srcy; + if (y as isize) + ypos < 0 { srcy = 0; } + else if (y as isize) + ypos >= (h as isize) { srcy = h - 1; } + else { srcy = ((y as isize) + ypos) as usize; } + + for x in 0..bw { + let srcx; + if (x as isize) + xpos < 0 { srcx = 0; } + else if (x as isize) + xpos >= (w as isize) { srcx = w - 1; } + else { srcx = ((x as isize) + xpos) as usize; } + dst[x + y * dstride] = framebuf[offs + srcx + srcy * stride]; + } + } +} + fn avg(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) { for (dst, src) in dline.iter_mut().zip(sline.iter()).take(bw) { diff --git a/nihav-itu/src/codecs/h264/mb_recon.rs b/nihav-itu/src/codecs/h264/mb_recon.rs index 5a204f3..d8e51f3 100644 --- a/nihav-itu/src/codecs/h264/mb_recon.rs +++ b/nihav-itu/src/codecs/h264/mb_recon.rs @@ -4,7 +4,7 @@ use nihav_codec_support::codecs::{MV, ZERO_MV}; use super::{CurrentMBInfo, I4X4_SCAN, Shareable}; use super::dispatch::{ThreadDispatcher, FrameDecodingStatus}; use super::dsp::*; -use super::pic_ref::SliceRefs; +use super::pic_ref::SimplifiedSliceRefs; use super::slice::{SliceHeader, WeightInfo, DEF_WEIGHT_INFO}; use super::types::*; @@ -205,7 +205,7 @@ fn add_chroma(frm: &mut NASimpleVideoFrame, sstate: &SliceState, mb_info: &C } } -fn do_p_mc(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV, ref_pic: Option>, weight: &WeightInfo, mc_dsp: &mut H264MC) { +fn do_p_mc(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV, ref_pic: Option<&SimpleFrame>, weight: &WeightInfo, mc_dsp: &mut H264MC) { if let Some(buf) = ref_pic { if !weight.is_weighted() { mc_dsp.do_mc(frm, buf, xpos, ypos, w, h, mv); @@ -245,7 +245,7 @@ fn do_p_mc(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, w: usize, } #[allow(clippy::match_like_matches_macro)] -fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usize, w: usize, h: usize, mv0: MV, ref_pic0: Option>, weight0: &WeightInfo, mv1: MV, ref_pic1: Option>, weight1: &WeightInfo, mc_dsp: &mut H264MC) { +fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usize, w: usize, h: usize, mv0: MV, ref_pic0: Option<&SimpleFrame>, weight0: &WeightInfo, mv1: MV, ref_pic1: Option<&SimpleFrame>, weight1: &WeightInfo, mc_dsp: &mut H264MC) { let do_weight = match (mode, weight0.is_weighted(), weight1.is_weighted()) { (BMode::L0, true, _) => true, (BMode::L1, _, true) => true, @@ -364,7 +364,7 @@ fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usi } } -fn do_b_mc_4x4bi(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, mv: &[MV; 2], ref_pic0: Option>, weight0: &WeightInfo, ref_pic1: Option>, weight1: &WeightInfo, mc_dsp: &mut H264MC) { +fn do_b_mc_4x4bi(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, mv: &[MV; 2], ref_pic0: Option<&SimpleFrame>, weight0: &WeightInfo, ref_pic1: Option<&SimpleFrame>, weight1: &WeightInfo, mc_dsp: &mut H264MC) { if !weight0.is_weighted() || !weight1.is_weighted() { match (ref_pic0, ref_pic1) { (Some(buf0), Some(buf1)) => { @@ -423,7 +423,7 @@ fn do_b_mc_4x4bi(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, mv: } } -fn get_weights(slice_hdr: &SliceHeader, frame_refs: &SliceRefs, mode: BMode, weight_mode: u8, ref_l0: PicRef, ref_l1: PicRef) -> (WeightInfo, WeightInfo) { +fn get_weights(slice_hdr: &SliceHeader, frame_refs: &SimplifiedSliceRefs, mode: BMode, weight_mode: u8, ref_l0: PicRef, ref_l1: PicRef) -> (WeightInfo, WeightInfo) { let idx_l0 = ref_l0.index(); let idx_l1 = ref_l1.index(); if mode != BMode::Bi || weight_mode != 2 { @@ -432,7 +432,7 @@ fn get_weights(slice_hdr: &SliceHeader, frame_refs: &SliceRefs, mode: BMode, wei let r0_poc = pic0.full_id as u16; let r1_poc = pic1.full_id as u16; let cur_id = frame_refs.cur_id as u16; - if (r0_poc == r1_poc) || pic0.long_term.is_some() || pic1.long_term.is_some() { + if (r0_poc == r1_poc) || pic0.long_term || pic1.long_term { return (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO); } @@ -473,7 +473,7 @@ fn get_weights(slice_hdr: &SliceHeader, frame_refs: &SliceRefs, mode: BMode, wei } } -pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &SliceRefs, mc_dsp: &mut H264MC, weight_mode: u8) { +pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mc_dsp: &mut H264MC, weight_mode: u8) { let xpos = sstate.mb_x * 16; let ypos = sstate.mb_y * 16; @@ -525,12 +525,12 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in do_p_mc(frm, xpos + bx, ypos + by, 8, 8, mv, rpic, weight, mc_dsp); }, SubMBType::P8x4 => { - do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic.clone(), weight, mc_dsp); + do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic, weight, mc_dsp); let mv = sstate.get_cur_blk4(bx / 4 + by + 4).mv[0]; do_p_mc(frm, xpos + bx, ypos + by + 4, 8, 4, mv, rpic, weight, mc_dsp); }, SubMBType::P4x8 => { - do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic.clone(), weight, mc_dsp); + do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic, weight, mc_dsp); let mv = sstate.get_cur_blk4(bx / 4 + by + 1).mv[0]; do_p_mc(frm, xpos + bx + 4, ypos + by, 4, 8, mv, rpic, weight, mc_dsp); }, @@ -540,7 +540,7 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in let sypos = ypos + by + (sb_no & 2) * 2; let sblk_no = (bx / 4 + (sb_no & 1)) + ((by / 4) + (sb_no >> 1)) * 4; let mv = sstate.get_cur_blk4(sblk_no).mv[0]; - do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic.clone(), weight, mc_dsp); + do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic, weight, mc_dsp); } }, _ => unreachable!(), @@ -631,7 +631,7 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); let (pw, ph) = subtype.size(); let mv = sstate.get_cur_blk4(blk8).mv; - do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, mc_dsp); + do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); let addr2 = blk8 + (pw & 4) / 4 + (ph & 4); let mv = sstate.get_cur_blk4(addr2).mv; do_b_mc(frm, mode, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); @@ -641,7 +641,7 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in for i in 0..4 { let addr2 = blk8 + (i & 1) + (i & 2) * 2; let mv = sstate.get_cur_blk4(addr2).mv; - do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, mc_dsp); + do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); bx += 4; if i == 1 { bx -= 8; @@ -680,7 +680,7 @@ pub fn wait_for_mb(disp: &Shareable, sstate: &SliceState, xpos } } -fn wait_b_mc(disp: &Shareable, sstate: &SliceState, frame_refs: &SliceRefs, mv: [MV; 2], ref_idx: [PicRef; 2], xpos: usize, ypos: usize, w: usize, h: usize) -> DecoderResult<()> { +fn wait_b_mc(disp: &Shareable, sstate: &SliceState, frame_refs: &SimplifiedSliceRefs, mv: [MV; 2], ref_idx: [PicRef; 2], xpos: usize, ypos: usize, w: usize, h: usize) -> DecoderResult<()> { if let Some(ref_id) = frame_refs.get_ref_id(0, ref_idx[0].index()) { wait_for_mb(disp, sstate, xpos + w, ypos + h, mv[0], ref_id)?; } @@ -690,7 +690,7 @@ fn wait_b_mc(disp: &Shareable, sstate: &SliceState, frame_refs Ok(()) } -pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &SliceRefs, mc_dsp: &mut H264MC, weight_mode: u8, disp: &Shareable) -> DecoderResult<()> { +pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mc_dsp: &mut H264MC, weight_mode: u8, disp: &Shareable) -> DecoderResult<()> { let xpos = sstate.mb_x * 16; let ypos = sstate.mb_y * 16; @@ -760,7 +760,7 @@ pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 4, mv, ref_id)?; } - do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic.clone(), weight, mc_dsp); + do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic, weight, mc_dsp); let mv = sstate.get_cur_blk4(bx / 4 + by + 4).mv[0]; if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 8, mv, ref_id)?; @@ -771,7 +771,7 @@ pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { wait_for_mb(disp, sstate, xpos + bx + 4, ypos + by + 8, mv, ref_id)?; } - do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic.clone(), weight, mc_dsp); + do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic, weight, mc_dsp); let mv = sstate.get_cur_blk4(bx / 4 + by + 1).mv[0]; if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 8, mv, ref_id)?; @@ -787,7 +787,7 @@ pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { wait_for_mb(disp, sstate, sxpos + 4, sypos + 4, mv, ref_id)?; } - do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic.clone(), weight, mc_dsp); + do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic, weight, mc_dsp); } }, _ => unreachable!(), @@ -900,7 +900,7 @@ pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb let (pw, ph) = subtype.size(); let mv = sstate.get_cur_blk4(blk8).mv; wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx, ypos + by, pw, ph)?; - do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, mc_dsp); + do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); let addr2 = blk8 + (pw & 4) / 4 + (ph & 4); let mv = sstate.get_cur_blk4(addr2).mv; wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph)?; @@ -912,7 +912,7 @@ pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb let addr2 = blk8 + (i & 1) + (i & 2) * 2; let mv = sstate.get_cur_blk4(addr2).mv; wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx, ypos + by, 4, 4)?; - do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, mc_dsp); + do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); bx += 4; if i == 1 { bx -= 8; diff --git a/nihav-itu/src/codecs/h264/pic_ref.rs b/nihav-itu/src/codecs/h264/pic_ref.rs index a24e9d0..b027d06 100644 --- a/nihav-itu/src/codecs/h264/pic_ref.rs +++ b/nihav-itu/src/codecs/h264/pic_ref.rs @@ -56,6 +56,7 @@ pub struct SliceRefs { pub cur_id: u32, } +#[allow(dead_code)] impl SliceRefs { pub fn get_ref_id(&self, list_id: u8, ref_id: usize) -> Option { let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 }; @@ -150,6 +151,139 @@ impl SliceRefs { } } +#[derive(Clone)] +pub struct SimplePictureInfo<'a> { + pub full_id: u32, + pub buf: SimpleFrame<'a>, + pub long_term: bool, + pub mv_info: &'a FrameMV, +} + +#[derive(Clone)] +pub struct SimplifiedSliceRefs<'a> { + pub ref_list0: Vec>>, + pub ref_list1: Vec>>, + pub cur_id: u32, +} + +impl<'a> SimplifiedSliceRefs<'a> { + pub fn new(srefs: &'a SliceRefs) -> Self { + let mut ref_list0 = Vec::with_capacity(srefs.ref_list0.len()); + let mut ref_list1 = Vec::with_capacity(srefs.ref_list1.len()); + for entry in srefs.ref_list0.iter() { + ref_list0.push(entry.as_ref().map(|pic| SimplePictureInfo { + full_id: pic.full_id, + buf: SimpleFrame::new(&pic.buf), + long_term: pic.long_term.is_some(), + mv_info: &pic.mv_info, + })); + } + for entry in srefs.ref_list1.iter() { + ref_list1.push(entry.as_ref().map(|pic| SimplePictureInfo { + full_id: pic.full_id, + buf: SimpleFrame::new(&pic.buf), + long_term: pic.long_term.is_some(), + mv_info: &pic.mv_info, + })); + } + Self { + cur_id: srefs.cur_id, + ref_list0, ref_list1 + } + } + pub fn get_ref_id(&self, list_id: u8, ref_id: usize) -> Option { + let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 }; + if ref_list.len() > ref_id { + ref_list[ref_id].as_ref().map(|pic| pic.full_id) + } else { + None + } + } + pub fn select_ref_pic(&self, list_id: u8, ref_id: usize) -> Option<&SimpleFrame> { + let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 }; + if ref_list.len() > ref_id { + ref_list[ref_id].as_ref().map(|pic| &pic.buf) + } else { + None + } + } + pub fn get_colocated_info(&self, mb_x: usize, mb_y: usize) -> (FrameMBInfo, u16, bool) { + if let Some(ref ref_pic) = &self.ref_list1[0] { + let mv_info = ref_pic.mv_info; + let mb = mv_info.mbs[mb_x + mb_y * mv_info.mb_stride]; + (mb, ref_pic.full_id as u16, ref_pic.long_term) + } else { + (FrameMBInfo::default(), 0, false) + } + } + pub fn map_ref0(&self, ref0_id: u16) -> (PicRef, bool) { + let mut r0_idx = 0; + let mut long = false; + for (i, rpic0) in self.ref_list0.iter().enumerate() { + if let Some(ref pic) = rpic0 { + if (pic.full_id as u16) == ref0_id { + r0_idx = i as u8; + long = pic.long_term; + break; + } + } + } + (PicRef::new(r0_idx), long) + } + pub fn map_refs(&self, ref_idx: [PicRef; 2]) -> [u16; 2] { + let r0 = ref_idx[0].index(); + let r1 = ref_idx[1].index(); + let ref0 = if r0 < self.ref_list0.len() { + if let Some(ref pic) = self.ref_list0[r0] { + pic.full_id as u16 + } else { + MISSING_POC + } + } else { + MISSING_POC + }; + let ref1 = if r1 < self.ref_list1.len() { + if let Some(ref pic) = self.ref_list1[r1] { + pic.full_id as u16 + } else { + MISSING_POC + } + } else { + MISSING_POC + }; + [ref0, ref1] + } + pub fn cmp_refs(&self, ref1: [PicRef; 2], ref2: [PicRef; 2]) -> bool { + if ref1 != ref2 { + self.cmp_ref(ref1[0], ref2[0], 0) && self.cmp_ref(ref1[1], ref2[1], 1) + } else { + true + } + } + fn cmp_ref(&self, ref1: PicRef, ref2: PicRef, list: u8) -> bool { + if ref1 == ref2 { + true + } else { + let idx0 = ref1.index(); + let idx1 = ref2.index(); + if idx0 == idx1 { + return true; + } + let src = if list == 0 { &self.ref_list0 } else { &self.ref_list1 }; + if idx0 >= src.len() || idx1 >= src.len() { +//panic!("wrong refs"); + return false; + } + if let (Some(ref pic0), Some(ref pic1)) = (&src[idx0], &src[idx1]) { + pic0.full_id == pic1.full_id + } else { +//panic!("missing pics"); + false + } + } + } +} + pub struct FrameRefs { pub ref_pics: Vec, pub cur_refs: SliceRefs, diff --git a/nihav-itu/src/codecs/h264/types.rs b/nihav-itu/src/codecs/h264/types.rs index 4cc1fca..1310daa 100644 --- a/nihav-itu/src/codecs/h264/types.rs +++ b/nihav-itu/src/codecs/h264/types.rs @@ -1,9 +1,31 @@ -use nihav_core::frame::NASimpleVideoFrame; +use nihav_core::frame::{NAVideoBuffer, NASimpleVideoFrame}; use nihav_codec_support::codecs::{MV, ZERO_MV}; use nihav_codec_support::data::GenericCache; -use super::SliceRefs; +use super::SimplifiedSliceRefs; use super::pic_ref::FrameMBInfo; +#[derive(Clone,Copy)] +pub struct SimpleFrame<'a> { + pub data: &'a [u8], + pub offset: [usize; 3], + pub stride: [usize; 3], +} + +impl<'a> SimpleFrame<'a> { + pub fn new(buf: &'a NAVideoBuffer) -> Self { + let mut offset = [0; 3]; + let mut stride = [0; 3]; + for (plane, (offs, strd)) in offset.iter_mut().zip(stride.iter_mut()).enumerate() { + *offs = buf.get_offset(plane); + *strd = buf.get_stride(plane); + } + Self { + data: buf.get_data(), + offset, stride + } + } +} + #[repr(u8)] #[derive(Clone,Copy,Debug,PartialEq)] pub enum BMode { @@ -478,7 +500,7 @@ impl SliceState { } } } - pub fn fill_deblock(&mut self, frefs: &SliceRefs, deblock_mode: u8, is_s: bool) { + pub fn fill_deblock(&mut self, frefs: &SimplifiedSliceRefs, deblock_mode: u8, is_s: bool) { if deblock_mode == 1 { return; } @@ -773,7 +795,7 @@ impl SliceState { self.fill_mv (0, 0, 16, 16, 0, mv); self.fill_ref(0, 0, 16, 16, 0, ref_idx); } - pub fn predict_direct_mb(&mut self, frame_refs: &SliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) { + pub fn predict_direct_mb(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) { let (col_mb, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y); if direct_8x8 { for blk4 in 0..16 { @@ -793,7 +815,7 @@ impl SliceState { } } } - pub fn predict_direct_sub(&mut self, frame_refs: &SliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) { + pub fn predict_direct_sub(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) { let src_blk = if !direct8x8 { blk4 } else { BLK4_TO_D8[blk4] }; let (mbi, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y); let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &mbi, r1_poc, r1_long, temporal_mv, cur_id, src_blk); @@ -801,7 +823,7 @@ impl SliceState { self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1]; } #[allow(clippy::nonminimal_bool)] - pub fn get_direct_mv(&self, frame_refs: &SliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) { + pub fn get_direct_mv(&self, frame_refs: &SimplifiedSliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) { let blk8 = blk4_to_blk8(blk4); let (col_mv, r0_poc, col_idx) = if mbi.ref_poc[blk8] == [MISSING_POC; 2] { (ZERO_MV, MISSING_POC, MISSING_REF) -- 2.39.5