From: Kostya Shishkov Date: Wed, 14 Jun 2023 16:51:44 +0000 (+0200) Subject: h264: add multi-threaded decoder X-Git-Url: https://git.nihav.org/?a=commitdiff_plain;h=11d7aef2ac58fe8d3341ffdcec7eaf9ee4e266f1;p=nihav.git h264: add multi-threaded decoder --- diff --git a/nihav-itu/src/codecs/h264/decoder_st.rs b/nihav-itu/src/codecs/h264/decoder_mt.rs similarity index 66% copy from nihav-itu/src/codecs/h264/decoder_st.rs copy to nihav-itu/src/codecs/h264/decoder_mt.rs index 63815f8..69a335b 100644 --- a/nihav-itu/src/codecs/h264/decoder_st.rs +++ b/nihav-itu/src/codecs/h264/decoder_mt.rs @@ -1,473 +1,55 @@ -use std::sync::Arc; +/* TODO: + * buffer pool for DSP avg frames +*/ use nihav_core::codecs::*; use nihav_core::io::bitreader::*; use super::*; - -struct H264Decoder { - info: NACodecInfoRef, - width: usize, - height: usize, - num_mbs: usize, - nal_len: u8, - sps: Vec>, - cur_sps: usize, - pps: Vec>, - cur_pps: usize, - - skip_mode: FrameSkipMode, - deblock_skip: bool, - - is_mbaff: bool, - - cavlc_cb: CAVLCTables, - - sstate: SliceState, - - cur_pic: Option, - cur_id: u16, - has_pic: bool, - frame_refs: FrameRefs, - - temporal_mv: bool, - deblock_mode: u8, - lf_alpha: i8, - lf_beta: i8, - is_s: bool, - - ipcm_buf: [u8; 256 + 64 + 64], - - mc_dsp: H264MC, - - transform_8x8_mode: bool, +use super::dispatch::*; + +pub struct FrameDecoder { + pub slices: Vec<(SliceHeader, usize, SliceRefs, Vec)>, + pub cur_pic: PictureInfo, + sps: Arc, + pps: Arc, + pub num_mbs: usize, + mc_dsp: H264MC, + dispatch: Shareable, + sstate: SliceState, + cavlc_cb: Arc, + ipcm_buf: [u8; 256 + 64 + 64], + is_mbaff: bool, + deblock_skip: bool, } - -impl H264Decoder { - fn new() -> Self { - let avg_vi = NAVideoInfo { width: 32, height: 32, flipped: false, format: YUV420_FORMAT, bits: 12 }; - let avg_buf = alloc_video_buffer(avg_vi, 4).unwrap().get_vbuf().unwrap(); - H264Decoder{ - info: NACodecInfoRef::default(), - width: 0, - height: 0, - num_mbs: 0, - nal_len: 0, - sps: Vec::with_capacity(1), 
- cur_sps: 0, - pps: Vec::with_capacity(3), - cur_pps: 0, - - skip_mode: FrameSkipMode::default(), - deblock_skip: false, - - is_mbaff: false, - - cavlc_cb: CAVLCTables::new(), - - sstate: SliceState::new(), - cur_pic: None, - cur_id: 0, - has_pic: false, - frame_refs: FrameRefs::new(), - - temporal_mv: false, - deblock_mode: 0, - lf_alpha: 0, - lf_beta: 0, - is_s: false, - - ipcm_buf: [0; 256 + 64 + 64], - - mc_dsp: H264MC::new(avg_buf), - - transform_8x8_mode: false, - } - } - fn handle_nal(&mut self, src: &[u8], supp: &mut NADecoderSupport, skip_decoding: bool) -> DecoderResult<()> { - validate!(!src.is_empty()); - validate!((src[0] & 0x80) == 0); - let nal_ref_idc = src[0] >> 5; - let nal_unit_type = src[0] & 0x1F; - - let mut full_size = src.len() * 8; - for &byte in src.iter().rev() { - if byte == 0 { - full_size -= 8; - } else { - full_size -= (byte.trailing_zeros() + 1) as usize; - break; - } - } - validate!(full_size > 0); - match nal_unit_type { - 1 | 5 if !skip_decoding => { - let is_idr = nal_unit_type == 5; - let mut br = BitReader::new(&src[..(full_size + 7)/8], BitReaderMode::BE); - br.skip(8)?; - - let slice_hdr = parse_slice_header(&mut br, self.sps.as_slice(), self.pps.as_slice(), is_idr, nal_ref_idc)?; - validate!(br.tell() < full_size); - let full_id; - if slice_hdr.first_mb_in_slice == 0 { - validate!(self.cur_pic.is_none()); - for (i, pps) in self.pps.iter().enumerate() { - if pps.pic_parameter_set_id == slice_hdr.pic_parameter_set_id { - self.cur_pps = i; - break; - } - } - for (i, sps) in self.sps.iter().enumerate() { - if sps.seq_parameter_set_id == self.pps[self.cur_pps].seq_parameter_set_id { - self.cur_sps = i; - break; - } - } - - full_id = self.frame_refs.calc_picture_num(&slice_hdr, is_idr, nal_ref_idc, &self.sps[self.cur_sps]); - - let sps = &self.sps[self.cur_sps]; - if sps.chroma_format_idc != 1 || sps.bit_depth_luma != 8 || sps.bit_depth_chroma != 8 { -println!(" chroma fmt {} bits {}/{}", sps.chroma_format_idc, 
sps.bit_depth_luma, sps.bit_depth_chroma); - return Err(DecoderError::NotImplemented); - } - //let pps = &self.pps[self.cur_pps]; - - if is_idr { - self.frame_refs.clear_refs(); - } - - self.width = sps.pic_width_in_mbs << 4; - self.height = sps.pic_height_in_mbs << 4; - self.num_mbs = sps.pic_width_in_mbs * sps.pic_height_in_mbs; - self.mc_dsp.set_dimensions(self.width, self.height); - - self.is_mbaff = sps.mb_adaptive_frame_field && !slice_hdr.field_pic; - if self.is_mbaff { -println!("MBAFF"); - return Err(DecoderError::NotImplemented); - } - if !sps.frame_mbs_only { -println!("PAFF?"); - return Err(DecoderError::NotImplemented); - } - -//if slice_hdr.slice_type.is_b() { return Ok(()); } - self.cur_id = full_id as u16; - } else { - if let Some(ref mut pic) = self.cur_pic { - validate!(pic.cur_mb == slice_hdr.first_mb_in_slice); - let new_type = slice_hdr.slice_type.to_frame_type(); - pic.pic_type = match (pic.pic_type, new_type) { - (FrameType::I, _) => new_type, - (_, FrameType::B) => FrameType::B, - _ => pic.pic_type, - }; - full_id = pic.full_id; - } else { - return Ok(());//Err(DecoderError::InvalidData); - } - validate!(self.cur_pps < self.pps.len() && self.pps[self.cur_pps].pic_parameter_set_id == slice_hdr.pic_parameter_set_id); - } - - let sps = &self.sps[self.cur_sps]; - let pps = &self.pps[self.cur_pps]; - - self.temporal_mv = !slice_hdr.direct_spatial_mv_pred; - self.is_s = slice_hdr.slice_type == SliceType::SI || slice_hdr.slice_type == SliceType::SP; - self.deblock_mode = slice_hdr.disable_deblocking_filter_idc; - self.lf_alpha = slice_hdr.slice_alpha_c0_offset; - self.lf_beta = slice_hdr.slice_beta_offset; - - self.frame_refs.select_refs(sps, &slice_hdr, full_id); - - if slice_hdr.adaptive_ref_pic_marking_mode { - self.frame_refs.apply_adaptive_marking(&slice_hdr.adaptive_ref_pic_marking, slice_hdr.frame_num, 1 << self.sps[self.cur_sps].log2_max_frame_num)?; - } - if slice_hdr.first_mb_in_slice == 0 { - let ret = supp.pool_u8.get_free(); - if 
ret.is_none() { - return Err(DecoderError::AllocError); - } - let tmp_vinfo = NAVideoInfo::new(self.width, self.height, false, YUV420_FORMAT); - let mut buf = ret.unwrap(); - if buf.get_info() != tmp_vinfo { - supp.pool_u8.reset(); - supp.pool_u8.prealloc_video(tmp_vinfo, 4)?; - let ret = supp.pool_u8.get_free(); - if ret.is_none() { - return Err(DecoderError::AllocError); - } - buf = ret.unwrap(); - } - self.cur_pic = Some(PictureInfo { - id: slice_hdr.frame_num, - full_id, - pic_type: slice_hdr.slice_type.to_frame_type(), - buf, - cur_mb: 0, - is_ref: nal_ref_idc != 0, - long_term: get_long_term_id(is_idr, &slice_hdr), - mv_info: NABufferRef::new(FrameMV::new(sps.pic_width_in_mbs, sps.pic_height_in_mbs)), - }); - } - - self.transform_8x8_mode = pps.transform_8x8_mode; - - self.sstate.reset(sps.pic_width_in_mbs, sps.pic_height_in_mbs, slice_hdr.first_mb_in_slice); - if !pps.entropy_coding_mode { - self.has_pic = self.decode_slice_cavlc(&mut br, &slice_hdr, full_size)?; - } else { - br.align(); - let start = (br.tell() / 8) as usize; - let csrc = &src[start..]; - validate!(csrc.len() >= 2); - let mut cabac = CABAC::new(csrc, slice_hdr.slice_type, slice_hdr.slice_qp, slice_hdr.cabac_init_idc as usize)?; - self.has_pic = self.decode_slice_cabac(&mut cabac, &slice_hdr)?; - } - }, - 2 => { // slice data partition A - //slice header - //slice id = read_ue() - //cat 2 slice data (all but MB layer residual) - return Err(DecoderError::NotImplemented); - }, - 3 => { // slice data partition B - //slice id = read_ue() - //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() } - //cat 3 slice data (MB layer residual) - return Err(DecoderError::NotImplemented); - }, - 4 => { // slice data partition C - //slice id = read_ue() - //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() } - //cat 4 slice data (MB layer residual) - return Err(DecoderError::NotImplemented); - }, - 6 => {}, //SEI - 7 => { - let sps = parse_sps(&src[1..])?; - 
self.sps.push(Arc::new(sps)); - }, - 8 => { - validate!(full_size >= 8 + 16); - let pps = parse_pps(&src[1..], &self.sps, full_size - 8)?; - let mut found = false; - for stored_pps in self.pps.iter_mut() { - if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id { - *stored_pps = Arc::clone(&pps); - found = true; - break; - } - } - if !found { - self.pps.push(pps); - } - }, - 9 => { // access unit delimiter - }, - 10 => {}, //end of sequence - 11 => {}, //end of stream - 12 => {}, //filler - _ => {}, - }; - - Ok(()) - } - fn pred_mv(sstate: &mut SliceState, frame_refs: &SliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) { - let mb_type = mb_info.mb_type; - if !mb_type.is_4x4() { - let (pw, ph) = mb_type.size(); - let mut xoff = 0; - let mut yoff = 0; - if mb_type == MBType::Direct || mb_type == MBType::BSkip { - sstate.predict_direct_mb(frame_refs, temporal_mv, direct_8x8, cur_id); - } - for part in 0..mb_type.num_parts() { - if !mb_type.is_l1(part) { - match mb_type { - MBType::PSkip => sstate.predict_pskip(), - MBType::BSkip | MBType::Direct => { - }, - _ => { - sstate.predict(xoff, yoff, pw, ph, 0, - mb_info.mv_l0[part], mb_info.ref_l0[part]); - }, - }; - } - if !mb_type.is_l0(part) && mb_type != MBType::BSkip && mb_type != MBType::Direct { - sstate.predict(xoff, yoff, pw, ph, 1, mb_info.mv_l1[part], mb_info.ref_l1[part]); - } - if pw != 16 { - xoff += pw; - } else { - yoff += ph; - } - } - } else { - for part in 0..4 { - let sub_type = mb_info.sub_mb_type[part]; - let mut xoff = (part & 1) * 8; - let mut yoff = (part & 2) * 4; - let orig_x = xoff; - let (pw, ph) = sub_type.size(); - for subpart in 0..sub_type.num_parts() { - if sub_type != SubMBType::Direct8x8 { - if !sub_type.is_l1() { - sstate.predict(xoff, yoff, pw, ph, 0, mb_info.mv_l0[part * 4 + subpart], mb_info.ref_l0[part]); - } - if !sub_type.is_l0() { - sstate.predict(xoff, yoff, pw, ph, 1, mb_info.mv_l1[part * 4 + subpart], mb_info.ref_l1[part]); - } - } 
else { - for sblk in 0..4 { - sstate.predict_direct_sub(frame_refs, temporal_mv, direct_8x8, cur_id, (xoff / 4) + (sblk & 1) + (yoff / 4) * 4 + (sblk & 2) * 2); - } - } - xoff += pw; - if xoff == orig_x + 8 { - xoff -= 8; - yoff += ph; - } - } - } - } - } - #[allow(clippy::cognitive_complexity)] - fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo) { - let pps = &self.pps[self.cur_pps]; - - let qp_y = mb_info.qp_y; - let qpr = ((qp_y as i8) + pps.chroma_qp_index_offset).max(0).min(51) as usize; - let qp_u = CHROMA_QUANTS[qpr]; - let qpb = ((qp_y as i8) + pps.second_chroma_qp_index_offset).max(0).min(51) as usize; - let qp_v = CHROMA_QUANTS[qpb]; - - let tx_bypass = qp_y == 0 && self.sps[self.cur_sps].qpprime_y_zero_transform_bypass; - - self.sstate.get_cur_mb().mb_type = mb_info.mb_type.into(); - if mb_info.mb_type != MBType::PCM { - self.sstate.get_cur_mb().qp_y = qp_y; - self.sstate.get_cur_mb().qp_u = qp_u; - self.sstate.get_cur_mb().qp_v = qp_v; - self.sstate.get_cur_mb().transform_8x8 = mb_info.transform_size_8x8; - } - let has_dc = mb_info.mb_type.is_intra16x16() && mb_info.coded[24]; - if has_dc { - idct_luma_dc(&mut mb_info.coeffs[24], qp_y); - for i in 0..16 { - mb_info.coeffs[i][0] = mb_info.coeffs[24][i]; - } - } - if !mb_info.transform_size_8x8 { - let quant_dc = !mb_info.mb_type.is_intra16x16(); - for i in 0..16 { - if mb_info.coded[i] { - if !tx_bypass { - idct(&mut mb_info.coeffs[i], qp_y, quant_dc); - } - } else if has_dc { - if !tx_bypass { - idct_dc(&mut mb_info.coeffs[i], qp_y, quant_dc); - } - mb_info.coded[i] = true; - } - } - } else { - for i in 0..4 { - if mb_info.coded[(i & 1) * 2 + (i & 2) * 4] && !tx_bypass { - dequant8x8(&mut mb_info.coeffs8x8[i].coeffs, &pps.scaling_list_8x8[!mb_info.mb_type.is_intra() as usize]); - idct8x8(&mut mb_info.coeffs8x8[i].coeffs, qp_y); - } - } - } - for chroma in 0..2 { - let qp_c = if chroma == 0 { qp_u } else { qp_v }; - if mb_info.cbpc != 0 { - chroma_dc_transform(&mut 
mb_info.chroma_dc[chroma], qp_c); - } - for i in 0..4 { - let blk_no = 16 + chroma * 4 + i; - mb_info.coeffs[blk_no][0] = mb_info.chroma_dc[chroma][i]; - if mb_info.coded[blk_no] { - idct(&mut mb_info.coeffs[blk_no], qp_c, false); - } else if mb_info.coeffs[blk_no][0] != 0 { - idct_dc(&mut mb_info.coeffs[blk_no], qp_c, false); - mb_info.coded[blk_no] = true; - } - } - } - if !pps.entropy_coding_mode || mb_info.mb_type.is_skip() || mb_info.mb_type.is_intra() { - self.sstate.reset_mb_mv(); - } - if !mb_info.mb_type.is_intra() { - Self::pred_mv(&mut self.sstate, &self.frame_refs.cur_refs, mb_info, self.cur_id, self.temporal_mv, self.sps[self.cur_sps].direct_8x8_inference); - } - if !pps.constrained_intra_pred && mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 { - self.sstate.fill_ipred(IntraPredMode::DC); - } - - let xpos = self.sstate.mb_x * 16; - let ypos = self.sstate.mb_y * 16; - if let Some(ref mut pic) = self.cur_pic { - let mut frm = NASimpleVideoFrame::from_video_buf(&mut pic.buf).unwrap(); - if mb_info.mb_type != MBType::PCM { - let weight_mode = if self.pps[self.cur_pps].weighted_pred && slice_hdr.slice_type.is_p() { - 1 - } else if slice_hdr.slice_type.is_b() { - self.pps[self.cur_pps].weighted_bipred_idc - } else { - 0 - }; - recon_mb(&mut frm, slice_hdr, mb_info, &mut self.sstate, &self.frame_refs.cur_refs, &mut self.mc_dsp, weight_mode); - } else { - for (dline, src) in frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..].chunks_mut(frm.stride[0]).take(16).zip(self.ipcm_buf.chunks(16)) { - dline[..16].copy_from_slice(src); - } - for (dline, src) in frm.data[frm.offset[1] + xpos/2 + ypos/2 * frm.stride[1]..].chunks_mut(frm.stride[1]).take(8).zip(self.ipcm_buf[256..].chunks(8)) { - dline[..8].copy_from_slice(src); - } - for (dline, src) in frm.data[frm.offset[2] + xpos/2 + ypos/2 * frm.stride[2]..].chunks_mut(frm.stride[2]).take(8).zip(self.ipcm_buf[256 + 64..].chunks(8)) { - dline[..8].copy_from_slice(src); - } - } -/*match 
mb_info.mb_type { -MBType::BSkip | MBType::Direct | MBType::B16x16(_) | MBType::B16x8(_, _) | MBType::B8x16(_, _) | MBType::B8x8 => { - let dstride = frm.stride[0]; - let dst = &mut frm.data[frm.offset[0] + self.sstate.mb_x * 16 + self.sstate.mb_y * 16 * dstride..]; - for el in dst[..16].iter_mut() { *el = 255; } - for row in dst.chunks_mut(dstride).skip(1).take(15) { - row[0] = 255; - } -}, -_ => {}, -};*/ - self.sstate.save_ipred_context(&frm); - } - if let Some(ref mut pic) = self.cur_pic { - let mv_info = &mut pic.mv_info; - let mb_pos = self.sstate.mb_x + self.sstate.mb_y * mv_info.mb_stride; - let mut mb = FrameMBInfo::new(); - mb.mb_type = mb_info.mb_type.into(); - for blk4 in 0..16 { - mb.mv[blk4] = self.sstate.get_cur_blk4(blk4).mv; - } - for blk8 in 0..4 { - mb.ref_poc[blk8] = self.frame_refs.cur_refs.map_refs(self.sstate.get_cur_blk8(blk8).ref_idx); - mb.ref_idx[blk8] = self.sstate.get_cur_blk8(blk8).ref_idx; + +impl FrameDecoder { + pub fn decode_slice(&mut self, hdr: &SliceHeader, hdr_size: usize, refs: &SliceRefs, nal: &[u8]) -> DecoderResult { + self.sstate.reset(self.sps.pic_width_in_mbs, self.sps.pic_height_in_mbs, hdr.first_mb_in_slice); + + let mut full_size = nal.len() * 8; + for &byte in nal.iter().rev() { + if byte == 0 { + full_size -= 8; + } else { + full_size -= (byte.trailing_zeros() + 1) as usize; + break; } - mv_info.mbs[mb_pos] = mb; } - if !self.deblock_skip && self.deblock_mode != 1 { - self.sstate.fill_deblock(&self.frame_refs.cur_refs, self.deblock_mode, self.is_s); - if let Some(ref mut pic) = self.cur_pic { - let mut frm = NASimpleVideoFrame::from_video_buf(&mut pic.buf).unwrap(); - loop_filter_mb(&mut frm, &self.sstate, self.lf_alpha, self.lf_beta); - } + validate!(full_size > 0); + + let mut br = BitReader::new(&nal[hdr_size / 8..], BitReaderMode::BE); + if !self.pps.entropy_coding_mode { + br.skip((hdr_size & 7) as u32)?; + self.decode_slice_cavlc(&mut br, full_size - (hdr_size & !7), hdr, refs) + } else { + let csrc = 
&nal[(hdr_size + 7) / 8..]; + validate!(csrc.len() >= 2); + let mut cabac = CABAC::new(csrc, hdr.slice_type, hdr.slice_qp, hdr.cabac_init_idc as usize)?; + self.decode_slice_cabac(&mut cabac, hdr, refs) } - self.sstate.next_mb(); } - fn decode_slice_cavlc(&mut self, br: &mut BitReader, slice_hdr: &SliceHeader, full_size: usize) -> DecoderResult { + fn decode_slice_cavlc(&mut self, br: &mut BitReader, full_size: usize, slice_hdr: &SliceHeader, refs: &SliceRefs) -> DecoderResult { const INTRA_CBP: [u8; 48] = [ 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, @@ -497,7 +79,7 @@ _ => {}, validate!(mb_idx + mb_skip_run <= self.num_mbs); mb_info.mb_type = skip_type; for _ in 0..mb_skip_run { - self.handle_macroblock(slice_hdr, &mut mb_info); + self.handle_macroblock(slice_hdr, &mut mb_info, refs)?; mb_idx += 1; } if mb_idx == self.num_mbs || br.tell() >= full_size { @@ -518,7 +100,7 @@ _ => {}, } self.sstate.fill_ncoded(16); } else { - if self.transform_8x8_mode && mb_type == MBType::Intra4x4 { + if self.pps.transform_8x8_mode && mb_type == MBType::Intra4x4 { mb_info.transform_size_8x8 = br.read_bool()?; if mb_info.transform_size_8x8 { mb_type = MBType::Intra8x8; @@ -536,7 +118,7 @@ _ => {}, } else { INTER_CBP[cbp_id] }; - if self.transform_8x8_mode && (cbp & 0xF) != 0 && mb_info.can_have_8x8_tx(self.sps[self.cur_sps].direct_8x8_inference) { + if self.pps.transform_8x8_mode && (cbp & 0xF) != 0 && mb_info.can_have_8x8_tx(self.sps.direct_8x8_inference) { mb_info.transform_size_8x8 = br.read_bool()?; } ((cbp & 0xF), (cbp >> 4)) @@ -556,134 +138,568 @@ _ => {}, new_qp as u8 }; mb_info.coeffs = [[0; 16]; 25]; - if self.transform_8x8_mode { + if self.pps.transform_8x8_mode { mb_info.clear_coeffs8x8(); } mb_info.chroma_dc = [[0; 4]; 2]; decode_residual_cavlc(br, &mut self.sstate, &mut mb_info, &self.cavlc_cb)?; } } - self.handle_macroblock(slice_hdr, &mut mb_info); + self.handle_macroblock(slice_hdr, &mut 
mb_info, refs)?; + } + mb_idx += 1; + if let Ok(disp) = self.dispatch.read() { + disp.update_pos(self.cur_pic.full_id, mb_idx); + } + } + Ok(mb_idx) + } + fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader, refs: &SliceRefs) -> DecoderResult { + let mut mb_idx = slice_hdr.first_mb_in_slice as usize; + let mut prev_mb_skipped = false; + let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip }; + let mut last_qp_diff = false; + + let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() }; + + while mb_idx < self.num_mbs { + mb_info.coded = [false; 25]; + mb_info.ref_l0 = [ZERO_REF; 4]; + mb_info.ref_l1 = [ZERO_REF; 4]; + mb_info.mv_l0 = [ZERO_MV; 16]; + mb_info.mv_l1 = [ZERO_MV; 16]; + mb_info.chroma_dc = [[0; 4]; 2]; + mb_info.cbpy = 0; + mb_info.cbpc = 0; + let mb_skip = cabac_decode_mbskip(cabac, &self.sstate, slice_hdr); + if !mb_skip { + if self.is_mbaff && (((mb_idx & 1) == 0) || (prev_mb_skipped && ((mb_idx & 1) == 1))) { + let _mb_field_decoding = cabac.decode_bit(70); + } + let mut mb_type = cabac_decode_mb_type(cabac, slice_hdr, &self.sstate); + mb_info.mb_type = mb_type; + mb_info.transform_size_8x8 = false; + if mb_type == MBType::PCM { + let ipcm_size = 256 + 64 + 64; + validate!(cabac.pos + ipcm_size <= cabac.src.len()); + self.ipcm_buf[..ipcm_size].copy_from_slice(&cabac.src[cabac.pos..][..ipcm_size]); + cabac.pos += ipcm_size; + cabac.reinit()?; + last_qp_diff = false; + } else { + if self.pps.transform_8x8_mode && mb_type == MBType::Intra4x4 { + let mut ctx = 0; + if self.sstate.get_top_mb().transform_8x8 { + ctx += 1; + } + if self.sstate.get_left_mb().transform_8x8 { + ctx += 1; + } + mb_info.transform_size_8x8 = cabac.decode_bit(399 + ctx); + if mb_info.transform_size_8x8 { + mb_type = MBType::Intra8x8; + mb_info.mb_type = MBType::Intra8x8; + } + } + decode_mb_pred_cabac(cabac, slice_hdr, mb_type, &mut self.sstate, &mut mb_info); + let (cbpy, cbpc) = if let 
MBType::Intra16x16(_, cbpy, cbpc) = mb_type { + (cbpy, cbpc) + } else { + decode_cbp_cabac(cabac, &self.sstate) + }; + if self.pps.transform_8x8_mode && cbpy != 0 && mb_info.can_have_8x8_tx(self.sps.direct_8x8_inference) { + let mut ctx = 0; + if self.sstate.get_top_mb().transform_8x8 { + ctx += 1; + } + if self.sstate.get_left_mb().transform_8x8 { + ctx += 1; + } + mb_info.transform_size_8x8 = cabac.decode_bit(399 + ctx); + } + if mb_type.is_intra() { + self.sstate.get_cur_mb().cmode = mb_info.chroma_ipred; + } + mb_info.cbpy = cbpy; + mb_info.cbpc = cbpc; + self.sstate.get_cur_mb().cbp = (cbpc << 4) | cbpy; + if cbpy != 0 || cbpc != 0 || mb_type.is_intra16x16() { + let mb_qp_delta = decode_mb_qp_delta_cabac(cabac, last_qp_diff as usize); + validate!(mb_qp_delta >= -26 && mb_qp_delta <= 25); + last_qp_diff = mb_qp_delta != 0; + let new_qp = mb_qp_delta + i32::from(mb_info.qp_y); + mb_info.qp_y = if new_qp < 0 { + (new_qp + 52) as u8 + } else if new_qp >= 52 { + (new_qp - 52) as u8 + } else { + new_qp as u8 + }; + mb_info.coeffs = [[0; 16]; 25]; + if self.pps.transform_8x8_mode { + mb_info.clear_coeffs8x8(); + } + mb_info.chroma_dc = [[0; 4]; 2]; + decode_residual_cabac(cabac, &mut self.sstate, &mut mb_info); + } else { + last_qp_diff = false; + } + } + } else { + mb_info.mb_type = skip_type; + mb_info.transform_size_8x8 = false; + last_qp_diff = false; + } + self.handle_macroblock(slice_hdr, &mut mb_info, refs)?; + prev_mb_skipped = mb_skip; + if !(self.is_mbaff && ((mb_idx & 1) == 0)) && cabac.decode_terminate() { + if let Ok(disp) = self.dispatch.read() { + disp.update_pos(self.cur_pic.full_id, mb_idx + 1); + } + return Ok(mb_idx + 1); + } + mb_idx += 1; + if let Ok(disp) = self.dispatch.read() { + disp.update_pos(self.cur_pic.full_id, mb_idx); + } + } + Err(DecoderError::InvalidData) + } + #[allow(clippy::cognitive_complexity)] + fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, refs: &SliceRefs) -> DecoderResult<()> { + let 
qp_y = mb_info.qp_y; + let qpr = ((qp_y as i8) + self.pps.chroma_qp_index_offset).max(0).min(51) as usize; + let qp_u = CHROMA_QUANTS[qpr]; + let qpb = ((qp_y as i8) + self.pps.second_chroma_qp_index_offset).max(0).min(51) as usize; + let qp_v = CHROMA_QUANTS[qpb]; + + let tx_bypass = qp_y == 0 && self.sps.qpprime_y_zero_transform_bypass; + + self.sstate.get_cur_mb().mb_type = mb_info.mb_type.into(); + if mb_info.mb_type != MBType::PCM { + self.sstate.get_cur_mb().qp_y = qp_y; + self.sstate.get_cur_mb().qp_u = qp_u; + self.sstate.get_cur_mb().qp_v = qp_v; + self.sstate.get_cur_mb().transform_8x8 = mb_info.transform_size_8x8; + } + let has_dc = mb_info.mb_type.is_intra16x16() && mb_info.coded[24]; + if has_dc { + idct_luma_dc(&mut mb_info.coeffs[24], qp_y); + for i in 0..16 { + mb_info.coeffs[i][0] = mb_info.coeffs[24][i]; + } + } + if !mb_info.transform_size_8x8 { + let quant_dc = !mb_info.mb_type.is_intra16x16(); + for i in 0..16 { + if mb_info.coded[i] { + if !tx_bypass { + idct(&mut mb_info.coeffs[i], qp_y, quant_dc); + } + } else if has_dc { + if !tx_bypass { + idct_dc(&mut mb_info.coeffs[i], qp_y, quant_dc); + } + mb_info.coded[i] = true; + } + } + } else { + for i in 0..4 { + if mb_info.coded[(i & 1) * 2 + (i & 2) * 4] && !tx_bypass { + dequant8x8(&mut mb_info.coeffs8x8[i].coeffs, &self.pps.scaling_list_8x8[!mb_info.mb_type.is_intra() as usize]); + idct8x8(&mut mb_info.coeffs8x8[i].coeffs, qp_y); + } + } + } + for chroma in 0..2 { + let qp_c = if chroma == 0 { qp_u } else { qp_v }; + if mb_info.cbpc != 0 { + chroma_dc_transform(&mut mb_info.chroma_dc[chroma], qp_c); + } + for i in 0..4 { + let blk_no = 16 + chroma * 4 + i; + mb_info.coeffs[blk_no][0] = mb_info.chroma_dc[chroma][i]; + if mb_info.coded[blk_no] { + idct(&mut mb_info.coeffs[blk_no], qp_c, false); + } else if mb_info.coeffs[blk_no][0] != 0 { + idct_dc(&mut mb_info.coeffs[blk_no], qp_c, false); + mb_info.coded[blk_no] = true; + } + } + } + if !self.pps.entropy_coding_mode || 
mb_info.mb_type.is_skip() || mb_info.mb_type.is_intra() { + self.sstate.reset_mb_mv(); + } + if !mb_info.mb_type.is_intra() { + let temporal_mv = !slice_hdr.direct_spatial_mv_pred; + let cur_id = self.cur_pic.full_id as u16; + // wait for the reference macroblock MV to be available + if matches!(mb_info.mb_type, MBType::Direct | MBType::BSkip | MBType::B8x8) { + if let Some(ref_id) = refs.get_ref_id(0, mb_info.ref_l1[0].index()) { + wait_for_mb(&self.dispatch, &self.sstate, self.sstate.mb_x * 16, self.sstate.mb_y * 16, ZERO_MV, ref_id)?; + } + } + Self::pred_mv(&mut self.sstate, refs, mb_info, cur_id, temporal_mv, self.sps.direct_8x8_inference); + } + if !self.pps.constrained_intra_pred && mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 { + self.sstate.fill_ipred(IntraPredMode::DC); + } + + let xpos = self.sstate.mb_x * 16; + let ypos = self.sstate.mb_y * 16; + let mut frm = NASimpleVideoFrame::from_video_buf(&mut self.cur_pic.buf).unwrap(); + if mb_info.mb_type != MBType::PCM { + let weight_mode = if self.pps.weighted_pred && slice_hdr.slice_type.is_p() { + 1 + } else if slice_hdr.slice_type.is_b() { + self.pps.weighted_bipred_idc + } else { + 0 + }; + recon_mb_mt(&mut frm, slice_hdr, mb_info, &mut self.sstate, refs, &mut self.mc_dsp, weight_mode, &self.dispatch)?; + } else { + for (dline, src) in frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..].chunks_mut(frm.stride[0]).take(16).zip(self.ipcm_buf.chunks(16)) { + dline[..16].copy_from_slice(src); + } + for (dline, src) in frm.data[frm.offset[1] + xpos/2 + ypos/2 * frm.stride[1]..].chunks_mut(frm.stride[1]).take(8).zip(self.ipcm_buf[256..].chunks(8)) { + dline[..8].copy_from_slice(src); + } + for (dline, src) in frm.data[frm.offset[2] + xpos/2 + ypos/2 * frm.stride[2]..].chunks_mut(frm.stride[2]).take(8).zip(self.ipcm_buf[256 + 64..].chunks(8)) { + dline[..8].copy_from_slice(src); + } + } + self.sstate.save_ipred_context(&frm); + + let mv_info = &mut self.cur_pic.mv_info; + let 
mb_pos = self.sstate.mb_x + self.sstate.mb_y * mv_info.mb_stride; + let mut mb = FrameMBInfo::new(); + mb.mb_type = mb_info.mb_type.into(); + for blk4 in 0..16 { + mb.mv[blk4] = self.sstate.get_cur_blk4(blk4).mv; + } + for blk8 in 0..4 { + mb.ref_poc[blk8] = refs.map_refs(self.sstate.get_cur_blk8(blk8).ref_idx); + mb.ref_idx[blk8] = self.sstate.get_cur_blk8(blk8).ref_idx; + } + mv_info.mbs[mb_pos] = mb; + + let deblock_mode = slice_hdr.disable_deblocking_filter_idc; + if !self.deblock_skip && deblock_mode != 1 { + let is_s = slice_hdr.slice_type == SliceType::SI || slice_hdr.slice_type == SliceType::SP; + self.sstate.fill_deblock(refs, deblock_mode, is_s); + let mut frm = NASimpleVideoFrame::from_video_buf(&mut self.cur_pic.buf).unwrap(); + let lf_alpha = slice_hdr.slice_alpha_c0_offset; + let lf_beta = slice_hdr.slice_beta_offset; + loop_filter_mb(&mut frm, &self.sstate, lf_alpha, lf_beta); + } + self.sstate.next_mb(); + Ok(()) + } + + fn pred_mv(sstate: &mut SliceState, frame_refs: &SliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) { + let mb_type = mb_info.mb_type; + if !mb_type.is_4x4() { + let (pw, ph) = mb_type.size(); + let mut xoff = 0; + let mut yoff = 0; + if mb_type == MBType::Direct || mb_type == MBType::BSkip { + sstate.predict_direct_mb(frame_refs, temporal_mv, direct_8x8, cur_id); + } + for part in 0..mb_type.num_parts() { + if !mb_type.is_l1(part) { + match mb_type { + MBType::PSkip => sstate.predict_pskip(), + MBType::BSkip | MBType::Direct => { + }, + _ => { + sstate.predict(xoff, yoff, pw, ph, 0, + mb_info.mv_l0[part], mb_info.ref_l0[part]); + }, + }; + } + if !mb_type.is_l0(part) && mb_type != MBType::BSkip && mb_type != MBType::Direct { + sstate.predict(xoff, yoff, pw, ph, 1, mb_info.mv_l1[part], mb_info.ref_l1[part]); + } + if pw != 16 { + xoff += pw; + } else { + yoff += ph; + } + } + } else { + for part in 0..4 { + let sub_type = mb_info.sub_mb_type[part]; + let mut xoff = (part & 1) * 8; + let mut 
yoff = (part & 2) * 4; + let orig_x = xoff; + let (pw, ph) = sub_type.size(); + for subpart in 0..sub_type.num_parts() { + if sub_type != SubMBType::Direct8x8 { + if !sub_type.is_l1() { + sstate.predict(xoff, yoff, pw, ph, 0, mb_info.mv_l0[part * 4 + subpart], mb_info.ref_l0[part]); + } + if !sub_type.is_l0() { + sstate.predict(xoff, yoff, pw, ph, 1, mb_info.mv_l1[part * 4 + subpart], mb_info.ref_l1[part]); + } + } else { + for sblk in 0..4 { + sstate.predict_direct_sub(frame_refs, temporal_mv, direct_8x8, cur_id, (xoff / 4) + (sblk & 1) + (yoff / 4) * 4 + (sblk & 2) * 2); + } + } + xoff += pw; + if xoff == orig_x + 8 { + xoff -= 8; + yoff += ph; + } + } } - mb_idx += 1; } - if let Some(ref mut pic) = self.cur_pic { - pic.cur_mb = mb_idx; + } +} + +struct H264MTDecoder { + info: NACodecInfoRef, + nal_len: u8, + dispatch: Shareable, + frame_refs: FrameRefs, + skip_mode: FrameSkipMode, + sps: Vec>, + cur_sps: usize, + pps: Vec>, + cur_pps: usize, + cur_fdec: Option, + cavlc_cb: Arc, + deblock_skip: bool, + max_last_poc: u32, + poc_base: u32, +} + +impl H264MTDecoder { + fn new() -> Self { + Self { + info: NACodecInfoRef::default(), + nal_len: 0, + dispatch: Arc::new(RwLock::new(ThreadDispatcher::new())), + frame_refs: FrameRefs::new(), + skip_mode: FrameSkipMode::default(), + sps: Vec::new(), + cur_sps: 0, + pps: Vec::new(), + cur_pps: 0, + cur_fdec: None, + cavlc_cb: Arc::new(CAVLCTables::new()), + deblock_skip: false, + max_last_poc: 0, + poc_base: 0, } - Ok(mb_idx == self.num_mbs) } - fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader) -> DecoderResult { - let mut mb_idx = slice_hdr.first_mb_in_slice as usize; - let mut prev_mb_skipped = false; - let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip }; - let mut last_qp_diff = false; + fn handle_nal(&mut self, src: Vec, supp: &mut NADecoderSupport, skip_decoding: bool, user_id: u32, time: NATimeInfo) -> DecoderResult<()> { + validate!(!src.is_empty()); + 
validate!((src[0] & 0x80) == 0); + let nal_ref_idc = src[0] >> 5; + let nal_unit_type = src[0] & 0x1F; - let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() }; + let mut full_size = src.len() * 8; + for &byte in src.iter().rev() { + if byte == 0 { + full_size -= 8; + } else { + full_size -= (byte.trailing_zeros() + 1) as usize; + break; + } + } + validate!(full_size > 0); + match nal_unit_type { + 1 | 5 if !skip_decoding => { + let is_idr = nal_unit_type == 5; + let mut br = BitReader::new(&src[..(full_size + 7)/8], BitReaderMode::BE); + br.skip(8)?; - while mb_idx < self.num_mbs { - mb_info.coded = [false; 25]; - mb_info.ref_l0 = [ZERO_REF; 4]; - mb_info.ref_l1 = [ZERO_REF; 4]; - mb_info.mv_l0 = [ZERO_MV; 16]; - mb_info.mv_l1 = [ZERO_MV; 16]; - mb_info.chroma_dc = [[0; 4]; 2]; - mb_info.cbpy = 0; - mb_info.cbpc = 0; - let mb_skip = cabac_decode_mbskip(cabac, &self.sstate, slice_hdr); - if !mb_skip { - if self.is_mbaff && (((mb_idx & 1) == 0) || (prev_mb_skipped && ((mb_idx & 1) == 1))) { - let _mb_field_decoding = cabac.decode_bit(70); - } - let mut mb_type = cabac_decode_mb_type(cabac, slice_hdr, &self.sstate); - mb_info.mb_type = mb_type; - mb_info.transform_size_8x8 = false; - if mb_type == MBType::PCM { - let ipcm_size = 256 + 64 + 64; - validate!(cabac.pos + ipcm_size <= cabac.src.len()); - self.ipcm_buf[..ipcm_size].copy_from_slice(&cabac.src[cabac.pos..][..ipcm_size]); - cabac.pos += ipcm_size; - cabac.reinit()?; - last_qp_diff = false; - } else { - if self.transform_8x8_mode && mb_type == MBType::Intra4x4 { - let mut ctx = 0; - if self.sstate.get_top_mb().transform_8x8 { - ctx += 1; - } - if self.sstate.get_left_mb().transform_8x8 { - ctx += 1; - } - mb_info.transform_size_8x8 = cabac.decode_bit(399 + ctx); - if mb_info.transform_size_8x8 { - mb_type = MBType::Intra8x8; - mb_info.mb_type = MBType::Intra8x8; + let slice_hdr = parse_slice_header(&mut br, self.sps.as_slice(), self.pps.as_slice(), is_idr, nal_ref_idc)?; + let 
hdr_size = br.tell(); + validate!(br.tell() < full_size); + let full_id; + if slice_hdr.first_mb_in_slice == 0 { + validate!(self.cur_fdec.is_none()); + for (i, pps) in self.pps.iter().enumerate() { + if pps.pic_parameter_set_id == slice_hdr.pic_parameter_set_id { + self.cur_pps = i; + break; } } - decode_mb_pred_cabac(cabac, slice_hdr, mb_type, &mut self.sstate, &mut mb_info); - let (cbpy, cbpc) = if let MBType::Intra16x16(_, cbpy, cbpc) = mb_type { - (cbpy, cbpc) - } else { - decode_cbp_cabac(cabac, &self.sstate) - }; - if self.transform_8x8_mode && cbpy != 0 && mb_info.can_have_8x8_tx(self.sps[self.cur_sps].direct_8x8_inference) { - let mut ctx = 0; - if self.sstate.get_top_mb().transform_8x8 { - ctx += 1; + for (i, sps) in self.sps.iter().enumerate() { + if sps.seq_parameter_set_id == self.pps[self.cur_pps].seq_parameter_set_id { + self.cur_sps = i; + break; } - if self.sstate.get_left_mb().transform_8x8 { - ctx += 1; + } + + let mut cur_full_id = self.frame_refs.calc_picture_num(&slice_hdr, is_idr, nal_ref_idc, &self.sps[self.cur_sps]) + self.poc_base; + if is_idr { + if cur_full_id <= self.max_last_poc { + self.poc_base = self.max_last_poc + 2 - (cur_full_id - self.poc_base); + cur_full_id = self.max_last_poc + 2; } - mb_info.transform_size_8x8 = cabac.decode_bit(399 + ctx); } - if mb_type.is_intra() { - self.sstate.get_cur_mb().cmode = mb_info.chroma_ipred; + self.max_last_poc = self.max_last_poc.max(cur_full_id); + full_id = cur_full_id; + + let sps = &self.sps[self.cur_sps]; + if sps.chroma_format_idc != 1 || sps.bit_depth_luma != 8 || sps.bit_depth_chroma != 8 { + println!(" chroma fmt {} bits {}/{}", sps.chroma_format_idc, sps.bit_depth_luma, sps.bit_depth_chroma); + return Err(DecoderError::NotImplemented); } - mb_info.cbpy = cbpy; - mb_info.cbpc = cbpc; - self.sstate.get_cur_mb().cbp = (cbpc << 4) | cbpy; - if cbpy != 0 || cbpc != 0 || mb_type.is_intra16x16() { - let mb_qp_delta = decode_mb_qp_delta_cabac(cabac, last_qp_diff as usize); - 
validate!(mb_qp_delta >= -26 && mb_qp_delta <= 25); - last_qp_diff = mb_qp_delta != 0; - let new_qp = mb_qp_delta + i32::from(mb_info.qp_y); - mb_info.qp_y = if new_qp < 0 { - (new_qp + 52) as u8 - } else if new_qp >= 52 { - (new_qp - 52) as u8 + + if is_idr { + self.frame_refs.clear_refs(); + } + + let width = sps.pic_width_in_mbs << 4; + let height = sps.pic_height_in_mbs << 4; + let num_mbs = sps.pic_width_in_mbs * sps.pic_height_in_mbs; + + let avg_vi = NAVideoInfo { width: 32, height: 32, flipped: false, format: YUV420_FORMAT, bits: 12 }; + let avg_buf = alloc_video_buffer(avg_vi, 4).unwrap().get_vbuf().unwrap(); + let mut mc_dsp = H264MC::new(avg_buf); + mc_dsp.set_dimensions(width, height); + + let is_mbaff = sps.mb_adaptive_frame_field && !slice_hdr.field_pic; + if is_mbaff { + println!("MBAFF"); + return Err(DecoderError::NotImplemented); + } + if !sps.frame_mbs_only { + println!("PAFF?"); + return Err(DecoderError::NotImplemented); + } + + let cur_vinfo = supp.pool_u8.get_info(); + let tmp_vinfo = NAVideoInfo::new(width, height, false, YUV420_FORMAT); + if cur_vinfo != Some(tmp_vinfo) { + supp.pool_u8.reset(); + supp.pool_u8.prealloc_video(tmp_vinfo, 4)?; + } + + let buf = if let Some(pic) = supp.pool_u8.get_free() { + pic + } else { + if supp.pool_u8.get_num_used() > 256 { + return Err(DecoderError::AllocError); + } + if let Ok(nbuf) = alloc_video_buffer(tmp_vinfo, 4) { + let vbuf = nbuf.get_vbuf().unwrap(); + supp.pool_u8.add_frame(vbuf.clone()); + vbuf } else { - new_qp as u8 + return Err(DecoderError::AllocError); + } + }; + + let cur_pic = PictureInfo { + id: slice_hdr.frame_num, + full_id, user_id, time, + pic_type: slice_hdr.slice_type.to_frame_type(), + buf, + cur_mb: 0, + is_ref: nal_ref_idc != 0, + is_idr, + long_term: get_long_term_id(is_idr, &slice_hdr), + mv_info: NABufferRef::new(FrameMV::new(sps.pic_width_in_mbs, sps.pic_height_in_mbs)), + }; + + self.cur_fdec = Some(FrameDecoder{ + slices: Vec::new(), + sstate: SliceState::new(), + 
ipcm_buf: [0; 256 + 64 + 64], + //width, height, + num_mbs, + sps: Arc::clone(sps), + pps: Arc::clone(&self.pps[self.cur_pps]), + dispatch: Arc::clone(&self.dispatch), + cavlc_cb: Arc::clone(&self.cavlc_cb), + mc_dsp, + cur_pic, + is_mbaff, + deblock_skip: self.deblock_skip, + }); + } else { + if let Some(ref mut fdec) = self.cur_fdec { + let new_type = slice_hdr.slice_type.to_frame_type(); + let pic = &mut fdec.cur_pic; + pic.pic_type = match (pic.pic_type, new_type) { + (FrameType::I, _) => new_type, + (_, FrameType::B) => FrameType::B, + _ => pic.pic_type, }; - mb_info.coeffs = [[0; 16]; 25]; - if self.transform_8x8_mode { - mb_info.clear_coeffs8x8(); - } - mb_info.chroma_dc = [[0; 4]; 2]; - decode_residual_cabac(cabac, &mut self.sstate, &mut mb_info); + full_id = pic.full_id; } else { - last_qp_diff = false; + return Ok(()); } } - } else { - mb_info.mb_type = skip_type; - mb_info.transform_size_8x8 = false; - last_qp_diff = false; - } - self.handle_macroblock(slice_hdr, &mut mb_info); - prev_mb_skipped = mb_skip; - if !(self.is_mbaff && ((mb_idx & 1) == 0)) && cabac.decode_terminate() { - if let Some(ref mut pic) = self.cur_pic { - pic.cur_mb = mb_idx + 1; + + let sps = &self.sps[self.cur_sps]; + + self.frame_refs.select_refs(sps, &slice_hdr, full_id); + + if slice_hdr.adaptive_ref_pic_marking_mode { + self.frame_refs.apply_adaptive_marking(&slice_hdr.adaptive_ref_pic_marking, slice_hdr.frame_num, 1 << self.sps[self.cur_sps].log2_max_frame_num)?; } - return Ok(mb_idx + 1 == self.num_mbs); - } - mb_idx += 1; - } - Err(DecoderError::InvalidData) + if let Some(ref mut fdec) = self.cur_fdec { + fdec.slices.push((slice_hdr, hdr_size, self.frame_refs.cur_refs.clone(), src)); + } + }, + 2 => { // slice data partition A + //slice header + //slice id = read_ue() + //cat 2 slice data (all but MB layer residual) + return Err(DecoderError::NotImplemented); + }, + 3 => { // slice data partition B + //slice id = read_ue() + //if pps.redundant_pic_cnt_present { 
redundant_pic_cnt = read_ue() } + //cat 3 slice data (MB layer residual) + return Err(DecoderError::NotImplemented); + }, + 4 => { // slice data partition C + //slice id = read_ue() + //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() } + //cat 4 slice data (MB layer residual) + return Err(DecoderError::NotImplemented); + }, + 6 => {}, //SEI + 7 => { + let sps = parse_sps(&src[1..])?; + self.sps.push(Arc::new(sps)); + }, + 8 => { + validate!(full_size >= 8 + 16); + let pps = parse_pps(&src[1..], self.sps.as_slice(), full_size - 8)?; + let mut found = false; + for stored_pps in self.pps.iter_mut() { + if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id { + *stored_pps = Arc::clone(&pps); + found = true; + break; + } + } + if !found { + self.pps.push(pps); + } + }, + 9 => { // access unit delimiter + }, + 10 => {}, //end of sequence + 11 => {}, //end of stream + 12 => {}, //filler + _ => {}, + }; + + Ok(()) } } -impl NADecoder for H264Decoder { - fn init(&mut self, supp: &mut NADecoderSupport, info: NACodecInfoRef) -> DecoderResult<()> { +impl NADecoderMT for H264MTDecoder { + fn init(&mut self, supp: &mut NADecoderSupport, info: NACodecInfoRef, nthreads: usize) -> DecoderResult<()> { if let NACodecTypeInfo::Video(vinfo) = info.get_properties() { let fmt = YUV420_FORMAT; let myinfo = NACodecTypeInfo::Video(NAVideoInfo::new(0, 0, false, fmt)); @@ -694,7 +710,6 @@ impl NADecoder for H264Decoder { if edata.len() > 11 && &edata[0..4] == b"avcC" { let mut mr = MemoryReader::new_read(edata.as_slice()); let mut br = ByteReader::new(&mut mr); - let mut nal_buf = Vec::new(); br.read_skip(4)?; let version = br.read_byte()?; @@ -712,8 +727,9 @@ impl NADecoder for H264Decoder { let len = br.read_u16be()? as usize; let offset = br.tell() as usize; validate!((br.peek_byte()? 
& 0x1F) == 7); + let mut nal_buf = Vec::new(); let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf); - self.handle_nal(&nal_buf, supp, true)?; + self.handle_nal(nal_buf, supp, true, 0, NATimeInfo::new(None, None, None, 0, 0))?; br.read_skip(len)?; } let num_pps = br.read_byte()? as usize; @@ -721,8 +737,9 @@ impl NADecoder for H264Decoder { let len = br.read_u16be()? as usize; let offset = br.tell() as usize; validate!((br.peek_byte()? & 0x1F) == 8); + let mut nal_buf = Vec::new(); let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf); - self.handle_nal(&nal_buf, supp, true)?; + self.handle_nal(nal_buf, supp, true, 0, NATimeInfo::new(None, None, None, 0, 0))?; br.read_skip(len)?; } if br.left() > 0 { @@ -751,12 +768,12 @@ impl NADecoder for H264Decoder { return Err(DecoderError::NotImplemented); } - self.width = vinfo.get_width(); - self.height = vinfo.get_height(); + let mut width = vinfo.get_width(); + let mut height = vinfo.get_height(); - if (self.width == 0 || self.height == 0) && !self.sps.is_empty() { - self.width = self.sps[0].pic_width_in_mbs * 16; - self.height = self.sps[0].pic_height_in_mbs * 16; + if (width == 0 || height == 0) && !self.sps.is_empty() { + width = self.sps[0].pic_width_in_mbs * 16; + height = self.sps[0].pic_height_in_mbs * 16; } let num_bufs = if !self.sps.is_empty() { @@ -764,20 +781,37 @@ impl NADecoder for H264Decoder { } else { 3 }.max(16 + 1); - supp.pool_u8.set_dec_bufs(num_bufs); - supp.pool_u8.prealloc_video(NAVideoInfo::new(self.width, self.height, false, fmt), 4)?; + if let Ok(ref mut sd) = self.dispatch.write() { + sd.max_threads = nthreads; + } else { + return Err(DecoderError::Bug); + } + supp.pool_u8.set_dec_bufs(num_bufs + nthreads); + supp.pool_u8.prealloc_video(NAVideoInfo::new(width, height, false, fmt), 4)?; Ok(()) } else { Err(DecoderError::InvalidData) } } - fn decode(&mut self, supp: &mut NADecoderSupport, pkt: &NAPacket) -> DecoderResult { + fn can_take_input(&mut self) -> bool { + if let 
Ok(ref sd) = self.dispatch.read() { + sd.can_decode_more() + } else { + false + } + } + fn queue_pkt(&mut self, supp: &mut NADecoderSupport, pkt: &NAPacket, user_id: u32) -> DecoderResult { + if !self.can_take_input() { + return Ok(false); + } + let src = pkt.get_buffer(); let mut mr = MemoryReader::new_read(&src); let mut br = ByteReader::new(&mut mr); let mut nal_buf = Vec::with_capacity(src.len()); + if self.nal_len > 0 { let mut skip_decoding = false; if self.skip_mode != FrameSkipMode::None { @@ -825,6 +859,10 @@ impl NADecoder for H264Decoder { }; br.seek(SeekFrom::Start(0))?; } + + let mut initial_ref_frames = Vec::new(); + self.frame_refs.fill_ref_nums(&mut initial_ref_frames); + while br.left() > 0 { let size = match self.nal_len { 1 => br.read_byte()? as usize, @@ -835,47 +873,60 @@ impl NADecoder for H264Decoder { }; validate!(br.left() >= (size as i64)); let offset = br.tell() as usize; - let _size = unescape_nal(&src[offset..][..size], &mut nal_buf); - self.handle_nal(nal_buf.as_slice(), supp, skip_decoding)?; + let mut cur_nal_buf = Vec::with_capacity(size); + let _size = unescape_nal(&src[offset..][..size], &mut cur_nal_buf); + self.handle_nal(cur_nal_buf, supp, skip_decoding, user_id, pkt.ts)?; br.read_skip(size)?; } - } else { -//todo NAL detection - unimplemented!(); - } - - let (bufinfo, ftype, dts) = if self.has_pic && self.cur_pic.is_some() { - let mut npic = None; - std::mem::swap(&mut self.cur_pic, &mut npic); - let cpic = npic.unwrap(); - let ret = (NABufferType::Video(cpic.buf.clone()), cpic.pic_type, Some(u64::from(cpic.full_id))); + let mut fdec = None; + std::mem::swap(&mut fdec, &mut self.cur_fdec); + if let Some(fdc) = fdec { + let cpic = &fdc.cur_pic; if cpic.is_ref { self.frame_refs.add_short_term(cpic.clone(), self.sps[self.cur_sps].num_ref_frames); } if let Some(lt_idx) = cpic.long_term { - self.frame_refs.add_long_term(lt_idx, cpic); + self.frame_refs.add_long_term(lt_idx, cpic.clone()); } - ret - } else { - (NABufferType::None, 
FrameType::Skip, None) - }; - - let mut frm = NAFrame::new_from_pkt(pkt, self.info.clone(), bufinfo); - frm.set_keyframe(ftype == FrameType::I); - if let (Some(mydts), None) = (dts, frm.get_dts()) { - frm.set_dts(Some(mydts)); + let mut ref_frames = Vec::new(); + self.frame_refs.fill_ref_nums(&mut ref_frames); + queue_decoding(&mut self.dispatch, fdc, &initial_ref_frames, &ref_frames); + } + } else { +//todo NAL detection + unimplemented!(); + } + Ok(true) + } + fn has_output(&mut self) -> bool { + if let Ok(ref ds) = self.dispatch.read() { + ds.has_output() + } else { + panic!("can't peek into status"); } - if let Some(dts) = dts { - frm.set_id(dts as i64); + } + fn get_frame(&mut self) -> (DecoderResult, u32) { + match wait_for_one(&mut self.dispatch) { + Ok(cpic) => { + let bufinfo = NABufferType::Video(cpic.buf.clone()); + let ftype = cpic.pic_type; + let dts = Some(u64::from(cpic.full_id)); + let mut frm = NAFrame::new(cpic.time, ftype, cpic.is_idr, self.info.clone(), bufinfo); + if let (Some(mydts), None) = (dts, frm.get_dts()) { + frm.set_dts(Some(mydts)); + } + frm.set_id(cpic.user_id as i64); + (Ok(frm.into_ref()), cpic.user_id) + }, + Err((err, id)) => (Err(err), id), } - frm.set_frame_type(ftype); - Ok(frm.into_ref()) } fn flush(&mut self) { + clear_threads(&mut self.dispatch); } } -impl NAOptionHandler for H264Decoder { +impl NAOptionHandler for H264MTDecoder { fn get_supported_options(&self) -> &[NAOptionDefinition] { DECODER_OPTIONS } fn set_options(&mut self, options: &[NAOption]) { for option in options.iter() { @@ -905,6 +956,6 @@ impl NAOptionHandler for H264Decoder { } } -pub fn get_decoder() -> Box { - Box::new(H264Decoder::new()) +pub fn get_decoder_mt() -> Box { + Box::new(H264MTDecoder::new()) } diff --git a/nihav-itu/src/codecs/h264/decoder_st.rs b/nihav-itu/src/codecs/h264/decoder_st.rs index 63815f8..85deab9 100644 --- a/nihav-itu/src/codecs/h264/decoder_st.rs +++ b/nihav-itu/src/codecs/h264/decoder_st.rs @@ -203,10 +203,13 @@ 
println!("PAFF?"); self.cur_pic = Some(PictureInfo { id: slice_hdr.frame_num, full_id, + user_id: full_id, + time: NATimeInfo::new(None, None, None, 0, 0), pic_type: slice_hdr.slice_type.to_frame_type(), buf, cur_mb: 0, is_ref: nal_ref_idc != 0, + is_idr, long_term: get_long_term_id(is_idr, &slice_hdr), mv_info: NABufferRef::new(FrameMV::new(sps.pic_width_in_mbs, sps.pic_height_in_mbs)), }); diff --git a/nihav-itu/src/codecs/h264/dispatch.rs b/nihav-itu/src/codecs/h264/dispatch.rs new file mode 100644 index 0000000..75afb26 --- /dev/null +++ b/nihav-itu/src/codecs/h264/dispatch.rs @@ -0,0 +1,315 @@ +use std::sync::{Arc, Barrier}; +use std::sync::atomic::*; +use std::thread; + +use nihav_core::codecs::{DecoderError, DecoderResult}; + +use super::{FrameDecoder, PictureInfo, Shareable}; + +#[derive(Clone,Copy,Debug,PartialEq)] +pub enum FrameDecodingStatus { + Ok, + NotReady, + Error, + NotFound, +} + +struct FrameState { + pinfo: PictureInfo, + mb_pos: AtomicUsize, + error: AtomicBool, + complete: AtomicBool, + output: AtomicBool, + worker: Option>>, + result: DecoderResult<()>, + num_refs: usize, + ref_frames: Vec, +} + +impl FrameState { + fn get_id(&self) -> u32 { self.pinfo.full_id } + fn get_user_id(&self) -> u32 { self.pinfo.user_id } + fn is_working(&self) -> bool { + self.worker.is_some() && + !self.complete.load(Ordering::Relaxed) && + !self.error.load(Ordering::Relaxed) + } + fn is_output_candidate(&self) -> bool { + !self.output.load(Ordering::Relaxed) && + (self.complete.load(Ordering::Relaxed) || self.error.load(Ordering::Relaxed)) + } +} + +pub struct ThreadDispatcher { + fstate: Vec, + pub max_threads: usize, + cur_threads: usize, +} + +impl ThreadDispatcher { + pub fn new() -> Self { + Self { + fstate: Vec::new(), + max_threads: 3, + cur_threads: 0, + } + } + pub fn can_decode_more(&self) -> bool { + let out_cand = self.fstate.iter().filter(|state| state.is_output_candidate()).count(); + if out_cand > self.max_threads { + return false; + } + if 
(self.cur_threads < self.max_threads) || (self.max_threads == 0) { + true + } else { + let real_workers = self.fstate.iter().fold(0usize, + |acc, state| acc + (state.is_working() as usize)); + real_workers < self.max_threads + } + } + fn cleanup(&mut self) { + for state in self.fstate.iter_mut() { + if state.worker.is_some() && !state.is_working() { + let mut ret = None; + std::mem::swap(&mut state.worker, &mut ret); + if let Some(handle) = ret { + state.result = handle.join().unwrap(); + } + self.cur_threads -= 1; + } + } + } + fn unref_frame(&mut self, id: u32) { + let mut toremove = Vec::new(); + for state in self.fstate.iter() { + if state.num_refs == 0 && state.output.load(Ordering::Relaxed) { + toremove.push(state.get_id()); + } + } + if let Some(idx) = self.find_by_id(id) { + let mut ref_frm = Vec::new(); + std::mem::swap(&mut ref_frm, &mut self.fstate[idx].ref_frames); + for state in self.fstate.iter_mut() { + if ref_frm.contains(&state.get_id()) { + assert!(state.num_refs >= 2); + state.num_refs -= 2; + } + } + if self.fstate[idx].num_refs == 0 && self.fstate[idx].output.load(Ordering::Relaxed) { + self.remove_frame(id); + } + } + for &id in toremove.iter() { + self.remove_frame(id); + } + } + fn find_by_id(&self, id: u32) -> Option { + self.fstate.iter().position(|x| x.get_id() == id) + } + fn set_completed(&self, id: u32) { + if let Some(idx) = self.find_by_id(id) { + self.fstate[idx].complete.store(true, Ordering::Relaxed); + } + } + fn set_error(&self, id: u32) { + if let Some(idx) = self.find_by_id(id) { + self.fstate[idx].error.store(true, Ordering::Relaxed); + } + } + pub fn update_pos(&self, id: u32, mb_pos: usize) { + if let Some(idx) = self.find_by_id(id) { + self.fstate[idx].mb_pos.store(mb_pos, Ordering::Relaxed); + } + } + pub fn check_pos(&self, id: u32, mb_pos: usize) -> FrameDecodingStatus { + if let Some(idx) = self.find_by_id(id) { + let state = &self.fstate[idx]; + if !state.error.load(Ordering::Relaxed) { + if 
state.complete.load(Ordering::Relaxed) || mb_pos < state.mb_pos.load(Ordering::Relaxed) { + FrameDecodingStatus::Ok + } else { + FrameDecodingStatus::NotReady + } + } else { + FrameDecodingStatus::Error + } + } else { + FrameDecodingStatus::NotFound + } + } + fn remove_frame(&mut self, id: u32) { + if let Some(idx) = self.find_by_id(id) { + self.fstate.remove(idx); + } + } + /*fn print_state(&self) { + print!(" state:"); + for state in self.fstate.iter() { + print!(" s{}b{}r{}{}{}{}", state.get_id(), + state.mb_pos.load(Ordering::Relaxed), state.num_refs, + if state.error.load(Ordering::Relaxed) { "E" } else {""}, + if state.complete.load(Ordering::Relaxed) {"C"} else {""}, + if state.output.load(Ordering::Relaxed) {"O"} else {""}); + } + println!(); + }*/ + pub fn has_output(&self) -> bool { + for state in self.fstate.iter() { + if state.is_output_candidate() { + return true; + } + } + false + } +} + +pub fn queue_decoding(disp: &mut Shareable, mut fdec: FrameDecoder, initial_ref_frames: &[u32], ref_frames: &[u32]) { + let barrier = Arc::new(Barrier::new(2)); + let starter = Arc::clone(&barrier); + + let pinfo = fdec.cur_pic.clone(); + let pic_id = pinfo.full_id; + let shared_disp = Arc::clone(disp); + let worker = thread::Builder::new().name("frame ".to_string() + &pic_id.to_string()).spawn(move || { + barrier.wait(); + + let mut slices = Vec::new(); + std::mem::swap(&mut slices, &mut fdec.slices); + let mut cur_mb = 0; + for (hdr, hdr_size, refs, nal) in slices.iter() { + if hdr.first_mb_in_slice != cur_mb { + if let Ok(rd) = shared_disp.read() { + rd.set_error(pic_id); + } else { + panic!("can't set error"); + } + return Err(DecoderError::InvalidData); + } + match fdec.decode_slice(hdr, *hdr_size, refs, nal) { + Ok(pos) => cur_mb = pos, + Err(err) => { + if let Ok(rd) = shared_disp.read() { + rd.set_error(pic_id); + } else { + panic!("can't set error"); + } + return Err(err); + }, + }; + } + + if cur_mb == fdec.num_mbs { + if let Ok(rd) = shared_disp.read() { + 
rd.set_completed(pic_id); + } else { + panic!("can't set status"); + } + } + + DecoderResult::Ok(()) + }).unwrap(); + let new_state = FrameState { + pinfo, + mb_pos: AtomicUsize::new(0), + error: AtomicBool::new(false), + complete: AtomicBool::new(false), + output: AtomicBool::new(false), + worker: Some(worker), + result: DecoderResult::Err(DecoderError::Bug), + num_refs: 0, + ref_frames: initial_ref_frames.to_vec(), + }; + if let Ok(ref mut ds) = disp.write() { + let new_id = new_state.get_id(); + if ds.find_by_id(new_id).is_some() { + ds.remove_frame(new_id); + } + ds.cleanup(); + ds.fstate.push(new_state); + for state in ds.fstate.iter_mut() { + if ref_frames.contains(&state.get_id()) { + state.num_refs += 1; + } + if initial_ref_frames.contains(&state.get_id()) { + state.num_refs += 1; + } + } + ds.cur_threads += 1; + starter.wait(); + } else { + panic!("cannot invoke thread dispatcher"); + } +} + +pub fn wait_for_one(dispatch: &mut Shareable) -> Result { + /*if let Ok(ref ds) = dispatch.read() { + ds.print_state(); + }*/ + let start = std::time::Instant::now(); + 'main_loop: loop { + if std::time::Instant::now().duration_since(start) > std::time::Duration::from_millis(20000) { panic!(" too long!"); } + if let Ok(ref ds) = dispatch.read() { + let mut nw = 0; + for state in ds.fstate.iter() { + if state.is_working() { + nw += 1; + } + if state.is_output_candidate() { + break 'main_loop; + } + } + if nw == 0 { + return Err((DecoderError::NoFrame, 0)); + } + } else { + panic!("can't peek into status"); + } + thread::yield_now(); + } + if let Ok(ref mut ds) = dispatch.write() { + ds.cleanup(); + let mut found = None; + for state in ds.fstate.iter() { + if state.is_output_candidate() { + state.output.store(true, Ordering::Relaxed); + if let DecoderResult::Err(err) = state.result { + let id = state.get_id(); + let user_id = state.get_user_id(); + ds.unref_frame(id); + return Err((err, user_id)); + } else { + found = Some(state.pinfo.clone()); + break; + } + } + } + 
if let Some(ret) = found { + ds.unref_frame(ret.full_id); + Ok(ret) + } else { + unreachable!(); + } + } else { + panic!("can't grab status"); + } +} + +pub fn clear_threads(dispatch: &mut Shareable) { + /*if let Ok(ref ds) = dispatch.read() { + ds.print_state(); + }*/ + let mut to_wait = Vec::new(); + if let Ok(ref mut ds) = dispatch.write() { + while let Some(state) = ds.fstate.pop() { + if let Some(handle) = state.worker { + to_wait.push(handle); + } + } + ds.cur_threads = 0; + } else { + panic!("can't grab status"); + } + while let Some(handle) = to_wait.pop() { + let _ = handle.join(); + } +} diff --git a/nihav-itu/src/codecs/h264/mb_recon.rs b/nihav-itu/src/codecs/h264/mb_recon.rs index eb32792..0a63edf 100644 --- a/nihav-itu/src/codecs/h264/mb_recon.rs +++ b/nihav-itu/src/codecs/h264/mb_recon.rs @@ -1,6 +1,8 @@ +use nihav_core::codecs::{DecoderResult, DecoderError}; use nihav_core::frame::*; -use nihav_codec_support::codecs::MV; -use super::{CurrentMBInfo, I4X4_SCAN}; +use nihav_codec_support::codecs::{MV, ZERO_MV}; +use super::{CurrentMBInfo, I4X4_SCAN, Shareable}; +use super::dispatch::{ThreadDispatcher, FrameDecodingStatus}; use super::dsp::*; use super::pic_ref::SliceRefs; use super::slice::{SliceHeader, WeightInfo, DEF_WEIGHT_INFO}; @@ -606,3 +608,275 @@ pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_in add_chroma(frm, sstate, mb_info); } } + +pub fn wait_for_mb(disp: &Shareable, sstate: &SliceState, xpos: usize, ypos: usize, mv: MV, ref_id: u32) -> DecoderResult<()> { + let xpos = xpos as isize + ((mv.x >> 2) as isize) + 4; + let ypos = ypos as isize + ((mv.y >> 2) as isize) + 4; + let dst_mb_x = ((xpos.max(0) as usize) / 16).min(sstate.mb_w - 1); + let dst_mb_y = ((ypos.max(0) as usize) / 16).min(sstate.mb_h - 1); + let expected_mb = dst_mb_x + dst_mb_y * sstate.mb_w; + loop { + if let Ok(ds) = disp.read() { + match ds.check_pos(ref_id, expected_mb) { + FrameDecodingStatus::Ok => return Ok(()), + 
FrameDecodingStatus::NotReady => {}, + _ => return Err(DecoderError::MissingReference), + }; + } + std::thread::yield_now(); + } +} + +fn wait_b_mc(disp: &Shareable, sstate: &SliceState, frame_refs: &SliceRefs, mv: [MV; 2], ref_idx: [PicRef; 2], xpos: usize, ypos: usize, w: usize, h: usize) -> DecoderResult<()> { + if let Some(ref_id) = frame_refs.get_ref_id(0, ref_idx[0].index()) { + wait_for_mb(disp, sstate, xpos + w, ypos + h, mv[0], ref_id)?; + } + if let Some(ref_id) = frame_refs.get_ref_id(1, ref_idx[1].index()) { + wait_for_mb(disp, sstate, xpos + w, ypos + h, mv[1], ref_id)?; + } + Ok(()) +} + +pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &SliceRefs, mc_dsp: &mut H264MC, weight_mode: u8, disp: &Shareable) -> DecoderResult<()> { + let xpos = sstate.mb_x * 16; + let ypos = sstate.mb_y * 16; + + match mb_info.mb_type { + MBType::Intra16x16(_, _, _) => { + pred_intra(frm, sstate, mb_info); + }, + MBType::Intra4x4 | MBType::Intra8x8 => { + pred_intra(frm, sstate, mb_info); + }, + MBType::PCM => {}, + MBType::PSkip => { + let mv = sstate.get_cur_blk4(0).mv[0]; + if let Some(ref_id) = frame_refs.get_ref_id(0, 0) { + wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv, ref_id)?; + } + let rpic = frame_refs.select_ref_pic(0, 0); + let weight = &slice_hdr.get_weight(0, 0); + do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight, mc_dsp); + }, + MBType::P16x16 => { + let mv = sstate.get_cur_blk4(0).mv[0]; + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[0].index()) { + wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv, ref_id)?; + } + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index()); + do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight, mc_dsp); + }, + MBType::P16x8 | MBType::P8x16 => { + let (bw, bh, bx, by) = if mb_info.mb_type == MBType::P16x8 { + (16, 8, 0, 8) + } else { + (8, 16, 
8, 0) + }; + let mv = sstate.get_cur_blk4(0).mv[0]; + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[0].index()) { + wait_for_mb(disp, sstate, xpos + bw, ypos + bh, mv, ref_id)?; + } + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index()); + do_p_mc(frm, xpos, ypos, bw, bh, mv, rpic, weight, mc_dsp); + let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0]; + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[1].index()) { + wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv, ref_id)?; + } + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[1].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[1].index()); + do_p_mc(frm, xpos + bx, ypos + by, bw, bh, mv, rpic, weight, mc_dsp); + }, + MBType::P8x8 | MBType::P8x8Ref0 => { + for part in 0..4 { + let bx = (part & 1) * 8; + let by = (part & 2) * 4; + let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0]; + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[part].index()); + + match mb_info.sub_mb_type[part] { + SubMBType::P8x8 => { + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { + wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 8, mv, ref_id)?; + } + do_p_mc(frm, xpos + bx, ypos + by, 8, 8, mv, rpic, weight, mc_dsp); + }, + SubMBType::P8x4 => { + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { + wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 4, mv, ref_id)?; + } + do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic.clone(), weight, mc_dsp); + let mv = sstate.get_cur_blk4(bx / 4 + by + 4).mv[0]; + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { + wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 8, mv, ref_id)?; + } + do_p_mc(frm, xpos + bx, ypos + by + 4, 8, 4, mv, rpic, weight, mc_dsp); + }, + SubMBType::P4x8 => { + if let 
Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { + wait_for_mb(disp, sstate, xpos + bx + 4, ypos + by + 8, mv, ref_id)?; + } + do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic.clone(), weight, mc_dsp); + let mv = sstate.get_cur_blk4(bx / 4 + by + 1).mv[0]; + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { + wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 8, mv, ref_id)?; + } + do_p_mc(frm, xpos + bx + 4, ypos + by, 4, 8, mv, rpic, weight, mc_dsp); + }, + SubMBType::P4x4 => { + for sb_no in 0..4 { + let sxpos = xpos + bx + (sb_no & 1) * 4; + let sypos = ypos + by + (sb_no & 2) * 2; + let sblk_no = (bx / 4 + (sb_no & 1)) + ((by / 4) + (sb_no >> 1)) * 4; + let mv = sstate.get_cur_blk4(sblk_no).mv[0]; + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { + wait_for_mb(disp, sstate, sxpos + 4, sypos + 4, mv, ref_id)?; + } + do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic.clone(), weight, mc_dsp); + } + }, + _ => unreachable!(), + }; + } + }, + MBType::B16x16(mode) => { + let mv0 = sstate.get_cur_blk4(0).mv[0]; + let rpic0 = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); + let mv1 = sstate.get_cur_blk4(0).mv[1]; + let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[0].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, mb_info.ref_l0[0], mb_info.ref_l1[0]); + wait_b_mc(disp, sstate, frame_refs, [mv0, mv1], [mb_info.ref_l0[0], mb_info.ref_l1[0]], xpos, ypos, 16, 16)?; + do_b_mc(frm, mode, xpos, ypos, 16, 16, mv0, rpic0, &weight0, mv1, rpic1, &weight1, mc_dsp); + }, + MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => { + let (pw, ph) = mb_info.mb_type.size(); + let (px, py) = (pw & 8, ph & 8); + let modes = [mode0, mode1]; + let (mut bx, mut by) = (0, 0); + for part in 0..2 { + let blk = if part == 0 { 0 } else { (px / 4) + py }; + let mv0 = sstate.get_cur_blk4(blk).mv[0]; + let rpic0 = frame_refs.select_ref_pic(0, 
mb_info.ref_l0[part].index()); + let mv1 = sstate.get_cur_blk4(blk).mv[1]; + let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[part].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, modes[part], weight_mode, mb_info.ref_l0[part], mb_info.ref_l1[part]); + wait_b_mc(disp, sstate, frame_refs, [mv0, mv1], [mb_info.ref_l0[part], mb_info.ref_l1[part]], xpos + bx, ypos + by, pw, ph)?; + do_b_mc(frm, modes[part], xpos + bx, ypos + by, pw, ph, mv0, rpic0, &weight0, mv1, rpic1, &weight1, mc_dsp); + bx += px; + by += py; + } + }, + MBType::Direct | MBType::BSkip => { + if let Some(ref_id) = frame_refs.get_ref_id(1, mb_info.ref_l1[0].index()) { + wait_for_mb(disp, sstate, xpos, ypos, ZERO_MV, ref_id)?; + } + let colo_mb_type = frame_refs.get_colocated_info(sstate.mb_x, sstate.mb_y).0.mb_type; + let is_16x16 = colo_mb_type.is_16x16_ref(); + + if is_16x16 { + let mv = sstate.get_cur_blk4(0).mv; + let ref_idx = sstate.get_cur_blk8(0).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[0].index()) { + wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv[0], ref_id)?; + } + if let Some(ref_id) = frame_refs.get_ref_id(1, mb_info.ref_l1[0].index()) { + wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv[1], ref_id)?; + } + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); + wait_b_mc(disp, sstate, frame_refs, mv, ref_idx, xpos, ypos, 16, 16)?; + do_b_mc(frm, BMode::Bi, xpos, ypos, 16, 16, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + } else { + for blk4 in 0..16 { + let mv = sstate.get_cur_blk4(blk4).mv; + let ref_idx = sstate.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx; + if let Some(ref_id) = frame_refs.get_ref_id(0, ref_idx[0].index()) { + wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv[0], ref_id)?; + } + if let Some(ref_id) = 
frame_refs.get_ref_id(1, ref_idx[1].index()) { + wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv[1], ref_id)?; + } + let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); + wait_b_mc(disp, sstate, frame_refs, mv, ref_idx, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, 4, 4)?; + do_b_mc(frm, BMode::Bi, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + } + } + sstate.apply_to_blk8(|blk8| { blk8.ref_idx[0].set_direct(); blk8.ref_idx[1].set_direct(); }); + }, + MBType::B8x8 => { + for part in 0..4 { + let ridx = sstate.get_cur_blk8(part).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ridx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ridx[1].index()); + let subtype = mb_info.sub_mb_type[part]; + let blk8 = (part & 1) * 2 + (part & 2) * 4; + let mut bx = (part & 1) * 8; + let mut by = (part & 2) * 4; + match subtype { + SubMBType::Direct8x8 => { + for blk in 0..4 { + let mv = sstate.get_cur_blk4(bx / 4 + (by / 4) * 4).mv; + let ref_idx = sstate.get_cur_blk8(bx / 8 + (by / 8) * 2).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); + wait_b_mc(disp, sstate, frame_refs, mv, ref_idx, xpos + bx, ypos + by, 4, 4)?; + do_b_mc(frm, BMode::Bi, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + bx += 4; + if blk == 1 { + bx -= 8; + by += 4; + } + } + sstate.get_cur_blk8(part).ref_idx[0].set_direct(); + sstate.get_cur_blk8(part).ref_idx[1].set_direct(); + }, + SubMBType::B8x8(mode) => { + let mv = sstate.get_cur_blk4(blk8).mv; + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, 
weight_mode, ridx[0], ridx[1]); + wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx, ypos + by, 8, 8)?; + do_b_mc(frm, mode, xpos + bx, ypos + by, 8, 8, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + }, + SubMBType::B8x4(mode) | SubMBType::B4x8(mode) => { + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); + let (pw, ph) = subtype.size(); + let mv = sstate.get_cur_blk4(blk8).mv; + wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx, ypos + by, pw, ph)?; + do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, mc_dsp); + let addr2 = blk8 + (pw & 4) / 4 + (ph & 4); + let mv = sstate.get_cur_blk4(addr2).mv; + wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph)?; + do_b_mc(frm, mode, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + }, + SubMBType::B4x4(mode) => { + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); + for i in 0..4 { + let addr2 = blk8 + (i & 1) + (i & 2) * 2; + let mv = sstate.get_cur_blk4(addr2).mv; + wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx, ypos + by, 4, 4)?; + do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, mc_dsp); + bx += 4; + if i == 1 { + bx -= 8; + by += 4; + } + } + }, + _ => unreachable!(), + }; + } + }, + }; + if !mb_info.mb_type.is_skip() { + if mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 { + add_luma(frm, sstate, mb_info); + } + add_chroma(frm, sstate, mb_info); + } + Ok(()) +} diff --git a/nihav-itu/src/codecs/h264/mod.rs b/nihav-itu/src/codecs/h264/mod.rs index c657109..a867b6e 100644 --- a/nihav-itu/src/codecs/h264/mod.rs +++ b/nihav-itu/src/codecs/h264/mod.rs @@ -4,12 +4,16 @@ * not fully correct deblock strength selection for P/B-macroblocks * 
scaling lists for 4x4 blocks */ +use std::sync::{Arc, RwLock}; + use nihav_core::codecs::*; use nihav_core::io::byteio::*; use nihav_core::io::bitreader::*; use nihav_core::io::intcode::*; use nihav_codec_support::codecs::{MV, ZERO_MV}; +pub type Shareable<T> = Arc<RwLock<T>>; + mod types; pub use types::*; mod pic_ref; @@ -37,6 +41,9 @@ use slice::*; mod decoder_st; pub use decoder_st::*; +mod dispatch; +mod decoder_mt; +pub use decoder_mt::*; trait ReadUE { fn read_ue(&mut self) -> DecoderResult<u32>; @@ -200,15 +207,16 @@ const DECODER_OPTIONS: &[NAOptionDefinition] = &[ #[cfg(test)] mod test { - use nihav_core::codecs::RegisteredDecoders; + use nihav_core::codecs::*; use nihav_core::demuxers::RegisteredDemuxers; use nihav_codec_support::test::dec_video::*; - use crate::itu_register_all_decoders; + use crate::*; use nihav_commonfmt::generic_register_all_demuxers; // samples if not specified otherwise come from H.264 conformance suite mod raw_demux; mod conformance; + mod conformance_mt; use self::raw_demux::RawH264DemuxerCreator; #[test] @@ -346,6 +354,66 @@ mod test { [0x26078d38, 0xf6a59d57, 0xcd14eaf8, 0x8eb08259], [0x31494337, 0x6f8d3f52, 0x4bc9ff92, 0x0c601b1c]])); } + #[test] + fn test_h264_mt_perframe() { + let mut dmx_reg = RegisteredDemuxers::new(); + dmx_reg.add_demuxer(&RawH264DemuxerCreator{}); + generic_register_all_demuxers(&mut dmx_reg); + let mut dec_reg = RegisteredMTDecoders::new(); + itu_register_all_mt_decoders(&mut dec_reg); + + test_mt_decoding("rawh264", "h264", + "assets/ITU/h264-conformance/CABAST3_Sony_E.jsv", + None, &dmx_reg, &dec_reg, ExpectedTestResult::MD5Frames(vec![ + [0xb5e5e368, 0x6ac59bfc, 0x82e35b7b, 0xbed17b81], + [0x940c38bc, 0x559fb990, 0x2b82a7ca, 0x3543188a], + [0x60d7544d, 0x2fc8cc23, 0x4acac90f, 0x44c2a91c], + [0x8343b34d, 0x0de80ae9, 0xe9c08cc9, 0x05161d82], + [0xaed8e194, 0xa24b3a8a, 0xbed9085d, 0x05d68293], + [0x1cddffac, 0x0ce9d209, 0xc4090b8a, 0xc3008856], + [0x26e08b9b, 0x84949759, 0x71622124, 0x9bfff254], + [0x19a70aa8,
0xd8bc987d, 0x51c04849, 0x71191523], + [0x74532da6, 0xecb92919, 0xd39cb150, 0x9ca9933d], + [0x68d86265, 0x15fc15b9, 0xe4946d83, 0x39d9584d], + [0xce209363, 0xf8d8331f, 0x72e0102f, 0x88de3a97], + [0xdbcfa40a, 0x7eed5940, 0xa5c53a66, 0xdfcd3cea], + [0x42ee0e5e, 0x4c1c3b64, 0xd91cc00b, 0x88be4b15], + [0x7673f569, 0xfccfb96a, 0x1f614c82, 0xf62ea376], + [0x8669d98b, 0x9fdf4e7d, 0xa4083a7f, 0x9b66d296], + [0x0444b315, 0x2ddfb91a, 0x1e21ce06, 0x0c8613e6], + [0xbde82067, 0x6cf23a0c, 0xdd29e64d, 0xcaa72ff3], + [0xcfcb544a, 0x1f1a81b0, 0x2217108c, 0x4888d5ef], + [0x00796b14, 0x58f16117, 0xb6a5efd1, 0xfb129acd], + [0x253a1f45, 0x85954311, 0x983dbabe, 0x658f4ce3], + [0xec97b332, 0xa17b26d0, 0xbead22af, 0xa6bd7d8e], + [0xf0537976, 0x924229ab, 0xd0f4612f, 0xad4b614e], + [0x5673d973, 0x78528036, 0xabfe5e13, 0xdcedfb26], + [0xd6110fa9, 0x532d6a30, 0xb7f0aa7c, 0xae7b544b], + [0x3369f874, 0x6a6dde75, 0x46d64780, 0xbf6ced32]])); + } + // a sample downloaded from gfycat.com + #[test] + fn test_h264_mt_real2() { + let mut dmx_reg = RegisteredDemuxers::new(); + dmx_reg.add_demuxer(&RawH264DemuxerCreator{}); + generic_register_all_demuxers(&mut dmx_reg); + let mut dec_reg = RegisteredMTDecoders::new(); + itu_register_all_mt_decoders(&mut dec_reg); + test_mt_decoding("mov", "h264", "assets/ITU/DimpledSpanishCuckoo-mobile.mp4", + Some(10), &dmx_reg, &dec_reg, + ExpectedTestResult::MD5Frames(vec![ + [0x674c6d60, 0xc7ab918d, 0x9db1beaf, 0xda9f2456], + [0x6a935350, 0x3d463ab2, 0xa3ab3c53, 0x97eb896b], + [0xa3d829e3, 0xb404dd32, 0x11983613, 0xbdf10ee6], + [0xc87afeaa, 0x79899908, 0x152e6320, 0xe689827f], + [0x2440ea01, 0x5b9d7fc7, 0x4fa5632b, 0xd2d76090], + [0xf6c60411, 0x19ea2c49, 0x3512371a, 0xce6cb26a], + [0x50b9fd9a, 0x64393126, 0xd03162ec, 0xfb54172a], + [0xd80e8bf9, 0xe9190ab7, 0x2be8fa38, 0xb94182e8], + [0x26078d38, 0xf6a59d57, 0xcd14eaf8, 0x8eb08259], + [0x80d1f58f, 0x12e454c0, 0x2140ca5c, 0xe19350ba], + [0x31494337, 0x6f8d3f52, 0x4bc9ff92, 0x0c601b1c]])); + } } pub const I4X4_SCAN: 
[(u8, u8); 16] = [ diff --git a/nihav-itu/src/codecs/h264/pic_ref.rs b/nihav-itu/src/codecs/h264/pic_ref.rs index fd2ae97..a047216 100644 --- a/nihav-itu/src/codecs/h264/pic_ref.rs +++ b/nihav-itu/src/codecs/h264/pic_ref.rs @@ -1,5 +1,5 @@ use nihav_core::codecs::DecoderResult; -use nihav_core::frame::{FrameType, NAVideoBufferRef}; +use nihav_core::frame::{FrameType, NAVideoBufferRef, NATimeInfo}; use nihav_core::refs::*; use nihav_codec_support::codecs::MV; use super::sets::SeqParameterSet; @@ -10,10 +10,13 @@ use super::types::*; pub struct PictureInfo { pub id: u16, pub full_id: u32, + pub time: NATimeInfo, + pub user_id: u32, pub pic_type: FrameType, pub buf: NAVideoBufferRef<u8>, pub cur_mb: usize, pub is_ref: bool, + pub is_idr: bool, pub long_term: Option<usize>, pub mv_info: NABufferRef<FrameMV>, @@ -54,6 +57,14 @@ pub struct SliceRefs { } impl SliceRefs { + pub fn get_ref_id(&self, list_id: u8, ref_id: usize) -> Option<u32> { + let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 }; + if ref_list.len() > ref_id { + ref_list[ref_id].as_ref().map(|pic| pic.full_id) + } else { + None + } + } pub fn select_ref_pic(&self, list_id: u8, ref_id: usize) -> Option<NAVideoBufferRef<u8>> { let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 }; if ref_list.len() > ref_id { @@ -171,6 +182,20 @@ impl FrameRefs { max_frame_num: 0, } } + pub fn fill_ref_nums(&self, dst: &mut Vec<u32>) { + for pic in self.ref_pics.iter() { + if !dst.contains(&pic.full_id) { + dst.push(pic.full_id); + } + } + for ltpic in self.long_term.iter() { + if let Some(ref pic) = ltpic { + if !dst.contains(&pic.full_id) { + dst.push(pic.full_id); + } + } + } + } pub fn calc_picture_num(&mut self, slice_hdr: &SliceHeader, is_idr: bool, ref_id: u8, sps: &SeqParameterSet) -> u32 { self.max_frame_num = 1 << sps.log2_max_frame_num; match sps.pic_order_cnt_type { diff --git a/nihav-itu/src/codecs/h264/test/conformance_mt.rs b/nihav-itu/src/codecs/h264/test/conformance_mt.rs new file mode 100644 index
0000000..5b9294f --- /dev/null +++ b/nihav-itu/src/codecs/h264/test/conformance_mt.rs @@ -0,0 +1,378 @@ +use nihav_core::codecs::RegisteredMTDecoders; +use nihav_core::demuxers::RegisteredDemuxers; +use nihav_codec_support::test::dec_video::*; +use nihav_commonfmt::generic_register_all_demuxers; +use crate::itu_register_all_mt_decoders; + +use super::raw_demux::RawH264DemuxerCreator; + +const PREFIX: &str = "assets/ITU/h264-conformance/"; + +fn test_files(names: &[(&str, [u32; 4])]) { + let mut dmx_reg = RegisteredDemuxers::new(); + dmx_reg.add_demuxer(&RawH264DemuxerCreator{}); + generic_register_all_demuxers(&mut dmx_reg); + let mut dec_reg = RegisteredMTDecoders::new(); + itu_register_all_mt_decoders(&mut dec_reg); + + for (name, hash) in names.iter() { + let test_name = format!("{}{}", PREFIX, name); + println!("Testing {}", test_name); + test_mt_decoding("rawh264", "h264", &test_name, None, &dmx_reg, &dec_reg, ExpectedTestResult::MD5(*hash)); + } +} + +const GENERAL_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("NL1_Sony_D.jsv", [0xD4BB8D98, 0x0C1377EE, 0x45515763, 0xAE7989FD]), + ("SVA_NL1_B.264", [0xB5626983, 0xAC087749, 0x7FFF9A4B, 0x10D2F1D4]), + ("NL2_Sony_H.jsv", [0x48D8380C, 0xDB7EFF52, 0x116C1AAD, 0xDBC583F5]), + ("SVA_NL2_E.264", [0xB47E932D, 0x43628801, 0x3B8453D9, 0xA1D0F60D]), + ("BA1_Sony_D.jsv", [0x114D1CF9, 0x4A2FCAFF, 0xDA0CF1B4, 0x9964BF3D]), + ("SVA_BA1_B.264", [0xDAB92AA2, 0x145AB44A, 0xBAB2BEB2, 0x868DD326]), + ("BA2_Sony_F.jsv", [0x124D2830, 0x7B057028, 0x12A374CF, 0xC9AAD615]), + ("SVA_BA2_D.264", [0x66130B14, 0x295574BF, 0x35B725A8, 0xEADED3AE]), + ("BA_MW_D.264", [0x7d5d351a, 0xd0616402, 0x94bf43a4, 0x3150fbca]), + ("BANM_MW_D.264", [0xe637d38e, 0xd004df35, 0x40218e3d, 0x84b43e42]), + ("BA1_FT_C.264", [0x8598CFC0, 0x6EDE33D4, 0xF24D8552, 0x28E5C8BB]), + ("NLMQ1_JVC_C.264", [0xFFCABB64, 0x192CED39, 0x90872B46, 0x70AF05EB]), + ("NLMQ2_JVC_C.264", [0x90B70FBA, 0xA5CA679E, 0xC9BF5E01, 0x1DDBA8F9]), + ("BAMQ1_JVC_C.264", [0x2F4F0B86, 0xC76F0356, 
0x491CE56D, 0x6331E885]), + ("BAMQ2_JVC_C.264", [0xE3F5D5B0, 0x774B5537, 0x0745F2D0, 0x4F009575]), + ("SVA_Base_B.264", [0x97F3F1F5, 0xB4034C8A, 0xBC29EF43, 0xE752005C]), + ("SVA_FM1_E.264", [0x079F354D, 0xAC1204EE, 0x1C31DCAE, 0xD421E99C]), + ("BASQP1_Sony_C.jsv", [0x630F0900, 0x8D248A40, 0xC3E04F7E, 0x43351EC6]), + /*"FM1_BT_B.h264", + "FM2_SVA_C.264", + "FM1_FT_E.264",*/ //special slice modes + ("CI_MW_D.264", [0x0eb95292, 0xad9fc21c, 0x89d93f8b, 0x049e451a]), + ("SVA_CL1_E.264", [0x5723A151, 0x8DE9FADC, 0xA7499C5B, 0xA34DA7C4]), + ("CI1_FT_B.264", [0x16F329D0, 0x196938FD, 0xB1AB2402, 0x5B208CFD]), + ("CVFC1_Sony_C.jsv", [0x4A8F8461, 0xD42A83C5, 0x126C4E5E, 0x5B2060D6]), + ("AUD_MW_E.264", [0xe96fe505, 0x4de0329a, 0x8868d060, 0x03375cdb]), + ("MIDR_MW_D.264", [0xd87bff88, 0xb2c5b96c, 0xcb291ef6, 0x8a45bbc2]), + ("NRF_MW_E.264", [0xa8635615, 0xb50c5a16, 0xdecc555a, 0x3c6c81c8]), + ("MPS_MW_A.264", [0x88bb5a51, 0x3bd7f3cc, 0x8190c7c0, 0x3688ab22]), + ("CVBS3_Sony_C.jsv", [0xe3c16329, 0x88100491, 0xe8431c3c, 0x88ed4096]), + ("BA3_SVA_C.264", [0x7032210d, 0xc0fc4a59, 0x49a2c941, 0x6fde4c27]), + ("SL1_SVA_B.264", [0xc9d2c518, 0xca433636, 0x77e70a17, 0x213c82a2]), + ("NL3_SVA_E.264", [0x21fa010c, 0x3c4bbb63, 0x1d17c4aa, 0xecd95df1]), + ("cvmp_mot_frm0_full_B.26l", [0xcb065db3, 0xa27b4a52, 0xb31f6839, 0xa3ec590a]), + + // no direct mention + //"FM2_SVA_B.264", //special slice mode +]; +#[test] +fn test_h264_general() { + test_files(GENERAL_TEST_STREAMS); +} + +const I_PCM_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("CVPCMNL1_SVA_C.264", [0x5C1FD0F6, 0x8E875200, 0x711FEBF1, 0xD683E58F]), + ("CVPCMNL2_SVA_C.264", [0xAF1F1DBE, 0x1DD6569C, 0xB02271F0, 0x53217D88]), +]; +#[test] +fn test_h264_ipcm() { + test_files(I_PCM_TEST_STREAMS); +} + +const MMCO_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("MR1_BT_A.h264", [0x617BF915, 0x48E89440, 0xC899A917, 0xC73CF171]), + ("MR2_TANDBERG_E.264", [0x69C17B20, 0xDF6E89E6, 0x82BD82F1, 0x93B6D282]), + ("MR3_TANDBERG_B.264", [0xC8AAC175, 
0xE5E73C68, 0x87EE02FF, 0x6DEA0F64]), +// ("MR4_TANDBERG_C.264", [0xA40042BC, 0xAB00C341, 0xA9651725, 0x46d31A2C]), // TODO later +// ("MR5_TANDBERG_C.264", [0x999EAE2E, 0x016DB374, 0x708B00E4, 0x335AE723]), //weird self-reference, TODO later + ("MR1_MW_A.264", [0xdd56dc8e, 0x403b18ec, 0x57eb5b3a, 0xd834ffde]), + ("MR2_MW_A.264", [0xe1e93e65, 0x96af2efd, 0x0e7d0fe5, 0x94d5be85]), + /*"MR6_BT_B.h264", + "MR7_BT_B.h264", + "MR8_BT_B.h264",*/ // interlaced coding + ("HCBP1_HHI_A.264", [0x13022e79, 0x70d78f1d, 0xe4aaf1f7, 0xbd0e440b]), + ("HCBP2_HHI_A.264", [0x6c689d15, 0x41f97dcc, 0x1a17f5bd, 0xb6569cf1]), +]; +#[test] +fn test_h264_mmco() { + test_files(MMCO_TEST_STREAMS); +} + +const WP_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("CVWP5_TOSHIBA_E.264", [0x9663DA55, 0xE5EF516C, 0x8BF0CA0B, 0xCC0ABBB8]), + ("CVWP1_TOSHIBA_E.264", [0xE8868CA5, 0xE934AD77, 0x9132CDB3, 0xC71BE000]), + /*("CVWP2_TOSHIBA_E.264", [0x4ef20436, 0x093acfa5, 0xba60f9cb, 0x9e9c86d2]), + ("CVWP3_TOSHIBA_E.264", [0x157a9a52, 0x63054bca, 0x0754e34d, 0xed250695]),*/ // negative P-frame POCs +]; +#[test] +fn test_h264_wp() { + test_files(WP_TEST_STREAMS); +} + +/*const FIELD_CODING_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + "CVNLFI1_Sony_C.jsv", + "CVNLFI2_Sony_H.jsv", + "Sharp_MP_Field_1_B.jvt", + "Sharp_MP_Field_2_B.jvt", + "Sharp_MP_Field_3_B.jvt", + "CVFI1_Sony_D.jsv", + "CVFI2_Sony_H.jsv", + "FI1_Sony_E.jsv", + "CVFI1_SVA_C.264", + "CVFI2_SVA_C.264", + "cvmp_mot_fld0_full_B.26l", + "CVMP_MOT_FLD_L30_B.26l", +]; +#[test] +fn test_h264_field() { + test_files(FIELD_CODING_TEST_STREAMS); +}*/ + +/*const FRAME_FIELD_CODING_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + "Sharp_MP_PAFF_1r2.jvt", + "CVPA1_TOSHIBA_B.264", + "cvmp_mot_picaff0_full_B.26l", +]; +#[test] +fn test_h264_frame_field() { + test_files(FRAME_FIELD_CODING_TEST_STREAMS); +}*/ + +/*const MBAFF_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + "CVMANL1_TOSHIBA_B.264", + "CVMANL2_TOSHIBA_B.264", + "CVMA1_Sony_D.jsv", + "CVMA1_TOSHIBA_B.264", + 
"CVMAQP2_Sony_G.jsv", + "CVMAQP3_Sony_D.jsv", + "CVMAPAQP3_Sony_E.jsv", + "cvmp_mot_mbaff0_full_B.26l", + "CVMP_MOT_FRM_L31_B.26l", +]; +#[test] +fn test_h264_mbaff() { + test_files(MBAFF_CODING_TEST_STREAMS); +}*/ + +/*const S_PICTURE_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + "sp1_bt_a.h264", + "sp2_bt_b.h264", +]; +#[test] +fn test_h264_s_picture() { + test_files(S_PICTURE_TEST_STREAMS); +}*/ + +const LONG_SEQUENCE_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("LS_SVA_D.264", [0x9C53BE4B, 0x1DEDCD45, 0x98D30293, 0xF01C7BFE]), +]; +#[test] +fn test_h264_long_sequence() { + test_files(LONG_SEQUENCE_TEST_STREAMS); +} + +const SEI_VUI_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("CVSE2_Sony_B.jsv", [0xDD660FB4, 0x07FEB42E, 0xCD3AF06B, 0x42FDA90D]), + ("CVSE3_Sony_H.jsv", [0xcec17e7e, 0xbe686bfc, 0xf234dece, 0x41f59179]), + ("CVSEFDFT3_Sony_E.jsv", [0xF44E4059, 0xD056AA37, 0x96F384A1, 0x1C894821]), +]; +#[test] +fn test_h264_sei_vui() { + test_files(SEI_VUI_TEST_STREAMS); +} + +const CABAC_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("CANL1_TOSHIBA_G.264", [0xAFA07274, 0x6B16BD96, 0xF3152B45, 0xE2F2881E]), + ("CANL1_Sony_E.jsv", [0x27F1D5D3, 0x89E110FC, 0x320788BF, 0x78006DB0]), + ("CANL2_Sony_E.jsv", [0x3A28438E, 0x3E0795DE, 0xAED795FC, 0xFEFBC833]), + ("CANL3_Sony_C.jsv", [0xD8CE9D2F, 0xAA54CE32, 0x94AD1553, 0xC440CDE9]), + ("CANL1_SVA_B.264", [0xB02DEFCB, 0x741C0E98, 0x2313C574, 0x9F2008ED]), + ("CANL2_SVA_B.264", [0xB02DEFCB, 0x741C0E98, 0x2313C574, 0x9F2008ED]), + ("CANL3_SVA_B.264", [0x04A6DE98, 0x4EF88D1B, 0x8C1B26FC, 0x8F33A425]), + ("CANL4_SVA_B.264", [0x8F50D54B, 0x809E3B13, 0xC4F25B83, 0xDAC9715E]), + ("CABA1_Sony_D.jsv", [0x24B155A4, 0x00DC10D1, 0x1D45A3AA, 0xDF61AE25]), + ("CABA2_Sony_E.jsv", [0x3731F0F1, 0xACE3AD91, 0x76093A7B, 0x46347CEA]), + ("CABA3_Sony_C.jsv", [0x28C778FD, 0xCF189AFF, 0x70095DB5, 0x2572456B]), + ("CABA3_TOSHIBA_E.264", [0x13651D01, 0xC5B533E6, 0xB7AA132B, 0xAE7669ED]), + ("CABA1_SVA_B.264", [0x2F5CABD5, 0xBB4954C0, 0x386CAFD8, 0xA9AA782A]), + 
("CABA2_SVA_B.264", [0x6D4277A7, 0xFC70ED1F, 0xBE3C5F10, 0xB0A70671]), + ("CABA3_SVA_B.264", [0xA671891F, 0xACE44E55, 0x5C7CAF55, 0x94677EA8]), + ("camp_mot_frm0_full.26l", [0xA37697DB, 0x4DC220E5, 0x53E8BFCD, 0x3BA31463]), +]; +#[test] +fn test_h264_cabac() { + test_files(CABAC_TEST_STREAMS); +} + +const CABAC_INIT_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("CABACI3_Sony_B.jsv", [0xB63FC9B1, 0x4CC4102C, 0xB3C09A73, 0x88E636B2]), +]; +#[test] +fn test_h264_cabac_init() { + test_files(CABAC_INIT_TEST_STREAMS); +} + +const CABAC_MB_QPTEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("CAQP1_Sony_B.jsv", [0xCAC07EAA, 0xBD141764, 0xD64CF9DE, 0x0230A92E]), + ("CACQP3_Sony_D.jsv", [0xDFC2C76E, 0x559E61C0, 0xE3E29220, 0x05DC805E]), +]; +#[test] +fn test_h264_cabac_mb_qp() { + test_files(CABAC_MB_QPTEST_STREAMS); +} + +const CABAC_SLICE_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("CABAST3_Sony_E.jsv", [0xB4797DBC, 0x3CD95E50, 0x2C04F2DE, 0x629C61BA]), + ("CABASTBR3_Sony_B.jsv", [0xF8081465, 0xA02CF3C3, 0xC678671A, 0xC456D62C]), +]; +#[test] +fn test_h264_cabac_slice() { + test_files(CABAC_SLICE_TEST_STREAMS); +} + +const CABAC_I_PCM_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("CAPCMNL1_Sand_E.264", [0xEE9968EE, 0xEFE935F0, 0x45C6B70B, 0xE51691EB]), + ("CAPCM1_Sand_E.264", [0xCA073CA1, 0x06E70D5C, 0xD51F6748, 0x5846A5B1]), + ("CAPM3_Sony_D.jsv", [0x9ECC3BF5, 0xFF7CAC9A, 0x068A5BA5, 0x7BC87CB7]), +]; +#[test] +fn test_h264_cabac_ipcm() { + test_files(CABAC_I_PCM_TEST_STREAMS); +} + +const CABAC_MMCO_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + /*"MR9_BT_B.h264",*/ //MBAFF + ("HCMP1_HHI_A.264", [0xF1550F70, 0x6762E865, 0x29FE9204, 0x7981C250]), +]; +#[test] +fn test_h264_cabac_mmco() { + test_files(CABAC_MMCO_TEST_STREAMS); +} + +const CABAC_WP_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("CAWP1_TOSHIBA_E.264", [0x87946607, 0xD1D774C2, 0xDA8EC863, 0x2710C84A]), + ("CAWP5_TOSHIBA_E.264", [0x9663DA55, 0xE5EF516C, 0x8BF0CA0B, 0xCC0ABBB8]), +]; +#[test] +fn test_h264_cabac_wp() { + 
test_files(CABAC_WP_TEST_STREAMS); +} + +/*const CABAC_FIELD_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + "CABREF3_Sand_D.264", + "CAFI1_SVA_C.264", + "camp_mot_fld0_full.26l", +]; +#[test] +fn test_h264_cabac_field_() { + test_files(CABAC_FIELD_TEST_STREAMS); +}*/ + +/*const CABAC_FIELD_FRAME_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + "Sharp_MP_PAFF_2.jvt", + "CAPA1_TOSHIBA_B.264", + "camp_mot_picaff0_full.26l", +]; +#[test] +fn test_h264_cabac_field_frame() { + test_files(CABAC_FIELD_FRAMETEST_STREAMS); +}*/ + +/*const CABAC_MBAFF_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + "CAMANL1_TOSHIBA_B.264", + "CAMANL2_TOSHIBA_B.264", + "CANLMA2_Sony_C.jsv", + "CANLMA3_Sony_C.jsv", + "CAMA1_Sony_C.jsv", + "CAMA1_TOSHIBA_B.264", + "CAMANL3_Sand_E.264", + "CAMA3_Sand_E.264", + "CAMASL3_Sony_B.jsv", + "CAMACI3_Sony_C.jsv", + "camp_mot_mbaff0_full.26l", + "CAMP_MOT_MBAFF_L30.26l", + "CAMP_MOT_MBAFF_L31.26l", + "CAPAMA3_Sand_F.264", + "cama1_vtc_c.avc", + "cama2_vtc_b.avc", + "cama3_vtc_b.avc", +]; +#[test] +fn test_h264_cabac_mbaff() { + test_files(CABAC_MBAFF_TEST_STREAMS); +}*/ + +/*const CABAC_CAVLC_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + "CVCANLMA2_Sony_C.jsv", +]; +#[test] +fn test_h264_cabac_cavlc() { + test_files(CABAC_CAVLC_TEST_STREAMS); +}*/ // contains MBAFF + +const CABAC_PRED_BW_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("src19td.IBP.264", [0xEE593F70, 0x57480500, 0xCE7D8768, 0xF1AA0E41]), +]; +#[test] +fn test_h264_cabac_pred_bw() { + test_files(CABAC_PRED_BW_TEST_STREAMS); +} + +const FREXT_420_8_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + ("FRext/FRExt1_Panasonic.avc", [0x224897db, 0xcb44b3a4, 0x09f779f1, 0x4ed4af76]), + ("FRext/FRExt3_Panasonic.avc", [0xC6AB33FD, 0xCC18BC98, 0x7FBF8B2C, 0xD240036D]), + ("FRext/HCAFR1_HHI.264", [0x662BB873, 0x3085753A, 0xC5E0D55E, 0x1D1A4A09]), + //("FRext/HCAFF1_HHI.264", [0;4]), //PAFF + //("FRext/HCAMFF1_HHI.264", [0;4]), //MBAFF + //("FRext/FRExt2_Panasonic.avc", [0;4]), //PAFF + //("FRext/FRExt4_Panasonic.avc", [0;4]), //MBAFF + 
("FRext/HPCANL_BRCM_C.264", [0xB1660F91, 0xE5047ADA, 0xAE5204A5, 0x309D57B8]), + ("FRext/HPCA_BRCM_C.264", [0x7F14A1E8, 0x39AD8B19, 0xAE5B8E17, 0x6E3989A3]), + + /*("FRext/HPCAFLNL_BRCM_C.264", [0;4]), //PAFF + ("FRext/HPCAFL_BRCM_C.264", [0;4]),*/ + ("FRext/HCAFR2_HHI.264", [0x63D67A2A, 0x105325E9, 0x20DB4882, 0x3BCA5E0B]), + ("FRext/HCAFR3_HHI.264", [0xead8442e, 0xc7c92029, 0xb4308393, 0x04429e08]), + ("FRext/HCAFR4_HHI.264", [0xe3c8636e, 0x4a39d44b, 0x37c008be, 0x055f023f]), + ("FRext/HPCADQ_BRCM_B.264", [0xbc418315, 0x190b9fbc, 0xf26b2b67, 0x74ec9e0c]), + ("FRext/HPCALQ_BRCM_B.264", [0xbc418315, 0x190b9fbc, 0xf26b2b67, 0x74ec9e0c]), + //("FRext/HPCAMAPALQ_BRCM_B.264", [0;4]), //MBAFF + ("FRext/HPCV_BRCM_A.264", [0x2c898d3b, 0xd5a0ce47, 0x59056977, 0x0efa615c]), + ("FRext/HPCVNL_BRCM_A.264", [0x5c03fbee, 0x3197c054, 0xd9bb8998, 0xc7ad74c0]), + /*("FRext/HPCVFL_BRCM_A.264", [0;4]), //PAFF + ("FRext/HPCVFLNL_BRCM_A.264", [0;4]),*/ + //("FRext/HPCVMOLQ_BRCM_B.264", [0;4]), //grayscale + //("FRext/HPCAMOLQ_BRCM_B.264", [0;4]), //grayscale + ("FRext/HPCAQ2LQ_BRCM_B.264", [0x0548d695, 0x187a2dd9, 0x4019c881, 0xd50c37fe]), + ("FRext/Freh1_B.264", [0xdcbbcad3, 0xe236a00b, 0xe1634ab4, 0x10e18346]), + ("FRext/Freh2_B.264", [0x016d6d3f, 0xe4592072, 0x28352500, 0xd2997d1b]), + ("FRext/freh3.264", [0x1ec34cc7, 0x284a8778, 0x1a6fa64b, 0x71788926]), + //("FRext/freh4.264", [0;4]), //PAFF + //("FRext/freh5.264", [0;4]), //MBAFF + //("FRext/freh6.264", [0;4]), //PAFF + //("FRext/Freh7_B.264", [0;4]), //PAFF + ("FRext/freh8.264", [0x0be92564, 0x2ad3dbf6, 0xda89d9b6, 0xeebe66e3]), + ("FRext/freh9.264", [0xec630029, 0x953c309d, 0xa8813a35, 0x027fae05]), + //("FRext/freh10.264", [0;4]), //PAFF + //("FRext/freh11.264", [0;4]), //PAFF + ("FRext/Freh12_B.264", [0xa78649ab, 0x5d909a25, 0xf24e2ac6, 0xf6381467]), + /*("FRext/FREXT01_JVC_D.264", [0;4]), //MBAFF + ("FRext/FREXT02_JVC_C.264", [0;4]),*/ + ("FRext/FRExt_MMCO4_Sony_B.264", [0x47be1aa9, 0x61b2cc22, 0x83e55893, 0x696693b5]), + + 
("FRext/test8b43.264", [0x921c816c, 0x14170ead, 0x03c19c5c, 0x9ed3c0a4]), +]; +#[test] +fn test_h264_frext_420_8() { + test_files(FREXT_420_8_TEST_STREAMS); +} + +/*const FREXT_420_10I_TEST_STREAMS: &[(&str, [u32; 4])] = &[ + "FRext/PPH10I1_Panasonic_A.264", + "FRext/PPH10I2_Panasonic_A.264", + "FRext/PPH10I3_Panasonic_A.264", + "FRext/PPH10I4_Panasonic_A.264", + "FRext/PPH10I5_Panasonic_A.264", + "FRext/PPH10I6_Panasonic_A.264", + "FRext/PPH10I7_Panasonic_A.264", +]; +#[test] +fn test_h264_frext_420_10i() { + test_files(FREXT_420_10I_TEST_STREAMS); +}*/ diff --git a/nihav-itu/src/codecs/mod.rs b/nihav-itu/src/codecs/mod.rs index f0ccc23..a3873bf 100644 --- a/nihav-itu/src/codecs/mod.rs +++ b/nihav-itu/src/codecs/mod.rs @@ -26,3 +26,15 @@ pub fn itu_register_all_decoders(rd: &mut RegisteredDecoders) { rd.add_decoder(*decoder); } } + +const ITU_MT_CODECS: &[MTDecoderInfo] = &[ +#[cfg(feature="decoder_h264")] + MTDecoderInfo { name: "h264", get_decoder: h264::get_decoder_mt }, +]; + +/// Registers all available multi-threaded decoders provided by this crate. +pub fn itu_register_all_mt_decoders(rd: &mut RegisteredMTDecoders) { + for decoder in ITU_MT_CODECS.iter() { + rd.add_decoder(*decoder); + } +} diff --git a/nihav-itu/src/lib.rs b/nihav-itu/src/lib.rs index 0bf247a..608d3f3 100644 --- a/nihav-itu/src/lib.rs +++ b/nihav-itu/src/lib.rs @@ -8,6 +8,7 @@ extern crate nihav_codec_support; #[allow(clippy::useless_let_if_seq)] mod codecs; pub use crate::codecs::itu_register_all_decoders; +pub use crate::codecs::itu_register_all_mt_decoders; #[cfg(test)] extern crate nihav_commonfmt;