From 6d3942ad8de35fd052cc9b071b5a4afbfc330868 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Sat, 7 Mar 2026 15:57:09 +0100 Subject: [PATCH] h264: implement 10-bit decoder It can easily support other bitdepths but currently there's no need for that. --- nihav-itu/src/codecs/h264/high/cabac.rs | 890 +++++++++++++ nihav-itu/src/codecs/h264/high/cavlc.rs | 680 ++++++++++ nihav-itu/src/codecs/h264/high/decoder_mt.rs | 1001 ++++++++++++++ nihav-itu/src/codecs/h264/high/decoder_st.rs | 961 +++++++++++++ nihav-itu/src/codecs/h264/high/dispatch.rs | 317 +++++ .../src/codecs/h264/high/dsp/mc/debug.rs | 267 ++++ nihav-itu/src/codecs/h264/high/dsp/mc/mod.rs | 393 ++++++ .../src/codecs/h264/high/dsp/mc/release.rs | 339 +++++ nihav-itu/src/codecs/h264/high/dsp/mod.rs | 1186 +++++++++++++++++ nihav-itu/src/codecs/h264/high/loopfilter.rs | 55 + nihav-itu/src/codecs/h264/high/mb_recon.rs | 939 +++++++++++++ nihav-itu/src/codecs/h264/high/mod.rs | 28 + nihav-itu/src/codecs/h264/high/pic_ref.rs | 655 +++++++++ nihav-itu/src/codecs/h264/high/types.rs | 283 ++++ nihav-itu/src/codecs/h264/mod.rs | 214 ++- 15 files changed, 8195 insertions(+), 13 deletions(-) create mode 100644 nihav-itu/src/codecs/h264/high/cabac.rs create mode 100644 nihav-itu/src/codecs/h264/high/cavlc.rs create mode 100644 nihav-itu/src/codecs/h264/high/decoder_mt.rs create mode 100644 nihav-itu/src/codecs/h264/high/decoder_st.rs create mode 100644 nihav-itu/src/codecs/h264/high/dispatch.rs create mode 100644 nihav-itu/src/codecs/h264/high/dsp/mc/debug.rs create mode 100644 nihav-itu/src/codecs/h264/high/dsp/mc/mod.rs create mode 100644 nihav-itu/src/codecs/h264/high/dsp/mc/release.rs create mode 100644 nihav-itu/src/codecs/h264/high/dsp/mod.rs create mode 100644 nihav-itu/src/codecs/h264/high/loopfilter.rs create mode 100644 nihav-itu/src/codecs/h264/high/mb_recon.rs create mode 100644 nihav-itu/src/codecs/h264/high/mod.rs create mode 100644 nihav-itu/src/codecs/h264/high/pic_ref.rs create mode 100644 
nihav-itu/src/codecs/h264/high/types.rs diff --git a/nihav-itu/src/codecs/h264/high/cabac.rs b/nihav-itu/src/codecs/h264/high/cabac.rs new file mode 100644 index 0000000..a4105b7 --- /dev/null +++ b/nihav-itu/src/codecs/h264/high/cabac.rs @@ -0,0 +1,890 @@ +//use nihav_core::codecs::{DecoderResult, DecoderError}; +use nihav_codec_support::codecs::MV; + +use super::super::*; +use super::*; +use super::super::cabac_coder::*; +use super::dsp::{CHROMA_DC_SCAN, ZIGZAG, ZIGZAG1, ZIGZAG8X8}; +use super::super::slice::SliceHeader; + +pub fn cabac_decode_mbskip(cabac: &mut CABAC, sstate: &SliceState, slice_hdr: &SliceHeader) -> bool { + let skip_idx = if slice_hdr.slice_type.is_p() { 11 } else { 24 }; + let mut mb_skip_ctx = 0; + let left_mbt = sstate.get_left_mb().mb_type; + let top_mbt = sstate.get_top_mb().mb_type; + if left_mbt != CompactMBType::None && !left_mbt.is_skip() { + mb_skip_ctx += 1; + } + if top_mbt != CompactMBType::None && !top_mbt.is_skip() { + mb_skip_ctx += 1; + } + if !slice_hdr.slice_type.is_intra() { + cabac.decode_bit(skip_idx + mb_skip_ctx) + } else { + false + } +} + +fn decode_i_type(cabac: &mut CABAC, start: usize, ctx: usize) -> MBType { + if !cabac.decode_bit(start + ctx) { + MBType::Intra4x4 + } else if !cabac.decode_terminate() { + let cbpy = if cabac.decode_bit(start + 3) { 0xF } else { 0x0 }; + let cbpc = cabac.decode_012(start + 4); + let ipred = cabac.decode_bits(start + 6, start + 7, 2); + + MBType::Intra16x16(ipred, cbpy, cbpc) + } else { + MBType::PCM + } +} + +fn decode_i_type_inter(cabac: &mut CABAC, start: usize) -> MBType { + if !cabac.decode_bit(start) { + MBType::Intra4x4 + } else if !cabac.decode_terminate() { + let cbpy = if cabac.decode_bit(start + 1) { 0xF } else { 0x0 }; + let cbpc = if !cabac.decode_bit(start + 2) { + 0 + } else if !cabac.decode_bit(start + 2) { + 1 + } else { + 2 + }; + let ipred = cabac.decode_bits(start + 3, start + 3, 2); + + MBType::Intra16x16(ipred, cbpy, cbpc) + } else { + MBType::PCM + } +} + +fn 
remap_si_mbtype(mbtype: MBType) -> MBType { + match mbtype { + MBType::Intra16x16(0, 0, 0) => MBType::Intra4x4, + MBType::Intra16x16(imode, cbpy, cbpc) => { + let idx = imode + if cbpy != 0 { 12 } else { 0 } + cbpc * 4 - 1; + let nimode = idx & 3; + let (ncbpy, ncbpc) = if (idx >> 2) >= 3 { + (0xF, (idx >> 2) - 3) + } else { + (0x0, idx >> 2) + }; + MBType::Intra16x16(nimode, ncbpy, ncbpc) + }, + MBType::PCM => MBType::Intra16x16(3, 1, 2), + _ => mbtype, + } +} + +pub fn cabac_decode_mb_type(cabac: &mut CABAC, slice_hdr: &SliceHeader, sstate: &SliceState) -> MBType { + match slice_hdr.slice_type { + SliceType::I | SliceType::SI => { + let mut ctx = 0; + if sstate.get_left_mb().mb_type.is_intra16orpcm() { + ctx += 1; + } + if sstate.get_top_mb().mb_type.is_intra16orpcm() { + ctx += 1; + } + let mbtype = decode_i_type(cabac, 3, ctx); + if slice_hdr.slice_type == SliceType::I { + mbtype + } else { + remap_si_mbtype(mbtype) + } + }, + SliceType::P | SliceType::SP => { + if cabac.decode_bit(14) { + decode_i_type_inter(cabac, 17) + } else if !cabac.decode_bit(15) { + if !cabac.decode_bit(16) { + MBType::P16x16 + } else { + MBType::P8x8 + } + } else { + if !cabac.decode_bit(17) { + MBType::P8x16 + } else { + MBType::P16x8 + } + } + }, + SliceType::B => { + let mut ctx = 0; + if !sstate.get_left_mb().mb_type.is_direct() { + ctx += 1; + } + if !sstate.get_top_mb().mb_type.is_direct() { + ctx += 1; + } + if !cabac.decode_bit(27 + ctx) { + MBType::Direct + } else if !cabac.decode_bit(30) { + if !cabac.decode_bit(32) { + MBType::B16x16(BMode::L0) + } else { + MBType::B16x16(BMode::L1) + } + } else { + let idx = cabac.decode_bits(31, 32, 4); + match idx { + 0x0 => MBType::B16x16(BMode::Bi), + 0x1 => MBType::B16x8(BMode::L0, BMode::L0), + 0x2 => MBType::B8x16(BMode::L0, BMode::L0), + 0x3 => MBType::B16x8(BMode::L1, BMode::L1), + 0x4 => MBType::B8x16(BMode::L1, BMode::L1), + 0x5 => MBType::B16x8(BMode::L0, BMode::L1), + 0x6 => MBType::B8x16(BMode::L0, BMode::L1), + 0x7 => 
MBType::B16x8(BMode::L1, BMode::L0), + 0xE => MBType::B8x16(BMode::L1, BMode::L0), + 0xF => MBType::B8x8, + 0xD => decode_i_type_inter(cabac, 32), + _ => { + let idx = (idx - 8) * 2 + (cabac.decode_bit(32) as u8); + match idx { + 0 => MBType::B16x8(BMode::L0, BMode::Bi), + 1 => MBType::B8x16(BMode::L0, BMode::Bi), + 2 => MBType::B16x8(BMode::L1, BMode::Bi), + 3 => MBType::B8x16(BMode::L1, BMode::Bi), + 4 => MBType::B16x8(BMode::Bi, BMode::L0), + 5 => MBType::B8x16(BMode::Bi, BMode::L0), + 6 => MBType::B16x8(BMode::Bi, BMode::L1), + 7 => MBType::B8x16(BMode::Bi, BMode::L1), + 8 => MBType::B16x8(BMode::Bi, BMode::Bi), + _ => MBType::B8x16(BMode::Bi, BMode::Bi), + } + }, + } + } + }, + } +} + +fn decode_sub_mb_type_cabac(cabac: &mut CABAC, slice_hdr: &SliceHeader) -> SubMBType { + match slice_hdr.slice_type { + SliceType::P | SliceType::SP => { + if cabac.decode_bit(21) { + SubMBType::P8x8 + } else if !cabac.decode_bit(22) { + SubMBType::P8x4 + } else if cabac.decode_bit(23) { + SubMBType::P4x8 + } else { + SubMBType::P4x4 + } + }, + SliceType::B => { + if !cabac.decode_bit(36) { + SubMBType::Direct8x8 + } else if !cabac.decode_bit(37) { + if !cabac.decode_bit(39) { + SubMBType::B8x8(BMode::L0) + } else { + SubMBType::B8x8(BMode::L1) + } + } else { + let idx = cabac.decode_bits(38, 39, 3); + match idx { + 0 => SubMBType::B8x8(BMode::Bi), + 1 => SubMBType::B8x4(BMode::L0), + 2 => SubMBType::B4x8(BMode::L0), + 3 => SubMBType::B8x4(BMode::L1), + 6 => SubMBType::B4x4(BMode::L1), + 7 => SubMBType::B4x4(BMode::Bi), + _ => { + let idx = (idx - 4) * 2 + (cabac.decode_bit(39) as u8); + match idx { + 0 => SubMBType::B4x8(BMode::L1), + 1 => SubMBType::B8x4(BMode::Bi), + 2 => SubMBType::B4x8(BMode::Bi), + _ => SubMBType::B4x4(BMode::L0), + } + }, + } + } + }, + _ => unreachable!(), + } +} + +fn decode_ref_idx(cabac: &mut CABAC, num_refs: usize, ctx: usize) -> PicRef { + if num_refs == 1 { + return ZERO_REF; + } + if !cabac.decode_bit(54 + ctx) { + ZERO_REF + } else if 
!cabac.decode_bit(54 + 4) { + PicRef::new(1) + } else { + let mut idx = 2; + while cabac.decode_bit(54 + 5) && idx < 32 { + idx += 1; + } + if idx < num_refs { + PicRef::new(idx as u8) + } else { + INVALID_REF + } + } +} + +fn decode_mv_component(cabac: &mut CABAC, base: usize, ctx: usize) -> i16 { + if !cabac.decode_bit(base + ctx) { + 0 + } else { + let mut val = 1; + while val < 9 && cabac.decode_bit(base + (2 + val).min(6)) { + val += 1; + } + if val >= 9 { + let mut pfx = 3; + while pfx < 16 && cabac.decode_bypass() { + val += 1 << pfx; + pfx += 1; + } + val += cabac.decode_bypass_bits(pfx) as usize; + } + if val == 0 || !cabac.decode_bypass() { + val as i16 + } else { + -(val as i16) + } + } +} + +fn decode_mv(cabac: &mut CABAC, ctx0: usize, ctx1: usize) -> MV { + let x = decode_mv_component(cabac, 40, ctx0); + let y = decode_mv_component(cabac, 47, ctx1); + MV{ x, y } +} + +#[allow(clippy::cognitive_complexity)] +pub fn decode_mb_pred_cabac(cabac: &mut CABAC, slice_hdr: &SliceHeader, mb_type: MBType, sstate: &mut SliceState, mb_info: &mut CurrentMBInfo) { + mb_info.mb_type = mb_type; + let num_l0 = slice_hdr.num_ref_idx_l0_active; + let num_l1 = slice_hdr.num_ref_idx_l1_active; + sstate.reset_mb_mv(); + match mb_type { + MBType::Intra4x4 => { + for &(x, y) in I4X4_SCAN.iter() { + let x = x as usize; + let y = y as usize; + let top_pred = sstate.get_top_blk4(x + y * 4).ipred; + let left_pred = sstate.get_left_blk4(x + y * 4).ipred; + + let top_idx = top_pred.into_pred_idx(); + let left_idx = left_pred.into_pred_idx(); + let pred_mode = top_idx.min(left_idx); + let mut pred_mode = if pred_mode != -1 { pred_mode as u8 } else { 2 }; + + if !cabac.decode_bit(68) { + let m0 = cabac.decode_bit(69) as u8; + let m1 = cabac.decode_bit(69) as u8; + let m2 = cabac.decode_bit(69) as u8; + let new_mode = (m2 << 2) | (m1 << 1) | m0; + pred_mode = if new_mode >= pred_mode { + new_mode + 1 + } else { new_mode }; + } + mb_info.ipred[x + y * 4] = pred_mode.into(); + 
sstate.get_cur_blk4(x + y * 4).ipred = pred_mode.into(); + } + let mut ctx = 0; + if sstate.get_left_mb().cmode != 0 { + ctx += 1; + } + if sstate.get_top_mb().cmode != 0 { + ctx += 1; + } + mb_info.chroma_ipred = if !cabac.decode_bit(64 + ctx) { + 0 + } else if !cabac.decode_bit(67) { + 1 + } else if !cabac.decode_bit(67) { + 2 + } else { + 3 + }; + }, + MBType::Intra8x8 => { + for part in 0..4 { + let blk4 = (part & 1) * 2 + (part & 2) * 4; + let top_pred = sstate.get_top_blk4(blk4).ipred; + let left_pred = sstate.get_left_blk4(blk4).ipred; + + let top_idx = top_pred.into_pred_idx(); + let left_idx = left_pred.into_pred_idx(); + let pred_mode = top_idx.min(left_idx); + let mut pred_mode = if pred_mode != -1 { pred_mode as u8 } else { 2 }; + if !cabac.decode_bit(68) { + let m0 = cabac.decode_bit(69) as u8; + let m1 = cabac.decode_bit(69) as u8; + let m2 = cabac.decode_bit(69) as u8; + let new_mode = (m2 << 2) | (m1 << 1) | m0; + pred_mode = if new_mode >= pred_mode { + new_mode + 1 + } else { new_mode }; + } + mb_info.ipred[blk4] = pred_mode.into(); + mb_info.ipred[blk4 + 1] = pred_mode.into(); + mb_info.ipred[blk4 + 4] = pred_mode.into(); + mb_info.ipred[blk4 + 5] = pred_mode.into(); + sstate.get_cur_blk4(blk4).ipred = pred_mode.into(); + sstate.get_cur_blk4(blk4 + 1).ipred = pred_mode.into(); + sstate.get_cur_blk4(blk4 + 4).ipred = pred_mode.into(); + sstate.get_cur_blk4(blk4 + 5).ipred = pred_mode.into(); + } + let mut ctx = 0; + if sstate.get_left_mb().cmode != 0 { + ctx += 1; + } + if sstate.get_top_mb().cmode != 0 { + ctx += 1; + } + mb_info.chroma_ipred = if !cabac.decode_bit(64 + ctx) { + 0 + } else if !cabac.decode_bit(67) { + 1 + } else if !cabac.decode_bit(67) { + 2 + } else { + 3 + }; + }, + MBType::Intra16x16(_ipred, _, _) => { + let mut ctx = 0; + if sstate.get_left_mb().cmode != 0 { + ctx += 1; + } + if sstate.get_top_mb().cmode != 0 { + ctx += 1; + } + mb_info.chroma_ipred = if !cabac.decode_bit(64 + ctx) { + 0 + } else if !cabac.decode_bit(67) { + 
1 + } else if !cabac.decode_bit(67) { + 2 + } else { + 3 + }; + }, + MBType::P16x16 | MBType::P16x8 | MBType::P8x16 => { + let num_subparts = mb_type.num_parts(); + let (pw, ph) = mb_type.size(); + let mut xoff = 0; + let mut yoff = 0; + for i in 0..num_subparts { + let ctx = sstate.get_mv_ref_ctx(xoff, yoff, 0); + let ref_idx = decode_ref_idx(cabac, num_l0, ctx); + mb_info.ref_l0[i] = ref_idx; + sstate.fill_ref(xoff, yoff, pw, ph, 0, ref_idx); + xoff += pw; + if xoff == 16 { + xoff = 0; + yoff += ph; + } + } + let mut xoff = 0; + let mut yoff = 0; + for i in 0..num_subparts { + let (ctx0, ctx1) = sstate.get_mv_ctx(xoff, yoff, 0); + let mv = decode_mv(cabac, ctx0, ctx1); + mb_info.mv_l0[i] = mv; + sstate.fill_mvd(xoff, yoff, pw, ph, 0, mv); + xoff += pw; + if xoff == 16 { + xoff = 0; + yoff += ph; + } + } + }, + MBType::B16x16(mode) => { + if mode != BMode::L1 { + let ctx = sstate.get_mv_ref_ctx(0, 0, 0); + let ref_idx = decode_ref_idx(cabac, num_l0, ctx); + mb_info.ref_l0[0] = ref_idx; + sstate.fill_ref(0, 0, 16, 16, 0, ref_idx); + } + if mode != BMode::L0 { + let ctx = sstate.get_mv_ref_ctx(0, 0, 1); + let ref_idx = decode_ref_idx(cabac, num_l1, ctx); + mb_info.ref_l1[0] = ref_idx; + sstate.fill_ref(0, 0, 16, 16, 1, ref_idx); + } + if mode != BMode::L1 { + let (ctx0, ctx1) = sstate.get_mv_ctx(0, 0, 0); + let mv = decode_mv(cabac, ctx0, ctx1); + mb_info.mv_l0[0] = mv; + sstate.fill_mvd(0, 0, 16, 16, 0, mv); + } + if mode != BMode::L0 { + let (ctx0, ctx1) = sstate.get_mv_ctx(0, 0, 1); + let mv = decode_mv(cabac, ctx0, ctx1); + mb_info.mv_l1[0] = mv; + sstate.fill_mvd(0, 0, 16, 16, 1, mv); + } + }, + MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => { + let (pw, ph) = mb_info.mb_type.size(); + let (px, py) = (pw & 8, ph & 8); + if mode0 != BMode::L1 { + let ctx = sstate.get_mv_ref_ctx(0, 0, 0); + let ref_idx = decode_ref_idx(cabac, num_l0, ctx); + mb_info.ref_l0[0] = ref_idx; + sstate.fill_ref(0, 0, pw, ph, 0, ref_idx); + } + if mode1 != BMode::L1 { + let 
ctx = sstate.get_mv_ref_ctx(pw & 8, ph & 8, 0); + let ref_idx = decode_ref_idx(cabac, num_l0, ctx); + mb_info.ref_l0[1] = ref_idx; + sstate.fill_ref(px, py, pw, ph, 0, ref_idx); + } + if mode0 != BMode::L0 { + let ctx = sstate.get_mv_ref_ctx(0, 0, 1); + let ref_idx = decode_ref_idx(cabac, num_l1, ctx); + mb_info.ref_l1[0] = ref_idx; + sstate.fill_ref(0, 0, pw, ph, 1, ref_idx); + } + if mode1 != BMode::L0 { + let ctx = sstate.get_mv_ref_ctx(pw & 8, ph & 8, 1); + let ref_idx = decode_ref_idx(cabac, num_l1, ctx); + mb_info.ref_l1[1] = ref_idx; + sstate.fill_ref(px, py, pw, ph, 1, ref_idx); + } + if mode0 != BMode::L1 { + let (ctx0, ctx1) = sstate.get_mv_ctx(0, 0, 0); + let mv = decode_mv(cabac, ctx0, ctx1); + mb_info.mv_l0[0] = mv; + sstate.fill_mvd(0, 0, pw, ph, 0, mv); + } + if mode1 != BMode::L1 { + let (ctx0, ctx1) = sstate.get_mv_ctx(pw & 8, ph & 8, 0); + let mv = decode_mv(cabac, ctx0, ctx1); + mb_info.mv_l0[1] = mv; + sstate.fill_mvd(px, py, pw, ph, 0, mv); + } + if mode0 != BMode::L0 { + let (ctx0, ctx1) = sstate.get_mv_ctx(0, 0, 1); + let mv = decode_mv(cabac, ctx0, ctx1); + mb_info.mv_l1[0] = mv; + sstate.fill_mvd(0, 0, pw, ph, 1, mv); + } + if mode1 != BMode::L0 { + let (ctx0, ctx1) = sstate.get_mv_ctx(pw & 8, ph & 8, 1); + let mv = decode_mv(cabac, ctx0, ctx1); + mb_info.mv_l1[1] = mv; + sstate.fill_mvd(px, py, pw, ph, 1, mv); + } + }, + MBType::P8x8 | MBType::B8x8 => { + for sub_type in mb_info.sub_mb_type.iter_mut() { + *sub_type = decode_sub_mb_type_cabac(cabac, slice_hdr); + } + let num_l = [num_l0, num_l1]; + let dst_ref = [&mut mb_info.ref_l0, &mut mb_info.ref_l1]; + for ref_l in 0..2 { + for spart in 0..4 { + let stype = mb_info.sub_mb_type[spart]; + if stype != SubMBType::Direct8x8 && ((ref_l == 0 && !stype.is_l1()) || (ref_l == 1 && !stype.is_l0())) { + let ctx = sstate.get_mv_ref_ctx((spart & 1) * 8, (spart & 2) * 4, ref_l); + let ref_idx = decode_ref_idx(cabac, num_l[ref_l], ctx); + dst_ref[ref_l][spart] = ref_idx; + 
sstate.get_cur_blk8(spart).ref_idx[ref_l] = ref_idx; + } + } + } + let dst_mv = [&mut mb_info.mv_l0, &mut mb_info.mv_l1]; + for ref_l in 0..2 { + for spart in 0..4 { + let stype = mb_info.sub_mb_type[spart]; + if stype == SubMBType::Direct8x8 || (ref_l == 0 && stype.is_l1()) || (ref_l == 1 && stype.is_l0()) { + continue; + } + let (pw, ph) = stype.size(); + let mut xoff = (spart & 1) * 8; + let mut yoff = (spart & 2) * 4; + let num_sub = stype.num_parts(); + let orig_x = xoff; + for i in 0..num_sub { + let (ctx0, ctx1) = sstate.get_mv_ctx(xoff, yoff, ref_l); + let mv = decode_mv(cabac, ctx0, ctx1); + dst_mv[ref_l][spart * 4 + i] = mv; + sstate.fill_mvd(xoff, yoff, pw, ph, ref_l, mv); + xoff += pw; + if xoff == orig_x + 8 { + xoff -= 8; + yoff += ph; + } + } + } + } + }, + _ => {}, + }; +} + +pub fn decode_cbp_cabac(cabac: &mut CABAC, sstate: &SliceState) -> (u8, u8) { + let mbt_a = sstate.get_left_mb().mb_type; + let mbt_b = sstate.get_top_mb().mb_type; + let left = if mbt_a == CompactMBType::None || mbt_a == CompactMBType::PCM { + 0x3F + } else if !mbt_a.is_skip() { + sstate.get_left_mb().cbp + } else { + 0 + }; + let top = if mbt_b == CompactMBType::None || mbt_b == CompactMBType::PCM { + 0x3F + } else if !mbt_b.is_skip() { + sstate.get_top_mb().cbp + } else { + 0 + }; + + let cbp_ctx = if (left & 2) != 0 { 0 } else { 1 } + if (top & 4) != 0 { 0 } else { 2 }; + let mut cbpy = cabac.decode_bit(73 + cbp_ctx) as u8; + let cbp_ctx = if cbpy != 0 { 0 } else { 1 } + if (top & 8) != 0 { 0 } else { 2 }; + cbpy |= (cabac.decode_bit(73 + cbp_ctx) as u8) << 1; + let cbp_ctx = if (left & 8) != 0 { 0 } else { 1 } + if (cbpy & 1) != 0 { 0 } else { 2 }; + cbpy |= (cabac.decode_bit(73 + cbp_ctx) as u8) << 2; + let cbp_ctx = if (cbpy & 4) != 0 { 0 } else { 1 } + if (cbpy & 2) != 0 { 0 } else { 2 }; + cbpy |= (cabac.decode_bit(73 + cbp_ctx) as u8) << 3; + + let left = if mbt_a == CompactMBType::PCM { + 0x2F + } else if mbt_a == CompactMBType::None || !mbt_a.is_skip() { + 
sstate.get_left_mb().cbp + } else { + 0 + }; + let top = if mbt_b == CompactMBType::PCM { + 0x2F + } else if mbt_b == CompactMBType::None || !mbt_b.is_skip() { + sstate.get_top_mb().cbp + } else { + 0 + }; + let cleft = left >> 4; + let ctop = top >> 4; + let cbp_ctx0 = if cleft != 0 { 1 } else { 0 } + if ctop != 0 { 2 } else { 0 }; + let cbp_ctx1 = if cleft == 2 { 1 } else { 0 } + if ctop == 2 { 2 } else { 0 }; + let cbpc = if !cabac.decode_bit(77 + cbp_ctx0) { + 0 + } else { + cabac.decode_bit(81 + cbp_ctx1) as u8 + 1 + }; + + (cbpy, cbpc) +} + +pub fn decode_mb_qp_delta_cabac(cabac: &mut CABAC, ctx: usize) -> i32 { + if !cabac.decode_bit(60 + ctx) { + 0 + } else if !cabac.decode_bit(62) { + 1 + } else { + let mut val = 0; + while val < 128 && cabac.decode_bit(63) { + val += 1; + } + if (val & 1) != 0 { + (val >> 1) + 2 + } else { + -(val >> 1) - 1 + } + } +} + +fn decode_block(cabac: &mut CABAC, coeffs: &mut [i16], cat: usize, ctx_off: usize) -> bool { + const CTX_BASE: [(usize, usize); 5] = [ + (0, 0), (15, 10), (29, 20), (44, 30), (47, 39) + ]; + let (flag_off, coef_off) = CTX_BASE[cat]; + let scan: &[usize] = match coeffs.len() { + 4 => &CHROMA_DC_SCAN, + 15 => &ZIGZAG1, + 16 => &ZIGZAG, + _ => unreachable!(), + }; + + let coded_block_flag = cabac.decode_bit(85 + ctx_off); + let mut coded = [false; 16]; + if coded_block_flag { + let mut last_idx = coeffs.len() - 1; + for i in 0..coeffs.len() - 1 { + coded[i] = cabac.decode_bit(105 + flag_off + i); // or 277 for interlaced + if coded[i] { + let last = cabac.decode_bit(166 + flag_off + i); // or 338 for interlaced + if last { + last_idx = i; + break; + } + } + } + coded[last_idx] = true; + let mut coef_ctx = 0; + for i in (0..=last_idx).rev() { + if coded[i] { + let zero_ctx = if coef_ctx < 4 { coef_ctx + 1 } else { 0 }; + coeffs[scan[i]] = if !cabac.decode_bit(227 + coef_off + zero_ctx) { + if coef_ctx < 3 { + coef_ctx += 1; + } + 1 + } else { + let cur_ctx = 227 + coef_off + (coef_ctx + 2).max(5); + coef_ctx 
= (coef_ctx + 1).clamp(4, 7); + + let mut coef = 2; + while coef < 15 && cabac.decode_bit(cur_ctx) { + coef += 1; + } + if coef == 15 { + let mut pfx = 0; + while pfx < 15 && cabac.decode_bypass() { + pfx += 1; + } + let mut tail = 1; + for _ in 0..pfx { + tail = (tail << 1) + (cabac.decode_bypass() as i16); + } + coef + tail - 1 + } else { + coef + } + }; + if cabac.decode_bypass() { + coeffs[scan[i]] = -coeffs[scan[i]]; + } + } + } + } + coded_block_flag +} + +fn decode_block8x8(cabac: &mut CABAC, coeffs: &mut [i16; 64], _cat: usize) { + const SIG_FLAG_MAP: [usize; 63] = [ + 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, + 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9, 10, 9, 8, 7, + 7, 6, 11, 12, 13, 11, 6, 7, 8, 9, 14, 10, 9, 8, 6, 11, + 12, 13, 11, 6, 9, 14, 10, 9, 11, 12, 13, 11, 14, 10, 12 + ]; + const LAST_SIG_FLAG_MAP: [usize; 63] = [ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 + ]; + let (flag_off, coef_off) = (0, 0); + let scan = &ZIGZAG8X8; + + let mut coded = [false; 64]; + let mut last_idx = coeffs.len() - 1; + for i in 0..coeffs.len() - 1 { + coded[i] = cabac.decode_bit(402 + flag_off + SIG_FLAG_MAP[i]); + if coded[i] { + let last = cabac.decode_bit(417 + flag_off + LAST_SIG_FLAG_MAP[i]); + if last { + last_idx = i; + break; + } + } + } + coded[last_idx] = true; + let mut coef_ctx = 0; + for i in (0..=last_idx).rev() { + if coded[i] { + let zero_ctx = if coef_ctx < 4 { coef_ctx + 1 } else { 0 }; + coeffs[scan[i]] = if !cabac.decode_bit(426 + coef_off + zero_ctx) { + if coef_ctx < 3 { + coef_ctx += 1; + } + 1 + } else { + let cur_ctx = 426 + coef_off + (coef_ctx + 2).max(5); + coef_ctx = (coef_ctx + 1).clamp(4, 7); + + let mut coef = 2; + while coef < 15 && cabac.decode_bit(cur_ctx) { + coef += 1; + } + if coef == 15 { + let mut pfx = 0; + while pfx < 15 && cabac.decode_bypass() { + pfx += 1; + } + let mut tail 
= 1; + for _ in 0..pfx { + tail = (tail << 1) + (cabac.decode_bypass() as i16); + } + coef + tail - 1 + } else { + coef + } + }; + if cabac.decode_bypass() { + coeffs[scan[i]] = -coeffs[scan[i]]; + } + } + } +} + +fn derive_ctx_off(sstate: &mut SliceState, cat: usize, blk_no: usize) -> usize { + let mbt = sstate.get_cur_mb().mb_type; + let mut mbt_a = sstate.get_left_mb().mb_type; + let mut mbt_b = sstate.get_top_mb().mb_type; + let (trans_a, trans_b, mut cond_term_a, mut cond_term_b) = match cat { + 0 => { + (mbt_a == CompactMBType::Intra16x16, + mbt_b == CompactMBType::Intra16x16, + (sstate.get_left_mb().coded_flags & 1) as usize, + (sstate.get_top_mb().coded_flags & 1) as usize) + }, + 1 | 2 => { + if (blk_no & 3) != 0 { + mbt_a = mbt; + } + if blk_no >= 4 { + mbt_b = mbt; + } + let nc_left = sstate.get_left_blk4(blk_no).ncoded; + let nc_top = sstate.get_top_blk4(blk_no).ncoded; + (nc_left != 0, + nc_top != 0, + (nc_left != 0) as usize, + (nc_top != 0) as usize) + }, + 3 => { + ((sstate.get_left_mb().cbp & 0x30) != 0, + (sstate.get_top_mb().cbp & 0x30) != 0, + ((sstate.get_left_mb().coded_flags & (1 << (blk_no + 1 + 16))) != 0) as usize, + ((sstate.get_top_mb().coded_flags & (1 << (blk_no + 1 + 16))) != 0) as usize) + }, + 4 => { + let chroma = blk_no >> 2; + if (blk_no & 1) != 0 { + mbt_a = mbt; + } + if (blk_no & 2) != 0 { + mbt_b = mbt; + } + ((blk_no & 1) != 0 || (sstate.get_left_mb().cbp & 0x20) != 0, + (blk_no & 2) != 0 || (sstate.get_top_mb().cbp & 0x20) != 0, + (sstate.get_left_blk8(blk_no & 3).ncoded_c[chroma] != 0) as usize, + (sstate.get_top_blk8(blk_no & 3).ncoded_c[chroma] != 0) as usize) + }, + _ => unreachable!(), + }; + /*let coded_no = match cat { + 0 => 0, + 1 | 2 => blk_no + 1, + 3 => 1 + 16 + blk_no, + 4 => 1 + 16 + 2 + blk_no, + _ => unreachable!(), + };*/ + + if mbt_a == CompactMBType::None && mbt.is_inter() { + cond_term_a = 0; + } + if !trans_a && mbt_a != CompactMBType::PCM { + cond_term_a = 0; + } + /*if mbt.is_intra() && 
pps.constrained_intra_pred && mbt_a.is_inter() && slice_partitioning { + cond_term_a = 0; + }*/ + if (mbt_a == CompactMBType::PCM) || (mbt_a == CompactMBType::None && mbt.is_intra()) { + cond_term_a = 1; + } + + if mbt_b == CompactMBType::None && mbt.is_inter() { + cond_term_b = 0; + } + if !trans_b && mbt_b != CompactMBType::PCM { + cond_term_b = 0; + } + /*if mbt.is_intra() && pps.constrained_intra_pred && mbt_b.is_inter() && slice_partitioning { + cond_term_b = 0; + }*/ + if (mbt_b == CompactMBType::PCM) || (mbt_b == CompactMBType::None && mbt.is_intra()) { + cond_term_b = 1; + } + + cat * 4 + cond_term_b * 2 + cond_term_a +} + +pub fn decode_residual_cabac(cabac: &mut CABAC, sstate: &mut SliceState, mb_info: &mut CurrentMBInfo) { + sstate.get_cur_mb().mb_type = mb_info.mb_type.into(); + let mut coded_flags = 0; + if mb_info.mb_type.is_intra16x16() { + let off = derive_ctx_off(sstate, 0, 0); + let coded = decode_block(cabac, &mut mb_info.coeffs[24], 0, off); + mb_info.coded[24] = coded; + if coded { + coded_flags |= 1; + } + } + if !mb_info.transform_size_8x8 { + for blk8 in 0..4 { + if (mb_info.cbpy & (1 << blk8)) != 0 { + for blk4 in 0..4 { + let blk_no = (blk8 & 1) * 2 + (blk8 & 2) * 4 + (blk4 & 1) + (blk4 & 2) * 2; + let coded = if mb_info.mb_type.is_intra16x16() { + let off = derive_ctx_off(sstate, 1, blk_no); + decode_block(cabac, &mut mb_info.coeffs[blk_no][1..], 1, off) + } else { + let off = derive_ctx_off(sstate, 2, blk_no); + decode_block(cabac, &mut mb_info.coeffs[blk_no], 2, off) + }; + sstate.get_cur_blk4(blk_no).ncoded = coded as u8; + mb_info.coded[blk_no] = coded; + if coded { + coded_flags |= 1 << (1 + blk_no); + } + } + } + } + } else { + for blk8 in 0..4 { + if (mb_info.cbpy & (1 << blk8)) != 0 { + let blk4 = (blk8 & 1) * 2 + (blk8 & 2) * 4; + decode_block8x8(cabac, &mut mb_info.coeffs8x8[blk8].coeffs, 5); + coded_flags |= 0x33 << blk4; + mb_info.coded[blk4] = true; + mb_info.coded[blk4 + 1] = true; + mb_info.coded[blk4 + 4] = true; + 
mb_info.coded[blk4 + 5] = true; + sstate.get_cur_blk4(blk4).ncoded = 1; + sstate.get_cur_blk4(blk4 + 1).ncoded = 1; + sstate.get_cur_blk4(blk4 + 4).ncoded = 1; + sstate.get_cur_blk4(blk4 + 5).ncoded = 1; + } + } + } + for chroma in 0..2 { + if (mb_info.cbpc & 3) != 0 { + let off = derive_ctx_off(sstate, 3, chroma); + let coded = decode_block(cabac, &mut mb_info.chroma_dc[chroma], 3, off); + if coded { + coded_flags |= 1 << (16 + 1 + chroma); + } + } + } + for chroma in 0..2 { + if (mb_info.cbpc & 2) != 0 { + for blk4 in 0..4 { + let blk_no = 16 + chroma * 4 + blk4; + let off = derive_ctx_off(sstate, 4, blk_no - 16); + let coded = decode_block(cabac, &mut mb_info.coeffs[blk_no][1..], 4, off); + sstate.get_cur_blk8(blk4).ncoded_c[chroma] = coded as u8; + mb_info.coded[blk_no] = coded; + if coded { + coded_flags |= 1 << (1 + 2 + blk_no); + } + } + } + } + sstate.get_cur_mb().coded_flags = coded_flags; +} diff --git a/nihav-itu/src/codecs/h264/high/cavlc.rs b/nihav-itu/src/codecs/h264/high/cavlc.rs new file mode 100644 index 0000000..b8adedc --- /dev/null +++ b/nihav-itu/src/codecs/h264/high/cavlc.rs @@ -0,0 +1,680 @@ +use nihav_core::codecs::{DecoderResult, DecoderError}; +use nihav_core::io::bitreader::*; +use nihav_core::io::codebook::*; +use nihav_core::io::intcode::*; +use nihav_codec_support::codecs::MV; + +use super::super::*; +use super::*; +use super::dsp::{CHROMA_DC_SCAN, ZIGZAG, ZIGZAG1}; +use super::super::slice::SliceHeader; + +fn map_i_type(idx: usize) -> MBType { + if idx == 0 { + MBType::Intra4x4 + } else if idx == 25 { + MBType::PCM + } else { + let imode = ((idx - 1) & 3) as u8; + let cbpc = ((idx - 1) / 4) as u8; + let (cbpy, cbpc) = if cbpc >= 3 { (0xF, cbpc - 3) } else { (0x0, cbpc) }; + MBType::Intra16x16(imode, cbpy, cbpc) + } +} + +const NUM_I_TYPES: usize = 26; + +const P_TYPES: [MBType; 5] = [ + MBType::P16x16, MBType::P16x8, MBType::P8x16, MBType::P8x8, MBType::P8x8Ref0 +]; + +const B_TYPES: [MBType; 23] = [ + MBType::Direct, + 
MBType::B16x16(BMode::L0), + MBType::B16x16(BMode::L1), + MBType::B16x16(BMode::Bi), + MBType::B16x8(BMode::L0, BMode::L0), + MBType::B8x16(BMode::L0, BMode::L0), + MBType::B16x8(BMode::L1, BMode::L1), + MBType::B8x16(BMode::L1, BMode::L1), + MBType::B16x8(BMode::L0, BMode::L1), + MBType::B8x16(BMode::L0, BMode::L1), + MBType::B16x8(BMode::L1, BMode::L0), + MBType::B8x16(BMode::L1, BMode::L0), + MBType::B16x8(BMode::L0, BMode::Bi), + MBType::B8x16(BMode::L0, BMode::Bi), + MBType::B16x8(BMode::L1, BMode::Bi), + MBType::B8x16(BMode::L1, BMode::Bi), + MBType::B16x8(BMode::Bi, BMode::L0), + MBType::B8x16(BMode::Bi, BMode::L0), + MBType::B16x8(BMode::Bi, BMode::L1), + MBType::B8x16(BMode::Bi, BMode::L1), + MBType::B16x8(BMode::Bi, BMode::Bi), + MBType::B8x16(BMode::Bi, BMode::Bi), + MBType::B8x8, +]; + +pub fn decode_mb_type_cavlc(br: &mut BitReader, slice_hdr: &SliceHeader) -> DecoderResult { + let mb_type_id = br.read_ue()? as usize; + match slice_hdr.slice_type { + SliceType::I => { + validate!(mb_type_id < NUM_I_TYPES); + Ok(map_i_type(mb_type_id)) + }, + SliceType::SI => { + validate!(mb_type_id < NUM_I_TYPES + 1); + if mb_type_id == 0 { + Ok(MBType::Intra4x4) // special SI one + } else { + Ok(map_i_type(mb_type_id - 1)) + } + }, + SliceType::P | SliceType::SP => { + validate!(mb_type_id < NUM_I_TYPES + P_TYPES.len()); + if mb_type_id < P_TYPES.len() { + Ok(P_TYPES[mb_type_id]) + } else { + Ok(map_i_type(mb_type_id - P_TYPES.len())) + } + }, + SliceType::B => { + validate!(mb_type_id < NUM_I_TYPES + B_TYPES.len()); + if mb_type_id < B_TYPES.len() { + Ok(B_TYPES[mb_type_id]) + } else { + Ok(map_i_type(mb_type_id - B_TYPES.len())) + } + }, + } +} + +fn read_refs(br: &mut BitReader, dst: &mut [PicRef], num_refs: usize) -> DecoderResult<()> { + if num_refs > 1 { + for pic_ref in dst.iter_mut() { + *pic_ref = PicRef::new(br.read_te(num_refs as u32 - 1)? 
as u8); + } + } else { + for pic_ref in dst.iter_mut() { + *pic_ref = ZERO_REF; + } + } + Ok(()) +} + +fn read_mvs(br: &mut BitReader, mvs: &mut [MV]) -> DecoderResult<()> { + for mv in mvs.iter_mut() { + mv.x = br.read_se()? as i16; + mv.y = br.read_se()? as i16; + } + Ok(()) +} + +#[allow(clippy::cognitive_complexity)] +pub fn decode_mb_pred_cavlc(br: &mut BitReader, slice_hdr: &SliceHeader, mb_type: MBType, sstate: &mut SliceState, mb_info: &mut CurrentMBInfo) -> DecoderResult<()> { + mb_info.mb_type = mb_type; + let num_l0 = slice_hdr.num_ref_idx_l0_active; + let num_l1 = slice_hdr.num_ref_idx_l1_active; + match mb_type { + MBType::Intra4x4 => { + for &(x, y) in I4X4_SCAN.iter() { + let x = x as usize; + let y = y as usize; + let top_pred = sstate.get_top_blk4(x + y * 4).ipred; + let left_pred = sstate.get_left_blk4(x + y * 4).ipred; + + let top_idx = top_pred.into_pred_idx(); + let left_idx = left_pred.into_pred_idx(); + let pred_mode = top_idx.min(left_idx); + let mut pred_mode = if pred_mode != -1 { pred_mode as u8 } else { 2 }; + if !br.read_bool()? { + let new_mode = br.read(3)? as u8; + pred_mode = if new_mode >= pred_mode { + new_mode + 1 + } else { new_mode }; + } + mb_info.ipred[x + y * 4] = pred_mode.into(); + sstate.get_cur_blk4(x + y * 4).ipred = pred_mode.into(); + } + mb_info.chroma_ipred = br.read_ue_lim(3)? as u8; + }, + MBType::Intra8x8 => { + for part in 0..4 { + let blk4 = (part & 1) * 2 + (part & 2) * 4; + let top_pred = sstate.get_top_blk4(blk4).ipred; + let left_pred = sstate.get_left_blk4(blk4).ipred; + + let top_idx = top_pred.into_pred_idx(); + let left_idx = left_pred.into_pred_idx(); + let pred_mode = top_idx.min(left_idx); + let mut pred_mode = if pred_mode != -1 { pred_mode as u8 } else { 2 }; + if !br.read_bool()? { + let new_mode = br.read(3)? 
as u8; + pred_mode = if new_mode >= pred_mode { + new_mode + 1 + } else { new_mode }; + } + mb_info.ipred[blk4] = pred_mode.into(); + mb_info.ipred[blk4 + 1] = pred_mode.into(); + mb_info.ipred[blk4 + 4] = pred_mode.into(); + mb_info.ipred[blk4 + 5] = pred_mode.into(); + sstate.get_cur_blk4(blk4).ipred = pred_mode.into(); + sstate.get_cur_blk4(blk4 + 1).ipred = pred_mode.into(); + sstate.get_cur_blk4(blk4 + 4).ipred = pred_mode.into(); + sstate.get_cur_blk4(blk4 + 5).ipred = pred_mode.into(); + } + mb_info.chroma_ipred = br.read_ue_lim(3)? as u8; + }, + MBType::Intra16x16(_ipred, _, _) => { + sstate.fill_ipred(IntraPredMode::DC); + mb_info.chroma_ipred = br.read_ue_lim(3)? as u8; + }, + MBType::P16x16 | MBType::P16x8 | MBType::P8x16 => { + let nparts = mb_type.num_parts(); + read_refs(br, &mut mb_info.ref_l0[..nparts], num_l0)?; + read_mvs(br, &mut mb_info.mv_l0[..nparts])?; + }, + MBType::B16x16(mode) => { + if mode != BMode::L1 { + read_refs(br, &mut mb_info.ref_l0[..1], num_l0)?; + } + if mode != BMode::L0 { + read_refs(br, &mut mb_info.ref_l1[..1], num_l1)?; + } + if mode != BMode::L1 { + read_mvs(br, &mut mb_info.mv_l0[..1])?; + } + if mode != BMode::L0 { + read_mvs(br, &mut mb_info.mv_l1[..1])?; + } + }, + MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => { + if num_l0 > 1 { + if mode0 != BMode::L1 { + read_refs(br, &mut mb_info.ref_l0[0..1], num_l0)?; + } + if mode1 != BMode::L1 { + read_refs(br, &mut mb_info.ref_l0[1..2], num_l0)?; + } + } + if num_l1 > 1 { + if mode0 != BMode::L0 { + read_refs(br, &mut mb_info.ref_l1[0..1], num_l1)?; + } + if mode1 != BMode::L0 { + read_refs(br, &mut mb_info.ref_l1[1..2], num_l1)?; + } + } + if mode0 != BMode::L1 { + read_mvs(br, &mut mb_info.mv_l0[0..1])?; + } + if mode1 != BMode::L1 { + read_mvs(br, &mut mb_info.mv_l0[1..2])?; + } + if mode0 != BMode::L0 { + read_mvs(br, &mut mb_info.mv_l1[0..1])?; + } + if mode1 != BMode::L0 { + read_mvs(br, &mut mb_info.mv_l1[1..2])?; + } + }, + MBType::P8x8 | 
MBType::P8x8Ref0 | MBType::B8x8 => { + for sub_mb in mb_info.sub_mb_type.iter_mut() { + *sub_mb = decode_sub_mb_type(br, mb_type != MBType::B8x8)?; + } + for (part, &sub_mb) in mb_info.sub_mb_type.iter().enumerate() { + if num_l0 > 1 && mb_type != MBType::P8x8Ref0 && sub_mb != SubMBType::Direct8x8 && !sub_mb.is_l1() { + read_refs(br, &mut mb_info.ref_l0[part..][..1], num_l0)?; + } + } + for (part, &sub_mb) in mb_info.sub_mb_type.iter().enumerate() { + if num_l1 > 1 && sub_mb != SubMBType::Direct8x8 && !sub_mb.is_l0() { + read_refs(br, &mut mb_info.ref_l1[part..][..1], num_l1)?; + } + } + for (part, &sub_mb) in mb_info.sub_mb_type.iter().enumerate() { + if sub_mb != SubMBType::Direct8x8 && !sub_mb.is_l1() { + let num_subparts = sub_mb.num_parts(); + read_mvs(br, &mut mb_info.mv_l0[part * 4..][..num_subparts])?; + } + } + for (part, &sub_mb) in mb_info.sub_mb_type.iter().enumerate() { + if sub_mb != SubMBType::Direct8x8 && !sub_mb.is_l0() { + let num_subparts = sub_mb.num_parts(); + read_mvs(br, &mut mb_info.mv_l1[part * 4..][..num_subparts])?; + } + } + }, + _ => {}, + }; + Ok(()) +} + +fn decode_sub_mb_type(br: &mut BitReader, is_p: bool) -> DecoderResult { + const SUB_MB_P_TYPES: [SubMBType; 4] = [ + SubMBType::P8x8, SubMBType::P8x4, SubMBType::P4x8, SubMBType::P4x4 + ]; + const SUB_MB_B_TYPES: [SubMBType; 13] = [ + SubMBType::Direct8x8, + SubMBType::B8x8(BMode::L0), SubMBType::B8x8(BMode::L1), SubMBType::B8x8(BMode::Bi), + SubMBType::B8x4(BMode::L0), SubMBType::B4x8(BMode::L0), + SubMBType::B8x4(BMode::L1), SubMBType::B4x8(BMode::L1), + SubMBType::B8x4(BMode::Bi), SubMBType::B4x8(BMode::Bi), + SubMBType::B4x4(BMode::L0), SubMBType::B4x4(BMode::L1), SubMBType::B4x4(BMode::Bi), + ]; + if is_p { + let idx = br.read_ue_lim(SUB_MB_P_TYPES.len() as u32 - 1)? as usize; + Ok(SUB_MB_P_TYPES[idx]) + } else { + let idx = br.read_ue_lim(SUB_MB_B_TYPES.len() as u32 - 1)? 
as usize;
+        Ok(SUB_MB_B_TYPES[idx])
+    }
+}
+
+/// Splits a decoded `coeff_token` symbol into `(trailing_ones, total_coeff)`.
+///
+/// The first six symbols are irregular and are looked up in small tables; the
+/// remaining ones come in groups of four that share the same coefficient count
+/// (one entry per possible number of trailing ones).
+fn map_coeff_token(val: u8) -> (usize, usize) {
+    const TRAILING_ONES: [u8; 6] = [ 0, 0, 1, 0, 1, 2 ];
+    const TOTAL_COEFF: [u8; 6] = [0, 1, 1, 2, 2, 2];
+
+    if val < 6 {
+        (TRAILING_ONES[val as usize] as usize, TOTAL_COEFF[val as usize] as usize)
+    } else {
+        (((val - 6) & 3) as usize, ((val + 6) >> 2) as usize)
+    }
+}
+
+/// Decodes one CAVLC-coded residual block into `coeffs`.
+///
+/// Reads the coefficient token using `cb`, then the levels, the total number
+/// of zeroes and the individual zero runs, and finally stores each level at
+/// the position selected by `scan`. Only the positions of coded levels are
+/// written, the rest of `coeffs` is left as it was.
+///
+/// Returns the number of coded coefficients. An error is returned when the
+/// bitstream cannot be read or when it describes more coefficients/zeroes
+/// than the block can hold (instead of panicking on an out-of-range index).
+fn decode_coeffs(br: &mut BitReader, coeffs: &mut [i16], scan: &[usize], cb: &Codebook<u8>, tables: &CAVLCTables) -> DecoderResult<u8> {
+    let coeff_token = br.read_cb(cb)?;
+    let (trail_ones, total_coeff) = map_coeff_token(coeff_token);
+    // the token may announce more coefficients than this block type holds
+    // (e.g. 16 for a 15-coefficient AC block), reject it before parsing further
+    let max_coeffs = coeffs.len().min(scan.len());
+    validate!(total_coeff <= max_coeffs);
+    let mut level = [0i16; 16];
+    let mut run = [0u8; 16];
+    if total_coeff > 0 {
+        let mut suffix_length = (total_coeff > 10 && trail_ones < 3) as u8;
+        for i in 0..total_coeff {
+            if i < trail_ones {
+                // trailing ones are coded as a single sign bit
+                if !br.read_bool()? {
+                    level[i] = 1;
+                } else {
+                    level[i] = -1;
+                }
+            } else {
+                let level_prefix = br.read_code(UintCodeType::UnaryZeroes)?;
+                validate!(level_prefix <= 19);
+                let mut level_code = level_prefix.min(15) << suffix_length;
+                if suffix_length > 0 || level_prefix >= 14 {
+                    let level_suffix_size = if level_prefix == 14 && suffix_length == 0 {
+                            4
+                        } else if level_prefix >= 15 {
+                            (level_prefix - 3) as u8
+                        } else {
+                            suffix_length
+                        };
+                    let level_suffix = br.read(level_suffix_size)?;
+                    level_code += level_suffix;
+                }
+                if level_prefix >= 15 && suffix_length == 0 {
+                    level_code += 15;
+                }
+                if level_prefix >= 16 {
+                    level_code += (1 << (level_prefix - 3)) - 4096;
+                }
+                if i == trail_ones && trail_ones < 3 {
+                    level_code += 2;
+                }
+                // even codes are positive levels, odd codes are negative ones
+                level[i] = if (level_code & 1) == 0 {
+                        (level_code as i32 + 2) >> 1
+                    } else {
+                        -((level_code as i32 + 1) >> 1)
+                    } as i16;
+                if suffix_length == 0 {
+                    suffix_length = 1;
+                }
+                if level[i].abs() > (3 << (suffix_length - 1)) && suffix_length < 6 {
+                    suffix_length += 1;
+                }
+            }
+        }
+        let mut zeros_left = if total_coeff < coeffs.len() {
+                let cb = if coeffs.len() > 4 {
+                        &tables.total_zeros_cb[total_coeff - 1]
+                    } else {
+                        &tables.cdc_total_zeros_cb[total_coeff - 1]
+                    };
+                br.read_cb(cb)?
+            } else { 0 };
+        // coefficients plus the zeroes between them must fit into the block,
+        // otherwise the scan index below would run past the end
+        validate!(total_coeff + usize::from(zeros_left) <= max_coeffs);
+        for i in 0..total_coeff - 1 {
+            if zeros_left > 0 {
+                let run_before = br.read_cb(&tables.run_before_cb[(zeros_left - 1).min(6) as usize])?;
+                // the last run codebook allows runs longer than what is left
+                validate!(run_before <= zeros_left);
+                run[i] = run_before;
+                zeros_left -= run_before;
+            }
+        }
+        run[total_coeff - 1] = zeros_left;
+        // levels were read starting from the end of the scan, so walk them backwards
+        let mut idx = 0;
+        for i in (0..total_coeff).rev() {
+            idx += run[i] as usize;
+            coeffs[scan[idx]] = level[i];
+            idx += 1;
+        }
+    }
+    Ok(total_coeff as u8)
+}
+
+/// Decodes a full 16-coefficient block using the `ZIGZAG` scan.
+fn decode_block(br: &mut BitReader, coeffs: &mut [i16; 16], cb: &Codebook<u8>, tables: &CAVLCTables) -> DecoderResult<u8> {
+    decode_coeffs(br, coeffs, &ZIGZAG, cb, tables)
+}
+
+/// Decodes the 15 AC coefficients of a block using the `ZIGZAG1` scan, `coeffs[0]` is not touched.
+fn decode_block_ac(br: &mut BitReader, coeffs: &mut [i16; 16], cb: &Codebook<u8>, tables: &CAVLCTables) -> DecoderResult<u8> {
+    decode_coeffs(br, &mut coeffs[1..], &ZIGZAG1, cb, tables)
+}
+
+/// Decodes the four chroma DC coefficients using the `CHROMA_DC_SCAN` order.
+fn decode_chroma_dc(br: &mut BitReader, coeffs: &mut [i16; 4], cb: &Codebook<u8>, tables: &CAVLCTables) -> DecoderResult<u8> {
+    decode_coeffs(br, coeffs, &CHROMA_DC_SCAN, cb, tables)
+}
+
+/// Selects the coefficient token codebook for the given (averaged) neighbour coefficient count.
+fn get_cb_idx(nc: u8) -> usize {
+    match nc {
+        0 | 1 => 0,
+        2 | 3 => 1,
+        4..=7 => 2,
+        _     => 3,
+    }
+}
+
+pub fn decode_residual_cavlc(br: &mut BitReader, sstate: &mut SliceState, mb_info: &mut CurrentMBInfo, tables: &CAVLCTables) -> DecoderResult<()> {
+    if mb_info.mb_type.is_intra16x16() {
+        let mut top_nc = sstate.get_top_blk4(0).ncoded;
+        let mut left_nc = sstate.get_left_blk4(0).ncoded;
+        if !sstate.has_left {
+            left_nc = top_nc;
+        } else if !sstate.has_top {
+            top_nc = left_nc;
+        }
+        let cb_idx = get_cb_idx((left_nc + top_nc + 1) >> 1);
+
+        let nc = decode_block(br, &mut mb_info.coeffs[24], &tables.coeff_token_cb[cb_idx], tables)?;
+        mb_info.coded[24] = nc != 0;
+    }
+    for blk8 in 0..4 {
+        if (mb_info.cbpy & (1 << blk8)) != 0 {
+            for blk4 in 0..4 {
+                let bx = (blk8 & 1) * 2 + (blk4 & 1);
+                let by = ((blk8 & 2) * 2 + (blk4 & 2)) >> 1;
+                let blk_no = bx + by * 4;
+
+                let mut top_nc = sstate.get_top_blk4(blk_no).ncoded;
+                let mut left_nc =
sstate.get_left_blk4(blk_no).ncoded; + if bx == 0 && !sstate.has_left { + left_nc = top_nc; + } else if by == 0 && !sstate.has_top { + top_nc = left_nc; + } + let cb_idx = get_cb_idx((left_nc + top_nc + 1) >> 1); + + let nc = if mb_info.mb_type.is_intra16x16() { + decode_block_ac(br, &mut mb_info.coeffs[blk_no], &tables.coeff_token_cb[cb_idx], tables)? + } else { + decode_block(br, &mut mb_info.coeffs[blk_no], &tables.coeff_token_cb[cb_idx], tables)? + }; + sstate.get_cur_blk4(blk_no).ncoded = nc; + mb_info.coded[blk_no] = nc != 0; + } + } + } + if mb_info.transform_size_8x8 { + for y in 0..2 { + for x in 0..2 { + let b0 = &mb_info.coeffs[x + y * 8]; + let b1 = &mb_info.coeffs[x + 1 + y * 8]; + let b2 = &mb_info.coeffs[x + 4 + y * 8]; + let b3 = &mb_info.coeffs[x + 5 + y * 8]; + let dst = &mut mb_info.coeffs8x8[x + y * 2].coeffs; + for (dst, (s0, s1)) in dst.chunks_mut(8).zip(b0.chunks(4).zip(b1.chunks(4))) { + let (d0, d1) = dst.split_at_mut(4); + d0.copy_from_slice(s0); + d1.copy_from_slice(s1); + } + for (dst, (s0, s1)) in dst.chunks_mut(8).skip(4).zip(b2.chunks(4).zip(b3.chunks(4))) { + let (d0, d1) = dst.split_at_mut(4); + d0.copy_from_slice(s0); + d1.copy_from_slice(s1); + } + } + } + } + for chroma in 0..2 { + if (mb_info.cbpc & 3) != 0 { + decode_chroma_dc(br, &mut mb_info.chroma_dc[chroma], &tables.cdc_coeff_token_cb, tables)?; + } + } + for chroma in 0..2 { + if (mb_info.cbpc & 2) != 0 { + for blk4 in 0..4 { + let blk_no = 16 + chroma * 4 + blk4; + let bx = blk4 & 1; + let by = blk4 >> 1; + + let mut top_nc = sstate.get_top_blk8(blk4).ncoded_c[chroma]; + let mut left_nc = sstate.get_left_blk8(blk4).ncoded_c[chroma]; + if bx == 0 && !sstate.has_left { + left_nc = top_nc; + } else if by == 0 && !sstate.has_top { + top_nc = left_nc; + } + let cb_idx = get_cb_idx((left_nc + top_nc + 1) >> 1); + + let nc = decode_block_ac(br, &mut mb_info.coeffs[blk_no], &tables.coeff_token_cb[cb_idx], tables)?; + sstate.get_cur_blk8(blk4).ncoded_c[chroma] = nc; + 
mb_info.coded[blk_no] = nc != 0; + } + } + } + + Ok(()) +} + +pub struct CAVLCTables { + coeff_token_cb: [Codebook; 4], + cdc_coeff_token_cb: Codebook, + total_zeros_cb: [Codebook; 15], + cdc_total_zeros_cb: [Codebook; 3], + run_before_cb: [Codebook; 7], +} + +fn map_idx(idx: usize) -> u8 { idx as u8 } + +macro_rules! create_cb { + ($bits: expr, $lens: expr) => {{ + let mut reader = TableCodebookDescReader::new($bits, $lens, map_idx); + Codebook::new(&mut reader, CodebookMode::MSB).unwrap() + }} +} + +impl CAVLCTables { + pub fn new() -> Self { + /*let mut reader = TableCodebookDescReader::new(&COEFF_TOKEN_BITS[0], &COEFF_TOKEN_LENS[0], map_idx); + let coef_tok_cb0 = Codebook::new(&mut reader, CodebookMode::MSB).unwrap(); + let mut reader = TableCodebookDescReader::new(&COEFF_TOKEN_BITS[1], &COEFF_TOKEN_LENS[1], map_idx); + let coef_tok_cb1 = Codebook::new(&mut reader, CodebookMode::MSB).unwrap(); + let mut reader = TableCodebookDescReader::new(&COEFF_TOKEN_BITS[2], &COEFF_TOKEN_LENS[2], map_idx); + let coef_tok_cb2 = Codebook::new(&mut reader, CodebookMode::MSB).unwrap(); + let mut reader = TableCodebookDescReader::new(&COEFF_TOKEN_BITS[3], &COEFF_TOKEN_LENS[3], map_idx); + let coef_tok_cb3 = Codebook::new(&mut reader, CodebookMode::MSB).unwrap(); + + let mut reader = TableCodebookDescReader::new(&CHROMA_DC_COEFF_TOKEN_BITS, &CHROMA_DC_COEFF_TOKEN_LENS, map_idx); + let cdc_coeff_token_cb = Codebook::new(&mut reader, CodebookMode::MSB).unwrap();*/ + + let coef_tok_cb0 = create_cb!(&COEFF_TOKEN_BITS[0], &COEFF_TOKEN_LENS[0]); + let coef_tok_cb1 = create_cb!(&COEFF_TOKEN_BITS[1], &COEFF_TOKEN_LENS[1]); + let coef_tok_cb2 = create_cb!(&COEFF_TOKEN_BITS[2], &COEFF_TOKEN_LENS[2]); + let coef_tok_cb3 = create_cb!(&COEFF_TOKEN_BITS[3], &COEFF_TOKEN_LENS[3]); + + let cdc_coeff_token_cb = create_cb!(&CHROMA_DC_COEFF_TOKEN_BITS, &CHROMA_DC_COEFF_TOKEN_LENS); + + let total_zeros0 = create_cb!(&TOTAL_ZERO_BITS[ 0], &TOTAL_ZERO_LENS[ 0]); + let total_zeros1 = 
create_cb!(&TOTAL_ZERO_BITS[ 1], &TOTAL_ZERO_LENS[ 1]); + let total_zeros2 = create_cb!(&TOTAL_ZERO_BITS[ 2], &TOTAL_ZERO_LENS[ 2]); + let total_zeros3 = create_cb!(&TOTAL_ZERO_BITS[ 3], &TOTAL_ZERO_LENS[ 3]); + let total_zeros4 = create_cb!(&TOTAL_ZERO_BITS[ 4], &TOTAL_ZERO_LENS[ 4]); + let total_zeros5 = create_cb!(&TOTAL_ZERO_BITS[ 5], &TOTAL_ZERO_LENS[ 5]); + let total_zeros6 = create_cb!(&TOTAL_ZERO_BITS[ 6], &TOTAL_ZERO_LENS[ 6]); + let total_zeros7 = create_cb!(&TOTAL_ZERO_BITS[ 7], &TOTAL_ZERO_LENS[ 7]); + let total_zeros8 = create_cb!(&TOTAL_ZERO_BITS[ 8], &TOTAL_ZERO_LENS[ 8]); + let total_zeros9 = create_cb!(&TOTAL_ZERO_BITS[ 9], &TOTAL_ZERO_LENS[ 9]); + let total_zeros10 = create_cb!(&TOTAL_ZERO_BITS[10], &TOTAL_ZERO_LENS[10]); + let total_zeros11 = create_cb!(&TOTAL_ZERO_BITS[11], &TOTAL_ZERO_LENS[11]); + let total_zeros12 = create_cb!(&TOTAL_ZERO_BITS[12], &TOTAL_ZERO_LENS[12]); + let total_zeros13 = create_cb!(&TOTAL_ZERO_BITS[13], &TOTAL_ZERO_LENS[13]); + let total_zeros14 = create_cb!(&TOTAL_ZERO_BITS[14], &TOTAL_ZERO_LENS[14]); + + let cdc_total_zeros_cb0 = create_cb!(&CHROMA_DC_TOTAL_ZERO_BITS[0], &CHROMA_DC_TOTAL_ZERO_LENS[0]); + let cdc_total_zeros_cb1 = create_cb!(&CHROMA_DC_TOTAL_ZERO_BITS[1], &CHROMA_DC_TOTAL_ZERO_LENS[1]); + let cdc_total_zeros_cb2 = create_cb!(&CHROMA_DC_TOTAL_ZERO_BITS[2], &CHROMA_DC_TOTAL_ZERO_LENS[2]); + + let run_before_cb0 = create_cb!(&RUN_BEFORE_BITS[0], &RUN_BEFORE_LENS[0]); + let run_before_cb1 = create_cb!(&RUN_BEFORE_BITS[1], &RUN_BEFORE_LENS[1]); + let run_before_cb2 = create_cb!(&RUN_BEFORE_BITS[2], &RUN_BEFORE_LENS[2]); + let run_before_cb3 = create_cb!(&RUN_BEFORE_BITS[3], &RUN_BEFORE_LENS[3]); + let run_before_cb4 = create_cb!(&RUN_BEFORE_BITS[4], &RUN_BEFORE_LENS[4]); + let run_before_cb5 = create_cb!(&RUN_BEFORE_BITS[5], &RUN_BEFORE_LENS[5]); + let run_before_cb6 = create_cb!(&RUN_BEFORE_BITS[6], &RUN_BEFORE_LENS[6]); + + Self { + coeff_token_cb: [coef_tok_cb0, coef_tok_cb1, coef_tok_cb2, coef_tok_cb3], + 
cdc_coeff_token_cb, + total_zeros_cb: [total_zeros0, total_zeros1, total_zeros2, + total_zeros3, total_zeros4, total_zeros5, + total_zeros6, total_zeros7, total_zeros8, + total_zeros9, total_zeros10, total_zeros11, + total_zeros12, total_zeros13, total_zeros14 ], + cdc_total_zeros_cb: [cdc_total_zeros_cb0, cdc_total_zeros_cb1, cdc_total_zeros_cb2], + run_before_cb: [ run_before_cb0, run_before_cb1, run_before_cb2, + run_before_cb3, run_before_cb4, run_before_cb5, + run_before_cb6 ], + } + } +} + +const COEFF_TOKEN_BITS: [[u16; 62]; 4] = [ + [ + 0x01, 0x05, 0x01, 0x07, 0x04, 0x01, 0x07, 0x06, + 0x05, 0x03, 0x07, 0x06, 0x05, 0x03, 0x07, 0x06, + 0x05, 0x04, 0x0F, 0x06, 0x05, 0x04, 0x0B, 0x0E, + 0x05, 0x04, 0x08, 0x0A, 0x0D, 0x04, 0x0F, 0x0E, + 0x09, 0x04, 0x0B, 0x0A, 0x0D, 0x0C, 0x0F, 0x0E, + 0x09, 0x0C, 0x0B, 0x0A, 0x0D, 0x08, 0x0F, 0x01, + 0x09, 0x0C, 0x0B, 0x0E, 0x0D, 0x08, 0x07, 0x0A, + 0x09, 0x0C, 0x04, 0x06, 0x05, 0x08 + ], [ + 0x03, 0x0B, 0x02, 0x07, 0x07, 0x03, 0x07, 0x0A, + 0x09, 0x05, 0x07, 0x06, 0x05, 0x04, 0x04, 0x06, + 0x05, 0x06, 0x07, 0x06, 0x05, 0x08, 0x0F, 0x06, + 0x05, 0x04, 0x0B, 0x0E, 0x0D, 0x04, 0x0F, 0x0A, + 0x09, 0x04, 0x0B, 0x0E, 0x0D, 0x0C, 0x08, 0x0A, + 0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, + 0x09, 0x0C, 0x07, 0x0B, 0x06, 0x08, 0x09, 0x08, + 0x0A, 0x01, 0x07, 0x06, 0x05, 0x04 + ], [ + 0x0F, 0x0F, 0x0E, 0x0B, 0x0F, 0x0D, 0x08, 0x0C, + 0x0E, 0x0C, 0x0F, 0x0A, 0x0B, 0x0B, 0x0B, 0x08, + 0x09, 0x0A, 0x09, 0x0E, 0x0D, 0x09, 0x08, 0x0A, + 0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0D, 0x0B, 0x0E, + 0x0A, 0x0C, 0x0F, 0x0A, 0x0D, 0x0C, 0x0B, 0x0E, + 0x09, 0x0C, 0x08, 0x0A, 0x0D, 0x08, 0x0D, 0x07, + 0x09, 0x0C, 0x09, 0x0C, 0x0B, 0x0A, 0x05, 0x08, + 0x07, 0x06, 0x01, 0x04, 0x03, 0x02 + ], [ + 0x03, 0x00, 0x01, 0x04, 0x05, 0x06, 0x08, 0x09, + 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, + 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, + 0x2A, 0x2B, 
0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, + 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F + ] +]; +const COEFF_TOKEN_LENS: [[u8; 62]; 4] = [ + [ + 1, 6, 2, 8, 6, 3, 9, 8, 7, 5, 10, 9, 8, 6, 11, 10, + 9, 7, 13, 11, 10, 8, 13, 13, 11, 9, 13, 13, 13, 10, 14, 14, + 13, 11, 14, 14, 14, 13, 15, 15, 14, 14, 15, 15, 15, 14, 16, 15, + 15, 15, 16, 16, 16, 15, 16, 16, 16, 16, 16, 16, 16, 16 + ], [ + 2, 6, 2, 6, 5, 3, 7, 6, 6, 4, 8, 6, 6, 4, 8, 7, + 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11, 11, 11, 7, 12, 11, + 11, 9, 12, 12, 12, 11, 12, 12, 12, 11, 13, 13, 13, 12, 13, 13, + 13, 13, 13, 14, 13, 13, 14, 14, 14, 13, 14, 14, 14, 14 + ], [ + 4, 6, 4, 6, 5, 4, 6, 5, 5, 4, 7, 5, 5, 4, 7, 5, + 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5, 8, 8, + 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8, 10, 9, + 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 + ], [ 6; 62 ] +]; + +const CHROMA_DC_COEFF_TOKEN_BITS: [u8; 14] = [ + 1, 7, 1, 4, 6, 1, 3, 3, 2, 5, 2, 3, 2, 0 +]; +const CHROMA_DC_COEFF_TOKEN_LENS: [u8; 14] = [ + 2, 6, 1, 6, 6, 3, 6, 7, 7, 6, 6, 8, 8, 7 +]; + +const TOTAL_ZERO_BITS: [[u8; 16]; 15] = [ + [ 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1 ], + [ 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0, 0 ], + [ 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0, 0, 0 ], + [ 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0, 0, 0, 0 ], + [ 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0, 0, 0, 0 ], + [ 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0, 0, 0, 0, 0 ], + [ 1, 1, 5, 4, 3, 3, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0 ], + [ 1, 1, 1, 3, 3, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 1, 0, 1, 3, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 1, 0, 1, 3, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 0, 1, 1, 2, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] +]; +const TOTAL_ZERO_LENS: [[u8; 16]; 15] = [ + [ 1, 3, 3, 4, 4, 5, 
5, 6, 6, 7, 7, 8, 8, 9, 9, 9 ], + [ 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 0 ], + [ 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6, 0, 0 ], + [ 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5, 0, 0, 0 ], + [ 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5, 0, 0, 0, 0 ], + [ 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6, 0, 0, 0, 0, 0 ], + [ 6, 5, 3, 3, 3, 2, 3, 4, 3, 6, 0, 0, 0, 0, 0, 0 ], + [ 6, 4, 5, 3, 2, 2, 3, 3, 6, 0, 0, 0, 0, 0, 0, 0 ], + [ 6, 6, 4, 2, 2, 3, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 5, 5, 3, 2, 2, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 4, 4, 3, 3, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 4, 4, 2, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 3, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] +]; + +const CHROMA_DC_TOTAL_ZERO_BITS: [[u8; 4]; 3] = [ + [ 1, 1, 1, 0 ], [ 1, 1, 0, 0 ], [ 1, 0, 0, 0 ] +]; +const CHROMA_DC_TOTAL_ZERO_LENS: [[u8; 4]; 3] = [ + [ 1, 2, 3, 3 ], [ 1, 2, 2, 0 ], [ 1, 1, 0, 0 ] +]; + +const RUN_BEFORE_BITS: [[u8; 15]; 7] = [ + [ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 3, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 3, 2, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 3, 0, 1, 3, 2, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] +]; +const RUN_BEFORE_LENS: [[u8; 15]; 7] = [ + [ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 2, 2, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 2, 2, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 2, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0 ], + [ 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] +]; diff --git a/nihav-itu/src/codecs/h264/high/decoder_mt.rs b/nihav-itu/src/codecs/h264/high/decoder_mt.rs new file mode 100644 index 0000000..978eccf --- /dev/null +++ 
b/nihav-itu/src/codecs/h264/high/decoder_mt.rs @@ -0,0 +1,1001 @@ +use std::str::FromStr; + +use nihav_core::codecs::*; +use nihav_core::io::bitreader::*; + +use super::super::*; +use super::*; +use super::dispatch::*; + +pub struct FrameDecoder { + pub slices: Vec<(SliceHeader, usize, SliceRefs, Vec)>, + pub cur_pic: PictureInfo, + sps: Arc, + pps: Arc, + pub num_mbs: usize, + mc_dsp: H264MC, + dispatch: Shareable, + sstate: SliceState, + cavlc_cb: Arc, + ipcm_buf: [u8; 256 + 64 + 64], + is_mbaff: bool, + deblock_skip: bool, +} + +impl FrameDecoder { + pub fn decode_slice(&mut self, hdr: &SliceHeader, hdr_size: usize, refs: &SliceRefs, nal: &[u8]) -> DecoderResult { + self.sstate.def_fill = 1 << (self.sps.bit_depth_luma - 1); + self.sstate.reset(self.sps.pic_width_in_mbs, self.sps.pic_height_in_mbs, hdr.first_mb_in_slice); + + let mut full_size = nal.len() * 8; + for &byte in nal.iter().rev() { + if byte == 0 { + full_size -= 8; + } else { + full_size -= (byte.trailing_zeros() + 1) as usize; + break; + } + } + validate!(full_size > 0); + + let sslice_refs = SimplifiedSliceRefs::new(refs); + + let mut br = BitReader::new(&nal[hdr_size / 8..], BitReaderMode::BE); + let mut dst_pic = self.cur_pic.clone(); + let mut dst_frm = NASimpleVideoFrame::from_video_buf(&mut dst_pic.buf).unwrap(); + if !self.pps.entropy_coding_mode { + br.skip((hdr_size & 7) as u32)?; + self.decode_slice_cavlc(&mut br, full_size - (hdr_size & !7), hdr, &sslice_refs, &mut dst_frm) + } else { + let csrc = &nal[(hdr_size + 7) / 8..]; + validate!(csrc.len() >= 2); + let mut cabac = CABAC::new(csrc, hdr.slice_type, hdr.slice_qp, hdr.cabac_init_idc as usize)?; + self.decode_slice_cabac(&mut cabac, hdr, &sslice_refs, &mut dst_frm) + } + } + fn decode_slice_cavlc(&mut self, br: &mut BitReader, full_size: usize, slice_hdr: &SliceHeader, refs: &SimplifiedSliceRefs, frm: &mut NASimpleVideoFrame) -> DecoderResult { + const INTRA_CBP: [u8; 48] = [ + 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 
46, + 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, + 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41 + ]; + const INTER_CBP: [u8; 48] = [ + 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, + 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, + 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41 + ]; + + let mut mb_idx = slice_hdr.first_mb_in_slice; + let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() }; + let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip }; + while br.tell() < full_size && mb_idx < self.num_mbs { + mb_info.coded = [false; 25]; + mb_info.ref_l0 = [ZERO_REF; 4]; + mb_info.ref_l1 = [ZERO_REF; 4]; + mb_info.mv_l0 = [ZERO_MV; 16]; + mb_info.mv_l1 = [ZERO_MV; 16]; + mb_info.chroma_dc = [[0; 4]; 2]; + mb_info.cbpy = 0; + mb_info.cbpc = 0; + + if !slice_hdr.slice_type.is_intra() { + let mb_skip_run = br.read_ue()? as usize; + validate!(mb_idx + mb_skip_run <= self.num_mbs); + mb_info.mb_type = skip_type; + for _ in 0..mb_skip_run { + self.handle_macroblock(slice_hdr, &mut mb_info, refs, frm)?; + mb_idx += 1; + } + if mb_idx == self.num_mbs || br.tell() >= full_size { + break; + } + } + if br.tell() < full_size { + if self.is_mbaff && ((mb_idx & 1) == 0) { + let _mb_field_decoding = br.read_bool()?; + } + let mut mb_type = decode_mb_type_cavlc(br, slice_hdr)?; + mb_info.mb_type = mb_type; + mb_info.transform_size_8x8 = false; + if mb_type == MBType::PCM { + br.align(); + for pix in self.ipcm_buf[..256 + 64 + 64].iter_mut() { + *pix = br.read(8)? 
as u8; + } + self.sstate.fill_ncoded(16); + } else { + if self.pps.transform_8x8_mode && mb_type == MBType::Intra4x4 { + mb_info.transform_size_8x8 = br.read_bool()?; + if mb_info.transform_size_8x8 { + mb_type = MBType::Intra8x8; + mb_info.mb_type = MBType::Intra8x8; + } + } + decode_mb_pred_cavlc(br, slice_hdr, mb_type, &mut self.sstate, &mut mb_info)?; + let (cbpy, cbpc) = if let MBType::Intra16x16(_, cbpy, cbpc) = mb_type { + (cbpy, cbpc) + } else { + let cbp_id = br.read_ue()? as usize; + validate!(cbp_id < INTRA_CBP.len()); + let cbp = if mb_type == MBType::Intra4x4 || mb_type == MBType::Intra8x8 { + INTRA_CBP[cbp_id] + } else { + INTER_CBP[cbp_id] + }; + if self.pps.transform_8x8_mode && (cbp & 0xF) != 0 && mb_info.can_have_8x8_tx(self.sps.direct_8x8_inference) { + mb_info.transform_size_8x8 = br.read_bool()?; + } + ((cbp & 0xF), (cbp >> 4)) + }; + mb_info.cbpy = cbpy; + mb_info.cbpc = cbpc; + self.sstate.get_cur_mb().cbp = (cbpc << 4) | cbpy; + if cbpy != 0 || cbpc != 0 || mb_type.is_intra16x16() { + let mb_qp_delta = br.read_se()?; + validate!(mb_qp_delta >= -26 && mb_qp_delta <= 25); + let new_qp = mb_qp_delta + i32::from(mb_info.qp_y); + mb_info.qp_y = if new_qp < 0 { + (new_qp + 52) as u8 + } else if new_qp >= 52 { + (new_qp - 52) as u8 + } else { + new_qp as u8 + }; + mb_info.coeffs = [[0; 16]; 25]; + if self.pps.transform_8x8_mode { + mb_info.clear_coeffs8x8(); + } + mb_info.chroma_dc = [[0; 4]; 2]; + decode_residual_cavlc(br, &mut self.sstate, &mut mb_info, &self.cavlc_cb)?; + } + } + self.handle_macroblock(slice_hdr, &mut mb_info, refs, frm)?; + } + mb_idx += 1; + if let Ok(disp) = self.dispatch.read() { + disp.update_pos(self.cur_pic.full_id, mb_idx); + } + } + Ok(mb_idx) + } + fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader, refs: &SimplifiedSliceRefs, frm: &mut NASimpleVideoFrame) -> DecoderResult { + let mut mb_idx = slice_hdr.first_mb_in_slice; + let mut prev_mb_skipped = false; + let skip_type = if 
slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip }; + let mut last_qp_diff = false; + + let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() }; + + while mb_idx < self.num_mbs { + mb_info.coded = [false; 25]; + mb_info.ref_l0 = [ZERO_REF; 4]; + mb_info.ref_l1 = [ZERO_REF; 4]; + mb_info.mv_l0 = [ZERO_MV; 16]; + mb_info.mv_l1 = [ZERO_MV; 16]; + mb_info.chroma_dc = [[0; 4]; 2]; + mb_info.cbpy = 0; + mb_info.cbpc = 0; + let mb_skip = cabac_decode_mbskip(cabac, &self.sstate, slice_hdr); + if !mb_skip { + if self.is_mbaff && (((mb_idx & 1) == 0) || (prev_mb_skipped && ((mb_idx & 1) == 1))) { + let _mb_field_decoding = cabac.decode_bit(70); + } + let mut mb_type = cabac_decode_mb_type(cabac, slice_hdr, &self.sstate); + mb_info.mb_type = mb_type; + mb_info.transform_size_8x8 = false; + if mb_type == MBType::PCM { + let ipcm_size = 256 + 64 + 64; + validate!(cabac.pos + ipcm_size <= cabac.src.len()); + self.ipcm_buf[..ipcm_size].copy_from_slice(&cabac.src[cabac.pos..][..ipcm_size]); + cabac.pos += ipcm_size; + cabac.reinit()?; + last_qp_diff = false; + } else { + if self.pps.transform_8x8_mode && mb_type == MBType::Intra4x4 { + let mut ctx = 0; + if self.sstate.get_top_mb().transform_8x8 { + ctx += 1; + } + if self.sstate.get_left_mb().transform_8x8 { + ctx += 1; + } + mb_info.transform_size_8x8 = cabac.decode_bit(399 + ctx); + if mb_info.transform_size_8x8 { + mb_type = MBType::Intra8x8; + mb_info.mb_type = MBType::Intra8x8; + } + } + decode_mb_pred_cabac(cabac, slice_hdr, mb_type, &mut self.sstate, &mut mb_info); + let (cbpy, cbpc) = if let MBType::Intra16x16(_, cbpy, cbpc) = mb_type { + (cbpy, cbpc) + } else { + decode_cbp_cabac(cabac, &self.sstate) + }; + if self.pps.transform_8x8_mode && cbpy != 0 && mb_info.can_have_8x8_tx(self.sps.direct_8x8_inference) { + let mut ctx = 0; + if self.sstate.get_top_mb().transform_8x8 { + ctx += 1; + } + if self.sstate.get_left_mb().transform_8x8 { + ctx += 1; + } + 
mb_info.transform_size_8x8 = cabac.decode_bit(399 + ctx); + } + if mb_type.is_intra() { + self.sstate.get_cur_mb().cmode = mb_info.chroma_ipred; + } + mb_info.cbpy = cbpy; + mb_info.cbpc = cbpc; + self.sstate.get_cur_mb().cbp = (cbpc << 4) | cbpy; + if cbpy != 0 || cbpc != 0 || mb_type.is_intra16x16() { + let mb_qp_delta = decode_mb_qp_delta_cabac(cabac, last_qp_diff as usize); + validate!(mb_qp_delta >= -26 && mb_qp_delta <= 25); + last_qp_diff = mb_qp_delta != 0; + let new_qp = mb_qp_delta + i32::from(mb_info.qp_y); + mb_info.qp_y = if new_qp < 0 { + (new_qp + 52) as u8 + } else if new_qp >= 52 { + (new_qp - 52) as u8 + } else { + new_qp as u8 + }; + mb_info.coeffs = [[0; 16]; 25]; + if self.pps.transform_8x8_mode { + mb_info.clear_coeffs8x8(); + } + mb_info.chroma_dc = [[0; 4]; 2]; + decode_residual_cabac(cabac, &mut self.sstate, &mut mb_info); + } else { + last_qp_diff = false; + } + } + } else { + mb_info.mb_type = skip_type; + mb_info.transform_size_8x8 = false; + last_qp_diff = false; + } + self.handle_macroblock(slice_hdr, &mut mb_info, refs, frm)?; + prev_mb_skipped = mb_skip; + if !(self.is_mbaff && ((mb_idx & 1) == 0)) && cabac.decode_terminate() { + if let Ok(disp) = self.dispatch.read() { + disp.update_pos(self.cur_pic.full_id, mb_idx + 1); + } + return Ok(mb_idx + 1); + } + mb_idx += 1; + if let Ok(disp) = self.dispatch.read() { + disp.update_pos(self.cur_pic.full_id, mb_idx); + } + } + Err(DecoderError::InvalidData) + } + #[allow(clippy::cognitive_complexity)] + fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, refs: &SimplifiedSliceRefs, frm: &mut NASimpleVideoFrame) -> DecoderResult<()> { + let qp_y = mb_info.qp_y; + let qpr = ((qp_y as i8) + self.pps.chroma_qp_index_offset).clamp(0, 51) as usize; + let qp_u = CHROMA_QUANTS[qpr]; + let qpb = ((qp_y as i8) + self.pps.second_chroma_qp_index_offset).clamp(0, 51) as usize; + let qp_v = CHROMA_QUANTS[qpb]; + + let tx_bypass = qp_y == 0 && 
self.sps.qpprime_y_zero_transform_bypass; + + self.sstate.get_cur_mb().mb_type = mb_info.mb_type.into(); + if mb_info.mb_type != MBType::PCM { + self.sstate.get_cur_mb().qp_y = qp_y; + self.sstate.get_cur_mb().qp_u = qp_u; + self.sstate.get_cur_mb().qp_v = qp_v; + self.sstate.get_cur_mb().transform_8x8 = mb_info.transform_size_8x8; + } + let has_dc = mb_info.mb_type.is_intra16x16() && mb_info.coded[24]; + let qp_y = (qp_y + 6 * (self.sps.bit_depth_luma - 8)).min(51); + let qp_u = (qp_u + 6 * (self.sps.bit_depth_chroma - 8)).min(51); + let qp_v = (qp_v + 6 * (self.sps.bit_depth_chroma - 8)).min(51); + if has_dc { + idct_luma_dc(&mut mb_info.coeffs[24], qp_y); + for i in 0..16 { + mb_info.coeffs[i][0] = mb_info.coeffs[24][i]; + } + } + if !mb_info.transform_size_8x8 { + let quant_dc = !mb_info.mb_type.is_intra16x16(); + if quant_dc { + for i in 0..16 { + if mb_info.coded[i] { + if !tx_bypass { + idct(&mut mb_info.coeffs[i], qp_y); + } + } else if has_dc { + if !tx_bypass { + idct_dc(&mut mb_info.coeffs[i], qp_y, quant_dc); + } + mb_info.coded[i] = true; + } + } + } else { + for i in 0..16 { + if mb_info.coded[i] { + if !tx_bypass { + idct_skip_dc(&mut mb_info.coeffs[i], qp_y); + } + } else if has_dc { + if !tx_bypass { + idct_dc(&mut mb_info.coeffs[i], qp_y, quant_dc); + } + mb_info.coded[i] = true; + } + } + } + } else { + for i in 0..4 { + if mb_info.coded[(i & 1) * 2 + (i & 2) * 4] && !tx_bypass { + dequant8x8(&mut mb_info.coeffs8x8[i].coeffs, &self.pps.scaling_list_8x8[!mb_info.mb_type.is_intra() as usize]); + idct8x8(&mut mb_info.coeffs8x8[i].coeffs, qp_y); + } + } + } + for chroma in 0..2 { + let qp_c = if chroma == 0 { qp_u } else { qp_v }; + if mb_info.cbpc != 0 { + chroma_dc_transform(&mut mb_info.chroma_dc[chroma], qp_c); + } + for i in 0..4 { + let blk_no = 16 + chroma * 4 + i; + mb_info.coeffs[blk_no][0] = mb_info.chroma_dc[chroma][i]; + if mb_info.coded[blk_no] { + idct_skip_dc(&mut mb_info.coeffs[blk_no], qp_c); + } else if mb_info.coeffs[blk_no][0] != 
0 { + idct_dc(&mut mb_info.coeffs[blk_no], qp_c, false); + mb_info.coded[blk_no] = true; + } + } + } + if !self.pps.entropy_coding_mode || mb_info.mb_type.is_skip() || mb_info.mb_type.is_intra() { + self.sstate.reset_mb_mv(); + } + if !mb_info.mb_type.is_intra() { + let temporal_mv = !slice_hdr.direct_spatial_mv_pred; + let cur_id = self.cur_pic.full_id as u16; + // wait for the reference macroblock MV to be available + if matches!(mb_info.mb_type, MBType::Direct | MBType::BSkip | MBType::B8x8) { + if let Some(ref_id) = refs.get_ref_id(1, mb_info.ref_l1[0].index()) { + wait_for_mb(&self.dispatch, &self.sstate, self.sstate.mb_x * 16, self.sstate.mb_y * 16, ZERO_MV, ref_id)?; + } + } + Self::pred_mv(&mut self.sstate, refs, mb_info, cur_id, temporal_mv, self.sps.direct_8x8_inference); + } + if !self.pps.constrained_intra_pred && mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 { + self.sstate.fill_ipred(IntraPredMode::DC); + } + + let xpos = self.sstate.mb_x * 16; + let ypos = self.sstate.mb_y * 16; + if mb_info.mb_type != MBType::PCM { + let weight_mode = if self.pps.weighted_pred && slice_hdr.slice_type.is_p() { + 1 + } else if slice_hdr.slice_type.is_b() { + self.pps.weighted_bipred_idc + } else { + 0 + }; + recon_mb_mt(frm, slice_hdr, mb_info, &mut self.sstate, refs, &mut self.mc_dsp, weight_mode, &self.dispatch)?; + } else { + for (dline, src) in frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..].chunks_mut(frm.stride[0]).take(16).zip(self.ipcm_buf.chunks(16)) { + for (dst, &p) in dline[..16].iter_mut().zip(src.iter()) { *dst = u16::from(p); } //dline[..16].copy_from_slice(src); + } + for (dline, src) in frm.data[frm.offset[1] + xpos/2 + ypos/2 * frm.stride[1]..].chunks_mut(frm.stride[1]).take(8).zip(self.ipcm_buf[256..].chunks(8)) { + for (dst, &p) in dline[..8].iter_mut().zip(src.iter()) { *dst = u16::from(p); } //dline[..8].copy_from_slice(src); + } + for (dline, src) in frm.data[frm.offset[2] + xpos/2 + ypos/2 * 
frm.stride[2]..].chunks_mut(frm.stride[2]).take(8).zip(self.ipcm_buf[256 + 64..].chunks(8)) { + for (dst, &p) in dline[..8].iter_mut().zip(src.iter()) { *dst = u16::from(p); } //dline[..8].copy_from_slice(src); + } + } + self.sstate.save_ipred_context(frm); + + let mv_info = &mut self.cur_pic.mv_info; + let mb_pos = self.sstate.mb_x + self.sstate.mb_y * mv_info.mb_stride; + let mut mb = FrameMBInfo::new(); + mb.mb_type = mb_info.mb_type.into(); + for blk4 in 0..16 { + mb.mv[blk4] = self.sstate.get_cur_blk4(blk4).mv; + } + for blk8 in 0..4 { + mb.ref_poc[blk8] = refs.map_refs(self.sstate.get_cur_blk8(blk8).ref_idx); + mb.ref_idx[blk8] = self.sstate.get_cur_blk8(blk8).ref_idx; + } + mv_info.mbs[mb_pos] = mb; + + let deblock_mode = slice_hdr.disable_deblocking_filter_idc; + if !self.deblock_skip && deblock_mode != 1 { + let is_s = slice_hdr.slice_type == SliceType::SI || slice_hdr.slice_type == SliceType::SP; + self.sstate.fill_deblock(refs, deblock_mode, is_s); + let mut frm = NASimpleVideoFrame::from_video_buf(&mut self.cur_pic.buf).unwrap(); + let lf_alpha = slice_hdr.slice_alpha_c0_offset; + let lf_beta = slice_hdr.slice_beta_offset; + loop_filter_mb(&mut frm, &self.sstate, lf_alpha, lf_beta); + } + self.sstate.next_mb(); + Ok(()) + } + + fn pred_mv(sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) { + let mb_type = mb_info.mb_type; + if !mb_type.is_4x4() { + let (pw, ph) = mb_type.size(); + let mut xoff = 0; + let mut yoff = 0; + if mb_type == MBType::Direct || mb_type == MBType::BSkip { + sstate.predict_direct_mb(frame_refs, temporal_mv, direct_8x8, cur_id); + } + for part in 0..mb_type.num_parts() { + if !mb_type.is_l1(part) { + match mb_type { + MBType::PSkip => sstate.predict_pskip(), + MBType::BSkip | MBType::Direct => { + }, + _ => { + sstate.predict(xoff, yoff, pw, ph, 0, + mb_info.mv_l0[part], mb_info.ref_l0[part]); + }, + }; + } + if !mb_type.is_l0(part) && mb_type != 
MBType::BSkip && mb_type != MBType::Direct { + sstate.predict(xoff, yoff, pw, ph, 1, mb_info.mv_l1[part], mb_info.ref_l1[part]); + } + if pw != 16 { + xoff += pw; + } else { + yoff += ph; + } + } + } else { + for part in 0..4 { + let sub_type = mb_info.sub_mb_type[part]; + let mut xoff = (part & 1) * 8; + let mut yoff = (part & 2) * 4; + let orig_x = xoff; + let (pw, ph) = sub_type.size(); + for subpart in 0..sub_type.num_parts() { + if sub_type != SubMBType::Direct8x8 { + if !sub_type.is_l1() { + sstate.predict(xoff, yoff, pw, ph, 0, mb_info.mv_l0[part * 4 + subpart], mb_info.ref_l0[part]); + } + if !sub_type.is_l0() { + sstate.predict(xoff, yoff, pw, ph, 1, mb_info.mv_l1[part * 4 + subpart], mb_info.ref_l1[part]); + } + } else { + for sblk in 0..4 { + sstate.predict_direct_sub(frame_refs, temporal_mv, direct_8x8, cur_id, (xoff / 4) + (sblk & 1) + (yoff / 4) * 4 + (sblk & 2) * 2); + } + } + xoff += pw; + if xoff == orig_x + 8 { + xoff -= 8; + yoff += ph; + } + } + } + } + } +} + +pub struct H264MTDecoder { + info: NACodecInfoRef, + nal_len: u8, + dispatch: Shareable, + frame_refs: FrameRefs, + skip_mode: FrameSkipMode, + sps: Vec>, + cur_sps: usize, + pps: Vec>, + cur_pps: usize, + cur_fdec: Option, + cavlc_cb: Arc, + deblock_skip: bool, + max_last_poc: u32, + poc_base: u32, + disp_w: usize, + disp_h: usize, +} + +impl H264MTDecoder { + pub fn new() -> Self { + Self { + info: NACodecInfoRef::default(), + nal_len: 0, + dispatch: Arc::new(RwLock::new(ThreadDispatcher::new())), + frame_refs: FrameRefs::new(), + skip_mode: FrameSkipMode::default(), + sps: Vec::new(), + cur_sps: 0, + pps: Vec::new(), + cur_pps: 0, + cur_fdec: None, + cavlc_cb: Arc::new(CAVLCTables::new()), + deblock_skip: false, + max_last_poc: 0, + poc_base: 0, + disp_w: 0, + disp_h: 0, + } + } + fn handle_nal(&mut self, src: Vec, supp: &mut NADecoderSupport, skip_decoding: bool, user_id: u32, time: NATimeInfo) -> DecoderResult<()> { + validate!(!src.is_empty()); + validate!((src[0] & 0x80) == 0); + 
let nal_ref_idc = src[0] >> 5; + let nal_unit_type = src[0] & 0x1F; + + let mut full_size = src.len() * 8; + for &byte in src.iter().rev() { + if byte == 0 { + full_size -= 8; + } else { + full_size -= (byte.trailing_zeros() + 1) as usize; + break; + } + } + validate!(full_size > 0); + match nal_unit_type { + 1 | 5 if !skip_decoding => { + let is_idr = nal_unit_type == 5; + let mut br = BitReader::new(&src[..(full_size + 7)/8], BitReaderMode::BE); + br.skip(8)?; + + let slice_hdr = parse_slice_header(&mut br, self.sps.as_slice(), self.pps.as_slice(), is_idr, nal_ref_idc)?; + let hdr_size = br.tell(); + validate!(br.tell() < full_size); + let full_id; + if slice_hdr.first_mb_in_slice == 0 { + validate!(self.cur_fdec.is_none()); + for (i, pps) in self.pps.iter().enumerate() { + if pps.pic_parameter_set_id == slice_hdr.pic_parameter_set_id { + self.cur_pps = i; + break; + } + } + for (i, sps) in self.sps.iter().enumerate() { + if sps.seq_parameter_set_id == self.pps[self.cur_pps].seq_parameter_set_id { + self.cur_sps = i; + break; + } + } + + let mut cur_full_id = self.frame_refs.calc_picture_num(&slice_hdr, is_idr, nal_ref_idc, &self.sps[self.cur_sps]) + self.poc_base; + if is_idr { + if cur_full_id <= self.max_last_poc { + self.poc_base = self.max_last_poc + 2 - (cur_full_id - self.poc_base); + cur_full_id = self.max_last_poc + 2; + } + } + self.max_last_poc = self.max_last_poc.max(cur_full_id); + full_id = cur_full_id; + + let sps = &self.sps[self.cur_sps]; + if sps.chroma_format_idc != 1 || sps.bit_depth_luma != sps.bit_depth_chroma { +println!(" chroma fmt {} bits {}/{}", sps.chroma_format_idc, sps.bit_depth_luma, sps.bit_depth_chroma); + return Err(DecoderError::NotImplemented); + } + if sps.bit_depth_luma != 10 { +println!(" unsupported depth {}", sps.bit_depth_luma); + return Err(DecoderError::NotImplemented); + } + + if is_idr { + self.frame_refs.clear_refs(); + } + + let width = sps.pic_width_in_mbs << 4; + let height = sps.pic_height_in_mbs << 4; + let 
num_mbs = sps.pic_width_in_mbs * sps.pic_height_in_mbs; + + let mut mc_dsp = H264MC::new(); + mc_dsp.set_dimensions(width, height); + mc_dsp.set_depth(sps.bit_depth_luma); + + let is_mbaff = sps.mb_adaptive_frame_field && !slice_hdr.field_pic; + if is_mbaff { + println!("MBAFF"); + return Err(DecoderError::NotImplemented); + } + if !sps.frame_mbs_only { + println!("PAFF?"); + return Err(DecoderError::NotImplemented); + } + + let cur_vinfo = supp.pool_u16.get_info(); + let (w, h) = if ((self.disp_w + 15) & !15) == width && ((self.disp_h + 15) & !15) == height { + (self.disp_w, self.disp_h) + } else { + (width, height) + }; + let fmtstr = match sps.bit_depth_luma { + 9 => "yuv420p9", + 10 => "yuv420p10", + 11 => "yuv420p11", + 12 => "yuv420p12", + _ => return Err(DecoderError::NotImplemented), + }; + let tmp_vinfo = NAVideoInfo::new(w, h, false, NAPixelFormaton::from_str(fmtstr).unwrap()); + if cur_vinfo != Some(tmp_vinfo) { + supp.pool_u16.reset(); + supp.pool_u16.prealloc_video(tmp_vinfo, 4)?; + } + + let buf = if let Some(pic) = supp.pool_u16.get_free() { + pic + } else { + if supp.pool_u16.get_num_used() > 256 { + return Err(DecoderError::AllocError); + } + if let Ok(nbuf) = alloc_video_buffer(tmp_vinfo, 4) { + let vbuf = nbuf.get_vbuf16().unwrap(); + supp.pool_u16.add_frame(vbuf.clone()); + vbuf + } else { + return Err(DecoderError::AllocError); + } + }; + + let cur_pic = PictureInfo { + id: slice_hdr.frame_num, + full_id, user_id, time, + pic_type: slice_hdr.slice_type.to_frame_type(), + buf, + cur_mb: 0, + is_ref: nal_ref_idc != 0, + is_idr, + long_term: get_long_term_id(is_idr, &slice_hdr), + mv_info: NABufferRef::new(FrameMV::new(sps.pic_width_in_mbs, sps.pic_height_in_mbs)), + }; + + self.cur_fdec = Some(FrameDecoder{ + slices: Vec::new(), + sstate: SliceState::new(), + ipcm_buf: [0; 256 + 64 + 64], + //width, height, + num_mbs, + sps: Arc::clone(sps), + pps: Arc::clone(&self.pps[self.cur_pps]), + dispatch: Arc::clone(&self.dispatch), + cavlc_cb: 
Arc::clone(&self.cavlc_cb), + mc_dsp, + cur_pic, + is_mbaff, + deblock_skip: self.deblock_skip, + }); + } else { + if let Some(ref mut fdec) = self.cur_fdec { + let new_type = slice_hdr.slice_type.to_frame_type(); + let pic = &mut fdec.cur_pic; + pic.pic_type = match (pic.pic_type, new_type) { + (FrameType::I, _) => new_type, + (_, FrameType::B) => FrameType::B, + _ => pic.pic_type, + }; + full_id = pic.full_id; + } else { + return Ok(()); + } + } + + let sps = &self.sps[self.cur_sps]; + + self.frame_refs.select_refs(sps, &slice_hdr, full_id); + + if slice_hdr.adaptive_ref_pic_marking_mode { + self.frame_refs.apply_adaptive_marking(&slice_hdr.adaptive_ref_pic_marking, slice_hdr.frame_num, ((1u32 << self.sps[self.cur_sps].log2_max_frame_num) - 1) as u16)?; + } + if let Some(ref mut fdec) = self.cur_fdec { + fdec.slices.push((slice_hdr, hdr_size, self.frame_refs.cur_refs.clone(), src)); + } + }, + 2 => { // slice data partition A + //slice header + //slice id = read_ue() + //cat 2 slice data (all but MB layer residual) + return Err(DecoderError::NotImplemented); + }, + 3 => { // slice data partition B + //slice id = read_ue() + //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() } + //cat 3 slice data (MB layer residual) + return Err(DecoderError::NotImplemented); + }, + 4 => { // slice data partition C + //slice id = read_ue() + //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() } + //cat 4 slice data (MB layer residual) + return Err(DecoderError::NotImplemented); + }, + 6 => {}, //SEI + 7 => { + let sps = parse_sps(&src[1..])?; + self.sps.push(Arc::new(sps)); + }, + 8 => { + validate!(full_size >= 8 + 16); + let pps = parse_pps(&src[1..], self.sps.as_slice(), full_size - 8)?; + let mut found = false; + for stored_pps in self.pps.iter_mut() { + if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id { + *stored_pps = Arc::clone(&pps); + found = true; + break; + } + } + if !found { + self.pps.push(pps); + } + }, + 9 => { // 
access unit delimiter + }, + 10 => {}, //end of sequence + 11 => {}, //end of stream + 12 => {}, //filler + _ => {}, + }; + + Ok(()) + } +} + +impl NADecoderMT for H264MTDecoder { + fn init(&mut self, supp: &mut NADecoderSupport, info: NACodecInfoRef, nthreads: usize) -> DecoderResult<()> { + if let NACodecTypeInfo::Video(vinfo) = info.get_properties() { + let fmt = NAPixelFormaton::from_str("yuv420p10").unwrap(); + let myinfo = NACodecTypeInfo::Video(NAVideoInfo::new(0, 0, false, fmt)); + self.info = NACodecInfo::new_ref(info.get_name(), myinfo, info.get_extradata()).into_ref(); + + let edata = info.get_extradata().unwrap(); +//print!("edata:"); for &el in edata.iter() { print!(" {:02X}", el); } println!(); + if edata.len() > 11 && &edata[0..4] == b"avcC" { + let mut br = MemoryReader::new_read(edata.as_slice()); + + br.read_skip(4)?; + let version = br.read_byte()?; + validate!(version == 1); + let profile = br.read_byte()?; + let _compatibility = br.read_byte()?; + let _level = br.read_byte()?; + let b = br.read_byte()?; + //validate!((b & 0xFC) == 0xFC); + self.nal_len = (b & 3) + 1; + let b = br.read_byte()?; + //validate!((b & 0xE0) == 0xE0); + let num_sps = (b & 0x1F) as usize; + for _ in 0..num_sps { + let len = br.read_u16be()? as usize; + let offset = br.tell() as usize; + validate!((br.peek_byte()? & 0x1F) == 7); + let mut nal_buf = Vec::new(); + let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf); + self.handle_nal(nal_buf, supp, true, 0, NATimeInfo::new(None, None, None, 0, 0))?; + br.read_skip(len)?; + } + let num_pps = br.read_byte()? as usize; + for _ in 0..num_pps { + let len = br.read_u16be()? as usize; + let offset = br.tell() as usize; + validate!((br.peek_byte()? 
& 0x1F) == 8); + let mut nal_buf = Vec::new(); + let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf); + self.handle_nal(nal_buf, supp, true, 0, NATimeInfo::new(None, None, None, 0, 0))?; + br.read_skip(len)?; + } + if br.left() > 0 { + match profile { + 100 | 110 | 122 | 144 => { + let b = br.read_byte()?; + // some encoders put something different here + if (b & 0xFC) != 0xFC { + return Ok(()); + } + // b & 3 -> chroma format + let b = br.read_byte()?; + validate!((b & 0xF8) == 0xF8); + // b & 7 -> luma depth minus 8 + let b = br.read_byte()?; + validate!((b & 0xF8) == 0xF8); + // b & 7 -> chroma depth minus 8 + let num_spsext = br.read_byte()? as usize; + for _ in 0..num_spsext { + let len = br.read_u16be()? as usize; + // parse spsext + br.read_skip(len)?; + } + }, + _ => {}, + }; + } + } else { + return Err(DecoderError::NotImplemented); + } + + let mut width = vinfo.get_width(); + let mut height = vinfo.get_height(); + self.disp_w = width; + self.disp_h = height; + + if (width == 0 || height == 0) && !self.sps.is_empty() { + width = self.sps[0].pic_width_in_mbs * 16; + height = self.sps[0].pic_height_in_mbs * 16; + } + + let num_bufs = if !self.sps.is_empty() { + self.sps[0].num_ref_frames + 1 + } else { + 3 + }.max(16 + 1); + if let Ok(ref mut sd) = self.dispatch.write() { + sd.max_threads = nthreads; + } else { + return Err(DecoderError::Bug); + } + supp.pool_u16.set_dec_bufs(num_bufs + nthreads); + supp.pool_u16.prealloc_video(NAVideoInfo::new(width, height, false, fmt), 4)?; + + Ok(()) + } else { + Err(DecoderError::InvalidData) + } + } + fn can_take_input(&mut self) -> bool { + if let Ok(ref sd) = self.dispatch.read() { + sd.can_decode_more() + } else { + false + } + } + fn queue_pkt(&mut self, supp: &mut NADecoderSupport, pkt: &NAPacket, user_id: u32) -> DecoderResult { + if !self.can_take_input() { + return Ok(false); + } + + let src = pkt.get_buffer(); + + let mut br = MemoryReader::new_read(&src); + let mut nal_buf = 
Vec::with_capacity(src.len()); + + if self.nal_len > 0 { + let mut skip_decoding = false; + if self.skip_mode != FrameSkipMode::None { + let mut pic_type = FrameType::I; + let mut is_ref = false; + while br.left() > 0 { + let size = match self.nal_len { + 1 => br.read_byte()? as usize, + 2 => br.read_u16be()? as usize, + 3 => br.read_u24be()? as usize, + 4 => br.read_u32be()? as usize, + _ => unreachable!(), + }; + validate!(br.left() >= (size as i64)); + let offset = br.tell() as usize; + let size = unescape_nal(&src[offset..][..size], &mut nal_buf); + validate!(size > 0); + let nal_ref_idc = nal_buf[0] >> 5; + let nal_unit_type = nal_buf[0] & 0x1F; + if nal_unit_type == 1 || nal_unit_type == 5 { + let mut bitr = BitReader::new(&nal_buf[1..], BitReaderMode::BE); + let (first_mb, slice_type) = parse_slice_header_minimal(&mut bitr)?; + if first_mb == 0 && nal_ref_idc != 0 { + is_ref = true; + } + let new_type = slice_type.to_frame_type(); + pic_type = match (pic_type, new_type) { + (FrameType::I, _) => new_type, + (_, FrameType::B) => FrameType::B, + _ => pic_type, + }; + } + br.read_skip(size)?; + } + match self.skip_mode { + FrameSkipMode::IntraOnly => { + skip_decoding = pic_type != FrameType::I; + }, + FrameSkipMode::KeyframesOnly => { + if !is_ref { + skip_decoding = true; + } + }, + _ => {}, + }; + br.seek(SeekFrom::Start(0))?; + } + + let mut initial_ref_frames = Vec::new(); + self.frame_refs.fill_ref_nums(&mut initial_ref_frames); + + while br.left() > 0 { + let size = match self.nal_len { + 1 => br.read_byte()? as usize, + 2 => br.read_u16be()? as usize, + 3 => br.read_u24be()? as usize, + 4 => br.read_u32be()? 
as usize, + _ => unreachable!(), + }; + validate!(br.left() >= (size as i64)); + let offset = br.tell() as usize; + let mut cur_nal_buf = Vec::with_capacity(size); + let _size = unescape_nal(&src[offset..][..size], &mut cur_nal_buf); + self.handle_nal(cur_nal_buf, supp, skip_decoding, user_id, pkt.ts)?; + br.read_skip(size)?; + } + let mut fdec = None; + std::mem::swap(&mut fdec, &mut self.cur_fdec); + if let Some(fdc) = fdec { + let cpic = &fdc.cur_pic; + if cpic.is_ref { + self.frame_refs.add_short_term(cpic.clone(), self.sps[self.cur_sps].num_ref_frames); + } + if let Some(lt_idx) = cpic.long_term { + self.frame_refs.add_long_term(lt_idx, cpic.clone()); + } + let mut ref_frames = Vec::new(); + self.frame_refs.fill_ref_nums(&mut ref_frames); + queue_decoding(&mut self.dispatch, fdc, &initial_ref_frames, &ref_frames); + } + } else { +//todo NAL detection + unimplemented!(); + } + Ok(true) + } + fn has_output(&mut self) -> bool { + if let Ok(ref ds) = self.dispatch.read() { + ds.has_output() + } else { + panic!("can't peek into status"); + } + } + fn get_frame(&mut self) -> (DecoderResult, u32) { + match wait_for_one(&mut self.dispatch) { + Ok(cpic) => { + let bufinfo = NABufferType::Video16(cpic.buf.clone()); + let ftype = cpic.pic_type; + let pts = Some(u64::from(cpic.full_id)); + let mut frm = NAFrame::new(cpic.time, ftype, cpic.is_idr, self.info.clone(), bufinfo); + if let (Some(mypts), None) = (pts, frm.get_pts()) { + frm.set_pts(Some(mypts)); + } + frm.set_id(cpic.user_id as i64); + (Ok(frm.into_ref()), cpic.user_id) + }, + Err((err, id)) => (Err(err), id), + } + } + fn flush(&mut self) { + clear_threads(&mut self.dispatch); + self.frame_refs.clear_refs(); + } +} + +impl NAOptionHandler for H264MTDecoder { + fn get_supported_options(&self) -> &[NAOptionDefinition] { DECODER_OPTIONS } + fn set_options(&mut self, options: &[NAOption]) { + for option in options.iter() { + for opt_def in DECODER_OPTIONS.iter() { + if opt_def.check(option).is_ok() { + match 
(option.name, &option.value) { + (FRAME_SKIP_OPTION, NAValue::String(ref strval)) => { + if let Ok(smode) = FrameSkipMode::from_str(strval) { + self.skip_mode = smode; + } + }, + (DEBLOCK_SKIP_OPTION, NAValue::Bool(val)) => { + self.deblock_skip = *val; + }, + _ => {}, + } + } + } + } + } + fn query_option_value(&self, name: &str) -> Option { + match name { + FRAME_SKIP_OPTION => Some(NAValue::String(self.skip_mode.to_string())), + DEBLOCK_SKIP_OPTION => Some(NAValue::Bool(self.deblock_skip)), + _ => None, + } + } +} diff --git a/nihav-itu/src/codecs/h264/high/decoder_st.rs b/nihav-itu/src/codecs/h264/high/decoder_st.rs new file mode 100644 index 0000000..b125d93 --- /dev/null +++ b/nihav-itu/src/codecs/h264/high/decoder_st.rs @@ -0,0 +1,961 @@ +use std::sync::Arc; +use std::str::FromStr; + +use nihav_core::codecs::*; +use nihav_core::io::bitreader::*; + +use super::super::*; +use super::*; + +pub struct H264Decoder { + info: NACodecInfoRef, + width: usize, + height: usize, + disp_w: usize, + disp_h: usize, + num_mbs: usize, + nal_len: u8, + sps: Vec>, + cur_sps: usize, + pps: Vec>, + cur_pps: usize, + + skip_mode: FrameSkipMode, + deblock_skip: bool, + + is_mbaff: bool, + + cavlc_cb: CAVLCTables, + + sstate: SliceState, + + cur_pic: Option, + cur_id: u16, + has_pic: bool, + frame_refs: FrameRefs, + + temporal_mv: bool, + deblock_mode: u8, + lf_alpha: i8, + lf_beta: i8, + is_s: bool, + + ipcm_buf: [u8; 256 + 64 + 64], + + mc_dsp: H264MC, + + transform_8x8_mode: bool, +} + +impl H264Decoder { + pub fn new() -> Self { + H264Decoder{ + info: NACodecInfoRef::default(), + width: 0, + height: 0, + disp_w: 0, + disp_h: 0, + num_mbs: 0, + nal_len: 0, + sps: Vec::with_capacity(1), + cur_sps: 0, + pps: Vec::with_capacity(3), + cur_pps: 0, + + skip_mode: FrameSkipMode::default(), + deblock_skip: false, + + is_mbaff: false, + + cavlc_cb: CAVLCTables::new(), + + sstate: SliceState::new(), + cur_pic: None, + cur_id: 0, + has_pic: false, + frame_refs: FrameRefs::new(), + + 
temporal_mv: false, + deblock_mode: 0, + lf_alpha: 0, + lf_beta: 0, + is_s: false, + + ipcm_buf: [0; 256 + 64 + 64], + + mc_dsp: H264MC::new(), + + transform_8x8_mode: false, + } + } + fn handle_nal(&mut self, src: &[u8], supp: &mut NADecoderSupport, skip_decoding: bool) -> DecoderResult<()> { + validate!(!src.is_empty()); + validate!((src[0] & 0x80) == 0); + let nal_ref_idc = src[0] >> 5; + let nal_unit_type = src[0] & 0x1F; + + let mut full_size = src.len() * 8; + for &byte in src.iter().rev() { + if byte == 0 { + full_size -= 8; + } else { + full_size -= (byte.trailing_zeros() + 1) as usize; + break; + } + } + validate!(full_size > 0); + match nal_unit_type { + 1 | 5 if !skip_decoding => { + let is_idr = nal_unit_type == 5; + let mut br = BitReader::new(&src[..(full_size + 7)/8], BitReaderMode::BE); + br.skip(8)?; + + let slice_hdr = parse_slice_header(&mut br, self.sps.as_slice(), self.pps.as_slice(), is_idr, nal_ref_idc)?; + validate!(br.tell() < full_size); + let full_id; + if slice_hdr.first_mb_in_slice == 0 { + validate!(self.cur_pic.is_none()); + for (i, pps) in self.pps.iter().enumerate() { + if pps.pic_parameter_set_id == slice_hdr.pic_parameter_set_id { + self.cur_pps = i; + break; + } + } + for (i, sps) in self.sps.iter().enumerate() { + if sps.seq_parameter_set_id == self.pps[self.cur_pps].seq_parameter_set_id { + self.cur_sps = i; + break; + } + } + + full_id = self.frame_refs.calc_picture_num(&slice_hdr, is_idr, nal_ref_idc, &self.sps[self.cur_sps]); + + let sps = &self.sps[self.cur_sps]; + if sps.chroma_format_idc != 1 || sps.bit_depth_luma != sps.bit_depth_chroma { +println!(" chroma fmt {} bits {}/{}", sps.chroma_format_idc, sps.bit_depth_luma, sps.bit_depth_chroma); + return Err(DecoderError::NotImplemented); + } + if sps.bit_depth_luma != 10 { +println!(" unsupported depth {}", sps.bit_depth_luma); + return Err(DecoderError::NotImplemented); + } + //let pps = &self.pps[self.cur_pps]; + + if is_idr { + self.frame_refs.clear_refs(); + } + + 
self.width = sps.pic_width_in_mbs << 4; + self.height = sps.pic_height_in_mbs << 4; + self.num_mbs = sps.pic_width_in_mbs * sps.pic_height_in_mbs; + self.mc_dsp.set_dimensions(self.width, self.height); + self.mc_dsp.set_depth(sps.bit_depth_luma); + + self.is_mbaff = sps.mb_adaptive_frame_field && !slice_hdr.field_pic; + if self.is_mbaff { +println!("MBAFF"); + return Err(DecoderError::NotImplemented); + } + if !sps.frame_mbs_only { +println!("PAFF?"); + return Err(DecoderError::NotImplemented); + } + +//if slice_hdr.slice_type.is_b() { return Ok(()); } + self.cur_id = full_id as u16; + } else { + if let Some(ref mut pic) = self.cur_pic { + validate!(pic.cur_mb == slice_hdr.first_mb_in_slice); + let new_type = slice_hdr.slice_type.to_frame_type(); + pic.pic_type = match (pic.pic_type, new_type) { + (FrameType::I, _) => new_type, + (_, FrameType::B) => FrameType::B, + _ => pic.pic_type, + }; + full_id = pic.full_id; + } else { + return Ok(());//Err(DecoderError::InvalidData); + } + validate!(self.cur_pps < self.pps.len() && self.pps[self.cur_pps].pic_parameter_set_id == slice_hdr.pic_parameter_set_id); + } + + let sps = &self.sps[self.cur_sps]; + let pps = &self.pps[self.cur_pps]; + + self.temporal_mv = !slice_hdr.direct_spatial_mv_pred; + self.is_s = slice_hdr.slice_type == SliceType::SI || slice_hdr.slice_type == SliceType::SP; + self.deblock_mode = slice_hdr.disable_deblocking_filter_idc; + self.lf_alpha = slice_hdr.slice_alpha_c0_offset; + self.lf_beta = slice_hdr.slice_beta_offset; + + self.frame_refs.select_refs(sps, &slice_hdr, full_id); + + if slice_hdr.adaptive_ref_pic_marking_mode { + self.frame_refs.apply_adaptive_marking(&slice_hdr.adaptive_ref_pic_marking, slice_hdr.frame_num, ((1u32 << self.sps[self.cur_sps].log2_max_frame_num) - 1) as u16)?; + } + if slice_hdr.first_mb_in_slice == 0 { + let ret = supp.pool_u16.get_free(); + if ret.is_none() { + return Err(DecoderError::AllocError); + } + let (w, h) = if ((self.disp_w + 15) & !15) == self.width && 
((self.disp_h + 15) & !15) == self.height { + (self.disp_w, self.disp_h) + } else { + (self.width, self.height) + }; + let fmtstr = match self.sps[self.cur_sps].bit_depth_luma { + 9 => "yuv420p9", + 10 => "yuv420p10", + 11 => "yuv420p11", + 12 => "yuv420p12", + _ => return Err(DecoderError::NotImplemented), + }; + let tmp_vinfo = NAVideoInfo::new(w, h, false, NAPixelFormaton::from_str(fmtstr).unwrap()); + let mut buf = ret.unwrap(); + if buf.get_info() != tmp_vinfo { + supp.pool_u16.reset(); + supp.pool_u16.prealloc_video(tmp_vinfo, 4)?; + let ret = supp.pool_u16.get_free(); + if ret.is_none() { + return Err(DecoderError::AllocError); + } + buf = ret.unwrap(); + } + self.cur_pic = Some(PictureInfo { + id: slice_hdr.frame_num, + full_id, + user_id: full_id, + time: NATimeInfo::new(None, None, None, 0, 0), + pic_type: slice_hdr.slice_type.to_frame_type(), + buf, + cur_mb: 0, + is_ref: nal_ref_idc != 0, + is_idr, + long_term: get_long_term_id(is_idr, &slice_hdr), + mv_info: NABufferRef::new(FrameMV::new(sps.pic_width_in_mbs, sps.pic_height_in_mbs)), + }); + } + + self.transform_8x8_mode = pps.transform_8x8_mode; + + self.sstate.def_fill = 1 << (sps.bit_depth_luma - 1); + self.sstate.reset(sps.pic_width_in_mbs, sps.pic_height_in_mbs, slice_hdr.first_mb_in_slice); + + let mut dst_pic = if let Some(ref pic) = self.cur_pic { + pic.clone() + } else { + return Err(DecoderError::InvalidData); + }; + let mut dst_frm = NASimpleVideoFrame::from_video_buf(&mut dst_pic.buf).unwrap(); + let dst_mv_info = &mut dst_pic.mv_info; + if !pps.entropy_coding_mode { + self.has_pic = self.decode_slice_cavlc(&mut br, &slice_hdr, full_size, &mut dst_frm, dst_mv_info)?; + } else { + br.align(); + let start = br.tell() / 8; + let csrc = &src[start..]; + validate!(csrc.len() >= 2); + let mut cabac = CABAC::new(csrc, slice_hdr.slice_type, slice_hdr.slice_qp, slice_hdr.cabac_init_idc as usize)?; + self.has_pic = self.decode_slice_cabac(&mut cabac, &slice_hdr, &mut dst_frm, dst_mv_info)?; + } + }, 
+ 2 => { // slice data partition A + //slice header + //slice id = read_ue() + //cat 2 slice data (all but MB layer residual) + return Err(DecoderError::NotImplemented); + }, + 3 => { // slice data partition B + //slice id = read_ue() + //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() } + //cat 3 slice data (MB layer residual) + return Err(DecoderError::NotImplemented); + }, + 4 => { // slice data partition C + //slice id = read_ue() + //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() } + //cat 4 slice data (MB layer residual) + return Err(DecoderError::NotImplemented); + }, + 6 => {}, //SEI + 7 => { + let sps = parse_sps(&src[1..])?; + self.sps.push(Arc::new(sps)); + }, + 8 => { + validate!(full_size >= 8 + 16); + let pps = parse_pps(&src[1..], &self.sps, full_size - 8)?; + let mut found = false; + for stored_pps in self.pps.iter_mut() { + if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id { + *stored_pps = Arc::clone(&pps); + found = true; + break; + } + } + if !found { + self.pps.push(pps); + } + }, + 9 => { // access unit delimiter + }, + 10 => {}, //end of sequence + 11 => {}, //end of stream + 12 => {}, //filler + _ => {}, + }; + + Ok(()) + } + fn pred_mv(sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) { + let mb_type = mb_info.mb_type; + if !mb_type.is_4x4() { + let (pw, ph) = mb_type.size(); + let mut xoff = 0; + let mut yoff = 0; + if mb_type == MBType::Direct || mb_type == MBType::BSkip { + sstate.predict_direct_mb(frame_refs, temporal_mv, direct_8x8, cur_id); + } + for part in 0..mb_type.num_parts() { + if !mb_type.is_l1(part) { + match mb_type { + MBType::PSkip => sstate.predict_pskip(), + MBType::BSkip | MBType::Direct => { + }, + _ => { + sstate.predict(xoff, yoff, pw, ph, 0, + mb_info.mv_l0[part], mb_info.ref_l0[part]); + }, + }; + } + if !mb_type.is_l0(part) && mb_type != MBType::BSkip && mb_type != MBType::Direct 
{ + sstate.predict(xoff, yoff, pw, ph, 1, mb_info.mv_l1[part], mb_info.ref_l1[part]); + } + if pw != 16 { + xoff += pw; + } else { + yoff += ph; + } + } + } else { + for part in 0..4 { + let sub_type = mb_info.sub_mb_type[part]; + let mut xoff = (part & 1) * 8; + let mut yoff = (part & 2) * 4; + let orig_x = xoff; + let (pw, ph) = sub_type.size(); + for subpart in 0..sub_type.num_parts() { + if sub_type != SubMBType::Direct8x8 { + if !sub_type.is_l1() { + sstate.predict(xoff, yoff, pw, ph, 0, mb_info.mv_l0[part * 4 + subpart], mb_info.ref_l0[part]); + } + if !sub_type.is_l0() { + sstate.predict(xoff, yoff, pw, ph, 1, mb_info.mv_l1[part * 4 + subpart], mb_info.ref_l1[part]); + } + } else { + for sblk in 0..4 { + sstate.predict_direct_sub(frame_refs, temporal_mv, direct_8x8, cur_id, (xoff / 4) + (sblk & 1) + (yoff / 4) * 4 + (sblk & 2) * 2); + } + } + xoff += pw; + if xoff == orig_x + 8 { + xoff -= 8; + yoff += ph; + } + } + } + } + } + #[allow(clippy::cognitive_complexity)] + fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, slice_refs: &SimplifiedSliceRefs, frm: &mut NASimpleVideoFrame, mv_info: &mut FrameMV) { + let pps = &self.pps[self.cur_pps]; + + let qp_y = mb_info.qp_y; + let qpr = ((qp_y as i8) + pps.chroma_qp_index_offset).clamp(0, 51) as usize; + let qp_u = CHROMA_QUANTS[qpr]; + let qpb = ((qp_y as i8) + pps.second_chroma_qp_index_offset).clamp(0, 51) as usize; + let qp_v = CHROMA_QUANTS[qpb]; + + let tx_bypass = qp_y == 0 && self.sps[self.cur_sps].qpprime_y_zero_transform_bypass; + + self.sstate.get_cur_mb().mb_type = mb_info.mb_type.into(); + if mb_info.mb_type != MBType::PCM { + self.sstate.get_cur_mb().qp_y = qp_y; + self.sstate.get_cur_mb().qp_u = qp_u; + self.sstate.get_cur_mb().qp_v = qp_v; + self.sstate.get_cur_mb().transform_8x8 = mb_info.transform_size_8x8; + } + let has_dc = mb_info.mb_type.is_intra16x16() && mb_info.coded[24]; + let qp_y = (qp_y + 6 * (self.sps[self.cur_sps].bit_depth_luma - 8)).min(51); + 
let qp_u = (qp_u + 6 * (self.sps[self.cur_sps].bit_depth_chroma - 8)).min(51); + let qp_v = (qp_v + 6 * (self.sps[self.cur_sps].bit_depth_chroma - 8)).min(51); + if has_dc { + idct_luma_dc(&mut mb_info.coeffs[24], qp_y); + for i in 0..16 { + mb_info.coeffs[i][0] = mb_info.coeffs[24][i]; + } + } + if !tx_bypass { + if !mb_info.transform_size_8x8 { + let quant_dc = !mb_info.mb_type.is_intra16x16(); + if quant_dc { + for (coded, coeffs) in mb_info.coded[..16].iter_mut().zip(mb_info.coeffs[..16].iter_mut()) { + if *coded { + idct(coeffs, qp_y); + } else if has_dc { + idct_dc(coeffs, qp_y, quant_dc); + *coded = true; + } + } + } else { + for (coded, coeffs) in mb_info.coded[..16].iter_mut().zip(mb_info.coeffs[..16].iter_mut()) { + if *coded { + idct_skip_dc(coeffs, qp_y); + } else if has_dc { + idct_dc(coeffs, qp_y, quant_dc); + *coded = true; + } + } + } + } else { + for i in 0..4 { + if mb_info.coded[(i & 1) * 2 + (i & 2) * 4] { + dequant8x8(&mut mb_info.coeffs8x8[i].coeffs, &pps.scaling_list_8x8[!mb_info.mb_type.is_intra() as usize]); + idct8x8(&mut mb_info.coeffs8x8[i].coeffs, qp_y); + } + } + } + } else if !mb_info.transform_size_8x8 { + for i in 0..16 { + if !mb_info.coded[i] && has_dc { + mb_info.coded[i] = true; + } + } + } + for chroma in 0..2 { + let qp_c = if chroma == 0 { qp_u } else { qp_v }; + if mb_info.cbpc != 0 { + chroma_dc_transform(&mut mb_info.chroma_dc[chroma], qp_c); + } + for i in 0..4 { + let blk_no = 16 + chroma * 4 + i; + mb_info.coeffs[blk_no][0] = mb_info.chroma_dc[chroma][i]; + if mb_info.coded[blk_no] { + idct_skip_dc(&mut mb_info.coeffs[blk_no], qp_c); + } else if mb_info.coeffs[blk_no][0] != 0 { + idct_dc(&mut mb_info.coeffs[blk_no], qp_c, false); + mb_info.coded[blk_no] = true; + } + } + } + if !pps.entropy_coding_mode || mb_info.mb_type.is_skip() || mb_info.mb_type.is_intra() { + self.sstate.reset_mb_mv(); + } + if !mb_info.mb_type.is_intra() { + Self::pred_mv(&mut self.sstate, slice_refs, mb_info, self.cur_id, self.temporal_mv, 
self.sps[self.cur_sps].direct_8x8_inference); + } + if !pps.constrained_intra_pred && mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 { + self.sstate.fill_ipred(IntraPredMode::DC); + } + + let xpos = self.sstate.mb_x * 16; + let ypos = self.sstate.mb_y * 16; + + if mb_info.mb_type != MBType::PCM { + let weight_mode = if self.pps[self.cur_pps].weighted_pred && slice_hdr.slice_type.is_p() { + 1 + } else if slice_hdr.slice_type.is_b() { + self.pps[self.cur_pps].weighted_bipred_idc + } else { + 0 + }; + recon_mb(frm, slice_hdr, mb_info, &mut self.sstate, slice_refs, &mut self.mc_dsp, weight_mode); + } else { + for (dline, src) in frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..].chunks_mut(frm.stride[0]).take(16).zip(self.ipcm_buf.chunks(16)) { + for (dst, &p) in dline[..16].iter_mut().zip(src.iter()) { *dst = u16::from(p); } //dline[..16].copy_from_slice(src); + } + for (dline, src) in frm.data[frm.offset[1] + xpos/2 + ypos/2 * frm.stride[1]..].chunks_mut(frm.stride[1]).take(8).zip(self.ipcm_buf[256..].chunks(8)) { + for (dst, &p) in dline[..8].iter_mut().zip(src.iter()) { *dst = u16::from(p); } //dline[..8].copy_from_slice(src); + } + for (dline, src) in frm.data[frm.offset[2] + xpos/2 + ypos/2 * frm.stride[2]..].chunks_mut(frm.stride[2]).take(8).zip(self.ipcm_buf[256 + 64..].chunks(8)) { + for (dst, &p) in dline[..8].iter_mut().zip(src.iter()) { *dst = u16::from(p); } //dline[..8].copy_from_slice(src); + } + } +/*match mb_info.mb_type { +MBType::BSkip | MBType::Direct | MBType::B16x16(_) | MBType::B16x8(_, _) | MBType::B8x16(_, _) | MBType::B8x8 => { + let dstride = frm.stride[0]; + let dst = &mut frm.data[frm.offset[0] + self.sstate.mb_x * 16 + self.sstate.mb_y * 16 * dstride..]; + for el in dst[..16].iter_mut() { *el = 255; } + for row in dst.chunks_mut(dstride).skip(1).take(15) { + row[0] = 255; + } +}, +_ => {}, +};*/ + self.sstate.save_ipred_context(frm); + + let mb_pos = self.sstate.mb_x + self.sstate.mb_y * mv_info.mb_stride; 
+ let mut mb = FrameMBInfo::new(); + mb.mb_type = mb_info.mb_type.into(); + for blk4 in 0..16 { + mb.mv[blk4] = self.sstate.get_cur_blk4(blk4).mv; + } + for blk8 in 0..4 { + mb.ref_poc[blk8] = slice_refs.map_refs(self.sstate.get_cur_blk8(blk8).ref_idx); + mb.ref_idx[blk8] = self.sstate.get_cur_blk8(blk8).ref_idx; + } + mv_info.mbs[mb_pos] = mb; + + if !self.deblock_skip && self.deblock_mode != 1 { + self.sstate.fill_deblock(slice_refs, self.deblock_mode, self.is_s); + loop_filter_mb(frm, &self.sstate, self.lf_alpha, self.lf_beta); + } + self.sstate.next_mb(); + } + fn decode_slice_cavlc(&mut self, br: &mut BitReader, slice_hdr: &SliceHeader, full_size: usize, frm: &mut NASimpleVideoFrame, mv_info: &mut FrameMV) -> DecoderResult { + const INTRA_CBP: [u8; 48] = [ + 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, + 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, + 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41 + ]; + const INTER_CBP: [u8; 48] = [ + 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, + 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, + 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41 + ]; + + let mut mb_idx = slice_hdr.first_mb_in_slice; + let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() }; + let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip }; + + let slice_refs = self.frame_refs.cur_refs.clone(); + let sslice_refs = SimplifiedSliceRefs::new(&slice_refs); + + while br.tell() < full_size && mb_idx < self.num_mbs { + mb_info.coded = [false; 25]; + mb_info.ref_l0 = [ZERO_REF; 4]; + mb_info.ref_l1 = [ZERO_REF; 4]; + mb_info.mv_l0 = [ZERO_MV; 16]; + mb_info.mv_l1 = [ZERO_MV; 16]; + mb_info.chroma_dc = [[0; 4]; 2]; + mb_info.cbpy = 0; + mb_info.cbpc = 0; + + if !slice_hdr.slice_type.is_intra() { + let mb_skip_run = br.read_ue()? 
as usize; + validate!(mb_idx + mb_skip_run <= self.num_mbs); + mb_info.mb_type = skip_type; + for _ in 0..mb_skip_run { + self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs, frm, mv_info); + mb_idx += 1; + } + if mb_idx == self.num_mbs || br.tell() >= full_size { + break; + } + } + if br.tell() < full_size { + if self.is_mbaff && ((mb_idx & 1) == 0) { + let _mb_field_decoding = br.read_bool()?; + } + let mut mb_type = decode_mb_type_cavlc(br, slice_hdr)?; + mb_info.mb_type = mb_type; + mb_info.transform_size_8x8 = false; + if mb_type == MBType::PCM { + br.align(); + for pix in self.ipcm_buf[..256 + 64 + 64].iter_mut() { + *pix = br.read(8)? as u8; + } + self.sstate.fill_ncoded(16); + } else { + if self.transform_8x8_mode && mb_type == MBType::Intra4x4 { + mb_info.transform_size_8x8 = br.read_bool()?; + if mb_info.transform_size_8x8 { + mb_type = MBType::Intra8x8; + mb_info.mb_type = MBType::Intra8x8; + } + } + decode_mb_pred_cavlc(br, slice_hdr, mb_type, &mut self.sstate, &mut mb_info)?; + let (cbpy, cbpc) = if let MBType::Intra16x16(_, cbpy, cbpc) = mb_type { + (cbpy, cbpc) + } else { + let cbp_id = br.read_ue()? 
as usize; + validate!(cbp_id < INTRA_CBP.len()); + let cbp = if mb_type == MBType::Intra4x4 || mb_type == MBType::Intra8x8 { + INTRA_CBP[cbp_id] + } else { + INTER_CBP[cbp_id] + }; + if self.transform_8x8_mode && (cbp & 0xF) != 0 && mb_info.can_have_8x8_tx(self.sps[self.cur_sps].direct_8x8_inference) { + mb_info.transform_size_8x8 = br.read_bool()?; + } + ((cbp & 0xF), (cbp >> 4)) + }; + mb_info.cbpy = cbpy; + mb_info.cbpc = cbpc; + self.sstate.get_cur_mb().cbp = (cbpc << 4) | cbpy; + if cbpy != 0 || cbpc != 0 || mb_type.is_intra16x16() { + let mb_qp_delta = br.read_se()?; + validate!(mb_qp_delta >= -26 && mb_qp_delta <= 25); + let new_qp = mb_qp_delta + i32::from(mb_info.qp_y); + mb_info.qp_y = if new_qp < 0 { + (new_qp + 52) as u8 + } else if new_qp >= 52 { + (new_qp - 52) as u8 + } else { + new_qp as u8 + }; + mb_info.coeffs = [[0; 16]; 25]; + if self.transform_8x8_mode { + mb_info.clear_coeffs8x8(); + } + mb_info.chroma_dc = [[0; 4]; 2]; + decode_residual_cavlc(br, &mut self.sstate, &mut mb_info, &self.cavlc_cb)?; + } + } + self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs, frm, mv_info); + } + mb_idx += 1; + } + if let Some(ref mut pic) = self.cur_pic { + pic.cur_mb = mb_idx; + } + Ok(mb_idx == self.num_mbs) + } + fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader, frm: &mut NASimpleVideoFrame, mv_info: &mut FrameMV) -> DecoderResult { + let mut mb_idx = slice_hdr.first_mb_in_slice; + let mut prev_mb_skipped = false; + let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip }; + let mut last_qp_diff = false; + + let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() }; + + let slice_refs = self.frame_refs.cur_refs.clone(); + let sslice_refs = SimplifiedSliceRefs::new(&slice_refs); + + while mb_idx < self.num_mbs { + mb_info.coded = [false; 25]; + mb_info.ref_l0 = [ZERO_REF; 4]; + mb_info.ref_l1 = [ZERO_REF; 4]; + mb_info.mv_l0 = [ZERO_MV; 16]; + mb_info.mv_l1 = 
[ZERO_MV; 16]; + mb_info.chroma_dc = [[0; 4]; 2]; + mb_info.cbpy = 0; + mb_info.cbpc = 0; + let mb_skip = cabac_decode_mbskip(cabac, &self.sstate, slice_hdr); + if !mb_skip { + if self.is_mbaff && (((mb_idx & 1) == 0) || (prev_mb_skipped && ((mb_idx & 1) == 1))) { + let _mb_field_decoding = cabac.decode_bit(70); + } + let mut mb_type = cabac_decode_mb_type(cabac, slice_hdr, &self.sstate); + mb_info.mb_type = mb_type; + mb_info.transform_size_8x8 = false; + if mb_type == MBType::PCM { + let ipcm_size = 256 + 64 + 64; + validate!(cabac.pos + ipcm_size <= cabac.src.len()); + self.ipcm_buf[..ipcm_size].copy_from_slice(&cabac.src[cabac.pos..][..ipcm_size]); + cabac.pos += ipcm_size; + cabac.reinit()?; + last_qp_diff = false; + } else { + if self.transform_8x8_mode && mb_type == MBType::Intra4x4 { + let mut ctx = 0; + if self.sstate.get_top_mb().transform_8x8 { + ctx += 1; + } + if self.sstate.get_left_mb().transform_8x8 { + ctx += 1; + } + mb_info.transform_size_8x8 = cabac.decode_bit(399 + ctx); + if mb_info.transform_size_8x8 { + mb_type = MBType::Intra8x8; + mb_info.mb_type = MBType::Intra8x8; + } + } + decode_mb_pred_cabac(cabac, slice_hdr, mb_type, &mut self.sstate, &mut mb_info); + let (cbpy, cbpc) = if let MBType::Intra16x16(_, cbpy, cbpc) = mb_type { + (cbpy, cbpc) + } else { + decode_cbp_cabac(cabac, &self.sstate) + }; + if self.transform_8x8_mode && cbpy != 0 && mb_info.can_have_8x8_tx(self.sps[self.cur_sps].direct_8x8_inference) { + let mut ctx = 0; + if self.sstate.get_top_mb().transform_8x8 { + ctx += 1; + } + if self.sstate.get_left_mb().transform_8x8 { + ctx += 1; + } + mb_info.transform_size_8x8 = cabac.decode_bit(399 + ctx); + } + if mb_type.is_intra() { + self.sstate.get_cur_mb().cmode = mb_info.chroma_ipred; + } + mb_info.cbpy = cbpy; + mb_info.cbpc = cbpc; + self.sstate.get_cur_mb().cbp = (cbpc << 4) | cbpy; + if cbpy != 0 || cbpc != 0 || mb_type.is_intra16x16() { + let mb_qp_delta = decode_mb_qp_delta_cabac(cabac, last_qp_diff as usize); + 
validate!(mb_qp_delta >= -26 && mb_qp_delta <= 25); + last_qp_diff = mb_qp_delta != 0; + let new_qp = mb_qp_delta + i32::from(mb_info.qp_y); + mb_info.qp_y = if new_qp < 0 { + (new_qp + 52) as u8 + } else if new_qp >= 52 { + (new_qp - 52) as u8 + } else { + new_qp as u8 + }; + mb_info.coeffs = [[0; 16]; 25]; + if self.transform_8x8_mode { + mb_info.clear_coeffs8x8(); + } + mb_info.chroma_dc = [[0; 4]; 2]; + decode_residual_cabac(cabac, &mut self.sstate, &mut mb_info); + } else { + last_qp_diff = false; + } + } + } else { + mb_info.mb_type = skip_type; + mb_info.transform_size_8x8 = false; + last_qp_diff = false; + } + self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs, frm, mv_info); + prev_mb_skipped = mb_skip; + if !(self.is_mbaff && ((mb_idx & 1) == 0)) && cabac.decode_terminate() { + if let Some(ref mut pic) = self.cur_pic { + pic.cur_mb = mb_idx + 1; + } + return Ok(mb_idx + 1 == self.num_mbs); + } + mb_idx += 1; + } + Err(DecoderError::InvalidData) + } +} + +impl NADecoder for H264Decoder { + fn init(&mut self, supp: &mut NADecoderSupport, info: NACodecInfoRef) -> DecoderResult<()> { + if let NACodecTypeInfo::Video(vinfo) = info.get_properties() { + let fmt = NAPixelFormaton::from_str("yuv420p10").unwrap(); + let myinfo = NACodecTypeInfo::Video(NAVideoInfo::new(0, 0, false, fmt)); + self.info = NACodecInfo::new_ref(info.get_name(), myinfo, info.get_extradata()).into_ref(); + + let edata = info.get_extradata().unwrap(); +//print!("edata:"); for &el in edata.iter() { print!(" {:02X}", el); } println!(); + if edata.len() > 11 && &edata[0..4] == b"avcC" { + let mut br = MemoryReader::new_read(edata.as_slice()); + let mut nal_buf = Vec::new(); + + br.read_skip(4)?; + let version = br.read_byte()?; + validate!(version == 1); + let profile = br.read_byte()?; + let _compatibility = br.read_byte()?; + let _level = br.read_byte()?; + let b = br.read_byte()?; + //validate!((b & 0xFC) == 0xFC); + self.nal_len = (b & 3) + 1; + let b = br.read_byte()?; + 
//validate!((b & 0xE0) == 0xE0); + let num_sps = (b & 0x1F) as usize; + for _ in 0..num_sps { + let len = br.read_u16be()? as usize; + let offset = br.tell() as usize; + validate!((br.peek_byte()? & 0x1F) == 7); + let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf); + self.handle_nal(&nal_buf, supp, true)?; + br.read_skip(len)?; + } + let num_pps = br.read_byte()? as usize; + for _ in 0..num_pps { + let len = br.read_u16be()? as usize; + let offset = br.tell() as usize; + validate!((br.peek_byte()? & 0x1F) == 8); + let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf); + self.handle_nal(&nal_buf, supp, true)?; + br.read_skip(len)?; + } + if br.left() > 0 { + match profile { + 100 | 110 | 122 | 144 => { + let b = br.read_byte()?; + // some encoders put something different here + if (b & 0xFC) != 0xFC { + return Ok(()); + } + // b & 3 -> chroma format + let b = br.read_byte()?; + validate!((b & 0xF8) == 0xF8); + // b & 7 -> luma depth minus 8 + let b = br.read_byte()?; + validate!((b & 0xF8) == 0xF8); + // b & 7 -> chroma depth minus 8 + let num_spsext = br.read_byte()? as usize; + for _ in 0..num_spsext { + let len = br.read_u16be()? 
as usize; + // parse spsext + br.read_skip(len)?; + } + }, + _ => {}, + }; + } + } else { + return Err(DecoderError::NotImplemented); + } + + self.width = vinfo.get_width(); + self.height = vinfo.get_height(); + self.disp_w = self.width; + self.disp_h = self.height; + + if (self.width == 0 || self.height == 0) && !self.sps.is_empty() { + self.width = self.sps[0].pic_width_in_mbs * 16; + self.height = self.sps[0].pic_height_in_mbs * 16; + } + + let num_bufs = if !self.sps.is_empty() { + self.sps[0].num_ref_frames + 1 + } else { + 3 + }.max(16 + 1); + supp.pool_u16.set_dec_bufs(num_bufs); + supp.pool_u16.prealloc_video(NAVideoInfo::new(self.width, self.height, false, fmt), 4)?; + + Ok(()) + } else { + Err(DecoderError::InvalidData) + } + } + fn decode(&mut self, supp: &mut NADecoderSupport, pkt: &NAPacket) -> DecoderResult { + let src = pkt.get_buffer(); + + let mut br = MemoryReader::new_read(&src); + let mut nal_buf = Vec::with_capacity(src.len()); + if self.nal_len > 0 { + let mut skip_decoding = false; + if self.skip_mode != FrameSkipMode::None { + let mut pic_type = FrameType::I; + let mut is_ref = false; + while br.left() > 0 { + let size = match self.nal_len { + 1 => br.read_byte()? as usize, + 2 => br.read_u16be()? as usize, + 3 => br.read_u24be()? as usize, + 4 => br.read_u32be()? 
as usize, + _ => unreachable!(), + }; + validate!(br.left() >= (size as i64)); + let offset = br.tell() as usize; + let size = unescape_nal(&src[offset..][..size], &mut nal_buf); + validate!(size > 0); + let nal_ref_idc = nal_buf[0] >> 5; + let nal_unit_type = nal_buf[0] & 0x1F; + if nal_unit_type == 1 || nal_unit_type == 5 { + let mut bitr = BitReader::new(&nal_buf[1..], BitReaderMode::BE); + let (first_mb, slice_type) = parse_slice_header_minimal(&mut bitr)?; + if first_mb == 0 && nal_ref_idc != 0 { + is_ref = true; + } + let new_type = slice_type.to_frame_type(); + pic_type = match (pic_type, new_type) { + (FrameType::I, _) => new_type, + (_, FrameType::B) => FrameType::B, + _ => pic_type, + }; + } + br.read_skip(size)?; + } + match self.skip_mode { + FrameSkipMode::IntraOnly => { + skip_decoding = pic_type != FrameType::I; + }, + FrameSkipMode::KeyframesOnly => { + if !is_ref { + skip_decoding = true; + } + }, + _ => {}, + }; + br.seek(SeekFrom::Start(0))?; + } + while br.left() > 0 { + let size = match self.nal_len { + 1 => br.read_byte()? as usize, + 2 => br.read_u16be()? as usize, + 3 => br.read_u24be()? as usize, + 4 => br.read_u32be()? 
as usize, + _ => unreachable!(), + }; + validate!(br.left() >= (size as i64)); + let offset = br.tell() as usize; + let _size = unescape_nal(&src[offset..][..size], &mut nal_buf); + self.handle_nal(nal_buf.as_slice(), supp, skip_decoding)?; + br.read_skip(size)?; + } + } else { +//todo NAL detection + unimplemented!(); + } + + let (bufinfo, ftype, pts) = if self.has_pic && self.cur_pic.is_some() { + let mut npic = None; + std::mem::swap(&mut self.cur_pic, &mut npic); + let cpic = npic.unwrap(); + let ret = (NABufferType::Video16(cpic.buf.clone()), cpic.pic_type, Some(u64::from(cpic.full_id))); + if cpic.is_ref { + self.frame_refs.add_short_term(cpic.clone(), self.sps[self.cur_sps].num_ref_frames); + } + if let Some(lt_idx) = cpic.long_term { + self.frame_refs.add_long_term(lt_idx, cpic); + } + ret + } else { + (NABufferType::None, FrameType::Skip, None) + }; + + let mut frm = NAFrame::new_from_pkt(pkt, self.info.clone(), bufinfo); + frm.set_keyframe(ftype == FrameType::I); + if let (Some(mypts), None) = (pts, frm.get_pts()) { + frm.set_pts(Some(mypts)); + } + if let Some(pts) = pts { + frm.set_id(pts as i64); + } + frm.set_frame_type(ftype); + Ok(frm.into_ref()) + } + fn flush(&mut self) { + } +} + +impl NAOptionHandler for H264Decoder { + fn get_supported_options(&self) -> &[NAOptionDefinition] { DECODER_OPTIONS } + fn set_options(&mut self, options: &[NAOption]) { + for option in options.iter() { + for opt_def in DECODER_OPTIONS.iter() { + if opt_def.check(option).is_ok() { + match (option.name, &option.value) { + (FRAME_SKIP_OPTION, NAValue::String(ref strval)) => { + if let Ok(smode) = FrameSkipMode::from_str(strval) { + self.skip_mode = smode; + } + }, + (DEBLOCK_SKIP_OPTION, NAValue::Bool(val)) => { + self.deblock_skip = *val; + }, + _ => {}, + } + } + } + } + } + fn query_option_value(&self, name: &str) -> Option { + match name { + FRAME_SKIP_OPTION => Some(NAValue::String(self.skip_mode.to_string())), + DEBLOCK_SKIP_OPTION => 
Some(NAValue::Bool(self.deblock_skip)), + _ => None, + } + } +} diff --git a/nihav-itu/src/codecs/h264/high/dispatch.rs b/nihav-itu/src/codecs/h264/high/dispatch.rs new file mode 100644 index 0000000..e2adfb0 --- /dev/null +++ b/nihav-itu/src/codecs/h264/high/dispatch.rs @@ -0,0 +1,317 @@ +use std::sync::{Arc, Barrier}; +use std::sync::atomic::*; +use std::thread; + +use nihav_core::codecs::{DecoderError, DecoderResult}; + +use super::PictureInfo; +use super::decoder_mt::FrameDecoder; +use super::super::Shareable; + +#[derive(Clone,Copy,Debug,PartialEq)] +pub enum FrameDecodingStatus { + Ok, + NotReady, + Error, + NotFound, +} + +struct FrameState { + pinfo: PictureInfo, + mb_pos: AtomicUsize, + error: AtomicBool, + complete: AtomicBool, + output: AtomicBool, + worker: Option>>, + result: DecoderResult<()>, + num_refs: usize, + ref_frames: Vec, +} + +impl FrameState { + fn get_id(&self) -> u32 { self.pinfo.full_id } + fn get_user_id(&self) -> u32 { self.pinfo.user_id } + fn is_working(&self) -> bool { + self.worker.is_some() && + !self.complete.load(Ordering::Relaxed) && + !self.error.load(Ordering::Relaxed) + } + fn is_output_candidate(&self) -> bool { + !self.output.load(Ordering::Relaxed) && + (self.complete.load(Ordering::Relaxed) || self.error.load(Ordering::Relaxed)) + } +} + +pub struct ThreadDispatcher { + fstate: Vec, + pub max_threads: usize, + cur_threads: usize, +} + +impl ThreadDispatcher { + pub fn new() -> Self { + Self { + fstate: Vec::new(), + max_threads: 3, + cur_threads: 0, + } + } + pub fn can_decode_more(&self) -> bool { + let out_cand = self.fstate.iter().filter(|state| state.is_output_candidate()).count(); + if out_cand > self.max_threads { + return false; + } + if (self.cur_threads < self.max_threads) || (self.max_threads == 0) { + true + } else { + let real_workers = self.fstate.iter().fold(0usize, + |acc, state| acc + (state.is_working() as usize)); + real_workers < self.max_threads + } + } + fn cleanup(&mut self) { + for state in 
self.fstate.iter_mut() { + if state.worker.is_some() && !state.is_working() { + let mut ret = None; + std::mem::swap(&mut state.worker, &mut ret); + if let Some(handle) = ret { + state.result = handle.join().unwrap(); + } + self.cur_threads -= 1; + } + } + } + fn unref_frame(&mut self, id: u32) { + let mut toremove = Vec::new(); + for state in self.fstate.iter() { + if state.num_refs == 0 && state.output.load(Ordering::Relaxed) { + toremove.push(state.get_id()); + } + } + if let Some(idx) = self.find_by_id(id) { + let mut ref_frm = Vec::new(); + std::mem::swap(&mut ref_frm, &mut self.fstate[idx].ref_frames); + for state in self.fstate.iter_mut() { + if ref_frm.contains(&state.get_id()) { + assert!(state.num_refs >= 2); + state.num_refs -= 2; + } + } + if self.fstate[idx].num_refs == 0 && self.fstate[idx].output.load(Ordering::Relaxed) { + self.remove_frame(id); + } + } + for &id in toremove.iter() { + self.remove_frame(id); + } + } + fn find_by_id(&self, id: u32) -> Option { + self.fstate.iter().position(|x| x.get_id() == id) + } + fn set_completed(&self, id: u32) { + if let Some(idx) = self.find_by_id(id) { + self.fstate[idx].complete.store(true, Ordering::Relaxed); + } + } + fn set_error(&self, id: u32) { + if let Some(idx) = self.find_by_id(id) { + self.fstate[idx].error.store(true, Ordering::Relaxed); + } + } + pub fn update_pos(&self, id: u32, mb_pos: usize) { + if let Some(idx) = self.find_by_id(id) { + self.fstate[idx].mb_pos.store(mb_pos, Ordering::Relaxed); + } + } + pub fn check_pos(&self, id: u32, mb_pos: usize) -> FrameDecodingStatus { + if let Some(idx) = self.find_by_id(id) { + let state = &self.fstate[idx]; + if !state.error.load(Ordering::Relaxed) { + if state.complete.load(Ordering::Relaxed) || mb_pos < state.mb_pos.load(Ordering::Relaxed) { + FrameDecodingStatus::Ok + } else { + FrameDecodingStatus::NotReady + } + } else { + FrameDecodingStatus::Error + } + } else { + FrameDecodingStatus::NotFound + } + } + fn remove_frame(&mut self, id: u32) { + 
if let Some(idx) = self.find_by_id(id) { + self.fstate.remove(idx); + } + } + /*fn print_state(&self) { + print!(" state:"); + for state in self.fstate.iter() { + print!(" s{}b{}r{}{}{}{}", state.get_id(), + state.mb_pos.load(Ordering::Relaxed), state.num_refs, + if state.error.load(Ordering::Relaxed) { "E" } else {""}, + if state.complete.load(Ordering::Relaxed) {"C"} else {""}, + if state.output.load(Ordering::Relaxed) {"O"} else {""}); + } + println!(); + }*/ + pub fn has_output(&self) -> bool { + for state in self.fstate.iter() { + if state.is_output_candidate() { + return true; + } + } + false + } +} + +pub fn queue_decoding(disp: &mut Shareable, mut fdec: FrameDecoder, initial_ref_frames: &[u32], ref_frames: &[u32]) { + let barrier = Arc::new(Barrier::new(2)); + let starter = Arc::clone(&barrier); + + let pinfo = fdec.cur_pic.clone(); + let pic_id = pinfo.full_id; + let shared_disp = Arc::clone(disp); + let worker = thread::Builder::new().name("frame ".to_string() + &pic_id.to_string()).spawn(move || { + barrier.wait(); + + let mut slices = Vec::new(); + std::mem::swap(&mut slices, &mut fdec.slices); + let mut cur_mb = 0; + for (hdr, hdr_size, refs, nal) in slices.iter() { + if hdr.first_mb_in_slice != cur_mb { + if let Ok(rd) = shared_disp.read() { + rd.set_error(pic_id); + } else { + panic!("can't set error"); + } + return Err(DecoderError::InvalidData); + } + match fdec.decode_slice(hdr, *hdr_size, refs, nal) { + Ok(pos) => cur_mb = pos, + Err(err) => { + if let Ok(rd) = shared_disp.read() { + rd.set_error(pic_id); + } else { + panic!("can't set error"); + } + return Err(err); + }, + }; + } + + if cur_mb == fdec.num_mbs { + if let Ok(rd) = shared_disp.read() { + rd.set_completed(pic_id); + } else { + panic!("can't set status"); + } + } + + DecoderResult::Ok(()) + }).unwrap(); + let new_state = FrameState { + pinfo, + mb_pos: AtomicUsize::new(0), + error: AtomicBool::new(false), + complete: AtomicBool::new(false), + output: AtomicBool::new(false), + worker: 
Some(worker), + result: DecoderResult::Err(DecoderError::Bug), + num_refs: 0, + ref_frames: initial_ref_frames.to_vec(), + }; + if let Ok(ref mut ds) = disp.write() { + let new_id = new_state.get_id(); + if ds.find_by_id(new_id).is_some() { + ds.remove_frame(new_id); + } + ds.cleanup(); + ds.fstate.push(new_state); + for state in ds.fstate.iter_mut() { + if ref_frames.contains(&state.get_id()) { + state.num_refs += 1; + } + if initial_ref_frames.contains(&state.get_id()) { + state.num_refs += 1; + } + } + ds.cur_threads += 1; + starter.wait(); + } else { + panic!("cannot invoke thread dispatcher"); + } +} + +pub fn wait_for_one(dispatch: &mut Shareable) -> Result { + /*if let Ok(ref ds) = dispatch.read() { + ds.print_state(); + }*/ + let start = std::time::Instant::now(); + 'main_loop: loop { + if std::time::Instant::now().duration_since(start) > std::time::Duration::from_millis(20000) { panic!(" too long!"); } + if let Ok(ref ds) = dispatch.read() { + let mut nw = 0; + for state in ds.fstate.iter() { + if state.is_working() { + nw += 1; + } + if state.is_output_candidate() { + break 'main_loop; + } + } + if nw == 0 { + return Err((DecoderError::NoFrame, 0)); + } + } else { + panic!("can't peek into status"); + } + thread::yield_now(); + } + if let Ok(ref mut ds) = dispatch.write() { + ds.cleanup(); + let mut found = None; + for state in ds.fstate.iter() { + if state.is_output_candidate() { + state.output.store(true, Ordering::Relaxed); + if let DecoderResult::Err(err) = state.result { + let id = state.get_id(); + let user_id = state.get_user_id(); + ds.unref_frame(id); + return Err((err, user_id)); + } else { + found = Some(state.pinfo.clone()); + break; + } + } + } + if let Some(ret) = found { + ds.unref_frame(ret.full_id); + Ok(ret) + } else { + unreachable!(); + } + } else { + panic!("can't grab status"); + } +} + +pub fn clear_threads(dispatch: &mut Shareable) { + /*if let Ok(ref ds) = dispatch.read() { + ds.print_state(); + }*/ + let mut to_wait = Vec::new(); 
/// Quarter-sample luma interpolation along one axis.
///
/// For each of the `w` x `h` output samples the six-tap filter
/// (1, -5, 20, 20, -5, 1) is applied to six consecutive source samples,
/// rounded, shifted by 5 and passed through `clip`. The result is then
/// averaged (rounding up) with one of the two central source samples.
///
/// * `dst`, `dstride` - destination samples and their row stride.
/// * `src`, `sstride` - source samples and their row stride; `src` must start
///   at the first of the six taps of the first output sample.
/// * `hor` - `true` steps between taps by one sample, `false` by one row.
/// * `avg0` - `true` averages with the third tap, `false` with the fourth.
/// * `clip` - clamps a filtered value into the valid sample range.
///
/// Panics if `src` is too short for the requested area.
fn interp_block1(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, hor: bool, avg0: bool, clip: fn (i32) -> u16) {
    let step = if hor { 1 } else { sstride };
    let avgidx = if avg0 { step * 2 } else { step * 3 };

    let mut idx = 0;
    for dline in dst.chunks_mut(dstride).take(h) {
        for (x, pix) in dline.iter_mut().take(w).enumerate() {
            let pos = idx + x;
            let filtered = (clip)((    i32::from(src[pos])
                                 - 5 * i32::from(src[pos + step])
                                + 20 * i32::from(src[pos + step * 2])
                                + 20 * i32::from(src[pos + step * 3])
                                 - 5 * i32::from(src[pos + step * 4])
                                     + i32::from(src[pos + step * 5])
                                     + 16) >> 5);
            let nearest = src[pos + avgidx];
            // Average in 32 bits: the sum of two samples plus the rounding
            // term does not fit into `u16` once samples use all 16 bits.
            *pix = ((u32::from(filtered) + u32::from(nearest) + 1) >> 1) as u16;
        }
        idx += sstride;
    }
}
/// Integer-position luma "interpolation": copies a `w` x `h` block from `src`
/// to `dst` row by row. The clipping function is not needed and is ignored.
fn h264_mc00(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, _clip: fn (i32) -> u16) {
    let out_rows = dst.chunks_mut(dstride);
    let in_rows = src.chunks(sstride);
    out_rows.zip(in_rows).take(h).for_each(|(out_row, in_row)| {
        out_row[..w].copy_from_slice(&in_row[..w]);
    });
}
sstride, w, h, clip); + h264_mc22(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h, clip); + mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); +} + +fn h264_mc13(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + let mut tmp = [0u16; TMP_BUF_STRIDE * 16]; + let mut tmp2 = [0u16; TMP_BUF_STRIDE * 16]; + h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip); + h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h, clip); + mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); +} + +fn h264_mc20(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + interp_block2(dst, dstride, &src[2..], sstride, w, h, false, clip); +} + +fn h264_mc21(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + let mut tmp = [0u16; TMP_BUF_STRIDE * 16]; + let mut tmp2 = [0u16; TMP_BUF_STRIDE * 16]; + h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip); + h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h, clip); + mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); +} + +fn h264_mc22(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + let mut tmp = [0i32; TMP_BUF_STRIDE * 16]; + let mut idx = 0; + for dline in tmp.chunks_mut(TMP_BUF_STRIDE).take(h) { + for (x, pix) in dline.iter_mut().take(w + 5).enumerate() { + *pix = i32::from(src[idx + x]) + - 5 * i32::from(src[idx + x + sstride]) + + 20 * i32::from(src[idx + x + sstride * 2]) + + 20 * i32::from(src[idx + x + sstride * 3]) + - 5 * i32::from(src[idx + x + sstride * 4]) + + i32::from(src[idx + x + sstride * 5]); + } + idx += sstride; + } + for (dline, sline) in dst.chunks_mut(dstride).zip(tmp.chunks(TMP_BUF_STRIDE)).take(h) { + for (x, pix) in dline.iter_mut().take(w).enumerate() { + *pix = (clip)((sline[x] - 5 * sline[x + 1] + 20 * sline[x + 2] + 20 * sline[x + 3] - 5 * sline[x + 4] + sline[x + 5] + 512) >> 10); 
/// Bilinear chroma interpolation at eighth-sample precision.
///
/// Writes a `w` x `h` block to `dst`. `dx` and `dy` are the fractional
/// offsets in eighths of a sample and must not exceed 8. Four cases are
/// handled: plain copy (`dx == 0 && dy == 0`), vertical-only blend
/// (`dx == 0`), horizontal-only blend (`dy == 0`) and the full 2-D blend.
///
/// All weighted sums are computed in `u32`, so every branch is safe for the
/// whole `u16` sample range (the 1-D branches used to multiply in `u16`).
///
/// Panics if `src` is too short for the requested area.
fn chroma_interp(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) {
    let a0 = u32::from(8 - dx);
    let a1 = u32::from(dx);
    let b0 = u32::from(8 - dy);
    let b1 = u32::from(dy);

    if a0 == 8 && b0 == 8 {
        for (drow, line) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(h) {
            drow[..w].copy_from_slice(&line[..w]);
        }
    } else if a0 == 8 {
        // Vertical blend of each sample with the one a row below it.
        let src1 = &src[sstride..];
        for (drow, (line0, line1)) in dst.chunks_mut(dstride).zip(src.chunks(sstride).zip(src1.chunks(sstride))).take(h) {
            for (pix, (&a, &b)) in drow.iter_mut().take(w).zip(line0.iter().zip(line1.iter())) {
                *pix = ((u32::from(a) * b0 + u32::from(b) * b1 + 4) >> 3) as u16;
            }
        }
    } else if b0 == 8 {
        // Horizontal blend of each sample with its right neighbour.
        for (drow, line) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(h) {
            let mut a = u32::from(line[0]);
            for (pix, &b) in drow.iter_mut().take(w).zip(line.iter().skip(1)) {
                let b = u32::from(b);
                *pix = ((a * a0 + b * a1 + 4) >> 3) as u16;
                a = b;
            }
        }
    } else {
        // Full 2-D blend of the 2x2 neighbourhood.
        let src1 = &src[sstride..];
        for (drow, (line0, line1)) in dst.chunks_mut(dstride).zip(src.chunks(sstride).zip(src1.chunks(sstride))).take(h) {
            let mut a = u32::from(line0[0]);
            let mut c = u32::from(line1[0]);
            for (pix, (&b, &d)) in drow.iter_mut().take(w).zip(line0[1..].iter().zip(line1[1..].iter())) {
                let b = u32::from(b);
                let d = u32::from(d);
                *pix = ((a * (a0 * b0) + b * (a1 * b0) + c * (a0 * b1) + d * (a1 * b1) + 0x20) >> 6) as u16;
                a = b;
                c = d;
            }
        }
    }
}
luma_mc { + ($orig:ident, $func4:ident, $func8:ident, $func16:ident, $clip:expr) => { + fn $func4(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, h: usize) { + $orig(dst, dstride, src, sstride, 4, h, $clip); + } + fn $func8(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, h: usize) { + $orig(dst, dstride, src, sstride, 8, h, $clip); + } + fn $func16(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, h: usize) { + $orig(dst, dstride, src, sstride, 16, h, $clip); + } + } +} + +luma_mc!(h264_mc00, h264_mc00_4_10, h264_mc00_8_10, h264_mc00_16_10, super::clip_10); +luma_mc!(h264_mc01, h264_mc01_4_10, h264_mc01_8_10, h264_mc01_16_10, super::clip_10); +luma_mc!(h264_mc02, h264_mc02_4_10, h264_mc02_8_10, h264_mc02_16_10, super::clip_10); +luma_mc!(h264_mc03, h264_mc03_4_10, h264_mc03_8_10, h264_mc03_16_10, super::clip_10); +luma_mc!(h264_mc10, h264_mc10_4_10, h264_mc10_8_10, h264_mc10_16_10, super::clip_10); +luma_mc!(h264_mc11, h264_mc11_4_10, h264_mc11_8_10, h264_mc11_16_10, super::clip_10); +luma_mc!(h264_mc12, h264_mc12_4_10, h264_mc12_8_10, h264_mc12_16_10, super::clip_10); +luma_mc!(h264_mc13, h264_mc13_4_10, h264_mc13_8_10, h264_mc13_16_10, super::clip_10); +luma_mc!(h264_mc20, h264_mc20_4_10, h264_mc20_8_10, h264_mc20_16_10, super::clip_10); +luma_mc!(h264_mc21, h264_mc21_4_10, h264_mc21_8_10, h264_mc21_16_10, super::clip_10); +luma_mc!(h264_mc22, h264_mc22_4_10, h264_mc22_8_10, h264_mc22_16_10, super::clip_10); +luma_mc!(h264_mc23, h264_mc23_4_10, h264_mc23_8_10, h264_mc23_16_10, super::clip_10); +luma_mc!(h264_mc30, h264_mc30_4_10, h264_mc30_8_10, h264_mc30_16_10, super::clip_10); +luma_mc!(h264_mc31, h264_mc31_4_10, h264_mc31_8_10, h264_mc31_16_10, super::clip_10); +luma_mc!(h264_mc32, h264_mc32_4_10, h264_mc32_8_10, h264_mc32_16_10, super::clip_10); +luma_mc!(h264_mc33, h264_mc33_4_10, h264_mc33_8_10, h264_mc33_16_10, super::clip_10); + +pub const H264_LUMA_INTERP_10: [[super::MCFunc; 16]; 3] = [ + [ + h264_mc00_4_10, 
h264_mc01_4_10, h264_mc02_4_10, h264_mc03_4_10, + h264_mc10_4_10, h264_mc11_4_10, h264_mc12_4_10, h264_mc13_4_10, + h264_mc20_4_10, h264_mc21_4_10, h264_mc22_4_10, h264_mc23_4_10, + h264_mc30_4_10, h264_mc31_4_10, h264_mc32_4_10, h264_mc33_4_10 + ], [ + h264_mc00_8_10, h264_mc01_8_10, h264_mc02_8_10, h264_mc03_8_10, + h264_mc10_8_10, h264_mc11_8_10, h264_mc12_8_10, h264_mc13_8_10, + h264_mc20_8_10, h264_mc21_8_10, h264_mc22_8_10, h264_mc23_8_10, + h264_mc30_8_10, h264_mc31_8_10, h264_mc32_8_10, h264_mc33_8_10 + ], [ + h264_mc00_16_10, h264_mc01_16_10, h264_mc02_16_10, h264_mc03_16_10, + h264_mc10_16_10, h264_mc11_16_10, h264_mc12_16_10, h264_mc13_16_10, + h264_mc20_16_10, h264_mc21_16_10, h264_mc22_16_10, h264_mc23_16_10, + h264_mc30_16_10, h264_mc31_16_10, h264_mc32_16_10, h264_mc33_16_10 + ] +]; + +impl super::RegisterSIMD for super::H264MC { + fn register_simd(&mut self) {} +} diff --git a/nihav-itu/src/codecs/h264/high/dsp/mc/mod.rs b/nihav-itu/src/codecs/h264/high/dsp/mc/mod.rs new file mode 100644 index 0000000..b586c65 --- /dev/null +++ b/nihav-itu/src/codecs/h264/high/dsp/mc/mod.rs @@ -0,0 +1,393 @@ +use nihav_core::frame::*; +use nihav_codec_support::codecs::MV; +use super::super::SimpleFrame; + +macro_rules! module_selector { + ($( ($cond:meta, $module:ident) ),*) => { + module_selector!(list; r#false; $(($cond, $module)),*); + }; + (list; $nocond:meta; ($ccar:meta, $carmod:ident), $(($condcdr:meta, $cdrmod:ident)),*) => { + module_selector!(single; $nocond; $ccar; $carmod); + module_selector!(list; any($nocond, $ccar); $(($condcdr, $cdrmod)),*); + }; + (list; $nocond:meta; ($yescond:meta, $module:ident)) => { + module_selector!(single; $nocond; $yescond; $module); + }; + (list; $_:meta; ) => {}; + (single; $nocond:meta; $yescond:meta; $module:ident) => { + #[cfg(all(not($nocond), $yescond))] + mod $module; + #[cfg(all(not($nocond), $yescond))] + use $module::*; + }; +} + +module_selector! 
/// Scratch storage for one motion-compensated macroblock: a luma plane and two chroma
/// planes, each laid out as 16 rows of 16 samples.
///
/// The structure is aligned to 16 bytes.
#[repr(align(16))]
pub struct McBlock {
    pub y: [u16; 16 * 16],
    pub u: [u16; 16 * 16],
    pub v: [u16; 16 * 16],
}

impl McBlock {
    /// Creates a block with all samples set to zero.
    ///
    /// The previous implementation returned `MaybeUninit::uninit().assume_init()`,
    /// which is undefined behaviour for integer arrays even if the contents are
    /// overwritten before being read. Zero-filling three 512-byte planes is cheap and
    /// keeps the constructor sound.
    pub fn new() -> Self {
        Self {
            y: [0; 16 * 16],
            u: [0; 16 * 16],
            v: [0; 16 * 16],
        }
    }
}
self.put_block_weighted2 = [put_blk_w2_2_10, put_blk_w2_4_10, put_blk_w2_8_10, put_blk_w2_16_10]; + self.interp = H264_LUMA_INTERP_10; + }, + _ => unreachable!(), + } + self.register_simd(); + } + pub fn do_mc(&mut self, frm: &mut NASimpleVideoFrame, refpic: &SimpleFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { + let ubuf = std::mem::MaybeUninit::<[u16; 22 * 22]>::uninit(); + let mut ebuf = unsafe { ubuf.assume_init() }; + let mvx = mv.x >> 2; + let mvy = mv.y >> 2; + let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize; + let pre = if mode != 0 { 2isize } else { 0 }; + let post = if mode != 0 { 3isize } else { 0 }; + let (yw, yh) = (self.width, self.height); + let src = refpic.data; + let systride = refpic.stride[0]; + let src_x = (xpos as isize) + (mvx as isize); + let src_y = (ypos as isize) + (mvy as isize); + let (ysrc, ystride) = if (src_x - pre < 0) || (src_x + (w as isize) + post > (yw as isize)) || (src_y - pre < 0) || (src_y + (h as isize) + post > (yh as isize)) { + let add = (pre + post) as usize; + edge_emu_sf(refpic, src_x - pre, src_y - pre, yw, yh, w + add, h + add, &mut ebuf, 22, 0); + (&ebuf[..], 22) + } else { + (&src[refpic.offset[0] + ((src_x - pre) as usize) + ((src_y - pre) as usize) * systride..], systride) + }; + let wmode = match w { + 4 => 0, + 8 => 1, + _ => 2, + }; + (self.interp[wmode][mode])(&mut frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..], frm.stride[0], ysrc, ystride, h); + + let (cw, ch) = (self.width >> 1, self.height >> 1); + let mvx = mv.x >> 3; + let mvy = mv.y >> 3; + let dx = (mv.x & 7) as u16; + let dy = (mv.y & 7) as u16; + let src_x = ((xpos >> 1) as isize) + (mvx as isize); + let src_y = ((ypos >> 1) as isize) + (mvy as isize); + let suoff = refpic.offset[1]; + let svoff = refpic.offset[2]; + let sustride = refpic.stride[1]; + let svstride = refpic.stride[2]; + let cbw = w / 2; + let cbh = h / 2; + let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) 
|| (src_y + (cbh as isize) + 1 > (ch as isize)) { + let aw = (cw + 7) & !7; + let ah = (ch + 7) & !7; + edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf, 18, 1); + edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf[9..], 18, 2); + ([&ebuf, &ebuf[9..]], [18, 18]) + } else { + ([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..], + &src[svoff + (src_x as usize) + (src_y as usize) * svstride..]], + [sustride, svstride]) + }; + for chroma in 1..3 { + let off = frm.offset[chroma] + xpos / 2 + (ypos / 2) * frm.stride[chroma]; + (self.chroma_interp[wmode])(&mut frm.data[off..], frm.stride[chroma], csrc[chroma - 1], cstride[chroma - 1], dx, dy, cbh); + } + } + + pub fn mc_blocks(&mut self, dst: &mut McBlock, refpic: &SimpleFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { + let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize; + + let pre = if mode != 0 { 2 } else { 0 }; + let post = if mode != 0 { 3 } else { 0 }; + let (width, height) = (self.width, self.height); + let sx = (xpos as isize) + ((mv.x >> 2) as isize); + let sy = (ypos as isize) + ((mv.y >> 2) as isize); + + const EBUF_STRIDE: usize = 32; + let mut ebuf = [0u16; EBUF_STRIDE * (16 + 2 + 3)]; + + let wmode = match w { + 4 => 0, + 8 => 1, + _ => 2, + }; + if (sx - pre < 0) || (sx + (w as isize) + post > (width as isize)) || + (sy - pre < 0) || (sy + (h as isize) + post > (height as isize)) { + let edge = (pre + post) as usize; + edge_emu_sf(refpic, sx - pre, sy - pre, width, height, w + edge, h + edge, + &mut ebuf, EBUF_STRIDE, 0); + (self.interp[wmode][mode])(&mut dst.y, 16, &ebuf, EBUF_STRIDE, h); + } else { + let sstride = refpic.stride[0]; + let soff = refpic.offset[0]; + let sbuf = refpic.data; + let saddr = soff + ((sx - pre) as usize) + ((sy - pre) as usize) * sstride; + (self.interp[wmode][mode])(&mut dst.y, 16, &sbuf[saddr..], sstride, h); + } + + let (cw, ch) = (self.width >> 1, self.height >> 1); + let mvx = mv.x >> 3; + let mvy = mv.y >> 3; + let 
dx = (mv.x & 7) as u16; + let dy = (mv.y & 7) as u16; + let src_x = ((xpos >> 1) as isize) + (mvx as isize); + let src_y = ((ypos >> 1) as isize) + (mvy as isize); + let suoff = refpic.offset[1]; + let svoff = refpic.offset[2]; + let sustride = refpic.stride[1]; + let svstride = refpic.stride[2]; + let src = refpic.data; + let cbw = w / 2; + let cbh = h / 2; + let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) || (src_y + (cbh as isize) + 1 > (ch as isize)) { + let aw = (cw + 7) & !7; + let ah = (ch + 7) & !7; + edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf, 18, 1); + edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf[9..], 18, 2); + ([&ebuf, &ebuf[9..]], [18, 18]) + } else { + ([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..], + &src[svoff + (src_x as usize) + (src_y as usize) * svstride..]], + [sustride, svstride]) + }; + (self.chroma_interp[wmode])(&mut dst.u, 16, csrc[0], cstride[0], dx, dy, cbh); + (self.chroma_interp[wmode])(&mut dst.v, 16, csrc[1], cstride[1], dx, dy, cbh); + } + + pub fn do_mc_avg(&mut self, frm: &mut NASimpleVideoFrame, refpic: &SimpleFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { + let ubuf = std::mem::MaybeUninit::<[u16; 64 * 16 + 32]>::uninit(); + let mut buf = unsafe { ubuf.assume_init() }; + let offset = 32 - ((&buf as *const u16 as usize) & 0x1F); + + let mut afrm = NASimpleVideoFrame { + width: [32, 16, 16, 0], + height: [16, 16, 16, 0], + flip: false, + stride: [64, 64, 64, 0], + offset: [0, 32, 48, 0], + components: 3, + data: &mut buf[offset..] 
+ }; + let amv = MV { x: mv.x + (xpos as i16) * 4, y: mv.y + (ypos as i16) * 4 }; + self.do_mc(&mut afrm, refpic, 0, 0, w, h, amv); + let wsize = (w.ilog2() - 1) as usize; + let src = afrm.data; + for (comp, (&sstride, &soff)) in afrm.stride.iter().zip(afrm.offset.iter()).take(3).enumerate() { + let shift = if comp == 0 { 0 } else { 1 }; + (self.avg[wsize - shift])(&mut frm.data[frm.offset[comp] + (xpos >> shift) + (ypos >> shift) * frm.stride[comp]..], frm.stride[comp], &src[soff..], sstride, h >> shift); + } + } + + pub fn gray_block(&mut self, frm: &mut NASimpleVideoFrame, x: usize, y: usize, w: usize, h: usize) { + let fill = 1 << (self.depth - 1); + let yoff = frm.offset[0] + x + y * frm.stride[0]; + let coff = [frm.offset[1] + x / 2 + y / 2 * frm.stride[1], + frm.offset[2] + x / 2 + y / 2 * frm.stride[2]]; + for row in frm.data[yoff..].chunks_mut(frm.stride[0]).take(h) { + for el in row[..w].iter_mut() { + *el = fill; + } + } + for chroma in 0..2 { + for row in frm.data[coff[chroma]..].chunks_mut(frm.stride[chroma + 1]).take(h / 2) { + for el in row[..w / 2].iter_mut() { + *el = fill; + } + } + } + } +} + +fn edge_emu_sf(src: &SimpleFrame, xpos: isize, ypos: isize, w: usize, h: usize, bw: usize, bh: usize, dst: &mut [u16], dstride: usize, comp: usize) { + let stride = src.stride[comp]; + let offs = src.offset[comp]; + let framebuf = src.data; + + for y in 0..bh { + let srcy; + if (y as isize) + ypos < 0 { srcy = 0; } + else if (y as isize) + ypos >= (h as isize) { srcy = h - 1; } + else { srcy = ((y as isize) + ypos) as usize; } + + for x in 0..bw { + let srcx; + if (x as isize) + xpos < 0 { srcx = 0; } + else if (x as isize) + xpos >= (w as isize) { srcx = w - 1; } + else { srcx = ((x as isize) + xpos) as usize; } + dst[x + y * dstride] = framebuf[offs + srcx + srcy * stride]; + } + } +} + +fn avg(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, bw: usize, bh: usize) { + for (dline, sline) in 
/// Averages a two-sample-wide block of `src` into `dst` with rounding up.
///
/// Two rows are always processed; two more are processed only when `bh` is
/// exactly 4. The reads of the last sample of each row pair up front act as bounds
/// probes, so a too-short slice panics before that pair is touched.
fn avg_2(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, bh: usize) {
    let _ = src[sstride + 1];
    let _ = dst[dstride + 1];
    for row in 0..2 {
        let (dpos, spos) = (row * dstride, row * sstride);
        for col in 0..2 {
            dst[dpos + col] = (dst[dpos + col] + src[spos + col] + 1) >> 1;
        }
    }
    if bh != 4 {
        return;
    }
    let _ = src[sstride * 3 + 1];
    let _ = dst[dstride * 3 + 1];
    for row in 2..4 {
        let (dpos, spos) = (row * dstride, row * sstride);
        for col in 0..2 {
            dst[dpos + col] = (dst[dpos + col] + src[spos + col] + 1) >> 1;
        }
    }
}
src: &[u16], h: usize, wparams: [i8; 3]) { + put_block_weighted_10(dst, stride, src, 4, h, wparams); +} +fn put_blk_w_8_10(dst: &mut [u16], stride: usize, src: &[u16], h: usize, wparams: [i8; 3]) { + put_block_weighted_10(dst, stride, src, 8, h, wparams); +} +fn put_blk_w_16_10(dst: &mut [u16], stride: usize, src: &[u16], h: usize, wparams: [i8; 3]) { + put_block_weighted_10(dst, stride, src, 16, h, wparams); +} + +fn put_block_weighted2_10(dst: &mut [u16], stride: usize, src0: &[u16], src1: &[u16], w: usize, h: usize, wparams: [i8; 5]) { + let weight0 = i32::from(wparams[0]); + let offset0 = i32::from(wparams[1]) << (10 - 8); + let weight1 = i32::from(wparams[2]); + let offset1 = i32::from(wparams[3]) << (10 - 8); + let wshift = (wparams[4] as u8) + 1; + let offset = (offset0 + offset1 + 1) >> 1; + let bias = (1 << wshift) >> 1; + + for (drow, (srow0, srow1)) in dst.chunks_mut(stride).zip(src0.chunks_exact(16).zip(src1.chunks_exact(16))).take(h) { + for (dst, (&src0, &src1)) in drow[..w].iter_mut().zip(srow0.iter().zip(srow1.iter())) { + *dst = clip_10(((i32::from(src0) * weight0 + i32::from(src1) * weight1 + bias) >> wshift) + offset); + } + } +} + +fn put_blk_w2_2_10(dst: &mut [u16], stride: usize, src0: &[u16], src1: &[u16], h: usize, wparams: [i8; 5]) { + let weight0 = i32::from(wparams[0]); + let offset0 = i32::from(wparams[1]); + let weight1 = i32::from(wparams[2]); + let offset1 = i32::from(wparams[3]); + let wshift = (wparams[4] as u8) + 1; + let offset = (offset0 + offset1 + 1) >> 1; + let bias = (1 << wshift) >> 1; + + let _ = src0[16 + 1]; + let _ = src1[16 + 1]; + let _ = dst[stride + 1]; + dst[0] = clip_10(((i32::from(src0[ 0]) * weight0 + i32::from(src1[ 0]) * weight1 + bias) >> wshift) + offset); + dst[1] = clip_10(((i32::from(src0[ 1]) * weight0 + i32::from(src1[ 1]) * weight1 + bias) >> wshift) + offset); + dst[stride] = clip_10(((i32::from(src0[16]) * weight0 + i32::from(src1[16]) * weight1 + bias) >> wshift) + offset); + dst[stride + 1] = 
clip_10(((i32::from(src0[17]) * weight0 + i32::from(src1[17]) * weight1 + bias) >> wshift) + offset); + if h == 4 { + let _ = src0[16 * 3 + 1]; + let _ = src1[16 * 3 + 1]; + let _ = dst[stride * 3 + 1]; + dst[stride * 2] = clip_10(((i32::from(src0[32]) * weight0 + i32::from(src1[32]) * weight1 + bias) >> wshift) + offset); + dst[stride * 2 + 1] = clip_10(((i32::from(src0[33]) * weight0 + i32::from(src1[33]) * weight1 + bias) >> wshift) + offset); + dst[stride * 3] = clip_10(((i32::from(src0[48]) * weight0 + i32::from(src1[48]) * weight1 + bias) >> wshift) + offset); + dst[stride * 3 + 1] = clip_10(((i32::from(src0[49]) * weight0 + i32::from(src1[49]) * weight1 + bias) >> wshift) + offset); + } +} +fn put_blk_w2_4_10(dst: &mut [u16], stride: usize, src0: &[u16], src1: &[u16], h: usize, wparams: [i8; 5]) { + put_block_weighted2_10(dst, stride, src0, src1, 4, h, wparams); +} +fn put_blk_w2_8_10(dst: &mut [u16], stride: usize, src0: &[u16], src1: &[u16], h: usize, wparams: [i8; 5]) { + put_block_weighted2_10(dst, stride, src0, src1, 8, h, wparams); +} +fn put_blk_w2_16_10(dst: &mut [u16], stride: usize, src0: &[u16], src1: &[u16], h: usize, wparams: [i8; 5]) { + put_block_weighted2_10(dst, stride, src0, src1, 16, h, wparams); +} diff --git a/nihav-itu/src/codecs/h264/high/dsp/mc/release.rs b/nihav-itu/src/codecs/h264/high/dsp/mc/release.rs new file mode 100644 index 0000000..b7ae267 --- /dev/null +++ b/nihav-itu/src/codecs/h264/high/dsp/mc/release.rs @@ -0,0 +1,339 @@ +const TMP_BUF_STRIDE: usize = 32; + +fn interp_block1(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, hor: bool, avg0: bool, clip: fn (i32) -> u16) { + unsafe { + let step = if hor { 1 } else { sstride }; + let avgidx = if avg0 { step * 2 } else { step * 3 }; + let mut src = src.as_ptr(); + let mut dst = dst.as_mut_ptr(); + for _ in 0..h { + for _ in 0..w { + let t = (clip)(( i32::from(*src) + - 5 * i32::from(*src.add(step)) + + 20 * i32::from(*src.add(step * 2)) + + 20 
/// Applies the six-tap (1, -5, 20, 20, -5, 1) filter with rounding (`+ 16`, `>> 5`)
/// to a `w`x`h` block and stores the clipped result in `dst`.
///
/// With `hor` set the taps are consecutive samples of a row, otherwise they are
/// `sstride` apart (consecutive rows). The first tap of output sample (x, y) is
/// `src[x + y * sstride]`.
///
/// No bounds checks are performed: the caller must supply slices that cover every
/// sample read and written here.
fn interp_block2(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, hor: bool, clip: fn (i32) -> u16) {
    let tap_dist = if hor { 1 } else { sstride };
    let out = dst.as_mut_ptr();
    let inp = src.as_ptr();
    for row in 0..h {
        for col in 0..w {
            // SAFETY: relies on the caller providing at least `w` x `h` output
            // samples and all six taps for each of them (see the function docs).
            unsafe {
                let first = inp.add(row * sstride + col);
                let t0 = i32::from(*first);
                let t1 = i32::from(*first.add(tap_dist));
                let t2 = i32::from(*first.add(tap_dist * 2));
                let t3 = i32::from(*first.add(tap_dist * 3));
                let t4 = i32::from(*first.add(tap_dist * 4));
                let t5 = i32::from(*first.add(tap_dist * 5));
                let sum = t0 - 5 * t1 + 20 * t2 + 20 * t3 - 5 * t4 + t5 + 16;
                *out.add(row * dstride + col) = (clip)(sum >> 5);
            }
        }
    }
}
fn (i32) -> u16) { + interp_block2(dst, dstride, &src[sstride * 2..], sstride, w, h, true, clip); +} + +fn h264_mc03(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, false, clip); +} + +fn h264_mc10(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + interp_block1(dst, dstride, &src[2..], sstride, w, h, false, true, clip); +} + +fn h264_mc11(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + let mut tmp : [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + let mut tmp2: [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip); + h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h, clip); + mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); +} + +fn h264_mc12(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + let mut tmp : [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + let mut tmp2: [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip); + h264_mc22(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h, clip); + mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); +} + +fn h264_mc13(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + let mut tmp : [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + let mut tmp2: [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, 
sstride, w, h, clip); + h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h, clip); + mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); +} + +fn h264_mc20(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + interp_block2(dst, dstride, &src[2..], sstride, w, h, false, clip); +} + +fn h264_mc21(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + let mut tmp : [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + let mut tmp2: [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip); + h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h, clip); + mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); +} + +fn h264_mc22(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + let mut tmp: [i32; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + unsafe { + let mut src = src.as_ptr(); + let mut dst = tmp.as_mut_ptr(); + for _ in 0..h { + for _ in 0..w+5 { + *dst = i32::from(*src) + - 5 * i32::from(*src.add(sstride)) + + 20 * i32::from(*src.add(sstride * 2)) + + 20 * i32::from(*src.add(sstride * 3)) + - 5 * i32::from(*src.add(sstride * 4)) + + i32::from(*src.add(sstride * 5)); + dst = dst.add(1); + src = src.add(1); + } + src = src.sub(w+5).add(sstride); + dst = dst.sub(w+5).add(TMP_BUF_STRIDE); + } + } + unsafe { + let mut dst = dst.as_mut_ptr(); + let mut src = tmp.as_ptr(); + for _ in 0..h { + for _ in 0..w { + *dst = (clip)((*src - 5 * *src.add(1) + 20 * *src.add(2) + 20 * *src.add(3) - 5 * *src.add(4) + *src.add(5) + 512) >> 10); + dst = dst.add(1); + src = src.add(1); + } + dst = dst.sub(w).add(dstride); + src = src.sub(w).add(TMP_BUF_STRIDE); + } + } +} + +fn h264_mc23(dst: &mut [u16], dstride: usize, 
src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + let mut tmp : [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + let mut tmp2: [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip); + h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h, clip); + mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); +} + +fn h264_mc30(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + interp_block1(dst, dstride, &src[2..], sstride, w, h, false, false, clip); +} + +fn h264_mc31(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + let mut tmp : [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + let mut tmp2: [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + h264_mc20(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip); + h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h, clip); + mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); +} + +fn h264_mc32(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + let mut tmp : [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + let mut tmp2: [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = std::mem::MaybeUninit::uninit(); arr.assume_init() }; + h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip); + h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h, clip); + mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); +} + +fn h264_mc33(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) { + let mut tmp : [u16; TMP_BUF_STRIDE * 16] = unsafe { let arr = 
/// Bilinear chroma interpolation of a `w`x`h` block with eighth-sample weights.
///
/// `dx` and `dy` are the horizontal and vertical fractions (the weights are
/// `8 - d` and `d`). Four cases are handled separately so that only the samples
/// that are really needed get read:
/// * no fraction at all - plain copy;
/// * vertical fraction only - blend of each sample with the one below it;
/// * horizontal fraction only - blend of each sample with its right neighbour;
/// * both - blend of the 2x2 neighbourhood.
///
/// No bounds checks are performed on `src` and `dst`.
fn chroma_interp(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) {
    let (wx0, wx1) = (8 - dx, dx);
    let (wy0, wy1) = (8 - dy, dy);
    let inp = src.as_ptr();
    let out = dst.as_mut_ptr();

    // SAFETY (all branches): relies on the caller providing `w` x `h` writable
    // samples in `dst` and, in `src`, the block plus one extra column and/or row
    // whenever the corresponding fraction is non-zero.
    match (wx0 == 8, wy0 == 8) {
        (true, true) => {
            for row in 0..h {
                unsafe {
                    std::ptr::copy_nonoverlapping(inp.add(row * sstride), out.add(row * dstride), w);
                }
            }
        },
        (true, false) => {
            for row in 0..h {
                for col in 0..w {
                    unsafe {
                        let top = *inp.add(row * sstride + col);
                        let bottom = *inp.add((row + 1) * sstride + col);
                        *out.add(row * dstride + col) = (top * wy0 + bottom * wy1 + 4) >> 3;
                    }
                }
            }
        },
        (false, true) => {
            for row in 0..h {
                for col in 0..w {
                    unsafe {
                        let left = *inp.add(row * sstride + col);
                        let right = *inp.add(row * sstride + col + 1);
                        *out.add(row * dstride + col) = (left * wx0 + right * wx1 + 4) >> 3;
                    }
                }
            }
        },
        (false, false) => {
            for row in 0..h {
                for col in 0..w {
                    unsafe {
                        let tl = u32::from(*inp.add(row * sstride + col));
                        let tr = u32::from(*inp.add(row * sstride + col + 1));
                        let bl = u32::from(*inp.add((row + 1) * sstride + col));
                        let br = u32::from(*inp.add((row + 1) * sstride + col + 1));
                        let sum = tl * u32::from(wx0 * wy0)
                                + tr * u32::from(wx1 * wy0)
                                + bl * u32::from(wx0 * wy1)
                                + br * u32::from(wx1 * wy1)
                                + 0x20;
                        *out.add(row * dstride + col) = (sum >> 6) as u16;
                    }
                }
            }
        },
    }
}
/// Luma interpolation functions for 10-bit content.
///
/// The outer index selects the block width (0 - 4 samples, 1 - 8 samples,
/// 2 - 16 samples), the inner index selects the interpolation routine: entry
/// `X * 4 + Y` is the wrapper around `h264_mcXY`.
pub const H264_LUMA_INTERP_10: [[super::MCFunc; 16]; 3] = [
  [
    h264_mc00_4_10, h264_mc01_4_10, h264_mc02_4_10, h264_mc03_4_10,
    h264_mc10_4_10, h264_mc11_4_10, h264_mc12_4_10, h264_mc13_4_10,
    h264_mc20_4_10, h264_mc21_4_10, h264_mc22_4_10, h264_mc23_4_10,
    h264_mc30_4_10, h264_mc31_4_10, h264_mc32_4_10, h264_mc33_4_10
  ], [
    h264_mc00_8_10, h264_mc01_8_10, h264_mc02_8_10, h264_mc03_8_10,
    h264_mc10_8_10, h264_mc11_8_10, h264_mc12_8_10, h264_mc13_8_10,
    h264_mc20_8_10, h264_mc21_8_10, h264_mc22_8_10, h264_mc23_8_10,
    h264_mc30_8_10, h264_mc31_8_10, h264_mc32_8_10, h264_mc33_8_10
  ], [
    h264_mc00_16_10, h264_mc01_16_10, h264_mc02_16_10, h264_mc03_16_10,
    h264_mc10_16_10, h264_mc11_16_10, h264_mc12_16_10, h264_mc13_16_10,
    h264_mc20_16_10, h264_mc21_16_10, h264_mc22_16_10, h264_mc23_16_10,
    h264_mc30_16_10, h264_mc31_16_10, h264_mc32_16_10, h264_mc33_16_10
  ]
];
37, 37, 37, 38, 38, 38, + 39, 39, 39, 39 +]; + +pub const CHROMA_DC_SCAN: [usize; 4] = [ 0, 1, 2, 3]; +pub const ZIGZAG: [usize; 16] = [ + 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 +]; +pub const ZIGZAG1: [usize; 15] = [ + 0, 3, 7, 4, 1, 2, 5, 8, 11, 12, 9, 6, 10, 13, 14 +]; +/*pub const IL_SCAN: [usize; 16] = [ + 0, 4, 1, 8, 12, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 +];*/ +pub const ZIGZAG8X8: [usize; 64] = [ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +]; + +const LEVEL_SCALE: [[i16; 6]; 3] = [ + [ 10, 11, 13, 14, 16, 18 ], + [ 16, 18, 20, 23, 25, 29 ], + [ 13, 14, 16, 18, 20, 23 ] +]; + +pub fn chroma_dc_transform(blk: &mut [i16; 4], qp: u8) { + let t0 = blk[0] + blk[2]; + let t1 = blk[0] - blk[2]; + let t2 = blk[1] + blk[3]; + let t3 = blk[1] - blk[3]; + blk[0] = t0 + t2; + blk[1] = t0 - t2; + blk[2] = t1 + t3; + blk[3] = t1 - t3; + if qp < 6 { + let mul = LEVEL_SCALE[0][qp as usize]; + for el in blk.iter_mut() { + *el = el.wrapping_mul(mul) >> 1; + } + } else { + let mul = LEVEL_SCALE[0][(qp % 6) as usize]; + let shift = qp / 6 - 1; + for el in blk.iter_mut() { + *el = el.wrapping_mul(mul) << shift; + } + } +} + +macro_rules! 
transform { + (luma_dc; $a: expr, $b: expr, $c: expr, $d: expr) => ({ + let t0 = $a.wrapping_add($c); + let t1 = $a.wrapping_sub($c); + let t2 = $b.wrapping_add($d); + let t3 = $b.wrapping_sub($d); + $a = t0.wrapping_add(t2); + $b = t1.wrapping_add(t3); + $c = t1.wrapping_sub(t3); + $d = t0.wrapping_sub(t2); + }); + ($a: expr, $b: expr, $c: expr, $d: expr, $shift: expr) => ({ + let t0 = $a.wrapping_add($c); + let t1 = $a.wrapping_sub($c); + let t2 = ($b >> 1).wrapping_sub($d); + let t3 = $b.wrapping_add($d >> 1); + let bias = 1 << $shift >> 1; + $a = t0.wrapping_add(t3).wrapping_add(bias) >> $shift; + $b = t1.wrapping_add(t2).wrapping_add(bias) >> $shift; + $c = t1.wrapping_sub(t2).wrapping_add(bias) >> $shift; + $d = t0.wrapping_sub(t3).wrapping_add(bias) >> $shift; + }); + ($a: expr, $b: expr, $c: expr, $d: expr, $e: expr, $f: expr, $g: expr, $h: expr) => { + let e0 = $a + $e; + let e1 = -$d + $f - $h - ($h >> 1); + let e2 = $a - $e; + let e3 = $b + $h - $d - ($d >> 1); + let e4 = ($c >> 1) - $g; + let e5 = -$b + $h + $f + ($f >> 1); + let e6 = $c + ($g >> 1); + let e7 = $d + $f + $b + ($b >> 1); + + let f0 = e0 + e6; + let f1 = e1 + (e7 >> 2); + let f2 = e2 + e4; + let f3 = e3 + (e5 >> 2); + let f4 = e2 - e4; + let f5 = (e3 >> 2) - e5; + let f6 = e0 - e6; + let f7 = e7 - (e1 >> 2); + + $a = f0 + f7; + $b = f2 + f5; + $c = f4 + f3; + $d = f6 + f1; + $e = f6 - f1; + $f = f4 - f3; + $g = f2 - f5; + $h = f0 - f7; + }; +} + +pub fn idct_luma_dc(blk: &mut [i16; 16], qp: u8) { + if qp < 12 { + let mul = LEVEL_SCALE[0][(qp % 6) as usize]; + let shift = 2 - qp / 6; + let bias = 1 << shift >> 1; + for el in blk.iter_mut() { + *el = el.wrapping_mul(mul).wrapping_add(bias) >> shift; + } + } else { + let mul = LEVEL_SCALE[0][(qp % 6) as usize]; + let shift = qp / 6 - 2; + for el in blk.iter_mut() { + *el = el.wrapping_mul(mul) << shift; + } + } + for i in 0..4 { + transform!(luma_dc; blk[i], blk[i + 4], blk[i + 8], blk[i + 12]); + } + for row in blk.chunks_exact_mut(4) { + 
transform!(luma_dc; row[0], row[1], row[2], row[3]); + } +} + +/* Dequantises every coefficient except the first (DC) one and runs the 4x4 inverse transform in place: rows first, then columns with the rounding shift by 6. Wrapping multiplication matches the other dequantisers in this file and avoids overflow panics on oversized coefficients. */ +pub fn idct_skip_dc(blk: &mut [i16; 16], qp: u8) { + const BLK_INDEX: [usize; 16] = [ + 0, 2, 0, 2, + 2, 1, 2, 1, + 0, 2, 0, 2, + 2, 1, 2, 1 + ]; + let qidx = (qp % 6) as usize; + let shift = qp / 6; + for (el, &idx) in blk.iter_mut().zip(BLK_INDEX.iter()).skip(1) { + *el = el.wrapping_mul(LEVEL_SCALE[idx][qidx]) << shift; + } + for row in blk.chunks_exact_mut(4) { + transform!(row[0], row[1], row[2], row[3], 0); + } + for i in 0..4 { + transform!(blk[i], blk[i + 4], blk[i + 8], blk[i + 12], 6); + } +} + +/* Dequantises all sixteen coefficients and runs the 4x4 inverse transform in place: rows first, then columns with the rounding shift by 6. Wrapping multiplication avoids overflow panics on oversized coefficients. */ +pub fn idct(blk: &mut [i16; 16], qp: u8) { + const BLK_INDEX: [usize; 16] = [ + 0, 2, 0, 2, + 2, 1, 2, 1, + 0, 2, 0, 2, + 2, 1, 2, 1 + ]; + let qidx = (qp % 6) as usize; + let shift = qp / 6; + for (el, &idx) in blk.iter_mut().zip(BLK_INDEX.iter()) { + *el = el.wrapping_mul(LEVEL_SCALE[idx][qidx]) << shift; + } + for row in blk.chunks_exact_mut(4) { + transform!(row[0], row[1], row[2], row[3], 0); + } + for i in 0..4 { + transform!(blk[i], blk[i + 4], blk[i + 8], blk[i + 12], 6); + } +} + +/* DC-only shortcut: optionally dequantises blk[0] (when quant_dc is set), then fills the whole block with the rounded value (dc + 32) >> 6. Wrapping arithmetic is used so that out-of-range input cannot panic. */ +pub fn idct_dc(blk: &mut [i16; 16], qp: u8, quant_dc: bool) { + let dc = if quant_dc { + blk[0].wrapping_mul(LEVEL_SCALE[0][(qp % 6) as usize]) << (qp / 6) + } else { + blk[0] + }; + *blk = [dc.wrapping_add(0x20) >> 6; 16]; +} + +const QMAT_8X8: [[u8; 16]; 6] = [ + [ + 20, 19, 25, 19, + 19, 18, 24, 18, + 25, 24, 32, 24, + 19, 18, 24, 18 + ], [ + 22, 21, 28, 21, + 21, 19, 26, 19, + 28, 26, 35, 26, + 21, 19, 26, 19 + ], [ + 26, 24, 33, 24, + 24, 23, 31, 23, + 33, 31, 42, 31, + 24, 23, 31, 23 + ], [ + 28, 26, 35, 26, + 26, 25, 33, 25, + 35, 33, 45, 33, + 26, 25, 33, 25 + ], [ + 32, 30, 40, 30, + 30, 28, 38, 28, + 40, 38, 51, 38, + 30, 28, 38, 28 + ], [ + 36, 34, 46, 34, + 34, 32, 43, 32, + 46, 43, 58, 43, + 34, 32, 43, 32 + ] +]; + +pub fn dequant8x8(blk: &mut [i16; 64], slist: &[u8; 64]) { + for (el, &scan) in blk.iter_mut().zip(ZIGZAG8X8.iter()) { + if *el != 0 { + *el = el.wrapping_mul(i16::from(slist[scan])); + } + } +} + +pub fn idct8x8(blk: &mut [i16; 
64], qp: u8) { + let mut tmp = [0i32; 64]; + let qmat = &QMAT_8X8[(qp % 6) as usize]; + if qp >= 36 { + let shift = qp / 6 - 6; + for (i, (dst, &src)) in tmp.iter_mut().zip(blk.iter()).enumerate() { + let x = i & 7; + let y = i >> 3; + let idx = (x & 3) + (y & 3) * 4; + *dst = i32::from(src).wrapping_mul(i32::from(qmat[idx])) << shift; + } + } else { + let shift = 6 - qp / 6; + let bias = (1 << shift) >> 1; + for (i, (dst, &src)) in tmp.iter_mut().zip(blk.iter()).enumerate() { + let x = i & 7; + let y = i >> 3; + let idx = (x & 3) + (y & 3) * 4; + *dst = i32::from(src).wrapping_mul(i32::from(qmat[idx])).wrapping_add(bias) >> shift; + } + } + for row in tmp.chunks_exact_mut(8) { + transform!(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7]); + } + for col in 0..8 { + transform!(tmp[col], tmp[col + 8], tmp[col + 8 * 2], tmp[col + 8 * 3], + tmp[col + 8 * 4], tmp[col + 8 * 5], tmp[col + 8 * 6], tmp[col + 8 * 7]); + } + for (dst, &src) in blk.iter_mut().zip(tmp.iter()) { + *dst = ((src + 0x20) >> 6) as i16; + } +} + +macro_rules! 
depth_funcs { + ($name_add:ident, $name_add8:ident, $clip_name:ident, + $name_dc:ident, $dc4:ident, $dc8y:ident, $dc8c:ident, $dc16:ident, + $plane8:ident, $plane16:ident, $val:expr) => { + /* Adds a 4x4 block of residuals to the picture and clamps each sample to 0..=($val * 2 - 1). The addition saturates so that an oversized residual is clamped instead of overflowing i16. */ + fn $name_add(dst: &mut [u16], offset: usize, stride: usize, coeffs: &[i16]) { + let out = &mut dst[offset..][..stride * 3 + 4]; + for (line, src) in out.chunks_mut(stride).take(4).zip(coeffs.chunks_exact(4)) { + for (dst, src) in line.iter_mut().take(4).zip(src.iter()) { + *dst = (*dst as i16).saturating_add(*src).clamp(0, $val * 2 - 1) as u16; + } + } + } + /* Same as above for an 8x8 block of residuals. */ + fn $name_add8(dst: &mut [u16], offset: usize, stride: usize, coeffs: &[i16; 64]) { + let out = &mut dst[offset..]; + for (line, src) in out.chunks_mut(stride).take(8).zip(coeffs.chunks_exact(8)) { + for (dst, src) in line.iter_mut().take(8).zip(src.iter()) { + *dst = (*dst as i16).saturating_add(*src).clamp(0, $val * 2 - 1) as u16; + } + } + } + fn $clip_name(val: i16) -> u16 { val.clamp(0, $val * 2 - 1) as u16 } + fn $name_dc(buf: &mut [u16], stride: usize, bsize: usize) { + for row in buf.chunks_mut(stride).take(bsize) { + for el in row[..bsize].iter_mut() { + *el = $val; + } + } + } + fn $dc4(buf: &mut [u16], stride: usize, _top: &[u16], _left: &[u16], _tr: &[u16]) { + $name_dc(buf, stride, 4); + } + fn $dc8y(buf: &mut [u16], stride: usize, _ctx: &IPred8Context) { + $name_dc(buf, stride, 8); + } + fn $dc8c(buf: &mut [u16], stride: usize, _top: &[u16], _left: &[u16]) { + $name_dc(buf, stride, 8); + } + fn $dc16(buf: &mut [u16], stride: usize, _top: &[u16], _left: &[u16]) { + $name_dc(buf, stride, 16); + } + fn $plane8(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16]) { + let mut h: i32 = 4 * (i32::from(top[7]) - i32::from(left[0])); + let mut v: i32 = 4 * (i32::from(left[8]) - i32::from(left[0])); + for i in 0..3 { + let i1 = (i + 1) as i32; + h += i1 * (i32::from(top[4 + i]) - i32::from(top[2 - i])); + v += i1 * (i32::from(left[5 + i]) - i32::from(left[3 - i])); + } + let b = (17 * h + 16) >> 5; + let c = (17 * v + 16) >> 5; + let mut a 
= 16 * (i32::from(left[8]) + i32::from(top[7])) - 3 * (b + c) + 16; + for line in buf.chunks_mut(stride).take(8) { + let mut acc = a; + for el in line.iter_mut().take(8) { + *el = $clip_name((acc >> 5) as i16); + acc += b; + } + a += c; + } + } + fn $plane16(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16]) { + let mut h = 8 * (i32::from(top[15]) - i32::from(left[0])); + let mut v = 8 * (i32::from(left[16]) - i32::from(left[0])); + for k in 0..7 { + h += ((k as i32) + 1) * (i32::from(top[8 + k]) - i32::from(top[6 - k])); + v += ((k as i32) + 1) * (i32::from(left[9 + k]) - i32::from(left[7 - k])); + } + + h = (5 * h + 32) >> 6; + v = (5 * v + 32) >> 6; + + let mut a = 16 * (i32::from(left[16]) + i32::from(top[15]) + 1) - 7 * (v + h); + + for row in buf.chunks_mut(stride).take(16) { + let mut b = a; + a += v; + + for dst in row.chunks_exact_mut(4).take(4) { + dst[0] = $clip_name(((b ) >> 5) as i16); + dst[1] = $clip_name(((b + h) >> 5) as i16); + dst[2] = $clip_name(((b + 2*h) >> 5) as i16); + dst[3] = $clip_name(((b + 3*h) >> 5) as i16); + b += h * 4; + } + } + } + } +} + +depth_funcs!(add_coeffs_10, add_coeffs8_10, clip10, + ipred_dc128_10, ipred_4x4_dc128_10, ipred_y_8x8_dc128_10, ipred_8x8_dc128_10, ipred_16x16_dc128_10, + ipred_8x8_plane_10, ipred_16x16_plane_10, 0x200); + +fn ipred_ver(buf: &mut [u16], stride: usize, top: &[u16], bsize: usize) { + for row in buf.chunks_mut(stride).take(bsize) { + row[..bsize].copy_from_slice(&top[..bsize]); + } +} +fn ipred_hor(buf: &mut [u16], stride: usize, left: &[u16], bsize: usize) { + for (row, &left) in buf.chunks_mut(stride).zip(left[1..].iter()).take(bsize) { + for el in row[..bsize].iter_mut() { + *el = left; + } + } +} +fn ipred_dc(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16], bsize: usize, shift: u8) { + let mut adc: u16 = 0; + for i in 0..bsize { adc += top[i]; } + for i in 0..bsize { adc += left[i + 1]; } + let dc = (adc + (1 << (shift - 1))) >> shift; + + for row in 
buf.chunks_mut(stride).take(bsize) { + for el in row[..bsize].iter_mut() { + *el = dc; + } + } +} +fn ipred_left_dc(buf: &mut [u16], stride: usize, left: &[u16], bsize: usize, shift: u8) { + let mut adc: u16 = 0; + for i in 0..bsize { adc += left[i + 1]; } + let dc = (adc + (1 << (shift - 1))) >> shift; + + for row in buf.chunks_mut(stride).take(bsize) { + for el in row[..bsize].iter_mut() { + *el = dc; + } + } +} +fn ipred_top_dc(buf: &mut [u16], stride: usize, top: &[u16], bsize: usize, shift: u8) { + let mut adc: u16 = 0; + for i in 0..bsize { adc += top[i]; } + let dc = (adc + (1 << (shift - 1))) >> shift; + + for row in buf.chunks_mut(stride).take(bsize) { + for el in row[..bsize].iter_mut() { + *el = dc; + } + } +} + +fn load(dst: &mut [u16], src: &[u16]) { + for (dst, &src) in dst.iter_mut().zip(src.iter()) { + *dst = src; + } +} + +fn ipred_4x4_ver(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16], _tr: &[u16]) { + ipred_ver(buf, stride, top, 4); +} +fn ipred_4x4_hor(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16], _tr: &[u16]) { + ipred_hor(buf, stride, left, 4); +} +fn ipred_4x4_diag_down_left(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16], tr: &[u16]) { + let mut t: [u16; 9] = [0; 9]; + load(&mut t[..4], top); + load(&mut t[4..8], tr); + t[8] = t[7]; + + for i in 0..4 { + buf[i] = (t[i] + 2 * t[i + 1] + t[i + 2] + 2) >> 2; + } + let dst = &mut buf[stride..]; + for i in 0..4 { + dst[i] = (t[i + 1] + 2 * t[i + 2] + t[i + 3] + 2) >> 2; + } + let dst = &mut buf[stride * 2..]; + for i in 0..4 { + dst[i] = (t[i + 2] + 2 * t[i + 3] + t[i + 4] + 2) >> 2; + } + let dst = &mut buf[stride * 3..]; + for i in 0..4 { + dst[i] = (t[i + 3] + 2 * t[i + 4] + t[i + 5] + 2) >> 2; + } +} +fn ipred_4x4_diag_down_right(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16], _tr: &[u16]) { + let mut t: [u16; 5] = [0; 5]; + t[0] = left[0]; + load(&mut t[1..], top); + let mut l: [u16; 5] = [0; 5]; + load(&mut l, left); + let dst = buf; + + for j 
in 0..4 { + for i in 0..j { + dst[i + j * stride] = (l[j - i - 1] + 2 * l[j - i] + l[j - i + 1] + 2) >> 2; + } + dst[j + j * stride] = (l[1] + 2 * l[0] + t[1] + 2) >> 2; + for i in (j+1)..4 { + dst[i + j * stride] = (t[i - j - 1] + 2 * t[i - j] + t[i - j + 1] + 2) >> 2; + } + } +} +fn ipred_4x4_ver_right(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16], _tr: &[u16]) { + let mut t: [u16; 5] = [0; 5]; + t[0] = left[0]; + load(&mut t[1..], top); + let mut l: [u16; 5] = [0; 5]; + load(&mut l, left); + let dst = buf; + + for j in 0..4 { + for i in 0..4 { + let zvr = ((2 * i) as i8) - (j as i8); + let pix; + if zvr >= 0 { + if (zvr & 1) == 0 { + pix = (t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 1) >> 1; + } else { + pix = (t[i - (j >> 1) - 1] + 2 * t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 2) >> 2; + } + } else { + if zvr == -1 { + pix = (l[1] + 2 * l[0] + t[1] + 2) >> 2; + } else { + pix = (l[j] + 2 * l[j - 1] + l[j - 2] + 2) >> 2; + } + } + dst[i + j * stride] = pix; + } + } +} +fn ipred_4x4_ver_left(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16], tr: &[u16]) { + let mut t: [u16; 8] = [0; 8]; + load(&mut t[..4], top); + load(&mut t[4..], tr); + let dst = buf; + + dst[0 + 0 * stride] = (t[0] + t[1] + 1) >> 1; + let pix = (t[1] + t[2] + 1) >> 1; + dst[1 + 0 * stride] = pix; + dst[0 + 2 * stride] = pix; + let pix = (t[2] + t[3] + 1) >> 1; + dst[2 + 0 * stride] = pix; + dst[1 + 2 * stride] = pix; + let pix = (t[3] + t[4] + 1) >> 1; + dst[3 + 0 * stride] = pix; + dst[2 + 2 * stride] = pix; + dst[3 + 2 * stride] = (t[4] + t[5] + 1) >> 1; + dst[0 + 1 * stride] = (t[0] + 2*t[1] + t[2] + 2) >> 2; + let pix = (t[1] + 2*t[2] + t[3] + 2) >> 2; + dst[1 + 1 * stride] = pix; + dst[0 + 3 * stride] = pix; + let pix = (t[2] + 2*t[3] + t[4] + 2) >> 2; + dst[2 + 1 * stride] = pix; + dst[1 + 3 * stride] = pix; + let pix = (t[3] + 2*t[4] + t[5] + 2) >> 2; + dst[3 + 1 * stride] = pix; + dst[2 + 3 * stride] = pix; + dst[3 + 3 * stride] = (t[4] + 2*t[5] + t[6] + 2) >> 2; +} +fn 
ipred_4x4_hor_down(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16], _tr: &[u16]) { + let mut t: [u16; 5] = [0; 5]; + t[0] = left[0]; + load(&mut t[1..], top); + let mut l: [u16; 5] = [0; 5]; + load(&mut l, left); + let dst = buf; + + for j in 0..4 { + for i in 0..4 { + let zhd = ((2 * j) as i8) - (i as i8); + let pix; + if zhd >= 0 { + if (zhd & 1) == 0 { + pix = (l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 1) >> 1; + } else { + pix = (l[j - (i >> 1) - 1] + 2 * l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 2) >> 2; + } + } else { + if zhd == -1 { + pix = (l[1] + 2 * l[0] + t[1] + 2) >> 2; + } else { + pix = (t[i - 2] + 2 * t[i - 1] + t[i] + 2) >> 2; + } + } + dst[i + j * stride] = pix; + } + } +} +fn ipred_4x4_hor_up(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16], _tr: &[u16]) { + let mut l: [u16; 8] = [0; 8]; + load(&mut l, &left[1..]); + let dst = buf; + + dst[0 + 0 * stride] = (l[0] + l[1] + 1) >> 1; + dst[1 + 0 * stride] = (l[0] + 2*l[1] + l[2] + 2) >> 2; + let pix = (l[1] + l[2] + 1) >> 1; + dst[2 + 0 * stride] = pix; + dst[0 + 1 * stride] = pix; + let pix = (l[1] + 2*l[2] + l[3] + 2) >> 2; + dst[3 + 0 * stride] = pix; + dst[1 + 1 * stride] = pix; + let pix = (l[2] + l[3] + 1) >> 1; + dst[2 + 1 * stride] = pix; + dst[0 + 2 * stride] = pix; + let pix = (l[2] + 3*l[3] + 2) >> 2; + dst[3 + 1 * stride] = pix; + dst[1 + 2 * stride] = pix; + dst[3 + 2 * stride] = l[3]; + dst[1 + 3 * stride] = l[3]; + dst[0 + 3 * stride] = l[3]; + dst[2 + 2 * stride] = l[3]; + dst[2 + 3 * stride] = l[3]; + dst[3 + 3 * stride] = l[3]; +} +fn ipred_4x4_dc(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16], _tr: &[u16]) { + ipred_dc(buf, stride, top, left, 4, 3); +} +fn ipred_4x4_left_dc(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16], _tr: &[u16]) { + ipred_left_dc(buf, stride, left, 4, 2); +} +fn ipred_4x4_top_dc(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16], _tr: &[u16]) { + ipred_top_dc(buf, stride, top, 4, 2); +} + +pub struct 
IPred8Context { + pub t: [u16; 16], + pub l: [u16; 8], + pub tl: u16, + pub fill: u16, +} + +impl IPred8Context { + pub fn new(fill: u16) -> Self { + Self { + t: [fill; 16], + l: [fill; 8], + tl: fill, + fill, + } + } + pub fn fill(&mut self, top: &[u16], left: &[u16], has_t: bool, has_tr: bool, has_l: bool, has_tl: bool) { + let mut t = [self.fill; 19]; + let mut l = [self.fill; 11]; + if has_t { + t[1..8 + 1].copy_from_slice(&top[..8]); + } + if has_tr { + t[8 + 1..16 + 1].copy_from_slice(&top[8..][..8]); + t[16 + 1] = t[15 + 1]; + t[17 + 1] = t[15 + 1]; + } else { + let (t0, t1) = t.split_at_mut(8 + 1); + for el in t1.iter_mut() { + *el = t0[7 + 1]; + } + } + if has_l { + l[1..9].copy_from_slice(&left[1..9]); + l[8 + 1] = l[7 + 1]; + l[9 + 1] = l[7 + 1]; + } + if has_tl { + t[0] = left[0]; + l[0] = left[0]; + } else { + t[0] = t[1]; + l[0] = l[1]; + } + + for i in 0..16 { + self.t[i] = (t[i] + 2 * t[i + 1] + t[i + 2] + 2) >> 2; + } + for i in 0..8 { + self.l[i] = (l[i] + 2 * l[i + 1] + l[i + 2] + 2) >> 2; + } + self.tl = if has_t && has_l { + (t[1] + 2 * t[0] + l[1] + 2) >> 2 + } else if has_t { + (3 * t[0] + t[1] + 2) >> 2 + } else if has_l { + (3 * l[0] + l[1] + 2) >> 2 + } else { + t[0] + }; + } +} + +fn ipred_y_8x8_ver(buf: &mut [u16], stride: usize, ctx: &IPred8Context) { + for row in buf.chunks_mut(stride).take(8) { + row[..8].copy_from_slice(&ctx.t[..8]); + } +} +fn ipred_y_8x8_hor(buf: &mut [u16], stride: usize, ctx: &IPred8Context) { + for (row, &l) in buf.chunks_mut(stride).zip(ctx.l.iter()).take(8) { + row[..8].copy_from_slice(&[l; 8]); + } +} +fn ipred_y_8x8_diag_down_left(buf: &mut [u16], stride: usize, ctx: &IPred8Context) { + let mut t = [0u16; 16]; + load(&mut t, &ctx.t); + + for (y, row) in buf.chunks_mut(stride).take(8).enumerate() { + for (x, pix) in row.iter_mut().take(8).enumerate() { + *pix = (if (x != 7) || (y != 7) { + t[x + y] + 2 * t[x + y + 1] + t[x + y + 2] + } else { + t[14] + 3 * t[15] + } + 2) >> 2; + } + } +} +fn 
ipred_y_8x8_diag_down_right(buf: &mut [u16], stride: usize, ctx: &IPred8Context) { + let mut t = [0u16; 9]; + t[0] = ctx.tl; + load(&mut t[1..], &ctx.t); + let mut l = [0u16; 9]; + l[0] = ctx.tl; + load(&mut l[1..], &ctx.l); + let diag = t[1] + 2 * t[0] + l[1]; + + for (y, row) in buf.chunks_mut(stride).take(8).enumerate() { + for (x, pix) in row.iter_mut().take(8).enumerate() { + *pix = (if x > y { + t[x - y - 1] + 2 * t[x - y] + t[x - y + 1] + } else if x < y { + l[y - x - 1] + 2 * l[y - x] + l[y - x + 1] + } else { + diag + } + 2) >> 2; + } + } +} +fn ipred_y_8x8_ver_right(buf: &mut [u16], stride: usize, ctx: &IPred8Context) { + let mut t = [0u16; 9]; + t[0] = ctx.tl; + load(&mut t[1..], &ctx.t); + let mut l = [0u16; 9]; + l[0] = ctx.tl; + load(&mut l[1..], &ctx.l); + + for (y, row) in buf.chunks_mut(stride).take(8).enumerate() { + for (x, pix) in row.iter_mut().take(8).enumerate() { + let zvr = 2 * (x as i8) - (y as i8); + *pix = if zvr >= 0 { + let ix = x - (y >> 1); + if (zvr & 1) == 0 { + (t[ix] + t[ix + 1] + 1) >> 1 + } else { + (t[ix - 1] + 2 * t[ix] + t[ix + 1] + 2) >> 2 + } + } else if zvr == -1 { + (l[1] + 2 * l[0] + t[1] + 2) >> 2 + } else { + let ix = y - 2 * x; + (l[ix] + 2 * l[ix - 1] + l[ix - 2] + 2) >> 2 + }; + } + } +} +fn ipred_y_8x8_ver_left(buf: &mut [u16], stride: usize, ctx: &IPred8Context) { + let mut t = [0u16; 16]; + load(&mut t, &ctx.t); + + for (y, row) in buf.chunks_mut(stride).take(8).enumerate() { + for (x, pix) in row.iter_mut().take(8).enumerate() { + let ix = x + (y >> 1); + *pix = if (y & 1) == 0 { + (t[ix] + t[ix + 1] + 1) >> 1 + } else { + (t[ix] + 2 * t[ix + 1] + t[ix + 2] + 2) >> 2 + }; + } + } + +} +fn ipred_y_8x8_hor_down(buf: &mut [u16], stride: usize, ctx: &IPred8Context) { + let mut t = [0u16; 9]; + t[0] = ctx.tl; + load(&mut t[1..], &ctx.t); + let mut l = [0u16; 9]; + l[0] = ctx.tl; + load(&mut l[1..], &ctx.l); + + for (y, row) in buf.chunks_mut(stride).take(8).enumerate() { + for (x, pix) in 
row.iter_mut().take(8).enumerate() { + let zhd = 2 * (y as i8) - (x as i8); + *pix = if zhd >= 0 { + let ix = y - (x >> 1); + if (zhd & 1) == 0 { + (l[ix] + l[ix + 1] + 1) >> 1 + } else { + (l[ix - 1] + 2 * l[ix] + l[ix + 1] + 2) >> 2 + } + } else if zhd == -1 { + (l[1] + 2 * l[0] + t[1] + 2) >> 2 + } else { + let ix = x - 2 * y; + (t[ix] + 2 * t[ix - 1] + t[ix - 2] + 2) >> 2 + }; + } + } +} +fn ipred_y_8x8_hor_up(buf: &mut [u16], stride: usize, ctx: &IPred8Context) { + let mut l = [0u16; 8]; + load(&mut l, &ctx.l); + + for (y, row) in buf.chunks_mut(stride).take(8).enumerate() { + for (x, pix) in row.iter_mut().take(8).enumerate() { + let zhu = x + 2 * y; + let ix = y + (x >> 1); + *pix = if zhu > 13 { + l[7] + } else if zhu == 13 { + (l[6] + 3 * l[7] + 2) >> 2 + } else if (zhu & 1) != 0 { + (l[ix] + 2 * l[ix + 1] + l[ix + 2] + 2) >> 2 + } else { + (l[ix] + l[ix + 1] + 1) >> 1 + }; + } + } +} +fn ipred_y_8x8_dc(buf: &mut [u16], stride: usize, ctx: &IPred8Context) { + let mut sum = 0u16; + for &t in ctx.t[..8].iter() { + sum += t; + } + for &l in ctx.l[..8].iter() { + sum += l; + } + let dc = (sum + 8) >> 4; + for row in buf.chunks_mut(stride).take(8) { + for pix in row.iter_mut().take(8) { + *pix = dc; + } + } +} +fn ipred_y_8x8_left_dc(buf: &mut [u16], stride: usize, ctx: &IPred8Context) { + let mut sum = 0u16; + for &l in ctx.l[..8].iter() { + sum += l; + } + let dc = (sum + 4) >> 3; + for row in buf.chunks_mut(stride).take(8) { + for pix in row.iter_mut().take(8) { + *pix = dc; + } + } +} +fn ipred_y_8x8_top_dc(buf: &mut [u16], stride: usize, ctx: &IPred8Context) { + let mut sum = 0u16; + for &t in ctx.t[..8].iter() { + sum += t; + } + let dc = (sum + 4) >> 3; + for row in buf.chunks_mut(stride).take(8) { + for pix in row.iter_mut().take(8) { + *pix = dc; + } + } +} + +fn ipred_8x8_ver(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16]) { + ipred_ver(buf, stride, top, 8); +} +fn ipred_8x8_hor(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16]) { 
+ ipred_hor(buf, stride, left, 8); +} +fn ipred_8x8_dc(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16]) { + let mut l = [0; 8]; + load(&mut l, &left[1..]); + let mut t = [0; 8]; + load(&mut t, top); + + let dc0 = (t[0] + t[1] + t[2] + t[3] + l[0] + l[1] + l[2] + l[3] + 4) >> 3; + let sum1 = t[4] + t[5] + t[6] + t[7]; + let dc1 = (sum1 + 2) >> 2; + let sum2 = l[4] + l[5] + l[6] + l[7]; + let dc2 = (sum2 + 2) >> 2; + let dc3 = (sum1 + sum2 + 4) >> 3; + + for row in buf.chunks_mut(stride).take(4) { + row[..4].copy_from_slice(&[dc0; 4]); + row[4..8].copy_from_slice(&[dc1; 4]); + } + for row in buf.chunks_mut(stride).skip(4).take(4) { + row[..4].copy_from_slice(&[dc2; 4]); + row[4..8].copy_from_slice(&[dc3; 4]); + } +} +fn ipred_8x8_left_dc(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16]) { + let mut left_dc0 = 0; + let mut left_dc1 = 0; + for &el in left[1..].iter().take(4) { + left_dc0 += el; + } + for &el in left[1..].iter().skip(4).take(4) { + left_dc1 += el; + } + let dc0 = (left_dc0 + 2) >> 2; + let dc2 = (left_dc1 + 2) >> 2; + for row in buf.chunks_mut(stride).take(4) { + row[..8].copy_from_slice(&[dc0; 8]); + } + for row in buf.chunks_mut(stride).skip(4).take(4) { + row[..8].copy_from_slice(&[dc2; 8]); + } +} +fn ipred_8x8_top_dc(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16]) { + ipred_top_dc(buf, stride, top, 4, 2); + ipred_top_dc(&mut buf[4..], stride, &top[4..], 4, 2); + let mut top = [0; 8]; + top.copy_from_slice(&buf[stride * 3..][..8]); + ipred_top_dc(&mut buf[4 * stride..], stride, &top, 4, 2); + ipred_top_dc(&mut buf[4 + 4 * stride..], stride, &top[4..], 4, 2); +} + +fn ipred_16x16_ver(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16]) { + ipred_ver(buf, stride, top, 16); +} +fn ipred_16x16_hor(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16]) { + ipred_hor(buf, stride, left, 16); +} +fn ipred_16x16_dc(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16]) { + ipred_dc(buf, stride, top, left, 16, 
5); +} +fn ipred_16x16_left_dc(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16]) { + ipred_left_dc(buf, stride, left, 16, 4); +} +fn ipred_16x16_top_dc(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16]) { + ipred_top_dc(buf, stride, top, 16, 4); +} + +pub type AddCoeffsFunc = fn(dst: &mut [u16], offset: usize, stride: usize, coeffs: &[i16]); +pub type AddCoeffs8Func = fn(dst: &mut [u16], offset: usize, stride: usize, coeffs: &[i16; 64]); +pub type IPred4x4Func = fn(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16], tr: &[u16]); +pub type IPred8x8Func = fn(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16]); +pub type IPred8x8LumaFunc = fn(buf: &mut [u16], stride: usize, ctx: &IPred8Context); + +pub const IPRED4_DC128: usize = 11; +pub const IPRED4_DC_TOP: usize = 10; +pub const IPRED4_DC_LEFT: usize = 9; +pub const IPRED8_DC128: usize = 6; +pub const IPRED8_DC_TOP: usize = 5; +pub const IPRED8_DC_LEFT: usize = 4; + +pub struct IPredFuncs { + pub fill_val: u16, + pub add_coeffs: AddCoeffsFunc, + pub add_coeffs8: AddCoeffs8Func, + pub ipred4x4: [IPred4x4Func; 12], + pub ipred8x8_luma: [IPred8x8LumaFunc; 12], + pub ipred8x8_chroma: [IPred8x8Func; 7], + pub ipred16x16: [IPred8x8Func; 7], +} + +static IPRED_FUNCS: [IPredFuncs; 1] = [ + IPredFuncs { + fill_val: 0x200, + add_coeffs: add_coeffs_10, + add_coeffs8: add_coeffs8_10, + ipred4x4: [ + ipred_4x4_ver, ipred_4x4_hor, ipred_4x4_dc, + ipred_4x4_diag_down_left, ipred_4x4_diag_down_right, + ipred_4x4_ver_right, ipred_4x4_hor_down, ipred_4x4_ver_left, ipred_4x4_hor_up, + ipred_4x4_left_dc, ipred_4x4_top_dc, ipred_4x4_dc128_10 + ], + ipred8x8_luma: [ + ipred_y_8x8_ver, ipred_y_8x8_hor, ipred_y_8x8_dc, + ipred_y_8x8_diag_down_left, ipred_y_8x8_diag_down_right, + ipred_y_8x8_ver_right, ipred_y_8x8_hor_down, + ipred_y_8x8_ver_left, ipred_y_8x8_hor_up, + ipred_y_8x8_left_dc, ipred_y_8x8_top_dc, ipred_y_8x8_dc128_10 + ], + ipred8x8_chroma: [ + ipred_8x8_dc, ipred_8x8_hor, ipred_8x8_ver, 
ipred_8x8_plane_10, + ipred_8x8_left_dc, ipred_8x8_top_dc, ipred_8x8_dc128_10 + ], + ipred16x16: [ + ipred_16x16_ver, ipred_16x16_hor, ipred_16x16_dc, ipred_16x16_plane_10, + ipred_16x16_left_dc, ipred_16x16_top_dc, ipred_16x16_dc128_10 + ] + } +]; + +pub fn find_ipred_funcs(fill_val: u16) -> &'static IPredFuncs { + for ipf in IPRED_FUNCS.iter() { + if ipf.fill_val == fill_val { + return ipf; + } + } + unreachable!() +} + +macro_rules! loop_filter { + (lumaedge; $buf: expr, $off: expr, $step: expr, $alpha: expr, $beta: expr, $clip: ident) => { + let p2 = $buf[$off - $step * 3] as i16; + let p1 = $buf[$off - $step * 2] as i16; + let p0 = $buf[$off - $step] as i16; + let q0 = $buf[$off] as i16; + let q1 = $buf[$off + $step] as i16; + let q2 = $buf[$off + $step * 2] as i16; + let a_p = (p2 - p0).abs() < $beta; + let a_q = (q2 - q0).abs() < $beta; + if a_p && (p0 - q0).abs() < (($alpha >> 2) + 2) { + let p3 = $buf[$off - $step * 4] as i16; + $buf[$off - $step * 3] = ((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) as u16; + $buf[$off - $step * 2] = ((p2 + p1 + p0 + q0 + 2) >> 2) as u16; + $buf[$off - $step] = ((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) as u16; + } else { + $buf[$off - $step] = ((2 * p1 + p0 + q1 + 2) >> 2) as u16; + } + if a_q && (p0 - q0).abs() < (($alpha >> 2) + 2) { + let q3 = $buf[$off + $step * 3] as i16; + $buf[$off] = ((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) as u16; + $buf[$off + $step] = ((p0 + q0 + q1 + q2 + 2) >> 2) as u16; + $buf[$off + $step * 2] = ((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) as u16; + } else { + $buf[$off] = ((2 * q1 + q0 + p1 + 2) >> 2) as u16; + } + }; + (chromaedge; $buf: expr, $off: expr, $step: expr, $clip:ident) => { + let p1 = $buf[$off - $step * 2] as i16; + let p0 = $buf[$off - $step] as i16; + let q0 = $buf[$off] as i16; + let q1 = $buf[$off + $step] as i16; + $buf[$off - $step] = ((2 * p1 + p0 + q1 + 2) >> 2) as u16; + $buf[$off] = ((2 * q1 + q0 + p1 + 2) >> 2) as u16; + }; + (lumanormal; $buf: expr, $off: 
expr, $step: expr, $tc0: expr, $beta: expr, $clip: ident) => { + let p2 = $buf[$off - $step * 3] as i16; + let p1 = $buf[$off - $step * 2] as i16; + let p0 = $buf[$off - $step] as i16; + let q0 = $buf[$off] as i16; + let q1 = $buf[$off + $step] as i16; + let q2 = $buf[$off + $step * 2] as i16; + let a_p = (p2 - p0).abs() < $beta; + let a_q = (q2 - q0).abs() < $beta; + let tc = $tc0 + (a_p as i16) + (a_q as i16); + let delta = (((q0 - p0) * 4 + (p1 - q1) + 4) >> 3).max(-tc).min(tc); + if a_p && ($tc0 > 0) { + $buf[$off - $step * 2] = $clip(p1 + ((p2 + ((p0 + q0 + 1) >> 1) - p1 * 2) >> 1).max(-$tc0).min($tc0)); + } + $buf[$off - $step] = $clip(p0 + delta); + $buf[$off] = $clip(q0 - delta); + if a_q && ($tc0 > 0) { + $buf[$off + $step] = $clip(q1 + ((q2 + ((p0 + q0 + 1) >> 1) - q1 * 2) >> 1).max(-$tc0).min($tc0)); + } + }; + (chromanormal; $buf: expr, $off: expr, $step: expr, $tc0: expr, $clip: ident) => { + let p1 = $buf[$off - $step * 2] as i16; + let p0 = $buf[$off - $step] as i16; + let q0 = $buf[$off] as i16; + let q1 = $buf[$off + $step] as i16; + let tc = $tc0 + 1; + let delta = (((q0 - p0) * 4 + (p1 - q1) + 4) >> 3).max(-tc).min(tc); + $buf[$off - $step] = $clip(p0 + delta); + $buf[$off] = $clip(q0 - delta); + } +} + +fn check_filter(buf: &[u16], off: usize, step: usize, alpha: i16, beta: i16) -> bool { + let p1 = buf[off - step * 2] as i16; + let p0 = buf[off - step] as i16; + let q0 = buf[off] as i16; + let q1 = buf[off + step] as i16; + (p0 - q0).abs() < alpha && (p1 - p0).abs() < beta && (q1 - q0).abs() < beta +} + +fn check_filter4(buf: &[u16], mut off: usize, step: usize, stride: usize, alpha: i16, beta: i16) -> [bool; 4] { + let mut flags = [false; 4]; + for flag in flags.iter_mut() { + let p1 = buf[off - step * 2] as i16; + let p0 = buf[off - step] as i16; + let q0 = buf[off] as i16; + let q1 = buf[off + step] as i16; + *flag = (p0 - q0).abs() < alpha && (p1 - p0).abs() < beta && (q1 - q0).abs() < beta; + off += stride; + } + flags +} + +fn 
loop_filter_lumaedge_v_10(dst: &mut [u16], mut off: usize, stride: usize, alpha: i16, beta: i16) { + let flags = check_filter4(dst, off, 1, stride, alpha, beta); + for &flag in flags.iter() { + if flag { + loop_filter!(lumaedge; dst, off, 1, alpha, beta, clip10); + } + off += stride; + } +} +fn loop_filter_lumaedge_h_10(dst: &mut [u16], off: usize, stride: usize, alpha: i16, beta: i16) { + let flags = check_filter4(dst, off, stride, 1, alpha, beta); + for (x, &flag) in flags.iter().enumerate() { + if flag { + loop_filter!(lumaedge; dst, off + x, stride, alpha, beta, clip10); + } + } +} +fn loop_filter_lumanormal_v_10(dst: &mut [u16], mut off: usize, stride: usize, alpha: i16, beta: i16, tc0: i16) { + let flags = check_filter4(dst, off, 1, stride, alpha, beta); + for &flag in flags.iter() { + if flag { + loop_filter!(lumanormal; dst, off, 1, tc0, beta, clip10); + } + off += stride; + } +} +fn loop_filter_lumanormal_h_10(dst: &mut [u16], off: usize, stride: usize, alpha: i16, beta: i16, tc0: i16) { + let flags = check_filter4(dst, off, stride, 1, alpha, beta); + for (x, &flag) in flags.iter().enumerate() { + if flag { + loop_filter!(lumanormal; dst, off + x, stride, tc0, beta, clip10); + } + } +} +fn loop_filter_chromaedge_v_10(dst: &mut [u16], mut off: usize, stride: usize, alpha: i16, beta: i16) { + for _ in 0..2 { + if check_filter(dst, off, 1, alpha, beta) { + loop_filter!(chromaedge; dst, off, 1, clip10); + } + off += stride; + } +} +fn loop_filter_chromaedge_h_10(dst: &mut [u16], off: usize, stride: usize, alpha: i16, beta: i16) { + for x in 0..2 { + if check_filter(dst, off + x, stride, alpha, beta) { + loop_filter!(chromaedge; dst, off + x, stride, clip10); + } + } +} +fn loop_filter_chromanormal_v_10(dst: &mut [u16], mut off: usize, stride: usize, alpha: i16, beta: i16, tc0: i16) { + for _ in 0..2 { + if check_filter(dst, off, 1, alpha, beta) { + loop_filter!(chromanormal; dst, off, 1, tc0, clip10); + } + off += stride; + } +} +fn 
loop_filter_chromanormal_h_10(dst: &mut [u16], off: usize, stride: usize, alpha: i16, beta: i16, tc0: i16) { + for x in 0..2 { + if check_filter(dst, off + x, stride, alpha, beta) { + loop_filter!(chromanormal; dst, off + x, stride, tc0, clip10); + } + } +} + +/* Deblocking alpha thresholds, indexed by the value returned from get_lf_idx (0..=51). Entry 42 is 101, as in the H.264 alpha' table. */ +const ALPHA: [i16; 52] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, 25, 28, + 32, 36, 40, 45, 50, 56, 63, 71, 80, 90, 101, 113, 127, 144, 162, 182, + 203, 226, 255, 255 +]; +const BETA: [i16; 52] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, + 17, 17, 18, 18 +]; + +const TC0: [[u8; 3]; 52] = [ + [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], + [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], + [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], + [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], + [ 0, 0, 0], [ 0, 0, 1], [ 0, 0, 1], [ 0, 0, 1], + [ 0, 0, 1], [ 0, 1, 1], [ 0, 1, 1], [ 1, 1, 1], + [ 1, 1, 1], [ 1, 1, 1], [ 1, 1, 1], [ 1, 1, 2], + [ 1, 1, 2], [ 1, 1, 2], [ 1, 1, 2], [ 1, 2, 3], + [ 1, 2, 3], [ 2, 2, 3], [ 2, 2, 4], [ 2, 3, 4], + [ 2, 3, 4], [ 3, 3, 5], [ 3, 4, 6], [ 3, 4, 6], + [ 4, 5, 7], [ 4, 5, 8], [ 4, 6, 9], [ 5, 7, 10], + [ 6, 8, 11], [ 6, 8, 13], [ 7, 10, 14], [ 8, 11, 16], + [ 9, 12, 18], [10, 13, 20], [11, 15, 23], [13, 17, 25] +]; + +/* Returns the table index for an edge: the rounded average of the two QPs plus the signed offset, clamped to 0..=51. The QPs are widened before adding so the sum cannot overflow u8. */ +fn get_lf_idx(qp0: u8, qp1: u8, off: i8) -> usize { + (((i16::from(qp0) + i16::from(qp1) + 1) >> 1) + i16::from(off)).clamp(0, 51) as usize +} + +macro_rules! 
filter_edge_func { + ($funcname: ident, $edgefilter: ident, $normfilter: ident, $shift: expr) => { + fn $funcname(dst: &mut [u16], off: usize, stride: usize, dmode: u8, quants: [u8; 2], alpha_off: i8, beta_off: i8) { + let q = quants[0]; + let qleft = quants[1]; + if dmode != 0 { + let index_a = get_lf_idx(q, qleft, alpha_off); + let alpha = ALPHA[index_a] << $shift; + let beta = BETA[get_lf_idx(q, qleft, beta_off)] << $shift; + if dmode == 4 { + $edgefilter(dst, off, stride, alpha, beta); + } else { + let tc0 = i16::from(TC0[index_a][(dmode - 1) as usize]) << $shift; + $normfilter(dst, off, stride, alpha, beta, tc0); + } + } + } + } +} + +filter_edge_func!(filter_edge_y_v_10, loop_filter_lumaedge_v_10, loop_filter_lumanormal_v_10, 10 - 8); +filter_edge_func!(filter_edge_y_h_10, loop_filter_lumaedge_h_10, loop_filter_lumanormal_h_10, 10 - 8); +filter_edge_func!(filter_edge_c_v_10, loop_filter_chromaedge_v_10, loop_filter_chromanormal_v_10, 10 - 8); +filter_edge_func!(filter_edge_c_h_10, loop_filter_chromaedge_h_10, loop_filter_chromanormal_h_10, 10 - 8); + +pub type LoopFilterFn = fn (dst: &mut [u16], off: usize, stride: usize, dmode: u8, quants: [u8; 2], alpha_off: i8, beta_off: i8); + +pub struct LoopFilterFuncs { + pub fill_val: u16, + pub filter_edge_y_v: LoopFilterFn, + pub filter_edge_y_h: LoopFilterFn, + pub filter_edge_c_v: LoopFilterFn, + pub filter_edge_c_h: LoopFilterFn, +} + +static LOOP_FILTER_FUNCS: [LoopFilterFuncs; 1] = [ + LoopFilterFuncs { + fill_val: 0x200, + filter_edge_y_v: filter_edge_y_v_10, + filter_edge_y_h: filter_edge_y_h_10, + filter_edge_c_v: filter_edge_c_v_10, + filter_edge_c_h: filter_edge_c_h_10, + } +]; + +pub fn find_loop_filter_funcs(fill_val: u16) -> &'static LoopFilterFuncs { + for lf in LOOP_FILTER_FUNCS.iter() { + if lf.fill_val == fill_val { + return lf; + } + } + unreachable!() +} diff --git a/nihav-itu/src/codecs/h264/high/loopfilter.rs b/nihav-itu/src/codecs/h264/high/loopfilter.rs new file mode 100644 index 
0000000..46dfb72 --- /dev/null +++ b/nihav-itu/src/codecs/h264/high/loopfilter.rs @@ -0,0 +1,55 @@ +use nihav_core::frame::NASimpleVideoFrame; +use super::types::SliceState; +use super::dsp::*; + +pub fn loop_filter_mb(frm: &mut NASimpleVideoFrame, sstate: &SliceState, alpha_off: i8, beta_off: i8) { + let lf = find_loop_filter_funcs(sstate.def_fill); + + let yoff = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * frm.stride[0]; + let uoff = frm.offset[1] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[1]; + let voff = frm.offset[2] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[2]; + let mb_idx = sstate.mb.xpos + sstate.mb_x; + + let lqy = sstate.mb.data[mb_idx - 1].qp_y; + let lqu = sstate.mb.data[mb_idx - 1].qp_u; + let lqv = sstate.mb.data[mb_idx - 1].qp_v; + let qy = sstate.mb.data[mb_idx].qp_y; + let qu = sstate.mb.data[mb_idx].qp_u; + let qv = sstate.mb.data[mb_idx].qp_v; + + for (y, dmodes) in sstate.deblock.chunks(4).enumerate() { + (lf.filter_edge_y_v)(frm.data, yoff + y * 4 * frm.stride[0], frm.stride[0], dmodes[0] & 0xF, [qy, lqy], alpha_off, beta_off); + for x in 1..4 { + (lf.filter_edge_y_v)(frm.data, yoff + x * 4 + y * 4 * frm.stride[0], frm.stride[0], dmodes[x] & 0xF, [qy, qy], alpha_off, beta_off); + } + (lf.filter_edge_c_v)(frm.data, uoff + y * 2 * frm.stride[1], frm.stride[1], dmodes[0] & 0xF, [qu, lqu], alpha_off, beta_off); + (lf.filter_edge_c_v)(frm.data, uoff + y * 2 * frm.stride[1] + 4, frm.stride[1], dmodes[2] & 0xF, [qu, qu], alpha_off, beta_off); + (lf.filter_edge_c_v)(frm.data, voff + y * 2 * frm.stride[2], frm.stride[2], dmodes[0] & 0xF, [qv, lqv], alpha_off, beta_off); + (lf.filter_edge_c_v)(frm.data, voff + y * 2 * frm.stride[2] + 4, frm.stride[2], dmodes[2] & 0xF, [qv, qv], alpha_off, beta_off); + } + + let tqy = sstate.mb.data[mb_idx - sstate.mb.stride].qp_y; + let tqu = sstate.mb.data[mb_idx - sstate.mb.stride].qp_u; + let tqv = sstate.mb.data[mb_idx - sstate.mb.stride].qp_v; + + let dmodes = &sstate.deblock; + for x in 0..4 
{ + (lf.filter_edge_y_h)(frm.data, yoff + x * 4, frm.stride[0], dmodes[x] >> 4, [qy, tqy], alpha_off, beta_off); + } + for x in 0..4 { + (lf.filter_edge_c_h)(frm.data, uoff + x * 2, frm.stride[1], dmodes[x] >> 4, [qu, tqu], alpha_off, beta_off); + (lf.filter_edge_c_h)(frm.data, voff + x * 2, frm.stride[2], dmodes[x] >> 4, [qv, tqv], alpha_off, beta_off); + } + + for (y, dmodes) in sstate.deblock.chunks(4).enumerate().skip(1) { + for x in 0..4 { + (lf.filter_edge_y_h)(frm.data, yoff + x * 4 + y * 4 * frm.stride[0], frm.stride[0], dmodes[x] >> 4, [qy, qy], alpha_off, beta_off); + } + } + + let dmodes = &sstate.deblock[4 * 2..]; + for x in 0..4 { + (lf.filter_edge_c_h)(frm.data, uoff + x * 2 + frm.stride[1] * 4, frm.stride[1], dmodes[x] >> 4, [qu, qu], alpha_off, beta_off); + (lf.filter_edge_c_h)(frm.data, voff + x * 2 + frm.stride[2] * 4, frm.stride[2], dmodes[x] >> 4, [qv, qv], alpha_off, beta_off); + } +} diff --git a/nihav-itu/src/codecs/h264/high/mb_recon.rs b/nihav-itu/src/codecs/h264/high/mb_recon.rs new file mode 100644 index 0000000..9a0face --- /dev/null +++ b/nihav-itu/src/codecs/h264/high/mb_recon.rs @@ -0,0 +1,939 @@ +use nihav_core::codecs::{DecoderResult, DecoderError}; +use nihav_core::frame::*; +use nihav_codec_support::codecs::{MV, ZERO_MV}; +use super::super::{CurrentMBInfo, I4X4_SCAN, Shareable}; +use super::dispatch::{ThreadDispatcher, FrameDecodingStatus}; +use super::dsp::*; +use super::pic_ref::SimplifiedSliceRefs; +use super::super::slice::{SliceHeader, WeightInfo, DEF_WEIGHT_INFO}; +use super::super::common_types::*; +use super::types::*; + +fn pred_intra(frm: &mut NASimpleVideoFrame, sstate: &SliceState, mb_info: &CurrentMBInfo) { + let ipf = find_ipred_funcs(sstate.def_fill); + let yoff = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * frm.stride[0]; + match mb_info.mb_type { + MBType::Intra16x16(imode, _, _) => { + let id = if imode != 2 || (sstate.has_top && sstate.has_left) { + imode as usize + } else if !sstate.has_top && 
!sstate.has_left { + IPRED8_DC128 + } else if !sstate.has_left { + IPRED8_DC_TOP + } else { + IPRED8_DC_LEFT + }; + (ipf.ipred16x16[id])(&mut frm.data[yoff..], frm.stride[0], &sstate.top_line_y[sstate.mb_x * 16..], &sstate.left_y); + }, + MBType::Intra8x8 => { + let mut ictx = IPred8Context::new(sstate.def_fill); + for part in 0..4 { + let x = (part & 1) * 2; + let y = part & 2; + let blk4 = x + y * 4; + + let cur_yoff = yoff + x * 4 + y * 4 * frm.stride[0]; + let has_top = y > 0 || sstate.has_top; + let has_left = x > 0 || sstate.has_left; + let imode = mb_info.ipred[blk4]; + let id = if imode != IntraPredMode::DC || (has_top && has_left) { + let im_id: u8 = imode.into(); + im_id as usize + } else if !has_top && !has_left { + IPRED4_DC128 + } else if !has_left { + IPRED4_DC_TOP + } else { + IPRED4_DC_LEFT + }; + let mb_idx = sstate.mb_x + sstate.mb_y * sstate.mb_w; + let noright = (y == 2 || sstate.mb_x == sstate.mb_w - 1 || mb_idx < sstate.mb_start + sstate.mb_w) && (x == 2); + let has_tl = (has_top && x > 0) || (has_left && y > 0) || (x == 0 && y == 0 && sstate.mb_x > 0 && mb_idx > sstate.mb_start + sstate.mb_w); + if id != IPRED4_DC128 { + let top = if y == 0 { + &sstate.top_line_y[sstate.mb_x * 16 + x * 4..] + } else { + &frm.data[cur_yoff - frm.stride[0]..] + }; + let mut left_buf = [0; 9]; + let left = if x == 0 { + &sstate.left_y[y * 4..] 
+ } else { + if has_tl { + if y == 0 { + left_buf[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1]; + } else { + left_buf[0] = frm.data[cur_yoff - 1 - frm.stride[0]]; + } + } + if has_left { + for (dst, src) in left_buf[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) { + *dst = src[0]; + } + } + &left_buf + }; + ictx.fill(top, left, has_top, has_top && !noright, has_left, has_tl); + } + (ipf.ipred8x8_luma[id])(&mut frm.data[cur_yoff..], frm.stride[0], &ictx); + if mb_info.coded[blk4] { + (ipf.add_coeffs8)(frm.data, cur_yoff, frm.stride[0], &mb_info.coeffs8x8[part].coeffs); + } + } + }, + MBType::Intra4x4 => { + for &(x,y) in I4X4_SCAN.iter() { + let x = x as usize; + let y = y as usize; + let cur_yoff = yoff + x * 4 + y * 4 * frm.stride[0]; + let has_top = y > 0 || sstate.has_top; + let has_left = x > 0 || sstate.has_left; + let imode = mb_info.ipred[x + y * 4]; + let id = if imode != IntraPredMode::DC || (has_top && has_left) { + let im_id: u8 = imode.into(); + im_id as usize + } else if !has_top && !has_left { + IPRED4_DC128 + } else if !has_left { + IPRED4_DC_TOP + } else { + IPRED4_DC_LEFT + }; + let noright = (sstate.mb_x == sstate.mb_w - 1 || sstate.mb_x + sstate.mb_y * sstate.mb_w < sstate.mb_start + sstate.mb_w) && (x == 3); + let tr: [u16; 4] = if y == 0 { + let tsrc = &sstate.top_line_y[sstate.mb_x * 16 + x * 4..]; + if has_top && !noright { + [tsrc[4], tsrc[5], tsrc[6], tsrc[7]] + } else if has_top { + [tsrc[3]; 4] + } else { + [0; 4] + } + } else if (x & 1) == 0 || (x == 1 && y == 2) { + let i = cur_yoff - frm.stride[0]; + [frm.data[i + 4], frm.data[i + 5], frm.data[i + 6], frm.data[i + 7]] + } else { + let i = cur_yoff - frm.stride[0]; + [frm.data[i + 3], frm.data[i + 3], frm.data[i + 3], frm.data[i + 3]] + }; + let mut top = [sstate.def_fill; 4]; + let mut left = [sstate.def_fill; 9]; + if y == 0 { + if has_top { + top.copy_from_slice(&sstate.top_line_y[sstate.mb_x * 16 + x * 4..][..4]); + } + } else { + 
top.copy_from_slice(&frm.data[cur_yoff - frm.stride[0]..][..4]); + } + if x == 0 { + if has_left { + for (dst, &src) in left.iter_mut().zip(sstate.left_y[y * 4..].iter()) { + *dst = src; + } + } + } else { + if y == 0 { + if x == 0 { + left[0] = sstate.left_y[y * 4]; + } else if has_top { + left[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1]; + } + } else { + left[0] = frm.data[cur_yoff - frm.stride[0] - 1]; + } + for (dst, row) in left[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) { + *dst = row[0]; + } + } + (ipf.ipred4x4[id])(&mut frm.data[cur_yoff..], frm.stride[0], &top, &left, &tr); + if mb_info.coded[x + y * 4] { + (ipf.add_coeffs)(frm.data, cur_yoff, frm.stride[0], &mb_info.coeffs[x + y * 4]); + } + } + }, + _ => unreachable!(), + }; + let id = if mb_info.chroma_ipred != 0 || (sstate.has_top && sstate.has_left) { + mb_info.chroma_ipred as usize + } else if !sstate.has_top && !sstate.has_left { + IPRED8_DC128 + } else if !sstate.has_left { + IPRED8_DC_TOP + } else { + IPRED8_DC_LEFT + }; + for chroma in 1..3 { + let off = frm.offset[chroma] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[chroma]; + let top = &sstate.top_line_c[chroma - 1][sstate.mb_x * 8..]; + (ipf.ipred8x8_chroma[id])(&mut frm.data[off..], frm.stride[chroma], top, &sstate.left_c[chroma - 1]); + } +} + +fn add_luma(frm: &mut NASimpleVideoFrame, sstate: &SliceState, mb_info: &CurrentMBInfo) { + let ipf = find_ipred_funcs(sstate.def_fill); + let mut yoff = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * frm.stride[0]; + if !mb_info.transform_size_8x8 { + for y in 0..4 { + for x in 0..4 { + if mb_info.coded[x + y * 4] { + (ipf.add_coeffs)(frm.data, yoff + x * 4, frm.stride[0], &mb_info.coeffs[x + y * 4]); + } + } + yoff += frm.stride[0] * 4; + } + } else { + for y in 0..2 { + for x in 0..2 { + if mb_info.coded[x * 2 + y * 2 * 4] { + (ipf.add_coeffs8)(frm.data, yoff + x * 8, frm.stride[0], &mb_info.coeffs8x8[x + y * 2].coeffs); + } + } + yoff += frm.stride[0] * 
8; + } + } +} + +fn add_chroma(frm: &mut NASimpleVideoFrame, sstate: &SliceState, mb_info: &CurrentMBInfo) { + let ipf = find_ipred_funcs(sstate.def_fill); + for chroma in 1..3 { + let mut off = frm.offset[chroma] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[chroma]; + for y in 0..2 { + for x in 0..2 { + let blk_no = 16 + (chroma - 1) * 4 + x + y * 2; + if mb_info.coded[blk_no] || mb_info.coeffs[blk_no][0] != 0 { + (ipf.add_coeffs)(frm.data, off + x * 4, frm.stride[chroma], &mb_info.coeffs[blk_no]); + } + } + off += frm.stride[chroma] * 4; + } + } +} + +fn do_p_mc(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV, ref_pic: Option<&SimpleFrame>, weight: &WeightInfo, mc_dsp: &mut H264MC) { + if let Some(buf) = ref_pic { + if !weight.is_weighted() { + mc_dsp.do_mc(frm, buf, xpos, ypos, w, h, mv); + } else { + let mut tmp = McBlock::new(); + mc_dsp.mc_blocks(&mut tmp, buf, xpos, ypos, w, h, mv); + + let yoff = frm.offset[0] + xpos + ypos * frm.stride[0]; + let yw = if weight.luma_weighted { + [weight.luma_weight, weight.luma_offset, weight.luma_shift as i8] + } else { + [1, 0, 0] + }; + let wmode = match w { + 2 => 0, + 4 => 1, + 8 => 2, + _ => 3, + }; + (mc_dsp.put_block_weighted[wmode])(&mut frm.data[yoff..], frm.stride[0], &tmp.y, h, yw); + + for chroma in 0..2 { + let cstride = frm.stride[chroma + 1]; + let coff = frm.offset[chroma + 1] + xpos / 2 + ypos / 2 * cstride; + let cw = if weight.chroma_weighted { + [weight.chroma_weight[chroma], weight.chroma_offset[chroma], weight.chroma_shift as i8] + } else { + [1, 0, 0] + }; + let csrc = if chroma == 0 { &tmp.u } else { &tmp.v }; + (mc_dsp.put_block_weighted[wmode - 1])(&mut frm.data[coff..], cstride, csrc, h / 2, cw); + } + } + } else { + mc_dsp.gray_block(frm, xpos, ypos, w, h); + } +} + +#[allow(clippy::match_like_matches_macro)] +fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usize, w: usize, h: usize, mv0: MV, ref_pic0: Option<&SimpleFrame>, weight0: 
&WeightInfo, mv1: MV, ref_pic1: Option<&SimpleFrame>, weight1: &WeightInfo, mc_dsp: &mut H264MC) { + let do_weight = match (mode, weight0.is_weighted(), weight1.is_weighted()) { + (BMode::L0, true, _) => true, + (BMode::L1, _, true) => true, + (BMode::Bi, true, true) => true, + _ => false, + }; + if !do_weight { + match mode { + BMode::L0 => { + if let Some(buf) = ref_pic0 { + mc_dsp.do_mc(frm, buf, xpos, ypos, w, h, mv0); + } else { + mc_dsp.gray_block(frm, xpos, ypos, w, h); + } + }, + BMode::L1 => { + if let Some(buf) = ref_pic1 { + mc_dsp.do_mc(frm, buf, xpos, ypos, w, h, mv1); + } else { + mc_dsp.gray_block(frm, xpos, ypos, w, h); + } + }, + BMode::Bi => { + match (ref_pic0, ref_pic1) { + (Some(buf0), Some(buf1)) => { + mc_dsp.do_mc(frm, buf0, xpos, ypos, w, h, mv0); + mc_dsp.do_mc_avg(frm, buf1, xpos, ypos, w, h, mv1); + }, + (Some(buf0), None) => { + mc_dsp.do_mc(frm, buf0, xpos, ypos, w, h, mv0); + }, + (None, Some(buf1)) => { + mc_dsp.do_mc(frm, buf1, xpos, ypos, w, h, mv1); + }, + (None, None) => { + mc_dsp.gray_block(frm, xpos, ypos, w, h); + }, + }; + }, + }; + } else { + let mut tmp0 = McBlock::new(); + let mut tmp1 = McBlock::new(); + match (mode, ref_pic0, ref_pic1) { + (BMode::L0, Some(buf), _) | (BMode::L1, _, Some(buf)) => { + let (mv, weight) = if mode == BMode::L0 { (mv0, weight0) } else { (mv1, weight1) }; + mc_dsp.mc_blocks(&mut tmp0, buf, xpos, ypos, w, h, mv); + + let yoff = frm.offset[0] + xpos + ypos * frm.stride[0]; + let yw = if weight.luma_weighted { + [weight.luma_weight, weight.luma_offset, weight.luma_shift as i8] + } else { + [1, 0, 0] + }; + let wmode = match w { + 2 => 0, + 4 => 1, + 8 => 2, + _ => 3, + }; + (mc_dsp.put_block_weighted[wmode])(&mut frm.data[yoff..], frm.stride[0], &tmp0.y, h, yw); + + for chroma in 0..2 { + let cstride = frm.stride[chroma + 1]; + let coff = frm.offset[chroma + 1] + xpos / 2 + ypos / 2 * cstride; + let cw = if weight.chroma_weighted { + [weight.chroma_weight[chroma], weight.chroma_offset[chroma], 
weight.chroma_shift as i8]
+                    } else {
+                        [1, 0, 0]
+                    };
+                    let csrc = if chroma == 0 { &tmp0.u } else { &tmp0.v };
+                    (mc_dsp.put_block_weighted[wmode - 1])(&mut frm.data[coff..], cstride, csrc, h / 2, cw);
+                }
+            },
+            (BMode::Bi, Some(buf0), Some(buf1)) => { // do both and avg
+                mc_dsp.mc_blocks(&mut tmp0, buf0, xpos, ypos, w, h, mv0);
+                mc_dsp.mc_blocks(&mut tmp1, buf1, xpos, ypos, w, h, mv1);
+
+                let yoff = frm.offset[0] + xpos + ypos * frm.stride[0];
+                let yw = match (weight0.luma_weighted, weight1.luma_weighted) { // layout: [weight0, offset0, weight1, offset1, shift]
+                    (true, true) => [weight0.luma_weight, weight0.luma_offset, weight1.luma_weight, weight1.luma_offset, weight0.luma_shift as i8],
+                    (true, false) => [weight0.luma_weight, weight0.luma_offset, 1 << weight0.luma_shift, 0, weight0.luma_shift as i8], // unweighted side gets the unity weight 1 << shift
+                    (false, true) => [1 << weight1.luma_shift, 0, weight1.luma_weight, weight1.luma_offset, weight1.luma_shift as i8],
+                    (false, false) => [1, 0, 1, 0, 0],
+                };
+                let wmode = match w { // index into the per-width function tables; chroma below uses wmode - 1 for its half-width block
+                    2 => 0,
+                    4 => 1,
+                    8 => 2,
+                    _ => 3,
+                };
+                (mc_dsp.put_block_weighted2[wmode])(&mut frm.data[yoff..], frm.stride[0], &tmp0.y, &tmp1.y, h, yw);
+
+                for chroma in 0..2 {
+                    let cstride = frm.stride[chroma + 1];
+                    let coff = frm.offset[chroma + 1] + xpos / 2 + ypos / 2 * cstride;
+                    let cw0 = weight0.chroma_weight[chroma];
+                    let co0 = weight0.chroma_offset[chroma];
+                    let cw1 = weight1.chroma_weight[chroma];
+                    let co1 = weight1.chroma_offset[chroma];
+                    let cw = match (weight0.chroma_weighted, weight1.chroma_weighted) { // chroma must use chroma_shift (its own weight denominator), matching the single-list path above
+                        (true, true) => [cw0, co0, cw1, co1, weight0.chroma_shift as i8],
+                        (true, false) => [cw0, co0, 1 << weight0.chroma_shift, 0, weight0.chroma_shift as i8],
+                        (false, true) => [1 << weight1.chroma_shift, 0, cw1, co1, weight1.chroma_shift as i8],
+                        (false, false) => [1, 0, 1, 0, 0],
+                    };
+                    let csrc0 = if chroma == 0 { &tmp0.u } else { &tmp0.v };
+                    let csrc1 = if chroma == 0 { &tmp1.u } else { &tmp1.v };
+                    (mc_dsp.put_block_weighted2[wmode - 1])(&mut frm.data[coff..], cstride, csrc0, csrc1, h / 2, cw);
+                }
+            },
+            _ => {
+
mc_dsp.gray_block(frm, xpos, ypos, w, h); + }, + }; + } +} + +fn do_b_mc_4x4bi(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, mv: &[MV; 2], ref_pic0: Option<&SimpleFrame>, weight0: &WeightInfo, ref_pic1: Option<&SimpleFrame>, weight1: &WeightInfo, mc_dsp: &mut H264MC) { + if !weight0.is_weighted() || !weight1.is_weighted() { + match (ref_pic0, ref_pic1) { + (Some(buf0), Some(buf1)) => { + mc_dsp.do_mc(frm, buf0, xpos, ypos, 4, 4, mv[0]); + mc_dsp.do_mc_avg(frm, buf1, xpos, ypos, 4, 4, mv[1]); + }, + (Some(buf0), None) => { + mc_dsp.do_mc(frm, buf0, xpos, ypos, 4, 4, mv[0]); + }, + (None, Some(buf1)) => { + mc_dsp.do_mc(frm, buf1, xpos, ypos, 4, 4, mv[1]); + }, + (None, None) => { + mc_dsp.gray_block(frm, xpos, ypos, 4, 4); + }, + }; + } else { + let mut tmp0 = McBlock::new(); + let mut tmp1 = McBlock::new(); + match (ref_pic0, ref_pic1) { + (Some(buf0), Some(buf1)) => { // do both and avg + mc_dsp.mc_blocks(&mut tmp0, buf0, xpos, ypos, 4, 4, mv[0]); + mc_dsp.mc_blocks(&mut tmp1, buf1, xpos, ypos, 4, 4, mv[1]); + + let yoff = frm.offset[0] + xpos + ypos * frm.stride[0]; + let yw = match (weight0.luma_weighted, weight1.luma_weighted) { + (true, true) => [weight0.luma_weight, weight0.luma_offset, weight1.luma_weight, weight1.luma_offset, weight0.luma_shift as i8], + (true, false) => [weight0.luma_weight, weight0.luma_offset, 1 << weight0.luma_shift, 0, weight0.luma_shift as i8], + (false, true) => [1 << weight1.luma_shift, 0, weight1.luma_weight, weight1.luma_offset, weight1.luma_shift as i8], + (false, false) => [1, 0, 1, 0, 0], + }; + (mc_dsp.put_block_weighted2[1])(&mut frm.data[yoff..], frm.stride[0], &tmp0.y, &tmp1.y, 4, yw); + + for chroma in 0..2 { + let cstride = frm.stride[chroma + 1]; + let coff = frm.offset[chroma + 1] + xpos / 2 + ypos / 2 * cstride; + let cw0 = weight0.chroma_weight[chroma]; + let co0 = weight0.chroma_offset[chroma]; + let cw1 = weight1.chroma_weight[chroma]; + let co1 = weight1.chroma_offset[chroma]; + let cw = match 
(weight0.chroma_weighted, weight1.chroma_weighted) { // chroma must use chroma_shift (its own weight denominator), not luma_shift
+                        (true, true) => [cw0, co0, cw1, co1, weight0.chroma_shift as i8],
+                        (true, false) => [cw0, co0, 1 << weight0.chroma_shift, 0, weight0.chroma_shift as i8], // unweighted side gets the unity weight 1 << shift
+                        (false, true) => [1 << weight1.chroma_shift, 0, cw1, co1, weight1.chroma_shift as i8],
+                        (false, false) => [1, 0, 1, 0, 0],
+                    };
+                    let csrc0 = if chroma == 0 { &tmp0.u } else { &tmp0.v };
+                    let csrc1 = if chroma == 0 { &tmp1.u } else { &tmp1.v };
+                    (mc_dsp.put_block_weighted2[0])(&mut frm.data[coff..], cstride, csrc0, csrc1, 2, cw);
+                }
+            },
+            _ => {
+                mc_dsp.gray_block(frm, xpos, ypos, 4, 4);
+            },
+        };
+    }
+}
+
+fn get_weights(slice_hdr: &SliceHeader, frame_refs: &SimplifiedSliceRefs, mode: BMode, weight_mode: u8, ref_l0: PicRef, ref_l1: PicRef) -> (WeightInfo, WeightInfo) { // returns the (list 0, list 1) weights for one partition
+    let idx_l0 = ref_l0.index();
+    let idx_l1 = ref_l1.index();
+    if mode != BMode::Bi || weight_mode != 2 { // anything but bi-prediction with weight_mode 2 takes its weights from the slice header
+        (slice_hdr.get_weight(0, idx_l0), slice_hdr.get_weight(1, idx_l1))
+    } else if let (Some(Some(ref pic0)), Some(Some(ref pic1))) = (frame_refs.ref_list0.get(idx_l0), frame_refs.ref_list1.get(idx_l1)) { // otherwise weights are derived from the picture ids of both references
+        let r0_poc = pic0.full_id as u16;
+        let r1_poc = pic1.full_id as u16;
+        let cur_id = frame_refs.cur_id as u16;
+        if (r0_poc == r1_poc) || pic0.long_term || pic1.long_term { // equal ids would make td zero below
+            return (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO);
+        }
+
+        let td = (i32::from(r1_poc) - i32::from(r0_poc)).clamp(-128, 127);
+        let tx = (16384 + (td / 2).abs()) / td;
+        let tb = (i32::from(cur_id) - i32::from(r0_poc)).clamp(-128, 127);
+        let scale = ((tb * tx + 32) >> 6).clamp(-1024, 1023);
+        if scale == 128 || (scale >> 2) < -64 || (scale >> 2) > 128 { // scale == 128 would yield w0 == w1 == 32
+            return (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO);
+        }
+        let w1 = (scale >> 2) as i8; // NOTE(review): (scale >> 2) == 128 passes the check above but does not fit in i8 - confirm whether the weight fields need a wider type
+        let w0 = 64 - w1;
+
+        let weight0 = WeightInfo {
+            luma_weighted: true,
+            luma_weight: w0,
+            luma_offset: 0,
+            luma_shift: 5,
+            chroma_weighted: true,
+            chroma_weight: [w0; 2],
+            chroma_offset: [0; 2],
+            chroma_shift: 5,
+        };
+        let weight1 = WeightInfo {
+            luma_weighted: true,
+            luma_weight: w1,
+ luma_offset: 0, + luma_shift: 5, + chroma_weighted: true, + chroma_weight: [w1; 2], + chroma_offset: [0; 2], + chroma_shift: 5, + }; + + (weight0, weight1) + } else { + (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO) + } +} + +pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mc_dsp: &mut H264MC, weight_mode: u8) { + let xpos = sstate.mb_x * 16; + let ypos = sstate.mb_y * 16; + + match mb_info.mb_type { + MBType::Intra16x16(_, _, _) => { + pred_intra(frm, sstate, mb_info); + }, + MBType::Intra4x4 | MBType::Intra8x8 => { + pred_intra(frm, sstate, mb_info); + }, + MBType::PCM => {}, + MBType::PSkip => { + let mv = sstate.get_cur_blk4(0).mv[0]; + let rpic = frame_refs.select_ref_pic(0, 0); + let weight = &slice_hdr.get_weight(0, 0); + do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight, mc_dsp); + }, + MBType::P16x16 => { + let mv = sstate.get_cur_blk4(0).mv[0]; + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index()); + do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight, mc_dsp); + }, + MBType::P16x8 | MBType::P8x16 => { + let (bw, bh, bx, by) = if mb_info.mb_type == MBType::P16x8 { + (16, 8, 0, 8) + } else { + (8, 16, 8, 0) + }; + let mv = sstate.get_cur_blk4(0).mv[0]; + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index()); + do_p_mc(frm, xpos, ypos, bw, bh, mv, rpic, weight, mc_dsp); + let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0]; + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[1].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[1].index()); + do_p_mc(frm, xpos + bx, ypos + by, bw, bh, mv, rpic, weight, mc_dsp); + }, + MBType::P8x8 | MBType::P8x8Ref0 => { + for part in 0..4 { + let bx = (part & 1) * 8; + let by = (part & 2) * 4; + let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0]; + let rpic 
= frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[part].index()); + + match mb_info.sub_mb_type[part] { + SubMBType::P8x8 => { + do_p_mc(frm, xpos + bx, ypos + by, 8, 8, mv, rpic, weight, mc_dsp); + }, + SubMBType::P8x4 => { + do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic, weight, mc_dsp); + let mv = sstate.get_cur_blk4(bx / 4 + by + 4).mv[0]; + do_p_mc(frm, xpos + bx, ypos + by + 4, 8, 4, mv, rpic, weight, mc_dsp); + }, + SubMBType::P4x8 => { + do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic, weight, mc_dsp); + let mv = sstate.get_cur_blk4(bx / 4 + by + 1).mv[0]; + do_p_mc(frm, xpos + bx + 4, ypos + by, 4, 8, mv, rpic, weight, mc_dsp); + }, + SubMBType::P4x4 => { + for sb_no in 0..4 { + let sxpos = xpos + bx + (sb_no & 1) * 4; + let sypos = ypos + by + (sb_no & 2) * 2; + let sblk_no = (bx / 4 + (sb_no & 1)) + ((by / 4) + (sb_no >> 1)) * 4; + let mv = sstate.get_cur_blk4(sblk_no).mv[0]; + do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic, weight, mc_dsp); + } + }, + _ => unreachable!(), + }; + } + }, + MBType::B16x16(mode) => { + let mv0 = sstate.get_cur_blk4(0).mv[0]; + let rpic0 = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); + let mv1 = sstate.get_cur_blk4(0).mv[1]; + let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[0].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, mb_info.ref_l0[0], mb_info.ref_l1[0]); + do_b_mc(frm, mode, xpos, ypos, 16, 16, mv0, rpic0, &weight0, mv1, rpic1, &weight1, mc_dsp); + }, + MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => { + let (pw, ph) = mb_info.mb_type.size(); + let (px, py) = (pw & 8, ph & 8); + let modes = [mode0, mode1]; + let (mut bx, mut by) = (0, 0); + for part in 0..2 { + let blk = if part == 0 { 0 } else { (px / 4) + py }; + let mv0 = sstate.get_cur_blk4(blk).mv[0]; + let rpic0 = frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index()); + let mv1 = sstate.get_cur_blk4(blk).mv[1]; + 
let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[part].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, modes[part], weight_mode, mb_info.ref_l0[part], mb_info.ref_l1[part]); + do_b_mc(frm, modes[part], xpos + bx, ypos + by, pw, ph, mv0, rpic0, &weight0, mv1, rpic1, &weight1, mc_dsp); + bx += px; + by += py; + } + }, + MBType::Direct | MBType::BSkip => { + let colo_mb_type = frame_refs.get_colocated_info(sstate.mb_x, sstate.mb_y).0.mb_type; + let is_16x16 = colo_mb_type.is_16x16_ref(); + + if is_16x16 { + let mv = sstate.get_cur_blk4(0).mv; + let ref_idx = sstate.get_cur_blk8(0).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); + do_b_mc(frm, BMode::Bi, xpos, ypos, 16, 16, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + } else { + for blk4 in 0..16 { + let ref_idx = sstate.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); + let mv = &sstate.get_cur_blk4(blk4).mv; + do_b_mc_4x4bi(frm, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, mv, rpic0, &weight0, rpic1, &weight1, mc_dsp); + } + } + sstate.apply_to_blk8(|blk8| { blk8.ref_idx[0].set_direct(); blk8.ref_idx[1].set_direct(); }); + }, + MBType::B8x8 => { + for part in 0..4 { + let ridx = sstate.get_cur_blk8(part).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ridx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ridx[1].index()); + let subtype = mb_info.sub_mb_type[part]; + let blk8 = (part & 1) * 2 + (part & 2) * 4; + let mut bx = (part & 1) * 8; + let mut by = (part & 2) * 4; + match subtype { + SubMBType::Direct8x8 => { + for blk in 0..4 { + 
let ref_idx = sstate.get_cur_blk8(bx / 8 + (by / 8) * 2).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); + let mv = &sstate.get_cur_blk4(bx / 4 + (by / 4) * 4).mv; + do_b_mc_4x4bi(frm, xpos + bx, ypos + by, mv, rpic0, &weight0, rpic1, &weight1, mc_dsp); + bx += 4; + if blk == 1 { + bx -= 8; + by += 4; + } + } + sstate.get_cur_blk8(part).ref_idx[0].set_direct(); + sstate.get_cur_blk8(part).ref_idx[1].set_direct(); + }, + SubMBType::B8x8(mode) => { + let mv = sstate.get_cur_blk4(blk8).mv; + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); + do_b_mc(frm, mode, xpos + bx, ypos + by, 8, 8, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + }, + SubMBType::B8x4(mode) | SubMBType::B4x8(mode) => { + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); + let (pw, ph) = subtype.size(); + let mv = sstate.get_cur_blk4(blk8).mv; + do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + let addr2 = blk8 + (pw & 4) / 4 + (ph & 4); + let mv = sstate.get_cur_blk4(addr2).mv; + do_b_mc(frm, mode, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + }, + SubMBType::B4x4(mode) => { + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); + for i in 0..4 { + let addr2 = blk8 + (i & 1) + (i & 2) * 2; + let mv = sstate.get_cur_blk4(addr2).mv; + do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + bx += 4; + if i == 1 { + bx -= 8; + by += 4; + } + } + }, + _ => unreachable!(), + }; + } + }, + }; + if !mb_info.mb_type.is_skip() { + if mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != 
MBType::Intra8x8 {
+            add_luma(frm, sstate, mb_info);
+        }
+        add_chroma(frm, sstate, mb_info);
+    }
+}
+
+pub fn wait_for_mb(disp: &Shareable, sstate: &SliceState, xpos: usize, ypos: usize, mv: MV, ref_id: u32) -> DecoderResult<()> { // blocks until the macroblock covering (xpos, ypos) displaced by mv is reported decoded for picture ref_id
+    let xpos = xpos as isize + ((mv.x >> 2) as isize) + 6; // mv >> 2 drops the two fractional bits; +6 is presumably the interpolation margin - TODO confirm
+    let ypos = ypos as isize + ((mv.y >> 2) as isize) + 6;
+    let dst_mb_x = ((xpos.max(0) as usize) / 16).min(sstate.mb_w - 1); // clamp to the picture so out-of-frame vectors wait on an edge macroblock
+    let dst_mb_y = ((ypos.max(0) as usize) / 16).min(sstate.mb_h - 1);
+    let expected_mb = dst_mb_x + dst_mb_y * sstate.mb_w;
+    loop {
+        if let Ok(ds) = disp.read() { // the guard is dropped before yielding so writers are not held up
+            match ds.check_pos(ref_id, expected_mb) {
+                FrameDecodingStatus::Ok => return Ok(()),
+                FrameDecodingStatus::NotReady => {},
+                _ => return Err(DecoderError::MissingReference),
+            };
+        } else { return Err(DecoderError::MissingReference); } // a failed read() (presumably a poisoned lock, which never recovers) used to be ignored, leaving this loop spinning forever
+        std::thread::yield_now();
+    }
+}
+
+fn wait_b_mc(disp: &Shareable, sstate: &SliceState, frame_refs: &SimplifiedSliceRefs, mv: [MV; 2], ref_idx: [PicRef; 2], xpos: usize, ypos: usize, w: usize, h: usize) -> DecoderResult<()> { // waits on the bottom-right corner of the block in each list that has a reference id
+    if let Some(ref_id) = frame_refs.get_ref_id(0, ref_idx[0].index()) {
+        wait_for_mb(disp, sstate, xpos + w, ypos + h, mv[0], ref_id)?;
+    }
+    if let Some(ref_id) = frame_refs.get_ref_id(1, ref_idx[1].index()) {
+        wait_for_mb(disp, sstate, xpos + w, ypos + h, mv[1], ref_id)?;
+    }
+    Ok(())
+}
+
+pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mc_dsp: &mut H264MC, weight_mode: u8, disp: &Shareable) -> DecoderResult<()> {
+    let xpos = sstate.mb_x * 16;
+    let ypos = sstate.mb_y * 16;
+
+    match mb_info.mb_type {
+        MBType::Intra16x16(_, _, _) => {
+            pred_intra(frm, sstate, mb_info);
+        },
+        MBType::Intra4x4 | MBType::Intra8x8 => {
+            pred_intra(frm, sstate, mb_info);
+        },
+        MBType::PCM => {},
+        MBType::PSkip => {
+            let mv = sstate.get_cur_blk4(0).mv[0];
+            if let Some(ref_id) = frame_refs.get_ref_id(0, 0) {
+                wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv, ref_id)?;
+            }
+            let rpic =
frame_refs.select_ref_pic(0, 0); + let weight = &slice_hdr.get_weight(0, 0); + do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight, mc_dsp); + }, + MBType::P16x16 => { + let mv = sstate.get_cur_blk4(0).mv[0]; + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[0].index()) { + wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv, ref_id)?; + } + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index()); + do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight, mc_dsp); + }, + MBType::P16x8 | MBType::P8x16 => { + let (bw, bh, bx, by) = if mb_info.mb_type == MBType::P16x8 { + (16, 8, 0, 8) + } else { + (8, 16, 8, 0) + }; + let mv = sstate.get_cur_blk4(0).mv[0]; + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[0].index()) { + wait_for_mb(disp, sstate, xpos + bw, ypos + bh, mv, ref_id)?; + } + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index()); + do_p_mc(frm, xpos, ypos, bw, bh, mv, rpic, weight, mc_dsp); + let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0]; + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[1].index()) { + wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv, ref_id)?; + } + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[1].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[1].index()); + do_p_mc(frm, xpos + bx, ypos + by, bw, bh, mv, rpic, weight, mc_dsp); + }, + MBType::P8x8 | MBType::P8x8Ref0 => { + for part in 0..4 { + let bx = (part & 1) * 8; + let by = (part & 2) * 4; + let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0]; + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[part].index()); + + match mb_info.sub_mb_type[part] { + SubMBType::P8x8 => { + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { + wait_for_mb(disp, sstate, xpos + bx + 8, 
ypos + by + 8, mv, ref_id)?; + } + do_p_mc(frm, xpos + bx, ypos + by, 8, 8, mv, rpic, weight, mc_dsp); + }, + SubMBType::P8x4 => { + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { + wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 4, mv, ref_id)?; + } + do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic, weight, mc_dsp); + let mv = sstate.get_cur_blk4(bx / 4 + by + 4).mv[0]; + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { + wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 8, mv, ref_id)?; + } + do_p_mc(frm, xpos + bx, ypos + by + 4, 8, 4, mv, rpic, weight, mc_dsp); + }, + SubMBType::P4x8 => { + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { + wait_for_mb(disp, sstate, xpos + bx + 4, ypos + by + 8, mv, ref_id)?; + } + do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic, weight, mc_dsp); + let mv = sstate.get_cur_blk4(bx / 4 + by + 1).mv[0]; + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { + wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 8, mv, ref_id)?; + } + do_p_mc(frm, xpos + bx + 4, ypos + by, 4, 8, mv, rpic, weight, mc_dsp); + }, + SubMBType::P4x4 => { + for sb_no in 0..4 { + let sxpos = xpos + bx + (sb_no & 1) * 4; + let sypos = ypos + by + (sb_no & 2) * 2; + let sblk_no = (bx / 4 + (sb_no & 1)) + ((by / 4) + (sb_no >> 1)) * 4; + let mv = sstate.get_cur_blk4(sblk_no).mv[0]; + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) { + wait_for_mb(disp, sstate, sxpos + 4, sypos + 4, mv, ref_id)?; + } + do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic, weight, mc_dsp); + } + }, + _ => unreachable!(), + }; + } + }, + MBType::B16x16(mode) => { + let mv0 = sstate.get_cur_blk4(0).mv[0]; + let rpic0 = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); + let mv1 = sstate.get_cur_blk4(0).mv[1]; + let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[0].index()); + let (weight0, weight1) = 
get_weights(slice_hdr, frame_refs, mode, weight_mode, mb_info.ref_l0[0], mb_info.ref_l1[0]); + wait_b_mc(disp, sstate, frame_refs, [mv0, mv1], [mb_info.ref_l0[0], mb_info.ref_l1[0]], xpos, ypos, 16, 16)?; + do_b_mc(frm, mode, xpos, ypos, 16, 16, mv0, rpic0, &weight0, mv1, rpic1, &weight1, mc_dsp); + }, + MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => { + let (pw, ph) = mb_info.mb_type.size(); + let (px, py) = (pw & 8, ph & 8); + let modes = [mode0, mode1]; + let (mut bx, mut by) = (0, 0); + for part in 0..2 { + let blk = if part == 0 { 0 } else { (px / 4) + py }; + let mv0 = sstate.get_cur_blk4(blk).mv[0]; + let rpic0 = frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index()); + let mv1 = sstate.get_cur_blk4(blk).mv[1]; + let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[part].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, modes[part], weight_mode, mb_info.ref_l0[part], mb_info.ref_l1[part]); + wait_b_mc(disp, sstate, frame_refs, [mv0, mv1], [mb_info.ref_l0[part], mb_info.ref_l1[part]], xpos + bx, ypos + by, pw, ph)?; + do_b_mc(frm, modes[part], xpos + bx, ypos + by, pw, ph, mv0, rpic0, &weight0, mv1, rpic1, &weight1, mc_dsp); + bx += px; + by += py; + } + }, + MBType::Direct | MBType::BSkip => { + if let Some(ref_id) = frame_refs.get_ref_id(1, mb_info.ref_l1[0].index()) { + wait_for_mb(disp, sstate, xpos, ypos, ZERO_MV, ref_id)?; + } + let colo_mb_type = frame_refs.get_colocated_info(sstate.mb_x, sstate.mb_y).0.mb_type; + let is_16x16 = colo_mb_type.is_16x16_ref(); + + if is_16x16 { + let mv = sstate.get_cur_blk4(0).mv; + let ref_idx = sstate.get_cur_blk8(0).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); + if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[0].index()) { + wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv[0], ref_id)?; + } + if let Some(ref_id) = frame_refs.get_ref_id(1, mb_info.ref_l1[0].index()) { + 
wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv[1], ref_id)?; + } + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); + wait_b_mc(disp, sstate, frame_refs, mv, ref_idx, xpos, ypos, 16, 16)?; + do_b_mc(frm, BMode::Bi, xpos, ypos, 16, 16, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + } else { + for blk4 in 0..16 { + let mv = sstate.get_cur_blk4(blk4).mv; + let ref_idx = sstate.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx; + if let Some(ref_id) = frame_refs.get_ref_id(0, ref_idx[0].index()) { + wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv[0], ref_id)?; + } + if let Some(ref_id) = frame_refs.get_ref_id(1, ref_idx[1].index()) { + wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv[1], ref_id)?; + } + let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); + wait_b_mc(disp, sstate, frame_refs, mv, ref_idx, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, 4, 4)?; + do_b_mc(frm, BMode::Bi, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + } + } + sstate.apply_to_blk8(|blk8| { blk8.ref_idx[0].set_direct(); blk8.ref_idx[1].set_direct(); }); + }, + MBType::B8x8 => { + for part in 0..4 { + let ridx = sstate.get_cur_blk8(part).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ridx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ridx[1].index()); + let subtype = mb_info.sub_mb_type[part]; + let blk8 = (part & 1) * 2 + (part & 2) * 4; + let mut bx = (part & 1) * 8; + let mut by = (part & 2) * 4; + match subtype { + SubMBType::Direct8x8 => { + for blk in 0..4 { + let mv = sstate.get_cur_blk4(bx / 4 + (by / 4) * 4).mv; + let ref_idx = sstate.get_cur_blk8(bx / 8 + (by / 8) * 2).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); + let rpic1 
= frame_refs.select_ref_pic(1, ref_idx[1].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); + wait_b_mc(disp, sstate, frame_refs, mv, ref_idx, xpos + bx, ypos + by, 4, 4)?; + do_b_mc(frm, BMode::Bi, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + bx += 4; + if blk == 1 { + bx -= 8; + by += 4; + } + } + sstate.get_cur_blk8(part).ref_idx[0].set_direct(); + sstate.get_cur_blk8(part).ref_idx[1].set_direct(); + }, + SubMBType::B8x8(mode) => { + let mv = sstate.get_cur_blk4(blk8).mv; + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); + wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx, ypos + by, 8, 8)?; + do_b_mc(frm, mode, xpos + bx, ypos + by, 8, 8, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + }, + SubMBType::B8x4(mode) | SubMBType::B4x8(mode) => { + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); + let (pw, ph) = subtype.size(); + let mv = sstate.get_cur_blk4(blk8).mv; + wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx, ypos + by, pw, ph)?; + do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + let addr2 = blk8 + (pw & 4) / 4 + (ph & 4); + let mv = sstate.get_cur_blk4(addr2).mv; + wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph)?; + do_b_mc(frm, mode, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp); + }, + SubMBType::B4x4(mode) => { + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); + for i in 0..4 { + let addr2 = blk8 + (i & 1) + (i & 2) * 2; + let mv = sstate.get_cur_blk4(addr2).mv; + wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx, ypos + by, 4, 4)?; + do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, 
&weight0, mv[1], rpic1, &weight1, mc_dsp); + bx += 4; + if i == 1 { + bx -= 8; + by += 4; + } + } + }, + _ => unreachable!(), + }; + } + }, + }; + if !mb_info.mb_type.is_skip() { + if mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 { + add_luma(frm, sstate, mb_info); + } + add_chroma(frm, sstate, mb_info); + } + Ok(()) +} diff --git a/nihav-itu/src/codecs/h264/high/mod.rs b/nihav-itu/src/codecs/h264/high/mod.rs new file mode 100644 index 0000000..c7e28f4 --- /dev/null +++ b/nihav-itu/src/codecs/h264/high/mod.rs @@ -0,0 +1,28 @@ +mod types; +pub use types::*; +mod pic_ref; +pub use pic_ref::*; +#[allow(clippy::identity_op)] +#[allow(clippy::erasing_op)] +#[allow(clippy::many_single_char_names)] +#[allow(clippy::range_plus_one)] +mod dsp; +use dsp::*; +mod cabac; +use cabac::*; +use super::cabac_coder::*; +mod cavlc; +use cavlc::*; +mod loopfilter; +use loopfilter::*; +mod mb_recon; +use mb_recon::*; +use super::sets::*; +use super::slice::*; + +pub mod decoder_st; +mod dispatch; +pub mod decoder_mt; + +use super::common_types::*; + diff --git a/nihav-itu/src/codecs/h264/high/pic_ref.rs b/nihav-itu/src/codecs/h264/high/pic_ref.rs new file mode 100644 index 0000000..b467aa8 --- /dev/null +++ b/nihav-itu/src/codecs/h264/high/pic_ref.rs @@ -0,0 +1,655 @@ +use nihav_core::codecs::DecoderResult; +use nihav_core::frame::{FrameType, NAVideoBufferRef, NATimeInfo}; +use nihav_core::refs::*; +use nihav_codec_support::codecs::MV; +use super::super::common_types::*; +use super::super::sets::SeqParameterSet; +use super::super::slice::*; +use super::types::*; + +#[derive(Clone)] +pub struct PictureInfo { + pub id: u16, + pub full_id: u32, + pub time: NATimeInfo, + pub user_id: u32, + pub pic_type: FrameType, + pub buf: NAVideoBufferRef, + pub cur_mb: usize, + pub is_ref: bool, + pub is_idr: bool, + pub long_term: Option, + + pub mv_info: NABufferRef, +} + +#[derive(Clone,Copy,Default, Debug)] +pub struct FrameMBInfo { + pub mb_type: CompactMBType, + pub 
ref_poc: [[u16; 2]; 4], + pub ref_idx: [[PicRef; 2]; 4], + pub mv: [[MV; 2]; 16], +} + +impl FrameMBInfo { + pub fn new() -> Self { Self::default() } +} + +#[derive(Clone)] +pub struct FrameMV { + pub mbs: Vec, + pub mb_stride: usize, +} + +impl FrameMV { + pub fn new(mb_w: usize, mb_h: usize) -> Self { + Self { + mbs: vec![FrameMBInfo::default(); mb_w * mb_h], + mb_stride: mb_w, + } + } +} + +#[derive(Clone)] +pub struct SliceRefs { + pub ref_list0: Vec>, + pub ref_list1: Vec>, + pub cur_id: u32, +} + +#[allow(dead_code)] +impl SliceRefs { + pub fn get_ref_id(&self, list_id: u8, ref_id: usize) -> Option { + let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 }; + if ref_list.len() > ref_id { + ref_list[ref_id].as_ref().map(|pic| pic.full_id) + } else { + None + } + } + pub fn select_ref_pic(&self, list_id: u8, ref_id: usize) -> Option> { + let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 }; + if ref_list.len() > ref_id { + ref_list[ref_id].as_ref().map(|pic| pic.buf.clone()) + } else { + None + } + } + pub fn get_colocated_info(&self, mb_x: usize, mb_y: usize) -> (FrameMBInfo, u16, bool) { + if let Some(ref ref_pic) = &self.ref_list1[0] { + let mv_info = &ref_pic.mv_info; + let mb = mv_info.mbs[mb_x + mb_y * mv_info.mb_stride]; + (mb, ref_pic.full_id as u16, ref_pic.long_term.is_some()) + } else { + (FrameMBInfo::default(), 0, false) + } + } + pub fn map_ref0(&self, ref0_id: u16) -> (PicRef, bool) { + let mut r0_idx = 0; + let mut long = false; + for (i, rpic0) in self.ref_list0.iter().enumerate() { + if let Some(ref pic) = rpic0 { + if (pic.full_id as u16) == ref0_id { + r0_idx = i as u8; + long = pic.long_term.is_some(); + break; + } + } + } + (PicRef::new(r0_idx), long) + } + pub fn map_refs(&self, ref_idx: [PicRef; 2]) -> [u16; 2] { + let r0 = ref_idx[0].index(); + let r1 = ref_idx[1].index(); + let ref0 = if r0 < self.ref_list0.len() { + if let Some(ref pic) = self.ref_list0[r0] { + pic.full_id as u16 + } else { 
+ MISSING_POC + } + } else { + MISSING_POC + }; + let ref1 = if r1 < self.ref_list1.len() { + if let Some(ref pic) = self.ref_list1[r1] { + pic.full_id as u16 + } else { + MISSING_POC + } + } else { + MISSING_POC + }; + [ref0, ref1] + } + pub fn cmp_refs(&self, ref1: [PicRef; 2], ref2: [PicRef; 2]) -> bool { + if ref1 != ref2 { + self.cmp_ref(ref1[0], ref2[0], 0) && self.cmp_ref(ref1[1], ref2[1], 1) + } else { + true + } + } + fn cmp_ref(&self, ref1: PicRef, ref2: PicRef, list: u8) -> bool { + if ref1 == ref2 { + true + } else { + let idx0 = ref1.index(); + let idx1 = ref2.index(); + if idx0 == idx1 { + return true; + } + let src = if list == 0 { &self.ref_list0 } else { &self.ref_list1 }; + if idx0 >= src.len() || idx1 >= src.len() { +//panic!("wrong refs"); + return false; + } + if let (Some(ref pic0), Some(ref pic1)) = (&src[idx0], &src[idx1]) { + pic0.full_id == pic1.full_id + } else { +//panic!("missing pics"); + false + } + } + } +} + +#[derive(Clone)] +pub struct SimplePictureInfo<'a> { + pub full_id: u32, + pub buf: SimpleFrame<'a>, + pub long_term: bool, + pub mv_info: &'a FrameMV, +} + +#[derive(Clone)] +pub struct SimplifiedSliceRefs<'a> { + pub ref_list0: Vec>>, + pub ref_list1: Vec>>, + pub cur_id: u32, +} + +impl<'a> SimplifiedSliceRefs<'a> { + pub fn new(srefs: &'a SliceRefs) -> Self { + let mut ref_list0 = Vec::with_capacity(srefs.ref_list0.len()); + let mut ref_list1 = Vec::with_capacity(srefs.ref_list1.len()); + for entry in srefs.ref_list0.iter() { + ref_list0.push(entry.as_ref().map(|pic| SimplePictureInfo { + full_id: pic.full_id, + buf: SimpleFrame::new(&pic.buf), + long_term: pic.long_term.is_some(), + mv_info: &pic.mv_info, + })); + } + for entry in srefs.ref_list1.iter() { + ref_list1.push(entry.as_ref().map(|pic| SimplePictureInfo { + full_id: pic.full_id, + buf: SimpleFrame::new(&pic.buf), + long_term: pic.long_term.is_some(), + mv_info: &pic.mv_info, + })); + } + Self { + cur_id: srefs.cur_id, + ref_list0, ref_list1 + } + } + pub fn 
get_ref_id(&self, list_id: u8, ref_id: usize) -> Option { + let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 }; + if ref_list.len() > ref_id { + ref_list[ref_id].as_ref().map(|pic| pic.full_id) + } else { + None + } + } + pub fn select_ref_pic(&self, list_id: u8, ref_id: usize) -> Option<&SimpleFrame<'_>> { + let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 }; + if ref_list.len() > ref_id { + ref_list[ref_id].as_ref().map(|pic| &pic.buf) + } else { + None + } + } + pub fn get_colocated_info(&self, mb_x: usize, mb_y: usize) -> (FrameMBInfo, u16, bool) { + if let Some(ref ref_pic) = &self.ref_list1[0] { + let mv_info = ref_pic.mv_info; + let mb = mv_info.mbs[mb_x + mb_y * mv_info.mb_stride]; + (mb, ref_pic.full_id as u16, ref_pic.long_term) + } else { + (FrameMBInfo::default(), 0, false) + } + } + pub fn map_ref0(&self, ref0_id: u16) -> (PicRef, bool) { + let mut r0_idx = 0; + let mut long = false; + for (i, rpic0) in self.ref_list0.iter().enumerate() { + if let Some(ref pic) = rpic0 { + if (pic.full_id as u16) == ref0_id { + r0_idx = i as u8; + long = pic.long_term; + break; + } + } + } + (PicRef::new(r0_idx), long) + } + pub fn map_refs(&self, ref_idx: [PicRef; 2]) -> [u16; 2] { + let r0 = ref_idx[0].index(); + let r1 = ref_idx[1].index(); + let ref0 = if r0 < self.ref_list0.len() { + if let Some(ref pic) = self.ref_list0[r0] { + pic.full_id as u16 + } else { + MISSING_POC + } + } else { + MISSING_POC + }; + let ref1 = if r1 < self.ref_list1.len() { + if let Some(ref pic) = self.ref_list1[r1] { + pic.full_id as u16 + } else { + MISSING_POC + } + } else { + MISSING_POC + }; + [ref0, ref1] + } + pub fn cmp_refs(&self, ref1: [PicRef; 2], ref2: [PicRef; 2]) -> bool { + if ref1 != ref2 { + self.cmp_ref(ref1[0], ref2[0], 0) && self.cmp_ref(ref1[1], ref2[1], 1) + } else { + true + } + } + fn cmp_ref(&self, ref1: PicRef, ref2: PicRef, list: u8) -> bool { + if ref1 == ref2 { + true + } else { + let idx0 = ref1.index(); + let 
idx1 = ref2.index(); + if idx0 == idx1 { + return true; + } + let src = if list == 0 { &self.ref_list0 } else { &self.ref_list1 }; + if idx0 >= src.len() || idx1 >= src.len() { +//panic!("wrong refs"); + return false; + } + if let (Some(ref pic0), Some(ref pic1)) = (&src[idx0], &src[idx1]) { + pic0.full_id == pic1.full_id + } else { +//panic!("missing pics"); + false + } + } + } +} + +pub struct FrameRefs { + pub ref_pics: Vec, + pub cur_refs: SliceRefs, + pub long_term: Vec>, + + prev_poc_msb: u32, + prev_poc_lsb: u16, + prev_ref_poc_lsb: u16, + prev_frame_num: u16, + frame_num_offset: u32, + max_frame_num: i32, +} + +impl FrameRefs { + pub fn new() -> Self { + Self { + ref_pics: Vec::with_capacity(16), + cur_refs: SliceRefs { + ref_list0: Vec::with_capacity(3), + ref_list1: Vec::with_capacity(3), + cur_id: 0, + }, + long_term: Vec::new(), + + prev_poc_msb: 0, + prev_poc_lsb: 0, + prev_ref_poc_lsb: 0, + prev_frame_num: 0, + frame_num_offset: 0, + max_frame_num: 0, + } + } + pub fn fill_ref_nums(&self, dst: &mut Vec) { + for pic in self.ref_pics.iter() { + if !dst.contains(&pic.full_id) { + dst.push(pic.full_id); + } + } + for pic in self.long_term.iter().flatten() { + if !dst.contains(&pic.full_id) { + dst.push(pic.full_id); + } + } + } + pub fn calc_picture_num(&mut self, slice_hdr: &SliceHeader, is_idr: bool, ref_id: u8, sps: &SeqParameterSet) -> u32 { + self.max_frame_num = 1 << sps.log2_max_frame_num; + match sps.pic_order_cnt_type { + 0 => { + if is_idr { + //self.prev_poc_msb = 0; + self.prev_poc_lsb = 0; + } else { + self.prev_poc_lsb = self.prev_ref_poc_lsb; + } + let max_poc_lsb = 1 << sps.log2_max_pic_order_cnt_lsb; + let half_max_poc_lsb = 1 << (sps.log2_max_pic_order_cnt_lsb - 1); + let cur_lsb = slice_hdr.pic_order_cnt_lsb; + let poc_msb = if cur_lsb < self.prev_poc_lsb && (self.prev_poc_lsb - cur_lsb >= half_max_poc_lsb) { + self.prev_poc_msb + max_poc_lsb + } else if cur_lsb > self.prev_poc_lsb && (cur_lsb - self.prev_poc_lsb > half_max_poc_lsb) { + 
self.prev_poc_msb.wrapping_sub(max_poc_lsb) + } else { + self.prev_poc_msb + }; + let poc = poc_msb + u32::from(cur_lsb); + if ref_id != 0 { + self.prev_ref_poc_lsb = slice_hdr.pic_order_cnt_lsb; + self.prev_poc_msb = poc_msb; + } + poc + }, + 1 => { + let off = if self.prev_frame_num > slice_hdr.frame_num { + self.frame_num_offset + (1 << sps.log2_max_frame_num) + } else { + self.frame_num_offset + }; + let mut anum = if sps.num_ref_frames_in_pic_order_cnt_cycle != 0 { + (off as i32) + i32::from(slice_hdr.frame_num) + } else { + 0 + }; + if ref_id == 0 && anum > 0 { + anum -= 1; + } + let (poc_cycle_cnt, fno_in_poc_cycle) = if anum > 0 { + let nrf = sps.num_ref_frames_in_pic_order_cnt_cycle as i32; + ((anum - 1) / nrf, (anum - 1) % nrf) + } else { + (0, 0) + }; + let mut expected_delta = 0; + for &offset in sps.offset_for_ref_frame[..sps.num_ref_frames_in_pic_order_cnt_cycle].iter() { + expected_delta += offset; + } + let mut expected_poc = if anum > 0 { + let mut sum = poc_cycle_cnt * expected_delta; + for &offset in sps.offset_for_ref_frame[..=fno_in_poc_cycle as usize].iter() { + sum += offset; + } + sum + } else { + 0 + }; + if ref_id == 0 { + expected_poc += sps.offset_for_non_ref_pic; + } + let (top_id, _bottom_id) = if !slice_hdr.field_pic { + let top_id = expected_poc + slice_hdr.delta_pic_order_cnt[0]; + let bot_id = top_id + sps.offset_for_top_to_bottom_field + slice_hdr.delta_pic_order_cnt[1]; + (top_id, bot_id) + } else if !slice_hdr.bottom_field { + (expected_poc + slice_hdr.delta_pic_order_cnt[0], 0) + } else { + (0, sps.offset_for_top_to_bottom_field + slice_hdr.delta_pic_order_cnt[1]) + }; + self.prev_frame_num = slice_hdr.frame_num; + self.frame_num_offset = off; + top_id as u32 + }, + _ => { + if slice_hdr.frame_num < self.prev_frame_num { + self.frame_num_offset += 1 << sps.log2_max_frame_num; + } + self.prev_frame_num = slice_hdr.frame_num; + self.frame_num_offset + u32::from(slice_hdr.frame_num) + }, + } + } + pub fn 
apply_adaptive_marking(&mut self, marking: &AdaptiveMarking, cur_id: u16, max_id_mask: u16) -> DecoderResult<()> { + let all_ref_pics = self.ref_pics.clone(); + + for (&op, (&arg1, &arg2)) in marking.memory_management_control_op.iter().zip(marking.operation_arg.iter().zip(marking.operation_arg2.iter())).take(marking.num_ops) { + match op { + 1 => { + let src_id = cur_id.wrapping_sub(arg1) & max_id_mask; + let mut found = false; + let mut idx = 0; + for (i, pic) in self.ref_pics.iter().enumerate() { + if pic.id == src_id { + found = true; + idx = i; + break; + } + } + if found { + self.ref_pics.remove(idx); + } + }, + 2 => { // mark long term picture as unused + let idx = arg1 as usize; + if idx < self.long_term.len() { + self.long_term[idx] = None; + } + }, + 3 => { + let src_id = cur_id.wrapping_sub(arg1) & max_id_mask; + + let didx = arg2 as usize; + for pic in all_ref_pics.iter() { + if pic.id == src_id { + if didx < self.long_term.len() { + self.long_term[didx] = Some(pic.clone()); + } + break; + } + } + }, + 4 => { + self.long_term.resize(arg1 as usize, None); + }, + 5 => { + self.ref_pics.clear(); + self.long_term.clear(); + }, + 6 => { + // assign an long term index to current pic - done elsewhere + }, + _ => {}, + }; + } + Ok(()) + } + pub fn clear_refs(&mut self) { + self.ref_pics.clear(); + self.long_term.clear(); + } + #[allow(clippy::cognitive_complexity)] + pub fn select_refs(&mut self, sps: &SeqParameterSet, slice_hdr: &SliceHeader, cur_id: u32) { + self.cur_refs.cur_id = cur_id; + self.cur_refs.ref_list0.clear(); + self.cur_refs.ref_list1.clear(); + let pic_num_mask = if sps.log2_max_frame_num == 16 { + 0xFFFF + } else { + (1 << sps.log2_max_frame_num) - 1 + }; + + if !slice_hdr.slice_type.is_intra() { + let has_reordering = slice_hdr.ref_pic_list_reordering_l0; + if !has_reordering { + let num_ref = slice_hdr.num_ref_idx_l0_active; + if slice_hdr.slice_type.is_p() { + if !self.ref_pics.is_empty() { + for pic in 
self.ref_pics.iter().rev().take(num_ref) { + self.cur_refs.ref_list0.push(Some(pic.clone())); + } + } + } else { + let mut pivot = 0; + for (i, pic) in self.ref_pics.iter().enumerate() { + pivot = i; + if pic.full_id > cur_id { + break; + } + } + for pic in self.ref_pics[..pivot].iter().rev() { + if self.cur_refs.ref_list0.len() >= num_ref { + break; + } + self.cur_refs.ref_list0.push(Some(pic.clone())); + } + for pic in self.ref_pics.iter().skip(pivot) { + if self.cur_refs.ref_list0.len() >= num_ref { + break; + } + self.cur_refs.ref_list0.push(Some(pic.clone())); + } + } + if !self.long_term.is_empty() && self.cur_refs.ref_list0.len() < num_ref { + let copy_size = num_ref - self.cur_refs.ref_list0.len(); + for ltpic in self.long_term.iter().take(copy_size) { + self.cur_refs.ref_list0.push(ltpic.clone()); + } + } + } else { + form_ref_list(&mut self.cur_refs.ref_list0, + &self.ref_pics, &self.long_term, + &slice_hdr.reordering_list_l0, + slice_hdr.frame_num, pic_num_mask); + } + if slice_hdr.slice_type.is_b() { + let has_reordering = slice_hdr.ref_pic_list_reordering_l1; + if !has_reordering { + let num_ref = slice_hdr.num_ref_idx_l1_active; + let mut pivot = 0; + for (i, pic) in self.ref_pics.iter().enumerate() { + pivot = i; + if pic.full_id > cur_id { + break; + } + } + for pic in self.ref_pics.iter().skip(pivot) { + if self.cur_refs.ref_list1.len() >= num_ref { + break; + } + self.cur_refs.ref_list1.push(Some(pic.clone())); + } + for pic in self.ref_pics[..pivot].iter().rev() { + if self.cur_refs.ref_list1.len() >= num_ref { + break; + } + self.cur_refs.ref_list1.push(Some(pic.clone())); + } + if !self.long_term.is_empty() && self.cur_refs.ref_list1.len() < num_ref { + let copy_size = num_ref - self.cur_refs.ref_list1.len(); + for ltpic in self.long_term.iter().take(copy_size) { + self.cur_refs.ref_list1.push(ltpic.clone()); + } + } + if self.cur_refs.ref_list1.len() > 1 && self.cur_refs.ref_list0.len() == self.cur_refs.ref_list1.len() { + let mut equal = 
true; + for (pic1, pic2) in self.cur_refs.ref_list0.iter().zip(self.cur_refs.ref_list1.iter()) { + match (pic1, pic2) { + (Some(p1), Some(p2)) => { + if p1.full_id != p2.full_id { + equal = false; + break; + } + }, + (None, None) => {}, + _ => { + equal = false; + break; + }, + }; + } + if equal { + self.cur_refs.ref_list1.swap(0, 1); + } + } + } else { + form_ref_list(&mut self.cur_refs.ref_list1, + &self.ref_pics, &self.long_term, + &slice_hdr.reordering_list_l1, + slice_hdr.frame_num, pic_num_mask); + } + } + } + } + pub fn add_short_term(&mut self, cpic: PictureInfo, num_ref_frames: usize) { + if !self.ref_pics.is_empty() && self.ref_pics.len() >= num_ref_frames { + let base_id = i32::from(cpic.id); + let mut min_id = base_id; + let mut min_idx = 0; + for (i, pic) in self.ref_pics.iter().enumerate() { + let mut pic_id = i32::from(pic.id); + if pic_id > base_id { + pic_id -= self.max_frame_num; + } + if pic_id < min_id { + min_id = pic_id; + min_idx = i; + } + } + self.ref_pics.remove(min_idx); + } + if self.ref_pics.is_empty() || self.ref_pics.last().unwrap().full_id < cpic.full_id { + self.ref_pics.push(cpic); + } else { + let mut idx = 0; + for (i, pic) in self.ref_pics.iter().enumerate() { + if pic.full_id < cpic.full_id { + idx = i; + } else { + break; + } + } + self.ref_pics.insert(idx + 1, cpic); + } + } + pub fn add_long_term(&mut self, lt_idx: usize, cpic: PictureInfo) { + if lt_idx < self.long_term.len() { + self.long_term[lt_idx] = Some(cpic); + } + } +} + +fn form_ref_list(ref_list: &mut Vec>, ref_pics: &[PictureInfo], long_term: &[Option], reord_info: &ReorderingInfo, cur_id: u16, pic_num_mask: u16) { + let mut ref_pic_id = cur_id; + for (&op, &num) in reord_info.reordering_of_pic_nums_idc.iter().zip(reord_info.abs_diff_or_num.iter()).take(reord_info.num_ops) { + if op < 2 { + if op == 0 { + ref_pic_id = ref_pic_id.wrapping_sub(num) & pic_num_mask; + } else { + ref_pic_id = ref_pic_id.wrapping_add(num) & pic_num_mask; + } + let mut found = false; + 
for pic in ref_pics.iter() { + if pic.id == ref_pic_id { + ref_list.push(Some(pic.clone())); + found = true; + break; + } + } + if !found { + ref_list.push(None); + } + } else { + let idx = num as usize; + if idx < long_term.len() { + ref_list.push(long_term[idx].clone()); + } else { + ref_list.push(None); + } + } + } +} diff --git a/nihav-itu/src/codecs/h264/high/types.rs b/nihav-itu/src/codecs/h264/high/types.rs new file mode 100644 index 0000000..e46b88a --- /dev/null +++ b/nihav-itu/src/codecs/h264/high/types.rs @@ -0,0 +1,283 @@ +use nihav_core::frame::{NAVideoBuffer, NASimpleVideoFrame}; +use nihav_codec_support::codecs::{MV, ZERO_MV}; +use super::SimplifiedSliceRefs; +use super::pic_ref::FrameMBInfo; +use super::super::common_types::*; + +#[derive(Clone,Copy)] +pub struct SimpleFrame<'a> { + pub data: &'a [u16], + pub offset: [usize; 3], + pub stride: [usize; 3], +} + +impl<'a> SimpleFrame<'a> { + pub fn new(buf: &'a NAVideoBuffer) -> Self { + let mut offset = [0; 3]; + let mut stride = [0; 3]; + for (plane, (offs, strd)) in offset.iter_mut().zip(stride.iter_mut()).enumerate() { + *offs = buf.get_offset(plane); + *strd = buf.get_stride(plane); + } + Self { + data: buf.get_data(), + offset, stride + } + } +} + +pub type SliceState = SliceStateCommon; + +impl SliceState { + pub fn new() -> Self { + let mut obj = SliceState::new_default(); + obj.def_fill = 0; + obj + } + pub fn save_ipred_context(&mut self, frm: &NASimpleVideoFrame) { + let dstoff = self.mb_x * 16; + let srcoff = frm.offset[0] + self.mb_x * 16 + self.mb_y * 16 * frm.stride[0]; + self.left_y[0] = self.top_line_y[dstoff + 15]; + self.top_line_y[dstoff..][..16].copy_from_slice(&frm.data[srcoff + frm.stride[0] * 15..][..16]); + for (dst, src) in self.left_y[1..].iter_mut().zip(frm.data[srcoff..].chunks(frm.stride[0])) { + *dst = src[15]; + } + for chroma in 0..2 { + let cstride = frm.stride[chroma + 1]; + let dstoff = self.mb_x * 8; + let srcoff = frm.offset[chroma + 1] + self.mb_x * 8 + self.mb_y 
* 8 * cstride; + self.left_c[chroma][0] = self.top_line_c[chroma][dstoff + 7]; + self.top_line_c[chroma][dstoff..][..8].copy_from_slice(&frm.data[srcoff + cstride * 7..][..8]); + for (dst, src) in self.left_c[chroma][1..].iter_mut().zip(frm.data[srcoff..].chunks(cstride)) { + *dst = src[7]; + } + } + } + pub fn fill_deblock(&mut self, frefs: &SimplifiedSliceRefs, deblock_mode: u8, is_s: bool) { + if deblock_mode == 1 { + return; + } + + self.deblock = [0; 16]; + + let tx8x8 = self.get_cur_mb().transform_8x8; + + let cur_intra = self.get_cur_mb().mb_type.is_intra(); + let left_intra = self.get_left_mb().mb_type.is_intra(); + let mut top_intra = self.get_top_mb().mb_type.is_intra(); + + let mut coded_cache = [false; 25]; + let mut mvc = MVCache::default(); + let mv_cache = &mut mvc.data; + let mut ref_cache = [[INVALID_REF; 2]; 25]; + + if self.mb_y != 0 || self.has_top { + for (x, (cc, mv)) in coded_cache[1..5].iter_mut().zip(mv_cache[1..5].iter_mut()).enumerate() { + let blk4 = self.get_top_blk4(x); + *cc = blk4.ncoded != 0; + *mv = blk4.mv; + if (x & 1) == 0 { + let blk8 = self.get_top_blk8(x / 2); + ref_cache[x + 1] = blk8.ref_idx; + } else { + ref_cache[x + 1] = ref_cache[x]; + } + } + } + for (y, (ccs, mvs)) in coded_cache[5..].chunks_exact_mut(5).zip( + mv_cache[5..].chunks_exact_mut(5)).enumerate() { + if self.has_left || self.mb_x != 0 { + let blk4 = self.get_left_blk4(y * 4); + ccs[0] = blk4.ncoded != 0; + mvs[0] = blk4.mv; + if (y & 1) == 0 { + let blk8 = self.get_left_blk8(y); + ref_cache[y * 5 + 5] = blk8.ref_idx; + } else { + ref_cache[y * 5 + 5] = ref_cache[y * 5]; + } + } + for (x, (cc, mv)) in ccs[1..].iter_mut().zip(mvs[1..].iter_mut()).enumerate() { + let blk4 = self.get_cur_blk4(x + y * 4); + *cc = blk4.ncoded != 0; + *mv = blk4.mv; + ref_cache[x + 1 + (y + 1) * 5] = if ((x & 1) == 0) && ((y & 1) == 0) { + self.get_cur_blk8(x / 2 + y).ref_idx + } else { + ref_cache[(x & !1) + 1 + ((y & !1) + 1) * 5] + }; + } + } + + for (y, (((top_ccs, cur_ccs), 
(top_mvs, cur_mvs)), (cur_refs, top_refs))) in + coded_cache.chunks_exact(5).take(4).zip(coded_cache[5..].chunks_exact(5)).zip( + mv_cache.chunks_exact(5).zip(mv_cache[5..].chunks_exact(5))).zip( + ref_cache[5..].chunks_exact(5).zip(ref_cache.chunks_exact(5))).enumerate() { + let can_do_top = y != 0 || (self.mb_y != 0 && (self.has_top || deblock_mode != 2)); + if can_do_top && (!tx8x8 || (y & 1) == 0) { + if is_s || cur_intra || top_intra { + let val = if y == 0 { 0x40 } else { 0x30 }; + for el in self.deblock[y * 4..][..4].iter_mut() { *el |= val; } + } else { + for (x, (((&cur_cc, &top_cc), (cur_mv, top_mv)), (&cur_ref, &top_ref))) in + cur_ccs[1..].iter().zip(top_ccs[1..].iter()).zip( + cur_mvs[1..].iter().zip(top_mvs[1..].iter())).zip( + cur_refs[1..].iter().zip( + top_refs[1..].iter())).take(4).enumerate() { + let mut mask = 0; + if cur_cc || top_cc { + mask = 0x20; + } else { + if mvdiff4(cur_mv, top_mv) || !frefs.cmp_refs(cur_ref, top_ref) { + mask = 0x10; + } + } + if mask != 0 { + self.deblock[y * 4 + x] = mask; + } + } + } + } + let mut lleft_intra = left_intra; + for (x, (((&cur_cc, &left_cc), (cur_mv, left_mv)), (&cur_ref, &left_ref))) in + cur_ccs[1..].iter().zip(cur_ccs.iter()).zip( + cur_mvs[1..].iter().zip(cur_mvs.iter())).zip( + cur_refs[1..].iter().zip(cur_refs.iter())).enumerate() { + let skip_8 = tx8x8 && (x & 1) != 0; + let can_do_left = x > 0 || self.has_left || (self.mb_x != 0 && deblock_mode != 2); + if !can_do_left { + continue; + } + let mut mask = 0; + if skip_8 { + } else if is_s || cur_intra || lleft_intra { + mask = if x == 0 { 4 } else { 3 }; + } else if cur_cc || left_cc { + mask = 2; + } else { + if mvdiff4(cur_mv, left_mv) || !frefs.cmp_refs(cur_ref, left_ref) { + mask = 1; + } + } + if mask != 0 { + self.deblock[y * 4 + x] |= mask; + } + lleft_intra = cur_intra; + } + top_intra = cur_intra; + } + } + pub fn predict_direct_mb(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) { + let 
(col_mb, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y); + if direct_8x8 { + for blk4 in 0..16 { + let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, BLK4_TO_D8[blk4]); + self.get_cur_blk4(blk4).mv = [mv0, mv1]; + self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1]; + } + } else if col_mb.mb_type.is_16x16_ref() || !temporal_mv { + let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, 0); + self.apply_to_blk4(|blk4| blk4.mv = [mv0, mv1]); + self.apply_to_blk8(|blk8| blk8.ref_idx = [ref0, ref1]); + } else { + for blk4 in 0..16 { + let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, blk4); + self.get_cur_blk4(blk4).mv = [mv0, mv1]; + self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1]; + } + } + } + pub fn predict_direct_sub(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) { + let src_blk = if !direct8x8 { blk4 } else { BLK4_TO_D8[blk4] }; + let (mbi, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y); + let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &mbi, r1_poc, r1_long, temporal_mv, cur_id, src_blk); + self.get_cur_blk4(blk4).mv = [mv0, mv1]; + self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1]; + } + #[allow(clippy::nonminimal_bool)] + pub fn get_direct_mv(&self, frame_refs: &SimplifiedSliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) { + let blk8 = blk4_to_blk8(blk4); + let (col_mv, r0_poc, col_idx) = if mbi.ref_poc[blk8] == [MISSING_POC; 2] { + (ZERO_MV, MISSING_POC, MISSING_REF) + } else if mbi.ref_poc[blk8][0] != MISSING_POC { + (mbi.mv[blk4][0], mbi.ref_poc[blk8][0], mbi.ref_idx[blk8][0]) + } else { + (mbi.mv[blk4][1], mbi.ref_poc[blk8][1], mbi.ref_idx[blk8][1]) + }; + let 
(col_ref, r0_long) = frame_refs.map_ref0(r0_poc); + if temporal_mv { + let td = (i32::from(r1_poc) - i32::from(r0_poc)).clamp(-128, 127); + if r0_long || td == 0 { + (col_mv, col_ref, ZERO_MV, ZERO_REF) + } else { + let tx = (16384 + (td / 2).abs()) / td; + let tb = (i32::from(cur_id) - i32::from(r0_poc)).clamp(-128, 127); + let scale = ((tb * tx + 32) >> 6).clamp(-1024, 1023); + let mv0 = MV { + x: ((i32::from(col_mv.x) * scale + 128) >> 8) as i16, + y: ((i32::from(col_mv.y) * scale + 128) >> 8) as i16, + }; + let mv1 = mv0 - col_mv; + (mv0, col_ref, mv1, ZERO_REF) + } + } else { + let blk4 = 0; // we generate the same MV prediction for the whole MB + let blk8 = blk4_to_blk8(blk4); + let midx = self.get_cur_blk4_idx(blk4); + let ridx = self.get_cur_blk8_idx(blk8); + let ridx_c = self.get_cur_blk8_idx(blk8) + 16 / 8 - self.blk8.stride; + + let mv_a = self.blk4.data[midx - 1].mv; + let mv_b = self.blk4.data[midx - self.blk4.stride].mv; + let mut mv_c = self.blk4.data[midx - self.blk4.stride + 16 / 4].mv; + + let ref_a = self.blk8.data[ridx - 1].ref_idx; + let ref_b = self.blk8.data[ridx - self.blk8.stride].ref_idx; + let mut ref_c = self.blk8.data[ridx_c].ref_idx; + + if ref_c == [MISSING_REF; 2] { + mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv; + ref_c = self.blk8.data[ridx - self.blk8.stride - 1].ref_idx; + } + let mut refs = [INVALID_REF; 2]; + for cur_ref in [ref_a, ref_b, ref_c].iter() { + refs[0] = refs[0].min_pos(cur_ref[0]); + refs[1] = refs[1].min_pos(cur_ref[1]); + } + if refs == [INVALID_REF; 2] { + return (ZERO_MV, ZERO_REF, ZERO_MV, ZERO_REF); + } + + let mut col_zero = true; + if r1_long || col_idx != ZERO_REF { + col_zero = false; + } + if col_mv.x.abs() > 1 || col_mv.y.abs() > 1 { + col_zero = false; + } + let mut mvs = [ZERO_MV; 2]; + for ref_l in 0..2 { + if mbi.mb_type.is_intra() || (!refs[ref_l].not_avail() && !(refs[ref_l] == ZERO_REF && col_zero)) { + let ref_idx = refs[ref_l]; + mvs[ref_l] = if ref_b[ref_l] == MISSING_REF && 
ref_c[ref_l] == MISSING_REF { + mv_a[ref_l] + } else { + let count = ((ref_a[ref_l] == ref_idx) as u8) + ((ref_b[ref_l] == ref_idx) as u8) + ((ref_c[ref_l] == ref_idx) as u8); + if count == 1 { + if ref_a[ref_l] == ref_idx { + mv_a[ref_l] + } else if ref_b[ref_l] == ref_idx { + mv_b[ref_l] + } else { + mv_c[ref_l] + } + } else { + MV::pred(mv_a[ref_l], mv_b[ref_l], mv_c[ref_l]) + } + }; + } + } + (mvs[0], refs[0], mvs[1], refs[1]) + } + } +} diff --git a/nihav-itu/src/codecs/h264/mod.rs b/nihav-itu/src/codecs/h264/mod.rs index 160970e..bc75f9d 100644 --- a/nihav-itu/src/codecs/h264/mod.rs +++ b/nihav-itu/src/codecs/h264/mod.rs @@ -13,6 +13,8 @@ use nihav_core::io::intcode::*; use nihav_codec_support::codecs::{MV, ZERO_MV}; mod baseline; +mod high; + mod cabac_coder; mod common_types; use common_types::*; @@ -166,80 +168,223 @@ fn unescape_nal(src: &[u8], dst: &mut Vec) -> usize { off } +fn is_high_bitdepth(edata: &[u8]) -> DecoderResult { + if edata.len() > 11 && &edata[0..4] == b"avcC" { + let mut br = MemoryReader::new_read(edata); + let mut nal_buf = Vec::new(); + + br.read_skip(4)?; + let version = br.read_byte()?; + validate!(version == 1); + let profile = br.read_byte()?; + let _compatibility = br.read_byte()?; + let _level = br.read_byte()?; + let _b = br.read_byte()?; + //validate!((b & 0xFC) == 0xFC); + //self.nal_len = (b & 3) + 1; + let b = br.read_byte()?; + //validate!((b & 0xE0) == 0xE0); + let num_sps = (b & 0x1F) as usize; + for _ in 0..num_sps { + let len = br.read_u16be()? as usize; + let offset = br.tell() as usize; + validate!((br.peek_byte()? & 0x1F) == 7); + let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf); + br.read_skip(len)?; + let sps = sets::parse_sps(&nal_buf[1..])?; + if sps.bit_depth_luma > 8 { + return Ok(true); + } + } + let num_pps = br.read_byte()? as usize; + for _ in 0..num_pps { + let len = br.read_u16be()? as usize; + let offset = br.tell() as usize; + validate!((br.peek_byte()? 
& 0x1F) == 8); + let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf); + br.read_skip(len)?; + } + if br.left() > 0 { + match profile { + 100 | 110 | 122 | 144 => { + let b = br.read_byte()?; + // some encoders put something different here + if (b & 0xFC) != 0xFC { + return Ok(false); + } + // b & 3 -> chroma format + let b = br.read_byte()?; + validate!((b & 0xF8) == 0xF8); + // b & 7 -> luma depth minus 8 + let b = br.read_byte()?; + validate!((b & 0xF8) == 0xF8); + // b & 7 -> chroma depth minus 8 + let num_spsext = br.read_byte()? as usize; + for _ in 0..num_spsext { + let len = br.read_u16be()? as usize; + // parse spsext + br.read_skip(len)?; + } + }, + _ => {}, + }; + } + } + Ok(false) +} + struct STDecoderWrapper { h264: Box, + h264_hi: Option>, } impl NADecoder for STDecoderWrapper { fn init(&mut self, supp: &mut NADecoderSupport, info: NACodecInfoRef) -> DecoderResult<()> { - self.h264.init(supp, info) + let is_high_depth = if let Some(ref edata) = info.get_extradata() { + is_high_bitdepth(edata).unwrap_or(false) + } else { + false + }; + if !is_high_depth { + self.h264.init(supp, info) + } else { + let mut h264_hi = Box::new(high::decoder_st::H264Decoder::new()); + h264_hi.init(supp, info)?; + self.h264_hi = Some(h264_hi); + Ok(()) + } } fn decode(&mut self, supp: &mut NADecoderSupport, pkt: &NAPacket) -> DecoderResult { - self.h264.decode(supp, pkt) + if let Some(ref mut h264_hi) = self.h264_hi { + h264_hi.decode(supp, pkt) + } else { + self.h264.decode(supp, pkt) + } } fn flush(&mut self) { + if let Some(ref mut h264_hi) = self.h264_hi { + h264_hi.flush() + } self.h264.flush(); } } impl NAOptionHandler for STDecoderWrapper { fn get_supported_options(&self) -> &[NAOptionDefinition] { - self.h264.get_supported_options() + if let Some(ref h264_hi) = self.h264_hi { + h264_hi.get_supported_options() + } else { + self.h264.get_supported_options() + } } fn set_options(&mut self, options: &[NAOption]) { - self.h264.set_options(options); + if let 
Some(ref mut h264_hi) = self.h264_hi { + h264_hi.set_options(options); + } else { + self.h264.set_options(options); + } } fn query_option_value(&self, name: &str) -> Option { - self.h264.query_option_value(name) + if let Some(ref h264_hi) = self.h264_hi { + h264_hi.query_option_value(name) + } else { + self.h264.query_option_value(name) + } } } pub fn get_decoder() -> Box { Box::new(STDecoderWrapper { h264: Box::new(baseline::decoder_st::H264Decoder::new()), + h264_hi: None, }) } struct MTDecoderWrapper { h264: Box, + h264_hi: Option>, } impl NADecoderMT for MTDecoderWrapper { fn init(&mut self, supp: &mut NADecoderSupport, info: NACodecInfoRef, nthreads: usize) -> DecoderResult<()> { - self.h264.init(supp, info, nthreads) + let is_high_depth = if let Some(ref edata) = info.get_extradata() { + is_high_bitdepth(edata).unwrap_or(false) + } else { + false + }; + if !is_high_depth { + self.h264.init(supp, info, nthreads) + } else { + let mut h264_hi = Box::new(high::decoder_mt::H264MTDecoder::new()); + h264_hi.init(supp, info, nthreads)?; + self.h264_hi = Some(h264_hi); + Ok(()) + } } fn can_take_input(&mut self) -> bool { - self.h264.can_take_input() + if let Some(ref mut h264_hi) = self.h264_hi { + h264_hi.can_take_input() + } else { + self.h264.can_take_input() + } } fn queue_pkt(&mut self, supp: &mut NADecoderSupport, pkt: &NAPacket, user_id: u32) -> DecoderResult { - self.h264.queue_pkt(supp, pkt, user_id) + if let Some(ref mut h264_hi) = self.h264_hi { + h264_hi.queue_pkt(supp, pkt, user_id) + } else { + self.h264.queue_pkt(supp, pkt, user_id) + } } fn has_output(&mut self) -> bool { - self.h264.has_output() + if let Some(ref mut h264_hi) = self.h264_hi { + h264_hi.has_output() + } else { + self.h264.has_output() + } } fn get_frame(&mut self) -> (DecoderResult, u32) { - self.h264.get_frame() + if let Some(ref mut h264_hi) = self.h264_hi { + h264_hi.get_frame() + } else { + self.h264.get_frame() + } } fn flush(&mut self) { + if let Some(ref mut h264_hi) = 
self.h264_hi { + h264_hi.flush(); + } self.h264.flush(); } } impl NAOptionHandler for MTDecoderWrapper { fn get_supported_options(&self) -> &[NAOptionDefinition] { - self.h264.get_supported_options() + if let Some(ref h264_hi) = self.h264_hi { + h264_hi.get_supported_options() + } else { + self.h264.get_supported_options() + } } fn set_options(&mut self, options: &[NAOption]) { - self.h264.set_options(options); + if let Some(ref mut h264_hi) = self.h264_hi { + h264_hi.set_options(options); + } else { + self.h264.set_options(options); + } } fn query_option_value(&self, name: &str) -> Option { - self.h264.query_option_value(name) + if let Some(ref h264_hi) = self.h264_hi { + h264_hi.query_option_value(name) + } else { + self.h264.query_option_value(name) + } } } pub fn get_decoder_mt() -> Box { Box::new(MTDecoderWrapper { h264: Box::new(baseline::decoder_mt::H264MTDecoder::new()), + h264_hi: None, }) } @@ -467,6 +612,49 @@ mod test { [0x26078d38, 0xf6a59d57, 0xcd14eaf8, 0x8eb08259], [0x31494337, 0x6f8d3f52, 0x4bc9ff92, 0x0c601b1c]])); } + // a sample from libav FATE suite + #[test] + fn test_h264_10_bit() { + let mut dmx_reg = RegisteredDemuxers::new(); + dmx_reg.add_demuxer(&RawH264DemuxerCreator{}); + generic_register_all_demuxers(&mut dmx_reg); + let mut dec_reg = RegisteredDecoders::new(); + itu_register_all_decoders(&mut dec_reg); + test_decoding("rawh264", "h264", "assets/ITU/normal-10.h264", + None, &dmx_reg, &dec_reg, + ExpectedTestResult::MD5Frames(vec![ + [0x50aa3b23, 0x62021b6d, 0x5a4ed176, 0x309bbeb2], + [0xdc632b62, 0x5a588c84, 0x7a5f8537, 0xc82e356b], + [0x0be7e536, 0xf54f8a6f, 0xe6ed1bb5, 0xf10ee116], + [0xcb69cc4d, 0xfe8cccab, 0x161c3687, 0x7bf5de8b], + [0x36667f83, 0x78468cab, 0xa378f86b, 0x9358a3f2], + [0x31a31140, 0xb8069e3e, 0x22d90a05, 0x0b3f8bff], + [0xb5f144a8, 0xb56ac2d6, 0x0bcae310, 0xebc3042e], + [0x324a14ce, 0x87bc9e89, 0x312bc02f, 0x9c6e2b0b], + [0xa8e10c16, 0xa85de23e, 0xd8e8e47e, 0xd00a9fd9], + [0x0b626c0f, 0x9ab5212d, 0x98728b97, 
0xb8ce84a0]])); + } + #[test] + fn test_h264_10bit_mt() { + let mut dmx_reg = RegisteredDemuxers::new(); + dmx_reg.add_demuxer(&RawH264DemuxerCreator{}); + generic_register_all_demuxers(&mut dmx_reg); + let mut dec_reg = RegisteredMTDecoders::new(); + itu_register_all_mt_decoders(&mut dec_reg); + test_mt_decoding("rawh264", "h264", "assets/ITU/normal-10.h264", + None, &dmx_reg, &dec_reg, + ExpectedTestResult::MD5Frames(vec![ + [0x50aa3b23, 0x62021b6d, 0x5a4ed176, 0x309bbeb2], + [0xdc632b62, 0x5a588c84, 0x7a5f8537, 0xc82e356b], + [0x0be7e536, 0xf54f8a6f, 0xe6ed1bb5, 0xf10ee116], + [0xcb69cc4d, 0xfe8cccab, 0x161c3687, 0x7bf5de8b], + [0x36667f83, 0x78468cab, 0xa378f86b, 0x9358a3f2], + [0x31a31140, 0xb8069e3e, 0x22d90a05, 0x0b3f8bff], + [0xb5f144a8, 0xb56ac2d6, 0x0bcae310, 0xebc3042e], + [0x324a14ce, 0x87bc9e89, 0x312bc02f, 0x9c6e2b0b], + [0xa8e10c16, 0xa85de23e, 0xd8e8e47e, 0xd00a9fd9], + [0x0b626c0f, 0x9ab5212d, 0x98728b97, 0xb8ce84a0]])); + } } pub const I4X4_SCAN: [(u8, u8); 16] = [ -- 2.39.5