It can easily support other bit depths, but currently there is no need for that.
--- /dev/null
+//use nihav_core::codecs::{DecoderResult, DecoderError};
+use nihav_codec_support::codecs::MV;
+
+use super::super::*;
+use super::*;
+use super::super::cabac_coder::*;
+use super::dsp::{CHROMA_DC_SCAN, ZIGZAG, ZIGZAG1, ZIGZAG8X8};
+use super::super::slice::SliceHeader;
+
+/// Decodes the macroblock skip flag for the current macroblock.
+///
+/// The context increment counts the left and top neighbours that are present
+/// (their type is not `CompactMBType::None`) and are not skipped. The base
+/// context is 11 when the slice type reports `is_p()` and 24 otherwise.
+/// Slices reporting `is_intra()` never decode a bin and always yield `false`.
+pub fn cabac_decode_mbskip(cabac: &mut CABAC, sstate: &SliceState, slice_hdr: &SliceHeader) -> bool {
+    let base_ctx = if slice_hdr.slice_type.is_p() { 11 } else { 24 };
+    let left_mbt = sstate.get_left_mb().mb_type;
+    let top_mbt = sstate.get_top_mb().mb_type;
+    let left_inc = usize::from(left_mbt != CompactMBType::None && !left_mbt.is_skip());
+    let top_inc = usize::from(top_mbt != CompactMBType::None && !top_mbt.is_skip());
+
+    if slice_hdr.slice_type.is_intra() {
+        return false;
+    }
+    cabac.decode_bit(base_ctx + left_inc + top_inc)
+}
+
+/// Decodes an intra macroblock type using the contexts starting at `start`.
+///
+/// The first bin (context `start + ctx`) separates `Intra4x4` from the rest;
+/// a set terminate bin then selects `PCM`. Otherwise the luma CBP flag
+/// (`start + 3`), the chroma CBP (`start + 4`) and the 16x16 prediction mode
+/// (`start + 6`, `start + 7`) are read, in that order.
+fn decode_i_type(cabac: &mut CABAC, start: usize, ctx: usize) -> MBType {
+    if !cabac.decode_bit(start + ctx) {
+        return MBType::Intra4x4;
+    }
+    if cabac.decode_terminate() {
+        return MBType::PCM;
+    }
+
+    let luma_cbp = if cabac.decode_bit(start + 3) { 0xF } else { 0x0 };
+    let chroma_cbp = cabac.decode_012(start + 4);
+    let pred_mode = cabac.decode_bits(start + 6, start + 7, 2);
+    MBType::Intra16x16(pred_mode, luma_cbp, chroma_cbp)
+}
+
+/// Decodes an intra macroblock type embedded in an inter slice.
+///
+/// Same structure as `decode_i_type` but with the compact context layout used
+/// after the inter prefix: `start` for the first bin, `start + 1` for the luma
+/// CBP flag, `start + 2` for both chroma CBP bins and `start + 3` for the
+/// prediction mode bins.
+fn decode_i_type_inter(cabac: &mut CABAC, start: usize) -> MBType {
+    if !cabac.decode_bit(start) {
+        return MBType::Intra4x4;
+    }
+    if cabac.decode_terminate() {
+        return MBType::PCM;
+    }
+
+    let luma_cbp = if cabac.decode_bit(start + 1) { 0xF } else { 0x0 };
+    // Chroma CBP is a truncated unary code (0, 10, 11); the second bin is
+    // only present when the first one is set.
+    let mut chroma_cbp = 0;
+    if cabac.decode_bit(start + 2) {
+        chroma_cbp = if cabac.decode_bit(start + 2) { 2 } else { 1 };
+    }
+    let pred_mode = cabac.decode_bits(start + 3, start + 3, 2);
+
+    MBType::Intra16x16(pred_mode, luma_cbp, chroma_cbp)
+}
+
+/// Remaps a macroblock type decoded with the I-slice binarisation to the type
+/// it denotes in an SI slice, i.e. the intra type whose index is one lower.
+///
+/// `Intra16x16` carries `(prediction mode, luma CBP, chroma CBP)` where the
+/// luma CBP is either `0x0` or `0xF`. The first 16x16 type becomes `Intra4x4`,
+/// every other 16x16 type moves down by one, and `PCM` becomes the last 16x16
+/// type. Any other type is returned unchanged.
+fn remap_si_mbtype(mbtype: MBType) -> MBType {
+    match mbtype {
+        MBType::Intra16x16(0, 0, 0) => MBType::Intra4x4,
+        MBType::Intra16x16(imode, cbpy, cbpc) => {
+            // Zero-based index among the 24 Intra16x16 types, already moved
+            // down by one. It cannot underflow because (0, 0, 0) is handled above.
+            let idx = imode + if cbpy != 0 { 12 } else { 0 } + cbpc * 4 - 1;
+            let nimode = idx & 3;
+            let (ncbpy, ncbpc) = if (idx >> 2) >= 3 {
+                (0xF, (idx >> 2) - 3)
+            } else {
+                (0x0, idx >> 2)
+            };
+            MBType::Intra16x16(nimode, ncbpy, ncbpc)
+        },
+        // The last Intra16x16 type: prediction mode 3, all luma blocks coded,
+        // chroma CBP 2. The luma CBP has to be the full 0xF mask used by every
+        // other Intra16x16 value here, not a bare flag of 1.
+        MBType::PCM => MBType::Intra16x16(3, 0xF, 2),
+        _ => mbtype,
+    }
+}
+
+/// Decodes the macroblock type according to the slice type.
+///
+/// * I and SI slices use the intra binarisation at context 3; the context
+///   increment counts the left/top neighbours for which `is_intra16orpcm()`
+///   holds. SI results are passed through `remap_si_mbtype`.
+/// * P and SP slices start with an intra/inter bin at context 14; intra types
+///   continue at context 17, inter types use contexts 15 to 17.
+/// * B slices use context 27 plus the number of left/top neighbours that are
+///   not direct, followed by contexts 30 to 32.
+pub fn cabac_decode_mb_type(cabac: &mut CABAC, slice_hdr: &SliceHeader, sstate: &SliceState) -> MBType {
+    // B types fully determined by a four-bit code in the range 0..=7.
+    const B_SHORT_CODES: [MBType; 8] = [
+        MBType::B16x16(BMode::Bi),
+        MBType::B16x8(BMode::L0, BMode::L0),
+        MBType::B8x16(BMode::L0, BMode::L0),
+        MBType::B16x8(BMode::L1, BMode::L1),
+        MBType::B8x16(BMode::L1, BMode::L1),
+        MBType::B16x8(BMode::L0, BMode::L1),
+        MBType::B8x16(BMode::L0, BMode::L1),
+        MBType::B16x8(BMode::L1, BMode::L0),
+    ];
+    // B types selected by a four-bit code plus one extra bin; anything past
+    // the end of this table maps to B8x16(Bi, Bi).
+    const B_LONG_CODES: [MBType; 9] = [
+        MBType::B16x8(BMode::L0, BMode::Bi),
+        MBType::B8x16(BMode::L0, BMode::Bi),
+        MBType::B16x8(BMode::L1, BMode::Bi),
+        MBType::B8x16(BMode::L1, BMode::Bi),
+        MBType::B16x8(BMode::Bi, BMode::L0),
+        MBType::B8x16(BMode::Bi, BMode::L0),
+        MBType::B16x8(BMode::Bi, BMode::L1),
+        MBType::B8x16(BMode::Bi, BMode::L1),
+        MBType::B16x8(BMode::Bi, BMode::Bi),
+    ];
+
+    match slice_hdr.slice_type {
+        SliceType::I | SliceType::SI => {
+            let left_inc = usize::from(sstate.get_left_mb().mb_type.is_intra16orpcm());
+            let top_inc = usize::from(sstate.get_top_mb().mb_type.is_intra16orpcm());
+            let i_type = decode_i_type(cabac, 3, left_inc + top_inc);
+            match slice_hdr.slice_type {
+                SliceType::I => i_type,
+                _ => remap_si_mbtype(i_type),
+            }
+        },
+        SliceType::P | SliceType::SP => {
+            if cabac.decode_bit(14) {
+                return decode_i_type_inter(cabac, 17);
+            }
+            // The context of the second bin depends on the value of the first.
+            let first = cabac.decode_bit(15);
+            let second = cabac.decode_bit(if first { 17 } else { 16 });
+            match (first, second) {
+                (false, false) => MBType::P16x16,
+                (false, true) => MBType::P8x8,
+                (true, false) => MBType::P8x16,
+                (true, true) => MBType::P16x8,
+            }
+        },
+        SliceType::B => {
+            let left_inc = usize::from(!sstate.get_left_mb().mb_type.is_direct());
+            let top_inc = usize::from(!sstate.get_top_mb().mb_type.is_direct());
+            if !cabac.decode_bit(27 + left_inc + top_inc) {
+                return MBType::Direct;
+            }
+            if !cabac.decode_bit(30) {
+                let mode = if cabac.decode_bit(32) { BMode::L1 } else { BMode::L0 };
+                return MBType::B16x16(mode);
+            }
+
+            let code = cabac.decode_bits(31, 32, 4);
+            match code {
+                0x0..=0x7 => B_SHORT_CODES[usize::from(code)],
+                0xD => decode_i_type_inter(cabac, 32),
+                0xE => MBType::B8x16(BMode::L1, BMode::L0),
+                0xF => MBType::B8x8,
+                _ => {
+                    let ext = (code - 8) * 2 + (cabac.decode_bit(32) as u8);
+                    B_LONG_CODES
+                        .get(usize::from(ext))
+                        .copied()
+                        .unwrap_or(MBType::B8x16(BMode::Bi, BMode::Bi))
+                },
+            }
+        },
+    }
+}
+
+/// Decodes the sub-macroblock type of one 8x8 partition.
+///
+/// P and SP slices use contexts 21 to 23, B slices use contexts 36 to 39.
+/// Any other slice type is a caller bug and hits `unreachable!()`.
+fn decode_sub_mb_type_cabac(cabac: &mut CABAC, slice_hdr: &SliceHeader) -> SubMBType {
+    match slice_hdr.slice_type {
+        SliceType::P | SliceType::SP => {
+            if cabac.decode_bit(21) {
+                return SubMBType::P8x8;
+            }
+            if !cabac.decode_bit(22) {
+                return SubMBType::P8x4;
+            }
+            if cabac.decode_bit(23) { SubMBType::P4x8 } else { SubMBType::P4x4 }
+        },
+        SliceType::B => {
+            if !cabac.decode_bit(36) {
+                return SubMBType::Direct8x8;
+            }
+            if !cabac.decode_bit(37) {
+                let mode = if cabac.decode_bit(39) { BMode::L1 } else { BMode::L0 };
+                return SubMBType::B8x8(mode);
+            }
+
+            match cabac.decode_bits(38, 39, 3) {
+                0 => SubMBType::B8x8(BMode::Bi),
+                1 => SubMBType::B8x4(BMode::L0),
+                2 => SubMBType::B4x8(BMode::L0),
+                3 => SubMBType::B8x4(BMode::L1),
+                6 => SubMBType::B4x4(BMode::L1),
+                7 => SubMBType::B4x4(BMode::Bi),
+                code => {
+                    // The remaining codes need one more bin to tell two types apart.
+                    let ext = (code - 4) * 2 + (cabac.decode_bit(39) as u8);
+                    match ext {
+                        0 => SubMBType::B4x8(BMode::L1),
+                        1 => SubMBType::B8x4(BMode::Bi),
+                        2 => SubMBType::B4x8(BMode::Bi),
+                        _ => SubMBType::B4x4(BMode::L0),
+                    }
+                },
+            }
+        },
+        _ => unreachable!(),
+    }
+}
+
+/// Decodes a reference picture index for a list holding `num_refs` references.
+///
+/// With a single reference nothing is read and `ZERO_REF` is returned.
+/// Otherwise the index is a unary code: the first bin uses context `54 + ctx`,
+/// the second context 58 and all further bins context 59. An index that is
+/// not below `num_refs` yields `INVALID_REF`.
+fn decode_ref_idx(cabac: &mut CABAC, num_refs: usize, ctx: usize) -> PicRef {
+    const REF_CTX: usize = 54;
+
+    if num_refs == 1 || !cabac.decode_bit(REF_CTX + ctx) {
+        return ZERO_REF;
+    }
+    if !cabac.decode_bit(REF_CTX + 4) {
+        return PicRef::new(1);
+    }
+
+    let mut ref_no = 2;
+    loop {
+        // The bin is read before the limit is checked, so a run that reaches
+        // 32 still consumes its final bin.
+        if !cabac.decode_bit(REF_CTX + 5) || ref_no >= 32 {
+            break;
+        }
+        ref_no += 1;
+    }
+    if ref_no >= num_refs {
+        return INVALID_REF;
+    }
+    PicRef::new(ref_no as u8)
+}
+
+/// Decodes one motion vector difference component.
+///
+/// The magnitude is a unary prefix of up to nine context-coded bins (first bin
+/// at `base + ctx`, later ones at `base + 3` up to `base + 6`). When the
+/// prefix is exhausted, an escape suffix read in bypass mode is added. A
+/// bypass sign bin follows every non-zero magnitude.
+fn decode_mv_component(cabac: &mut CABAC, base: usize, ctx: usize) -> i16 {
+    if !cabac.decode_bit(base + ctx) {
+        return 0;
+    }
+
+    let mut magnitude = 1;
+    while magnitude < 9 && cabac.decode_bit(base + (magnitude + 2).min(6)) {
+        magnitude += 1;
+    }
+    if magnitude >= 9 {
+        // Escape: each set bypass bin adds 2^order and increases the order,
+        // then `order` plain bypass bits complete the value.
+        let mut order = 3;
+        while order < 16 && cabac.decode_bypass() {
+            magnitude += 1 << order;
+            order += 1;
+        }
+        magnitude += cabac.decode_bypass_bits(order) as usize;
+    }
+
+    let negative = magnitude != 0 && cabac.decode_bypass();
+    if negative {
+        -(magnitude as i16)
+    } else {
+        magnitude as i16
+    }
+}
+
+/// Decodes a motion vector difference: the horizontal component (context base
+/// 40, increment `ctx0`) followed by the vertical one (base 47, increment `ctx1`).
+fn decode_mv(cabac: &mut CABAC, ctx0: usize, ctx1: usize) -> MV {
+    let dx = decode_mv_component(cabac, 40, ctx0);
+    let dy = decode_mv_component(cabac, 47, ctx1);
+    MV { x: dx, y: dy }
+}
+
+/// Decodes the chroma intra prediction mode (0..=3).
+///
+/// The first bin uses context 64 plus the number of left/top neighbours whose
+/// `cmode` is non-zero; the remaining bins of the unary code share context 67.
+fn decode_chroma_ipred_cabac(cabac: &mut CABAC, sstate: &SliceState) -> u8 {
+    let ctx = usize::from(sstate.get_left_mb().cmode != 0)
+        + usize::from(sstate.get_top_mb().cmode != 0);
+    if !cabac.decode_bit(64 + ctx) {
+        return 0;
+    }
+    if !cabac.decode_bit(67) {
+        return 1;
+    }
+    if !cabac.decode_bit(67) { 2 } else { 3 }
+}
+
+/// Decodes the intra prediction mode of the block whose top-left 4x4 block is
+/// `blk4`.
+///
+/// The predicted mode is the smaller of the top and left neighbour mode
+/// indices, or 2 when that minimum is -1. If the flag at context 68 is clear,
+/// a three-bit remainder (context 69, least significant bit first) replaces
+/// the prediction, skipping over the predicted value itself.
+fn decode_ipred_mode_cabac(cabac: &mut CABAC, sstate: &mut SliceState, blk4: usize) -> u8 {
+    let top_pred = sstate.get_top_blk4(blk4).ipred;
+    let left_pred = sstate.get_left_blk4(blk4).ipred;
+    let min_idx = top_pred.into_pred_idx().min(left_pred.into_pred_idx());
+    let predicted = if min_idx != -1 { min_idx as u8 } else { 2 };
+
+    if cabac.decode_bit(68) {
+        return predicted;
+    }
+    let m0 = cabac.decode_bit(69) as u8;
+    let m1 = cabac.decode_bit(69) as u8;
+    let m2 = cabac.decode_bit(69) as u8;
+    let rem_mode = (m2 << 2) | (m1 << 1) | m0;
+    if rem_mode >= predicted { rem_mode + 1 } else { rem_mode }
+}
+
+/// Decodes reference indices and motion vector differences for the partitions
+/// of a B macroblock.
+///
+/// `modes[i]` and `origins[i]` describe partition `i`; every partition is
+/// `pw` x `ph` pixels. All list 0 reference indices are read first, then all
+/// list 1 ones, then the list 0 vectors and finally the list 1 vectors.
+/// A partition takes part in a list unless its mode names the other list only.
+#[allow(clippy::too_many_arguments)]
+fn decode_b_parts_cabac(cabac: &mut CABAC, sstate: &mut SliceState, mb_info: &mut CurrentMBInfo, modes: &[BMode], origins: &[(usize, usize)], pw: usize, ph: usize, num_refs: [usize; 2]) {
+    for list in 0..2 {
+        let other_only = if list == 0 { BMode::L1 } else { BMode::L0 };
+        for (part, (&mode, &(xoff, yoff))) in modes.iter().zip(origins.iter()).enumerate() {
+            if mode == other_only {
+                continue;
+            }
+            let ctx = sstate.get_mv_ref_ctx(xoff, yoff, list);
+            let ref_idx = decode_ref_idx(cabac, num_refs[list], ctx);
+            if list == 0 {
+                mb_info.ref_l0[part] = ref_idx;
+            } else {
+                mb_info.ref_l1[part] = ref_idx;
+            }
+            sstate.fill_ref(xoff, yoff, pw, ph, list, ref_idx);
+        }
+    }
+    for list in 0..2 {
+        let other_only = if list == 0 { BMode::L1 } else { BMode::L0 };
+        for (part, (&mode, &(xoff, yoff))) in modes.iter().zip(origins.iter()).enumerate() {
+            if mode == other_only {
+                continue;
+            }
+            let (ctx0, ctx1) = sstate.get_mv_ctx(xoff, yoff, list);
+            let mv = decode_mv(cabac, ctx0, ctx1);
+            if list == 0 {
+                mb_info.mv_l0[part] = mv;
+            } else {
+                mb_info.mv_l1[part] = mv;
+            }
+            sstate.fill_mvd(xoff, yoff, pw, ph, list, mv);
+        }
+    }
+}
+
+/// Decodes the prediction information of a macroblock of type `mb_type`.
+///
+/// Intra 4x4/8x8 macroblocks get their luma prediction modes (stored both in
+/// `mb_info.ipred` and in the current 4x4 block state) and the chroma mode;
+/// Intra16x16 gets the chroma mode only. Inter macroblocks get reference
+/// indices and motion vector differences stored in `mb_info` and mirrored
+/// into `sstate` so that later context derivation can see them. Other types
+/// carry no prediction syntax here. The per-macroblock motion state is reset
+/// on entry in every case.
+#[allow(clippy::cognitive_complexity)]
+pub fn decode_mb_pred_cabac(cabac: &mut CABAC, slice_hdr: &SliceHeader, mb_type: MBType, sstate: &mut SliceState, mb_info: &mut CurrentMBInfo) {
+    mb_info.mb_type = mb_type;
+    let num_l0 = slice_hdr.num_ref_idx_l0_active;
+    let num_l1 = slice_hdr.num_ref_idx_l1_active;
+    sstate.reset_mb_mv();
+    match mb_type {
+        MBType::Intra4x4 => {
+            for &(x, y) in I4X4_SCAN.iter() {
+                let blk4 = (x as usize) + (y as usize) * 4;
+                let pred_mode = decode_ipred_mode_cabac(cabac, sstate, blk4);
+                mb_info.ipred[blk4] = pred_mode.into();
+                sstate.get_cur_blk4(blk4).ipred = pred_mode.into();
+            }
+            mb_info.chroma_ipred = decode_chroma_ipred_cabac(cabac, sstate);
+        },
+        MBType::Intra8x8 => {
+            for part in 0..4 {
+                let blk4 = (part & 1) * 2 + (part & 2) * 4;
+                let pred_mode = decode_ipred_mode_cabac(cabac, sstate, blk4);
+                // One mode covers all four 4x4 blocks of the 8x8 block.
+                for &off in [0usize, 1, 4, 5].iter() {
+                    mb_info.ipred[blk4 + off] = pred_mode.into();
+                    sstate.get_cur_blk4(blk4 + off).ipred = pred_mode.into();
+                }
+            }
+            mb_info.chroma_ipred = decode_chroma_ipred_cabac(cabac, sstate);
+        },
+        MBType::Intra16x16(_ipred, _, _) => {
+            mb_info.chroma_ipred = decode_chroma_ipred_cabac(cabac, sstate);
+        },
+        MBType::P16x16 | MBType::P16x8 | MBType::P8x16 => {
+            let num_subparts = mb_type.num_parts();
+            let (pw, ph) = mb_type.size();
+            // Reference indices for all partitions precede the vectors.
+            let (mut xoff, mut yoff) = (0, 0);
+            for part in 0..num_subparts {
+                let ctx = sstate.get_mv_ref_ctx(xoff, yoff, 0);
+                let ref_idx = decode_ref_idx(cabac, num_l0, ctx);
+                mb_info.ref_l0[part] = ref_idx;
+                sstate.fill_ref(xoff, yoff, pw, ph, 0, ref_idx);
+                xoff += pw;
+                if xoff == 16 {
+                    xoff = 0;
+                    yoff += ph;
+                }
+            }
+            let (mut xoff, mut yoff) = (0, 0);
+            for part in 0..num_subparts {
+                let (ctx0, ctx1) = sstate.get_mv_ctx(xoff, yoff, 0);
+                let mv = decode_mv(cabac, ctx0, ctx1);
+                mb_info.mv_l0[part] = mv;
+                sstate.fill_mvd(xoff, yoff, pw, ph, 0, mv);
+                xoff += pw;
+                if xoff == 16 {
+                    xoff = 0;
+                    yoff += ph;
+                }
+            }
+        },
+        MBType::B16x16(mode) => {
+            decode_b_parts_cabac(cabac, sstate, mb_info, &[mode], &[(0, 0)], 16, 16, [num_l0, num_l1]);
+        },
+        MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
+            let (pw, ph) = mb_info.mb_type.size();
+            // The second partition starts 8 pixels down (16x8) or 8 pixels
+            // to the right (8x16).
+            let origins = [(0, 0), (pw & 8, ph & 8)];
+            decode_b_parts_cabac(cabac, sstate, mb_info, &[mode0, mode1], &origins, pw, ph, [num_l0, num_l1]);
+        },
+        MBType::P8x8 | MBType::B8x8 => {
+            for sub_type in mb_info.sub_mb_type.iter_mut() {
+                *sub_type = decode_sub_mb_type_cabac(cabac, slice_hdr);
+            }
+            // A sub-partition takes part in a list unless it is direct or
+            // predicted from the other list only.
+            let uses_list = |stype: SubMBType, list: usize| {
+                stype != SubMBType::Direct8x8
+                    && ((list == 0 && !stype.is_l1()) || (list == 1 && !stype.is_l0()))
+            };
+            let num_l = [num_l0, num_l1];
+            for ref_l in 0..2 {
+                for spart in 0..4 {
+                    let stype = mb_info.sub_mb_type[spart];
+                    if !uses_list(stype, ref_l) {
+                        continue;
+                    }
+                    let ctx = sstate.get_mv_ref_ctx((spart & 1) * 8, (spart & 2) * 4, ref_l);
+                    let ref_idx = decode_ref_idx(cabac, num_l[ref_l], ctx);
+                    if ref_l == 0 {
+                        mb_info.ref_l0[spart] = ref_idx;
+                    } else {
+                        mb_info.ref_l1[spart] = ref_idx;
+                    }
+                    sstate.get_cur_blk8(spart).ref_idx[ref_l] = ref_idx;
+                }
+            }
+            for ref_l in 0..2 {
+                for spart in 0..4 {
+                    let stype = mb_info.sub_mb_type[spart];
+                    if !uses_list(stype, ref_l) {
+                        continue;
+                    }
+                    let (pw, ph) = stype.size();
+                    let orig_x = (spart & 1) * 8;
+                    let mut xoff = orig_x;
+                    let mut yoff = (spart & 2) * 4;
+                    for i in 0..stype.num_parts() {
+                        let (ctx0, ctx1) = sstate.get_mv_ctx(xoff, yoff, ref_l);
+                        let mv = decode_mv(cabac, ctx0, ctx1);
+                        if ref_l == 0 {
+                            mb_info.mv_l0[spart * 4 + i] = mv;
+                        } else {
+                            mb_info.mv_l1[spart * 4 + i] = mv;
+                        }
+                        sstate.fill_mvd(xoff, yoff, pw, ph, ref_l, mv);
+                        xoff += pw;
+                        if xoff == orig_x + 8 {
+                            xoff = orig_x;
+                            yoff += ph;
+                        }
+                    }
+                }
+            }
+        },
+        _ => {},
+    };
+}
+
+/// Decodes the coded block pattern and returns `(luma CBP, chroma CBP)`.
+///
+/// The luma part has one bin per 8x8 block (contexts 73..=76), each
+/// conditioned on whether the blocks to its left and above are coded. The
+/// chroma part is 0, 1 or 2, read with contexts 77..=80 for the first bin and
+/// 81..=84 for the second.
+pub fn decode_cbp_cabac(cabac: &mut CABAC, sstate: &SliceState) -> (u8, u8) {
+    let mbt_a = sstate.get_left_mb().mb_type;
+    let mbt_b = sstate.get_top_mb().mb_type;
+
+    // For luma, a missing or PCM neighbour counts as fully coded and a
+    // skipped one as not coded at all.
+    let luma_left = match mbt_a {
+        CompactMBType::None | CompactMBType::PCM => 0x3F,
+        _ if mbt_a.is_skip() => 0,
+        _ => sstate.get_left_mb().cbp,
+    };
+    let luma_top = match mbt_b {
+        CompactMBType::None | CompactMBType::PCM => 0x3F,
+        _ if mbt_b.is_skip() => 0,
+        _ => sstate.get_top_mb().cbp,
+    };
+
+    // A coded neighbour contributes nothing; an uncoded one contributes its weight.
+    let inc = |is_coded: bool, weight: usize| if is_coded { 0 } else { weight };
+
+    let bit0 = cabac.decode_bit(73 + inc((luma_left & 2) != 0, 1) + inc((luma_top & 4) != 0, 2));
+    let bit1 = cabac.decode_bit(73 + inc(bit0, 1) + inc((luma_top & 8) != 0, 2));
+    let bit2 = cabac.decode_bit(73 + inc((luma_left & 8) != 0, 1) + inc(bit0, 2));
+    let bit3 = cabac.decode_bit(73 + inc(bit2, 1) + inc(bit1, 2));
+    let cbpy = (bit0 as u8) | ((bit1 as u8) << 1) | ((bit2 as u8) << 2) | ((bit3 as u8) << 3);
+
+    // For chroma, PCM counts as chroma CBP 2 and a skipped neighbour as 0;
+    // everything else (including a missing neighbour) uses the stored value.
+    let chroma_left = match mbt_a {
+        CompactMBType::PCM => 0x2F,
+        CompactMBType::None => sstate.get_left_mb().cbp,
+        _ if mbt_a.is_skip() => 0,
+        _ => sstate.get_left_mb().cbp,
+    } >> 4;
+    let chroma_top = match mbt_b {
+        CompactMBType::PCM => 0x2F,
+        CompactMBType::None => sstate.get_top_mb().cbp,
+        _ if mbt_b.is_skip() => 0,
+        _ => sstate.get_top_mb().cbp,
+    } >> 4;
+    let any_ctx = usize::from(chroma_left != 0) + 2 * usize::from(chroma_top != 0);
+    let ac_ctx = usize::from(chroma_left == 2) + 2 * usize::from(chroma_top == 2);
+    let cbpc = if cabac.decode_bit(77 + any_ctx) {
+        cabac.decode_bit(81 + ac_ctx) as u8 + 1
+    } else {
+        0
+    };
+
+    (cbpy, cbpc)
+}
+
+/// Decodes the macroblock quantiser delta.
+///
+/// The value is a unary code (contexts `60 + ctx`, 62, then 63 for all later
+/// bins, capped at 128 of them) whose length maps alternately to positive and
+/// negative deltas: 0, +1, -1, +2, -2, ...
+pub fn decode_mb_qp_delta_cabac(cabac: &mut CABAC, ctx: usize) -> i32 {
+    if !cabac.decode_bit(60 + ctx) {
+        return 0;
+    }
+    if !cabac.decode_bit(62) {
+        return 1;
+    }
+
+    let mut extra = 0;
+    while extra < 128 && cabac.decode_bit(63) {
+        extra += 1;
+    }
+    let half = extra >> 1;
+    if (extra & 1) == 0 {
+        -half - 1
+    } else {
+        half + 2
+    }
+}
+
+/// Decodes the significance map of a residual block into `coded` and returns
+/// the scan position of the last significant coefficient.
+///
+/// `sig_ctx(i)` and `last_ctx(i)` give the contexts of the "significant" and
+/// "last" flags at scan position `i`. No flags are read for the final
+/// position: if the map was not terminated earlier, that position is
+/// significant by implication.
+fn decode_sig_map(cabac: &mut CABAC, coded: &mut [bool], sig_ctx: impl Fn(usize) -> usize, last_ctx: impl Fn(usize) -> usize) -> usize {
+    let final_pos = coded.len() - 1;
+    for i in 0..final_pos {
+        coded[i] = cabac.decode_bit(sig_ctx(i));
+        if coded[i] && cabac.decode_bit(last_ctx(i)) {
+            return i;
+        }
+    }
+    coded[final_pos] = true;
+    final_pos
+}
+
+/// Decodes the levels of all significant coefficients, from scan position
+/// `last_idx` down to 0, and stores them at `coeffs[scan[i]]`.
+///
+/// `level_ctx` is the first context of the level bins for the block category.
+/// The context state tracks how many levels equal to one and greater than one
+/// have been seen so far in the block.
+fn decode_levels(cabac: &mut CABAC, coeffs: &mut [i16], scan: &[usize], coded: &[bool], last_idx: usize, level_ctx: usize) {
+    let mut coef_ctx = 0;
+    for i in (0..=last_idx).rev() {
+        if !coded[i] {
+            continue;
+        }
+        let zero_ctx = if coef_ctx < 4 { coef_ctx + 1 } else { 0 };
+        let level: i16 = if !cabac.decode_bit(level_ctx + zero_ctx) {
+            if coef_ctx < 3 {
+                coef_ctx += 1;
+            }
+            1
+        } else {
+            let cur_ctx = level_ctx + (coef_ctx + 2).max(5);
+            coef_ctx = (coef_ctx + 1).clamp(4, 7);
+
+            let mut coef = 2;
+            while coef < 15 && cabac.decode_bit(cur_ctx) {
+                coef += 1;
+            }
+            if coef == 15 {
+                // Escape: a unary prefix of bypass bins gives the number of
+                // suffix bits that follow.
+                let mut pfx = 0;
+                while pfx < 15 && cabac.decode_bypass() {
+                    pfx += 1;
+                }
+                let mut tail = 1;
+                for _ in 0..pfx {
+                    tail = (tail << 1) + (cabac.decode_bypass() as i16);
+                }
+                coef + tail - 1
+            } else {
+                coef
+            }
+        };
+        // The sign is a single bypass bin following each level.
+        coeffs[scan[i]] = if cabac.decode_bypass() { -level } else { level };
+    }
+}
+
+/// Decodes one residual block of category `cat` with 4, 15 or 16 coefficients
+/// and returns its coded block flag.
+///
+/// `ctx_off` is the coded-block-flag context offset produced by
+/// `derive_ctx_off`. Coefficients are left untouched when the flag is clear.
+/// Any other slice length is a caller bug and hits `unreachable!()`.
+fn decode_block(cabac: &mut CABAC, coeffs: &mut [i16], cat: usize, ctx_off: usize) -> bool {
+    // Per category: (offset of the significance flag contexts,
+    //                offset of the level contexts).
+    const CTX_BASE: [(usize, usize); 5] = [
+        (0, 0), (15, 10), (29, 20), (44, 30), (47, 39)
+    ];
+    let (flag_off, coef_off) = CTX_BASE[cat];
+    let scan: &[usize] = match coeffs.len() {
+        4 => &CHROMA_DC_SCAN,
+        15 => &ZIGZAG1,
+        16 => &ZIGZAG,
+        _ => unreachable!(),
+    };
+
+    let coded_block_flag = cabac.decode_bit(85 + ctx_off);
+    if coded_block_flag {
+        let mut coded = [false; 16];
+        let ncoef = coeffs.len();
+        let last_idx = decode_sig_map(
+            cabac,
+            &mut coded[..ncoef],
+            |i| 105 + flag_off + i, // or 277 for interlaced
+            |i| 166 + flag_off + i, // or 338 for interlaced
+        );
+        decode_levels(cabac, coeffs, scan, &coded, last_idx, 227 + coef_off);
+    }
+    coded_block_flag
+}
+
+/// Decodes one 8x8 residual block.
+///
+/// No coded block flag is read here; the block is always parsed. The
+/// significance contexts are shared between scan positions through the two
+/// lookup tables below. `_cat` is currently unused.
+fn decode_block8x8(cabac: &mut CABAC, coeffs: &mut [i16; 64], _cat: usize) {
+    const SIG_FLAG_MAP: [usize; 63] = [
+         0,  1,  2,  3,  4,  5,  5,  4,  4,  3,  3,  4,  4,  4,  5,  5,
+         4,  4,  4,  4,  3,  3,  6,  7,  7,  7,  8,  9, 10,  9,  8,  7,
+         7,  6, 11, 12, 13, 11,  6,  7,  8,  9, 14, 10,  9,  8,  6, 11,
+        12, 13, 11,  6,  9, 14, 10,  9, 11, 12, 13, 11, 14, 10, 12
+    ];
+    const LAST_SIG_FLAG_MAP: [usize; 63] = [
+        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+        5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
+    ];
+
+    let mut coded = [false; 64];
+    let last_idx = decode_sig_map(
+        cabac,
+        &mut coded,
+        |i| 402 + SIG_FLAG_MAP[i],
+        |i| 417 + LAST_SIG_FLAG_MAP[i],
+    );
+    decode_levels(cabac, &mut coeffs[..], &ZIGZAG8X8, &coded, last_idx, 426);
+}
+
+/// Derives the coded-block-flag context offset for block `blk_no` of category
+/// `cat` (0: Intra16x16 DC, 1 and 2: luma 4x4, 3: chroma DC, 4: chroma AC).
+///
+/// For each of the left (A) and top (B) neighbours a condition term is
+/// computed; the result is `cat * 4 + term_b * 2 + term_a`. Any other
+/// category hits `unreachable!()`.
+///
+/// Note: the special case for constrained intra prediction combined with
+/// slice data partitioning is not handled.
+fn derive_ctx_off(sstate: &mut SliceState, cat: usize, blk_no: usize) -> usize {
+    let cur_mbt = sstate.get_cur_mb().mb_type;
+    let mut left_mbt = sstate.get_left_mb().mb_type;
+    let mut top_mbt = sstate.get_top_mb().mb_type;
+
+    // (left block present, top block present, left coded, top coded)
+    let (left_present, top_present, left_coded, top_coded) = match cat {
+        0 => {
+            let left_present = left_mbt == CompactMBType::Intra16x16;
+            let top_present = top_mbt == CompactMBType::Intra16x16;
+            let left_coded = (sstate.get_left_mb().coded_flags & 1) as usize;
+            let top_coded = (sstate.get_top_mb().coded_flags & 1) as usize;
+            (left_present, top_present, left_coded, top_coded)
+        },
+        1 | 2 => {
+            // Neighbours inside the current macroblock share its type.
+            if (blk_no & 3) != 0 {
+                left_mbt = cur_mbt;
+            }
+            if blk_no >= 4 {
+                top_mbt = cur_mbt;
+            }
+            let left_present = sstate.get_left_blk4(blk_no).ncoded != 0;
+            let top_present = sstate.get_top_blk4(blk_no).ncoded != 0;
+            (left_present, top_present, usize::from(left_present), usize::from(top_present))
+        },
+        3 => {
+            let dc_bit = 1 << (blk_no + 1 + 16);
+            let left_present = (sstate.get_left_mb().cbp & 0x30) != 0;
+            let top_present = (sstate.get_top_mb().cbp & 0x30) != 0;
+            let left_coded = usize::from((sstate.get_left_mb().coded_flags & dc_bit) != 0);
+            let top_coded = usize::from((sstate.get_top_mb().coded_flags & dc_bit) != 0);
+            (left_present, top_present, left_coded, top_coded)
+        },
+        4 => {
+            let chroma = blk_no >> 2;
+            let inner_left = (blk_no & 1) != 0;
+            let inner_top = (blk_no & 2) != 0;
+            if inner_left {
+                left_mbt = cur_mbt;
+            }
+            if inner_top {
+                top_mbt = cur_mbt;
+            }
+            let left_present = inner_left || (sstate.get_left_mb().cbp & 0x20) != 0;
+            let top_present = inner_top || (sstate.get_top_mb().cbp & 0x20) != 0;
+            let left_coded = usize::from(sstate.get_left_blk8(blk_no & 3).ncoded_c[chroma] != 0);
+            let top_coded = usize::from(sstate.get_top_blk8(blk_no & 3).ncoded_c[chroma] != 0);
+            (left_present, top_present, left_coded, top_coded)
+        },
+        _ => unreachable!(),
+    };
+
+    // Priority of the rules, highest first: a PCM neighbour, or a missing
+    // neighbour of an intra macroblock, counts as coded; a missing neighbour
+    // of an inter macroblock, or a neighbour without the block in question,
+    // counts as not coded; otherwise the recorded flag is used.
+    let cond_term_a = if left_mbt == CompactMBType::PCM || (left_mbt == CompactMBType::None && cur_mbt.is_intra()) {
+        1
+    } else if (left_mbt == CompactMBType::None && cur_mbt.is_inter()) || !left_present {
+        0
+    } else {
+        left_coded
+    };
+    let cond_term_b = if top_mbt == CompactMBType::PCM || (top_mbt == CompactMBType::None && cur_mbt.is_intra()) {
+        1
+    } else if (top_mbt == CompactMBType::None && cur_mbt.is_inter()) || !top_present {
+        0
+    } else {
+        top_coded
+    };
+
+    cat * 4 + cond_term_b * 2 + cond_term_a
+}
+
+/// Decodes all residual data of the current macroblock.
+///
+/// Blocks are read in this order: the Intra16x16 luma DC block (if the
+/// macroblock is Intra16x16), the luma blocks of every 8x8 quadrant enabled in
+/// `mb_info.cbpy` (four 4x4 blocks, or one 8x8 block when
+/// `transform_size_8x8` is set), both chroma DC blocks (if `cbpc` is
+/// non-zero) and the chroma AC blocks (if `cbpc` has bit 1 set).
+///
+/// The per-block results are recorded in `mb_info.coded` and in the current
+/// block state of `sstate`. They are also collected into the `coded_flags`
+/// word of the current macroblock with the following layout: bit 0 is the
+/// Intra16x16 DC block, bits 1..=16 the luma 4x4 blocks, bits 17 and 18 the
+/// chroma DC blocks and bits 19..=26 the chroma AC blocks.
+pub fn decode_residual_cabac(cabac: &mut CABAC, sstate: &mut SliceState, mb_info: &mut CurrentMBInfo) {
+    sstate.get_cur_mb().mb_type = mb_info.mb_type.into();
+    let is_intra16 = mb_info.mb_type.is_intra16x16();
+    let mut coded_flags = 0;
+
+    if is_intra16 {
+        let off = derive_ctx_off(sstate, 0, 0);
+        let coded = decode_block(cabac, &mut mb_info.coeffs[24], 0, off);
+        mb_info.coded[24] = coded;
+        if coded {
+            coded_flags |= 1;
+        }
+    }
+
+    for blk8 in 0..4 {
+        if (mb_info.cbpy & (1 << blk8)) == 0 {
+            continue;
+        }
+        // Index of the top-left 4x4 block of this 8x8 quadrant.
+        let blk8_base = (blk8 & 1) * 2 + (blk8 & 2) * 4;
+        if !mb_info.transform_size_8x8 {
+            for blk4 in 0..4 {
+                let blk_no = blk8_base + (blk4 & 1) + (blk4 & 2) * 2;
+                let coded = if is_intra16 {
+                    // The DC coefficient was sent separately, only AC follows.
+                    let off = derive_ctx_off(sstate, 1, blk_no);
+                    decode_block(cabac, &mut mb_info.coeffs[blk_no][1..], 1, off)
+                } else {
+                    let off = derive_ctx_off(sstate, 2, blk_no);
+                    decode_block(cabac, &mut mb_info.coeffs[blk_no], 2, off)
+                };
+                sstate.get_cur_blk4(blk_no).ncoded = coded as u8;
+                mb_info.coded[blk_no] = coded;
+                if coded {
+                    coded_flags |= 1 << (1 + blk_no);
+                }
+            }
+        } else {
+            decode_block8x8(cabac, &mut mb_info.coeffs8x8[blk8].coeffs, 5);
+            // Mark all four covered 4x4 blocks as coded. Luma block n lives
+            // at bit n + 1 (bit 0 belongs to the Intra16x16 DC block), so the
+            // 2x2 mask has to be shifted by one more than the block index.
+            coded_flags |= 0x33 << (1 + blk8_base);
+            for &off in [0usize, 1, 4, 5].iter() {
+                mb_info.coded[blk8_base + off] = true;
+                sstate.get_cur_blk4(blk8_base + off).ncoded = 1;
+            }
+        }
+    }
+
+    if (mb_info.cbpc & 3) != 0 {
+        for chroma in 0..2 {
+            let off = derive_ctx_off(sstate, 3, chroma);
+            let coded = decode_block(cabac, &mut mb_info.chroma_dc[chroma], 3, off);
+            if coded {
+                coded_flags |= 1 << (16 + 1 + chroma);
+            }
+        }
+    }
+    if (mb_info.cbpc & 2) != 0 {
+        for chroma in 0..2 {
+            for blk4 in 0..4 {
+                let blk_no = 16 + chroma * 4 + blk4;
+                let off = derive_ctx_off(sstate, 4, blk_no - 16);
+                let coded = decode_block(cabac, &mut mb_info.coeffs[blk_no][1..], 4, off);
+                sstate.get_cur_blk8(blk4).ncoded_c[chroma] = coded as u8;
+                mb_info.coded[blk_no] = coded;
+                if coded {
+                    coded_flags |= 1 << (1 + 2 + blk_no);
+                }
+            }
+        }
+    }
+    sstate.get_cur_mb().coded_flags = coded_flags;
+}
--- /dev/null
+use nihav_core::codecs::{DecoderResult, DecoderError};
+use nihav_core::io::bitreader::*;
+use nihav_core::io::codebook::*;
+use nihav_core::io::intcode::*;
+use nihav_codec_support::codecs::MV;
+
+use super::super::*;
+use super::*;
+use super::dsp::{CHROMA_DC_SCAN, ZIGZAG, ZIGZAG1};
+use super::super::slice::SliceHeader;
+
+/// Maps an intra macroblock type index to its `MBType`.
+///
+/// Index 0 is `Intra4x4` and index 25 is `PCM`. Every other index selects an
+/// `Intra16x16` type: the low two bits of `idx - 1` give the prediction mode,
+/// and `(idx - 1) / 4` gives the chroma CBP, with values of 3 and above
+/// additionally signalling a luma CBP of `0xF`.
+fn map_i_type(idx: usize) -> MBType {
+    match idx {
+        0 => MBType::Intra4x4,
+        25 => MBType::PCM,
+        _ => {
+            let i16_idx = idx - 1;
+            let imode = (i16_idx & 3) as u8;
+            let group = (i16_idx / 4) as u8;
+            if group >= 3 {
+                MBType::Intra16x16(imode, 0xF, group - 3)
+            } else {
+                MBType::Intra16x16(imode, 0x0, group)
+            }
+        },
+    }
+}
+
+const NUM_I_TYPES: usize = 26; // number of intra type indices understood by map_i_type: 0 = Intra4x4, 1..=24 = Intra16x16 variants, 25 = PCM
+
+const P_TYPES: [MBType; 5] = [ // inter types indexed by the mb_type code in P/SP slices; larger codes select intra types (see decode_mb_type_cavlc)
+ MBType::P16x16, MBType::P16x8, MBType::P8x16, MBType::P8x8, MBType::P8x8Ref0
+];
+
+const B_TYPES: [MBType; 23] = [ // inter types indexed by the mb_type code in B slices; larger codes select intra types (see decode_mb_type_cavlc)
+ MBType::Direct,
+ MBType::B16x16(BMode::L0),
+ MBType::B16x16(BMode::L1),
+ MBType::B16x16(BMode::Bi),
+ MBType::B16x8(BMode::L0, BMode::L0),
+ MBType::B8x16(BMode::L0, BMode::L0),
+ MBType::B16x8(BMode::L1, BMode::L1),
+ MBType::B8x16(BMode::L1, BMode::L1),
+ MBType::B16x8(BMode::L0, BMode::L1),
+ MBType::B8x16(BMode::L0, BMode::L1),
+ MBType::B16x8(BMode::L1, BMode::L0),
+ MBType::B8x16(BMode::L1, BMode::L0),
+ MBType::B16x8(BMode::L0, BMode::Bi),
+ MBType::B8x16(BMode::L0, BMode::Bi),
+ MBType::B16x8(BMode::L1, BMode::Bi),
+ MBType::B8x16(BMode::L1, BMode::Bi),
+ MBType::B16x8(BMode::Bi, BMode::L0),
+ MBType::B8x16(BMode::Bi, BMode::L0),
+ MBType::B16x8(BMode::Bi, BMode::L1),
+ MBType::B8x16(BMode::Bi, BMode::L1),
+ MBType::B16x8(BMode::Bi, BMode::Bi),
+ MBType::B8x16(BMode::Bi, BMode::Bi),
+ MBType::B8x8,
+];
+
+/// Reads the macroblock type code and maps it to an `MBType` for the current
+/// slice type.
+///
+/// In P/SP and B slices the inter types come first and the intra types follow
+/// them; in SI slices code 0 is the special SI type (reported as `Intra4x4`)
+/// and the intra types follow it. A code outside the valid range for the
+/// slice type fails validation.
+pub fn decode_mb_type_cavlc(br: &mut BitReader, slice_hdr: &SliceHeader) -> DecoderResult<MBType> {
+    let mb_type_id = br.read_ue()? as usize;
+    let inter_types: &[MBType] = match slice_hdr.slice_type {
+        SliceType::I => {
+            validate!(mb_type_id < NUM_I_TYPES);
+            return Ok(map_i_type(mb_type_id));
+        },
+        SliceType::SI => {
+            validate!(mb_type_id < NUM_I_TYPES + 1);
+            return Ok(match mb_type_id.checked_sub(1) {
+                None => MBType::Intra4x4, // special SI one
+                Some(i_type_id) => map_i_type(i_type_id),
+            });
+        },
+        SliceType::P | SliceType::SP => &P_TYPES,
+        SliceType::B => &B_TYPES,
+    };
+
+    validate!(mb_type_id < NUM_I_TYPES + inter_types.len());
+    Ok(inter_types
+        .get(mb_type_id)
+        .copied()
+        .unwrap_or_else(|| map_i_type(mb_type_id - inter_types.len())))
+}
+
+/// Fills `dst` with reference indices.
+///
+/// When more than one reference is active each index is read as a truncated
+/// code with maximum `num_refs - 1`; otherwise nothing is read and every
+/// entry becomes `ZERO_REF`.
+fn read_refs(br: &mut BitReader, dst: &mut [PicRef], num_refs: usize) -> DecoderResult<()> {
+    if num_refs <= 1 {
+        for entry in dst.iter_mut() {
+            *entry = ZERO_REF;
+        }
+        return Ok(());
+    }
+
+    let max_idx = num_refs as u32 - 1;
+    for entry in dst.iter_mut() {
+        let ref_idx = br.read_te(max_idx)? as u8;
+        *entry = PicRef::new(ref_idx);
+    }
+    Ok(())
+}
+
+/// Reads one motion vector difference per entry of `mvs`, horizontal
+/// component first, each as a signed code.
+fn read_mvs(br: &mut BitReader, mvs: &mut [MV]) -> DecoderResult<()> {
+    for MV { x, y } in mvs.iter_mut() {
+        *x = br.read_se()? as i16;
+        *y = br.read_se()? as i16;
+    }
+    Ok(())
+}
+
+/// Reads the intra prediction mode of the block whose top-left 4x4 block is
+/// `blk4`.
+///
+/// The predicted mode is the smaller of the top and left neighbour mode
+/// indices, or 2 when that minimum is -1. If the one-bit flag is clear, a
+/// three-bit remainder replaces the prediction, skipping over the predicted
+/// value itself.
+fn read_ipred_mode_cavlc(br: &mut BitReader, sstate: &mut SliceState, blk4: usize) -> DecoderResult<u8> {
+    let top_pred = sstate.get_top_blk4(blk4).ipred;
+    let left_pred = sstate.get_left_blk4(blk4).ipred;
+    let min_idx = top_pred.into_pred_idx().min(left_pred.into_pred_idx());
+    let predicted = if min_idx != -1 { min_idx as u8 } else { 2 };
+
+    if br.read_bool()? {
+        return Ok(predicted);
+    }
+    let rem_mode = br.read(3)? as u8;
+    Ok(if rem_mode >= predicted { rem_mode + 1 } else { rem_mode })
+}
+
+/// Reads the prediction information of a macroblock of type `mb_type`.
+///
+/// Intra 4x4/8x8 macroblocks get their luma prediction modes (stored both in
+/// `mb_info.ipred` and in the current 4x4 block state) and the chroma mode;
+/// Intra16x16 marks all blocks as DC-predicted and reads the chroma mode.
+/// Inter macroblocks get their reference indices followed by their motion
+/// vector differences. In the two-partition B types and the 8x8 types a
+/// reference index is only read (and only written) when more than one
+/// reference is active in the list; otherwise the entry in `mb_info` keeps
+/// its previous value. Other macroblock types carry no prediction syntax here.
+///
+/// Returns an error when the bitstream reader fails or a value is out of range.
+#[allow(clippy::cognitive_complexity)]
+pub fn decode_mb_pred_cavlc(br: &mut BitReader, slice_hdr: &SliceHeader, mb_type: MBType, sstate: &mut SliceState, mb_info: &mut CurrentMBInfo) -> DecoderResult<()> {
+    mb_info.mb_type = mb_type;
+    let num_l0 = slice_hdr.num_ref_idx_l0_active;
+    let num_l1 = slice_hdr.num_ref_idx_l1_active;
+    match mb_type {
+        MBType::Intra4x4 => {
+            for &(x, y) in I4X4_SCAN.iter() {
+                let blk4 = (x as usize) + (y as usize) * 4;
+                let pred_mode = read_ipred_mode_cavlc(br, sstate, blk4)?;
+                mb_info.ipred[blk4] = pred_mode.into();
+                sstate.get_cur_blk4(blk4).ipred = pred_mode.into();
+            }
+            mb_info.chroma_ipred = br.read_ue_lim(3)? as u8;
+        },
+        MBType::Intra8x8 => {
+            for part in 0..4 {
+                let blk4 = (part & 1) * 2 + (part & 2) * 4;
+                let pred_mode = read_ipred_mode_cavlc(br, sstate, blk4)?;
+                // One mode covers all four 4x4 blocks of the 8x8 block.
+                for &off in [0usize, 1, 4, 5].iter() {
+                    mb_info.ipred[blk4 + off] = pred_mode.into();
+                    sstate.get_cur_blk4(blk4 + off).ipred = pred_mode.into();
+                }
+            }
+            mb_info.chroma_ipred = br.read_ue_lim(3)? as u8;
+        },
+        MBType::Intra16x16(_ipred, _, _) => {
+            sstate.fill_ipred(IntraPredMode::DC);
+            mb_info.chroma_ipred = br.read_ue_lim(3)? as u8;
+        },
+        MBType::P16x16 | MBType::P16x8 | MBType::P8x16 => {
+            let nparts = mb_type.num_parts();
+            read_refs(br, &mut mb_info.ref_l0[..nparts], num_l0)?;
+            read_mvs(br, &mut mb_info.mv_l0[..nparts])?;
+        },
+        MBType::B16x16(mode) => {
+            let uses_l0 = mode != BMode::L1;
+            let uses_l1 = mode != BMode::L0;
+            if uses_l0 {
+                read_refs(br, &mut mb_info.ref_l0[..1], num_l0)?;
+            }
+            if uses_l1 {
+                read_refs(br, &mut mb_info.ref_l1[..1], num_l1)?;
+            }
+            if uses_l0 {
+                read_mvs(br, &mut mb_info.mv_l0[..1])?;
+            }
+            if uses_l1 {
+                read_mvs(br, &mut mb_info.mv_l1[..1])?;
+            }
+        },
+        MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
+            let modes = [mode0, mode1];
+            // Order: list 0 references, list 1 references, list 0 vectors,
+            // list 1 vectors; within each group partition 0 comes first.
+            if num_l0 > 1 {
+                for (part, &mode) in modes.iter().enumerate() {
+                    if mode != BMode::L1 {
+                        read_refs(br, &mut mb_info.ref_l0[part..=part], num_l0)?;
+                    }
+                }
+            }
+            if num_l1 > 1 {
+                for (part, &mode) in modes.iter().enumerate() {
+                    if mode != BMode::L0 {
+                        read_refs(br, &mut mb_info.ref_l1[part..=part], num_l1)?;
+                    }
+                }
+            }
+            for (part, &mode) in modes.iter().enumerate() {
+                if mode != BMode::L1 {
+                    read_mvs(br, &mut mb_info.mv_l0[part..=part])?;
+                }
+            }
+            for (part, &mode) in modes.iter().enumerate() {
+                if mode != BMode::L0 {
+                    read_mvs(br, &mut mb_info.mv_l1[part..=part])?;
+                }
+            }
+        },
+        MBType::P8x8 | MBType::P8x8Ref0 | MBType::B8x8 => {
+            let is_p = mb_type != MBType::B8x8;
+            for sub_mb in mb_info.sub_mb_type.iter_mut() {
+                *sub_mb = decode_sub_mb_type(br, is_p)?;
+            }
+            // P8x8Ref0 never carries list 0 reference indices.
+            let read_l0_refs = num_l0 > 1 && mb_type != MBType::P8x8Ref0;
+            for (part, &sub_mb) in mb_info.sub_mb_type.iter().enumerate() {
+                if read_l0_refs && sub_mb != SubMBType::Direct8x8 && !sub_mb.is_l1() {
+                    read_refs(br, &mut mb_info.ref_l0[part..=part], num_l0)?;
+                }
+            }
+            for (part, &sub_mb) in mb_info.sub_mb_type.iter().enumerate() {
+                if num_l1 > 1 && sub_mb != SubMBType::Direct8x8 && !sub_mb.is_l0() {
+                    read_refs(br, &mut mb_info.ref_l1[part..=part], num_l1)?;
+                }
+            }
+            for (part, &sub_mb) in mb_info.sub_mb_type.iter().enumerate() {
+                if sub_mb != SubMBType::Direct8x8 && !sub_mb.is_l1() {
+                    let num_subparts = sub_mb.num_parts();
+                    read_mvs(br, &mut mb_info.mv_l0[part * 4..][..num_subparts])?;
+                }
+            }
+            for (part, &sub_mb) in mb_info.sub_mb_type.iter().enumerate() {
+                if sub_mb != SubMBType::Direct8x8 && !sub_mb.is_l0() {
+                    let num_subparts = sub_mb.num_parts();
+                    read_mvs(br, &mut mb_info.mv_l1[part * 4..][..num_subparts])?;
+                }
+            }
+        },
+        _ => {},
+    };
+    Ok(())
+}
+
+/// Reads a sub-macroblock type index from the bitstream and maps it to a `SubMBType`.
+///
+/// `is_p` selects the four-entry P table, otherwise the thirteen-entry B table is
+/// used. The index is read with `read_ue_lim`, passing the last valid table index
+/// as the limit.
+fn decode_sub_mb_type(br: &mut BitReader, is_p: bool) -> DecoderResult<SubMBType> {
+    const P_SUB_TYPES: [SubMBType; 4] = [
+        SubMBType::P8x8, SubMBType::P8x4, SubMBType::P4x8, SubMBType::P4x4
+    ];
+    const B_SUB_TYPES: [SubMBType; 13] = [
+        SubMBType::Direct8x8,
+        SubMBType::B8x8(BMode::L0), SubMBType::B8x8(BMode::L1), SubMBType::B8x8(BMode::Bi),
+        SubMBType::B8x4(BMode::L0), SubMBType::B4x8(BMode::L0),
+        SubMBType::B8x4(BMode::L1), SubMBType::B4x8(BMode::L1),
+        SubMBType::B8x4(BMode::Bi), SubMBType::B4x8(BMode::Bi),
+        SubMBType::B4x4(BMode::L0), SubMBType::B4x4(BMode::L1), SubMBType::B4x4(BMode::Bi),
+    ];
+    let table: &[SubMBType] = if is_p { &P_SUB_TYPES } else { &B_SUB_TYPES };
+    let idx = br.read_ue_lim(table.len() as u32 - 1)? as usize;
+    Ok(table[idx])
+}
+
+/// Splits a decoded coefficient token into `(trailing_ones, total_coeff)`.
+///
+/// The first six token values are irregular and come from a small table; the
+/// remaining ones follow a regular pattern with four trailing-ones variants per
+/// coefficient count.
+fn map_coeff_token(val: u8) -> (usize, usize) {
+    const SMALL_TOKENS: [(usize, usize); 6] = [
+        (0, 0), (0, 1), (1, 1), (0, 2), (1, 2), (2, 2)
+    ];
+
+    match SMALL_TOKENS.get(usize::from(val)) {
+        Some(&pair) => pair,
+        None => (usize::from((val - 6) & 3), usize::from((val + 6) >> 2)),
+    }
+}
+
+/// Decodes one CAVLC-coded residual block.
+///
+/// Reads the coefficient token with `cb`, then the levels (trailing ones first),
+/// the total number of zeros and the individual zero runs, and finally stores the
+/// levels into `coeffs` at the positions given by `scan`, walking the levels in
+/// reverse decoding order.
+///
+/// * `coeffs` - destination block; its length selects the total-zeros tables
+///   (blocks longer than four coefficients use `total_zeros_cb`, the others use
+///   `cdc_total_zeros_cb`).
+/// * `scan` - maps a scan position to an index inside `coeffs`.
+///
+/// Returns the number of coded coefficients. Fails when reading from the
+/// bitstream fails or when the decoded counts do not fit into the block.
+fn decode_coeffs(br: &mut BitReader, coeffs: &mut [i16], scan: &[usize], cb: &Codebook<u8>, tables: &CAVLCTables) -> DecoderResult<u8> {
+    let coeff_token = br.read_cb(cb)?;
+    let (trail_ones, total_coeff) = map_coeff_token(coeff_token);
+    // A damaged stream may signal more coefficients than the block can hold
+    // (e.g. 16 for a 15-entry AC block); report it instead of indexing past `scan`.
+    validate!(total_coeff <= scan.len());
+    let mut level = [0i16; 16];
+    let mut run = [0u8; 16];
+    if total_coeff > 0 {
+        let mut suffix_length = (total_coeff > 10 && trail_ones < 3) as u8;
+        for i in 0..total_coeff {
+            if i < trail_ones {
+                // trailing ones carry only a sign bit
+                if !br.read_bool()? {
+                    level[i] = 1;
+                } else {
+                    level[i] = -1;
+                }
+            } else {
+                let level_prefix = br.read_code(UintCodeType::UnaryZeroes)?;
+                validate!(level_prefix <= 19);
+                let mut level_code = level_prefix.min(15) << suffix_length;
+                if suffix_length > 0 || level_prefix >= 14 {
+                    let level_suffix_size = if level_prefix == 14 && suffix_length == 0 {
+                            4
+                        } else if level_prefix >= 15 {
+                            (level_prefix - 3) as u8
+                        } else {
+                            suffix_length
+                        };
+                    let level_suffix = br.read(level_suffix_size)?;
+                    level_code += level_suffix;
+                }
+                if level_prefix >= 15 && suffix_length == 0 {
+                    level_code += 15;
+                }
+                if level_prefix >= 16 {
+                    level_code += (1 << (level_prefix - 3)) - 4096;
+                }
+                // the first non-trailing level cannot be +-1 when fewer than three
+                // trailing ones were signalled, so its code is offset
+                if i == trail_ones && trail_ones < 3 {
+                    level_code += 2;
+                }
+                // even codes are positive levels, odd codes are negative ones
+                level[i] = if (level_code & 1) == 0 {
+                        (level_code as i32 + 2) >> 1
+                    } else {
+                        -((level_code as i32 + 1) >> 1)
+                    } as i16;
+                if suffix_length == 0 {
+                    suffix_length = 1;
+                }
+                if level[i].abs() > (3 << (suffix_length - 1)) && suffix_length < 6 {
+                    suffix_length += 1;
+                }
+            }
+        }
+        let mut zeros_left = if total_coeff < coeffs.len() {
+                let cb = if coeffs.len() > 4 {
+                        &tables.total_zeros_cb[total_coeff - 1]
+                    } else {
+                        &tables.cdc_total_zeros_cb[total_coeff - 1]
+                    };
+                br.read_cb(cb)?
+            } else { 0 };
+        // coefficients plus the zeros between them must fit into the scan
+        validate!(total_coeff + usize::from(zeros_left) <= scan.len());
+        for i in 0..total_coeff - 1 {
+            if zeros_left > 0 {
+                let run_before = br.read_cb(&tables.run_before_cb[(zeros_left - 1).min(6) as usize])?;
+                // the last run table is shared by all larger `zeros_left` values,
+                // so it can yield a run longer than what is actually left
+                validate!(run_before <= zeros_left);
+                run[i] = run_before;
+                zeros_left -= run_before;
+            }
+        }
+        // whatever remains precedes the last (lowest-position) coefficient
+        run[total_coeff - 1] = zeros_left;
+        let mut idx = 0;
+        for i in (0..total_coeff).rev() {
+            idx += run[i] as usize;
+            coeffs[scan[idx]] = level[i];
+            idx += 1;
+        }
+    }
+    Ok(total_coeff as u8)
+}
+
+/// Decodes a complete 16-coefficient block, placing values through the `ZIGZAG` scan.
+fn decode_block(br: &mut BitReader, coeffs: &mut [i16; 16], cb: &Codebook<u8>, tables: &CAVLCTables) -> DecoderResult<u8> {
+    let dst: &mut [i16] = coeffs;
+    decode_coeffs(br, dst, &ZIGZAG, cb, tables)
+}
+
+/// Decodes the coefficients of a block whose first (DC) entry is handled elsewhere:
+/// only `coeffs[1..]` is written, using the `ZIGZAG1` scan.
+fn decode_block_ac(br: &mut BitReader, coeffs: &mut [i16; 16], cb: &Codebook<u8>, tables: &CAVLCTables) -> DecoderResult<u8> {
+    let (_dc, ac) = coeffs.split_at_mut(1);
+    decode_coeffs(br, ac, &ZIGZAG1, cb, tables)
+}
+
+/// Decodes the four chroma DC coefficients of one plane using `CHROMA_DC_SCAN`.
+fn decode_chroma_dc(br: &mut BitReader, coeffs: &mut [i16; 4], cb: &Codebook<u8>, tables: &CAVLCTables) -> DecoderResult<u8> {
+    let dst: &mut [i16] = coeffs;
+    decode_coeffs(br, dst, &CHROMA_DC_SCAN, cb, tables)
+}
+
+/// Selects which of the four coefficient token codebooks to use for the given
+/// averaged neighbour coefficient count.
+fn get_cb_idx(nc: u8) -> usize {
+    if nc < 2 {
+        0
+    } else if nc < 4 {
+        1
+    } else if nc < 8 {
+        2
+    } else {
+        3
+    }
+}
+
+/// Decodes all CAVLC residual data of the current macroblock into `mb_info`.
+///
+/// In order: the Intra16x16 luma DC block (stored as block 24), the luma 4x4
+/// blocks of every 8x8 quadrant enabled in `cbpy`, the chroma DC blocks and the
+/// chroma AC blocks (blocks 16..24). For each decoded block the coefficient count
+/// is stored in the slice state (it selects the codebook for later neighbours)
+/// and `mb_info.coded` is updated.
+///
+/// The coefficient token codebook is chosen from the rounded average of the left
+/// and top neighbour counts; when one neighbour is unavailable the other count is
+/// used for both.
+pub fn decode_residual_cavlc(br: &mut BitReader, sstate: &mut SliceState, mb_info: &mut CurrentMBInfo, tables: &CAVLCTables) -> DecoderResult<()> {
+    if mb_info.mb_type.is_intra16x16() {
+        let mut top_nc = sstate.get_top_blk4(0).ncoded;
+        let mut left_nc = sstate.get_left_blk4(0).ncoded;
+        if !sstate.has_left {
+            left_nc = top_nc;
+        } else if !sstate.has_top {
+            top_nc = left_nc;
+        }
+        let cb_idx = get_cb_idx((left_nc + top_nc + 1) >> 1);
+
+        let nc = decode_block(br, &mut mb_info.coeffs[24], &tables.coeff_token_cb[cb_idx], tables)?;
+        mb_info.coded[24] = nc != 0;
+    }
+    for blk8 in 0..4 {
+        if (mb_info.cbpy & (1 << blk8)) != 0 {
+            for blk4 in 0..4 {
+                // position of the 4x4 block inside the macroblock, in 4x4 units
+                let bx = (blk8 & 1) * 2 + (blk4 & 1);
+                let by = ((blk8 & 2) * 2 + (blk4 & 2)) >> 1;
+                let blk_no = bx + by * 4;
+
+                let mut top_nc = sstate.get_top_blk4(blk_no).ncoded;
+                let mut left_nc = sstate.get_left_blk4(blk_no).ncoded;
+                if bx == 0 && !sstate.has_left {
+                    left_nc = top_nc;
+                } else if by == 0 && !sstate.has_top {
+                    top_nc = left_nc;
+                }
+                let cb_idx = get_cb_idx((left_nc + top_nc + 1) >> 1);
+
+                // Intra16x16 blocks carry their DC in the separate block 24
+                let nc = if mb_info.mb_type.is_intra16x16() {
+                        decode_block_ac(br, &mut mb_info.coeffs[blk_no], &tables.coeff_token_cb[cb_idx], tables)?
+                    } else {
+                        decode_block(br, &mut mb_info.coeffs[blk_no], &tables.coeff_token_cb[cb_idx], tables)?
+                    };
+                sstate.get_cur_blk4(blk_no).ncoded = nc;
+                mb_info.coded[blk_no] = nc != 0;
+            }
+        }
+    }
+    if mb_info.transform_size_8x8 {
+        // Gather the four 4x4 blocks of every 8x8 quadrant into one 8x8 block.
+        // NOTE(review): H.264 (7.3.5.3) interleaves the coefficients of the four
+        // CAVLC 4x4 blocks in 8x8 scan order instead of tiling them spatially -
+        // confirm that this layout is what the 8x8 transform code expects.
+        for y in 0..2 {
+            for x in 0..2 {
+                // top-left 4x4 block of the quadrant; blocks are numbered
+                // `bx + by * 4`, so a quadrant step is two blocks horizontally
+                let base = x * 2 + y * 8;
+                let b0 = &mb_info.coeffs[base];
+                let b1 = &mb_info.coeffs[base + 1];
+                let b2 = &mb_info.coeffs[base + 4];
+                let b3 = &mb_info.coeffs[base + 5];
+                let dst = &mut mb_info.coeffs8x8[x + y * 2].coeffs;
+                // upper four rows come from the two upper 4x4 blocks
+                for (dst, (s0, s1)) in dst.chunks_mut(8).zip(b0.chunks(4).zip(b1.chunks(4))) {
+                    let (d0, d1) = dst.split_at_mut(4);
+                    d0.copy_from_slice(s0);
+                    d1.copy_from_slice(s1);
+                }
+                // lower four rows come from the two lower 4x4 blocks
+                for (dst, (s0, s1)) in dst.chunks_mut(8).skip(4).zip(b2.chunks(4).zip(b3.chunks(4))) {
+                    let (d0, d1) = dst.split_at_mut(4);
+                    d0.copy_from_slice(s0);
+                    d1.copy_from_slice(s1);
+                }
+            }
+        }
+    }
+    for chroma in 0..2 {
+        if (mb_info.cbpc & 3) != 0 {
+            decode_chroma_dc(br, &mut mb_info.chroma_dc[chroma], &tables.cdc_coeff_token_cb, tables)?;
+        }
+    }
+    for chroma in 0..2 {
+        if (mb_info.cbpc & 2) != 0 {
+            for blk4 in 0..4 {
+                let blk_no = 16 + chroma * 4 + blk4;
+                let bx = blk4 & 1;
+                let by = blk4 >> 1;
+
+                let mut top_nc = sstate.get_top_blk8(blk4).ncoded_c[chroma];
+                let mut left_nc = sstate.get_left_blk8(blk4).ncoded_c[chroma];
+                if bx == 0 && !sstate.has_left {
+                    left_nc = top_nc;
+                } else if by == 0 && !sstate.has_top {
+                    top_nc = left_nc;
+                }
+                let cb_idx = get_cb_idx((left_nc + top_nc + 1) >> 1);
+
+                let nc = decode_block_ac(br, &mut mb_info.coeffs[blk_no], &tables.coeff_token_cb[cb_idx], tables)?;
+                sstate.get_cur_blk8(blk4).ncoded_c[chroma] = nc;
+                mb_info.coded[blk_no] = nc != 0;
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Codebooks used for CAVLC residual decoding.
+pub struct CAVLCTables {
+    /// Coefficient token codebooks; the one to use is selected with `get_cb_idx`.
+    coeff_token_cb:     [Codebook<u8>; 4],
+    /// Coefficient token codebook for chroma DC blocks.
+    cdc_coeff_token_cb: Codebook<u8>,
+    /// Total zeros codebooks, indexed by the number of coded coefficients minus one.
+    total_zeros_cb:     [Codebook<u8>; 15],
+    /// Total zeros codebooks for blocks of at most four coefficients (chroma DC).
+    cdc_total_zeros_cb: [Codebook<u8>; 3],
+    /// Run-before codebooks, indexed by the number of zeros left minus one (capped at 6).
+    run_before_cb:      [Codebook<u8>; 7],
+}
+
+/// Codebook symbol mapper: the symbol is the table index itself, narrowed to `u8`.
+fn map_idx(idx: usize) -> u8 {
+    idx as u8
+}
+
+// Builds an MSB-first codebook from a table of codes and a table of code lengths,
+// using the table index as the symbol; panics if the codebook cannot be built.
+macro_rules! create_cb {
+    ($bits: expr, $lens: expr) => {{
+        let mut desc = TableCodebookDescReader::new($bits, $lens, map_idx);
+        Codebook::new(&mut desc, CodebookMode::MSB).unwrap()
+    }}
+}
+
+impl CAVLCTables {
+    /// Builds all CAVLC codebooks from the static code and length tables.
+    ///
+    /// Panics if any of the tables does not describe a valid codebook, since the
+    /// construction results are unwrapped.
+    pub fn new() -> Self {
+        Self {
+            coeff_token_cb: [
+                create_cb!(&COEFF_TOKEN_BITS[0], &COEFF_TOKEN_LENS[0]),
+                create_cb!(&COEFF_TOKEN_BITS[1], &COEFF_TOKEN_LENS[1]),
+                create_cb!(&COEFF_TOKEN_BITS[2], &COEFF_TOKEN_LENS[2]),
+                create_cb!(&COEFF_TOKEN_BITS[3], &COEFF_TOKEN_LENS[3]),
+            ],
+            cdc_coeff_token_cb: create_cb!(&CHROMA_DC_COEFF_TOKEN_BITS, &CHROMA_DC_COEFF_TOKEN_LENS),
+            total_zeros_cb: [
+                create_cb!(&TOTAL_ZERO_BITS[ 0], &TOTAL_ZERO_LENS[ 0]),
+                create_cb!(&TOTAL_ZERO_BITS[ 1], &TOTAL_ZERO_LENS[ 1]),
+                create_cb!(&TOTAL_ZERO_BITS[ 2], &TOTAL_ZERO_LENS[ 2]),
+                create_cb!(&TOTAL_ZERO_BITS[ 3], &TOTAL_ZERO_LENS[ 3]),
+                create_cb!(&TOTAL_ZERO_BITS[ 4], &TOTAL_ZERO_LENS[ 4]),
+                create_cb!(&TOTAL_ZERO_BITS[ 5], &TOTAL_ZERO_LENS[ 5]),
+                create_cb!(&TOTAL_ZERO_BITS[ 6], &TOTAL_ZERO_LENS[ 6]),
+                create_cb!(&TOTAL_ZERO_BITS[ 7], &TOTAL_ZERO_LENS[ 7]),
+                create_cb!(&TOTAL_ZERO_BITS[ 8], &TOTAL_ZERO_LENS[ 8]),
+                create_cb!(&TOTAL_ZERO_BITS[ 9], &TOTAL_ZERO_LENS[ 9]),
+                create_cb!(&TOTAL_ZERO_BITS[10], &TOTAL_ZERO_LENS[10]),
+                create_cb!(&TOTAL_ZERO_BITS[11], &TOTAL_ZERO_LENS[11]),
+                create_cb!(&TOTAL_ZERO_BITS[12], &TOTAL_ZERO_LENS[12]),
+                create_cb!(&TOTAL_ZERO_BITS[13], &TOTAL_ZERO_LENS[13]),
+                create_cb!(&TOTAL_ZERO_BITS[14], &TOTAL_ZERO_LENS[14]),
+            ],
+            cdc_total_zeros_cb: [
+                create_cb!(&CHROMA_DC_TOTAL_ZERO_BITS[0], &CHROMA_DC_TOTAL_ZERO_LENS[0]),
+                create_cb!(&CHROMA_DC_TOTAL_ZERO_BITS[1], &CHROMA_DC_TOTAL_ZERO_LENS[1]),
+                create_cb!(&CHROMA_DC_TOTAL_ZERO_BITS[2], &CHROMA_DC_TOTAL_ZERO_LENS[2]),
+            ],
+            run_before_cb: [
+                create_cb!(&RUN_BEFORE_BITS[0], &RUN_BEFORE_LENS[0]),
+                create_cb!(&RUN_BEFORE_BITS[1], &RUN_BEFORE_LENS[1]),
+                create_cb!(&RUN_BEFORE_BITS[2], &RUN_BEFORE_LENS[2]),
+                create_cb!(&RUN_BEFORE_BITS[3], &RUN_BEFORE_LENS[3]),
+                create_cb!(&RUN_BEFORE_BITS[4], &RUN_BEFORE_LENS[4]),
+                create_cb!(&RUN_BEFORE_BITS[5], &RUN_BEFORE_LENS[5]),
+                create_cb!(&RUN_BEFORE_BITS[6], &RUN_BEFORE_LENS[6]),
+            ],
+        }
+    }
+}
+
+/// Code words of the four coefficient token codebooks, one row per codebook.
+///
+/// The entry index is the token value that `map_coeff_token` splits into the
+/// trailing ones and total coefficient counts.
+const COEFF_TOKEN_BITS: [[u16; 62]; 4] = [
+  [
+    0x01, 0x05, 0x01, 0x07, 0x04, 0x01, 0x07, 0x06,
+    0x05, 0x03, 0x07, 0x06, 0x05, 0x03, 0x07, 0x06,
+    0x05, 0x04, 0x0F, 0x06, 0x05, 0x04, 0x0B, 0x0E,
+    0x05, 0x04, 0x08, 0x0A, 0x0D, 0x04, 0x0F, 0x0E,
+    0x09, 0x04, 0x0B, 0x0A, 0x0D, 0x0C, 0x0F, 0x0E,
+    0x09, 0x0C, 0x0B, 0x0A, 0x0D, 0x08, 0x0F, 0x01,
+    0x09, 0x0C, 0x0B, 0x0E, 0x0D, 0x08, 0x07, 0x0A,
+    0x09, 0x0C, 0x04, 0x06, 0x05, 0x08
+  ], [
+    0x03, 0x0B, 0x02, 0x07, 0x07, 0x03, 0x07, 0x0A,
+    0x09, 0x05, 0x07, 0x06, 0x05, 0x04, 0x04, 0x06,
+    0x05, 0x06, 0x07, 0x06, 0x05, 0x08, 0x0F, 0x06,
+    0x05, 0x04, 0x0B, 0x0E, 0x0D, 0x04, 0x0F, 0x0A,
+    0x09, 0x04, 0x0B, 0x0E, 0x0D, 0x0C, 0x08, 0x0A,
+    0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A,
+    0x09, 0x0C, 0x07, 0x0B, 0x06, 0x08, 0x09, 0x08,
+    0x0A, 0x01, 0x07, 0x06, 0x05, 0x04
+  ], [
+    0x0F, 0x0F, 0x0E, 0x0B, 0x0F, 0x0D, 0x08, 0x0C,
+    0x0E, 0x0C, 0x0F, 0x0A, 0x0B, 0x0B, 0x0B, 0x08,
+    0x09, 0x0A, 0x09, 0x0E, 0x0D, 0x09, 0x08, 0x0A,
+    0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0D, 0x0B, 0x0E,
+    0x0A, 0x0C, 0x0F, 0x0A, 0x0D, 0x0C, 0x0B, 0x0E,
+    0x09, 0x0C, 0x08, 0x0A, 0x0D, 0x08, 0x0D, 0x07,
+    0x09, 0x0C, 0x09, 0x0C, 0x0B, 0x0A, 0x05, 0x08,
+    0x07, 0x06, 0x01, 0x04, 0x03, 0x02
+  ], [
+    0x03, 0x00, 0x01, 0x04, 0x05, 0x06, 0x08, 0x09,
+    0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11,
+    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+    0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21,
+    0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
+    0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31,
+    0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+    0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
+  ]
+];
+/// Code lengths matching `COEFF_TOKEN_BITS` entry for entry; the last codebook
+/// uses the same length for every code.
+const COEFF_TOKEN_LENS: [[u8; 62]; 4] = [
+  [
+     1,  6,  2,  8,  6,  3,  9,  8,  7,  5, 10,  9,  8,  6, 11, 10,
+     9,  7, 13, 11, 10,  8, 13, 13, 11,  9, 13, 13, 13, 10, 14, 14,
+    13, 11, 14, 14, 14, 13, 15, 15, 14, 14, 15, 15, 15, 14, 16, 15,
+    15, 15, 16, 16, 16, 15, 16, 16, 16, 16, 16, 16, 16, 16
+  ], [
+     2,  6,  2,  6,  5,  3,  7,  6,  6,  4,  8,  6,  6,  4,  8,  7,
+     7,  5,  9,  8,  8,  6, 11,  9,  9,  6, 11, 11, 11,  7, 12, 11,
+    11,  9, 12, 12, 12, 11, 12, 12, 12, 11, 13, 13, 13, 12, 13, 13,
+    13, 13, 13, 14, 13, 13, 14, 14, 14, 13, 14, 14, 14, 14
+  ], [
+     4,  6,  4,  6,  5,  4,  6,  5,  5,  4,  7,  5,  5,  4,  7,  5,
+     5,  4,  7,  6,  6,  4,  7,  6,  6,  4,  8,  7,  7,  5,  8,  8,
+     7,  6,  9,  8,  8,  7,  9,  9,  8,  8,  9,  9,  9,  8, 10,  9,
+     9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
+  ], [ 6; 62 ]
+];
+
+/// Code words of the chroma DC coefficient token codebook, indexed by token value.
+const CHROMA_DC_COEFF_TOKEN_BITS: [u8; 14] = [
+    1, 7, 1, 4, 6, 1, 3, 3, 2, 5, 2, 3, 2, 0
+];
+/// Code lengths matching `CHROMA_DC_COEFF_TOKEN_BITS` entry for entry.
+const CHROMA_DC_COEFF_TOKEN_LENS: [u8; 14] = [
+    2, 6, 1, 6, 6, 3, 6, 7, 7, 6, 6, 8, 8, 7
+];
+
+/// Code words of the total zeros codebooks; the row is selected by the number of
+/// coded coefficients minus one and the entry index is the total zeros value.
+const TOTAL_ZERO_BITS: [[u8; 16]; 15] = [
+    [ 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1 ],
+    [ 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0, 0 ],
+    [ 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0, 0, 0 ],
+    [ 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0, 0, 0, 0 ],
+    [ 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0, 0, 0, 0 ],
+    [ 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0, 0, 0, 0, 0 ],
+    [ 1, 1, 5, 4, 3, 3, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 1, 1, 1, 3, 3, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 1, 0, 1, 3, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 1, 0, 1, 3, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 0, 1, 1, 2, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
+];
+/// Code lengths matching `TOTAL_ZERO_BITS` entry for entry.
+///
+/// Trailing zero lengths presumably mark values that cannot occur for the given
+/// coefficient count (verify against the codebook reader's handling of zero lengths).
+const TOTAL_ZERO_LENS: [[u8; 16]; 15] = [
+    [ 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9 ],
+    [ 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 0 ],
+    [ 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6, 0, 0 ],
+    [ 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5, 0, 0, 0 ],
+    [ 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5, 0, 0, 0, 0 ],
+    [ 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6, 0, 0, 0, 0, 0 ],
+    [ 6, 5, 3, 3, 3, 2, 3, 4, 3, 6, 0, 0, 0, 0, 0, 0 ],
+    [ 6, 4, 5, 3, 2, 2, 3, 3, 6, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 6, 6, 4, 2, 2, 3, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 5, 5, 3, 2, 2, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 4, 4, 3, 3, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 4, 4, 2, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 3, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
+];
+
+/// Code words of the chroma DC total zeros codebooks; the row is selected by the
+/// number of coded coefficients minus one.
+const CHROMA_DC_TOTAL_ZERO_BITS: [[u8; 4]; 3] = [
+    [ 1, 1, 1, 0 ], [ 1, 1, 0, 0 ], [ 1, 0, 0, 0 ]
+];
+/// Code lengths matching `CHROMA_DC_TOTAL_ZERO_BITS` entry for entry.
+const CHROMA_DC_TOTAL_ZERO_LENS: [[u8; 4]; 3] = [
+    [ 1, 2, 3, 3 ], [ 1, 2, 2, 0 ], [ 1, 1, 0, 0 ]
+];
+
+/// Code words of the run-before codebooks; the row is selected by the number of
+/// zeros left minus one (the last row serves every larger value) and the entry
+/// index is the run length.
+const RUN_BEFORE_BITS: [[u8; 15]; 7] = [
+    [ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 3, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 3, 2, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 3, 0, 1, 3, 2, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0 ],
+    [ 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 ]
+];
+/// Code lengths matching `RUN_BEFORE_BITS` entry for entry.
+const RUN_BEFORE_LENS: [[u8; 15]; 7] = [
+    [ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0 ],
+    [ 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0 ],
+    [ 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0 ],
+    [ 2, 2, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0 ],
+    [ 2, 2, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0,  0,  0 ],
+    [ 2, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0,  0,  0 ],
+    [ 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11 ]
+];
--- /dev/null
+use std::str::FromStr;
+
+use nihav_core::codecs::*;
+use nihav_core::io::bitreader::*;
+
+use super::super::*;
+use super::*;
+use super::dispatch::*;
+
+/// State needed to decode the slices of a single picture.
+pub struct FrameDecoder {
+    /// Slices of the picture; the tuple members presumably mirror the arguments
+    /// of `decode_slice` (header, header size in bits, references, NAL data) -
+    /// confirm against the code that fills this list.
+    pub slices:         Vec<(SliceHeader, usize, SliceRefs, Vec<u8>)>,
+    /// Picture being decoded: output buffer, motion information and picture ID.
+    pub cur_pic:        PictureInfo,
+    /// Sequence parameter set in use.
+    sps:                Arc<SeqParameterSet>,
+    /// Picture parameter set in use.
+    pps:                Arc<PicParameterSet>,
+    /// Number of macroblocks; slice decoding stops when this index is reached.
+    pub num_mbs:        usize,
+    /// Motion compensation routines handed to macroblock reconstruction.
+    mc_dsp:             H264MC,
+    /// Shared dispatcher that receives decoding progress updates and is used to
+    /// wait for reference macroblocks.
+    dispatch:           Shareable<ThreadDispatcher>,
+    /// Per-slice decoding context.
+    sstate:             SliceState,
+    /// CAVLC codebooks.
+    cavlc_cb:           Arc<CAVLCTables>,
+    /// Raw samples of a PCM macroblock: 256 luma bytes followed by two 64-byte
+    /// chroma planes.
+    ipcm_buf:           [u8; 256 + 64 + 64],
+    /// When set, the macroblock field decoding flag is read (and ignored).
+    is_mbaff:           bool,
+    /// When set, the loop filter is not applied.
+    deblock_skip:       bool,
+}
+
+impl FrameDecoder {
+    /// Decodes one slice into the current picture.
+    ///
+    /// * `hdr` - parsed slice header.
+    /// * `hdr_size` - size of the slice header in bits, counted from the start of `nal`.
+    /// * `refs` - reference pictures for this slice.
+    /// * `nal` - slice NAL payload, including the header.
+    ///
+    /// Returns the macroblock index reported by the entropy-specific slice
+    /// decoder, or an error for invalid data.
+    pub fn decode_slice(&mut self, hdr: &SliceHeader, hdr_size: usize, refs: &SliceRefs, nal: &[u8]) -> DecoderResult<usize> {
+        self.sstate.def_fill = 1 << (self.sps.bit_depth_luma - 1);
+        self.sstate.reset(self.sps.pic_width_in_mbs, self.sps.pic_height_in_mbs, hdr.first_mb_in_slice);
+
+        // find the payload size in bits: drop trailing zero bytes, then the final
+        // set bit together with the zero bits following it
+        let mut full_size = nal.len() * 8;
+        for &byte in nal.iter().rev() {
+            if byte == 0 {
+                full_size -= 8;
+            } else {
+                full_size -= (byte.trailing_zeros() + 1) as usize;
+                break;
+            }
+        }
+        validate!(full_size > 0);
+
+        let sslice_refs = SimplifiedSliceRefs::new(refs);
+
+        let mut br = BitReader::new(&nal[hdr_size / 8..], BitReaderMode::BE);
+        let mut dst_pic = self.cur_pic.clone();
+        let mut dst_frm = NASimpleVideoFrame::from_video_buf(&mut dst_pic.buf).unwrap();
+        if !self.pps.entropy_coding_mode {
+            // the reader starts at the byte containing the end of the header, so
+            // the bit limit is expressed relative to that byte; a header running
+            // past the payload would make the subtraction underflow
+            let hdr_bytes_bits = hdr_size & !7;
+            validate!(full_size >= hdr_bytes_bits);
+            br.skip((hdr_size & 7) as u32)?;
+            self.decode_slice_cavlc(&mut br, full_size - hdr_bytes_bits, hdr, &sslice_refs, &mut dst_frm)
+        } else {
+            // CABAC data starts at the first whole byte after the header
+            let csrc = &nal[(hdr_size + 7) / 8..];
+            validate!(csrc.len() >= 2);
+            let mut cabac = CABAC::new(csrc, hdr.slice_type, hdr.slice_qp, hdr.cabac_init_idc as usize)?;
+            self.decode_slice_cabac(&mut cabac, hdr, &sslice_refs, &mut dst_frm)
+        }
+    }
+    /// Decodes the macroblocks of a CAVLC-coded slice.
+    ///
+    /// Macroblocks are read until `full_size` bits have been consumed from `br`
+    /// or the end of the picture is reached. In non-intra slices every coded
+    /// macroblock is preceded by a run of skipped macroblocks. After each
+    /// macroblock the dispatcher is informed about the new position.
+    ///
+    /// Returns the index following the last processed macroblock.
+    fn decode_slice_cavlc(&mut self, br: &mut BitReader, full_size: usize, slice_hdr: &SliceHeader, refs: &SimplifiedSliceRefs, frm: &mut NASimpleVideoFrame<u16>) -> DecoderResult<usize> {
+        // coded block pattern values indexed by the decoded code number
+        const INTRA_CBP: [u8; 48] = [
+            47, 31, 15,  0, 23, 27, 29, 30,  7, 11, 13, 14, 39, 43, 45, 46,
+            16,  3,  5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44,  1,  2,  4,
+             8, 17, 18, 20, 24,  6,  9, 22, 25, 32, 33, 34, 36, 40, 38, 41
+        ];
+        const INTER_CBP: [u8; 48] = [
+             0, 16,  1,  2,  4,  8, 32,  3,  5, 10, 12, 15, 47,  7, 11, 13,
+            14,  6,  9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46,
+            17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41
+        ];
+
+        let mut mb_idx = slice_hdr.first_mb_in_slice;
+        let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() };
+        let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip };
+        while br.tell() < full_size && mb_idx < self.num_mbs {
+            mb_info.coded = [false; 25];
+            mb_info.ref_l0 = [ZERO_REF; 4];
+            mb_info.ref_l1 = [ZERO_REF; 4];
+            mb_info.mv_l0 = [ZERO_MV; 16];
+            mb_info.mv_l1 = [ZERO_MV; 16];
+            mb_info.chroma_dc = [[0; 4]; 2];
+            mb_info.cbpy = 0;
+            mb_info.cbpc = 0;
+
+            if !slice_hdr.slice_type.is_intra() {
+                let mb_skip_run = br.read_ue()? as usize;
+                validate!(mb_idx + mb_skip_run <= self.num_mbs);
+                mb_info.mb_type = skip_type;
+                // skipped macroblocks carry no transform flag; do not let them
+                // inherit the one of the previous coded macroblock (the CABAC
+                // path resets it for skipped macroblocks as well)
+                mb_info.transform_size_8x8 = false;
+                for _ in 0..mb_skip_run {
+                    self.handle_macroblock(slice_hdr, &mut mb_info, refs, frm)?;
+                    mb_idx += 1;
+                }
+                if mb_idx == self.num_mbs || br.tell() >= full_size {
+                    break;
+                }
+            }
+            if br.tell() < full_size {
+                if self.is_mbaff && ((mb_idx & 1) == 0) {
+                    let _mb_field_decoding = br.read_bool()?;
+                }
+                let mut mb_type = decode_mb_type_cavlc(br, slice_hdr)?;
+                mb_info.mb_type = mb_type;
+                mb_info.transform_size_8x8 = false;
+                if mb_type == MBType::PCM {
+                    // raw samples follow byte-aligned
+                    br.align();
+                    for pix in self.ipcm_buf[..256 + 64 + 64].iter_mut() {
+                        *pix = br.read(8)? as u8;
+                    }
+                    self.sstate.fill_ncoded(16);
+                } else {
+                    if self.pps.transform_8x8_mode && mb_type == MBType::Intra4x4 {
+                        mb_info.transform_size_8x8 = br.read_bool()?;
+                        if mb_info.transform_size_8x8 {
+                            mb_type = MBType::Intra8x8;
+                            mb_info.mb_type = MBType::Intra8x8;
+                        }
+                    }
+                    decode_mb_pred_cavlc(br, slice_hdr, mb_type, &mut self.sstate, &mut mb_info)?;
+                    // Intra16x16 carries the coded block pattern in its type
+                    let (cbpy, cbpc) = if let MBType::Intra16x16(_, cbpy, cbpc) = mb_type {
+                            (cbpy, cbpc)
+                        } else {
+                            let cbp_id = br.read_ue()? as usize;
+                            validate!(cbp_id < INTRA_CBP.len());
+                            let cbp = if mb_type == MBType::Intra4x4 || mb_type == MBType::Intra8x8 {
+                                    INTRA_CBP[cbp_id]
+                                } else {
+                                    INTER_CBP[cbp_id]
+                                };
+                            if self.pps.transform_8x8_mode && (cbp & 0xF) != 0 && mb_info.can_have_8x8_tx(self.sps.direct_8x8_inference) {
+                                mb_info.transform_size_8x8 = br.read_bool()?;
+                            }
+                            ((cbp & 0xF), (cbp >> 4))
+                        };
+                    mb_info.cbpy = cbpy;
+                    mb_info.cbpc = cbpc;
+                    self.sstate.get_cur_mb().cbp = (cbpc << 4) | cbpy;
+                    if cbpy != 0 || cbpc != 0 || mb_type.is_intra16x16() {
+                        let mb_qp_delta = br.read_se()?;
+                        validate!(mb_qp_delta >= -26 && mb_qp_delta <= 25);
+                        // the quantiser wraps around inside the 0..52 range
+                        let new_qp = mb_qp_delta + i32::from(mb_info.qp_y);
+                        mb_info.qp_y = if new_qp < 0 {
+                                (new_qp + 52) as u8
+                            } else if new_qp >= 52 {
+                                (new_qp - 52) as u8
+                            } else {
+                                new_qp as u8
+                            };
+                        mb_info.coeffs = [[0; 16]; 25];
+                        if self.pps.transform_8x8_mode {
+                            mb_info.clear_coeffs8x8();
+                        }
+                        mb_info.chroma_dc = [[0; 4]; 2];
+                        decode_residual_cavlc(br, &mut self.sstate, &mut mb_info, &self.cavlc_cb)?;
+                    }
+                }
+                self.handle_macroblock(slice_hdr, &mut mb_info, refs, frm)?;
+            }
+            mb_idx += 1;
+            if let Ok(disp) = self.dispatch.read() {
+                disp.update_pos(self.cur_pic.full_id, mb_idx);
+            }
+        }
+        Ok(mb_idx)
+    }
+    /// Decodes the macroblocks of a CABAC-coded slice.
+    ///
+    /// Every macroblock starts with a skip flag. After each macroblock (except
+    /// the first one of an MBAFF pair) the end-of-slice bin is decoded; when it
+    /// is set, the index following the current macroblock is returned. Reaching
+    /// the end of the picture without seeing that bin is reported as invalid data.
+    fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader, refs: &SimplifiedSliceRefs, frm: &mut NASimpleVideoFrame<u16>) -> DecoderResult<usize> {
+        const IPCM_SIZE: usize = 256 + 64 + 64;
+
+        // context increment for the 8x8 transform flag: one per neighbour using it
+        let tx8x8_ctx = |st: &SliceState| -> usize {
+                usize::from(st.get_top_mb().transform_8x8) + usize::from(st.get_left_mb().transform_8x8)
+            };
+
+        let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip };
+        let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() };
+        let mut mb_idx = slice_hdr.first_mb_in_slice;
+        let mut prev_mb_skipped = false;
+        let mut last_qp_diff = false;
+
+        while mb_idx < self.num_mbs {
+            mb_info.coded = [false; 25];
+            mb_info.ref_l0 = [ZERO_REF; 4];
+            mb_info.ref_l1 = [ZERO_REF; 4];
+            mb_info.mv_l0 = [ZERO_MV; 16];
+            mb_info.mv_l1 = [ZERO_MV; 16];
+            mb_info.chroma_dc = [[0; 4]; 2];
+            mb_info.cbpy = 0;
+            mb_info.cbpc = 0;
+
+            let mb_skip = cabac_decode_mbskip(cabac, &self.sstate, slice_hdr);
+            if mb_skip {
+                mb_info.mb_type = skip_type;
+                mb_info.transform_size_8x8 = false;
+                last_qp_diff = false;
+            } else {
+                let odd_mb = (mb_idx & 1) == 1;
+                if self.is_mbaff && (!odd_mb || (prev_mb_skipped && odd_mb)) {
+                    let _mb_field_decoding = cabac.decode_bit(70);
+                }
+                let mut mb_type = cabac_decode_mb_type(cabac, slice_hdr, &self.sstate);
+                mb_info.mb_type = mb_type;
+                mb_info.transform_size_8x8 = false;
+                if mb_type == MBType::PCM {
+                    // raw samples are stored directly in the stream; copy them
+                    // and restart the arithmetic decoder afterwards
+                    validate!(cabac.pos + IPCM_SIZE <= cabac.src.len());
+                    self.ipcm_buf[..IPCM_SIZE].copy_from_slice(&cabac.src[cabac.pos..][..IPCM_SIZE]);
+                    cabac.pos += IPCM_SIZE;
+                    cabac.reinit()?;
+                    last_qp_diff = false;
+                } else {
+                    if self.pps.transform_8x8_mode && mb_type == MBType::Intra4x4 {
+                        mb_info.transform_size_8x8 = cabac.decode_bit(399 + tx8x8_ctx(&self.sstate));
+                        if mb_info.transform_size_8x8 {
+                            mb_type = MBType::Intra8x8;
+                            mb_info.mb_type = MBType::Intra8x8;
+                        }
+                    }
+                    decode_mb_pred_cabac(cabac, slice_hdr, mb_type, &mut self.sstate, &mut mb_info);
+                    // Intra16x16 carries the coded block pattern in its type
+                    let (cbpy, cbpc) = match mb_type {
+                            MBType::Intra16x16(_, cbpy, cbpc) => (cbpy, cbpc),
+                            _ => decode_cbp_cabac(cabac, &self.sstate),
+                        };
+                    if self.pps.transform_8x8_mode && cbpy != 0 && mb_info.can_have_8x8_tx(self.sps.direct_8x8_inference) {
+                        mb_info.transform_size_8x8 = cabac.decode_bit(399 + tx8x8_ctx(&self.sstate));
+                    }
+                    if mb_type.is_intra() {
+                        self.sstate.get_cur_mb().cmode = mb_info.chroma_ipred;
+                    }
+                    mb_info.cbpy = cbpy;
+                    mb_info.cbpc = cbpc;
+                    self.sstate.get_cur_mb().cbp = (cbpc << 4) | cbpy;
+                    let has_residue = cbpy != 0 || cbpc != 0 || mb_type.is_intra16x16();
+                    if has_residue {
+                        let mb_qp_delta = decode_mb_qp_delta_cabac(cabac, last_qp_diff as usize);
+                        validate!(mb_qp_delta >= -26 && mb_qp_delta <= 25);
+                        last_qp_diff = mb_qp_delta != 0;
+                        // the quantiser wraps around inside the 0..52 range
+                        let new_qp = mb_qp_delta + i32::from(mb_info.qp_y);
+                        mb_info.qp_y = match new_qp {
+                                qp if qp < 0 => (qp + 52) as u8,
+                                qp if qp >= 52 => (qp - 52) as u8,
+                                qp => qp as u8,
+                            };
+                        mb_info.coeffs = [[0; 16]; 25];
+                        if self.pps.transform_8x8_mode {
+                            mb_info.clear_coeffs8x8();
+                        }
+                        mb_info.chroma_dc = [[0; 4]; 2];
+                        decode_residual_cabac(cabac, &mut self.sstate, &mut mb_info);
+                    } else {
+                        last_qp_diff = false;
+                    }
+                }
+            }
+            self.handle_macroblock(slice_hdr, &mut mb_info, refs, frm)?;
+            prev_mb_skipped = mb_skip;
+
+            // the end-of-slice bin is not decoded after the first macroblock of an MBAFF pair
+            let may_end = !(self.is_mbaff && ((mb_idx & 1) == 0));
+            let slice_done = may_end && cabac.decode_terminate();
+            mb_idx += 1;
+            if let Ok(disp) = self.dispatch.read() {
+                disp.update_pos(self.cur_pic.full_id, mb_idx);
+            }
+            if slice_done {
+                return Ok(mb_idx);
+            }
+        }
+        Err(DecoderError::InvalidData)
+    }
+    /// Finishes the current macroblock once its syntax elements have been parsed.
+    ///
+    /// Stores the macroblock properties in the slice state, runs the inverse
+    /// transforms on the coded blocks, performs motion vector prediction for
+    /// inter macroblocks, reconstructs the macroblock (or copies raw PCM
+    /// samples), records motion data for the picture, optionally applies the
+    /// loop filter and advances the slice state to the next macroblock.
+    #[allow(clippy::cognitive_complexity)]
+    fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, refs: &SimplifiedSliceRefs, frm: &mut NASimpleVideoFrame<u16>) -> DecoderResult<()> {
+        let qp_y = mb_info.qp_y;
+        let qpr = ((qp_y as i8) + self.pps.chroma_qp_index_offset).clamp(0, 51) as usize;
+        let qpb = ((qp_y as i8) + self.pps.second_chroma_qp_index_offset).clamp(0, 51) as usize;
+        let qp_u = CHROMA_QUANTS[qpr];
+        let qp_v = CHROMA_QUANTS[qpb];
+
+        let tx_bypass = qp_y == 0 && self.sps.qpprime_y_zero_transform_bypass;
+
+        let cur_mb = self.sstate.get_cur_mb();
+        cur_mb.mb_type = mb_info.mb_type.into();
+        if mb_info.mb_type != MBType::PCM {
+            cur_mb.qp_y = qp_y;
+            cur_mb.qp_u = qp_u;
+            cur_mb.qp_v = qp_v;
+            cur_mb.transform_8x8 = mb_info.transform_size_8x8;
+        }
+
+        let has_dc = mb_info.mb_type.is_intra16x16() && mb_info.coded[24];
+        // quantisers used by the transforms are offset by the bit depth
+        let qp_y = (qp_y + 6 * (self.sps.bit_depth_luma - 8)).min(51);
+        let qp_u = (qp_u + 6 * (self.sps.bit_depth_chroma - 8)).min(51);
+        let qp_v = (qp_v + 6 * (self.sps.bit_depth_chroma - 8)).min(51);
+        if has_dc {
+            // transform the Intra16x16 DC block and distribute its values
+            idct_luma_dc(&mut mb_info.coeffs[24], qp_y);
+            let dc_vals = mb_info.coeffs[24];
+            for (blk, &dc) in mb_info.coeffs.iter_mut().zip(dc_vals.iter()) {
+                blk[0] = dc;
+            }
+        }
+        if mb_info.transform_size_8x8 {
+            if !tx_bypass {
+                for i in 0..4 {
+                    // coded flag of the top-left 4x4 block of the quadrant
+                    if mb_info.coded[(i & 1) * 2 + (i & 2) * 4] {
+                        dequant8x8(&mut mb_info.coeffs8x8[i].coeffs, &self.pps.scaling_list_8x8[!mb_info.mb_type.is_intra() as usize]);
+                        idct8x8(&mut mb_info.coeffs8x8[i].coeffs, qp_y);
+                    }
+                }
+            }
+        } else {
+            // Intra16x16 blocks already have their DC filled in from block 24
+            let quant_dc = !mb_info.mb_type.is_intra16x16();
+            for (blk, coded) in mb_info.coeffs.iter_mut().zip(mb_info.coded.iter_mut()).take(16) {
+                if *coded {
+                    if !tx_bypass {
+                        if quant_dc {
+                            idct(blk, qp_y);
+                        } else {
+                            idct_skip_dc(blk, qp_y);
+                        }
+                    }
+                } else if has_dc {
+                    if !tx_bypass {
+                        idct_dc(blk, qp_y, quant_dc);
+                    }
+                    *coded = true;
+                }
+            }
+        }
+        for chroma in 0..2 {
+            let qp_c = if chroma == 0 { qp_u } else { qp_v };
+            if mb_info.cbpc != 0 {
+                chroma_dc_transform(&mut mb_info.chroma_dc[chroma], qp_c);
+            }
+            for i in 0..4 {
+                let blk_no = 16 + chroma * 4 + i;
+                mb_info.coeffs[blk_no][0] = mb_info.chroma_dc[chroma][i];
+                if mb_info.coded[blk_no] {
+                    idct_skip_dc(&mut mb_info.coeffs[blk_no], qp_c);
+                } else if mb_info.coeffs[blk_no][0] != 0 {
+                    idct_dc(&mut mb_info.coeffs[blk_no], qp_c, false);
+                    mb_info.coded[blk_no] = true;
+                }
+            }
+        }
+
+        let is_intra = mb_info.mb_type.is_intra();
+        if !self.pps.entropy_coding_mode || mb_info.mb_type.is_skip() || is_intra {
+            self.sstate.reset_mb_mv();
+        }
+        if !is_intra {
+            let temporal_mv = !slice_hdr.direct_spatial_mv_pred;
+            let cur_id = self.cur_pic.full_id as u16;
+            // wait for the reference macroblock MV to be available
+            if matches!(mb_info.mb_type, MBType::Direct | MBType::BSkip | MBType::B8x8) {
+                if let Some(ref_id) = refs.get_ref_id(1, mb_info.ref_l1[0].index()) {
+                    wait_for_mb(&self.dispatch, &self.sstate, self.sstate.mb_x * 16, self.sstate.mb_y * 16, ZERO_MV, ref_id)?;
+                }
+            }
+            Self::pred_mv(&mut self.sstate, refs, mb_info, cur_id, temporal_mv, self.sps.direct_8x8_inference);
+        }
+        if !self.pps.constrained_intra_pred && mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 {
+            self.sstate.fill_ipred(IntraPredMode::DC);
+        }
+
+        let xpos = self.sstate.mb_x * 16;
+        let ypos = self.sstate.mb_y * 16;
+        if mb_info.mb_type == MBType::PCM {
+            // (plane, x, y, block size, offset of the plane samples in the PCM buffer)
+            let planes = [
+                (0, xpos,     ypos,     16, 0),
+                (1, xpos / 2, ypos / 2,  8, 256),
+                (2, xpos / 2, ypos / 2,  8, 256 + 64),
+            ];
+            for &(plane, x, y, size, src_off) in planes.iter() {
+                let stride = frm.stride[plane];
+                let start = frm.offset[plane] + x + y * stride;
+                let src_lines = self.ipcm_buf[src_off..].chunks(size);
+                for (dline, src) in frm.data[start..].chunks_mut(stride).take(size).zip(src_lines) {
+                    for (dst, &p) in dline[..size].iter_mut().zip(src.iter()) {
+                        *dst = u16::from(p);
+                    }
+                }
+            }
+        } else {
+            let weight_mode = if self.pps.weighted_pred && slice_hdr.slice_type.is_p() {
+                    1
+                } else if slice_hdr.slice_type.is_b() {
+                    self.pps.weighted_bipred_idc
+                } else {
+                    0
+                };
+            recon_mb_mt(frm, slice_hdr, mb_info, &mut self.sstate, refs, &mut self.mc_dsp, weight_mode, &self.dispatch)?;
+        }
+        self.sstate.save_ipred_context(frm);
+
+        // record motion data of this macroblock in the picture
+        let mv_info = &mut self.cur_pic.mv_info;
+        let mb_pos = self.sstate.mb_x + self.sstate.mb_y * mv_info.mb_stride;
+        let mut mb = FrameMBInfo::new();
+        mb.mb_type = mb_info.mb_type.into();
+        for (blk4, mv) in mb.mv.iter_mut().enumerate().take(16) {
+            *mv = self.sstate.get_cur_blk4(blk4).mv;
+        }
+        for blk8 in 0..4 {
+            mb.ref_poc[blk8] = refs.map_refs(self.sstate.get_cur_blk8(blk8).ref_idx);
+            mb.ref_idx[blk8] = self.sstate.get_cur_blk8(blk8).ref_idx;
+        }
+        mv_info.mbs[mb_pos] = mb;
+
+        let deblock_mode = slice_hdr.disable_deblocking_filter_idc;
+        if !self.deblock_skip && deblock_mode != 1 {
+            let is_s = slice_hdr.slice_type == SliceType::SI || slice_hdr.slice_type == SliceType::SP;
+            self.sstate.fill_deblock(refs, deblock_mode, is_s);
+            let mut frm = NASimpleVideoFrame::from_video_buf(&mut self.cur_pic.buf).unwrap();
+            let lf_alpha = slice_hdr.slice_alpha_c0_offset;
+            let lf_beta = slice_hdr.slice_beta_offset;
+            loop_filter_mb(&mut frm, &self.sstate, lf_alpha, lf_beta);
+        }
+        self.sstate.next_mb();
+        Ok(())
+    }
+
+    /// Performs motion vector prediction for every partition of an inter macroblock.
+    ///
+    /// Macroblocks without sub-macroblock partitions are processed partition by
+    /// partition (direct and B-skip ones are predicted as a whole first);
+    /// otherwise each 8x8 quadrant is processed according to its sub-macroblock
+    /// type, with direct quadrants predicted per 4x4 block.
+    fn pred_mv(sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) {
+        let mb_type = mb_info.mb_type;
+        if mb_type.is_4x4() {
+            for part in 0..4 {
+                let sub_type = mb_info.sub_mb_type[part];
+                let (pw, ph) = sub_type.size();
+                // top-left corner of the quadrant
+                let start_x = (part & 1) * 8;
+                let mut xoff = start_x;
+                let mut yoff = (part & 2) * 4;
+                for subpart in 0..sub_type.num_parts() {
+                    if sub_type == SubMBType::Direct8x8 {
+                        for sblk in 0..4 {
+                            let blk4 = (xoff / 4) + (sblk & 1) + (yoff / 4) * 4 + (sblk & 2) * 2;
+                            sstate.predict_direct_sub(frame_refs, temporal_mv, direct_8x8, cur_id, blk4);
+                        }
+                    } else {
+                        let mv_idx = part * 4 + subpart;
+                        if !sub_type.is_l1() {
+                            sstate.predict(xoff, yoff, pw, ph, 0, mb_info.mv_l0[mv_idx], mb_info.ref_l0[part]);
+                        }
+                        if !sub_type.is_l0() {
+                            sstate.predict(xoff, yoff, pw, ph, 1, mb_info.mv_l1[mv_idx], mb_info.ref_l1[part]);
+                        }
+                    }
+                    // advance inside the quadrant, wrapping to the next row
+                    xoff += pw;
+                    if xoff == start_x + 8 {
+                        xoff -= 8;
+                        yoff += ph;
+                    }
+                }
+            }
+            return;
+        }
+
+        let is_direct = mb_type == MBType::Direct || mb_type == MBType::BSkip;
+        let (pw, ph) = mb_type.size();
+        let mut xoff = 0;
+        let mut yoff = 0;
+        if is_direct {
+            sstate.predict_direct_mb(frame_refs, temporal_mv, direct_8x8, cur_id);
+        }
+        for part in 0..mb_type.num_parts() {
+            if !mb_type.is_l1(part) {
+                if mb_type == MBType::PSkip {
+                    sstate.predict_pskip();
+                } else if !is_direct {
+                    sstate.predict(xoff, yoff, pw, ph, 0,
+                                   mb_info.mv_l0[part], mb_info.ref_l0[part]);
+                }
+            }
+            if !mb_type.is_l0(part) && !is_direct {
+                sstate.predict(xoff, yoff, pw, ph, 1, mb_info.mv_l1[part], mb_info.ref_l1[part]);
+            }
+            // partitions narrower than the macroblock are laid out side by side,
+            // full-width ones are stacked vertically
+            if pw != 16 {
+                xoff += pw;
+            } else {
+                yoff += ph;
+            }
+        }
+    }
+}
+
+/// Front-end of the multi-threaded H.264 decoder.
+///
+/// It parses NAL units, keeps parameter sets and reference bookkeeping and hands
+/// complete pictures (as `FrameDecoder` jobs) to the shared thread dispatcher.
+pub struct H264MTDecoder {
+ /// Codec information attached to every output frame.
+ info: NACodecInfoRef,
+ /// Size in bytes of the length field preceding each NAL unit (taken from the `avcC` record), zero until `init()` has set it.
+ nal_len: u8,
+ /// Thread dispatcher shared with the frame decoders.
+ dispatch: Shareable<ThreadDispatcher>,
+ /// Reference picture bookkeeping.
+ frame_refs: FrameRefs,
+ /// Frame skipping mode selected via the decoder options.
+ skip_mode: FrameSkipMode,
+ /// Sequence parameter sets received so far.
+ sps: Vec<Arc<SeqParameterSet>>,
+ /// Index in `sps` of the set used by the current picture.
+ cur_sps: usize,
+ /// Picture parameter sets received so far.
+ pps: Vec<Arc<PicParameterSet>>,
+ /// Index in `pps` of the set used by the current picture.
+ cur_pps: usize,
+ /// Picture currently being assembled from the slices of a packet.
+ cur_fdec: Option<FrameDecoder>,
+ /// CAVLC tables shared with every frame decoder.
+ cavlc_cb: Arc<CAVLCTables>,
+ /// When set, the loop filter is not applied (decoder option).
+ deblock_skip: bool,
+ /// Largest full picture ID assigned so far.
+ max_last_poc: u32,
+ /// Offset added to calculated picture numbers so that IDs keep growing after an IDR picture.
+ poc_base: u32,
+ /// Display width reported by the container.
+ disp_w: usize,
+ /// Display height reported by the container.
+ disp_h: usize,
+}
+
+impl H264MTDecoder {
+ /// Creates a decoder with no parameter sets, no picture in progress and default options.
+ pub fn new() -> Self {
+     let dispatch = Arc::new(RwLock::new(ThreadDispatcher::new()));
+     let cavlc_cb = Arc::new(CAVLCTables::new());
+     Self {
+         // stream state
+         info: NACodecInfoRef::default(),
+         nal_len: 0,
+         disp_w: 0,
+         disp_h: 0,
+         // parameter sets
+         sps: Vec::new(),
+         cur_sps: 0,
+         pps: Vec::new(),
+         cur_pps: 0,
+         // decoding state
+         dispatch,
+         cavlc_cb,
+         frame_refs: FrameRefs::new(),
+         cur_fdec: None,
+         max_last_poc: 0,
+         poc_base: 0,
+         // user options
+         skip_mode: FrameSkipMode::default(),
+         deblock_skip: false,
+     }
+ }
+ /// Processes a single unescaped NAL unit.
+ ///
+ /// Slice NALs (types 1 and 5) are parsed and queued in the frame decoder kept in
+ /// `self.cur_fdec` unless `skip_decoding` is set; a slice with `first_mb_in_slice == 0`
+ /// creates that frame decoder. SPS (7) and PPS (8) NALs update the stored parameter
+ /// sets, data partitions (2-4) produce `DecoderError::NotImplemented` and all other
+ /// NAL types are ignored. `user_id` and `time` are stored in the `PictureInfo` of a
+ /// newly started picture.
+ fn handle_nal(&mut self, src: Vec<u8>, supp: &mut NADecoderSupport, skip_decoding: bool, user_id: u32, time: NATimeInfo) -> DecoderResult<()> {
+ validate!(!src.is_empty());
+ // the top bit of the NAL header byte must be zero
+ validate!((src[0] & 0x80) == 0);
+ let nal_ref_idc = src[0] >> 5;
+ let nal_unit_type = src[0] & 0x1F;
+
+ // payload size in bits: trailing zero bytes are dropped, then the lowest set bit
+ // of the last non-zero byte together with the zero bits below it
+ let mut full_size = src.len() * 8;
+ for &byte in src.iter().rev() {
+ if byte == 0 {
+ full_size -= 8;
+ } else {
+ full_size -= (byte.trailing_zeros() + 1) as usize;
+ break;
+ }
+ }
+ validate!(full_size > 0);
+ match nal_unit_type {
+ 1 | 5 if !skip_decoding => {
+ let is_idr = nal_unit_type == 5;
+ let mut br = BitReader::new(&src[..(full_size + 7)/8], BitReaderMode::BE);
+ br.skip(8)?;
+
+ let slice_hdr = parse_slice_header(&mut br, self.sps.as_slice(), self.pps.as_slice(), is_idr, nal_ref_idc)?;
+ // position (in bits) where the slice data starts, stored with the slice
+ let hdr_size = br.tell();
+ validate!(br.tell() < full_size);
+ let full_id;
+ if slice_hdr.first_mb_in_slice == 0 {
+ // first slice of a picture: no other picture may be in progress
+ validate!(self.cur_fdec.is_none());
+ // locate the parameter sets referenced by the slice; the previous
+ // indices are kept when no stored set has a matching ID
+ for (i, pps) in self.pps.iter().enumerate() {
+ if pps.pic_parameter_set_id == slice_hdr.pic_parameter_set_id {
+ self.cur_pps = i;
+ break;
+ }
+ }
+ for (i, sps) in self.sps.iter().enumerate() {
+ if sps.seq_parameter_set_id == self.pps[self.cur_pps].seq_parameter_set_id {
+ self.cur_sps = i;
+ break;
+ }
+ }
+
+ let mut cur_full_id = self.frame_refs.calc_picture_num(&slice_hdr, is_idr, nal_ref_idc, &self.sps[self.cur_sps]) + self.poc_base;
+ // an IDR picture whose ID does not exceed the largest one seen so far
+ // is rebased to `max_last_poc + 2` so that picture IDs keep growing
+ if is_idr {
+ if cur_full_id <= self.max_last_poc {
+ self.poc_base = self.max_last_poc + 2 - (cur_full_id - self.poc_base);
+ cur_full_id = self.max_last_poc + 2;
+ }
+ }
+ self.max_last_poc = self.max_last_poc.max(cur_full_id);
+ full_id = cur_full_id;
+
+ // only 4:2:0 content with 10 bits for both luma and chroma is accepted
+ let sps = &self.sps[self.cur_sps];
+ if sps.chroma_format_idc != 1 || sps.bit_depth_luma != sps.bit_depth_chroma {
+println!(" chroma fmt {} bits {}/{}", sps.chroma_format_idc, sps.bit_depth_luma, sps.bit_depth_chroma);
+ return Err(DecoderError::NotImplemented);
+ }
+ if sps.bit_depth_luma != 10 {
+println!(" unsupported depth {}", sps.bit_depth_luma);
+ return Err(DecoderError::NotImplemented);
+ }
+
+ if is_idr {
+ self.frame_refs.clear_refs();
+ }
+
+ // coded dimensions are whole macroblocks (16 pixels each)
+ let width = sps.pic_width_in_mbs << 4;
+ let height = sps.pic_height_in_mbs << 4;
+ let num_mbs = sps.pic_width_in_mbs * sps.pic_height_in_mbs;
+
+ let mut mc_dsp = H264MC::new();
+ mc_dsp.set_dimensions(width, height);
+ mc_dsp.set_depth(sps.bit_depth_luma);
+
+ // neither MBAFF nor field coding is supported
+ let is_mbaff = sps.mb_adaptive_frame_field && !slice_hdr.field_pic;
+ if is_mbaff {
+ println!("MBAFF");
+ return Err(DecoderError::NotImplemented);
+ }
+ if !sps.frame_mbs_only {
+ println!("PAFF?");
+ return Err(DecoderError::NotImplemented);
+ }
+
+ let cur_vinfo = supp.pool_u16.get_info();
+ // use the display dimensions when they correspond to the coded size
+ // (i.e. match it after rounding up to a multiple of 16)
+ let (w, h) = if ((self.disp_w + 15) & !15) == width && ((self.disp_h + 15) & !15) == height {
+ (self.disp_w, self.disp_h)
+ } else {
+ (width, height)
+ };
+ let fmtstr = match sps.bit_depth_luma {
+ 9 => "yuv420p9",
+ 10 => "yuv420p10",
+ 11 => "yuv420p11",
+ 12 => "yuv420p12",
+ _ => return Err(DecoderError::NotImplemented),
+ };
+ let tmp_vinfo = NAVideoInfo::new(w, h, false, NAPixelFormaton::from_str(fmtstr).unwrap());
+ // re-create the buffer pool when the picture format has changed
+ if cur_vinfo != Some(tmp_vinfo) {
+ supp.pool_u16.reset();
+ supp.pool_u16.prealloc_video(tmp_vinfo, 4)?;
+ }
+
+ // take a free buffer from the pool or add a new one to it; the latter
+ // is refused once more than 256 buffers are in use
+ let buf = if let Some(pic) = supp.pool_u16.get_free() {
+ pic
+ } else {
+ if supp.pool_u16.get_num_used() > 256 {
+ return Err(DecoderError::AllocError);
+ }
+ if let Ok(nbuf) = alloc_video_buffer(tmp_vinfo, 4) {
+ let vbuf = nbuf.get_vbuf16().unwrap();
+ supp.pool_u16.add_frame(vbuf.clone());
+ vbuf
+ } else {
+ return Err(DecoderError::AllocError);
+ }
+ };
+
+ let cur_pic = PictureInfo {
+ id: slice_hdr.frame_num,
+ full_id, user_id, time,
+ pic_type: slice_hdr.slice_type.to_frame_type(),
+ buf,
+ cur_mb: 0,
+ is_ref: nal_ref_idc != 0,
+ is_idr,
+ long_term: get_long_term_id(is_idr, &slice_hdr),
+ mv_info: NABufferRef::new(FrameMV::new(sps.pic_width_in_mbs, sps.pic_height_in_mbs)),
+ };
+
+ self.cur_fdec = Some(FrameDecoder{
+ slices: Vec::new(),
+ sstate: SliceState::new(),
+ ipcm_buf: [0; 256 + 64 + 64],
+ //width, height,
+ num_mbs,
+ sps: Arc::clone(sps),
+ pps: Arc::clone(&self.pps[self.cur_pps]),
+ dispatch: Arc::clone(&self.dispatch),
+ cavlc_cb: Arc::clone(&self.cavlc_cb),
+ mc_dsp,
+ cur_pic,
+ is_mbaff,
+ deblock_skip: self.deblock_skip,
+ });
+ } else {
+ // continuation slice: it is silently dropped when no picture is in progress
+ if let Some(ref mut fdec) = self.cur_fdec {
+ let new_type = slice_hdr.slice_type.to_frame_type();
+ let pic = &mut fdec.cur_pic;
+ // an I picture takes the type of the new slice and any picture
+ // becomes B once a B slice is encountered
+ pic.pic_type = match (pic.pic_type, new_type) {
+ (FrameType::I, _) => new_type,
+ (_, FrameType::B) => FrameType::B,
+ _ => pic.pic_type,
+ };
+ full_id = pic.full_id;
+ } else {
+ return Ok(());
+ }
+ }
+
+ let sps = &self.sps[self.cur_sps];
+
+ self.frame_refs.select_refs(sps, &slice_hdr, full_id);
+
+ // the last argument is the frame number mask, (1 << log2_max_frame_num) - 1
+ if slice_hdr.adaptive_ref_pic_marking_mode {
+ self.frame_refs.apply_adaptive_marking(&slice_hdr.adaptive_ref_pic_marking, slice_hdr.frame_num, ((1u32 << self.sps[self.cur_sps].log2_max_frame_num) - 1) as u16)?;
+ }
+ // slice data is not decoded here: the NAL is stored along with its parsed
+ // header, the header size and a copy of the current reference lists
+ if let Some(ref mut fdec) = self.cur_fdec {
+ fdec.slices.push((slice_hdr, hdr_size, self.frame_refs.cur_refs.clone(), src));
+ }
+ },
+ 2 => { // slice data partition A
+ //slice header
+ //slice id = read_ue()
+ //cat 2 slice data (all but MB layer residual)
+ return Err(DecoderError::NotImplemented);
+ },
+ 3 => { // slice data partition B
+ //slice id = read_ue()
+ //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() }
+ //cat 3 slice data (MB layer residual)
+ return Err(DecoderError::NotImplemented);
+ },
+ 4 => { // slice data partition C
+ //slice id = read_ue()
+ //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() }
+ //cat 4 slice data (MB layer residual)
+ return Err(DecoderError::NotImplemented);
+ },
+ 6 => {}, //SEI
+ 7 => {
+ // a parsed SPS is always appended to the list
+ let sps = parse_sps(&src[1..])?;
+ self.sps.push(Arc::new(sps));
+ },
+ 8 => {
+ validate!(full_size >= 8 + 16);
+ let pps = parse_pps(&src[1..], self.sps.as_slice(), full_size - 8)?;
+ // replace the stored PPS with the same ID or append a new one
+ let mut found = false;
+ for stored_pps in self.pps.iter_mut() {
+ if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id {
+ *stored_pps = Arc::clone(&pps);
+ found = true;
+ break;
+ }
+ }
+ if !found {
+ self.pps.push(pps);
+ }
+ },
+ 9 => { // access unit delimiter
+ },
+ 10 => {}, //end of sequence
+ 11 => {}, //end of stream
+ 12 => {}, //filler
+ _ => {},
+ };
+
+ Ok(())
+ }
+}
+
+impl NADecoderMT for H264MTDecoder {
+ /// Initialises the decoder from the stream information.
+ ///
+ /// The extradata must be an `avcC` record: it provides the size of the NAL length
+ /// prefix and the initial parameter sets, which are fed to `handle_nal()`. After that
+ /// the display dimensions are remembered, the dispatcher is told how many threads it
+ /// may use and the 16-bit frame pool is preallocated.
+ ///
+ /// # Errors
+ ///
+ /// * `DecoderError::InvalidData` when `info` does not describe video or the record
+ ///   contents are inconsistent (including parameter set sizes exceeding the record);
+ /// * `DecoderError::NotImplemented` when the extradata is missing or is not `avcC`;
+ /// * `DecoderError::Bug` when the dispatcher lock cannot be obtained;
+ /// * any error reported while parsing the parameter sets or allocating frames.
+ fn init(&mut self, supp: &mut NADecoderSupport, info: NACodecInfoRef, nthreads: usize) -> DecoderResult<()> {
+     let vinfo = if let NACodecTypeInfo::Video(vinfo) = info.get_properties() {
+         vinfo
+     } else {
+         return Err(DecoderError::InvalidData);
+     };
+
+     let fmt = NAPixelFormaton::from_str("yuv420p10").unwrap();
+     let myinfo = NACodecTypeInfo::Video(NAVideoInfo::new(0, 0, false, fmt));
+     self.info = NACodecInfo::new_ref(info.get_name(), myinfo, info.get_extradata()).into_ref();
+
+     // a stream without extradata is reported the same way as one with
+     // extradata in an unsupported format instead of panicking
+     let edata = match info.get_extradata() {
+         Some(edata) => edata,
+         None => return Err(DecoderError::NotImplemented),
+     };
+     if edata.len() <= 11 || &edata[0..4] != b"avcC" {
+         return Err(DecoderError::NotImplemented);
+     }
+
+     let mut br = MemoryReader::new_read(edata.as_slice());
+
+     br.read_skip(4)?;
+     let version = br.read_byte()?;
+     validate!(version == 1);
+     let profile = br.read_byte()?;
+     let _compatibility = br.read_byte()?;
+     let _level = br.read_byte()?;
+     // only the low two bits are used, the upper six bits are not checked
+     let b = br.read_byte()?;
+     self.nal_len = (b & 3) + 1;
+     // only the low five bits are used, the upper three bits are not checked
+     let b = br.read_byte()?;
+     let num_sps = (b & 0x1F) as usize;
+     for _ in 0..num_sps {
+         let len = br.read_u16be()? as usize;
+         // the declared size must fit into the record, otherwise slicing below would panic
+         validate!(br.left() >= (len as i64));
+         let offset = br.tell() as usize;
+         validate!((br.peek_byte()? & 0x1F) == 7);
+         let mut nal_buf = Vec::new();
+         let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
+         self.handle_nal(nal_buf, supp, true, 0, NATimeInfo::new(None, None, None, 0, 0))?;
+         br.read_skip(len)?;
+     }
+     let num_pps = br.read_byte()? as usize;
+     for _ in 0..num_pps {
+         let len = br.read_u16be()? as usize;
+         // the declared size must fit into the record, otherwise slicing below would panic
+         validate!(br.left() >= (len as i64));
+         let offset = br.tell() as usize;
+         validate!((br.peek_byte()? & 0x1F) == 8);
+         let mut nal_buf = Vec::new();
+         let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
+         self.handle_nal(nal_buf, supp, true, 0, NATimeInfo::new(None, None, None, 0, 0))?;
+         br.read_skip(len)?;
+     }
+     if br.left() > 0 {
+         match profile {
+             100 | 110 | 122 | 144 => {
+                 let b = br.read_byte()?;
+                 // some encoders put something different here; in that case the
+                 // remaining part of the record is ignored but the decoder
+                 // set-up below is still performed
+                 if (b & 0xFC) == 0xFC {
+                     // b & 3 -> chroma format
+                     let b = br.read_byte()?;
+                     validate!((b & 0xF8) == 0xF8);
+                     // b & 7 -> luma depth minus 8
+                     let b = br.read_byte()?;
+                     validate!((b & 0xF8) == 0xF8);
+                     // b & 7 -> chroma depth minus 8
+                     let num_spsext = br.read_byte()? as usize;
+                     for _ in 0..num_spsext {
+                         let len = br.read_u16be()? as usize;
+                         // parse spsext
+                         br.read_skip(len)?;
+                     }
+                 }
+             },
+             _ => {},
+         };
+     }
+
+     let mut width = vinfo.get_width();
+     let mut height = vinfo.get_height();
+     self.disp_w = width;
+     self.disp_h = height;
+
+     // fall back to the coded size of the first SPS when the container gives no dimensions
+     if (width == 0 || height == 0) && !self.sps.is_empty() {
+         width = self.sps[0].pic_width_in_mbs * 16;
+         height = self.sps[0].pic_height_in_mbs * 16;
+     }
+
+     // at least 17 buffers are requested regardless of the reference frame count
+     let num_bufs = if !self.sps.is_empty() {
+             self.sps[0].num_ref_frames + 1
+         } else {
+             3
+         }.max(16 + 1);
+     if let Ok(ref mut sd) = self.dispatch.write() {
+         sd.max_threads = nthreads;
+     } else {
+         return Err(DecoderError::Bug);
+     }
+     supp.pool_u16.set_dec_bufs(num_bufs + nthreads);
+     supp.pool_u16.prealloc_video(NAVideoInfo::new(width, height, false, fmt), 4)?;
+
+     Ok(())
+ }
+ /// Reports whether the dispatcher can accept another picture for decoding.
+ ///
+ /// Returns `false` when the dispatcher lock cannot be obtained.
+ fn can_take_input(&mut self) -> bool {
+     match self.dispatch.read() {
+         Ok(dispatcher) => dispatcher.can_decode_more(),
+         Err(_) => false,
+     }
+ }
+ /// Parses a packet made of length-prefixed NAL units and queues the picture it contains.
+ ///
+ /// When a frame skipping mode is active the packet is scanned first to find out the
+ /// picture type and whether it is a reference, and slice decoding is skipped accordingly.
+ /// All NAL units are then passed to `handle_nal()` and the assembled picture (if any) is
+ /// registered as a reference where required and handed to the dispatcher.
+ ///
+ /// Returns `Ok(false)` when the dispatcher cannot take more input (the packet is not
+ /// consumed then) and `Ok(true)` once the packet has been processed.
+ ///
+ /// # Errors
+ ///
+ /// * `DecoderError::NotImplemented` when the NAL length size is not known (start code
+ ///   delimited input is not supported);
+ /// * `DecoderError::InvalidData` when a NAL size exceeds the packet size;
+ /// * any error reported by the NAL parsing.
+ fn queue_pkt(&mut self, supp: &mut NADecoderSupport, pkt: &NAPacket, user_id: u32) -> DecoderResult<bool> {
+     if !self.can_take_input() {
+         return Ok(false);
+     }
+     if self.nal_len == 0 {
+         //todo NAL detection
+         return Err(DecoderError::NotImplemented);
+     }
+     // A picture left behind by a packet that failed half-way must not survive,
+     // otherwise the first slice of every following picture would be rejected.
+     self.cur_fdec = None;
+
+     let src = pkt.get_buffer();
+
+     let mut br = MemoryReader::new_read(&src);
+     let mut nal_buf = Vec::with_capacity(src.len());
+
+     let mut skip_decoding = false;
+     if self.skip_mode != FrameSkipMode::None {
+         // pre-scan the packet to determine the picture type and reference status
+         let mut pic_type = FrameType::I;
+         let mut is_ref = false;
+         while br.left() > 0 {
+             let size = match self.nal_len {
+                     1 => br.read_byte()? as usize,
+                     2 => br.read_u16be()? as usize,
+                     3 => br.read_u24be()? as usize,
+                     4 => br.read_u32be()? as usize,
+                     _ => unreachable!(),
+                 };
+             validate!(br.left() >= (size as i64));
+             let offset = br.tell() as usize;
+             let size = unescape_nal(&src[offset..][..size], &mut nal_buf);
+             validate!(size > 0);
+             let nal_ref_idc = nal_buf[0] >> 5;
+             let nal_unit_type = nal_buf[0] & 0x1F;
+             if nal_unit_type == 1 || nal_unit_type == 5 {
+                 let mut bitr = BitReader::new(&nal_buf[1..], BitReaderMode::BE);
+                 let (first_mb, slice_type) = parse_slice_header_minimal(&mut bitr)?;
+                 if first_mb == 0 && nal_ref_idc != 0 {
+                     is_ref = true;
+                 }
+                 let new_type = slice_type.to_frame_type();
+                 pic_type = match (pic_type, new_type) {
+                         (FrameType::I, _) => new_type,
+                         (_, FrameType::B) => FrameType::B,
+                         _ => pic_type,
+                     };
+             }
+             br.read_skip(size)?;
+         }
+         match self.skip_mode {
+             FrameSkipMode::IntraOnly => {
+                 skip_decoding = pic_type != FrameType::I;
+             },
+             FrameSkipMode::KeyframesOnly => {
+                 if !is_ref {
+                     skip_decoding = true;
+                 }
+             },
+             _ => {},
+         };
+         br.seek(SeekFrom::Start(0))?;
+     }
+
+     // reference list before this picture is added, passed to the dispatcher below
+     let mut initial_ref_frames = Vec::new();
+     self.frame_refs.fill_ref_nums(&mut initial_ref_frames);
+
+     while br.left() > 0 {
+         let size = match self.nal_len {
+                 1 => br.read_byte()? as usize,
+                 2 => br.read_u16be()? as usize,
+                 3 => br.read_u24be()? as usize,
+                 4 => br.read_u32be()? as usize,
+                 _ => unreachable!(),
+             };
+         validate!(br.left() >= (size as i64));
+         let offset = br.tell() as usize;
+         let mut cur_nal_buf = Vec::with_capacity(size);
+         let _size = unescape_nal(&src[offset..][..size], &mut cur_nal_buf);
+         self.handle_nal(cur_nal_buf, supp, skip_decoding, user_id, pkt.ts)?;
+         br.read_skip(size)?;
+     }
+     if let Some(fdc) = self.cur_fdec.take() {
+         let cpic = &fdc.cur_pic;
+         if cpic.is_ref {
+             self.frame_refs.add_short_term(cpic.clone(), self.sps[self.cur_sps].num_ref_frames);
+         }
+         if let Some(lt_idx) = cpic.long_term {
+             self.frame_refs.add_long_term(lt_idx, cpic.clone());
+         }
+         let mut ref_frames = Vec::new();
+         self.frame_refs.fill_ref_nums(&mut ref_frames);
+         queue_decoding(&mut self.dispatch, fdc, &initial_ref_frames, &ref_frames);
+     }
+     Ok(true)
+ }
+ /// Reports whether the dispatcher has a decoded picture ready.
+ ///
+ /// # Panics
+ ///
+ /// Panics when the dispatcher lock cannot be obtained.
+ fn has_output(&mut self) -> bool {
+     match self.dispatch.read() {
+         Ok(dispatcher) => dispatcher.has_output(),
+         Err(_) => panic!("can't peek into status"),
+     }
+ }
+ /// Waits for one picture from the dispatcher and wraps it into a frame.
+ ///
+ /// The frame carries the timestamps stored with the picture; when those have no PTS
+ /// the full picture ID is used instead. The second element of the returned pair is
+ /// the user ID the picture (or the error) belongs to.
+ fn get_frame(&mut self) -> (DecoderResult<NAFrameRef>, u32) {
+     let cpic = match wait_for_one(&mut self.dispatch) {
+         Ok(pic) => pic,
+         Err((err, id)) => return (Err(err), id),
+     };
+
+     let bufinfo = NABufferType::Video16(cpic.buf.clone());
+     let mut frame = NAFrame::new(cpic.time, cpic.pic_type, cpic.is_idr, self.info.clone(), bufinfo);
+     if frame.get_pts().is_none() {
+         frame.set_pts(Some(u64::from(cpic.full_id)));
+     }
+     frame.set_id(cpic.user_id as i64);
+     (Ok(frame.into_ref()), cpic.user_id)
+ }
+ /// Resets the decoding state: stops pending work in the dispatcher, forgets all
+ /// reference pictures and drops any partially assembled picture.
+ fn flush(&mut self) {
+     clear_threads(&mut self.dispatch);
+     self.frame_refs.clear_refs();
+     // a picture left over from a packet that failed half-way would make the
+     // first slice of the next picture fail the "no picture in progress" check
+     self.cur_fdec = None;
+ }
+}
+
+/// Decoder options: frame skipping mode and loop filter skipping.
+impl NAOptionHandler for H264MTDecoder {
+ /// Returns the definitions of the options this decoder understands.
+ fn get_supported_options(&self) -> &[NAOptionDefinition] { DECODER_OPTIONS }
+ /// Applies every option that passes the check of a known option definition.
+ ///
+ /// A frame skip mode that cannot be parsed is ignored.
+ fn set_options(&mut self, options: &[NAOption]) {
+     for option in options.iter() {
+         for definition in DECODER_OPTIONS.iter() {
+             if definition.check(option).is_err() {
+                 continue;
+             }
+             match (option.name, &option.value) {
+                 (DEBLOCK_SKIP_OPTION, NAValue::Bool(flag)) => {
+                     self.deblock_skip = *flag;
+                 },
+                 (FRAME_SKIP_OPTION, NAValue::String(ref mode_name)) => {
+                     if let Ok(mode) = FrameSkipMode::from_str(mode_name) {
+                         self.skip_mode = mode;
+                     }
+                 },
+                 _ => {},
+             }
+         }
+     }
+ }
+ /// Returns the current value of the named option or `None` for an unknown name.
+ fn query_option_value(&self, name: &str) -> Option<NAValue> {
+     if name == FRAME_SKIP_OPTION {
+         Some(NAValue::String(self.skip_mode.to_string()))
+     } else if name == DEBLOCK_SKIP_OPTION {
+         Some(NAValue::Bool(self.deblock_skip))
+     } else {
+         None
+     }
+ }
+}
--- /dev/null
+use std::sync::Arc;
+use std::str::FromStr;
+
+use nihav_core::codecs::*;
+use nihav_core::io::bitreader::*;
+
+use super::super::*;
+use super::*;
+
+/// Single-threaded H.264 decoder state.
+pub struct H264Decoder {
+ /// Codec information.
+ info: NACodecInfoRef,
+ /// Coded picture width in pixels (width in macroblocks times 16).
+ width: usize,
+ /// Coded picture height in pixels (height in macroblocks times 16).
+ height: usize,
+ /// Display width, used for output when it matches the coded width rounded up to a multiple of 16.
+ disp_w: usize,
+ /// Display height, used for output when it matches the coded height rounded up to a multiple of 16.
+ disp_h: usize,
+ /// Number of macroblocks in a picture.
+ num_mbs: usize,
+ // NOTE(review): presumably the size of the NAL length prefix as in the MT decoder; it is set outside this part of the file
+ nal_len: u8,
+ /// Sequence parameter sets received so far.
+ sps: Vec<Arc<SeqParameterSet>>,
+ /// Index in `sps` of the set used by the current picture.
+ cur_sps: usize,
+ /// Picture parameter sets received so far.
+ pps: Vec<Arc<PicParameterSet>>,
+ /// Index in `pps` of the set used by the current picture.
+ cur_pps: usize,
+
+ /// Frame skipping mode.
+ skip_mode: FrameSkipMode,
+ /// When set, the loop filter is not applied.
+ deblock_skip: bool,
+
+ /// Set when the current picture uses MBAFF coding (such pictures are rejected as not implemented).
+ is_mbaff: bool,
+
+ /// CAVLC tables.
+ cavlc_cb: CAVLCTables,
+
+ /// Per-slice decoding state (current macroblock position, neighbour context).
+ sstate: SliceState,
+
+ /// Picture being decoded.
+ cur_pic: Option<PictureInfo>,
+ /// Full ID of the current picture truncated to 16 bits.
+ cur_id: u16,
+ /// Result reported by the last slice decoding call.
+ has_pic: bool,
+ /// Reference picture bookkeeping.
+ frame_refs: FrameRefs,
+
+ /// Set when the current slice does not use spatial direct MV prediction.
+ temporal_mv: bool,
+ /// `disable_deblocking_filter_idc` of the current slice (1 turns the loop filter off).
+ deblock_mode: u8,
+ /// `slice_alpha_c0_offset` of the current slice.
+ lf_alpha: i8,
+ /// `slice_beta_offset` of the current slice.
+ lf_beta: i8,
+ /// Set when the current slice is an SI or SP slice.
+ is_s: bool,
+
+ /// Samples of a PCM macroblock: 256 luma samples followed by two blocks of 64 chroma samples.
+ ipcm_buf: [u8; 256 + 64 + 64],
+
+ /// Motion compensation functions.
+ mc_dsp: H264MC,
+
+ /// `transform_8x8_mode` flag of the current PPS.
+ transform_8x8_mode: bool,
+}
+
+impl H264Decoder {
+ /// Creates a decoder with no parameter sets, no picture in progress and default options.
+ pub fn new() -> Self {
+     // room is reserved for one SPS and three PPS up front
+     let sps = Vec::with_capacity(1);
+     let pps = Vec::with_capacity(3);
+
+     Self {
+         // stream parameters
+         info: NACodecInfoRef::default(),
+         width: 0,
+         height: 0,
+         disp_w: 0,
+         disp_h: 0,
+         num_mbs: 0,
+         nal_len: 0,
+         sps,
+         cur_sps: 0,
+         pps,
+         cur_pps: 0,
+         is_mbaff: false,
+         transform_8x8_mode: false,
+
+         // user options
+         skip_mode: FrameSkipMode::default(),
+         deblock_skip: false,
+
+         // picture state
+         cur_pic: None,
+         cur_id: 0,
+         has_pic: false,
+         frame_refs: FrameRefs::new(),
+
+         // slice state
+         sstate: SliceState::new(),
+         temporal_mv: false,
+         deblock_mode: 0,
+         lf_alpha: 0,
+         lf_beta: 0,
+         is_s: false,
+
+         // tables and working buffers
+         cavlc_cb: CAVLCTables::new(),
+         ipcm_buf: [0; 256 + 64 + 64],
+         mc_dsp: H264MC::new(),
+     }
+ }
+ /// Processes a single unescaped NAL unit.
+ ///
+ /// Slice NALs (types 1 and 5) are parsed and decoded right away into the current
+ /// picture unless `skip_decoding` is set; a slice with `first_mb_in_slice == 0` starts
+ /// a new picture using a buffer from `supp.pool_u16`. SPS (7) and PPS (8) NALs update
+ /// the stored parameter sets, data partitions (2-4) produce
+ /// `DecoderError::NotImplemented` and all other NAL types are ignored.
+ fn handle_nal(&mut self, src: &[u8], supp: &mut NADecoderSupport, skip_decoding: bool) -> DecoderResult<()> {
+ validate!(!src.is_empty());
+ // the top bit of the NAL header byte must be zero
+ validate!((src[0] & 0x80) == 0);
+ let nal_ref_idc = src[0] >> 5;
+ let nal_unit_type = src[0] & 0x1F;
+
+ // payload size in bits: trailing zero bytes are dropped, then the lowest set bit
+ // of the last non-zero byte together with the zero bits below it
+ let mut full_size = src.len() * 8;
+ for &byte in src.iter().rev() {
+ if byte == 0 {
+ full_size -= 8;
+ } else {
+ full_size -= (byte.trailing_zeros() + 1) as usize;
+ break;
+ }
+ }
+ validate!(full_size > 0);
+ match nal_unit_type {
+ 1 | 5 if !skip_decoding => {
+ let is_idr = nal_unit_type == 5;
+ let mut br = BitReader::new(&src[..(full_size + 7)/8], BitReaderMode::BE);
+ br.skip(8)?;
+
+ let slice_hdr = parse_slice_header(&mut br, self.sps.as_slice(), self.pps.as_slice(), is_idr, nal_ref_idc)?;
+ validate!(br.tell() < full_size);
+ let full_id;
+ if slice_hdr.first_mb_in_slice == 0 {
+ // first slice of a picture: no other picture may be in progress
+ validate!(self.cur_pic.is_none());
+ // locate the parameter sets referenced by the slice; the previous
+ // indices are kept when no stored set has a matching ID
+ for (i, pps) in self.pps.iter().enumerate() {
+ if pps.pic_parameter_set_id == slice_hdr.pic_parameter_set_id {
+ self.cur_pps = i;
+ break;
+ }
+ }
+ for (i, sps) in self.sps.iter().enumerate() {
+ if sps.seq_parameter_set_id == self.pps[self.cur_pps].seq_parameter_set_id {
+ self.cur_sps = i;
+ break;
+ }
+ }
+
+ full_id = self.frame_refs.calc_picture_num(&slice_hdr, is_idr, nal_ref_idc, &self.sps[self.cur_sps]);
+
+ // only 4:2:0 content with 10 bits for both luma and chroma is accepted
+ let sps = &self.sps[self.cur_sps];
+ if sps.chroma_format_idc != 1 || sps.bit_depth_luma != sps.bit_depth_chroma {
+println!(" chroma fmt {} bits {}/{}", sps.chroma_format_idc, sps.bit_depth_luma, sps.bit_depth_chroma);
+ return Err(DecoderError::NotImplemented);
+ }
+ if sps.bit_depth_luma != 10 {
+println!(" unsupported depth {}", sps.bit_depth_luma);
+ return Err(DecoderError::NotImplemented);
+ }
+ //let pps = &self.pps[self.cur_pps];
+
+ if is_idr {
+ self.frame_refs.clear_refs();
+ }
+
+ // coded dimensions are whole macroblocks (16 pixels each)
+ self.width = sps.pic_width_in_mbs << 4;
+ self.height = sps.pic_height_in_mbs << 4;
+ self.num_mbs = sps.pic_width_in_mbs * sps.pic_height_in_mbs;
+ self.mc_dsp.set_dimensions(self.width, self.height);
+ self.mc_dsp.set_depth(sps.bit_depth_luma);
+
+ // neither MBAFF nor field coding is supported
+ self.is_mbaff = sps.mb_adaptive_frame_field && !slice_hdr.field_pic;
+ if self.is_mbaff {
+println!("MBAFF");
+ return Err(DecoderError::NotImplemented);
+ }
+ if !sps.frame_mbs_only {
+println!("PAFF?");
+ return Err(DecoderError::NotImplemented);
+ }
+
+//if slice_hdr.slice_type.is_b() { return Ok(()); }
+ self.cur_id = full_id as u16;
+ } else {
+ // continuation slice: it must start where the previous one ended and
+ // is silently dropped when no picture is in progress
+ if let Some(ref mut pic) = self.cur_pic {
+ validate!(pic.cur_mb == slice_hdr.first_mb_in_slice);
+ let new_type = slice_hdr.slice_type.to_frame_type();
+ // an I picture takes the type of the new slice and any picture
+ // becomes B once a B slice is encountered
+ pic.pic_type = match (pic.pic_type, new_type) {
+ (FrameType::I, _) => new_type,
+ (_, FrameType::B) => FrameType::B,
+ _ => pic.pic_type,
+ };
+ full_id = pic.full_id;
+ } else {
+ return Ok(());//Err(DecoderError::InvalidData);
+ }
+ // all slices of a picture must refer to the same PPS
+ validate!(self.cur_pps < self.pps.len() && self.pps[self.cur_pps].pic_parameter_set_id == slice_hdr.pic_parameter_set_id);
+ }
+
+ let sps = &self.sps[self.cur_sps];
+ let pps = &self.pps[self.cur_pps];
+
+ // per-slice parameters used during macroblock reconstruction
+ self.temporal_mv = !slice_hdr.direct_spatial_mv_pred;
+ self.is_s = slice_hdr.slice_type == SliceType::SI || slice_hdr.slice_type == SliceType::SP;
+ self.deblock_mode = slice_hdr.disable_deblocking_filter_idc;
+ self.lf_alpha = slice_hdr.slice_alpha_c0_offset;
+ self.lf_beta = slice_hdr.slice_beta_offset;
+
+ self.frame_refs.select_refs(sps, &slice_hdr, full_id);
+
+ // the last argument is the frame number mask, (1 << log2_max_frame_num) - 1
+ if slice_hdr.adaptive_ref_pic_marking_mode {
+ self.frame_refs.apply_adaptive_marking(&slice_hdr.adaptive_ref_pic_marking, slice_hdr.frame_num, ((1u32 << self.sps[self.cur_sps].log2_max_frame_num) - 1) as u16)?;
+ }
+ if slice_hdr.first_mb_in_slice == 0 {
+ // obtain the output buffer for the new picture
+ let ret = supp.pool_u16.get_free();
+ if ret.is_none() {
+ return Err(DecoderError::AllocError);
+ }
+ // use the display dimensions when they correspond to the coded size
+ // (i.e. match it after rounding up to a multiple of 16)
+ let (w, h) = if ((self.disp_w + 15) & !15) == self.width && ((self.disp_h + 15) & !15) == self.height {
+ (self.disp_w, self.disp_h)
+ } else {
+ (self.width, self.height)
+ };
+ let fmtstr = match self.sps[self.cur_sps].bit_depth_luma {
+ 9 => "yuv420p9",
+ 10 => "yuv420p10",
+ 11 => "yuv420p11",
+ 12 => "yuv420p12",
+ _ => return Err(DecoderError::NotImplemented),
+ };
+ let tmp_vinfo = NAVideoInfo::new(w, h, false, NAPixelFormaton::from_str(fmtstr).unwrap());
+ let mut buf = ret.unwrap();
+ // re-create the pool and take a new buffer when the format has changed
+ if buf.get_info() != tmp_vinfo {
+ supp.pool_u16.reset();
+ supp.pool_u16.prealloc_video(tmp_vinfo, 4)?;
+ let ret = supp.pool_u16.get_free();
+ if ret.is_none() {
+ return Err(DecoderError::AllocError);
+ }
+ buf = ret.unwrap();
+ }
+ self.cur_pic = Some(PictureInfo {
+ id: slice_hdr.frame_num,
+ full_id,
+ user_id: full_id,
+ time: NATimeInfo::new(None, None, None, 0, 0),
+ pic_type: slice_hdr.slice_type.to_frame_type(),
+ buf,
+ cur_mb: 0,
+ is_ref: nal_ref_idc != 0,
+ is_idr,
+ long_term: get_long_term_id(is_idr, &slice_hdr),
+ mv_info: NABufferRef::new(FrameMV::new(sps.pic_width_in_mbs, sps.pic_height_in_mbs)),
+ });
+ }
+
+ self.transform_8x8_mode = pps.transform_8x8_mode;
+
+ // default fill value is the middle of the sample range for the given bit depth
+ self.sstate.def_fill = 1 << (sps.bit_depth_luma - 1);
+ self.sstate.reset(sps.pic_width_in_mbs, sps.pic_height_in_mbs, slice_hdr.first_mb_in_slice);
+
+ // work on a clone of the picture information while decoding the slice
+ let mut dst_pic = if let Some(ref pic) = self.cur_pic {
+ pic.clone()
+ } else {
+ return Err(DecoderError::InvalidData);
+ };
+ let mut dst_frm = NASimpleVideoFrame::from_video_buf(&mut dst_pic.buf).unwrap();
+ let dst_mv_info = &mut dst_pic.mv_info;
+ if !pps.entropy_coding_mode {
+ self.has_pic = self.decode_slice_cavlc(&mut br, &slice_hdr, full_size, &mut dst_frm, dst_mv_info)?;
+ } else {
+ // CABAC data starts at the next byte boundary after the slice header
+ br.align();
+ let start = br.tell() / 8;
+ let csrc = &src[start..];
+ validate!(csrc.len() >= 2);
+ let mut cabac = CABAC::new(csrc, slice_hdr.slice_type, slice_hdr.slice_qp, slice_hdr.cabac_init_idc as usize)?;
+ self.has_pic = self.decode_slice_cabac(&mut cabac, &slice_hdr, &mut dst_frm, dst_mv_info)?;
+ }
+ },
+ 2 => { // slice data partition A
+ //slice header
+ //slice id = read_ue()
+ //cat 2 slice data (all but MB layer residual)
+ return Err(DecoderError::NotImplemented);
+ },
+ 3 => { // slice data partition B
+ //slice id = read_ue()
+ //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() }
+ //cat 3 slice data (MB layer residual)
+ return Err(DecoderError::NotImplemented);
+ },
+ 4 => { // slice data partition C
+ //slice id = read_ue()
+ //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() }
+ //cat 4 slice data (MB layer residual)
+ return Err(DecoderError::NotImplemented);
+ },
+ 6 => {}, //SEI
+ 7 => {
+ // a parsed SPS is always appended to the list
+ let sps = parse_sps(&src[1..])?;
+ self.sps.push(Arc::new(sps));
+ },
+ 8 => {
+ validate!(full_size >= 8 + 16);
+ let pps = parse_pps(&src[1..], &self.sps, full_size - 8)?;
+ // replace the stored PPS with the same ID or append a new one
+ let mut found = false;
+ for stored_pps in self.pps.iter_mut() {
+ if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id {
+ *stored_pps = Arc::clone(&pps);
+ found = true;
+ break;
+ }
+ }
+ if !found {
+ self.pps.push(pps);
+ }
+ },
+ 9 => { // access unit delimiter
+ },
+ 10 => {}, //end of sequence
+ 11 => {}, //end of stream
+ 12 => {}, //filler
+ _ => {},
+ };
+
+ Ok(())
+ }
+ /// Derives motion vectors and reference indices for all partitions of an inter macroblock.
+ ///
+ /// Macroblocks that are not split into 8x8 parts are processed partition by partition:
+ /// direct and B-skip ones get their motion data from `predict_direct_mb()`, P-skip ones
+ /// from `predict_pskip()` and all others from `predict()` fed with the list 0 and/or
+ /// list 1 values stored in `mb_info`. Macroblocks split into four 8x8 parts do the same
+ /// per sub-partition, calling `predict_direct_sub()` for each 4x4 block of a direct part.
+ fn pred_mv(sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool, direct_8x8: bool) {
+     let mb_type = mb_info.mb_type;
+
+     if mb_type.is_4x4() {
+         for part in 0..4 {
+             let sub_type = mb_info.sub_mb_type[part];
+             let (sub_w, sub_h) = sub_type.size();
+             // top-left corner of this 8x8 part inside the macroblock
+             let base_x = (part & 1) * 8;
+             let mut cur_x = base_x;
+             let mut cur_y = (part & 2) * 4;
+             for subpart in 0..sub_type.num_parts() {
+                 if sub_type == SubMBType::Direct8x8 {
+                     for sblk in 0..4 {
+                         let blk4 = (cur_x / 4) + (sblk & 1) + (cur_y / 4) * 4 + (sblk & 2) * 2;
+                         sstate.predict_direct_sub(frame_refs, temporal_mv, direct_8x8, cur_id, blk4);
+                     }
+                 } else {
+                     if !sub_type.is_l1() {
+                         sstate.predict(cur_x, cur_y, sub_w, sub_h, 0, mb_info.mv_l0[part * 4 + subpart], mb_info.ref_l0[part]);
+                     }
+                     if !sub_type.is_l0() {
+                         sstate.predict(cur_x, cur_y, sub_w, sub_h, 1, mb_info.mv_l1[part * 4 + subpart], mb_info.ref_l1[part]);
+                     }
+                 }
+                 // walk the sub-partitions left to right and wrap to the next row
+                 // once the right edge of the 8x8 part is reached
+                 cur_x += sub_w;
+                 if cur_x == base_x + 8 {
+                     cur_x = base_x;
+                     cur_y += sub_h;
+                 }
+             }
+         }
+         return;
+     }
+
+     let (part_w, part_h) = mb_type.size();
+     let is_direct = mb_type == MBType::Direct || mb_type == MBType::BSkip;
+     if is_direct {
+         sstate.predict_direct_mb(frame_refs, temporal_mv, direct_8x8, cur_id);
+     }
+     let (mut cur_x, mut cur_y) = (0, 0);
+     for part in 0..mb_type.num_parts() {
+         if !mb_type.is_l1(part) {
+             if mb_type == MBType::PSkip {
+                 sstate.predict_pskip();
+             } else if !is_direct {
+                 sstate.predict(cur_x, cur_y, part_w, part_h, 0, mb_info.mv_l0[part], mb_info.ref_l0[part]);
+             }
+         }
+         if !mb_type.is_l0(part) && !is_direct {
+             sstate.predict(cur_x, cur_y, part_w, part_h, 1, mb_info.mv_l1[part], mb_info.ref_l1[part]);
+         }
+         // full-width partitions are stacked vertically, the others sit side by side
+         if part_w == 16 {
+             cur_y += part_h;
+         } else {
+             cur_x += part_w;
+         }
+     }
+ }
+ /// Reconstructs the current macroblock described by `mb_info` into `frm`.
+ ///
+ /// The coefficients are dequantised and transformed, motion vectors are predicted for
+ /// inter macroblocks, the macroblock is reconstructed (or its PCM samples are copied),
+ /// its motion information is stored in `mv_info`, the loop filter is applied unless
+ /// disabled and finally `self.sstate` is advanced to the next macroblock.
+ #[allow(clippy::cognitive_complexity)]
+ fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo, slice_refs: &SimplifiedSliceRefs, frm: &mut NASimpleVideoFrame<u16>, mv_info: &mut FrameMV) {
+ let pps = &self.pps[self.cur_pps];
+
+ // chroma quantisers are looked up by the luma quantiser plus the PPS offset, clamped to 0..=51
+ let qp_y = mb_info.qp_y;
+ let qpr = ((qp_y as i8) + pps.chroma_qp_index_offset).clamp(0, 51) as usize;
+ let qp_u = CHROMA_QUANTS[qpr];
+ let qpb = ((qp_y as i8) + pps.second_chroma_qp_index_offset).clamp(0, 51) as usize;
+ let qp_v = CHROMA_QUANTS[qpb];
+
+ // with this SPS flag set, macroblocks with zero quantiser skip the luma transform
+ let tx_bypass = qp_y == 0 && self.sps[self.cur_sps].qpprime_y_zero_transform_bypass;
+
+ // remember the macroblock parameters in the slice state (quantisers are not stored for PCM)
+ self.sstate.get_cur_mb().mb_type = mb_info.mb_type.into();
+ if mb_info.mb_type != MBType::PCM {
+ self.sstate.get_cur_mb().qp_y = qp_y;
+ self.sstate.get_cur_mb().qp_u = qp_u;
+ self.sstate.get_cur_mb().qp_v = qp_v;
+ self.sstate.get_cur_mb().transform_8x8 = mb_info.transform_size_8x8;
+ }
+ let has_dc = mb_info.mb_type.is_intra16x16() && mb_info.coded[24];
+ // quantisers used for the transforms grow by 6 for each bit of depth above 8 and are capped at 51
+ let qp_y = (qp_y + 6 * (self.sps[self.cur_sps].bit_depth_luma - 8)).min(51);
+ let qp_u = (qp_u + 6 * (self.sps[self.cur_sps].bit_depth_chroma - 8)).min(51);
+ let qp_v = (qp_v + 6 * (self.sps[self.cur_sps].bit_depth_chroma - 8)).min(51);
+ // Intra16x16: block 24 holds the luma DCs, after the transform they become
+ // the first coefficient of each of the 16 luma blocks
+ if has_dc {
+ idct_luma_dc(&mut mb_info.coeffs[24], qp_y);
+ for i in 0..16 {
+ mb_info.coeffs[i][0] = mb_info.coeffs[24][i];
+ }
+ }
+ if !tx_bypass {
+ if !mb_info.transform_size_8x8 {
+ // 4x4 transform: Intra16x16 blocks leave the (already processed) DC alone;
+ // blocks with nothing but a DC coming from the DC block get the DC-only
+ // transform and are marked as coded
+ let quant_dc = !mb_info.mb_type.is_intra16x16();
+ if quant_dc {
+ for (coded, coeffs) in mb_info.coded[..16].iter_mut().zip(mb_info.coeffs[..16].iter_mut()) {
+ if *coded {
+ idct(coeffs, qp_y);
+ } else if has_dc {
+ idct_dc(coeffs, qp_y, quant_dc);
+ *coded = true;
+ }
+ }
+ } else {
+ for (coded, coeffs) in mb_info.coded[..16].iter_mut().zip(mb_info.coeffs[..16].iter_mut()) {
+ if *coded {
+ idct_skip_dc(coeffs, qp_y);
+ } else if has_dc {
+ idct_dc(coeffs, qp_y, quant_dc);
+ *coded = true;
+ }
+ }
+ }
+ } else {
+ // 8x8 transform: the coded flag is checked at indices 0, 2, 8 and 10 and
+ // the scaling list is selected by macroblock kind (0 - intra, 1 - inter)
+ for i in 0..4 {
+ if mb_info.coded[(i & 1) * 2 + (i & 2) * 4] {
+ dequant8x8(&mut mb_info.coeffs8x8[i].coeffs, &pps.scaling_list_8x8[!mb_info.mb_type.is_intra() as usize]);
+ idct8x8(&mut mb_info.coeffs8x8[i].coeffs, qp_y);
+ }
+ }
+ }
+ } else if !mb_info.transform_size_8x8 {
+ // transform bypass: only mark the blocks that received a DC as coded
+ for i in 0..16 {
+ if !mb_info.coded[i] && has_dc {
+ mb_info.coded[i] = true;
+ }
+ }
+ }
+ // chroma: blocks 16..20 use `qp_u`, blocks 20..24 use `qp_v`
+ for chroma in 0..2 {
+ let qp_c = if chroma == 0 { qp_u } else { qp_v };
+ if mb_info.cbpc != 0 {
+ chroma_dc_transform(&mut mb_info.chroma_dc[chroma], qp_c);
+ }
+ for i in 0..4 {
+ let blk_no = 16 + chroma * 4 + i;
+ mb_info.coeffs[blk_no][0] = mb_info.chroma_dc[chroma][i];
+ if mb_info.coded[blk_no] {
+ idct_skip_dc(&mut mb_info.coeffs[blk_no], qp_c);
+ } else if mb_info.coeffs[blk_no][0] != 0 {
+ idct_dc(&mut mb_info.coeffs[blk_no], qp_c, false);
+ mb_info.coded[blk_no] = true;
+ }
+ }
+ }
+ // motion data is reset for CAVLC-coded streams and for skipped or intra macroblocks
+ if !pps.entropy_coding_mode || mb_info.mb_type.is_skip() || mb_info.mb_type.is_intra() {
+ self.sstate.reset_mb_mv();
+ }
+ if !mb_info.mb_type.is_intra() {
+ Self::pred_mv(&mut self.sstate, slice_refs, mb_info, self.cur_id, self.temporal_mv, self.sps[self.cur_sps].direct_8x8_inference);
+ }
+ // without constrained intra prediction every macroblock except Intra4x4/Intra8x8
+ // has its intra prediction modes filled with DC
+ if !pps.constrained_intra_pred && mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 {
+ self.sstate.fill_ipred(IntraPredMode::DC);
+ }
+
+ let xpos = self.sstate.mb_x * 16;
+ let ypos = self.sstate.mb_y * 16;
+
+ if mb_info.mb_type != MBType::PCM {
+ // 1 for P slices with weighted prediction enabled, `weighted_bipred_idc`
+ // for B slices and 0 otherwise
+ let weight_mode = if self.pps[self.cur_pps].weighted_pred && slice_hdr.slice_type.is_p() {
+ 1
+ } else if slice_hdr.slice_type.is_b() {
+ self.pps[self.cur_pps].weighted_bipred_idc
+ } else {
+ 0
+ };
+ recon_mb(frm, slice_hdr, mb_info, &mut self.sstate, slice_refs, &mut self.mc_dsp, weight_mode);
+ } else {
+ // PCM macroblock: copy the stored samples (16x16 luma, then two 8x8 chroma blocks)
+ // into the frame widening them to 16 bits
+ for (dline, src) in frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..].chunks_mut(frm.stride[0]).take(16).zip(self.ipcm_buf.chunks(16)) {
+ for (dst, &p) in dline[..16].iter_mut().zip(src.iter()) { *dst = u16::from(p); } //dline[..16].copy_from_slice(src);
+ }
+ for (dline, src) in frm.data[frm.offset[1] + xpos/2 + ypos/2 * frm.stride[1]..].chunks_mut(frm.stride[1]).take(8).zip(self.ipcm_buf[256..].chunks(8)) {
+ for (dst, &p) in dline[..8].iter_mut().zip(src.iter()) { *dst = u16::from(p); } //dline[..8].copy_from_slice(src);
+ }
+ for (dline, src) in frm.data[frm.offset[2] + xpos/2 + ypos/2 * frm.stride[2]..].chunks_mut(frm.stride[2]).take(8).zip(self.ipcm_buf[256 + 64..].chunks(8)) {
+ for (dst, &p) in dline[..8].iter_mut().zip(src.iter()) { *dst = u16::from(p); } //dline[..8].copy_from_slice(src);
+ }
+ }
+/*match mb_info.mb_type {
+MBType::BSkip | MBType::Direct | MBType::B16x16(_) | MBType::B16x8(_, _) | MBType::B8x16(_, _) | MBType::B8x8 => {
+ let dstride = frm.stride[0];
+ let dst = &mut frm.data[frm.offset[0] + self.sstate.mb_x * 16 + self.sstate.mb_y * 16 * dstride..];
+ for el in dst[..16].iter_mut() { *el = 255; }
+ for row in dst.chunks_mut(dstride).skip(1).take(15) {
+ row[0] = 255;
+ }
+},
+_ => {},
+};*/
+ self.sstate.save_ipred_context(frm);
+
+ // store the macroblock type, motion vectors and references in the per-frame motion information
+ let mb_pos = self.sstate.mb_x + self.sstate.mb_y * mv_info.mb_stride;
+ let mut mb = FrameMBInfo::new();
+ mb.mb_type = mb_info.mb_type.into();
+ for blk4 in 0..16 {
+ mb.mv[blk4] = self.sstate.get_cur_blk4(blk4).mv;
+ }
+ for blk8 in 0..4 {
+ mb.ref_poc[blk8] = slice_refs.map_refs(self.sstate.get_cur_blk8(blk8).ref_idx);
+ mb.ref_idx[blk8] = self.sstate.get_cur_blk8(blk8).ref_idx;
+ }
+ mv_info.mbs[mb_pos] = mb;
+
+ // the loop filter runs unless skipped by the user option or turned off by the slice (mode 1)
+ if !self.deblock_skip && self.deblock_mode != 1 {
+ self.sstate.fill_deblock(slice_refs, self.deblock_mode, self.is_s);
+ loop_filter_mb(frm, &self.sstate, self.lf_alpha, self.lf_beta);
+ }
+ self.sstate.next_mb();
+ }
+ /// Decodes the macroblocks of one CAVLC-coded slice and reconstructs each of them with `handle_macroblock`.
+ ///
+ /// Parsing starts at `slice_hdr.first_mb_in_slice` and goes on while `br.tell()` is below `full_size`
+ /// and the macroblock index is below `self.num_mbs`. The index reached is stored in `self.cur_pic` (if any).
+ ///
+ /// Returns `Ok(true)` when the macroblock index reached `self.num_mbs`, `Ok(false)` otherwise.
+ /// Bitstream read errors and failed `validate!` checks are propagated to the caller.
+ fn decode_slice_cavlc(&mut self, br: &mut BitReader, slice_hdr: &SliceHeader, full_size: usize, frm: &mut NASimpleVideoFrame<u16>, mv_info: &mut FrameMV) -> DecoderResult<bool> {
+ // Coded block pattern lookup used for Intra4x4/Intra8x8 macroblocks, indexed by the ue(v) code read below.
+ const INTRA_CBP: [u8; 48] = [
+ 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46,
+ 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4,
+ 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41
+ ];
+ // Coded block pattern lookup used for all remaining macroblock types.
+ const INTER_CBP: [u8; 48] = [
+ 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13,
+ 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46,
+ 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41
+ ];
+
+ let mut mb_idx = slice_hdr.first_mb_in_slice;
+ // `mb_info` is reused for every macroblock; its QP carries over from one macroblock to the next.
+ let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() };
+ let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip };
+
+ let slice_refs = self.frame_refs.cur_refs.clone();
+ let sslice_refs = SimplifiedSliceRefs::new(&slice_refs);
+
+ while br.tell() < full_size && mb_idx < self.num_mbs {
+ // Reset the per-macroblock state that must not leak from the previous macroblock.
+ mb_info.coded = [false; 25];
+ mb_info.ref_l0 = [ZERO_REF; 4];
+ mb_info.ref_l1 = [ZERO_REF; 4];
+ mb_info.mv_l0 = [ZERO_MV; 16];
+ mb_info.mv_l1 = [ZERO_MV; 16];
+ mb_info.chroma_dc = [[0; 4]; 2];
+ mb_info.cbpy = 0;
+ mb_info.cbpc = 0;
+
+ // In non-intra slices a run length of skipped macroblocks is read before each coded macroblock.
+ if !slice_hdr.slice_type.is_intra() {
+ let mb_skip_run = br.read_ue()? as usize;
+ validate!(mb_idx + mb_skip_run <= self.num_mbs);
+ mb_info.mb_type = skip_type;
+ for _ in 0..mb_skip_run {
+ self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs, frm, mv_info);
+ mb_idx += 1;
+ }
+ if mb_idx == self.num_mbs || br.tell() >= full_size {
+ break;
+ }
+ }
+ if br.tell() < full_size {
+ // The field decoding flag is read for even macroblock indices in MBAFF mode but its value is not used.
+ if self.is_mbaff && ((mb_idx & 1) == 0) {
+ let _mb_field_decoding = br.read_bool()?;
+ }
+ let mut mb_type = decode_mb_type_cavlc(br, slice_hdr)?;
+ mb_info.mb_type = mb_type;
+ mb_info.transform_size_8x8 = false;
+ if mb_type == MBType::PCM {
+ // PCM macroblock: 256 + 64 + 64 raw 8-bit samples follow after byte alignment.
+ br.align();
+ for pix in self.ipcm_buf[..256 + 64 + 64].iter_mut() {
+ *pix = br.read(8)? as u8;
+ }
+ self.sstate.fill_ncoded(16);
+ } else {
+ // An Intra4x4 macroblock turns into Intra8x8 when the 8x8 transform flag is set.
+ if self.transform_8x8_mode && mb_type == MBType::Intra4x4 {
+ mb_info.transform_size_8x8 = br.read_bool()?;
+ if mb_info.transform_size_8x8 {
+ mb_type = MBType::Intra8x8;
+ mb_info.mb_type = MBType::Intra8x8;
+ }
+ }
+ decode_mb_pred_cavlc(br, slice_hdr, mb_type, &mut self.sstate, &mut mb_info)?;
+ // Intra16x16 carries its CBP inside the macroblock type; other types read a code mapped through the tables above.
+ let (cbpy, cbpc) = if let MBType::Intra16x16(_, cbpy, cbpc) = mb_type {
+ (cbpy, cbpc)
+ } else {
+ let cbp_id = br.read_ue()? as usize;
+ validate!(cbp_id < INTRA_CBP.len());
+ let cbp = if mb_type == MBType::Intra4x4 || mb_type == MBType::Intra8x8 {
+ INTRA_CBP[cbp_id]
+ } else {
+ INTER_CBP[cbp_id]
+ };
+ if self.transform_8x8_mode && (cbp & 0xF) != 0 && mb_info.can_have_8x8_tx(self.sps[self.cur_sps].direct_8x8_inference) {
+ mb_info.transform_size_8x8 = br.read_bool()?;
+ }
+ ((cbp & 0xF), (cbp >> 4))
+ };
+ mb_info.cbpy = cbpy;
+ mb_info.cbpc = cbpc;
+ self.sstate.get_cur_mb().cbp = (cbpc << 4) | cbpy;
+ // A QP delta and residual data are present only when something is coded or the macroblock is Intra16x16.
+ if cbpy != 0 || cbpc != 0 || mb_type.is_intra16x16() {
+ let mb_qp_delta = br.read_se()?;
+ validate!(mb_qp_delta >= -26 && mb_qp_delta <= 25);
+ // The updated QP wraps around into the 0..52 range.
+ let new_qp = mb_qp_delta + i32::from(mb_info.qp_y);
+ mb_info.qp_y = if new_qp < 0 {
+ (new_qp + 52) as u8
+ } else if new_qp >= 52 {
+ (new_qp - 52) as u8
+ } else {
+ new_qp as u8
+ };
+ mb_info.coeffs = [[0; 16]; 25];
+ if self.transform_8x8_mode {
+ mb_info.clear_coeffs8x8();
+ }
+ mb_info.chroma_dc = [[0; 4]; 2];
+ decode_residual_cavlc(br, &mut self.sstate, &mut mb_info, &self.cavlc_cb)?;
+ }
+ }
+ self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs, frm, mv_info);
+ }
+ mb_idx += 1;
+ }
+ // Remember how far decoding got in the current picture.
+ if let Some(ref mut pic) = self.cur_pic {
+ pic.cur_mb = mb_idx;
+ }
+ Ok(mb_idx == self.num_mbs)
+ }
+ /// Decodes the macroblocks of one CABAC-coded slice and reconstructs each of them with `handle_macroblock`.
+ ///
+ /// Parsing starts at `slice_hdr.first_mb_in_slice` and ends when the CABAC terminate bin is decoded;
+ /// at that point the next macroblock index is stored in `self.cur_pic` (if any) and
+ /// `Ok(true)` is returned when that index equals `self.num_mbs`, `Ok(false)` otherwise.
+ ///
+ /// Returns `DecoderError::InvalidData` when all macroblocks were consumed without seeing the terminate bin;
+ /// failed `validate!` checks and `cabac.reinit()` errors are propagated as well.
+ fn decode_slice_cabac(&mut self, cabac: &mut CABAC, slice_hdr: &SliceHeader, frm: &mut NASimpleVideoFrame<u16>, mv_info: &mut FrameMV) -> DecoderResult<bool> {
+ let mut mb_idx = slice_hdr.first_mb_in_slice;
+ let mut prev_mb_skipped = false;
+ let skip_type = if slice_hdr.slice_type.is_p() { MBType::PSkip } else { MBType::BSkip };
+ // Tracks whether the previous macroblock had a non-zero QP delta; it selects the context for the next delta.
+ let mut last_qp_diff = false;
+
+ // `mb_info` is reused for every macroblock; its QP carries over from one macroblock to the next.
+ let mut mb_info = CurrentMBInfo { qp_y: slice_hdr.slice_qp, ..Default::default() };
+
+ let slice_refs = self.frame_refs.cur_refs.clone();
+ let sslice_refs = SimplifiedSliceRefs::new(&slice_refs);
+
+ while mb_idx < self.num_mbs {
+ // Reset the per-macroblock state that must not leak from the previous macroblock.
+ mb_info.coded = [false; 25];
+ mb_info.ref_l0 = [ZERO_REF; 4];
+ mb_info.ref_l1 = [ZERO_REF; 4];
+ mb_info.mv_l0 = [ZERO_MV; 16];
+ mb_info.mv_l1 = [ZERO_MV; 16];
+ mb_info.chroma_dc = [[0; 4]; 2];
+ mb_info.cbpy = 0;
+ mb_info.cbpc = 0;
+ let mb_skip = cabac_decode_mbskip(cabac, &self.sstate, slice_hdr);
+ if !mb_skip {
+ // The field decoding flag (context 70) is decoded in MBAFF mode but its value is not used.
+ if self.is_mbaff && (((mb_idx & 1) == 0) || (prev_mb_skipped && ((mb_idx & 1) == 1))) {
+ let _mb_field_decoding = cabac.decode_bit(70);
+ }
+ let mut mb_type = cabac_decode_mb_type(cabac, slice_hdr, &self.sstate);
+ mb_info.mb_type = mb_type;
+ mb_info.transform_size_8x8 = false;
+ if mb_type == MBType::PCM {
+ // PCM samples are copied as raw bytes from the CABAC input, after which the decoder is reinitialised.
+ let ipcm_size = 256 + 64 + 64;
+ validate!(cabac.pos + ipcm_size <= cabac.src.len());
+ self.ipcm_buf[..ipcm_size].copy_from_slice(&cabac.src[cabac.pos..][..ipcm_size]);
+ cabac.pos += ipcm_size;
+ cabac.reinit()?;
+ last_qp_diff = false;
+ } else {
+ // An Intra4x4 macroblock turns into Intra8x8 when the 8x8 transform flag is set;
+ // the flag context is 399 plus the number of top/left neighbours using the 8x8 transform.
+ if self.transform_8x8_mode && mb_type == MBType::Intra4x4 {
+ let mut ctx = 0;
+ if self.sstate.get_top_mb().transform_8x8 {
+ ctx += 1;
+ }
+ if self.sstate.get_left_mb().transform_8x8 {
+ ctx += 1;
+ }
+ mb_info.transform_size_8x8 = cabac.decode_bit(399 + ctx);
+ if mb_info.transform_size_8x8 {
+ mb_type = MBType::Intra8x8;
+ mb_info.mb_type = MBType::Intra8x8;
+ }
+ }
+ decode_mb_pred_cabac(cabac, slice_hdr, mb_type, &mut self.sstate, &mut mb_info);
+ // Intra16x16 carries its CBP inside the macroblock type; other types decode it explicitly.
+ let (cbpy, cbpc) = if let MBType::Intra16x16(_, cbpy, cbpc) = mb_type {
+ (cbpy, cbpc)
+ } else {
+ decode_cbp_cabac(cabac, &self.sstate)
+ };
+ // The 8x8 transform flag is decoded here when luma is coded and the macroblock type permits it.
+ if self.transform_8x8_mode && cbpy != 0 && mb_info.can_have_8x8_tx(self.sps[self.cur_sps].direct_8x8_inference) {
+ let mut ctx = 0;
+ if self.sstate.get_top_mb().transform_8x8 {
+ ctx += 1;
+ }
+ if self.sstate.get_left_mb().transform_8x8 {
+ ctx += 1;
+ }
+ mb_info.transform_size_8x8 = cabac.decode_bit(399 + ctx);
+ }
+ if mb_type.is_intra() {
+ self.sstate.get_cur_mb().cmode = mb_info.chroma_ipred;
+ }
+ mb_info.cbpy = cbpy;
+ mb_info.cbpc = cbpc;
+ self.sstate.get_cur_mb().cbp = (cbpc << 4) | cbpy;
+ // A QP delta and residual data are present only when something is coded or the macroblock is Intra16x16.
+ if cbpy != 0 || cbpc != 0 || mb_type.is_intra16x16() {
+ let mb_qp_delta = decode_mb_qp_delta_cabac(cabac, last_qp_diff as usize);
+ validate!(mb_qp_delta >= -26 && mb_qp_delta <= 25);
+ last_qp_diff = mb_qp_delta != 0;
+ // The updated QP wraps around into the 0..52 range.
+ let new_qp = mb_qp_delta + i32::from(mb_info.qp_y);
+ mb_info.qp_y = if new_qp < 0 {
+ (new_qp + 52) as u8
+ } else if new_qp >= 52 {
+ (new_qp - 52) as u8
+ } else {
+ new_qp as u8
+ };
+ mb_info.coeffs = [[0; 16]; 25];
+ if self.transform_8x8_mode {
+ mb_info.clear_coeffs8x8();
+ }
+ mb_info.chroma_dc = [[0; 4]; 2];
+ decode_residual_cabac(cabac, &mut self.sstate, &mut mb_info);
+ } else {
+ last_qp_diff = false;
+ }
+ }
+ } else {
+ mb_info.mb_type = skip_type;
+ mb_info.transform_size_8x8 = false;
+ last_qp_diff = false;
+ }
+ self.handle_macroblock(slice_hdr, &mut mb_info, &sslice_refs, frm, mv_info);
+ prev_mb_skipped = mb_skip;
+ // The terminate bin is checked after every macroblock except after even-indexed ones in MBAFF mode.
+ if !(self.is_mbaff && ((mb_idx & 1) == 0)) && cabac.decode_terminate() {
+ if let Some(ref mut pic) = self.cur_pic {
+ pic.cur_mb = mb_idx + 1;
+ }
+ return Ok(mb_idx + 1 == self.num_mbs);
+ }
+ mb_idx += 1;
+ }
+ // Ran out of macroblocks without decoding the end-of-slice marker.
+ Err(DecoderError::InvalidData)
+ }
+}
+
+impl NADecoder for H264Decoder {
+    /// Initialises the decoder from the stream information.
+    ///
+    /// Only video streams whose extradata is an `avcC` record are accepted: the record supplies the
+    /// NAL length prefix size and the SPS/PPS NAL units, which are fed to `handle_nal`.
+    /// Afterwards the frame size is taken from the stream information (or from the first SPS when
+    /// the stream reports zero) and the 16-bit frame pool is preallocated.
+    ///
+    /// # Errors
+    /// * `DecoderError::InvalidData` for non-video streams and malformed `avcC` contents;
+    /// * `DecoderError::NotImplemented` when extradata is absent or is not an `avcC` record;
+    /// * any error reported by the reader, `handle_nal` or the frame pool.
+    fn init(&mut self, supp: &mut NADecoderSupport, info: NACodecInfoRef) -> DecoderResult<()> {
+        if let NACodecTypeInfo::Video(vinfo) = info.get_properties() {
+            let fmt = NAPixelFormaton::from_str("yuv420p10").unwrap();
+            let myinfo = NACodecTypeInfo::Video(NAVideoInfo::new(0, 0, false, fmt));
+            self.info = NACodecInfo::new_ref(info.get_name(), myinfo, info.get_extradata()).into_ref();
+
+            // A stream without extradata is treated like one with non-avcC extradata
+            // instead of panicking on `unwrap()`.
+            let edata = match info.get_extradata() {
+                Some(edata) => edata,
+                None => return Err(DecoderError::NotImplemented),
+            };
+//print!("edata:"); for &el in edata.iter() { print!(" {:02X}", el); } println!();
+            if edata.len() > 11 && &edata[0..4] == b"avcC" {
+                let mut br = MemoryReader::new_read(edata.as_slice());
+                let mut nal_buf = Vec::new();
+
+                br.read_skip(4)?;
+                let version = br.read_byte()?;
+                validate!(version == 1);
+                let profile = br.read_byte()?;
+                let _compatibility = br.read_byte()?;
+                let _level = br.read_byte()?;
+                let b = br.read_byte()?;
+                //validate!((b & 0xFC) == 0xFC);
+                self.nal_len = (b & 3) + 1;
+                let b = br.read_byte()?;
+                //validate!((b & 0xE0) == 0xE0);
+                let num_sps = (b & 0x1F) as usize;
+                for _ in 0..num_sps {
+                    let len = br.read_u16be()? as usize;
+                    let offset = br.tell() as usize;
+                    // The declared length comes from the file: make sure it fits before slicing.
+                    validate!(br.left() >= (len as i64));
+                    validate!((br.peek_byte()? & 0x1F) == 7);
+                    let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
+                    self.handle_nal(&nal_buf, supp, true)?;
+                    br.read_skip(len)?;
+                }
+                let num_pps = br.read_byte()? as usize;
+                for _ in 0..num_pps {
+                    let len = br.read_u16be()? as usize;
+                    let offset = br.tell() as usize;
+                    // Same bounds check as for the SPS entries.
+                    validate!(br.left() >= (len as i64));
+                    validate!((br.peek_byte()? & 0x1F) == 8);
+                    let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
+                    self.handle_nal(&nal_buf, supp, true)?;
+                    br.read_skip(len)?;
+                }
+                if br.left() > 0 {
+                    match profile {
+                        100 | 110 | 122 | 144 => {
+                            let b = br.read_byte()?;
+                            // some encoders put something different here
+                            // NOTE(review): this early return also skips the size and frame pool
+                            // setup below; kept as in the original, confirm it is intended.
+                            if (b & 0xFC) != 0xFC {
+                                return Ok(());
+                            }
+                            // b & 3 -> chroma format
+                            let b = br.read_byte()?;
+                            validate!((b & 0xF8) == 0xF8);
+                            // b & 7 -> luma depth minus 8
+                            let b = br.read_byte()?;
+                            validate!((b & 0xF8) == 0xF8);
+                            // b & 7 -> chroma depth minus 8
+                            let num_spsext = br.read_byte()? as usize;
+                            for _ in 0..num_spsext {
+                                let len = br.read_u16be()? as usize;
+                                // parse spsext
+                                br.read_skip(len)?;
+                            }
+                        },
+                        _ => {},
+                    };
+                }
+            } else {
+                return Err(DecoderError::NotImplemented);
+            }
+
+            self.width  = vinfo.get_width();
+            self.height = vinfo.get_height();
+            self.disp_w = self.width;
+            self.disp_h = self.height;
+
+            // Fall back to the dimensions of the first SPS when the stream does not report a size.
+            if (self.width == 0 || self.height == 0) && !self.sps.is_empty() {
+                self.width  = self.sps[0].pic_width_in_mbs  * 16;
+                self.height = self.sps[0].pic_height_in_mbs * 16;
+            }
+
+            let num_bufs = if !self.sps.is_empty() {
+                    self.sps[0].num_ref_frames + 1
+                } else {
+                    3
+                }.max(16 + 1);
+            supp.pool_u16.set_dec_bufs(num_bufs);
+            supp.pool_u16.prealloc_video(NAVideoInfo::new(self.width, self.height, false, fmt), 4)?;
+
+            Ok(())
+        } else {
+            Err(DecoderError::InvalidData)
+        }
+    }
+    /// Decodes one packet consisting of length-prefixed NAL units.
+    ///
+    /// When a frame skip mode is active the packet is scanned first: slice headers are parsed
+    /// minimally to find the picture type and whether it is a reference, and that decides
+    /// whether the NAL units are then handled in skip mode. After all NAL units have been
+    /// handled, a finished picture (if any) is registered as a reference where applicable
+    /// and returned; otherwise a frame of type `FrameType::Skip` without a buffer is returned.
+    ///
+    /// # Errors
+    /// * `DecoderError::NotImplemented` when no NAL length size is known (`nal_len == 0`);
+    /// * `DecoderError::InvalidData` when a NAL length exceeds the packet or a NAL unit is empty
+    ///   during the pre-scan;
+    /// * any error reported by the reader, the slice header parser or `handle_nal`.
+    fn decode(&mut self, supp: &mut NADecoderSupport, pkt: &NAPacket) -> DecoderResult<NAFrameRef> {
+        let src = pkt.get_buffer();
+
+        let mut br = MemoryReader::new_read(&src);
+        let mut nal_buf = Vec::with_capacity(src.len());
+        if self.nal_len == 0 {
+//todo NAL detection
+            // Report the missing feature instead of aborting the whole process.
+            return Err(DecoderError::NotImplemented);
+        }
+
+        let mut skip_decoding = false;
+        if self.skip_mode != FrameSkipMode::None {
+            // First pass: only look at slice headers to classify the picture.
+            let mut pic_type = FrameType::I;
+            let mut is_ref = false;
+            while br.left() > 0 {
+                let size = match self.nal_len {
+                        1 => br.read_byte()? as usize,
+                        2 => br.read_u16be()? as usize,
+                        3 => br.read_u24be()? as usize,
+                        4 => br.read_u32be()? as usize,
+                        _ => unreachable!(),
+                    };
+                validate!(br.left() >= (size as i64));
+                let offset = br.tell() as usize;
+                let size = unescape_nal(&src[offset..][..size], &mut nal_buf);
+                validate!(size > 0);
+                let nal_ref_idc   = nal_buf[0] >> 5;
+                let nal_unit_type = nal_buf[0] & 0x1F;
+                if nal_unit_type == 1 || nal_unit_type == 5 {
+                    let mut bitr = BitReader::new(&nal_buf[1..], BitReaderMode::BE);
+                    let (first_mb, slice_type) = parse_slice_header_minimal(&mut bitr)?;
+                    if first_mb == 0 && nal_ref_idc != 0 {
+                        is_ref = true;
+                    }
+                    // The picture type starts as I and is replaced by the first slice's type;
+                    // after that only a B slice can change it (to B).
+                    let new_type = slice_type.to_frame_type();
+                    pic_type = match (pic_type, new_type) {
+                            (FrameType::I, _) => new_type,
+                            (_, FrameType::B) => FrameType::B,
+                            _ => pic_type,
+                        };
+                }
+                br.read_skip(size)?;
+            }
+            match self.skip_mode {
+                FrameSkipMode::IntraOnly => {
+                    skip_decoding = pic_type != FrameType::I;
+                },
+                FrameSkipMode::KeyframesOnly => {
+                    if !is_ref {
+                        skip_decoding = true;
+                    }
+                },
+                _ => {},
+            };
+            br.seek(SeekFrom::Start(0))?;
+        }
+        // Second pass: hand every NAL unit to the decoder proper.
+        while br.left() > 0 {
+            let size = match self.nal_len {
+                    1 => br.read_byte()? as usize,
+                    2 => br.read_u16be()? as usize,
+                    3 => br.read_u24be()? as usize,
+                    4 => br.read_u32be()? as usize,
+                    _ => unreachable!(),
+                };
+            validate!(br.left() >= (size as i64));
+            let offset = br.tell() as usize;
+            let _size = unescape_nal(&src[offset..][..size], &mut nal_buf);
+            self.handle_nal(nal_buf.as_slice(), supp, skip_decoding)?;
+            br.read_skip(size)?;
+        }
+
+        // Take the current picture out of the decoder only when a picture was produced.
+        let finished = if self.has_pic { self.cur_pic.take() } else { None };
+        let (bufinfo, ftype, pts) = if let Some(cpic) = finished {
+                let ret = (NABufferType::Video16(cpic.buf.clone()), cpic.pic_type, Some(u64::from(cpic.full_id)));
+                if cpic.is_ref {
+                    self.frame_refs.add_short_term(cpic.clone(), self.sps[self.cur_sps].num_ref_frames);
+                }
+                if let Some(lt_idx) = cpic.long_term {
+                    self.frame_refs.add_long_term(lt_idx, cpic);
+                }
+                ret
+            } else {
+                (NABufferType::None, FrameType::Skip, None)
+            };
+
+        let mut frm = NAFrame::new_from_pkt(pkt, self.info.clone(), bufinfo);
+        frm.set_keyframe(ftype == FrameType::I);
+        // Use the picture ID as timestamp only when the packet did not provide one.
+        if let (Some(mypts), None) = (pts, frm.get_pts()) {
+            frm.set_pts(Some(mypts));
+        }
+        if let Some(pts) = pts {
+            frm.set_id(pts as i64);
+        }
+        frm.set_frame_type(ftype);
+        Ok(frm.into_ref())
+    }
+    /// Does nothing in this decoder.
+    fn flush(&mut self) {
+    }
+}
+
+impl NAOptionHandler for H264Decoder {
+    /// Returns the option definitions supported by this decoder.
+    fn get_supported_options(&self) -> &[NAOptionDefinition] {
+        DECODER_OPTIONS
+    }
+    /// Applies the frame skip mode and deblock skip options.
+    ///
+    /// An option is applied once for every supported definition whose check accepts it;
+    /// unparsable skip mode strings and other options are silently ignored.
+    fn set_options(&mut self, options: &[NAOption]) {
+        for option in options.iter() {
+            let accepting_defs = DECODER_OPTIONS.iter().filter(|def| def.check(option).is_ok());
+            for _ in accepting_defs {
+                match &option.value {
+                    NAValue::String(strval) if option.name == FRAME_SKIP_OPTION => {
+                        if let Ok(smode) = FrameSkipMode::from_str(strval) {
+                            self.skip_mode = smode;
+                        }
+                    },
+                    NAValue::Bool(val) if option.name == DEBLOCK_SKIP_OPTION => {
+                        self.deblock_skip = *val;
+                    },
+                    _ => {},
+                }
+            }
+        }
+    }
+    /// Reports the current value of a supported option, `None` for unknown names.
+    fn query_option_value(&self, name: &str) -> Option<NAValue> {
+        if name == FRAME_SKIP_OPTION {
+            Some(NAValue::String(self.skip_mode.to_string()))
+        } else if name == DEBLOCK_SKIP_OPTION {
+            Some(NAValue::Bool(self.deblock_skip))
+        } else {
+            None
+        }
+    }
+}
--- /dev/null
+use std::sync::{Arc, Barrier};
+use std::sync::atomic::*;
+use std::thread;
+
+use nihav_core::codecs::{DecoderError, DecoderResult};
+
+use super::PictureInfo;
+use super::decoder_mt::FrameDecoder;
+use super::super::Shareable;
+
+/// Result of querying the decoding progress of a frame (see `ThreadDispatcher::check_pos`).
+#[derive(Clone,Copy,Debug,PartialEq)]
+pub enum FrameDecodingStatus {
+ /// The frame is complete or has been decoded past the requested macroblock position.
+ Ok,
+ /// The requested macroblock position has not been reached yet.
+ NotReady,
+ /// Decoding of the frame was flagged as failed.
+ Error,
+ /// No frame with the requested ID is being tracked.
+ NotFound,
+}
+
+/// Bookkeeping for one frame handed to a decoding thread.
+struct FrameState {
+ /// Picture being decoded; its `full_id` identifies this state.
+ pinfo: PictureInfo,
+ /// Macroblock position reported through `ThreadDispatcher::update_pos`.
+ mb_pos: AtomicUsize,
+ /// Set when the worker reports a decoding error.
+ error: AtomicBool,
+ /// Set when the worker has decoded all macroblocks.
+ complete: AtomicBool,
+ /// Set once the frame has been handed out by `wait_for_one`.
+ output: AtomicBool,
+ /// Handle of the decoding thread until it is joined in `cleanup`.
+ worker: Option<thread::JoinHandle<DecoderResult<()>>>,
+ /// Value returned by the worker thread; holds `DecoderError::Bug` until the thread is joined.
+ result: DecoderResult<()>,
+ /// Reference counter maintained by `queue_decoding` and `unref_frame`.
+ num_refs: usize,
+ /// IDs of the frames passed as `initial_ref_frames` when this frame was queued.
+ ref_frames: Vec<u32>,
+}
+
+impl FrameState {
+    /// Full ID of the picture tracked by this state.
+    fn get_id(&self) -> u32 {
+        self.pinfo.full_id
+    }
+    /// User-provided ID of the picture tracked by this state.
+    fn get_user_id(&self) -> u32 {
+        self.pinfo.user_id
+    }
+    /// Tells whether a worker thread is attached and has flagged neither completion nor an error.
+    fn is_working(&self) -> bool {
+        if self.worker.is_none() {
+            return false;
+        }
+        let finished = self.complete.load(Ordering::Relaxed) || self.error.load(Ordering::Relaxed);
+        !finished
+    }
+    /// Tells whether the frame has finished (successfully or not) but has not been output yet.
+    fn is_output_candidate(&self) -> bool {
+        if self.output.load(Ordering::Relaxed) {
+            return false;
+        }
+        self.complete.load(Ordering::Relaxed) || self.error.load(Ordering::Relaxed)
+    }
+}
+
+/// Keeps track of the frames being decoded in worker threads.
+pub struct ThreadDispatcher {
+ /// States of all frames currently tracked.
+ fstate: Vec<FrameState>,
+ /// Limit on simultaneously working threads checked by `can_decode_more` (zero disables the thread-count check).
+ pub max_threads: usize,
+ /// Number of worker threads queued and not yet joined.
+ cur_threads: usize,
+}
+
+impl ThreadDispatcher {
+    /// Creates an empty dispatcher with a limit of three threads.
+    pub fn new() -> Self {
+        Self {
+            fstate:         Vec::new(),
+            max_threads:    3,
+            cur_threads:    0,
+        }
+    }
+    /// Tells whether another frame may be queued for decoding.
+    ///
+    /// The answer is negative when more than `max_threads` finished frames await output,
+    /// or when the thread limit is reached and that many workers are still busy.
+    pub fn can_decode_more(&self) -> bool {
+        let pending_output = self.fstate.iter().filter(|state| state.is_output_candidate()).count();
+        if pending_output > self.max_threads {
+            return false;
+        }
+        if self.max_threads == 0 || self.cur_threads < self.max_threads {
+            return true;
+        }
+        let busy = self.fstate.iter().filter(|state| state.is_working()).count();
+        busy < self.max_threads
+    }
+    /// Joins the worker threads that are no longer working and stores their results.
+    fn cleanup(&mut self) {
+        for state in self.fstate.iter_mut() {
+            if state.worker.is_none() || state.is_working() {
+                continue;
+            }
+            if let Some(handle) = state.worker.take() {
+                state.result = handle.join().unwrap();
+            }
+            self.cur_threads -= 1;
+        }
+    }
+    /// Releases the references held by frame `id` and drops the frames that are not needed any longer.
+    fn unref_frame(&mut self, id: u32) {
+        // Frames that were already unreferenced and output before this call are removed at the end.
+        let stale: Vec<u32> = self.fstate.iter()
+                .filter(|state| state.num_refs == 0 && state.output.load(Ordering::Relaxed))
+                .map(|state| state.get_id())
+                .collect();
+        if let Some(idx) = self.find_by_id(id) {
+            let released = std::mem::replace(&mut self.fstate[idx].ref_frames, Vec::new());
+            for state in self.fstate.iter_mut().filter(|state| released.contains(&state.get_id())) {
+                assert!(state.num_refs >= 2);
+                state.num_refs -= 2;
+            }
+            let removable = self.fstate[idx].num_refs == 0 && self.fstate[idx].output.load(Ordering::Relaxed);
+            if removable {
+                self.remove_frame(id);
+            }
+        }
+        for stale_id in stale {
+            self.remove_frame(stale_id);
+        }
+    }
+    /// Returns the index of the state tracking the frame with the given full ID.
+    fn find_by_id(&self, id: u32) -> Option<usize> {
+        self.fstate.iter().position(|state| state.get_id() == id)
+    }
+    /// Flags the frame as completely decoded (no-op for unknown IDs).
+    fn set_completed(&self, id: u32) {
+        if let Some(idx) = self.find_by_id(id) {
+            self.fstate[idx].complete.store(true, Ordering::Relaxed);
+        }
+    }
+    /// Flags the frame as failed (no-op for unknown IDs).
+    fn set_error(&self, id: u32) {
+        if let Some(idx) = self.find_by_id(id) {
+            self.fstate[idx].error.store(true, Ordering::Relaxed);
+        }
+    }
+    /// Records the macroblock position reached for the frame (no-op for unknown IDs).
+    pub fn update_pos(&self, id: u32, mb_pos: usize) {
+        if let Some(idx) = self.find_by_id(id) {
+            self.fstate[idx].mb_pos.store(mb_pos, Ordering::Relaxed);
+        }
+    }
+    /// Checks whether the frame has been decoded beyond the macroblock position `mb_pos`.
+    pub fn check_pos(&self, id: u32, mb_pos: usize) -> FrameDecodingStatus {
+        let state = match self.find_by_id(id) {
+                Some(idx) => &self.fstate[idx],
+                None => return FrameDecodingStatus::NotFound,
+            };
+        if state.error.load(Ordering::Relaxed) {
+            return FrameDecodingStatus::Error;
+        }
+        if state.complete.load(Ordering::Relaxed) || mb_pos < state.mb_pos.load(Ordering::Relaxed) {
+            FrameDecodingStatus::Ok
+        } else {
+            FrameDecodingStatus::NotReady
+        }
+    }
+    /// Stops tracking the frame with the given full ID.
+    fn remove_frame(&mut self, id: u32) {
+        if let Some(idx) = self.find_by_id(id) {
+            self.fstate.remove(idx);
+        }
+    }
+    /*fn print_state(&self) {
+        print!(" state:");
+        for state in self.fstate.iter() {
+            print!(" s{}b{}r{}{}{}{}", state.get_id(),
+                state.mb_pos.load(Ordering::Relaxed), state.num_refs,
+                if state.error.load(Ordering::Relaxed) { "E" } else {""},
+                if state.complete.load(Ordering::Relaxed) {"C"} else {""},
+                if state.output.load(Ordering::Relaxed) {"O"} else {""});
+        }
+        println!();
+    }*/
+    /// Tells whether any finished frame is waiting to be output.
+    pub fn has_output(&self) -> bool {
+        self.fstate.iter().any(|state| state.is_output_candidate())
+    }
+}
+
+/// Spawns a thread decoding all slices of `fdec` and registers the frame with the dispatcher.
+///
+/// The thread is named `frame <id>` and waits on a barrier until the frame state has been
+/// added to the dispatcher. It flags an error when the slices do not start at the expected
+/// macroblock or fail to decode, and flags completion when all macroblocks have been decoded.
+/// Frames listed in `ref_frames` and `initial_ref_frames` get their reference counters
+/// increased (once per list they appear in).
+///
+/// Panics when the thread cannot be spawned or the dispatcher lock is poisoned.
+pub fn queue_decoding(disp: &mut Shareable<ThreadDispatcher>, mut fdec: FrameDecoder, initial_ref_frames: &[u32], ref_frames: &[u32]) {
+    let barrier = Arc::new(Barrier::new(2));
+    let starter = Arc::clone(&barrier);
+
+    let pinfo = fdec.cur_pic.clone();
+    let pic_id = pinfo.full_id;
+    let shared_disp = Arc::clone(disp);
+    let thread_name = format!("frame {}", pic_id);
+    let worker = thread::Builder::new().name(thread_name).spawn(move || {
+            // Do not start before the frame state is visible in the dispatcher.
+            barrier.wait();
+
+            let flag_error = || {
+                    match shared_disp.read() {
+                        Ok(rd) => rd.set_error(pic_id),
+                        Err(_) => panic!("can't set error"),
+                    }
+                };
+
+            let slices = std::mem::replace(&mut fdec.slices, Vec::new());
+            let mut cur_mb = 0;
+            for (hdr, hdr_size, refs, nal) in slices.iter() {
+                // Every slice has to continue exactly where the previous one ended.
+                if hdr.first_mb_in_slice != cur_mb {
+                    flag_error();
+                    return Err(DecoderError::InvalidData);
+                }
+                cur_mb = match fdec.decode_slice(hdr, *hdr_size, refs, nal) {
+                        Ok(pos) => pos,
+                        Err(err) => {
+                            flag_error();
+                            return Err(err);
+                        },
+                    };
+            }
+
+            if cur_mb == fdec.num_mbs {
+                match shared_disp.read() {
+                    Ok(rd) => rd.set_completed(pic_id),
+                    Err(_) => panic!("can't set status"),
+                }
+            }
+
+            DecoderResult::Ok(())
+        }).unwrap();
+    let new_state = FrameState {
+            pinfo,
+            mb_pos:     AtomicUsize::new(0),
+            error:      AtomicBool::new(false),
+            complete:   AtomicBool::new(false),
+            output:     AtomicBool::new(false),
+            worker:     Some(worker),
+            result:     DecoderResult::Err(DecoderError::Bug),
+            num_refs:   0,
+            ref_frames: initial_ref_frames.to_vec(),
+        };
+    match disp.write() {
+        Ok(mut ds) => {
+            // A leftover state with the same ID is replaced by the new one.
+            let new_id = new_state.get_id();
+            if ds.find_by_id(new_id).is_some() {
+                ds.remove_frame(new_id);
+            }
+            ds.cleanup();
+            ds.fstate.push(new_state);
+            for state in ds.fstate.iter_mut() {
+                let state_id = state.get_id();
+                let in_refs    = ref_frames.contains(&state_id) as usize;
+                let in_initial = initial_ref_frames.contains(&state_id) as usize;
+                state.num_refs += in_refs;
+                state.num_refs += in_initial;
+            }
+            ds.cur_threads += 1;
+            // Release the worker now that its state is registered.
+            starter.wait();
+        },
+        Err(_) => panic!("cannot invoke thread dispatcher"),
+    }
+}
+
+/// Waits until some frame has finished decoding and returns it.
+///
+/// The dispatcher is polled (yielding the thread between polls) until a frame is complete or failed.
+/// Returns `Err((DecoderError::NoFrame, 0))` when no frame is ready and no worker was seen
+/// working, and the worker's error together with the user ID of the frame when decoding failed.
+/// The returned frame is marked as output and its references are released.
+///
+/// Panics after polling for more than 20 seconds or when the dispatcher lock is poisoned.
+pub fn wait_for_one(dispatch: &mut Shareable<ThreadDispatcher>) -> Result<PictureInfo, (DecoderError, u32)> {
+    /*if let Ok(ref ds) = dispatch.read() {
+        ds.print_state();
+    }*/
+    let start = std::time::Instant::now();
+    loop {
+        if start.elapsed() > std::time::Duration::from_millis(20000) { panic!(" too long!"); }
+        let ready = {
+                let ds = match dispatch.read() {
+                        Ok(guard) => guard,
+                        Err(_) => panic!("can't peek into status"),
+                    };
+                let mut working = 0;
+                let mut ready = false;
+                for state in ds.fstate.iter() {
+                    if state.is_working() {
+                        working += 1;
+                    }
+                    if state.is_output_candidate() {
+                        ready = true;
+                        break;
+                    }
+                }
+                if !ready && working == 0 {
+                    return Err((DecoderError::NoFrame, 0));
+                }
+                ready
+            };
+        if ready {
+            break;
+        }
+        thread::yield_now();
+    }
+    let mut ds = match dispatch.write() {
+            Ok(guard) => guard,
+            Err(_) => panic!("can't grab status"),
+        };
+    ds.cleanup();
+    // The first finished frame in the list is the one handed out.
+    let idx = match ds.fstate.iter().position(|state| state.is_output_candidate()) {
+            Some(idx) => idx,
+            None => unreachable!(),
+        };
+    let state = &ds.fstate[idx];
+    state.output.store(true, Ordering::Relaxed);
+    if let DecoderResult::Err(err) = state.result {
+        let id = state.get_id();
+        let user_id = state.get_user_id();
+        ds.unref_frame(id);
+        Err((err, user_id))
+    } else {
+        let ret = state.pinfo.clone();
+        ds.unref_frame(ret.full_id);
+        Ok(ret)
+    }
+}
+
+/// Drops all tracked frames and waits for their worker threads to finish.
+///
+/// The thread handles are collected while the dispatcher is locked and joined after the lock
+/// has been released; the results of the threads are ignored.
+/// Panics when the dispatcher lock is poisoned.
+pub fn clear_threads(dispatch: &mut Shareable<ThreadDispatcher>) {
+    /*if let Ok(ref ds) = dispatch.read() {
+        ds.print_state();
+    }*/
+    let mut pending = Vec::new();
+    match dispatch.write() {
+        Ok(mut ds) => {
+            // States are consumed from the back of the list.
+            pending.extend(ds.fstate.drain(..).rev().filter_map(|state| state.worker));
+            ds.cur_threads = 0;
+        },
+        Err(_) => panic!("can't grab status"),
+    }
+    for handle in pending.into_iter().rev() {
+        let _ = handle.join();
+    }
+}
--- /dev/null
+const TMP_BUF_STRIDE: usize = 32;
+
+/// Applies the six-tap (1, -5, 20, 20, -5, 1) filter and averages the clipped result with a source sample.
+///
+/// The taps are `step` elements apart, where `step` is 1 when `hor` is set and `sstride` otherwise.
+/// The filtered value is rounded, shifted right by 5, passed through `clip` and then averaged
+/// (with rounding) with the sample at tap 2 when `avg0` is set or at tap 3 otherwise.
+/// `w` by `h` samples are written to `dst` (rows `dstride` apart); source rows are `sstride` apart.
+fn interp_block1(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, hor: bool, avg0: bool, clip: fn (i32) -> u16) {
+    const TAPS: [i32; 6] = [1, -5, 20, 20, -5, 1];
+
+    let step = if hor { 1 } else { sstride };
+    let avg_off = if avg0 { step * 2 } else { step * 3 };
+
+    for (row, dline) in dst.chunks_mut(dstride).take(h).enumerate() {
+        let row_start = row * sstride;
+        for (x, pix) in dline.iter_mut().take(w).enumerate() {
+            let pos = row_start + x;
+            let sum: i32 = TAPS.iter().enumerate()
+                    .map(|(tap, &coef)| coef * i32::from(src[pos + tap * step]))
+                    .sum();
+            let filtered = clip((sum + 16) >> 5);
+            *pix = (filtered + src[pos + avg_off] + 1) >> 1;
+        }
+    }
+}
+
+/// Applies the six-tap (1, -5, 20, 20, -5, 1) filter to a block.
+///
+/// The taps are `step` elements apart, where `step` is 1 when `hor` is set and `sstride` otherwise.
+/// Each filtered value is rounded, shifted right by 5 and passed through `clip`.
+/// `w` by `h` samples are written to `dst` (rows `dstride` apart); source rows are `sstride` apart.
+fn interp_block2(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, hor: bool, clip: fn (i32) -> u16) {
+    let step = if hor { 1 } else { sstride };
+    for (row, dline) in dst.chunks_mut(dstride).take(h).enumerate() {
+        let row_start = row * sstride;
+        for (x, pix) in dline.iter_mut().take(w).enumerate() {
+            let pos = row_start + x;
+            let tap = |n: usize| i32::from(src[pos + n * step]);
+            let sum = tap(0) - 5 * tap(1) + 20 * tap(2) + 20 * tap(3) - 5 * tap(4) + tap(5);
+            *pix = clip((sum + 16) >> 5);
+        }
+    }
+}
+
+/// Writes the rounded average of two temporary blocks (rows `TMP_BUF_STRIDE` apart) into `dst`.
+fn mc_avg_tmp(dst: &mut [u16], dstride: usize, w: usize, h: usize, tmp: &[u16], tmp2: &[u16]) {
+    let rows = dst.chunks_mut(dstride)
+            .zip(tmp.chunks(TMP_BUF_STRIDE))
+            .zip(tmp2.chunks(TMP_BUF_STRIDE))
+            .take(h);
+    for ((dline, sline0), sline1) in rows {
+        let samples = dline.iter_mut().zip(sline0.iter()).zip(sline1.iter()).take(w);
+        for ((pix, &a), &b) in samples {
+            *pix = (a + b + 1) >> 1;
+        }
+    }
+}
+
+/// Copies a `w` by `h` block from `src` to `dst` without any filtering; the clip function is unused.
+fn h264_mc00(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, _clip: fn (i32) -> u16) {
+    let rows = dst.chunks_mut(dstride).take(h).zip(src.chunks(sstride));
+    for (dline, sline) in rows {
+        let out = &mut dline[..w];
+        out.copy_from_slice(&sline[..w]);
+    }
+}
+
+/// Filters along rows starting two rows into `src` and averages with the sample at tap 2.
+fn h264_mc01(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let below_two_rows = &src[sstride * 2..];
+    let (along_row, avg_with_tap2) = (true, true);
+    interp_block1(dst, dstride, below_two_rows, sstride, w, h, along_row, avg_with_tap2, clip);
+}
+
+/// Filters along rows starting two rows into `src`, without averaging.
+fn h264_mc02(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let below_two_rows = &src[sstride * 2..];
+    let along_row = true;
+    interp_block2(dst, dstride, below_two_rows, sstride, w, h, along_row, clip);
+}
+
+/// Filters along rows starting two rows into `src` and averages with the sample at tap 3.
+fn h264_mc03(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let below_two_rows = &src[sstride * 2..];
+    let (along_row, avg_with_tap2) = (true, false);
+    interp_block1(dst, dstride, below_two_rows, sstride, w, h, along_row, avg_with_tap2, clip);
+}
+
+/// Filters along columns starting two samples into `src` and averages with the sample at tap 2.
+fn h264_mc10(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let after_two_cols = &src[2..];
+    let (along_row, avg_with_tap2) = (false, true);
+    interp_block1(dst, dstride, after_two_cols, sstride, w, h, along_row, avg_with_tap2, clip);
+}
+
+/// Averages the outputs of `h264_mc02` and `h264_mc20`, both computed from `src`.
+fn h264_mc11(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let mut first  = [0u16; TMP_BUF_STRIDE * 16];
+    let mut second = [0u16; TMP_BUF_STRIDE * 16];
+    h264_mc02(&mut first,  TMP_BUF_STRIDE, src, sstride, w, h, clip);
+    h264_mc20(&mut second, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+    mc_avg_tmp(dst, dstride, w, h, &first, &second);
+}
+
+/// Averages the outputs of `h264_mc02` and `h264_mc22`, both computed from `src`.
+fn h264_mc12(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let mut first  = [0u16; TMP_BUF_STRIDE * 16];
+    let mut second = [0u16; TMP_BUF_STRIDE * 16];
+    h264_mc02(&mut first,  TMP_BUF_STRIDE, src, sstride, w, h, clip);
+    h264_mc22(&mut second, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+    mc_avg_tmp(dst, dstride, w, h, &first, &second);
+}
+
+/// Averages `h264_mc02` of `src` with `h264_mc20` of `src` shifted by one sample.
+fn h264_mc13(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let mut first  = [0u16; TMP_BUF_STRIDE * 16];
+    let mut second = [0u16; TMP_BUF_STRIDE * 16];
+    h264_mc02(&mut first, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+    let next_col = &src[1..];
+    h264_mc20(&mut second, TMP_BUF_STRIDE, next_col, sstride, w, h, clip);
+    mc_avg_tmp(dst, dstride, w, h, &first, &second);
+}
+
+/// Filters along columns starting two samples into `src`, without averaging.
+fn h264_mc20(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let after_two_cols = &src[2..];
+    let along_row = false;
+    interp_block2(dst, dstride, after_two_cols, sstride, w, h, along_row, clip);
+}
+
+/// Averages the outputs of `h264_mc22` and `h264_mc20`, both computed from `src`.
+fn h264_mc21(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let mut first  = [0u16; TMP_BUF_STRIDE * 16];
+    let mut second = [0u16; TMP_BUF_STRIDE * 16];
+    h264_mc22(&mut first,  TMP_BUF_STRIDE, src, sstride, w, h, clip);
+    h264_mc20(&mut second, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+    mc_avg_tmp(dst, dstride, w, h, &first, &second);
+}
+
+/// Applies the six-tap filter in both directions.
+///
+/// The first pass filters along columns for `w + 5` samples per row and keeps the unrounded
+/// sums in a temporary buffer; the second pass filters those sums along rows, rounds,
+/// shifts right by 10 and passes the result through `clip`.
+fn h264_mc22(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let mut sums = [0i32; TMP_BUF_STRIDE * 16];
+    for (row, tline) in sums.chunks_mut(TMP_BUF_STRIDE).take(h).enumerate() {
+        let row_start = row * sstride;
+        for (x, acc) in tline.iter_mut().take(w + 5).enumerate() {
+            let pos = row_start + x;
+            let tap = |n: usize| i32::from(src[pos + sstride * n]);
+            *acc = tap(0) - 5 * tap(1) + 20 * tap(2) + 20 * tap(3) - 5 * tap(4) + tap(5);
+        }
+    }
+    let rows = dst.chunks_mut(dstride).zip(sums.chunks(TMP_BUF_STRIDE)).take(h);
+    for (dline, sline) in rows {
+        for (x, pix) in dline.iter_mut().take(w).enumerate() {
+            let filtered = sline[x] - 5 * sline[x + 1] + 20 * sline[x + 2] + 20 * sline[x + 3] - 5 * sline[x + 4] + sline[x + 5];
+            *pix = clip((filtered + 512) >> 10);
+        }
+    }
+}
+
+/// Averages `h264_mc22` of `src` with `h264_mc20` of `src` shifted by one sample.
+fn h264_mc23(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let mut first  = [0u16; TMP_BUF_STRIDE * 16];
+    let mut second = [0u16; TMP_BUF_STRIDE * 16];
+    h264_mc22(&mut first, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+    let next_col = &src[1..];
+    h264_mc20(&mut second, TMP_BUF_STRIDE, next_col, sstride, w, h, clip);
+    mc_avg_tmp(dst, dstride, w, h, &first, &second);
+}
+
+/// Filters along columns starting two samples into `src` and averages with the sample at tap 3.
+fn h264_mc30(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let after_two_cols = &src[2..];
+    let (along_row, avg_with_tap2) = (false, false);
+    interp_block1(dst, dstride, after_two_cols, sstride, w, h, along_row, avg_with_tap2, clip);
+}
+
+/// Averages `h264_mc20` of `src` with `h264_mc02` of `src` shifted by one row.
+fn h264_mc31(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let mut first  = [0u16; TMP_BUF_STRIDE * 16];
+    let mut second = [0u16; TMP_BUF_STRIDE * 16];
+    h264_mc20(&mut first, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+    let next_row = &src[sstride..];
+    h264_mc02(&mut second, TMP_BUF_STRIDE, next_row, sstride, w, h, clip);
+    mc_avg_tmp(dst, dstride, w, h, &first, &second);
+}
+
+/// Averages `h264_mc22` of `src` with `h264_mc02` of `src` shifted by one row.
+fn h264_mc32(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let mut first  = [0u16; TMP_BUF_STRIDE * 16];
+    let mut second = [0u16; TMP_BUF_STRIDE * 16];
+    h264_mc22(&mut first, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+    let next_row = &src[sstride..];
+    h264_mc02(&mut second, TMP_BUF_STRIDE, next_row, sstride, w, h, clip);
+    mc_avg_tmp(dst, dstride, w, h, &first, &second);
+}
+
+/// Averages `h264_mc20` of `src` shifted by one sample with `h264_mc02` of `src` shifted by one row.
+fn h264_mc33(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+    let mut first  = [0u16; TMP_BUF_STRIDE * 16];
+    let mut second = [0u16; TMP_BUF_STRIDE * 16];
+    let next_col = &src[1..];
+    h264_mc20(&mut first, TMP_BUF_STRIDE, next_col, sstride, w, h, clip);
+    let next_row = &src[sstride..];
+    h264_mc02(&mut second, TMP_BUF_STRIDE, next_row, sstride, w, h, clip);
+    mc_avg_tmp(dst, dstride, w, h, &first, &second);
+}
+
+
+/// Bilinear interpolation of a `w` by `h` block with eighth-sample weights `dx` and `dy`.
+///
+/// The horizontal weights are `8 - dx` and `dx`, the vertical ones `8 - dy` and `dy`.
+/// A plain copy is done when both offsets are zero and a one-dimensional blend
+/// (rounded, shifted right by 3) when only one of them is zero; otherwise all four
+/// neighbours are blended (rounded, shifted right by 6).
+/// `src` must contain at least `sstride` elements even when the second row is not used.
+fn chroma_interp(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) {
+    let (a0, a1) = (8 - dx, dx);
+    let (b0, b1) = (8 - dy, dy);
+
+    let src1 = &src[sstride..];
+    match (a0 == 8, b0 == 8) {
+        (true, true) => {
+            // No fractional offset: straight copy.
+            for (drow, line) in dst.chunks_mut(dstride).take(h).zip(src.chunks(sstride)) {
+                drow[..w].copy_from_slice(&line[..w]);
+            }
+        },
+        (true, false) => {
+            // Vertical blend of two consecutive rows.
+            let rows = dst.chunks_mut(dstride).zip(src.chunks(sstride)).zip(src1.chunks(sstride)).take(h);
+            for ((drow, line0), line1) in rows {
+                for ((pix, &a), &b) in drow.iter_mut().take(w).zip(line0.iter()).zip(line1.iter()) {
+                    *pix = (a * b0 + b * b1 + 4) >> 3;
+                }
+            }
+        },
+        (false, true) => {
+            // Horizontal blend of two neighbouring samples.
+            for (drow, line) in dst.chunks_mut(dstride).take(h).zip(src.chunks(sstride)) {
+                for (pix, pair) in drow.iter_mut().take(w).zip(line.windows(2)) {
+                    *pix = (pair[0] * a0 + pair[1] * a1 + 4) >> 3;
+                }
+            }
+        },
+        (false, false) => {
+            // Full blend of the four surrounding samples, done in 32 bits.
+            let wt00 = u32::from(a0 * b0);
+            let wt01 = u32::from(a1 * b0);
+            let wt10 = u32::from(a0 * b1);
+            let wt11 = u32::from(a1 * b1);
+            let rows = dst.chunks_mut(dstride).zip(src.chunks(sstride)).zip(src1.chunks(sstride)).take(h);
+            for ((drow, line0), line1) in rows {
+                let pairs = line0.windows(2).zip(line1.windows(2));
+                for (pix, (top, bot)) in drow.iter_mut().take(w).zip(pairs) {
+                    let sum = u32::from(top[0]) * wt00 + u32::from(top[1]) * wt01
+                            + u32::from(bot[0]) * wt10 + u32::from(bot[1]) * wt11;
+                    *pix = ((sum + 0x20) >> 6) as u16;
+                }
+            }
+        },
+    }
+}
+
+/// Chroma interpolation for blocks eight samples wide (see `chroma_interp`).
+pub fn chroma_interp_8(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, dx: u16, dy: u16, h: usize) {
+    const WIDTH: usize = 8;
+    chroma_interp(dst, dstride, src, sstride, dx, dy, WIDTH, h);
+}
+
+/// Chroma interpolation for blocks four samples wide (see `chroma_interp`).
+pub fn chroma_interp_4(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, dx: u16, dy: u16, h: usize) {
+    const WIDTH: usize = 4;
+    chroma_interp(dst, dstride, src, sstride, dx, dy, WIDTH, h);
+}
+
+/// Chroma interpolation for blocks two samples wide (see `chroma_interp`).
+pub fn chroma_interp_2(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, dx: u16, dy: u16, h: usize) {
+    const WIDTH: usize = 2;
+    chroma_interp(dst, dstride, src, sstride, dx, dy, WIDTH, h);
+}
+
+// Generates three fixed-width wrappers (4, 8 and 16 samples wide) around the generic
+// interpolation function `$orig`, all of them passing `$clip` as the clipping function.
+macro_rules! luma_mc {
+ ($orig:ident, $func4:ident, $func8:ident, $func16:ident, $clip:expr) => {
+ // wrapper for blocks four samples wide
+ fn $func4(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, h: usize) {
+ $orig(dst, dstride, src, sstride, 4, h, $clip);
+ }
+ // wrapper for blocks eight samples wide
+ fn $func8(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, h: usize) {
+ $orig(dst, dstride, src, sstride, 8, h, $clip);
+ }
+ // wrapper for blocks sixteen samples wide
+ fn $func16(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, h: usize) {
+ $orig(dst, dstride, src, sstride, 16, h, $clip);
+ }
+ }
+}
+
+ // Instantiate the 4/8/16-wide wrappers for each of the sixteen `h264_mcNN`
+ // functions, all using the 10-bit clipping function `super::clip_10`.
+ luma_mc!(h264_mc00, h264_mc00_4_10, h264_mc00_8_10, h264_mc00_16_10, super::clip_10);
+ luma_mc!(h264_mc01, h264_mc01_4_10, h264_mc01_8_10, h264_mc01_16_10, super::clip_10);
+ luma_mc!(h264_mc02, h264_mc02_4_10, h264_mc02_8_10, h264_mc02_16_10, super::clip_10);
+ luma_mc!(h264_mc03, h264_mc03_4_10, h264_mc03_8_10, h264_mc03_16_10, super::clip_10);
+ luma_mc!(h264_mc10, h264_mc10_4_10, h264_mc10_8_10, h264_mc10_16_10, super::clip_10);
+ luma_mc!(h264_mc11, h264_mc11_4_10, h264_mc11_8_10, h264_mc11_16_10, super::clip_10);
+ luma_mc!(h264_mc12, h264_mc12_4_10, h264_mc12_8_10, h264_mc12_16_10, super::clip_10);
+ luma_mc!(h264_mc13, h264_mc13_4_10, h264_mc13_8_10, h264_mc13_16_10, super::clip_10);
+ luma_mc!(h264_mc20, h264_mc20_4_10, h264_mc20_8_10, h264_mc20_16_10, super::clip_10);
+ luma_mc!(h264_mc21, h264_mc21_4_10, h264_mc21_8_10, h264_mc21_16_10, super::clip_10);
+ luma_mc!(h264_mc22, h264_mc22_4_10, h264_mc22_8_10, h264_mc22_16_10, super::clip_10);
+ luma_mc!(h264_mc23, h264_mc23_4_10, h264_mc23_8_10, h264_mc23_16_10, super::clip_10);
+ luma_mc!(h264_mc30, h264_mc30_4_10, h264_mc30_8_10, h264_mc30_16_10, super::clip_10);
+ luma_mc!(h264_mc31, h264_mc31_4_10, h264_mc31_8_10, h264_mc31_16_10, super::clip_10);
+ luma_mc!(h264_mc32, h264_mc32_4_10, h264_mc32_8_10, h264_mc32_16_10, super::clip_10);
+ luma_mc!(h264_mc33, h264_mc33_4_10, h264_mc33_8_10, h264_mc33_16_10, super::clip_10);
+
+ /// Table of 10-bit luma interpolation functions.
+ ///
+ /// The outer index selects the block width (0 = 4, 1 = 8, 2 = 16 samples, as the
+ /// function name suffixes show); the inner index runs over the sixteen
+ /// `h264_mcNN` variants in the order `00, 01, 02, 03, 10, ..., 33`.
+ pub const H264_LUMA_INTERP_10: [[super::MCFunc; 16]; 3] = [
+ [
+ h264_mc00_4_10, h264_mc01_4_10, h264_mc02_4_10, h264_mc03_4_10,
+ h264_mc10_4_10, h264_mc11_4_10, h264_mc12_4_10, h264_mc13_4_10,
+ h264_mc20_4_10, h264_mc21_4_10, h264_mc22_4_10, h264_mc23_4_10,
+ h264_mc30_4_10, h264_mc31_4_10, h264_mc32_4_10, h264_mc33_4_10
+ ], [
+ h264_mc00_8_10, h264_mc01_8_10, h264_mc02_8_10, h264_mc03_8_10,
+ h264_mc10_8_10, h264_mc11_8_10, h264_mc12_8_10, h264_mc13_8_10,
+ h264_mc20_8_10, h264_mc21_8_10, h264_mc22_8_10, h264_mc23_8_10,
+ h264_mc30_8_10, h264_mc31_8_10, h264_mc32_8_10, h264_mc33_8_10
+ ], [
+ h264_mc00_16_10, h264_mc01_16_10, h264_mc02_16_10, h264_mc03_16_10,
+ h264_mc10_16_10, h264_mc11_16_10, h264_mc12_16_10, h264_mc13_16_10,
+ h264_mc20_16_10, h264_mc21_16_10, h264_mc22_16_10, h264_mc23_16_10,
+ h264_mc30_16_10, h264_mc31_16_10, h264_mc32_16_10, h264_mc33_16_10
+ ]
+ ];
+
+ // This backend provides no SIMD routines, so registration is a no-op.
+ impl super::RegisterSIMD for super::H264MC {
+ fn register_simd(&mut self) {}
+ }
--- /dev/null
+use nihav_core::frame::*;
+use nihav_codec_support::codecs::MV;
+use super::super::SimpleFrame;
+
+ // Selects exactly one implementation module from an ordered list of
+ // `(cfg-condition, module)` pairs: the first entry whose condition holds wins.
+ //
+ // The `list` rules walk the pairs while accumulating, in `$nocond`, the
+ // disjunction of all earlier conditions (seeded with `r#false`). The `single`
+ // rule then declares the module and glob-imports it only when its own
+ // condition holds and none of the earlier ones did.
+ macro_rules! module_selector {
+ ($( ($cond:meta, $module:ident) ),*) => {
+ module_selector!(list; r#false; $(($cond, $module)),*);
+ };
+ (list; $nocond:meta; ($ccar:meta, $carmod:ident), $(($condcdr:meta, $cdrmod:ident)),*) => {
+ module_selector!(single; $nocond; $ccar; $carmod);
+ module_selector!(list; any($nocond, $ccar); $(($condcdr, $cdrmod)),*);
+ };
+ (list; $nocond:meta; ($yescond:meta, $module:ident)) => {
+ module_selector!(single; $nocond; $yescond; $module);
+ };
+ (list; $_:meta; ) => {};
+ (single; $nocond:meta; $yescond:meta; $module:ident) => {
+ #[cfg(all(not($nocond), $yescond))]
+ mod $module;
+ #[cfg(all(not($nocond), $yescond))]
+ use $module::*;
+ };
+ }
+
+ // Use the `debug` backend when `debug_assertions` is enabled and the `release`
+ // backend otherwise. The x86 SIMD entry is currently disabled.
+ module_selector! (
+ // (all(feature = "simd", target_arch = "x86_64"), x86),
+ (debug_assertions, debug),
+ (not(debug_assertions), release)
+ );
+
+ /// Signature shared by the fixed-width luma interpolation functions stored in `H264MC::interp`.
+ type MCFunc = fn (dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, h: usize);
+
+ /// Saturates `val` to the 10-bit sample range `0..=1023`.
+ fn clip_10(val: i32) -> u16 { val.max(0).min(1023) as u16 }
+
+ /// Hook implemented by the selected backend module; `H264MC` calls it after
+ /// its function tables have been filled in.
+ trait RegisterSIMD {
+ fn register_simd(&mut self);
+ }
+
+ /// 16-byte aligned scratch storage for one motion-compensated block:
+ /// three 16x16 arrays of `u16` samples, one each for `y`, `u` and `v`.
+ #[repr(align(16))]
+ pub struct McBlock {
+ pub y: [u16; 16 * 16],
+ pub u: [u16; 16 * 16],
+ pub v: [u16; 16 * 16],
+ }
+
+ impl McBlock {
+     /// Creates a block with all samples set to zero.
+     ///
+     /// The buffers are zero-filled rather than left uninitialised: producing
+     /// integers through `MaybeUninit::uninit().assume_init()` is undefined
+     /// behaviour, and clearing three 512-byte arrays is cheap.
+     pub fn new() -> Self {
+         Self {
+             y: [0; 16 * 16],
+             u: [0; 16 * 16],
+             v: [0; 16 * 16],
+         }
+     }
+ }
+
+ /// Motion compensation context: tables of block functions plus the frame
+ /// dimensions and sample bit depth they operate on.
+ #[allow(clippy::type_complexity)]
+ pub struct H264MC {
+ // Single-reference weighted prediction; `new()` fills this with the 2, 4, 8 and 16 wide variants.
+ pub put_block_weighted: [fn (dst: &mut [u16], stride: usize, src: &[u16], h: usize, wparams: [i8; 3]); 4],
+ // Two-reference weighted prediction, same width ordering.
+ pub put_block_weighted2: [fn (dst: &mut [u16], stride: usize, src0: &[u16], src1: &[u16], h: usize, wparams: [i8; 5]); 4],
+ // Chroma interpolation for 2, 4 and 8 wide blocks.
+ pub chroma_interp: [fn (dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, dx: u16, dy: u16, h: usize); 3],
+ // Rounded averaging into the destination for 2, 4, 8 and 16 wide blocks.
+ avg: [fn (dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, bh: usize); 4],
+ // Luma interpolation, indexed as [width class][sub-sample mode].
+ interp: [[MCFunc; 16]; 3],
+
+ // Luma plane dimensions used for the edge checks.
+ width: usize,
+ height: usize,
+ // Sample bit depth; 0 until `set_depth` is called.
+ depth: u8,
+ }
+
+impl H264MC {
+ /// Creates a context with the 10-bit function tables installed, zero
+ /// dimensions and `depth` set to 0, then lets the backend register SIMD
+ /// overrides.
+ pub fn new() -> Self {
+ let mut obj = Self {
+ put_block_weighted: [put_blk_w_2_10, put_blk_w_4_10, put_blk_w_8_10, put_blk_w_16_10],
+ put_block_weighted2: [put_blk_w2_2_10, put_blk_w2_4_10, put_blk_w2_8_10, put_blk_w2_16_10],
+ chroma_interp: [chroma_interp_2, chroma_interp_4, chroma_interp_8],
+ interp: H264_LUMA_INTERP_10,
+ avg: [avg_2, avg_4, avg_8, avg_16],
+ width: 0, height: 0,
+ depth: 0,
+ };
+ obj.register_simd();
+ obj
+ }
+ /// Stores the luma plane dimensions used by the motion compensation edge checks.
+ pub fn set_dimensions(&mut self, width: usize, height: usize) {
+ self.width = width;
+ self.height = height;
+ }
+ /// Switches the function tables to the given sample bit depth.
+ ///
+ /// Does nothing when `depth` is already the current depth. Only 10-bit is
+ /// implemented; any other new value hits `unreachable!()`.
+ pub fn set_depth(&mut self, depth: u8) {
+     if depth != self.depth {
+         self.depth = depth;
+         if depth == 10 {
+             self.put_block_weighted = [put_blk_w_2_10, put_blk_w_4_10, put_blk_w_8_10, put_blk_w_16_10];
+             self.put_block_weighted2 = [put_blk_w2_2_10, put_blk_w2_4_10, put_blk_w2_8_10, put_blk_w2_16_10];
+             self.interp = H264_LUMA_INTERP_10;
+         } else {
+             unreachable!();
+         }
+         // Let the backend re-install any SIMD overrides on top of the new tables.
+         self.register_simd();
+     }
+ }
+ /// Motion-compensates one `w`x`h` luma block and its two half-size chroma
+ /// blocks from `refpic` into `frm` at (`xpos`, `ypos`).
+ ///
+ /// `mv` is in quarter-sample luma units: the low two bits of each component
+ /// select the luma interpolation function, the low three bits give the chroma
+ /// fractions. When the source area (including the extra filter rows/columns)
+ /// crosses the picture bounds, it is first copied with edge replication into
+ /// a local buffer.
+ pub fn do_mc(&mut self, frm: &mut NASimpleVideoFrame<u16>, refpic: &SimpleFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) {
+     // Zero-initialised scratch buffer. Creating it with
+     // `MaybeUninit::uninit().assume_init()` would be undefined behaviour for
+     // an integer array, and `mc_blocks` zero-fills its buffer as well.
+     let mut ebuf = [0u16; 22 * 22];
+     let mvx = mv.x >> 2;
+     let mvy = mv.y >> 2;
+     let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize;
+     // Sub-sample modes need 2 extra samples before and 3 after the block.
+     let pre = if mode != 0 { 2isize } else { 0 };
+     let post = if mode != 0 { 3isize } else { 0 };
+     let (yw, yh) = (self.width, self.height);
+     let src = refpic.data;
+     let systride = refpic.stride[0];
+     let src_x = (xpos as isize) + (mvx as isize);
+     let src_y = (ypos as isize) + (mvy as isize);
+     let (ysrc, ystride) = if (src_x - pre < 0) || (src_x + (w as isize) + post > (yw as isize)) || (src_y - pre < 0) || (src_y + (h as isize) + post > (yh as isize)) {
+         let add = (pre + post) as usize;
+         edge_emu_sf(refpic, src_x - pre, src_y - pre, yw, yh, w + add, h + add, &mut ebuf, 22, 0);
+         (&ebuf[..], 22)
+     } else {
+         (&src[refpic.offset[0] + ((src_x - pre) as usize) + ((src_y - pre) as usize) * systride..], systride)
+     };
+     // Width class used to index the function tables: 4 -> 0, 8 -> 1, anything else -> 2.
+     let wmode = match w {
+         4 => 0,
+         8 => 1,
+         _ => 2,
+     };
+     (self.interp[wmode][mode])(&mut frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..], frm.stride[0], ysrc, ystride, h);
+
+     // Chroma: half-size planes, eighth-sample vector precision.
+     let (cw, ch) = (self.width >> 1, self.height >> 1);
+     let mvx = mv.x >> 3;
+     let mvy = mv.y >> 3;
+     let dx = (mv.x & 7) as u16;
+     let dy = (mv.y & 7) as u16;
+     let src_x = ((xpos >> 1) as isize) + (mvx as isize);
+     let src_y = ((ypos >> 1) as isize) + (mvy as isize);
+     let suoff = refpic.offset[1];
+     let svoff = refpic.offset[2];
+     let sustride = refpic.stride[1];
+     let svstride = refpic.stride[2];
+     let cbw = w / 2;
+     let cbh = h / 2;
+     let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) || (src_y + (cbh as isize) + 1 > (ch as isize)) {
+         let aw = (cw + 7) & !7;
+         let ah = (ch + 7) & !7;
+         // Both chroma planes share the scratch buffer side by side:
+         // U starts at column 0 and V at column 9 of each 18-sample row.
+         edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf, 18, 1);
+         edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf[9..], 18, 2);
+         ([&ebuf, &ebuf[9..]], [18, 18])
+     } else {
+         ([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..],
+           &src[svoff + (src_x as usize) + (src_y as usize) * svstride..]],
+          [sustride, svstride])
+     };
+     for chroma in 1..3 {
+         let off = frm.offset[chroma] + xpos / 2 + (ypos / 2) * frm.stride[chroma];
+         (self.chroma_interp[wmode])(&mut frm.data[off..], frm.stride[chroma], csrc[chroma - 1], cstride[chroma - 1], dx, dy, cbh);
+     }
+ }
+
+ /// Same motion compensation as `do_mc`, but the result is written into the
+ /// scratch block `dst` (row stride 16 for all three planes) instead of a frame.
+ pub fn mc_blocks(&mut self, dst: &mut McBlock, refpic: &SimpleFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) {
+ let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize;
+
+ // Sub-sample modes need 2 extra samples before and 3 after the block.
+ let pre = if mode != 0 { 2 } else { 0 };
+ let post = if mode != 0 { 3 } else { 0 };
+ let (width, height) = (self.width, self.height);
+ let sx = (xpos as isize) + ((mv.x >> 2) as isize);
+ let sy = (ypos as isize) + ((mv.y >> 2) as isize);
+
+ // Edge emulation buffer: 32-sample rows, 16 + 2 + 3 rows.
+ const EBUF_STRIDE: usize = 32;
+ let mut ebuf = [0u16; EBUF_STRIDE * (16 + 2 + 3)];
+
+ // Width class used to index the function tables: 4 -> 0, 8 -> 1, anything else -> 2.
+ let wmode = match w {
+ 4 => 0,
+ 8 => 1,
+ _ => 2,
+ };
+ if (sx - pre < 0) || (sx + (w as isize) + post > (width as isize)) ||
+ (sy - pre < 0) || (sy + (h as isize) + post > (height as isize)) {
+ // Source area crosses the picture bounds: interpolate from an edge-replicated copy.
+ let edge = (pre + post) as usize;
+ edge_emu_sf(refpic, sx - pre, sy - pre, width, height, w + edge, h + edge,
+ &mut ebuf, EBUF_STRIDE, 0);
+ (self.interp[wmode][mode])(&mut dst.y, 16, &ebuf, EBUF_STRIDE, h);
+ } else {
+ let sstride = refpic.stride[0];
+ let soff = refpic.offset[0];
+ let sbuf = refpic.data;
+ let saddr = soff + ((sx - pre) as usize) + ((sy - pre) as usize) * sstride;
+ (self.interp[wmode][mode])(&mut dst.y, 16, &sbuf[saddr..], sstride, h);
+ }
+
+ // Chroma: half-size planes, eighth-sample vector precision.
+ let (cw, ch) = (self.width >> 1, self.height >> 1);
+ let mvx = mv.x >> 3;
+ let mvy = mv.y >> 3;
+ let dx = (mv.x & 7) as u16;
+ let dy = (mv.y & 7) as u16;
+ let src_x = ((xpos >> 1) as isize) + (mvx as isize);
+ let src_y = ((ypos >> 1) as isize) + (mvy as isize);
+ let suoff = refpic.offset[1];
+ let svoff = refpic.offset[2];
+ let sustride = refpic.stride[1];
+ let svstride = refpic.stride[2];
+ let src = refpic.data;
+ let cbw = w / 2;
+ let cbh = h / 2;
+ let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) || (src_y + (cbh as isize) + 1 > (ch as isize)) {
+ let aw = (cw + 7) & !7;
+ let ah = (ch + 7) & !7;
+ // The luma scratch buffer is reused here with 18-sample rows: U at column 0, V at column 9.
+ edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf, 18, 1);
+ edge_emu_sf(refpic, src_x, src_y, aw, ah, cbw+1, cbh+1, &mut ebuf[9..], 18, 2);
+ ([&ebuf, &ebuf[9..]], [18, 18])
+ } else {
+ ([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..],
+ &src[svoff + (src_x as usize) + (src_y as usize) * svstride..]],
+ [sustride, svstride])
+ };
+ (self.chroma_interp[wmode])(&mut dst.u, 16, csrc[0], cstride[0], dx, dy, cbh);
+ (self.chroma_interp[wmode])(&mut dst.v, 16, csrc[1], cstride[1], dx, dy, cbh);
+ }
+
+ /// Motion-compensates a block like `do_mc` and averages the prediction into
+ /// the samples already present in `frm` (rounded mean per sample).
+ ///
+ /// The prediction is first rendered into a temporary three-plane frame with
+ /// 64-sample rows (luma at offset 0, chroma planes at offsets 32 and 48),
+ /// then blended plane by plane with the `avg` function matching the width.
+ pub fn do_mc_avg(&mut self, frm: &mut NASimpleVideoFrame<u16>, refpic: &SimpleFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) {
+     // Zero-initialised scratch: `MaybeUninit::uninit().assume_init()` on an
+     // integer array is undefined behaviour.
+     let mut buf = [0u16; 64 * 16 + 32];
+     // Skip ahead to the next 32-byte boundary. The distance is measured in
+     // bytes, so halve it to get an index into the `u16` buffer; the 32
+     // spare elements are more than enough for that.
+     let misalign = (buf.as_ptr() as usize) & 0x1F;
+     let offset = ((32 - misalign) & 0x1F) / 2;
+
+     let mut afrm = NASimpleVideoFrame {
+         width: [32, 16, 16, 0],
+         height: [16, 16, 16, 0],
+         flip: false,
+         stride: [64, 64, 64, 0],
+         offset: [0, 32, 48, 0],
+         components: 3,
+         data: &mut buf[offset..]
+     };
+     // Render at position (0, 0) of the temporary frame and fold the real
+     // block position into the vector instead (quarter-sample units).
+     let amv = MV { x: mv.x + (xpos as i16) * 4, y: mv.y + (ypos as i16) * 4 };
+     self.do_mc(&mut afrm, refpic, 0, 0, w, h, amv);
+     // Index into `self.avg` (2, 4, 8, 16 wide): 4 -> 1, 8 -> 2, 16 -> 3.
+     let wsize = (w.ilog2() - 1) as usize;
+     let src = afrm.data;
+     for (comp, (&sstride, &soff)) in afrm.stride.iter().zip(afrm.offset.iter()).take(3).enumerate() {
+         // Chroma planes are half-size in both directions.
+         let shift = if comp == 0 { 0 } else { 1 };
+         (self.avg[wsize - shift])(&mut frm.data[frm.offset[comp] + (xpos >> shift) + (ypos >> shift) * frm.stride[comp]..], frm.stride[comp], &src[soff..], sstride, h >> shift);
+     }
+ }
+
+ /// Fills a `w`x`h` luma block at (`x`, `y`) and the matching half-size chroma
+ /// blocks with the mid-range value `1 << (depth - 1)`.
+ // NOTE(review): `depth` is 0 right after `new()`, so `depth - 1` would
+ // underflow if this runs before `set_depth` - confirm callers set it first.
+ pub fn gray_block(&mut self, frm: &mut NASimpleVideoFrame<u16>, x: usize, y: usize, w: usize, h: usize) {
+     let fill = 1 << (self.depth - 1);
+     let yoff = frm.offset[0] + x + y * frm.stride[0];
+     let coff = [frm.offset[1] + x / 2 + y / 2 * frm.stride[1],
+                 frm.offset[2] + x / 2 + y / 2 * frm.stride[2]];
+     for row in frm.data[yoff..].chunks_mut(frm.stride[0]).take(h) {
+         row[..w].fill(fill);
+     }
+     for (plane, &start) in coff.iter().enumerate() {
+         for row in frm.data[start..].chunks_mut(frm.stride[plane + 1]).take(h / 2) {
+             row[..w / 2].fill(fill);
+         }
+     }
+ }
+}
+
+ /// Copies a `bw`x`bh` block whose top-left corner is (`xpos`, `ypos`) from
+ /// plane `comp` of `src` into `dst` (row stride `dstride`).
+ ///
+ /// Coordinates falling outside the `w`x`h` plane area are clamped to the
+ /// nearest valid row/column, replicating the edge samples.
+ fn edge_emu_sf(src: &SimpleFrame, xpos: isize, ypos: isize, w: usize, h: usize, bw: usize, bh: usize, dst: &mut [u16], dstride: usize, comp: usize) {
+     // Clamp a signed coordinate into `0..limit`.
+     let clamp_coord = |pos: isize, limit: usize| -> usize {
+         if pos < 0 {
+             0
+         } else if pos >= (limit as isize) {
+             limit - 1
+         } else {
+             pos as usize
+         }
+     };
+     let stride = src.stride[comp];
+     let base = src.offset[comp];
+
+     for row in 0..bh {
+         let sy = clamp_coord((row as isize) + ypos, h);
+         for col in 0..bw {
+             let sx = clamp_coord((col as isize) + xpos, w);
+             dst[col + row * dstride] = src.data[base + sx + sy * stride];
+         }
+     }
+ }
+
+ /// Replaces each sample of the `bw`x`bh` block in `dst` with the rounded
+ /// mean of itself and the corresponding sample of `src`.
+ fn avg(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, bw: usize, bh: usize) {
+     let rows = dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh);
+     for (out_row, ref_row) in rows {
+         out_row
+             .iter_mut()
+             .zip(ref_row)
+             .take(bw)
+             .for_each(|(out, &reference)| *out = (*out + reference + 1) >> 1);
+     }
+ }
+
+ /// Rounded averaging of `src` into `dst` for 2-sample wide blocks.
+ ///
+ /// Rows 0 and 1 are always processed; rows 2 and 3 only when `bh == 4`.
+ fn avg_2(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, bh: usize) {
+     // Blend two consecutive rows starting at `first_row`.
+     fn blend_row_pair(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, first_row: usize) {
+         for row in first_row..first_row + 2 {
+             for col in 0..2 {
+                 let didx = dstride * row + col;
+                 dst[didx] = (dst[didx] + src[sstride * row + col] + 1) >> 1;
+             }
+         }
+     }
+
+     // Touch the last sample of each row pair up front so that a too-short
+     // buffer panics before anything has been modified.
+     let _ = src[sstride + 1];
+     let _ = dst[dstride + 1];
+     blend_row_pair(dst, dstride, src, sstride, 0);
+     if bh == 4 {
+         let _ = src[sstride * 3 + 1];
+         let _ = dst[dstride * 3 + 1];
+         blend_row_pair(dst, dstride, src, sstride, 2);
+     }
+ }
+ /// Rounded averaging for 4-sample wide blocks; forwards to `avg` with `bw = 4`.
+ fn avg_4(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, bh: usize) {
+ avg(dst, dstride, src, sstride, 4, bh);
+ }
+ /// Rounded averaging for 8-sample wide blocks; forwards to `avg` with `bw = 8`.
+ fn avg_8(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, bh: usize) {
+ avg(dst, dstride, src, sstride, 8, bh);
+ }
+ /// Rounded averaging for 16-sample wide blocks; forwards to `avg` with `bw = 16`.
+ fn avg_16(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, bh: usize) {
+ avg(dst, dstride, src, sstride, 16, bh);
+ }
+
+ /// Single-reference weighted prediction for 10-bit samples.
+ ///
+ /// `wparams` is `[weight, offset, shift]`. Each output sample is
+ /// `clip_10(((src * weight + round) >> shift) + offset)`, where the offset is
+ /// scaled up by `10 - 8` bits. `src` is read in rows of 16 samples.
+ fn put_block_weighted_10(dst: &mut [u16], stride: usize, src: &[u16], w: usize, h: usize, wparams: [i8; 3]) {
+     let [raw_weight, raw_offset, raw_shift] = wparams;
+     let weight = i32::from(raw_weight);
+     let offset = i32::from(raw_offset) << (10 - 8);
+     let shift = raw_shift as u8;
+     // Rounding term: half of the divisor, or zero when there is no shift.
+     let round = (1 << shift) >> 1;
+
+     let rows = dst.chunks_mut(stride).zip(src.chunks_exact(16)).take(h);
+     for (out_row, in_row) in rows {
+         for (out, &pix) in out_row[..w].iter_mut().zip(in_row) {
+             *out = clip_10(((i32::from(pix) * weight + round) >> shift) + offset);
+         }
+     }
+ }
+
+ /// Weighted prediction for 2-sample wide blocks; forwards to `put_block_weighted_10`.
+ fn put_blk_w_2_10(dst: &mut [u16], stride: usize, src: &[u16], h: usize, wparams: [i8; 3]) {
+ put_block_weighted_10(dst, stride, src, 2, h, wparams);
+ }
+ /// Weighted prediction for 4-sample wide blocks; forwards to `put_block_weighted_10`.
+ fn put_blk_w_4_10(dst: &mut [u16], stride: usize, src: &[u16], h: usize, wparams: [i8; 3]) {
+ put_block_weighted_10(dst, stride, src, 4, h, wparams);
+ }
+ /// Weighted prediction for 8-sample wide blocks; forwards to `put_block_weighted_10`.
+ fn put_blk_w_8_10(dst: &mut [u16], stride: usize, src: &[u16], h: usize, wparams: [i8; 3]) {
+ put_block_weighted_10(dst, stride, src, 8, h, wparams);
+ }
+ /// Weighted prediction for 16-sample wide blocks; forwards to `put_block_weighted_10`.
+ fn put_blk_w_16_10(dst: &mut [u16], stride: usize, src: &[u16], h: usize, wparams: [i8; 3]) {
+ put_block_weighted_10(dst, stride, src, 16, h, wparams);
+ }
+
+ /// Two-reference weighted prediction for 10-bit samples.
+ ///
+ /// `wparams` is `[weight0, offset0, weight1, offset1, shift]`. Each output is
+ /// `clip_10(((src0 * weight0 + src1 * weight1 + round) >> (shift + 1)) + offset)`
+ /// where `offset` is the rounded mean of the two offsets, each scaled up by
+ /// `10 - 8` bits. Both sources are read in rows of 16 samples.
+ fn put_block_weighted2_10(dst: &mut [u16], stride: usize, src0: &[u16], src1: &[u16], w: usize, h: usize, wparams: [i8; 5]) {
+     let [raw_w0, raw_o0, raw_w1, raw_o1, raw_shift] = wparams;
+     let weight0 = i32::from(raw_w0);
+     let offset0 = i32::from(raw_o0) << (10 - 8);
+     let weight1 = i32::from(raw_w1);
+     let offset1 = i32::from(raw_o1) << (10 - 8);
+     let shift = (raw_shift as u8) + 1;
+     let offset = (offset0 + offset1 + 1) >> 1;
+     let round = (1 << shift) >> 1;
+
+     let src_rows = src0.chunks_exact(16).zip(src1.chunks_exact(16));
+     for (out_row, (row0, row1)) in dst.chunks_mut(stride).zip(src_rows).take(h) {
+         for (out, (&pix0, &pix1)) in out_row[..w].iter_mut().zip(row0.iter().zip(row1)) {
+             *out = clip_10(((i32::from(pix0) * weight0 + i32::from(pix1) * weight1 + round) >> shift) + offset);
+         }
+     }
+ }
+
+ /// Two-reference weighted prediction for 2-sample wide blocks (10-bit).
+ ///
+ /// Unrolled equivalent of `put_block_weighted2_10` for `w = 2`: rows 0 and 1
+ /// are always written, rows 2 and 3 only when `h == 4`. Sources use rows of
+ /// 16 samples.
+ ///
+ /// The offsets are scaled by `10 - 8` bits exactly as in
+ /// `put_block_weighted2_10`; without that scaling 2-wide blocks would receive
+ /// an offset four times smaller than every other block width.
+ fn put_blk_w2_2_10(dst: &mut [u16], stride: usize, src0: &[u16], src1: &[u16], h: usize, wparams: [i8; 5]) {
+     let weight0 = i32::from(wparams[0]);
+     let offset0 = i32::from(wparams[1]) << (10 - 8);
+     let weight1 = i32::from(wparams[2]);
+     let offset1 = i32::from(wparams[3]) << (10 - 8);
+     let wshift = (wparams[4] as u8) + 1;
+     let offset = (offset0 + offset1 + 1) >> 1;
+     let bias = (1 << wshift) >> 1;
+
+     // Touch the last needed sample of each buffer first so that a too-short
+     // slice panics before anything is written.
+     let _ = src0[16 + 1];
+     let _ = src1[16 + 1];
+     let _ = dst[stride + 1];
+     dst[0] = clip_10(((i32::from(src0[0]) * weight0 + i32::from(src1[0]) * weight1 + bias) >> wshift) + offset);
+     dst[1] = clip_10(((i32::from(src0[1]) * weight0 + i32::from(src1[1]) * weight1 + bias) >> wshift) + offset);
+     dst[stride] = clip_10(((i32::from(src0[16]) * weight0 + i32::from(src1[16]) * weight1 + bias) >> wshift) + offset);
+     dst[stride + 1] = clip_10(((i32::from(src0[17]) * weight0 + i32::from(src1[17]) * weight1 + bias) >> wshift) + offset);
+     if h == 4 {
+         let _ = src0[16 * 3 + 1];
+         let _ = src1[16 * 3 + 1];
+         let _ = dst[stride * 3 + 1];
+         dst[stride * 2] = clip_10(((i32::from(src0[32]) * weight0 + i32::from(src1[32]) * weight1 + bias) >> wshift) + offset);
+         dst[stride * 2 + 1] = clip_10(((i32::from(src0[33]) * weight0 + i32::from(src1[33]) * weight1 + bias) >> wshift) + offset);
+         dst[stride * 3] = clip_10(((i32::from(src0[48]) * weight0 + i32::from(src1[48]) * weight1 + bias) >> wshift) + offset);
+         dst[stride * 3 + 1] = clip_10(((i32::from(src0[49]) * weight0 + i32::from(src1[49]) * weight1 + bias) >> wshift) + offset);
+     }
+ }
+ /// Two-reference weighted prediction for 4-sample wide blocks; forwards to `put_block_weighted2_10`.
+ fn put_blk_w2_4_10(dst: &mut [u16], stride: usize, src0: &[u16], src1: &[u16], h: usize, wparams: [i8; 5]) {
+ put_block_weighted2_10(dst, stride, src0, src1, 4, h, wparams);
+ }
+ /// Two-reference weighted prediction for 8-sample wide blocks; forwards to `put_block_weighted2_10`.
+ fn put_blk_w2_8_10(dst: &mut [u16], stride: usize, src0: &[u16], src1: &[u16], h: usize, wparams: [i8; 5]) {
+ put_block_weighted2_10(dst, stride, src0, src1, 8, h, wparams);
+ }
+ /// Two-reference weighted prediction for 16-sample wide blocks; forwards to `put_block_weighted2_10`.
+ fn put_blk_w2_16_10(dst: &mut [u16], stride: usize, src0: &[u16], src1: &[u16], h: usize, wparams: [i8; 5]) {
+ put_block_weighted2_10(dst, stride, src0, src1, 16, h, wparams);
+ }
--- /dev/null
+ // Row stride (in samples) of the temporary buffers used by the combined interpolation modes.
+ const TMP_BUF_STRIDE: usize = 32;
+
+ /// Six-tap filtered sample averaged with one of the two centre input samples.
+ ///
+ /// For every output sample the taps (1, -5, 20, 20, -5, 1) are applied to six
+ /// input samples spaced `step` apart (`step` is 1 when `hor` is set, otherwise
+ /// `sstride`), the sum is rounded, shifted right by 5 and clipped with `clip`.
+ /// The result is then averaged (rounding up) with the third input sample when
+ /// `avg0` is set, or with the fourth otherwise.
+ ///
+ /// No bounds checks are performed: the caller must supply buffers that
+ /// cover every sample touched for a `w`x`h` block.
+ fn interp_block1(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, hor: bool, avg0: bool, clip: fn (i32) -> u16) {
+ unsafe {
+ let step = if hor { 1 } else { sstride };
+ let avgidx = if avg0 { step * 2 } else { step * 3 };
+ let mut src = src.as_ptr();
+ let mut dst = dst.as_mut_ptr();
+ for _ in 0..h {
+ for _ in 0..w {
+ let t = (clip)(( i32::from(*src)
+ - 5 * i32::from(*src.add(step))
+ + 20 * i32::from(*src.add(step * 2))
+ + 20 * i32::from(*src.add(step * 3))
+ - 5 * i32::from(*src.add(step * 4))
+ + i32::from(*src.add(step * 5))
+ + 16) >> 5);
+ *dst = (t + *src.add(avgidx) + 1) >> 1;
+ src = src.add(1);
+ dst = dst.add(1);
+ }
+ // Rewind to the start of the row and step to the next one.
+ dst = dst.sub(w).add(dstride);
+ src = src.sub(w).add(sstride);
+ }
+ }
+ }
+
+ /// Plain six-tap filter.
+ ///
+ /// For every output sample the taps (1, -5, 20, 20, -5, 1) are applied to six
+ /// input samples spaced `step` apart (`step` is 1 when `hor` is set, otherwise
+ /// `sstride`); the sum is rounded, shifted right by 5 and clipped with `clip`.
+ ///
+ /// No bounds checks are performed: the caller must supply buffers that
+ /// cover every sample touched for a `w`x`h` block.
+ fn interp_block2(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, hor: bool, clip: fn (i32) -> u16) {
+ unsafe {
+ let step = if hor { 1 } else { sstride };
+ let mut pix = dst.as_mut_ptr();
+ let mut src = src.as_ptr();
+ for _ in 0..h {
+ for x in 0..w {
+ *pix.add(x) = (clip)(( i32::from(*src)
+ - 5 * i32::from(*src.add(step))
+ + 20 * i32::from(*src.add(step * 2))
+ + 20 * i32::from(*src.add(step * 3))
+ - 5 * i32::from(*src.add(step * 4))
+ + i32::from(*src.add(step * 5))
+ + 16) >> 5);
+ src = src.add(1);
+ }
+ pix = pix.add(dstride);
+ // Rewind the source to the row start, then move down one row.
+ src = src.sub(w);
+ src = src.add(sstride);
+ }
+ }
+ }
+
+ /// Writes the rounded mean of two temporary blocks into `dst`.
+ ///
+ /// Both `tmp` and `tmp2` are read with a row stride of `TMP_BUF_STRIDE`;
+ /// `dst` uses `dstride`. Accesses are unchecked, so all three buffers must
+ /// cover a `w`x`h` block.
+ fn mc_avg_tmp(dst: &mut [u16], dstride: usize, w: usize, h: usize, tmp: &[u16], tmp2: &[u16]) {
+ unsafe {
+ let mut src1 = tmp.as_ptr();
+ let mut src2 = tmp2.as_ptr();
+ let mut dst = dst.as_mut_ptr();
+ for _ in 0..h {
+ for x in 0..w {
+ let a = *src1.add(x);
+ let b = *src2.add(x);
+ *dst.add(x) = (a + b + 1) >> 1;
+ }
+ dst = dst.add(dstride);
+ src1 = src1.add(TMP_BUF_STRIDE);
+ src2 = src2.add(TMP_BUF_STRIDE);
+ }
+ }
+ }
+
+ /// Full-sample case: copies `h` rows of `w` samples from `src` to `dst`
+ /// without filtering. The clip function is unused. Copies are unchecked, so
+ /// both buffers must cover the block.
+ fn h264_mc00(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, _clip: fn(i32) -> u16) {
+ unsafe {
+ let mut src = src.as_ptr();
+ let mut dst = dst.as_mut_ptr();
+ for _ in 0..h {
+ std::ptr::copy_nonoverlapping(src, dst, w);
+ src = src.add(sstride);
+ dst = dst.add(dstride);
+ }
+ }
+ }
+
+ /// Horizontal six-tap filter, starting two rows into `src`, averaged with
+ /// the third tap sample (`avg0 = true`).
+ fn h264_mc01(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+ interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, true, clip);
+ }
+
+ /// Horizontal six-tap filter, starting two rows into `src`, without averaging.
+ fn h264_mc02(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+ interp_block2(dst, dstride, &src[sstride * 2..], sstride, w, h, true, clip);
+ }
+
+ /// Horizontal six-tap filter, starting two rows into `src`, averaged with
+ /// the fourth tap sample (`avg0 = false`).
+ fn h264_mc03(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+ interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, false, clip);
+ }
+
+ /// Vertical six-tap filter, starting two columns into `src`, averaged with
+ /// the third tap sample (`avg0 = true`).
+ fn h264_mc10(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+ interp_block1(dst, dstride, &src[2..], sstride, w, h, false, true, clip);
+ }
+
+ /// Averages the outputs of `h264_mc02` and `h264_mc20`, both computed from `src`.
+ ///
+ /// The scratch buffers are zero-initialised rather than left uninitialised,
+ /// because creating uninitialised integers via `assume_init` is undefined
+ /// behaviour.
+ fn h264_mc11(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+     let mut tmp = [0u16; TMP_BUF_STRIDE * 16];
+     let mut tmp2 = [0u16; TMP_BUF_STRIDE * 16];
+     h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+     h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+     mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
+ }
+
+ /// Averages the outputs of `h264_mc02` and `h264_mc22`, both computed from `src`.
+ ///
+ /// The scratch buffers are zero-initialised rather than left uninitialised,
+ /// because creating uninitialised integers via `assume_init` is undefined
+ /// behaviour.
+ fn h264_mc12(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+     let mut tmp = [0u16; TMP_BUF_STRIDE * 16];
+     let mut tmp2 = [0u16; TMP_BUF_STRIDE * 16];
+     h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+     h264_mc22(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+     mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
+ }
+
+ /// Averages the output of `h264_mc02` on `src` with the output of
+ /// `h264_mc20` on `src` shifted one sample to the right.
+ ///
+ /// The scratch buffers are zero-initialised rather than left uninitialised,
+ /// because creating uninitialised integers via `assume_init` is undefined
+ /// behaviour.
+ fn h264_mc13(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+     let mut tmp = [0u16; TMP_BUF_STRIDE * 16];
+     let mut tmp2 = [0u16; TMP_BUF_STRIDE * 16];
+     h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+     h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h, clip);
+     mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
+ }
+
+ /// Vertical six-tap filter, starting two columns into `src`, without averaging.
+ fn h264_mc20(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+ interp_block2(dst, dstride, &src[2..], sstride, w, h, false, clip);
+ }
+
+ /// Averages the outputs of `h264_mc22` and `h264_mc20`, both computed from `src`.
+ ///
+ /// The scratch buffers are zero-initialised rather than left uninitialised,
+ /// because creating uninitialised integers via `assume_init` is undefined
+ /// behaviour.
+ fn h264_mc21(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+     let mut tmp = [0u16; TMP_BUF_STRIDE * 16];
+     let mut tmp2 = [0u16; TMP_BUF_STRIDE * 16];
+     h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+     h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+     mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
+ }
+
+ /// Two-pass (vertical, then horizontal) six-tap filter.
+ ///
+ /// The first pass applies the taps (1, -5, 20, 20, -5, 1) vertically for
+ /// `w + 5` columns of each of the `h` rows and keeps the unrounded `i32`
+ /// sums in a temporary buffer. The second pass applies the same taps
+ /// horizontally to those sums, adds 512, shifts right by 10 and clips.
+ ///
+ /// The temporary buffer is zero-initialised rather than left uninitialised,
+ /// because creating uninitialised integers via `assume_init` is undefined
+ /// behaviour. Source and destination accesses are unchecked, so the caller
+ /// must supply buffers that cover every sample touched.
+ fn h264_mc22(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+     let mut tmp = [0i32; TMP_BUF_STRIDE * 16];
+     // Pass 1: vertical filter into `tmp`, five extra columns for the horizontal taps.
+     unsafe {
+         let mut src = src.as_ptr();
+         let mut dst = tmp.as_mut_ptr();
+         for _ in 0..h {
+             for _ in 0..w+5 {
+                 *dst = i32::from(*src)
+                     - 5 * i32::from(*src.add(sstride))
+                     + 20 * i32::from(*src.add(sstride * 2))
+                     + 20 * i32::from(*src.add(sstride * 3))
+                     - 5 * i32::from(*src.add(sstride * 4))
+                     + i32::from(*src.add(sstride * 5));
+                 dst = dst.add(1);
+                 src = src.add(1);
+             }
+             src = src.sub(w+5).add(sstride);
+             dst = dst.sub(w+5).add(TMP_BUF_STRIDE);
+         }
+     }
+     // Pass 2: horizontal filter over the intermediate sums, with final rounding and clipping.
+     unsafe {
+         let mut dst = dst.as_mut_ptr();
+         let mut src = tmp.as_ptr();
+         for _ in 0..h {
+             for _ in 0..w {
+                 *dst = (clip)((*src - 5 * *src.add(1) + 20 * *src.add(2) + 20 * *src.add(3) - 5 * *src.add(4) + *src.add(5) + 512) >> 10);
+                 dst = dst.add(1);
+                 src = src.add(1);
+             }
+             dst = dst.sub(w).add(dstride);
+             src = src.sub(w).add(TMP_BUF_STRIDE);
+         }
+     }
+ }
+
+ /// Averages the output of `h264_mc22` on `src` with the output of
+ /// `h264_mc20` on `src` shifted one sample to the right.
+ ///
+ /// The scratch buffers are zero-initialised rather than left uninitialised,
+ /// because creating uninitialised integers via `assume_init` is undefined
+ /// behaviour.
+ fn h264_mc23(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+     let mut tmp = [0u16; TMP_BUF_STRIDE * 16];
+     let mut tmp2 = [0u16; TMP_BUF_STRIDE * 16];
+     h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+     h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h, clip);
+     mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
+ }
+
+ /// Vertical six-tap filter, starting two columns into `src`, averaged with
+ /// the fourth tap sample (`avg0 = false`).
+ fn h264_mc30(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+ interp_block1(dst, dstride, &src[2..], sstride, w, h, false, false, clip);
+ }
+
+ /// Averages the output of `h264_mc20` on `src` with the output of
+ /// `h264_mc02` on `src` shifted one row down.
+ ///
+ /// The scratch buffers are zero-initialised rather than left uninitialised,
+ /// because creating uninitialised integers via `assume_init` is undefined
+ /// behaviour.
+ fn h264_mc31(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+     let mut tmp = [0u16; TMP_BUF_STRIDE * 16];
+     let mut tmp2 = [0u16; TMP_BUF_STRIDE * 16];
+     h264_mc20(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+     h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h, clip);
+     mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
+ }
+
+ /// Averages the output of `h264_mc22` on `src` with the output of
+ /// `h264_mc02` on `src` shifted one row down.
+ ///
+ /// The scratch buffers are zero-initialised rather than left uninitialised,
+ /// because creating uninitialised integers via `assume_init` is undefined
+ /// behaviour.
+ fn h264_mc32(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+     let mut tmp = [0u16; TMP_BUF_STRIDE * 16];
+     let mut tmp2 = [0u16; TMP_BUF_STRIDE * 16];
+     h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h, clip);
+     h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h, clip);
+     mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
+ }
+
+ /// Averages the output of `h264_mc20` on `src` shifted one sample to the
+ /// right with the output of `h264_mc02` on `src` shifted one row down.
+ ///
+ /// The scratch buffers are zero-initialised rather than left uninitialised,
+ /// because creating uninitialised integers via `assume_init` is undefined
+ /// behaviour.
+ fn h264_mc33(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, w: usize, h: usize, clip: fn (i32) -> u16) {
+     let mut tmp = [0u16; TMP_BUF_STRIDE * 16];
+     let mut tmp2 = [0u16; TMP_BUF_STRIDE * 16];
+     h264_mc20(&mut tmp, TMP_BUF_STRIDE, &src[1..], sstride, w, h, clip);
+     h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h, clip);
+     mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
+ }
+
+
+ /// Bilinear chroma interpolation of a `w`x`h` block.
+ ///
+ /// `dx` and `dy` are the fractional offsets in eighths; the horizontal
+ /// weights are `8 - dx` / `dx` and the vertical ones `8 - dy` / `dy`.
+ /// Four cases are handled separately: plain copy (both fractions zero),
+ /// vertical-only, horizontal-only and the full two-dimensional blend.
+ ///
+ /// All accesses are unchecked. The source must provide one extra column
+ /// whenever `dx != 0` and one extra row whenever `dy != 0`.
+ // NOTE(review): the one-dimensional cases multiply in `u16`; that fits for
+ // 10-bit samples (1023 * 8) but would overflow for samples above 8191.
+ fn chroma_interp(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) {
+ let a0 = 8 - dx;
+ let a1 = dx;
+ let b0 = 8 - dy;
+ let b1 = dy;
+
+ if a0 == 8 && b0 == 8 {
+ // No fractional offset: straight row copy.
+ unsafe {
+ let mut src = src.as_ptr();
+ let mut dst = dst.as_mut_ptr();
+ for _ in 0..h {
+ std::ptr::copy_nonoverlapping(src, dst, w);
+ src = src.add(sstride);
+ dst = dst.add(dstride);
+ }
+ }
+ } else if a0 == 8 {
+ // Vertical-only blend between the current row and the one below.
+ unsafe {
+ let mut src0 = src.as_ptr();
+ let mut src1 = src0.add(sstride);
+ let mut dst = dst.as_mut_ptr();
+ for _ in 0..h {
+ for x in 0..w {
+ let a = *src0.add(x);
+ let b = *src1.add(x);
+ *dst.add(x) = (a * b0 + b * b1 + 4) >> 3;
+ }
+ src0 = src0.add(sstride);
+ src1 = src1.add(sstride);
+ dst = dst.add(dstride);
+ }
+ }
+ } else if b0 == 8 {
+ // Horizontal-only blend between each sample and its right neighbour.
+ unsafe {
+ let mut src = src.as_ptr();
+ let mut dst = dst.as_mut_ptr();
+ for _ in 0..h {
+ let mut a = *src;
+ for x in 0..w {
+ let b = *src.add(x + 1);
+ *dst.add(x) = (a * a0 + b * a1 + 4) >> 3;
+ a = b;
+ }
+ src = src.add(sstride);
+ dst = dst.add(dstride);
+ }
+ }
+ } else {
+ // Full bilinear blend of the four neighbouring samples, computed in u32.
+ unsafe {
+ let mut src0 = src.as_ptr();
+ let mut src1 = src0.add(sstride);
+ let mut dst = dst.as_mut_ptr();
+ for _ in 0..h {
+ let mut a = *src0;
+ let mut c = *src1;
+ for x in 0..w {
+ let b = *src0.add(x + 1);
+ let d = *src1.add(x + 1);
+ *dst.add(x) = ((u32::from(a) * u32::from(a0 * b0) + u32::from(b) * u32::from(a1 * b0) + u32::from(c) * u32::from(a0 * b1) + u32::from(d) * u32::from(a1 * b1) + 0x20) >> 6) as u16;
+ a = b;
+ c = d;
+ }
+ src0 = src0.add(sstride);
+ src1 = src1.add(sstride);
+ dst = dst.add(dstride);
+ }
+ }
+ }
+ }
+
+ /// Chroma interpolation for 8-sample wide blocks: forwards to `chroma_interp` with `w = 8`.
+ pub fn chroma_interp_8(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, dx: u16, dy: u16, h: usize) {
+ chroma_interp(dst, dstride, src, sstride, dx, dy, 8, h);
+ }
+
+ /// Chroma interpolation for 4-sample wide blocks: forwards to `chroma_interp` with `w = 4`.
+ pub fn chroma_interp_4(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, dx: u16, dy: u16, h: usize) {
+ chroma_interp(dst, dstride, src, sstride, dx, dy, 4, h);
+ }
+
+ /// Chroma interpolation for 2-sample wide blocks: forwards to `chroma_interp` with `w = 2`.
+ pub fn chroma_interp_2(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, dx: u16, dy: u16, h: usize) {
+ chroma_interp(dst, dstride, src, sstride, dx, dy, 2, h);
+ }
+
+ // Generates three fixed-width wrappers (4, 8 and 16 samples wide) around the
+ // generic luma interpolation function `$orig`, passing `$clip` as its clipping
+ // function. The generated functions take (dst, dstride, src, sstride, h).
+ macro_rules! luma_mc {
+ ($orig:ident, $func4:ident, $func8:ident, $func16:ident, $clip:expr) => {
+ fn $func4(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, h: usize) {
+ $orig(dst, dstride, src, sstride, 4, h, $clip);
+ }
+ fn $func8(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, h: usize) {
+ $orig(dst, dstride, src, sstride, 8, h, $clip);
+ }
+ fn $func16(dst: &mut [u16], dstride: usize, src: &[u16], sstride: usize, h: usize) {
+ $orig(dst, dstride, src, sstride, 16, h, $clip);
+ }
+ }
+ }
+
+ // Instantiate the 4/8/16-wide wrappers for each of the sixteen `h264_mcNN`
+ // functions, all using the 10-bit clipping function `super::clip_10`.
+ luma_mc!(h264_mc00, h264_mc00_4_10, h264_mc00_8_10, h264_mc00_16_10, super::clip_10);
+ luma_mc!(h264_mc01, h264_mc01_4_10, h264_mc01_8_10, h264_mc01_16_10, super::clip_10);
+ luma_mc!(h264_mc02, h264_mc02_4_10, h264_mc02_8_10, h264_mc02_16_10, super::clip_10);
+ luma_mc!(h264_mc03, h264_mc03_4_10, h264_mc03_8_10, h264_mc03_16_10, super::clip_10);
+ luma_mc!(h264_mc10, h264_mc10_4_10, h264_mc10_8_10, h264_mc10_16_10, super::clip_10);
+ luma_mc!(h264_mc11, h264_mc11_4_10, h264_mc11_8_10, h264_mc11_16_10, super::clip_10);
+ luma_mc!(h264_mc12, h264_mc12_4_10, h264_mc12_8_10, h264_mc12_16_10, super::clip_10);
+ luma_mc!(h264_mc13, h264_mc13_4_10, h264_mc13_8_10, h264_mc13_16_10, super::clip_10);
+ luma_mc!(h264_mc20, h264_mc20_4_10, h264_mc20_8_10, h264_mc20_16_10, super::clip_10);
+ luma_mc!(h264_mc21, h264_mc21_4_10, h264_mc21_8_10, h264_mc21_16_10, super::clip_10);
+ luma_mc!(h264_mc22, h264_mc22_4_10, h264_mc22_8_10, h264_mc22_16_10, super::clip_10);
+ luma_mc!(h264_mc23, h264_mc23_4_10, h264_mc23_8_10, h264_mc23_16_10, super::clip_10);
+ luma_mc!(h264_mc30, h264_mc30_4_10, h264_mc30_8_10, h264_mc30_16_10, super::clip_10);
+ luma_mc!(h264_mc31, h264_mc31_4_10, h264_mc31_8_10, h264_mc31_16_10, super::clip_10);
+ luma_mc!(h264_mc32, h264_mc32_4_10, h264_mc32_8_10, h264_mc32_16_10, super::clip_10);
+ luma_mc!(h264_mc33, h264_mc33_4_10, h264_mc33_8_10, h264_mc33_16_10, super::clip_10);
+
+ /// Table of 10-bit luma interpolation functions.
+ ///
+ /// The outer index selects the block width (0 = 4, 1 = 8, 2 = 16 samples, as the
+ /// function name suffixes show); the inner index runs over the sixteen
+ /// `h264_mcNN` variants in the order `00, 01, 02, 03, 10, ..., 33`.
+ pub const H264_LUMA_INTERP_10: [[super::MCFunc; 16]; 3] = [
+ [
+ h264_mc00_4_10, h264_mc01_4_10, h264_mc02_4_10, h264_mc03_4_10,
+ h264_mc10_4_10, h264_mc11_4_10, h264_mc12_4_10, h264_mc13_4_10,
+ h264_mc20_4_10, h264_mc21_4_10, h264_mc22_4_10, h264_mc23_4_10,
+ h264_mc30_4_10, h264_mc31_4_10, h264_mc32_4_10, h264_mc33_4_10
+ ], [
+ h264_mc00_8_10, h264_mc01_8_10, h264_mc02_8_10, h264_mc03_8_10,
+ h264_mc10_8_10, h264_mc11_8_10, h264_mc12_8_10, h264_mc13_8_10,
+ h264_mc20_8_10, h264_mc21_8_10, h264_mc22_8_10, h264_mc23_8_10,
+ h264_mc30_8_10, h264_mc31_8_10, h264_mc32_8_10, h264_mc33_8_10
+ ], [
+ h264_mc00_16_10, h264_mc01_16_10, h264_mc02_16_10, h264_mc03_16_10,
+ h264_mc10_16_10, h264_mc11_16_10, h264_mc12_16_10, h264_mc13_16_10,
+ h264_mc20_16_10, h264_mc21_16_10, h264_mc22_16_10, h264_mc23_16_10,
+ h264_mc30_16_10, h264_mc31_16_10, h264_mc32_16_10, h264_mc33_16_10
+ ]
+ ];
+
+/// `RegisterSIMD` implementation for this variant of `H264MC`: nothing is registered.
+impl super::RegisterSIMD for super::H264MC {
+ // Intentionally empty body: `self` is left untouched.
+ fn register_simd(&mut self) {}
+}
--- /dev/null
+#[allow(unexpected_cfgs)]
+mod mc;
+pub use mc::{H264MC, McBlock};
+//#[cfg(target_arch="x86_64")]
+//use std::arch::asm;
+
+/// 52-entry quantiser lookup table.
+///
+/// Indices `0..=29` map to themselves; above that the values grow more slowly and
+/// end at 39.
+// NOTE(review): presumably the luma-QP to chroma-QP mapping of H.264 -- confirm against callers.
+pub const CHROMA_QUANTS: [u8; 52] = [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30,
+ 31, 32, 32, 33, 34, 34, 35, 35, 36, 36, 37, 37, 37, 38, 38, 38,
+ 39, 39, 39, 39
+];
+
+/// Scan order for the four chroma DC coefficients; it is the identity permutation.
+pub const CHROMA_DC_SCAN: [usize; 4] = [ 0, 1, 2, 3];
+/// Zigzag scan for a 16-coefficient block: entry `i` is the position inside the block
+/// of the `i`-th coefficient in scan order (a permutation of `0..16`).
+pub const ZIGZAG: [usize; 16] = [
+ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+];
+/// 15-entry scan for blocks that carry no DC coefficient: entry `i` equals `ZIGZAG[i + 1] - 1`.
+pub const ZIGZAG1: [usize; 15] = [
+ 0, 3, 7, 4, 1, 2, 5, 8, 11, 12, 9, 6, 10, 13, 14
+];
+/*pub const IL_SCAN: [usize; 16] = [
+ 0, 4, 1, 8, 12, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15
+];*/
+/// Zigzag scan for a 64-coefficient block: entry `i` is the position inside the block
+/// of the `i`-th coefficient in scan order (a permutation of `0..64`).
+pub const ZIGZAG8X8: [usize; 64] = [
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+];
+
+/// Dequantisation multipliers for the 4x4 transforms.
+///
+/// The second index is `qp % 6`. The first index is the coefficient class: the DC
+/// routines in this file always use row 0, while `idct`/`idct_skip_dc` pick the row
+/// per coefficient position through their `BLK_INDEX` tables.
+const LEVEL_SCALE: [[i16; 6]; 3] = [
+ [ 10, 11, 13, 14, 16, 18 ],
+ [ 16, 18, 20, 23, 25, 29 ],
+ [ 13, 14, 16, 18, 20, 23 ]
+];
+
+/// Applies the 2x2 add/sub butterfly to the four chroma DC coefficients in place and
+/// rescales the result for quantiser `qp`.
+///
+/// The scale factor is `LEVEL_SCALE[0][qp % 6]`; for `qp < 6` the product is halved
+/// (`>> 1`), otherwise it is shifted left by `qp / 6 - 1`.
+///
+/// All additions, subtractions and multiplications wrap on `i16` overflow, matching
+/// `idct_luma_dc` and the 4-point arms of `transform!`, so oversized coefficients
+/// from a damaged stream cannot trigger an overflow panic in overflow-checked builds.
+pub fn chroma_dc_transform(blk: &mut [i16; 4], qp: u8) {
+    // Butterfly: sums/differences of the two columns, then of the two rows.
+    let t0 = blk[0].wrapping_add(blk[2]);
+    let t1 = blk[0].wrapping_sub(blk[2]);
+    let t2 = blk[1].wrapping_add(blk[3]);
+    let t3 = blk[1].wrapping_sub(blk[3]);
+    blk[0] = t0.wrapping_add(t2);
+    blk[1] = t0.wrapping_sub(t2);
+    blk[2] = t1.wrapping_add(t3);
+    blk[3] = t1.wrapping_sub(t3);
+
+    // `qp % 6 == qp` whenever `qp < 6`, so one lookup serves both branches.
+    let mul = LEVEL_SCALE[0][usize::from(qp % 6)];
+    if qp < 6 {
+        for el in blk.iter_mut() {
+            *el = el.wrapping_mul(mul) >> 1;
+        }
+    } else {
+        let shift = qp / 6 - 1;
+        for el in blk.iter_mut() {
+            *el = el.wrapping_mul(mul) << shift;
+        }
+    }
+}
+
+/// In-place inverse-transform butterflies shared by the IDCT routines of this file.
+///
+/// Three arms:
+/// * `luma_dc; a, b, c, d` -- 4-point add/sub butterfly with wrapping arithmetic, no scaling;
+/// * `a, b, c, d, shift` -- 4-point butterfly whose odd part uses `b >> 1` and `d >> 1`;
+///   each output gets the rounding bias `(1 << shift) >> 1` added and is shifted right
+///   by `shift` (a `shift` of 0 therefore adds no bias);
+/// * `a, b, c, d, e, f, g, h` -- 8-point butterfly written with plain `+`/`-`.
+///
+/// Results are written back into the operands, so every operand must be an assignable
+/// place expression (e.g. `row[0]`).
+macro_rules! transform {
+ (luma_dc; $a: expr, $b: expr, $c: expr, $d: expr) => ({
+ let t0 = $a.wrapping_add($c);
+ let t1 = $a.wrapping_sub($c);
+ let t2 = $b.wrapping_add($d);
+ let t3 = $b.wrapping_sub($d);
+ $a = t0.wrapping_add(t2);
+ $b = t1.wrapping_add(t3);
+ $c = t1.wrapping_sub(t3);
+ $d = t0.wrapping_sub(t2);
+ });
+ ($a: expr, $b: expr, $c: expr, $d: expr, $shift: expr) => ({
+ let t0 = $a.wrapping_add($c);
+ let t1 = $a.wrapping_sub($c);
+ let t2 = ($b >> 1).wrapping_sub($d);
+ let t3 = $b.wrapping_add($d >> 1);
+ // Shifts associate left to right: this is `(1 << $shift) >> 1`.
+ let bias = 1 << $shift >> 1;
+ $a = t0.wrapping_add(t3).wrapping_add(bias) >> $shift;
+ $b = t1.wrapping_add(t2).wrapping_add(bias) >> $shift;
+ $c = t1.wrapping_sub(t2).wrapping_add(bias) >> $shift;
+ $d = t0.wrapping_sub(t3).wrapping_add(bias) >> $shift;
+ });
+ // This arm expands to a plain statement sequence, not to a block expression.
+ ($a: expr, $b: expr, $c: expr, $d: expr, $e: expr, $f: expr, $g: expr, $h: expr) => {
+ // Stage 1: even inputs (a, c, e, g) feed e0/e2/e4/e6, odd inputs (b, d, f, h) feed e1/e3/e5/e7.
+ let e0 = $a + $e;
+ let e1 = -$d + $f - $h - ($h >> 1);
+ let e2 = $a - $e;
+ let e3 = $b + $h - $d - ($d >> 1);
+ let e4 = ($c >> 1) - $g;
+ let e5 = -$b + $h + $f + ($f >> 1);
+ let e6 = $c + ($g >> 1);
+ let e7 = $d + $f + $b + ($b >> 1);
+
+ // Stage 2.
+ let f0 = e0 + e6;
+ let f1 = e1 + (e7 >> 2);
+ let f2 = e2 + e4;
+ let f3 = e3 + (e5 >> 2);
+ let f4 = e2 - e4;
+ let f5 = (e3 >> 2) - e5;
+ let f6 = e0 - e6;
+ let f7 = e7 - (e1 >> 2);
+
+ // Stage 3: outputs are mirrored sum/difference pairs (a/h, b/g, c/f, d/e).
+ $a = f0 + f7;
+ $b = f2 + f5;
+ $c = f4 + f3;
+ $d = f6 + f1;
+ $e = f6 - f1;
+ $f = f4 - f3;
+ $g = f2 - f5;
+ $h = f0 - f7;
+ };
+}
+
+/// Dequantises the 16 luma DC coefficients for `qp` and runs the unscaled 4x4
+/// butterfly (`transform!(luma_dc; ..)`) over them, columns first and rows second.
+///
+/// Scaling uses `LEVEL_SCALE[0][qp % 6]`. Below `qp == 12` the product is rounded
+/// and shifted right by `2 - qp / 6`; from 12 upwards it is shifted left by
+/// `qp / 6 - 2`. Multiplication and the bias addition wrap on overflow.
+pub fn idct_luma_dc(blk: &mut [i16; 16], qp: u8) {
+    let mul = LEVEL_SCALE[0][(qp % 6) as usize];
+    let scale_step = qp / 6;
+    // `qp < 12` is the same condition as `qp / 6 < 2`.
+    if scale_step < 2 {
+        let shift = 2 - scale_step;
+        let bias: i16 = 1 << shift >> 1;
+        blk.iter_mut()
+            .for_each(|coef| *coef = coef.wrapping_mul(mul).wrapping_add(bias) >> shift);
+    } else {
+        let shift = scale_step - 2;
+        blk.iter_mut()
+            .for_each(|coef| *coef = coef.wrapping_mul(mul) << shift);
+    }
+
+    // Column pass.
+    for col in 0..4 {
+        transform!(luma_dc; blk[col], blk[col + 4], blk[col + 8], blk[col + 12]);
+    }
+    // Row pass.
+    for line in 0..4 {
+        let base = line * 4;
+        transform!(luma_dc; blk[base], blk[base + 1], blk[base + 2], blk[base + 3]);
+    }
+}
+
+/// Dequantises and inverse-transforms a 4x4 block whose DC coefficient (`blk[0]`) is
+/// already scaled: element 0 is left untouched by the dequantisation step.
+///
+/// Every other coefficient is multiplied by `LEVEL_SCALE[class][qp % 6]` (the class
+/// comes from `BLK_INDEX`) and shifted left by `qp / 6`. The row pass runs without
+/// scaling, the column pass rounds and shifts right by 6.
+///
+/// The dequantisation multiply wraps on `i16` overflow, like `idct_luma_dc` and the
+/// butterflies in `transform!`, so out-of-range coefficients cannot trigger an
+/// overflow panic in overflow-checked builds.
+pub fn idct_skip_dc(blk: &mut [i16; 16], qp: u8) {
+    // Row of `LEVEL_SCALE` to use for each coefficient position.
+    const BLK_INDEX: [usize; 16] = [
+        0, 2, 0, 2,
+        2, 1, 2, 1,
+        0, 2, 0, 2,
+        2, 1, 2, 1
+    ];
+    let qidx = usize::from(qp % 6);
+    let shift = qp / 6;
+    // `skip(1)` keeps the DC coefficient as supplied by the caller.
+    for (el, &idx) in blk.iter_mut().zip(BLK_INDEX.iter()).skip(1) {
+        *el = el.wrapping_mul(LEVEL_SCALE[idx][qidx]) << shift;
+    }
+    for row in blk.chunks_exact_mut(4) {
+        transform!(row[0], row[1], row[2], row[3], 0);
+    }
+    for i in 0..4 {
+        transform!(blk[i], blk[i + 4], blk[i + 8], blk[i + 12], 6);
+    }
+}
+
+/// Dequantises and inverse-transforms a full 4x4 coefficient block in place.
+///
+/// Each coefficient is multiplied by `LEVEL_SCALE[class][qp % 6]` (the class comes
+/// from `BLK_INDEX`) and shifted left by `qp / 6`. The row pass runs without scaling,
+/// the column pass rounds and shifts right by 6.
+///
+/// The dequantisation multiply wraps on `i16` overflow, like `idct_luma_dc` and the
+/// butterflies in `transform!`, so out-of-range coefficients cannot trigger an
+/// overflow panic in overflow-checked builds.
+pub fn idct(blk: &mut [i16; 16], qp: u8) {
+    // Row of `LEVEL_SCALE` to use for each coefficient position.
+    const BLK_INDEX: [usize; 16] = [
+        0, 2, 0, 2,
+        2, 1, 2, 1,
+        0, 2, 0, 2,
+        2, 1, 2, 1
+    ];
+    let qidx = usize::from(qp % 6);
+    let shift = qp / 6;
+    for (el, &idx) in blk.iter_mut().zip(BLK_INDEX.iter()) {
+        *el = el.wrapping_mul(LEVEL_SCALE[idx][qidx]) << shift;
+    }
+    for row in blk.chunks_exact_mut(4) {
+        transform!(row[0], row[1], row[2], row[3], 0);
+    }
+    for i in 0..4 {
+        transform!(blk[i], blk[i + 4], blk[i + 8], blk[i + 12], 6);
+    }
+}
+
+/// DC-only shortcut: overwrites the whole block with the rounded DC value.
+///
+/// When `quant_dc` is set, `blk[0]` is first dequantised with
+/// `LEVEL_SCALE[0][qp % 6]` and a left shift by `qp / 6`; otherwise it is taken as is.
+/// All 16 outputs become `(dc + 0x20) >> 6`.
+///
+/// The multiply and the rounding addition wrap on `i16` overflow (consistent with
+/// the wrapping arithmetic of `idct_luma_dc`), so extreme inputs cannot trigger an
+/// overflow panic in overflow-checked builds.
+pub fn idct_dc(blk: &mut [i16; 16], qp: u8, quant_dc: bool) {
+    let dc = if quant_dc {
+        blk[0].wrapping_mul(LEVEL_SCALE[0][usize::from(qp % 6)]) << (qp / 6)
+    } else {
+        blk[0]
+    };
+    *blk = [dc.wrapping_add(0x20) >> 6; 16];
+}
+
+/// Dequantisation multipliers for the 8x8 transform.
+///
+/// The outer index is `qp % 6`. Each row is a 4x4 pattern that `idct8x8` addresses
+/// with `(x & 3) + (y & 3) * 4`, i.e. the pattern repeats in each 4x4 quadrant of
+/// the 8x8 block.
+const QMAT_8X8: [[u8; 16]; 6] = [
+ [
+ 20, 19, 25, 19,
+ 19, 18, 24, 18,
+ 25, 24, 32, 24,
+ 19, 18, 24, 18
+ ], [
+ 22, 21, 28, 21,
+ 21, 19, 26, 19,
+ 28, 26, 35, 26,
+ 21, 19, 26, 19
+ ], [
+ 26, 24, 33, 24,
+ 24, 23, 31, 23,
+ 33, 31, 42, 31,
+ 24, 23, 31, 23
+ ], [
+ 28, 26, 35, 26,
+ 26, 25, 33, 25,
+ 35, 33, 45, 33,
+ 26, 25, 33, 25
+ ], [
+ 32, 30, 40, 30,
+ 30, 28, 38, 28,
+ 40, 38, 51, 38,
+ 30, 28, 38, 28
+ ], [
+ 36, 34, 46, 34,
+ 34, 32, 43, 32,
+ 46, 43, 58, 43,
+ 34, 32, 43, 32
+ ]
+];
+
+/// Multiplies every non-zero coefficient of an 8x8 block by its scaling-list weight.
+///
+/// The weight for the coefficient at index `pos` is `slist[ZIGZAG8X8[pos]]`; the
+/// product wraps on `i16` overflow. Zero coefficients are skipped.
+pub fn dequant8x8(blk: &mut [i16; 64], slist: &[u8; 64]) {
+    for (pos, coef) in blk.iter_mut().enumerate() {
+        if *coef == 0 {
+            continue;
+        }
+        let weight = i16::from(slist[ZIGZAG8X8[pos]]);
+        *coef = coef.wrapping_mul(weight);
+    }
+}
+
+/// Dequantises and inverse-transforms an 8x8 coefficient block in place.
+///
+/// Work is done in a 32-bit scratch buffer. Each coefficient is multiplied (wrapping)
+/// by the `QMAT_8X8[qp % 6]` entry for its position; for `qp < 36` the product is
+/// rounded and shifted right by `6 - qp / 6`, otherwise shifted left by `qp / 6 - 6`.
+/// The 8-point butterfly then runs over rows and columns, and the result is rounded
+/// with `(v + 0x20) >> 6` and truncated back to `i16`.
+pub fn idct8x8(blk: &mut [i16; 64], qp: u8) {
+    let mut tmp = [0i32; 64];
+    let qmat = &QMAT_8X8[(qp % 6) as usize];
+    // Position inside the repeating 4x4 weight pattern: (x & 3) + (y & 3) * 4.
+    let weight = |pos: usize| i32::from(qmat[(pos & 3) + ((pos >> 3) & 3) * 4]);
+
+    if qp < 36 {
+        let shift = 6 - qp / 6;
+        let bias: i32 = (1 << shift) >> 1;
+        for (pos, (dst, &src)) in tmp.iter_mut().zip(blk.iter()).enumerate() {
+            *dst = i32::from(src).wrapping_mul(weight(pos)).wrapping_add(bias) >> shift;
+        }
+    } else {
+        let shift = qp / 6 - 6;
+        for (pos, (dst, &src)) in tmp.iter_mut().zip(blk.iter()).enumerate() {
+            *dst = i32::from(src).wrapping_mul(weight(pos)) << shift;
+        }
+    }
+
+    // Row pass.
+    for line in 0..8 {
+        let o = line * 8;
+        transform!(tmp[o], tmp[o + 1], tmp[o + 2], tmp[o + 3],
+                   tmp[o + 4], tmp[o + 5], tmp[o + 6], tmp[o + 7]);
+    }
+    // Column pass.
+    for col in 0..8 {
+        transform!(tmp[col], tmp[col + 8], tmp[col + 16], tmp[col + 24],
+                   tmp[col + 32], tmp[col + 40], tmp[col + 48], tmp[col + 56]);
+    }
+
+    for (dst, &src) in blk.iter_mut().zip(tmp.iter()) {
+        *dst = ((src + 0x20) >> 6) as i16;
+    }
+}
+
+/// Generates the helpers that depend on the sample bit depth.
+///
+/// `$val` is the mid-range sample value: the "DC 128" style predictors fill blocks
+/// with it, and `$val * 2 - 1` is used as the largest legal sample when clamping.
+///
+/// Generated items:
+/// * `$name_add` / `$name_add8` -- add a 4x4 / 8x8 block of residuals to `dst` starting at
+///   `offset` (rows `stride` apart), clamping each sum to `0..=$val * 2 - 1`;
+/// * `$clip_name` -- clamp an `i16` to the legal sample range and return it as `u16`;
+/// * `$name_dc` -- fill a `bsize` x `bsize` block with `$val`;
+/// * `$dc4`, `$dc8y`, `$dc8c`, `$dc16` -- `$name_dc` behind the predictor signatures
+///   (the edge arguments are ignored);
+/// * `$plane8`, `$plane16` -- plane (gradient) predictors for 8x8 and 16x16 blocks.
+macro_rules! depth_funcs {
+ ($name_add:ident, $name_add8:ident, $clip_name:ident,
+ $name_dc:ident, $dc4:ident, $dc8y:ident, $dc8c:ident, $dc16:ident,
+ $plane8:ident, $plane16:ident, $val:expr) => {
+ fn $name_add(dst: &mut [u16], offset: usize, stride: usize, coeffs: &[i16]) {
+ // Exactly the span covering 4 rows of 4 samples; panics if `dst` is shorter.
+ let out = &mut dst[offset..][..stride * 3 + 4];
+ for (line, src) in out.chunks_mut(stride).take(4).zip(coeffs.chunks_exact(4)) {
+ for (dst, src) in line.iter_mut().take(4).zip(src.iter()) {
+ *dst = ((*dst as i16) + *src).clamp(0, $val * 2 - 1) as u16;
+ }
+ }
+ }
+ fn $name_add8(dst: &mut [u16], offset: usize, stride: usize, coeffs: &[i16; 64]) {
+ let out = &mut dst[offset..];
+ for (line, src) in out.chunks_mut(stride).take(8).zip(coeffs.chunks_exact(8)) {
+ for (dst, src) in line.iter_mut().take(8).zip(src.iter()) {
+ *dst = ((*dst as i16) + *src).clamp(0, $val * 2 - 1) as u16;
+ }
+ }
+ }
+ fn $clip_name(val: i16) -> u16 { val.clamp(0, $val * 2 - 1) as u16 }
+ fn $name_dc(buf: &mut [u16], stride: usize, bsize: usize) {
+ for row in buf.chunks_mut(stride).take(bsize) {
+ for el in row[..bsize].iter_mut() {
+ *el = $val;
+ }
+ }
+ }
+ fn $dc4(buf: &mut [u16], stride: usize, _top: &[u16], _left: &[u16], _tr: &[u16]) {
+ $name_dc(buf, stride, 4);
+ }
+ fn $dc8y(buf: &mut [u16], stride: usize, _ctx: &IPred8Context) {
+ $name_dc(buf, stride, 8);
+ }
+ fn $dc8c(buf: &mut [u16], stride: usize, _top: &[u16], _left: &[u16]) {
+ $name_dc(buf, stride, 8);
+ }
+ fn $dc16(buf: &mut [u16], stride: usize, _top: &[u16], _left: &[u16]) {
+ $name_dc(buf, stride, 16);
+ }
+ fn $plane8(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16]) {
+ // `left[0]` serves as the corner sample for both gradients; `left[1..=8]` is the left column.
+ // The weight-4 term is written out here, the loop below adds weights 1..=3.
+ let mut h: i32 = 4 * (i32::from(top[7]) - i32::from(left[0]));
+ let mut v: i32 = 4 * (i32::from(left[8]) - i32::from(left[0]));
+ for i in 0..3 {
+ let i1 = (i + 1) as i32;
+ h += i1 * (i32::from(top[4 + i]) - i32::from(top[2 - i]));
+ v += i1 * (i32::from(left[5 + i]) - i32::from(left[3 - i]));
+ }
+ let b = (17 * h + 16) >> 5;
+ let c = (17 * v + 16) >> 5;
+ // Value at the top-left sample: three steps of `b` and `c` back from the centre, plus rounding.
+ let mut a = 16 * (i32::from(left[8]) + i32::from(top[7])) - 3 * (b + c) + 16;
+ for line in buf.chunks_mut(stride).take(8) {
+ let mut acc = a;
+ for el in line.iter_mut().take(8) {
+ *el = $clip_name((acc >> 5) as i16);
+ acc += b;
+ }
+ a += c;
+ }
+ }
+ fn $plane16(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16]) {
+ // Same scheme as the 8x8 version: weight-8 term first, weights 1..=7 in the loop.
+ let mut h = 8 * (i32::from(top[15]) - i32::from(left[0]));
+ let mut v = 8 * (i32::from(left[16]) - i32::from(left[0]));
+ for k in 0..7 {
+ h += ((k as i32) + 1) * (i32::from(top[8 + k]) - i32::from(top[6 - k]));
+ v += ((k as i32) + 1) * (i32::from(left[9 + k]) - i32::from(left[7 - k]));
+ }
+
+ h = (5 * h + 32) >> 6;
+ v = (5 * v + 32) >> 6;
+
+ // The `+ 1` inside the parentheses contributes the rounding term 16.
+ let mut a = 16 * (i32::from(left[16]) + i32::from(top[15]) + 1) - 7 * (v + h);
+
+ for row in buf.chunks_mut(stride).take(16) {
+ let mut b = a;
+ a += v;
+
+ // Four groups of four samples per row.
+ for dst in row.chunks_exact_mut(4).take(4) {
+ dst[0] = $clip_name(((b ) >> 5) as i16);
+ dst[1] = $clip_name(((b + h) >> 5) as i16);
+ dst[2] = $clip_name(((b + 2*h) >> 5) as i16);
+ dst[3] = $clip_name(((b + 3*h) >> 5) as i16);
+ b += h * 4;
+ }
+ }
+ }
+ }
+}
+
+// Instantiate the depth-dependent helpers with mid value 0x200 (maximum sample
+// 0x200 * 2 - 1 = 1023, i.e. 10-bit samples).
+depth_funcs!(add_coeffs_10, add_coeffs8_10, clip10,
+ ipred_dc128_10, ipred_4x4_dc128_10, ipred_y_8x8_dc128_10, ipred_8x8_dc128_10, ipred_16x16_dc128_10,
+ ipred_8x8_plane_10, ipred_16x16_plane_10, 0x200);
+
+/// Vertical prediction: copies the first `bsize` samples of `top` into each of the
+/// first `bsize` rows of `buf` (rows are `stride` samples apart).
+fn ipred_ver(buf: &mut [u16], stride: usize, top: &[u16], bsize: usize) {
+    buf.chunks_mut(stride).take(bsize).for_each(|line| {
+        for (dst, &src) in line[..bsize].iter_mut().zip(top[..bsize].iter()) {
+            *dst = src;
+        }
+    });
+}
+/// Horizontal prediction: fills the first `bsize` samples of row `y` with `left[y + 1]`
+/// for up to `bsize` rows (`left[0]` is not used).
+fn ipred_hor(buf: &mut [u16], stride: usize, left: &[u16], bsize: usize) {
+    let lines = buf.chunks_mut(stride);
+    let edge = left[1..].iter();
+    for (line, &value) in lines.zip(edge).take(bsize) {
+        line[..bsize].fill(value);
+    }
+}
+/// DC prediction from both edges: averages `top[..bsize]` and `left[1..=bsize]` with
+/// rounding (`(sum + (1 << (shift - 1))) >> shift`) and fills a `bsize` x `bsize`
+/// block with the result.
+fn ipred_dc(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16], bsize: usize, shift: u8) {
+    let top_sum = (0..bsize).fold(0u16, |acc, i| acc + top[i]);
+    let edge_sum = (0..bsize).fold(top_sum, |acc, i| acc + left[i + 1]);
+    let dc = (edge_sum + (1 << (shift - 1))) >> shift;
+
+    for line in buf.chunks_mut(stride).take(bsize) {
+        line[..bsize].fill(dc);
+    }
+}
+/// DC prediction from the left edge only: averages `left[1..=bsize]` with rounding
+/// and fills a `bsize` x `bsize` block with the result.
+fn ipred_left_dc(buf: &mut [u16], stride: usize, left: &[u16], bsize: usize, shift: u8) {
+    let edge_sum = (0..bsize).fold(0u16, |acc, i| acc + left[i + 1]);
+    let dc = (edge_sum + (1 << (shift - 1))) >> shift;
+
+    for line in buf.chunks_mut(stride).take(bsize) {
+        line[..bsize].fill(dc);
+    }
+}
+/// DC prediction from the top edge only: averages `top[..bsize]` with rounding and
+/// fills a `bsize` x `bsize` block with the result.
+fn ipred_top_dc(buf: &mut [u16], stride: usize, top: &[u16], bsize: usize, shift: u8) {
+    let edge_sum = (0..bsize).fold(0u16, |acc, i| acc + top[i]);
+    let dc = (edge_sum + (1 << (shift - 1))) >> shift;
+
+    for line in buf.chunks_mut(stride).take(bsize) {
+        line[..bsize].fill(dc);
+    }
+}
+
+/// Copies as many samples as fit: the first `min(dst.len(), src.len())` elements of
+/// `src` overwrite the start of `dst`; the rest of `dst` is left as it was.
+fn load(dst: &mut [u16], src: &[u16]) {
+    let count = dst.len().min(src.len());
+    dst[..count].copy_from_slice(&src[..count]);
+}
+
+/// 4x4 vertical prediction; forwards to `ipred_ver` with block size 4.
+fn ipred_4x4_ver(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16], _tr: &[u16]) {
+ ipred_ver(buf, stride, top, 4);
+}
+/// 4x4 horizontal prediction; forwards to `ipred_hor` with block size 4.
+fn ipred_4x4_hor(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16], _tr: &[u16]) {
+ ipred_hor(buf, stride, left, 4);
+}
+/// 4x4 diagonal-down-left prediction.
+///
+/// The edge is `top` (4 samples) followed by `tr` (4 samples) with the last sample
+/// repeated once. Sample (`col`, `row`) is the rounded 1-2-1 filter of the three edge
+/// samples starting at `col + row`.
+fn ipred_4x4_diag_down_left(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16], tr: &[u16]) {
+    let mut edge = [0u16; 9];
+    load(&mut edge[..4], top);
+    load(&mut edge[4..8], tr);
+    edge[8] = edge[7];
+
+    for row in 0..4 {
+        let line = &mut buf[row * stride..];
+        for col in 0..4 {
+            let k = row + col;
+            line[col] = (edge[k] + 2 * edge[k + 1] + edge[k + 2] + 2) >> 2;
+        }
+    }
+}
+/// 4x4 diagonal-down-right prediction.
+///
+/// `left[0]` is used as the corner sample: `t` is the corner followed by `top`,
+/// `l` is the first five samples of `left` (corner first). Samples below the main
+/// diagonal are filtered from `l`, samples above it from `t`, and the diagonal itself
+/// from the corner and its two neighbours. Every output is a rounded 1-2-1 filter.
+fn ipred_4x4_diag_down_right(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16], _tr: &[u16]) {
+ let mut t: [u16; 5] = [0; 5];
+ t[0] = left[0];
+ load(&mut t[1..], top);
+ let mut l: [u16; 5] = [0; 5];
+ load(&mut l, left);
+ let dst = buf;
+
+ for j in 0..4 {
+ // Columns left of the diagonal (i < j): taken from the left edge.
+ for i in 0..j {
+ dst[i + j * stride] = (l[j - i - 1] + 2 * l[j - i] + l[j - i + 1] + 2) >> 2;
+ }
+ // The diagonal sample.
+ dst[j + j * stride] = (l[1] + 2 * l[0] + t[1] + 2) >> 2;
+ // Columns right of the diagonal (i > j): taken from the top edge.
+ for i in (j+1)..4 {
+ dst[i + j * stride] = (t[i - j - 1] + 2 * t[i - j] + t[i - j + 1] + 2) >> 2;
+ }
+ }
+}
+/// 4x4 vertical-right prediction.
+///
+/// `t` is the corner (`left[0]`) followed by `top`; `l` is the first five samples of
+/// `left`. For column `i` and row `j` the selector `zvr = 2 * i - j` decides the filter:
+/// even non-negative -> 2-tap average of `t`, odd non-negative -> 3-tap filter of `t`,
+/// `-1` -> 3-tap filter around the corner, anything lower -> 3-tap filter of `l`.
+fn ipred_4x4_ver_right(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16], _tr: &[u16]) {
+ let mut t: [u16; 5] = [0; 5];
+ t[0] = left[0];
+ load(&mut t[1..], top);
+ let mut l: [u16; 5] = [0; 5];
+ load(&mut l, left);
+ let dst = buf;
+
+ for j in 0..4 {
+ for i in 0..4 {
+ let zvr = ((2 * i) as i8) - (j as i8);
+ let pix;
+ if zvr >= 0 {
+ if (zvr & 1) == 0 {
+ pix = (t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 1) >> 1;
+ } else {
+ pix = (t[i - (j >> 1) - 1] + 2 * t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 2) >> 2;
+ }
+ } else {
+ if zvr == -1 {
+ pix = (l[1] + 2 * l[0] + t[1] + 2) >> 2;
+ } else {
+ // zvr <= -2 only happens for i == 0 and j >= 2, so `j - 2` cannot underflow.
+ pix = (l[j] + 2 * l[j - 1] + l[j - 2] + 2) >> 2;
+ }
+ }
+ dst[i + j * stride] = pix;
+ }
+ }
+}
+/// 4x4 vertical-left prediction.
+///
+/// The edge `t` is `top` (4 samples) followed by `tr` (4 samples). Rows 0 and 2 hold
+/// 2-tap averages, rows 1 and 3 hold 3-tap filtered values; rows 2 and 3 reuse the
+/// values of rows 0 and 1 shifted one column to the left, which is why most values
+/// are stored twice.
+fn ipred_4x4_ver_left(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16], tr: &[u16]) {
+ let mut t: [u16; 8] = [0; 8];
+ load(&mut t[..4], top);
+ load(&mut t[4..], tr);
+ let dst = buf;
+
+ // Even rows: 2-tap averages.
+ dst[0 + 0 * stride] = (t[0] + t[1] + 1) >> 1;
+ let pix = (t[1] + t[2] + 1) >> 1;
+ dst[1 + 0 * stride] = pix;
+ dst[0 + 2 * stride] = pix;
+ let pix = (t[2] + t[3] + 1) >> 1;
+ dst[2 + 0 * stride] = pix;
+ dst[1 + 2 * stride] = pix;
+ let pix = (t[3] + t[4] + 1) >> 1;
+ dst[3 + 0 * stride] = pix;
+ dst[2 + 2 * stride] = pix;
+ dst[3 + 2 * stride] = (t[4] + t[5] + 1) >> 1;
+ // Odd rows: 3-tap (1-2-1) filtered values.
+ dst[0 + 1 * stride] = (t[0] + 2*t[1] + t[2] + 2) >> 2;
+ let pix = (t[1] + 2*t[2] + t[3] + 2) >> 2;
+ dst[1 + 1 * stride] = pix;
+ dst[0 + 3 * stride] = pix;
+ let pix = (t[2] + 2*t[3] + t[4] + 2) >> 2;
+ dst[2 + 1 * stride] = pix;
+ dst[1 + 3 * stride] = pix;
+ let pix = (t[3] + 2*t[4] + t[5] + 2) >> 2;
+ dst[3 + 1 * stride] = pix;
+ dst[2 + 3 * stride] = pix;
+ dst[3 + 3 * stride] = (t[4] + 2*t[5] + t[6] + 2) >> 2;
+}
+/// 4x4 horizontal-down prediction.
+///
+/// `t` is the corner (`left[0]`) followed by `top`; `l` is the first five samples of
+/// `left`. For column `i` and row `j` the selector `zhd = 2 * j - i` decides the filter:
+/// even non-negative -> 2-tap average of `l`, odd non-negative -> 3-tap filter of `l`,
+/// `-1` -> 3-tap filter around the corner, anything lower -> 3-tap filter of `t`.
+fn ipred_4x4_hor_down(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16], _tr: &[u16]) {
+ let mut t: [u16; 5] = [0; 5];
+ t[0] = left[0];
+ load(&mut t[1..], top);
+ let mut l: [u16; 5] = [0; 5];
+ load(&mut l, left);
+ let dst = buf;
+
+ for j in 0..4 {
+ for i in 0..4 {
+ let zhd = ((2 * j) as i8) - (i as i8);
+ let pix;
+ if zhd >= 0 {
+ if (zhd & 1) == 0 {
+ pix = (l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 1) >> 1;
+ } else {
+ pix = (l[j - (i >> 1) - 1] + 2 * l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 2) >> 2;
+ }
+ } else {
+ if zhd == -1 {
+ pix = (l[1] + 2 * l[0] + t[1] + 2) >> 2;
+ } else {
+ // zhd <= -2 only happens for j == 0 and i >= 2, so `i - 2` cannot underflow.
+ pix = (t[i - 2] + 2 * t[i - 1] + t[i] + 2) >> 2;
+ }
+ }
+ dst[i + j * stride] = pix;
+ }
+ }
+}
+/// 4x4 horizontal-up prediction.
+///
+/// `l` is loaded from `left[1..]`, so `l[0]` is the first sample of the left column;
+/// only `l[0..=3]` are read. Values alternate between 2-tap averages and 3-tap
+/// filtered values along each row, each row starting one sample further down the
+/// edge; once the edge runs out the remaining samples are plain copies of `l[3]`.
+fn ipred_4x4_hor_up(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16], _tr: &[u16]) {
+ let mut l: [u16; 8] = [0; 8];
+ load(&mut l, &left[1..]);
+ let dst = buf;
+
+ dst[0 + 0 * stride] = (l[0] + l[1] + 1) >> 1;
+ dst[1 + 0 * stride] = (l[0] + 2*l[1] + l[2] + 2) >> 2;
+ let pix = (l[1] + l[2] + 1) >> 1;
+ dst[2 + 0 * stride] = pix;
+ dst[0 + 1 * stride] = pix;
+ let pix = (l[1] + 2*l[2] + l[3] + 2) >> 2;
+ dst[3 + 0 * stride] = pix;
+ dst[1 + 1 * stride] = pix;
+ let pix = (l[2] + l[3] + 1) >> 1;
+ dst[2 + 1 * stride] = pix;
+ dst[0 + 2 * stride] = pix;
+ // Last filtered value: the edge sample past `l[3]` is replaced by `l[3]` itself.
+ let pix = (l[2] + 3*l[3] + 2) >> 2;
+ dst[3 + 1 * stride] = pix;
+ dst[1 + 2 * stride] = pix;
+ // Everything further along the scan is the last edge sample.
+ dst[3 + 2 * stride] = l[3];
+ dst[1 + 3 * stride] = l[3];
+ dst[0 + 3 * stride] = l[3];
+ dst[2 + 2 * stride] = l[3];
+ dst[2 + 3 * stride] = l[3];
+ dst[3 + 3 * stride] = l[3];
+}
+/// 4x4 DC prediction from both edges: 8 samples are summed, hence shift 3.
+fn ipred_4x4_dc(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16], _tr: &[u16]) {
+ ipred_dc(buf, stride, top, left, 4, 3);
+}
+/// 4x4 DC prediction from the left edge only: 4 samples are summed, hence shift 2.
+fn ipred_4x4_left_dc(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16], _tr: &[u16]) {
+ ipred_left_dc(buf, stride, left, 4, 2);
+}
+/// 4x4 DC prediction from the top edge only: 4 samples are summed, hence shift 2.
+fn ipred_4x4_top_dc(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16], _tr: &[u16]) {
+ ipred_top_dc(buf, stride, top, 4, 2);
+}
+
+/// Pre-filtered neighbour samples consumed by the 8x8 luma intra predictors.
+pub struct IPred8Context {
+ /// Filtered top edge: 8 samples above the block followed by 8 above-right samples.
+ pub t: [u16; 16],
+ /// Filtered left edge (8 samples).
+ pub l: [u16; 8],
+ /// Filtered top-left corner sample.
+ pub tl: u16,
+ /// Value substituted for neighbours that are not available.
+ pub fill: u16,
+}
+
+impl IPred8Context {
+ /// Creates a context in which every edge sample equals `fill`.
+ pub fn new(fill: u16) -> Self {
+ Self {
+ t: [fill; 16],
+ l: [fill; 8],
+ tl: fill,
+ fill,
+ }
+ }
+ /// Rebuilds the filtered edges from raw neighbour samples.
+ ///
+ /// * `top` -- samples above the block: `top[..8]` is read when `has_t`, `top[8..16]`
+ ///   when `has_tr`;
+ /// * `left` -- `left[0]` is the corner sample (read when `has_tl`), `left[1..9]` is the
+ ///   left column (read when `has_l`);
+ /// * the `has_*` flags tell which neighbours exist; missing ones are replaced by
+ ///   `self.fill` or by a copy of the nearest available sample.
+ ///
+ /// Each stored edge sample is the rounded 1-2-1 filter of its raw neighbourhood.
+ pub fn fill(&mut self, top: &[u16], left: &[u16], has_t: bool, has_tr: bool, has_l: bool, has_tl: bool) {
+ // Scratch edges with one extra slot in front (the corner) and two at the end (padding),
+ // so the 3-tap filter below never needs a special case.
+ let mut t = [self.fill; 19];
+ let mut l = [self.fill; 11];
+ if has_t {
+ t[1..8 + 1].copy_from_slice(&top[..8]);
+ }
+ if has_tr {
+ t[8 + 1..16 + 1].copy_from_slice(&top[8..][..8]);
+ t[16 + 1] = t[15 + 1];
+ t[17 + 1] = t[15 + 1];
+ } else {
+ // No above-right block: extend the last top sample over the whole right half.
+ let (t0, t1) = t.split_at_mut(8 + 1);
+ for el in t1.iter_mut() {
+ *el = t0[7 + 1];
+ }
+ }
+ if has_l {
+ l[1..9].copy_from_slice(&left[1..9]);
+ l[8 + 1] = l[7 + 1];
+ l[9 + 1] = l[7 + 1];
+ }
+ if has_tl {
+ t[0] = left[0];
+ l[0] = left[0];
+ } else {
+ // No corner: each edge repeats its own first sample.
+ t[0] = t[1];
+ l[0] = l[1];
+ }
+
+ for i in 0..16 {
+ self.t[i] = (t[i] + 2 * t[i + 1] + t[i + 2] + 2) >> 2;
+ }
+ for i in 0..8 {
+ self.l[i] = (l[i] + 2 * l[i + 1] + l[i + 2] + 2) >> 2;
+ }
+ // The corner is filtered from whichever neighbours exist; with neither edge
+ // available it is taken unfiltered.
+ self.tl = if has_t && has_l {
+ (t[1] + 2 * t[0] + l[1] + 2) >> 2
+ } else if has_t {
+ (3 * t[0] + t[1] + 2) >> 2
+ } else if has_l {
+ (3 * l[0] + l[1] + 2) >> 2
+ } else {
+ t[0]
+ };
+ }
+}
+
+/// 8x8 luma vertical prediction: every row becomes a copy of the first eight filtered
+/// top samples.
+fn ipred_y_8x8_ver(buf: &mut [u16], stride: usize, ctx: &IPred8Context) {
+    let edge = &ctx.t[..8];
+    buf.chunks_mut(stride)
+        .take(8)
+        .for_each(|line| line[..8].copy_from_slice(edge));
+}
+/// 8x8 luma horizontal prediction: row `y` is filled with the filtered left sample
+/// `ctx.l[y]`.
+fn ipred_y_8x8_hor(buf: &mut [u16], stride: usize, ctx: &IPred8Context) {
+    let lines = buf.chunks_mut(stride).take(8);
+    for (line, &edge) in lines.zip(ctx.l.iter()) {
+        line[..8].fill(edge);
+    }
+}
+/// 8x8 luma diagonal-down-left prediction from the 16 filtered top samples.
+///
+/// Sample (`x`, `y`) is the rounded 1-2-1 filter of `t[x + y ..= x + y + 2]`; the
+/// bottom-right sample would need `t[16]`, so it uses `t[15]` twice instead.
+fn ipred_y_8x8_diag_down_left(buf: &mut [u16], stride: usize, ctx: &IPred8Context) {
+ let mut t = [0u16; 16];
+ load(&mut t, &ctx.t);
+
+ for (y, row) in buf.chunks_mut(stride).take(8).enumerate() {
+ for (x, pix) in row.iter_mut().take(8).enumerate() {
+ // The `+ 2` rounding term and the `>> 2` are shared by both branches.
+ *pix = (if (x != 7) || (y != 7) {
+ t[x + y] + 2 * t[x + y + 1] + t[x + y + 2]
+ } else {
+ t[14] + 3 * t[15]
+ } + 2) >> 2;
+ }
+ }
+}
+/// 8x8 luma diagonal-down-right prediction.
+///
+/// `t` and `l` both start with the filtered corner `ctx.tl`, followed by eight
+/// filtered top / left samples. Samples above the main diagonal are filtered from
+/// `t`, samples below it from `l`, and the diagonal from the corner and its two
+/// neighbours.
+fn ipred_y_8x8_diag_down_right(buf: &mut [u16], stride: usize, ctx: &IPred8Context) {
+ let mut t = [0u16; 9];
+ t[0] = ctx.tl;
+ load(&mut t[1..], &ctx.t);
+ let mut l = [0u16; 9];
+ l[0] = ctx.tl;
+ load(&mut l[1..], &ctx.l);
+ // Unrounded diagonal value; rounding and shift are applied in the loop.
+ let diag = t[1] + 2 * t[0] + l[1];
+
+ for (y, row) in buf.chunks_mut(stride).take(8).enumerate() {
+ for (x, pix) in row.iter_mut().take(8).enumerate() {
+ *pix = (if x > y {
+ t[x - y - 1] + 2 * t[x - y] + t[x - y + 1]
+ } else if x < y {
+ l[y - x - 1] + 2 * l[y - x] + l[y - x + 1]
+ } else {
+ diag
+ } + 2) >> 2;
+ }
+ }
+}
+/// 8x8 luma vertical-right prediction.
+///
+/// `t` and `l` both start with the filtered corner `ctx.tl`, followed by eight
+/// filtered top / left samples. The selector `zvr = 2 * x - y` decides the filter:
+/// even non-negative -> 2-tap average of `t`, odd non-negative -> 3-tap filter of `t`,
+/// `-1` -> 3-tap filter around the corner, anything lower -> 3-tap filter of `l`.
+fn ipred_y_8x8_ver_right(buf: &mut [u16], stride: usize, ctx: &IPred8Context) {
+ let mut t = [0u16; 9];
+ t[0] = ctx.tl;
+ load(&mut t[1..], &ctx.t);
+ let mut l = [0u16; 9];
+ l[0] = ctx.tl;
+ load(&mut l[1..], &ctx.l);
+
+ for (y, row) in buf.chunks_mut(stride).take(8).enumerate() {
+ for (x, pix) in row.iter_mut().take(8).enumerate() {
+ let zvr = 2 * (x as i8) - (y as i8);
+ *pix = if zvr >= 0 {
+ let ix = x - (y >> 1);
+ if (zvr & 1) == 0 {
+ (t[ix] + t[ix + 1] + 1) >> 1
+ } else {
+ (t[ix - 1] + 2 * t[ix] + t[ix + 1] + 2) >> 2
+ }
+ } else if zvr == -1 {
+ (l[1] + 2 * l[0] + t[1] + 2) >> 2
+ } else {
+ // Here zvr <= -2, so `ix >= 2` and `ix - 2` cannot underflow.
+ let ix = y - 2 * x;
+ (l[ix] + 2 * l[ix - 1] + l[ix - 2] + 2) >> 2
+ };
+ }
+ }
+}
+/// 8x8 luma vertical-left prediction from the 16 filtered top samples.
+///
+/// Even rows hold 2-tap averages and odd rows 3-tap filtered values, both starting at
+/// edge index `x + (y >> 1)`.
+fn ipred_y_8x8_ver_left(buf: &mut [u16], stride: usize, ctx: &IPred8Context) {
+ let mut t = [0u16; 16];
+ load(&mut t, &ctx.t);
+
+ for (y, row) in buf.chunks_mut(stride).take(8).enumerate() {
+ for (x, pix) in row.iter_mut().take(8).enumerate() {
+ let ix = x + (y >> 1);
+ *pix = if (y & 1) == 0 {
+ (t[ix] + t[ix + 1] + 1) >> 1
+ } else {
+ (t[ix] + 2 * t[ix + 1] + t[ix + 2] + 2) >> 2
+ };
+ }
+ }
+
+}
+/// 8x8 luma horizontal-down prediction.
+///
+/// `t` and `l` both start with the filtered corner `ctx.tl`, followed by eight
+/// filtered top / left samples. The selector `zhd = 2 * y - x` decides the filter:
+/// even non-negative -> 2-tap average of `l`, odd non-negative -> 3-tap filter of `l`,
+/// `-1` -> 3-tap filter around the corner, anything lower -> 3-tap filter of `t`.
+fn ipred_y_8x8_hor_down(buf: &mut [u16], stride: usize, ctx: &IPred8Context) {
+ let mut t = [0u16; 9];
+ t[0] = ctx.tl;
+ load(&mut t[1..], &ctx.t);
+ let mut l = [0u16; 9];
+ l[0] = ctx.tl;
+ load(&mut l[1..], &ctx.l);
+
+ for (y, row) in buf.chunks_mut(stride).take(8).enumerate() {
+ for (x, pix) in row.iter_mut().take(8).enumerate() {
+ let zhd = 2 * (y as i8) - (x as i8);
+ *pix = if zhd >= 0 {
+ let ix = y - (x >> 1);
+ if (zhd & 1) == 0 {
+ (l[ix] + l[ix + 1] + 1) >> 1
+ } else {
+ (l[ix - 1] + 2 * l[ix] + l[ix + 1] + 2) >> 2
+ }
+ } else if zhd == -1 {
+ (l[1] + 2 * l[0] + t[1] + 2) >> 2
+ } else {
+ // Here zhd <= -2, so `ix >= 2` and `ix - 2` cannot underflow.
+ let ix = x - 2 * y;
+ (t[ix] + 2 * t[ix - 1] + t[ix - 2] + 2) >> 2
+ };
+ }
+ }
+}
+/// 8x8 luma horizontal-up prediction from the eight filtered left samples.
+///
+/// With `zhu = x + 2 * y`: values past the end of the edge (`zhu > 13`) are copies of
+/// `l[7]`, `zhu == 13` blends `l[6]` with `l[7]` counted three times, other odd `zhu`
+/// use a 3-tap filter and even `zhu` a 2-tap average, both starting at
+/// `l[y + (x >> 1)]`.
+fn ipred_y_8x8_hor_up(buf: &mut [u16], stride: usize, ctx: &IPred8Context) {
+ let mut l = [0u16; 8];
+ load(&mut l, &ctx.l);
+
+ for (y, row) in buf.chunks_mut(stride).take(8).enumerate() {
+ for (x, pix) in row.iter_mut().take(8).enumerate() {
+ let zhu = x + 2 * y;
+ let ix = y + (x >> 1);
+ *pix = if zhu > 13 {
+ l[7]
+ } else if zhu == 13 {
+ (l[6] + 3 * l[7] + 2) >> 2
+ } else if (zhu & 1) != 0 {
+ (l[ix] + 2 * l[ix + 1] + l[ix + 2] + 2) >> 2
+ } else {
+ (l[ix] + l[ix + 1] + 1) >> 1
+ };
+ }
+ }
+}
+/// 8x8 luma DC prediction: the rounded mean of the first eight filtered top samples
+/// and the eight filtered left samples fills up to eight samples of each of the first
+/// eight rows.
+fn ipred_y_8x8_dc(buf: &mut [u16], stride: usize, ctx: &IPred8Context) {
+    let edge_sum = ctx.t[..8]
+        .iter()
+        .chain(ctx.l[..8].iter())
+        .fold(0u16, |acc, &v| acc + v);
+    let dc = (edge_sum + 8) >> 4;
+    for line in buf.chunks_mut(stride).take(8) {
+        let width = line.len().min(8);
+        line[..width].fill(dc);
+    }
+}
+/// 8x8 luma DC prediction from the left edge only: the rounded mean of the eight
+/// filtered left samples fills up to eight samples of each of the first eight rows.
+fn ipred_y_8x8_left_dc(buf: &mut [u16], stride: usize, ctx: &IPred8Context) {
+    let edge_sum = ctx.l[..8].iter().fold(0u16, |acc, &v| acc + v);
+    let dc = (edge_sum + 4) >> 3;
+    for line in buf.chunks_mut(stride).take(8) {
+        let width = line.len().min(8);
+        line[..width].fill(dc);
+    }
+}
+/// 8x8 luma DC prediction from the top edge only: the rounded mean of the first eight
+/// filtered top samples fills up to eight samples of each of the first eight rows.
+fn ipred_y_8x8_top_dc(buf: &mut [u16], stride: usize, ctx: &IPred8Context) {
+    let edge_sum = ctx.t[..8].iter().fold(0u16, |acc, &v| acc + v);
+    let dc = (edge_sum + 4) >> 3;
+    for line in buf.chunks_mut(stride).take(8) {
+        let width = line.len().min(8);
+        line[..width].fill(dc);
+    }
+}
+
+/// 8x8 vertical prediction; forwards to `ipred_ver` with block size 8.
+fn ipred_8x8_ver(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16]) {
+ ipred_ver(buf, stride, top, 8);
+}
+/// 8x8 horizontal prediction; forwards to `ipred_hor` with block size 8.
+fn ipred_8x8_hor(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16]) {
+ ipred_hor(buf, stride, left, 8);
+}
+/// 8x8 DC prediction computed separately for each 4x4 quadrant.
+///
+/// * top-left: mean of the first four top and first four left samples;
+/// * top-right: mean of the last four top samples;
+/// * bottom-left: mean of the last four left samples;
+/// * bottom-right: mean of the last four top and last four left samples.
+///
+/// The left column is read from `left[1..]` (`left[0]` is skipped).
+fn ipred_8x8_dc(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16]) {
+    let mut lcol = [0u16; 8];
+    load(&mut lcol, &left[1..]);
+    let mut trow = [0u16; 8];
+    load(&mut trow, top);
+
+    let top_lo = trow[0] + trow[1] + trow[2] + trow[3];
+    let left_lo = lcol[0] + lcol[1] + lcol[2] + lcol[3];
+    let top_hi = trow[4] + trow[5] + trow[6] + trow[7];
+    let left_hi = lcol[4] + lcol[5] + lcol[6] + lcol[7];
+
+    let upper = ((top_lo + left_lo + 4) >> 3, (top_hi + 2) >> 2);
+    let lower = ((left_hi + 2) >> 2, (top_hi + left_hi + 4) >> 3);
+
+    for (y, line) in buf.chunks_mut(stride).take(8).enumerate() {
+        let (first, second) = if y < 4 { upper } else { lower };
+        line[..4].fill(first);
+        line[4..8].fill(second);
+    }
+}
+/// 8x8 DC prediction from the left edge only: the upper four rows are filled with the
+/// rounded mean of `left[1..=4]`, the lower four rows with that of `left[5..=8]`.
+fn ipred_8x8_left_dc(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16]) {
+    let upper_sum = left[1..].iter().take(4).fold(0u16, |acc, &v| acc + v);
+    let lower_sum = left[1..].iter().skip(4).take(4).fold(0u16, |acc, &v| acc + v);
+    let upper_dc = (upper_sum + 2) >> 2;
+    let lower_dc = (lower_sum + 2) >> 2;
+
+    for (y, line) in buf.chunks_mut(stride).take(8).enumerate() {
+        let dc = if y < 4 { upper_dc } else { lower_dc };
+        line[..8].fill(dc);
+    }
+}
+/// 8x8 DC prediction from the top edge only, computed per 4-sample-wide half.
+///
+/// The two upper 4x4 quadrants are filled with the rounded mean of `top[..4]` and
+/// `top[4..8]`. The lower quadrants then reuse row 3 of the freshly written output as
+/// their "top" edge: four equal samples average to themselves, so each lower quadrant
+/// repeats the value of the quadrant above it.
+fn ipred_8x8_top_dc(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16]) {
+ ipred_top_dc(buf, stride, top, 4, 2);
+ ipred_top_dc(&mut buf[4..], stride, &top[4..], 4, 2);
+ // Must run after the two calls above: it reads what they wrote.
+ let mut top = [0; 8];
+ top.copy_from_slice(&buf[stride * 3..][..8]);
+ ipred_top_dc(&mut buf[4 * stride..], stride, &top, 4, 2);
+ ipred_top_dc(&mut buf[4 + 4 * stride..], stride, &top[4..], 4, 2);
+}
+
+/// 16x16 vertical prediction; forwards to `ipred_ver` with block size 16.
+fn ipred_16x16_ver(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16]) {
+ ipred_ver(buf, stride, top, 16);
+}
+/// 16x16 horizontal prediction; forwards to `ipred_hor` with block size 16.
+fn ipred_16x16_hor(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16]) {
+ ipred_hor(buf, stride, left, 16);
+}
+/// 16x16 DC prediction from both edges: 32 samples are summed, hence shift 5.
+fn ipred_16x16_dc(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16]) {
+ ipred_dc(buf, stride, top, left, 16, 5);
+}
+/// 16x16 DC prediction from the left edge only: 16 samples are summed, hence shift 4.
+fn ipred_16x16_left_dc(buf: &mut [u16], stride: usize, _top: &[u16], left: &[u16]) {
+ ipred_left_dc(buf, stride, left, 16, 4);
+}
+/// 16x16 DC prediction from the top edge only: 16 samples are summed, hence shift 4.
+fn ipred_16x16_top_dc(buf: &mut [u16], stride: usize, top: &[u16], _left: &[u16]) {
+ ipred_top_dc(buf, stride, top, 16, 4);
+}
+
+/// Adds a 4x4 block of residuals to `dst` starting at `offset`, rows `stride` apart.
+pub type AddCoeffsFunc = fn(dst: &mut [u16], offset: usize, stride: usize, coeffs: &[i16]);
+/// Adds an 8x8 block of residuals to `dst` starting at `offset`, rows `stride` apart.
+pub type AddCoeffs8Func = fn(dst: &mut [u16], offset: usize, stride: usize, coeffs: &[i16; 64]);
+/// 4x4 intra predictor taking the top, left and top-right neighbour samples.
+pub type IPred4x4Func = fn(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16], tr: &[u16]);
+/// Intra predictor taking top and left neighbour samples (used by the chroma 8x8 and 16x16 tables).
+pub type IPred8x8Func = fn(buf: &mut [u16], stride: usize, top: &[u16], left: &[u16]);
+/// 8x8 luma intra predictor working on pre-filtered edges from an `IPred8Context`.
+pub type IPred8x8LumaFunc = fn(buf: &mut [u16], stride: usize, ctx: &IPred8Context);
+
+// Positions of the DC fallback predictors inside the 12-entry tables
+// (`IPredFuncs::ipred4x4`, `IPredFuncs::ipred8x8_luma`) ...
+pub const IPRED4_DC128: usize = 11;
+pub const IPRED4_DC_TOP: usize = 10;
+pub const IPRED4_DC_LEFT: usize = 9;
+// ... and inside the 7-entry tables (`IPredFuncs::ipred8x8_chroma`, `IPredFuncs::ipred16x16`).
+pub const IPRED8_DC128: usize = 6;
+pub const IPRED8_DC_TOP: usize = 5;
+pub const IPRED8_DC_LEFT: usize = 4;
+
+/// Function set for one sample bit depth, looked up by `find_ipred_funcs`.
+pub struct IPredFuncs {
+ /// Mid-range sample value; doubles as the lookup key in `find_ipred_funcs`.
+ pub fill_val: u16,
+ /// Residual addition for 4x4 blocks.
+ pub add_coeffs: AddCoeffsFunc,
+ /// Residual addition for 8x8 blocks.
+ pub add_coeffs8: AddCoeffs8Func,
+ /// 4x4 predictors; the last three entries are the left-DC, top-DC and constant-DC fallbacks.
+ pub ipred4x4: [IPred4x4Func; 12],
+ /// 8x8 luma predictors, laid out like `ipred4x4`.
+ pub ipred8x8_luma: [IPred8x8LumaFunc; 12],
+ /// 8x8 chroma predictors; the last three entries are the left-DC, top-DC and constant-DC fallbacks.
+ pub ipred8x8_chroma: [IPred8x8Func; 7],
+ /// 16x16 predictors; the last three entries are the left-DC, top-DC and constant-DC fallbacks.
+ pub ipred16x16: [IPred8x8Func; 7],
+}
+
+/// Available function sets; currently only the one for mid value 0x200.
+static IPRED_FUNCS: [IPredFuncs; 1] = [
+ IPredFuncs {
+ fill_val: 0x200,
+ add_coeffs: add_coeffs_10,
+ add_coeffs8: add_coeffs8_10,
+ // Entries 9..=11 match IPRED4_DC_LEFT, IPRED4_DC_TOP and IPRED4_DC128.
+ ipred4x4: [
+ ipred_4x4_ver, ipred_4x4_hor, ipred_4x4_dc,
+ ipred_4x4_diag_down_left, ipred_4x4_diag_down_right,
+ ipred_4x4_ver_right, ipred_4x4_hor_down, ipred_4x4_ver_left, ipred_4x4_hor_up,
+ ipred_4x4_left_dc, ipred_4x4_top_dc, ipred_4x4_dc128_10
+ ],
+ ipred8x8_luma: [
+ ipred_y_8x8_ver, ipred_y_8x8_hor, ipred_y_8x8_dc,
+ ipred_y_8x8_diag_down_left, ipred_y_8x8_diag_down_right,
+ ipred_y_8x8_ver_right, ipred_y_8x8_hor_down,
+ ipred_y_8x8_ver_left, ipred_y_8x8_hor_up,
+ ipred_y_8x8_left_dc, ipred_y_8x8_top_dc, ipred_y_8x8_dc128_10
+ ],
+ // Note the order: the chroma table starts dc, hor, ver while the 16x16 table starts ver, hor, dc.
+ // Entries 4..=6 match IPRED8_DC_LEFT, IPRED8_DC_TOP and IPRED8_DC128 in both tables.
+ ipred8x8_chroma: [
+ ipred_8x8_dc, ipred_8x8_hor, ipred_8x8_ver, ipred_8x8_plane_10,
+ ipred_8x8_left_dc, ipred_8x8_top_dc, ipred_8x8_dc128_10
+ ],
+ ipred16x16: [
+ ipred_16x16_ver, ipred_16x16_hor, ipred_16x16_dc, ipred_16x16_plane_10,
+ ipred_16x16_left_dc, ipred_16x16_top_dc, ipred_16x16_dc128_10
+ ]
+ }
+];
+
+/// Returns the function set whose `fill_val` equals the argument.
+///
+/// # Panics
+///
+/// Panics (via `unreachable!`) when no entry of `IPRED_FUNCS` matches `fill_val`.
+pub fn find_ipred_funcs(fill_val: u16) -> &'static IPredFuncs {
+    match IPRED_FUNCS.iter().find(|set| set.fill_val == fill_val) {
+        Some(set) => set,
+        None => unreachable!(),
+    }
+}
+
+/// Deblocking filter kernels for one line of samples across an edge.
+///
+/// `$buf[$off]` is the first sample on the far side of the edge (`q0`), `$step` is the
+/// distance between neighbouring samples across the edge; `p*` samples lie at
+/// `$off - k * $step`, `q*` samples at `$off + k * $step`.
+///
+/// Arms:
+/// * `lumaedge` -- strong luma filter; rewrites up to three samples per side when the
+///   side is smooth (`a_p` / `a_q`) and the step across the edge is small, otherwise
+///   only `p0` / `q0`;
+/// * `chromaedge` -- strong chroma filter; rewrites `p0` and `q0` only;
+/// * `lumanormal` -- clipped luma filter; the correction is limited to `tc0` plus one
+///   for every smooth side;
+/// * `chromanormal` -- clipped chroma filter with limit `tc0 + 1`.
+///
+/// `$clip` is accepted by every arm but only used by the two `*normal` arms.
+/// Operands are substituted textually into expressions such as `$off - $step * 3`.
+macro_rules! loop_filter {
+ (lumaedge; $buf: expr, $off: expr, $step: expr, $alpha: expr, $beta: expr, $clip: ident) => {
+ let p2 = $buf[$off - $step * 3] as i16;
+ let p1 = $buf[$off - $step * 2] as i16;
+ let p0 = $buf[$off - $step] as i16;
+ let q0 = $buf[$off] as i16;
+ let q1 = $buf[$off + $step] as i16;
+ let q2 = $buf[$off + $step * 2] as i16;
+ // Smoothness of each side, judged two samples away from the edge.
+ let a_p = (p2 - p0).abs() < $beta;
+ let a_q = (q2 - q0).abs() < $beta;
+ if a_p && (p0 - q0).abs() < (($alpha >> 2) + 2) {
+ let p3 = $buf[$off - $step * 4] as i16;
+ $buf[$off - $step * 3] = ((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) as u16;
+ $buf[$off - $step * 2] = ((p2 + p1 + p0 + q0 + 2) >> 2) as u16;
+ $buf[$off - $step] = ((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) as u16;
+ } else {
+ $buf[$off - $step] = ((2 * p1 + p0 + q1 + 2) >> 2) as u16;
+ }
+ if a_q && (p0 - q0).abs() < (($alpha >> 2) + 2) {
+ let q3 = $buf[$off + $step * 3] as i16;
+ $buf[$off] = ((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) as u16;
+ $buf[$off + $step] = ((p0 + q0 + q1 + q2 + 2) >> 2) as u16;
+ $buf[$off + $step * 2] = ((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) as u16;
+ } else {
+ $buf[$off] = ((2 * q1 + q0 + p1 + 2) >> 2) as u16;
+ }
+ };
+ (chromaedge; $buf: expr, $off: expr, $step: expr, $clip:ident) => {
+ let p1 = $buf[$off - $step * 2] as i16;
+ let p0 = $buf[$off - $step] as i16;
+ let q0 = $buf[$off] as i16;
+ let q1 = $buf[$off + $step] as i16;
+ $buf[$off - $step] = ((2 * p1 + p0 + q1 + 2) >> 2) as u16;
+ $buf[$off] = ((2 * q1 + q0 + p1 + 2) >> 2) as u16;
+ };
+ (lumanormal; $buf: expr, $off: expr, $step: expr, $tc0: expr, $beta: expr, $clip: ident) => {
+ let p2 = $buf[$off - $step * 3] as i16;
+ let p1 = $buf[$off - $step * 2] as i16;
+ let p0 = $buf[$off - $step] as i16;
+ let q0 = $buf[$off] as i16;
+ let q1 = $buf[$off + $step] as i16;
+ let q2 = $buf[$off + $step * 2] as i16;
+ let a_p = (p2 - p0).abs() < $beta;
+ let a_q = (q2 - q0).abs() < $beta;
+ // Each smooth side widens the clipping range by one.
+ let tc = $tc0 + (a_p as i16) + (a_q as i16);
+ let delta = (((q0 - p0) * 4 + (p1 - q1) + 4) >> 3).max(-tc).min(tc);
+ if a_p && ($tc0 > 0) {
+ $buf[$off - $step * 2] = $clip(p1 + ((p2 + ((p0 + q0 + 1) >> 1) - p1 * 2) >> 1).max(-$tc0).min($tc0));
+ }
+ $buf[$off - $step] = $clip(p0 + delta);
+ $buf[$off] = $clip(q0 - delta);
+ if a_q && ($tc0 > 0) {
+ $buf[$off + $step] = $clip(q1 + ((q2 + ((p0 + q0 + 1) >> 1) - q1 * 2) >> 1).max(-$tc0).min($tc0));
+ }
+ };
+ (chromanormal; $buf: expr, $off: expr, $step: expr, $tc0: expr, $clip: ident) => {
+ let p1 = $buf[$off - $step * 2] as i16;
+ let p0 = $buf[$off - $step] as i16;
+ let q0 = $buf[$off] as i16;
+ let q1 = $buf[$off + $step] as i16;
+ let tc = $tc0 + 1;
+ let delta = (((q0 - p0) * 4 + (p1 - q1) + 4) >> 3).max(-tc).min(tc);
+ $buf[$off - $step] = $clip(p0 + delta);
+ $buf[$off] = $clip(q0 - delta);
+ }
+}
+
+/// Decides whether the edge at `off` should be filtered.
+///
+/// Returns `true` when the step across the edge (`|p0 - q0|`) is below `alpha` and
+/// the gradient on each side (`|p1 - p0|`, `|q1 - q0|`) is below `beta`. `step` is
+/// the distance between neighbouring samples across the edge.
+fn check_filter(buf: &[u16], off: usize, step: usize, alpha: i16, beta: i16) -> bool {
+    let sample = |pos: usize| buf[pos] as i16;
+    let (p1, p0, q0, q1) = (
+        sample(off - step * 2),
+        sample(off - step),
+        sample(off),
+        sample(off + step),
+    );
+    if (p0 - q0).abs() >= alpha {
+        return false;
+    }
+    if (p1 - p0).abs() >= beta {
+        return false;
+    }
+    (q1 - q0).abs() < beta
+}
+
+/// Evaluates the filtering decision for four consecutive lines across an edge.
+///
+/// Line `n` is inspected at `off + n * stride`; `step` is the distance between
+/// neighbouring samples across the edge. A flag is `true` when `|p0 - q0| < alpha`,
+/// `|p1 - p0| < beta` and `|q1 - q0| < beta`.
+fn check_filter4(buf: &[u16], mut off: usize, step: usize, stride: usize, alpha: i16, beta: i16) -> [bool; 4] {
+    let below = |a: i16, b: i16, limit: i16| (a - b).abs() < limit;
+    let mut flags = [false; 4];
+    for flag in flags.iter_mut() {
+        let p1 = buf[off - step * 2] as i16;
+        let p0 = buf[off - step] as i16;
+        let q0 = buf[off] as i16;
+        let q1 = buf[off + step] as i16;
+        *flag = below(p0, q0, alpha) && below(p1, p0, beta) && below(q1, q0, beta);
+        off += stride;
+    }
+    flags
+}
+
+/// Strong luma filter over four lines spaced `stride` apart, with samples across the
+/// edge one element apart (step 1). Lines failing `check_filter4` are left untouched.
+fn loop_filter_lumaedge_v_10(dst: &mut [u16], mut off: usize, stride: usize, alpha: i16, beta: i16) {
+ // All four decisions are taken before any sample is modified.
+ let flags = check_filter4(dst, off, 1, stride, alpha, beta);
+ for &flag in flags.iter() {
+ if flag {
+ loop_filter!(lumaedge; dst, off, 1, alpha, beta, clip10);
+ }
+ off += stride;
+ }
+}
+/// Strong luma filter over four adjacent positions (`off..off + 4`), with samples
+/// across the edge `stride` apart. Positions failing `check_filter4` are left untouched.
+fn loop_filter_lumaedge_h_10(dst: &mut [u16], off: usize, stride: usize, alpha: i16, beta: i16) {
+ // All four decisions are taken before any sample is modified.
+ let flags = check_filter4(dst, off, stride, 1, alpha, beta);
+ for (x, &flag) in flags.iter().enumerate() {
+ if flag {
+ loop_filter!(lumaedge; dst, off + x, stride, alpha, beta, clip10);
+ }
+ }
+}
+/// Clipped luma filter over four lines spaced `stride` apart, with samples across the
+/// edge one element apart (step 1). `alpha` is only used for the filtering decision;
+/// the kernel itself works with `tc0` and `beta`.
+fn loop_filter_lumanormal_v_10(dst: &mut [u16], mut off: usize, stride: usize, alpha: i16, beta: i16, tc0: i16) {
+ // All four decisions are taken before any sample is modified.
+ let flags = check_filter4(dst, off, 1, stride, alpha, beta);
+ for &flag in flags.iter() {
+ if flag {
+ loop_filter!(lumanormal; dst, off, 1, tc0, beta, clip10);
+ }
+ off += stride;
+ }
+}
+/// Clipped luma filter over four adjacent positions (`off..off + 4`), with samples
+/// across the edge `stride` apart. `alpha` is only used for the filtering decision;
+/// the kernel itself works with `tc0` and `beta`.
+fn loop_filter_lumanormal_h_10(dst: &mut [u16], off: usize, stride: usize, alpha: i16, beta: i16, tc0: i16) {
+ // All four decisions are taken before any sample is modified.
+ let flags = check_filter4(dst, off, stride, 1, alpha, beta);
+ for (x, &flag) in flags.iter().enumerate() {
+ if flag {
+ loop_filter!(lumanormal; dst, off + x, stride, tc0, beta, clip10);
+ }
+ }
+}
+/// Strong chroma filter over two lines spaced `stride` apart, with samples across the
+/// edge one element apart (step 1). Each line is checked right before it is filtered.
+fn loop_filter_chromaedge_v_10(dst: &mut [u16], mut off: usize, stride: usize, alpha: i16, beta: i16) {
+ for _ in 0..2 {
+ if check_filter(dst, off, 1, alpha, beta) {
+ loop_filter!(chromaedge; dst, off, 1, clip10);
+ }
+ off += stride;
+ }
+}
+/// Strong chroma filter across a horizontal edge for two columns of 10-bit samples.
+///
+/// Each column is checked with `check_filter` and, if it passes, filtered right away
+/// with the `chromaedge` filter before the next column is examined.
+fn loop_filter_chromaedge_h_10(dst: &mut [u16], off: usize, stride: usize, alpha: i16, beta: i16) {
+    for col in 0..2 {
+        let pos = off + col;
+        if !check_filter(dst, pos, stride, alpha, beta) {
+            continue;
+        }
+        loop_filter!(chromaedge; dst, pos, stride, clip10);
+    }
+}
+/// Normal-strength chroma filter across a vertical edge for two rows of 10-bit samples.
+///
+/// Each row is checked with `check_filter` and, if it passes, filtered right away
+/// with the `chromanormal` filter using `tc0`.
+fn loop_filter_chromanormal_v_10(dst: &mut [u16], off: usize, stride: usize, alpha: i16, beta: i16, tc0: i16) {
+    for row in 0..2 {
+        let pos = off + row * stride;
+        if !check_filter(dst, pos, 1, alpha, beta) {
+            continue;
+        }
+        loop_filter!(chromanormal; dst, pos, 1, tc0, clip10);
+    }
+}
+/// Normal-strength chroma filter across a horizontal edge for two columns of 10-bit samples.
+///
+/// Each column is checked with `check_filter` and, if it passes, filtered right away
+/// with the `chromanormal` filter using `tc0`.
+fn loop_filter_chromanormal_h_10(dst: &mut [u16], off: usize, stride: usize, alpha: i16, beta: i16, tc0: i16) {
+    for col in 0..2 {
+        let pos = off + col;
+        if !check_filter(dst, pos, stride, alpha, beta) {
+            continue;
+        }
+        loop_filter!(chromanormal; dst, pos, stride, tc0, clip10);
+    }
+}
+
+/// Alpha threshold table, indexed by the value returned from `get_lf_idx` (0..=51).
+///
+/// The edge filter functions shift the entries left by the bit-depth difference
+/// before use, so the values are presumably given at 8-bit scale.
+const ALPHA: [i16; 52] = [
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 4, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, 25, 28,
+ 32, 36, 40, 45, 50, 56, 63, 71, 80, 90, 100, 113, 127, 144, 162, 182,
+ 203, 226, 255, 255
+];
+/// Beta threshold table, indexed by the value returned from `get_lf_idx` (0..=51).
+///
+/// Like `ALPHA`, the entries are shifted left by the bit-depth difference before use.
+const BETA: [i16; 52] = [
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, 8, 8,
+ 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
+ 17, 17, 18, 18
+];
+
+/// Clipping value table for the normal-strength filters.
+///
+/// The first index is the same index used for `ALPHA`; the second index is the
+/// filter mode minus one (modes 1..=3), as used by the edge filter functions.
+const TC0: [[u8; 3]; 52] = [
+ [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0],
+ [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0],
+ [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0],
+ [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0],
+ [ 0, 0, 0], [ 0, 0, 1], [ 0, 0, 1], [ 0, 0, 1],
+ [ 0, 0, 1], [ 0, 1, 1], [ 0, 1, 1], [ 1, 1, 1],
+ [ 1, 1, 1], [ 1, 1, 1], [ 1, 1, 1], [ 1, 1, 2],
+ [ 1, 1, 2], [ 1, 1, 2], [ 1, 1, 2], [ 1, 2, 3],
+ [ 1, 2, 3], [ 2, 2, 3], [ 2, 2, 4], [ 2, 3, 4],
+ [ 2, 3, 4], [ 3, 3, 5], [ 3, 4, 6], [ 3, 4, 6],
+ [ 4, 5, 7], [ 4, 5, 8], [ 4, 6, 9], [ 5, 7, 10],
+ [ 6, 8, 11], [ 6, 8, 13], [ 7, 10, 14], [ 8, 11, 16],
+ [ 9, 12, 18], [10, 13, 20], [11, 15, 23], [13, 17, 25]
+];
+
+/// Computes the index into the `ALPHA`/`BETA`/`TC0` tables for an edge.
+///
+/// The index is the rounded-up average of the two quantiser values plus the
+/// signed offset `off`, clamped to `0..=51`.
+fn get_lf_idx(qp0: u8, qp1: u8, off: i8) -> usize {
+    // Widen before adding so that large quantiser values cannot overflow `u8`.
+    let avg_qp = (i16::from(qp0) + i16::from(qp1) + 1) >> 1;
+    (avg_qp + i16::from(off)).clamp(0, 51) as usize
+}
+
+/// Generates an edge filter entry point with the `LoopFilterFn` signature.
+///
+/// The generated function does nothing for `dmode == 0`. Otherwise it derives the
+/// alpha/beta thresholds from the two quantisers in `quants` and the offsets, scales
+/// them by `$shift`, and calls `$edgefilter` for mode 4 or `$normfilter` (with a
+/// `TC0` value selected by `dmode - 1`) for any other mode.
+macro_rules! filter_edge_func {
+    ($funcname: ident, $edgefilter: ident, $normfilter: ident, $shift: expr) => {
+        fn $funcname(dst: &mut [u16], off: usize, stride: usize, dmode: u8, quants: [u8; 2], alpha_off: i8, beta_off: i8) {
+            if dmode == 0 {
+                return;
+            }
+            let [cur_qp, neigh_qp] = quants;
+            let alpha_idx = get_lf_idx(cur_qp, neigh_qp, alpha_off);
+            let alpha = ALPHA[alpha_idx] << $shift;
+            let beta_idx = get_lf_idx(cur_qp, neigh_qp, beta_off);
+            let beta = BETA[beta_idx] << $shift;
+            match dmode {
+                4 => $edgefilter(dst, off, stride, alpha, beta),
+                _ => {
+                    let tc0 = i16::from(TC0[alpha_idx][(dmode - 1) as usize]) << $shift;
+                    $normfilter(dst, off, stride, alpha, beta, tc0);
+                },
+            }
+        }
+    }
+}
+
+// Instantiate the 10-bit luma/chroma edge filters; the `10 - 8` shift scales the
+// ALPHA, BETA and TC0 table values before they are passed to the filters.
+filter_edge_func!(filter_edge_y_v_10, loop_filter_lumaedge_v_10, loop_filter_lumanormal_v_10, 10 - 8);
+filter_edge_func!(filter_edge_y_h_10, loop_filter_lumaedge_h_10, loop_filter_lumanormal_h_10, 10 - 8);
+filter_edge_func!(filter_edge_c_v_10, loop_filter_chromaedge_v_10, loop_filter_chromanormal_v_10, 10 - 8);
+filter_edge_func!(filter_edge_c_h_10, loop_filter_chromaedge_h_10, loop_filter_chromanormal_h_10, 10 - 8);
+
+/// Signature shared by the edge filter functions stored in `LoopFilterFuncs`.
+pub type LoopFilterFn = fn (dst: &mut [u16], off: usize, stride: usize, dmode: u8, quants: [u8; 2], alpha_off: i8, beta_off: i8);
+
+/// A set of edge filter functions for one sample format.
+pub struct LoopFilterFuncs {
+ // Key used by `find_loop_filter_funcs` to select this set.
+ pub fill_val: u16,
+ // Luma filters for the `_v` and `_h` edge directions.
+ pub filter_edge_y_v: LoopFilterFn,
+ pub filter_edge_y_h: LoopFilterFn,
+ // Chroma filters for the `_v` and `_h` edge directions.
+ pub filter_edge_c_v: LoopFilterFn,
+ pub filter_edge_c_h: LoopFilterFn,
+}
+
+/// All available filter sets; currently only the 10-bit functions, keyed by fill value 0x200.
+static LOOP_FILTER_FUNCS: [LoopFilterFuncs; 1] = [
+ LoopFilterFuncs {
+ fill_val: 0x200,
+ filter_edge_y_v: filter_edge_y_v_10,
+ filter_edge_y_h: filter_edge_y_h_10,
+ filter_edge_c_v: filter_edge_c_v_10,
+ filter_edge_c_h: filter_edge_c_h_10,
+ }
+];
+
+/// Returns the filter set from `LOOP_FILTER_FUNCS` whose `fill_val` equals the argument.
+///
+/// # Panics
+///
+/// Panics (via `unreachable!`) when no registered set matches `fill_val`.
+pub fn find_loop_filter_funcs(fill_val: u16) -> &'static LoopFilterFuncs {
+    match LOOP_FILTER_FUNCS.iter().find(|funcs| funcs.fill_val == fill_val) {
+        Some(funcs) => funcs,
+        None => unreachable!(),
+    }
+}
--- /dev/null
+use nihav_core::frame::NASimpleVideoFrame;
+use super::types::SliceState;
+use super::dsp::*;
+
+/// Applies the in-loop deblocking filter to the current macroblock of `frm`.
+///
+/// The filter function set is selected by `sstate.def_fill`. Every entry of
+/// `sstate.deblock` carries two 4-bit filter modes: the low nibble is passed to the
+/// `*_v` filters and the high nibble to the `*_h` filters. `alpha_off` and `beta_off`
+/// are forwarded unchanged to every filter call.
+pub fn loop_filter_mb(frm: &mut NASimpleVideoFrame<u16>, sstate: &SliceState, alpha_off: i8, beta_off: i8) {
+ let lf = find_loop_filter_funcs(sstate.def_fill);
+
+ // Offsets of the macroblock's top-left sample in each plane.
+ let yoff = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * frm.stride[0];
+ let uoff = frm.offset[1] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[1];
+ let voff = frm.offset[2] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[2];
+ let mb_idx = sstate.mb.xpos + sstate.mb_x;
+
+ // Quantisers of the left neighbour and of the current macroblock.
+ let lqy = sstate.mb.data[mb_idx - 1].qp_y;
+ let lqu = sstate.mb.data[mb_idx - 1].qp_u;
+ let lqv = sstate.mb.data[mb_idx - 1].qp_v;
+ let qy = sstate.mb.data[mb_idx].qp_y;
+ let qu = sstate.mb.data[mb_idx].qp_u;
+ let qv = sstate.mb.data[mb_idx].qp_v;
+
+ // `_v` pass: one group of four modes per row of 4x4 blocks. Only the leftmost edge
+ // pairs the current quantiser with the left neighbour's one.
+ for (y, dmodes) in sstate.deblock.chunks(4).enumerate() {
+ (lf.filter_edge_y_v)(frm.data, yoff + y * 4 * frm.stride[0], frm.stride[0], dmodes[0] & 0xF, [qy, lqy], alpha_off, beta_off);
+ for x in 1..4 {
+ (lf.filter_edge_y_v)(frm.data, yoff + x * 4 + y * 4 * frm.stride[0], frm.stride[0], dmodes[x] & 0xF, [qy, qy], alpha_off, beta_off);
+ }
+ // Chroma reuses the luma modes of columns 0 and 2 for its two edges (at +0 and +4).
+ (lf.filter_edge_c_v)(frm.data, uoff + y * 2 * frm.stride[1], frm.stride[1], dmodes[0] & 0xF, [qu, lqu], alpha_off, beta_off);
+ (lf.filter_edge_c_v)(frm.data, uoff + y * 2 * frm.stride[1] + 4, frm.stride[1], dmodes[2] & 0xF, [qu, qu], alpha_off, beta_off);
+ (lf.filter_edge_c_v)(frm.data, voff + y * 2 * frm.stride[2], frm.stride[2], dmodes[0] & 0xF, [qv, lqv], alpha_off, beta_off);
+ (lf.filter_edge_c_v)(frm.data, voff + y * 2 * frm.stride[2] + 4, frm.stride[2], dmodes[2] & 0xF, [qv, qv], alpha_off, beta_off);
+ }
+
+ // Quantisers of the macroblock above, used for the top edge only.
+ let tqy = sstate.mb.data[mb_idx - sstate.mb.stride].qp_y;
+ let tqu = sstate.mb.data[mb_idx - sstate.mb.stride].qp_u;
+ let tqv = sstate.mb.data[mb_idx - sstate.mb.stride].qp_v;
+
+ // `_h` pass, top macroblock edge (first four entries of `deblock`).
+ let dmodes = &sstate.deblock;
+ for x in 0..4 {
+ (lf.filter_edge_y_h)(frm.data, yoff + x * 4, frm.stride[0], dmodes[x] >> 4, [qy, tqy], alpha_off, beta_off);
+ }
+ for x in 0..4 {
+ (lf.filter_edge_c_h)(frm.data, uoff + x * 2, frm.stride[1], dmodes[x] >> 4, [qu, tqu], alpha_off, beta_off);
+ (lf.filter_edge_c_h)(frm.data, voff + x * 2, frm.stride[2], dmodes[x] >> 4, [qv, tqv], alpha_off, beta_off);
+ }
+
+ // `_h` pass, internal luma edges (block rows 1..).
+ for (y, dmodes) in sstate.deblock.chunks(4).enumerate().skip(1) {
+ for x in 0..4 {
+ (lf.filter_edge_y_h)(frm.data, yoff + x * 4 + y * 4 * frm.stride[0], frm.stride[0], dmodes[x] >> 4, [qy, qy], alpha_off, beta_off);
+ }
+ }
+
+ // `_h` pass, internal chroma edge four rows down, using the modes of luma block row 2.
+ let dmodes = &sstate.deblock[4 * 2..];
+ for x in 0..4 {
+ (lf.filter_edge_c_h)(frm.data, uoff + x * 2 + frm.stride[1] * 4, frm.stride[1], dmodes[x] >> 4, [qu, qu], alpha_off, beta_off);
+ (lf.filter_edge_c_h)(frm.data, voff + x * 2 + frm.stride[2] * 4, frm.stride[2], dmodes[x] >> 4, [qv, qv], alpha_off, beta_off);
+ }
+}
--- /dev/null
+use nihav_core::codecs::{DecoderResult, DecoderError};
+use nihav_core::frame::*;
+use nihav_codec_support::codecs::{MV, ZERO_MV};
+use super::super::{CurrentMBInfo, I4X4_SCAN, Shareable};
+use super::dispatch::{ThreadDispatcher, FrameDecodingStatus};
+use super::dsp::*;
+use super::pic_ref::SimplifiedSliceRefs;
+use super::super::slice::{SliceHeader, WeightInfo, DEF_WEIGHT_INFO};
+use super::super::common_types::*;
+use super::types::*;
+
+/// Performs intra prediction for the current macroblock, luma first and then both chroma planes.
+///
+/// For `Intra8x8` and `Intra4x4` the coded residue of each block is added right after
+/// that block is predicted; for `Intra16x16` only the prediction is produced here.
+/// Any other macroblock type hits `unreachable!()`.
+fn pred_intra(frm: &mut NASimpleVideoFrame<u16>, sstate: &SliceState, mb_info: &CurrentMBInfo) {
+ let ipf = find_ipred_funcs(sstate.def_fill);
+ let yoff = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * frm.stride[0];
+ match mb_info.mb_type {
+ MBType::Intra16x16(imode, _, _) => {
+ // Mode 2 is replaced by a DC variant matching the neighbours that are available.
+ let id = if imode != 2 || (sstate.has_top && sstate.has_left) {
+ imode as usize
+ } else if !sstate.has_top && !sstate.has_left {
+ IPRED8_DC128
+ } else if !sstate.has_left {
+ IPRED8_DC_TOP
+ } else {
+ IPRED8_DC_LEFT
+ };
+ (ipf.ipred16x16[id])(&mut frm.data[yoff..], frm.stride[0], &sstate.top_line_y[sstate.mb_x * 16..], &sstate.left_y);
+ },
+ MBType::Intra8x8 => {
+ let mut ictx = IPred8Context::new(sstate.def_fill);
+ for part in 0..4 {
+ // x and y are in units of 4x4 blocks (0 or 2).
+ let x = (part & 1) * 2;
+ let y = part & 2;
+ let blk4 = x + y * 4;
+
+ let cur_yoff = yoff + x * 4 + y * 4 * frm.stride[0];
+ let has_top = y > 0 || sstate.has_top;
+ let has_left = x > 0 || sstate.has_left;
+ let imode = mb_info.ipred[blk4];
+ let id = if imode != IntraPredMode::DC || (has_top && has_left) {
+ let im_id: u8 = imode.into();
+ im_id as usize
+ } else if !has_top && !has_left {
+ IPRED4_DC128
+ } else if !has_left {
+ IPRED4_DC_TOP
+ } else {
+ IPRED4_DC_LEFT
+ };
+ let mb_idx = sstate.mb_x + sstate.mb_y * sstate.mb_w;
+ let noright = (y == 2 || sstate.mb_x == sstate.mb_w - 1 || mb_idx < sstate.mb_start + sstate.mb_w) && (x == 2);
+ let has_tl = (has_top && x > 0) || (has_left && y > 0) || (x == 0 && y == 0 && sstate.mb_x > 0 && mb_idx > sstate.mb_start + sstate.mb_w);
+ if id != IPRED4_DC128 {
+ // Top neighbours come from the saved top line for the first block row
+ // and from the already reconstructed frame otherwise.
+ let top = if y == 0 {
+ &sstate.top_line_y[sstate.mb_x * 16 + x * 4..]
+ } else {
+ &frm.data[cur_yoff - frm.stride[0]..]
+ };
+ let mut left_buf = [0; 9];
+ let left = if x == 0 {
+ &sstate.left_y[y * 4..]
+ } else {
+ // Gather the top-left sample and the left column from the frame.
+ if has_tl {
+ if y == 0 {
+ left_buf[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1];
+ } else {
+ left_buf[0] = frm.data[cur_yoff - 1 - frm.stride[0]];
+ }
+ }
+ if has_left {
+ for (dst, src) in left_buf[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) {
+ *dst = src[0];
+ }
+ }
+ &left_buf
+ };
+ ictx.fill(top, left, has_top, has_top && !noright, has_left, has_tl);
+ }
+ (ipf.ipred8x8_luma[id])(&mut frm.data[cur_yoff..], frm.stride[0], &ictx);
+ if mb_info.coded[blk4] {
+ (ipf.add_coeffs8)(frm.data, cur_yoff, frm.stride[0], &mb_info.coeffs8x8[part].coeffs);
+ }
+ }
+ },
+ MBType::Intra4x4 => {
+ for &(x,y) in I4X4_SCAN.iter() {
+ let x = x as usize;
+ let y = y as usize;
+ let cur_yoff = yoff + x * 4 + y * 4 * frm.stride[0];
+ let has_top = y > 0 || sstate.has_top;
+ let has_left = x > 0 || sstate.has_left;
+ let imode = mb_info.ipred[x + y * 4];
+ let id = if imode != IntraPredMode::DC || (has_top && has_left) {
+ let im_id: u8 = imode.into();
+ im_id as usize
+ } else if !has_top && !has_left {
+ IPRED4_DC128
+ } else if !has_left {
+ IPRED4_DC_TOP
+ } else {
+ IPRED4_DC_LEFT
+ };
+ let noright = (sstate.mb_x == sstate.mb_w - 1 || sstate.mb_x + sstate.mb_y * sstate.mb_w < sstate.mb_start + sstate.mb_w) && (x == 3);
+ // Top-right samples: taken from the top line or the frame when usable,
+ // otherwise the last top sample is replicated (or zeroes without a top row).
+ let tr: [u16; 4] = if y == 0 {
+ let tsrc = &sstate.top_line_y[sstate.mb_x * 16 + x * 4..];
+ if has_top && !noright {
+ [tsrc[4], tsrc[5], tsrc[6], tsrc[7]]
+ } else if has_top {
+ [tsrc[3]; 4]
+ } else {
+ [0; 4]
+ }
+ } else if (x & 1) == 0 || (x == 1 && y == 2) {
+ let i = cur_yoff - frm.stride[0];
+ [frm.data[i + 4], frm.data[i + 5], frm.data[i + 6], frm.data[i + 7]]
+ } else {
+ let i = cur_yoff - frm.stride[0];
+ [frm.data[i + 3], frm.data[i + 3], frm.data[i + 3], frm.data[i + 3]]
+ };
+ let mut top = [sstate.def_fill; 4];
+ let mut left = [sstate.def_fill; 9];
+ if y == 0 {
+ if has_top {
+ top.copy_from_slice(&sstate.top_line_y[sstate.mb_x * 16 + x * 4..][..4]);
+ }
+ } else {
+ top.copy_from_slice(&frm.data[cur_yoff - frm.stride[0]..][..4]);
+ }
+ if x == 0 {
+ if has_left {
+ for (dst, &src) in left.iter_mut().zip(sstate.left_y[y * 4..].iter()) {
+ *dst = src;
+ }
+ }
+ } else {
+ if y == 0 {
+ // NOTE(review): this code is inside the `x != 0` arm, so the `x == 0`
+ // branch below can never be taken.
+ if x == 0 {
+ left[0] = sstate.left_y[y * 4];
+ } else if has_top {
+ left[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1];
+ }
+ } else {
+ left[0] = frm.data[cur_yoff - frm.stride[0] - 1];
+ }
+ for (dst, row) in left[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) {
+ *dst = row[0];
+ }
+ }
+ (ipf.ipred4x4[id])(&mut frm.data[cur_yoff..], frm.stride[0], &top, &left, &tr);
+ if mb_info.coded[x + y * 4] {
+ (ipf.add_coeffs)(frm.data, cur_yoff, frm.stride[0], &mb_info.coeffs[x + y * 4]);
+ }
+ }
+ },
+ _ => unreachable!(),
+ };
+ // Chroma: mode 0 is replaced by a DC variant matching the available neighbours.
+ let id = if mb_info.chroma_ipred != 0 || (sstate.has_top && sstate.has_left) {
+ mb_info.chroma_ipred as usize
+ } else if !sstate.has_top && !sstate.has_left {
+ IPRED8_DC128
+ } else if !sstate.has_left {
+ IPRED8_DC_TOP
+ } else {
+ IPRED8_DC_LEFT
+ };
+ for chroma in 1..3 {
+ let off = frm.offset[chroma] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[chroma];
+ let top = &sstate.top_line_c[chroma - 1][sstate.mb_x * 8..];
+ (ipf.ipred8x8_chroma[id])(&mut frm.data[off..], frm.stride[chroma], top, &sstate.left_c[chroma - 1]);
+ }
+}
+
+/// Adds the coded luma residue of the current macroblock to the frame.
+///
+/// Depending on `mb_info.transform_size_8x8`, either the sixteen 4x4 blocks or the
+/// four 8x8 blocks are visited in raster order; blocks whose `coded` flag is not set
+/// are skipped.
+fn add_luma(frm: &mut NASimpleVideoFrame<u16>, sstate: &SliceState, mb_info: &CurrentMBInfo) {
+    let ipf = find_ipred_funcs(sstate.def_fill);
+    let stride = frm.stride[0];
+    let mb_off = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * stride;
+    if mb_info.transform_size_8x8 {
+        for blk8 in 0..4 {
+            let (bx, by) = (blk8 & 1, blk8 >> 1);
+            // The coded flag of an 8x8 block is stored at its top-left 4x4 block.
+            if mb_info.coded[bx * 2 + by * 2 * 4] {
+                let blk_off = mb_off + bx * 8 + by * 8 * stride;
+                (ipf.add_coeffs8)(frm.data, blk_off, stride, &mb_info.coeffs8x8[blk8].coeffs);
+            }
+        }
+    } else {
+        for blk4 in 0..16 {
+            let (bx, by) = (blk4 & 3, blk4 >> 2);
+            if mb_info.coded[blk4] {
+                let blk_off = mb_off + bx * 4 + by * 4 * stride;
+                (ipf.add_coeffs)(frm.data, blk_off, stride, &mb_info.coeffs[blk4]);
+            }
+        }
+    }
+}
+
+/// Adds the chroma residue of the current macroblock to both chroma planes.
+///
+/// Chroma blocks are stored after the sixteen luma blocks (four per plane). A block
+/// is added when it is flagged as coded or when its first coefficient is non-zero.
+fn add_chroma(frm: &mut NASimpleVideoFrame<u16>, sstate: &SliceState, mb_info: &CurrentMBInfo) {
+    let ipf = find_ipred_funcs(sstate.def_fill);
+    for chroma in 1..3 {
+        let stride = frm.stride[chroma];
+        let plane_off = frm.offset[chroma] + sstate.mb_x * 8 + sstate.mb_y * 8 * stride;
+        let first_blk = 16 + (chroma - 1) * 4;
+        for sub in 0..4 {
+            let blk_no = first_blk + sub;
+            if !(mb_info.coded[blk_no] || mb_info.coeffs[blk_no][0] != 0) {
+                continue;
+            }
+            let blk_off = plane_off + (sub & 1) * 4 + (sub >> 1) * 4 * stride;
+            (ipf.add_coeffs)(frm.data, blk_off, stride, &mb_info.coeffs[blk_no]);
+        }
+    }
+}
+
+/// Motion-compensates one P-predicted block of `w`x`h` luma samples at (`xpos`, `ypos`).
+///
+/// Without a reference picture the block is filled by `gray_block`. With an
+/// unweighted reference the block is written directly by `do_mc`; otherwise the
+/// prediction is rendered into a temporary block and stored through the weighted
+/// put functions, luma first and then both chroma planes at half size.
+fn do_p_mc(frm: &mut NASimpleVideoFrame<u16>, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV, ref_pic: Option<&SimpleFrame>, weight: &WeightInfo, mc_dsp: &mut H264MC) {
+    let buf = match ref_pic {
+        Some(buf) => buf,
+        None => {
+            mc_dsp.gray_block(frm, xpos, ypos, w, h);
+            return;
+        },
+    };
+    if !weight.is_weighted() {
+        mc_dsp.do_mc(frm, buf, xpos, ypos, w, h, mv);
+        return;
+    }
+
+    let mut tmp = McBlock::new();
+    mc_dsp.mc_blocks(&mut tmp, buf, xpos, ypos, w, h, mv);
+
+    // The weighted put functions are indexed by block width (2, 4, 8, anything else).
+    let wmode = match w {
+        2 => 0,
+        4 => 1,
+        8 => 2,
+        _ => 3,
+    };
+
+    let luma_off = frm.offset[0] + xpos + ypos * frm.stride[0];
+    let luma_params = if weight.luma_weighted {
+        [weight.luma_weight, weight.luma_offset, weight.luma_shift as i8]
+    } else {
+        [1, 0, 0]
+    };
+    (mc_dsp.put_block_weighted[wmode])(&mut frm.data[luma_off..], frm.stride[0], &tmp.y, h, luma_params);
+
+    for chroma in 0..2 {
+        let plane = chroma + 1;
+        let cstride = frm.stride[plane];
+        let coff = frm.offset[plane] + xpos / 2 + ypos / 2 * cstride;
+        let chroma_params = if weight.chroma_weighted {
+            [weight.chroma_weight[chroma], weight.chroma_offset[chroma], weight.chroma_shift as i8]
+        } else {
+            [1, 0, 0]
+        };
+        let csrc = if chroma == 0 { &tmp.u } else { &tmp.v };
+        // Chroma blocks are half as wide, hence the next smaller put function.
+        (mc_dsp.put_block_weighted[wmode - 1])(&mut frm.data[coff..], cstride, csrc, h / 2, chroma_params);
+    }
+}
+
+/// Motion-compensates one block of a B macroblock at (`xpos`, `ypos`) with size `w`x`h`.
+///
+/// `mode` selects prediction from list 0, list 1 or both. Weighted prediction is
+/// used when the list(s) selected by `mode` are weighted (both of them for `Bi`);
+/// otherwise the block is produced by plain copy/averaging. A missing reference
+/// picture results in a gray block, except in unweighted `Bi` mode where the one
+/// available reference is used alone.
+///
+/// In the weighted bi-predicted path the chroma planes use the chroma weight
+/// denominator (`chroma_shift`), matching the single-list path and `do_p_mc`.
+#[allow(clippy::match_like_matches_macro)]
+fn do_b_mc(frm: &mut NASimpleVideoFrame<u16>, mode: BMode, xpos: usize, ypos: usize, w: usize, h: usize, mv0: MV, ref_pic0: Option<&SimpleFrame>, weight0: &WeightInfo, mv1: MV, ref_pic1: Option<&SimpleFrame>, weight1: &WeightInfo, mc_dsp: &mut H264MC) {
+    let do_weight = match (mode, weight0.is_weighted(), weight1.is_weighted()) {
+        (BMode::L0, true, _) => true,
+        (BMode::L1, _, true) => true,
+        (BMode::Bi, true, true) => true,
+        _ => false,
+    };
+    if !do_weight {
+        match mode {
+            BMode::L0 => {
+                if let Some(buf) = ref_pic0 {
+                    mc_dsp.do_mc(frm, buf, xpos, ypos, w, h, mv0);
+                } else {
+                    mc_dsp.gray_block(frm, xpos, ypos, w, h);
+                }
+            },
+            BMode::L1 => {
+                if let Some(buf) = ref_pic1 {
+                    mc_dsp.do_mc(frm, buf, xpos, ypos, w, h, mv1);
+                } else {
+                    mc_dsp.gray_block(frm, xpos, ypos, w, h);
+                }
+            },
+            BMode::Bi => {
+                match (ref_pic0, ref_pic1) {
+                    (Some(buf0), Some(buf1)) => {
+                        mc_dsp.do_mc(frm, buf0, xpos, ypos, w, h, mv0);
+                        mc_dsp.do_mc_avg(frm, buf1, xpos, ypos, w, h, mv1);
+                    },
+                    (Some(buf0), None) => {
+                        mc_dsp.do_mc(frm, buf0, xpos, ypos, w, h, mv0);
+                    },
+                    (None, Some(buf1)) => {
+                        mc_dsp.do_mc(frm, buf1, xpos, ypos, w, h, mv1);
+                    },
+                    (None, None) => {
+                        mc_dsp.gray_block(frm, xpos, ypos, w, h);
+                    },
+                };
+            },
+        };
+    } else {
+        let mut tmp0 = McBlock::new();
+        let mut tmp1 = McBlock::new();
+        match (mode, ref_pic0, ref_pic1) {
+            (BMode::L0, Some(buf), _) | (BMode::L1, _, Some(buf)) => {
+                let (mv, weight) = if mode == BMode::L0 { (mv0, weight0) } else { (mv1, weight1) };
+                mc_dsp.mc_blocks(&mut tmp0, buf, xpos, ypos, w, h, mv);
+
+                let yoff = frm.offset[0] + xpos + ypos * frm.stride[0];
+                let yw = if weight.luma_weighted {
+                    [weight.luma_weight, weight.luma_offset, weight.luma_shift as i8]
+                } else {
+                    [1, 0, 0]
+                };
+                // The weighted put functions are indexed by block width.
+                let wmode = match w {
+                    2 => 0,
+                    4 => 1,
+                    8 => 2,
+                    _ => 3,
+                };
+                (mc_dsp.put_block_weighted[wmode])(&mut frm.data[yoff..], frm.stride[0], &tmp0.y, h, yw);
+
+                for chroma in 0..2 {
+                    let cstride = frm.stride[chroma + 1];
+                    let coff = frm.offset[chroma + 1] + xpos / 2 + ypos / 2 * cstride;
+                    let cw = if weight.chroma_weighted {
+                        [weight.chroma_weight[chroma], weight.chroma_offset[chroma], weight.chroma_shift as i8]
+                    } else {
+                        [1, 0, 0]
+                    };
+                    let csrc = if chroma == 0 { &tmp0.u } else { &tmp0.v };
+                    (mc_dsp.put_block_weighted[wmode - 1])(&mut frm.data[coff..], cstride, csrc, h / 2, cw);
+                }
+            },
+            (BMode::Bi, Some(buf0), Some(buf1)) => { // do both and avg
+                mc_dsp.mc_blocks(&mut tmp0, buf0, xpos, ypos, w, h, mv0);
+                mc_dsp.mc_blocks(&mut tmp1, buf1, xpos, ypos, w, h, mv1);
+
+                let yoff = frm.offset[0] + xpos + ypos * frm.stride[0];
+                // An unweighted list takes the unit weight `1 << shift` of the other list.
+                let yw = match (weight0.luma_weighted, weight1.luma_weighted) {
+                    (true, true) => [weight0.luma_weight, weight0.luma_offset, weight1.luma_weight, weight1.luma_offset, weight0.luma_shift as i8],
+                    (true, false) => [weight0.luma_weight, weight0.luma_offset, 1 << weight0.luma_shift, 0, weight0.luma_shift as i8],
+                    (false, true) => [1 << weight1.luma_shift, 0, weight1.luma_weight, weight1.luma_offset, weight1.luma_shift as i8],
+                    (false, false) => [1, 0, 1, 0, 0],
+                };
+                let wmode = match w {
+                    2 => 0,
+                    4 => 1,
+                    8 => 2,
+                    _ => 3,
+                };
+                (mc_dsp.put_block_weighted2[wmode])(&mut frm.data[yoff..], frm.stride[0], &tmp0.y, &tmp1.y, h, yw);
+
+                for chroma in 0..2 {
+                    let cstride = frm.stride[chroma + 1];
+                    let coff = frm.offset[chroma + 1] + xpos / 2 + ypos / 2 * cstride;
+                    let cw0 = weight0.chroma_weight[chroma];
+                    let co0 = weight0.chroma_offset[chroma];
+                    let cw1 = weight1.chroma_weight[chroma];
+                    let co1 = weight1.chroma_offset[chroma];
+                    // Chroma has its own denominator, so `chroma_shift` is used here
+                    // rather than `luma_shift`.
+                    let cw = match (weight0.chroma_weighted, weight1.chroma_weighted) {
+                        (true, true) => [cw0, co0, cw1, co1, weight0.chroma_shift as i8],
+                        (true, false) => [cw0, co0, 1 << weight0.chroma_shift, 0, weight0.chroma_shift as i8],
+                        (false, true) => [1 << weight1.chroma_shift, 0, cw1, co1, weight1.chroma_shift as i8],
+                        (false, false) => [1, 0, 1, 0, 0],
+                    };
+                    let csrc0 = if chroma == 0 { &tmp0.u } else { &tmp0.v };
+                    let csrc1 = if chroma == 0 { &tmp1.u } else { &tmp1.v };
+                    (mc_dsp.put_block_weighted2[wmode - 1])(&mut frm.data[coff..], cstride, csrc0, csrc1, h / 2, cw);
+                }
+            },
+            _ => {
+                mc_dsp.gray_block(frm, xpos, ypos, w, h);
+            },
+        };
+    }
+}
+
+/// Motion-compensates one bi-predicted 4x4 block at (`xpos`, `ypos`).
+///
+/// `mv[0]`/`mv[1]` are the list 0/list 1 vectors. Unless both weights are flagged
+/// as weighted, the block is produced by plain copy/averaging (using whichever
+/// reference is present, or a gray block when none is). In the weighted path both
+/// references are required; otherwise a gray block is written.
+///
+/// The chroma planes use the chroma weight denominator (`chroma_shift`), matching
+/// the single-list weighting code.
+fn do_b_mc_4x4bi(frm: &mut NASimpleVideoFrame<u16>, xpos: usize, ypos: usize, mv: &[MV; 2], ref_pic0: Option<&SimpleFrame>, weight0: &WeightInfo, ref_pic1: Option<&SimpleFrame>, weight1: &WeightInfo, mc_dsp: &mut H264MC) {
+    if !weight0.is_weighted() || !weight1.is_weighted() {
+        match (ref_pic0, ref_pic1) {
+            (Some(buf0), Some(buf1)) => {
+                mc_dsp.do_mc(frm, buf0, xpos, ypos, 4, 4, mv[0]);
+                mc_dsp.do_mc_avg(frm, buf1, xpos, ypos, 4, 4, mv[1]);
+            },
+            (Some(buf0), None) => {
+                mc_dsp.do_mc(frm, buf0, xpos, ypos, 4, 4, mv[0]);
+            },
+            (None, Some(buf1)) => {
+                mc_dsp.do_mc(frm, buf1, xpos, ypos, 4, 4, mv[1]);
+            },
+            (None, None) => {
+                mc_dsp.gray_block(frm, xpos, ypos, 4, 4);
+            },
+        };
+    } else {
+        let mut tmp0 = McBlock::new();
+        let mut tmp1 = McBlock::new();
+        match (ref_pic0, ref_pic1) {
+            (Some(buf0), Some(buf1)) => { // do both and avg
+                mc_dsp.mc_blocks(&mut tmp0, buf0, xpos, ypos, 4, 4, mv[0]);
+                mc_dsp.mc_blocks(&mut tmp1, buf1, xpos, ypos, 4, 4, mv[1]);
+
+                let yoff = frm.offset[0] + xpos + ypos * frm.stride[0];
+                // An unweighted list takes the unit weight `1 << shift` of the other list.
+                let yw = match (weight0.luma_weighted, weight1.luma_weighted) {
+                    (true, true) => [weight0.luma_weight, weight0.luma_offset, weight1.luma_weight, weight1.luma_offset, weight0.luma_shift as i8],
+                    (true, false) => [weight0.luma_weight, weight0.luma_offset, 1 << weight0.luma_shift, 0, weight0.luma_shift as i8],
+                    (false, true) => [1 << weight1.luma_shift, 0, weight1.luma_weight, weight1.luma_offset, weight1.luma_shift as i8],
+                    (false, false) => [1, 0, 1, 0, 0],
+                };
+                // Index 1 is the put function for 4-sample-wide blocks.
+                (mc_dsp.put_block_weighted2[1])(&mut frm.data[yoff..], frm.stride[0], &tmp0.y, &tmp1.y, 4, yw);
+
+                for chroma in 0..2 {
+                    let cstride = frm.stride[chroma + 1];
+                    let coff = frm.offset[chroma + 1] + xpos / 2 + ypos / 2 * cstride;
+                    let cw0 = weight0.chroma_weight[chroma];
+                    let co0 = weight0.chroma_offset[chroma];
+                    let cw1 = weight1.chroma_weight[chroma];
+                    let co1 = weight1.chroma_offset[chroma];
+                    // Chroma has its own denominator, so `chroma_shift` is used here
+                    // rather than `luma_shift`.
+                    let cw = match (weight0.chroma_weighted, weight1.chroma_weighted) {
+                        (true, true) => [cw0, co0, cw1, co1, weight0.chroma_shift as i8],
+                        (true, false) => [cw0, co0, 1 << weight0.chroma_shift, 0, weight0.chroma_shift as i8],
+                        (false, true) => [1 << weight1.chroma_shift, 0, cw1, co1, weight1.chroma_shift as i8],
+                        (false, false) => [1, 0, 1, 0, 0],
+                    };
+                    let csrc0 = if chroma == 0 { &tmp0.u } else { &tmp0.v };
+                    let csrc1 = if chroma == 0 { &tmp1.u } else { &tmp1.v };
+                    // Index 0 is the put function for the 2-sample-wide chroma blocks.
+                    (mc_dsp.put_block_weighted2[0])(&mut frm.data[coff..], cstride, csrc0, csrc1, 2, cw);
+                }
+            },
+            _ => {
+                mc_dsp.gray_block(frm, xpos, ypos, 4, 4);
+            },
+        };
+    }
+}
+
+/// Returns the list 0 and list 1 prediction weights for a block.
+///
+/// Unless the block is bi-predicted and `weight_mode` is 2, the weights come from the
+/// slice header. Otherwise they are derived from the distances between the current
+/// picture and the two references; the default weights are returned when a reference
+/// is missing or long-term, when both references have the same id, or when the
+/// derived scale is 128 or outside the accepted range.
+fn get_weights(slice_hdr: &SliceHeader, frame_refs: &SimplifiedSliceRefs, mode: BMode, weight_mode: u8, ref_l0: PicRef, ref_l1: PicRef) -> (WeightInfo, WeightInfo) {
+    let idx_l0 = ref_l0.index();
+    let idx_l1 = ref_l1.index();
+    if mode != BMode::Bi || weight_mode != 2 {
+        return (slice_hdr.get_weight(0, idx_l0), slice_hdr.get_weight(1, idx_l1));
+    }
+    let (pic0, pic1) = match (frame_refs.ref_list0.get(idx_l0), frame_refs.ref_list1.get(idx_l1)) {
+        (Some(Some(ref pic0)), Some(Some(ref pic1))) => (pic0, pic1),
+        _ => return (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO),
+    };
+
+    // Only the low 16 bits of the picture ids take part in the distance computation.
+    let poc0 = i32::from(pic0.full_id as u16);
+    let poc1 = i32::from(pic1.full_id as u16);
+    let cur_poc = i32::from(frame_refs.cur_id as u16);
+    if poc0 == poc1 || pic0.long_term || pic1.long_term {
+        return (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO);
+    }
+
+    // `td` cannot be zero here because equal ids were rejected above.
+    let td = (poc1 - poc0).clamp(-128, 127);
+    let tx = (16384 + (td / 2).abs()) / td;
+    let tb = (cur_poc - poc0).clamp(-128, 127);
+    let scale = ((tb * tx + 32) >> 6).clamp(-1024, 1023);
+    let wide_w1 = scale >> 2;
+    if scale == 128 || !(-64..=128).contains(&wide_w1) {
+        return (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO);
+    }
+    // NOTE(review): a value of exactly 128 passes the range check above but does not
+    // fit into `i8` - confirm whether that case needs separate handling.
+    let w1 = wide_w1 as i8;
+    let w0 = 64 - w1;
+
+    let implicit_weight = |weight: i8| WeightInfo {
+        luma_weighted: true,
+        luma_weight: weight,
+        luma_offset: 0,
+        luma_shift: 5,
+        chroma_weighted: true,
+        chroma_weight: [weight; 2],
+        chroma_offset: [0; 2],
+        chroma_shift: 5,
+    };
+
+    (implicit_weight(w0), implicit_weight(w1))
+}
+
+/// Reconstructs the current macroblock into `frm`.
+///
+/// Intra macroblocks are predicted with `pred_intra`; inter macroblocks are
+/// motion-compensated partition by partition, using the motion vectors and reference
+/// indices stored in `sstate` and `mb_info`. Afterwards the residue is added for all
+/// non-skipped macroblocks (luma residue is skipped for `Intra4x4`/`Intra8x8`, whose
+/// residue is already added inside `pred_intra`).
+pub fn recon_mb(frm: &mut NASimpleVideoFrame<u16>, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mc_dsp: &mut H264MC, weight_mode: u8) {
+ // Position of the macroblock's top-left luma sample.
+ let xpos = sstate.mb_x * 16;
+ let ypos = sstate.mb_y * 16;
+
+ match mb_info.mb_type {
+ MBType::Intra16x16(_, _, _) => {
+ pred_intra(frm, sstate, mb_info);
+ },
+ MBType::Intra4x4 | MBType::Intra8x8 => {
+ pred_intra(frm, sstate, mb_info);
+ },
+ // Nothing is predicted for PCM macroblocks here.
+ MBType::PCM => {},
+ MBType::PSkip => {
+ // Skipped P macroblocks always use reference 0 of list 0.
+ let mv = sstate.get_cur_blk4(0).mv[0];
+ let rpic = frame_refs.select_ref_pic(0, 0);
+ let weight = &slice_hdr.get_weight(0, 0);
+ do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight, mc_dsp);
+ },
+ MBType::P16x16 => {
+ let mv = sstate.get_cur_blk4(0).mv[0];
+ let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index());
+ let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index());
+ do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight, mc_dsp);
+ },
+ MBType::P16x8 | MBType::P8x16 => {
+ // (partition width, partition height, x/y offset of the second partition)
+ let (bw, bh, bx, by) = if mb_info.mb_type == MBType::P16x8 {
+ (16, 8, 0, 8)
+ } else {
+ (8, 16, 8, 0)
+ };
+ let mv = sstate.get_cur_blk4(0).mv[0];
+ let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index());
+ let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index());
+ do_p_mc(frm, xpos, ypos, bw, bh, mv, rpic, weight, mc_dsp);
+ let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0];
+ let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[1].index());
+ let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[1].index());
+ do_p_mc(frm, xpos + bx, ypos + by, bw, bh, mv, rpic, weight, mc_dsp);
+ },
+ MBType::P8x8 | MBType::P8x8Ref0 => {
+ for part in 0..4 {
+ let bx = (part & 1) * 8;
+ let by = (part & 2) * 4;
+ let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0];
+ let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index());
+ let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[part].index());
+
+ // All sub-partitions of one 8x8 part share its reference picture and weight.
+ match mb_info.sub_mb_type[part] {
+ SubMBType::P8x8 => {
+ do_p_mc(frm, xpos + bx, ypos + by, 8, 8, mv, rpic, weight, mc_dsp);
+ },
+ SubMBType::P8x4 => {
+ do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic, weight, mc_dsp);
+ let mv = sstate.get_cur_blk4(bx / 4 + by + 4).mv[0];
+ do_p_mc(frm, xpos + bx, ypos + by + 4, 8, 4, mv, rpic, weight, mc_dsp);
+ },
+ SubMBType::P4x8 => {
+ do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic, weight, mc_dsp);
+ let mv = sstate.get_cur_blk4(bx / 4 + by + 1).mv[0];
+ do_p_mc(frm, xpos + bx + 4, ypos + by, 4, 8, mv, rpic, weight, mc_dsp);
+ },
+ SubMBType::P4x4 => {
+ for sb_no in 0..4 {
+ let sxpos = xpos + bx + (sb_no & 1) * 4;
+ let sypos = ypos + by + (sb_no & 2) * 2;
+ let sblk_no = (bx / 4 + (sb_no & 1)) + ((by / 4) + (sb_no >> 1)) * 4;
+ let mv = sstate.get_cur_blk4(sblk_no).mv[0];
+ do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic, weight, mc_dsp);
+ }
+ },
+ _ => unreachable!(),
+ };
+ }
+ },
+ MBType::B16x16(mode) => {
+ let mv0 = sstate.get_cur_blk4(0).mv[0];
+ let rpic0 = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index());
+ let mv1 = sstate.get_cur_blk4(0).mv[1];
+ let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[0].index());
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, mb_info.ref_l0[0], mb_info.ref_l1[0]);
+ do_b_mc(frm, mode, xpos, ypos, 16, 16, mv0, rpic0, &weight0, mv1, rpic1, &weight1, mc_dsp);
+ },
+ MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
+ let (pw, ph) = mb_info.mb_type.size();
+ // Offset of the second partition: non-zero only along the dimension that is 8.
+ let (px, py) = (pw & 8, ph & 8);
+ let modes = [mode0, mode1];
+ let (mut bx, mut by) = (0, 0);
+ for part in 0..2 {
+ let blk = if part == 0 { 0 } else { (px / 4) + py };
+ let mv0 = sstate.get_cur_blk4(blk).mv[0];
+ let rpic0 = frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index());
+ let mv1 = sstate.get_cur_blk4(blk).mv[1];
+ let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[part].index());
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, modes[part], weight_mode, mb_info.ref_l0[part], mb_info.ref_l1[part]);
+ do_b_mc(frm, modes[part], xpos + bx, ypos + by, pw, ph, mv0, rpic0, &weight0, mv1, rpic1, &weight1, mc_dsp);
+ bx += px;
+ by += py;
+ }
+ },
+ MBType::Direct | MBType::BSkip => {
+ let colo_mb_type = frame_refs.get_colocated_info(sstate.mb_x, sstate.mb_y).0.mb_type;
+ let is_16x16 = colo_mb_type.is_16x16_ref();
+
+ // One 16x16 block when the co-located macroblock allows it, otherwise sixteen 4x4 blocks.
+ if is_16x16 {
+ let mv = sstate.get_cur_blk4(0).mv;
+ let ref_idx = sstate.get_cur_blk8(0).ref_idx;
+ let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index());
+ let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index());
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]);
+ do_b_mc(frm, BMode::Bi, xpos, ypos, 16, 16, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
+ } else {
+ for blk4 in 0..16 {
+ let ref_idx = sstate.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx;
+ let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index());
+ let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index());
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]);
+ let mv = &sstate.get_cur_blk4(blk4).mv;
+ do_b_mc_4x4bi(frm, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, mv, rpic0, &weight0, rpic1, &weight1, mc_dsp);
+ }
+ }
+ // Mark the stored reference indices as direct once compensation is done.
+ sstate.apply_to_blk8(|blk8| { blk8.ref_idx[0].set_direct(); blk8.ref_idx[1].set_direct(); });
+ },
+ MBType::B8x8 => {
+ for part in 0..4 {
+ let ridx = sstate.get_cur_blk8(part).ref_idx;
+ let rpic0 = frame_refs.select_ref_pic(0, ridx[0].index());
+ let rpic1 = frame_refs.select_ref_pic(1, ridx[1].index());
+ let subtype = mb_info.sub_mb_type[part];
+ // Index of the 4x4 block at the top-left corner of this 8x8 part.
+ let blk8 = (part & 1) * 2 + (part & 2) * 4;
+ let mut bx = (part & 1) * 8;
+ let mut by = (part & 2) * 4;
+ match subtype {
+ SubMBType::Direct8x8 => {
+ for blk in 0..4 {
+ let ref_idx = sstate.get_cur_blk8(bx / 8 + (by / 8) * 2).ref_idx;
+ let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index());
+ let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index());
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]);
+ let mv = &sstate.get_cur_blk4(bx / 4 + (by / 4) * 4).mv;
+ do_b_mc_4x4bi(frm, xpos + bx, ypos + by, mv, rpic0, &weight0, rpic1, &weight1, mc_dsp);
+ // Walk the four 4x4 blocks in raster order inside the 8x8 part.
+ bx += 4;
+ if blk == 1 {
+ bx -= 8;
+ by += 4;
+ }
+ }
+ sstate.get_cur_blk8(part).ref_idx[0].set_direct();
+ sstate.get_cur_blk8(part).ref_idx[1].set_direct();
+ },
+ SubMBType::B8x8(mode) => {
+ let mv = sstate.get_cur_blk4(blk8).mv;
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]);
+ do_b_mc(frm, mode, xpos + bx, ypos + by, 8, 8, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
+ },
+ SubMBType::B8x4(mode) | SubMBType::B4x8(mode) => {
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]);
+ let (pw, ph) = subtype.size();
+ let mv = sstate.get_cur_blk4(blk8).mv;
+ do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
+ // The second half lies 4 samples right (B4x8) or 4 samples down (B8x4).
+ let addr2 = blk8 + (pw & 4) / 4 + (ph & 4);
+ let mv = sstate.get_cur_blk4(addr2).mv;
+ do_b_mc(frm, mode, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
+ },
+ SubMBType::B4x4(mode) => {
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]);
+ for i in 0..4 {
+ let addr2 = blk8 + (i & 1) + (i & 2) * 2;
+ let mv = sstate.get_cur_blk4(addr2).mv;
+ do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
+ bx += 4;
+ if i == 1 {
+ bx -= 8;
+ by += 4;
+ }
+ }
+ },
+ _ => unreachable!(),
+ };
+ }
+ },
+ };
+ // Residue: luma for Intra4x4/Intra8x8 is already added inside `pred_intra`.
+ if !mb_info.mb_type.is_skip() {
+ if mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 {
+ add_luma(frm, sstate, mb_info);
+ }
+ add_chroma(frm, sstate, mb_info);
+ }
+}
+
+/// Blocks until the reference picture `ref_id` has been decoded far enough.
+///
+/// The awaited position is (`xpos`, `ypos`) displaced by the integer part of `mv`
+/// plus a margin of 6 samples, converted to a macroblock index and clamped to the
+/// picture. The dispatcher is polled, yielding the thread between attempts, until
+/// it reports the position as ready.
+///
+/// # Errors
+///
+/// Returns `DecoderError::MissingReference` when the dispatcher reports any status
+/// other than ready or not-ready.
+pub fn wait_for_mb(disp: &Shareable<ThreadDispatcher>, sstate: &SliceState, xpos: usize, ypos: usize, mv: MV, ref_id: u32) -> DecoderResult<()> {
+    let ref_x = xpos as isize + ((mv.x >> 2) as isize) + 6;
+    let ref_y = ypos as isize + ((mv.y >> 2) as isize) + 6;
+    let to_mb = |pos: isize, mb_count: usize| ((pos.max(0) as usize) / 16).min(mb_count - 1);
+    let dst_mb_x = to_mb(ref_x, sstate.mb_w);
+    let dst_mb_y = to_mb(ref_y, sstate.mb_h);
+    let expected_mb = dst_mb_x + dst_mb_y * sstate.mb_w;
+    loop {
+        // NOTE(review): a failed `read()` is silently retried; if the failure is
+        // permanent this loop never terminates - confirm that is acceptable.
+        match disp.read() {
+            Ok(ds) => {
+                match ds.check_pos(ref_id, expected_mb) {
+                    FrameDecodingStatus::Ok => return Ok(()),
+                    FrameDecodingStatus::NotReady => {},
+                    _ => return Err(DecoderError::MissingReference),
+                };
+            },
+            Err(_) => {},
+        }
+        std::thread::yield_now();
+    }
+}
+
+/// Waits until both references of a B-predicted block are decoded far enough.
+///
+/// For each list whose reference index resolves to a picture id, `wait_for_mb` is
+/// called for the bottom-right corner of the block (`xpos + w`, `ypos + h`) with
+/// that list's motion vector. List 0 is handled before list 1 and the first error
+/// is returned immediately.
+fn wait_b_mc(disp: &Shareable<ThreadDispatcher>, sstate: &SliceState, frame_refs: &SimplifiedSliceRefs, mv: [MV; 2], ref_idx: [PicRef; 2], xpos: usize, ypos: usize, w: usize, h: usize) -> DecoderResult<()> {
+    let wait_for = |ref_id: Option<u32>, list_mv: MV| -> DecoderResult<()> {
+        match ref_id {
+            Some(id) => wait_for_mb(disp, sstate, xpos + w, ypos + h, list_mv, id),
+            None => Ok(()),
+        }
+    };
+    wait_for(frame_refs.get_ref_id(0, ref_idx[0].index()), mv[0])?;
+    wait_for(frame_refs.get_ref_id(1, ref_idx[1].index()), mv[1])
+}
+
+/// Reconstructs the current macroblock for the multi-threaded decoder.
+///
+/// Intra macroblocks are predicted with `pred_intra`. For inter macroblocks every
+/// motion compensation call (`do_p_mc`/`do_b_mc`) is preceded by `wait_for_mb` or
+/// `wait_b_mc`, which block until the dispatcher `disp` reports the referenced
+/// macroblock as decoded. Afterwards `add_luma`/`add_chroma` are called for all
+/// non-skipped macroblocks.
+///
+/// Returns an error as soon as one of the waits fails.
+pub fn recon_mb_mt(frm: &mut NASimpleVideoFrame<u16>, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &SimplifiedSliceRefs, mc_dsp: &mut H264MC, weight_mode: u8, disp: &Shareable<ThreadDispatcher>) -> DecoderResult<()> {
+ // pixel position of the top-left corner of the current macroblock
+ let xpos = sstate.mb_x * 16;
+ let ypos = sstate.mb_y * 16;
+
+ match mb_info.mb_type {
+ MBType::Intra16x16(_, _, _) => {
+ pred_intra(frm, sstate, mb_info);
+ },
+ MBType::Intra4x4 | MBType::Intra8x8 => {
+ pred_intra(frm, sstate, mb_info);
+ },
+ MBType::PCM => {},
+ MBType::PSkip => {
+ // skipped P macroblock: always predicted from entry 0 of list 0
+ let mv = sstate.get_cur_blk4(0).mv[0];
+ if let Some(ref_id) = frame_refs.get_ref_id(0, 0) {
+ wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv, ref_id)?;
+ }
+ let rpic = frame_refs.select_ref_pic(0, 0);
+ let weight = &slice_hdr.get_weight(0, 0);
+ do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight, mc_dsp);
+ },
+ MBType::P16x16 => {
+ let mv = sstate.get_cur_blk4(0).mv[0];
+ if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[0].index()) {
+ wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv, ref_id)?;
+ }
+ let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index());
+ let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index());
+ do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight, mc_dsp);
+ },
+ MBType::P16x8 | MBType::P8x16 => {
+ // (partition width, partition height, x offset and y offset of the second partition)
+ let (bw, bh, bx, by) = if mb_info.mb_type == MBType::P16x8 {
+ (16, 8, 0, 8)
+ } else {
+ (8, 16, 8, 0)
+ };
+ let mv = sstate.get_cur_blk4(0).mv[0];
+ if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[0].index()) {
+ wait_for_mb(disp, sstate, xpos + bw, ypos + bh, mv, ref_id)?;
+ }
+ let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index());
+ let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index());
+ do_p_mc(frm, xpos, ypos, bw, bh, mv, rpic, weight, mc_dsp);
+ // second partition: its bottom-right corner is the macroblock corner
+ let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0];
+ if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[1].index()) {
+ wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv, ref_id)?;
+ }
+ let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[1].index());
+ let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[1].index());
+ do_p_mc(frm, xpos + bx, ypos + by, bw, bh, mv, rpic, weight, mc_dsp);
+ },
+ MBType::P8x8 | MBType::P8x8Ref0 => {
+ for part in 0..4 {
+ // offset of the 8x8 partition inside the macroblock
+ let bx = (part & 1) * 8;
+ let by = (part & 2) * 4;
+ let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0];
+ let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index());
+ let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[part].index());
+
+ match mb_info.sub_mb_type[part] {
+ SubMBType::P8x8 => {
+ if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) {
+ wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 8, mv, ref_id)?;
+ }
+ do_p_mc(frm, xpos + bx, ypos + by, 8, 8, mv, rpic, weight, mc_dsp);
+ },
+ SubMBType::P8x4 => {
+ if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) {
+ wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 4, mv, ref_id)?;
+ }
+ do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic, weight, mc_dsp);
+ // lower half uses the vector of the 4x4 block one row below
+ let mv = sstate.get_cur_blk4(bx / 4 + by + 4).mv[0];
+ if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) {
+ wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 8, mv, ref_id)?;
+ }
+ do_p_mc(frm, xpos + bx, ypos + by + 4, 8, 4, mv, rpic, weight, mc_dsp);
+ },
+ SubMBType::P4x8 => {
+ if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) {
+ wait_for_mb(disp, sstate, xpos + bx + 4, ypos + by + 8, mv, ref_id)?;
+ }
+ do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic, weight, mc_dsp);
+ // right half uses the vector of the next 4x4 block in the same row
+ let mv = sstate.get_cur_blk4(bx / 4 + by + 1).mv[0];
+ if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) {
+ wait_for_mb(disp, sstate, xpos + bx + 8, ypos + by + 8, mv, ref_id)?;
+ }
+ do_p_mc(frm, xpos + bx + 4, ypos + by, 4, 8, mv, rpic, weight, mc_dsp);
+ },
+ SubMBType::P4x4 => {
+ for sb_no in 0..4 {
+ let sxpos = xpos + bx + (sb_no & 1) * 4;
+ let sypos = ypos + by + (sb_no & 2) * 2;
+ let sblk_no = (bx / 4 + (sb_no & 1)) + ((by / 4) + (sb_no >> 1)) * 4;
+ let mv = sstate.get_cur_blk4(sblk_no).mv[0];
+ if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[part].index()) {
+ wait_for_mb(disp, sstate, sxpos + 4, sypos + 4, mv, ref_id)?;
+ }
+ do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic, weight, mc_dsp);
+ }
+ },
+ _ => unreachable!(),
+ };
+ }
+ },
+ MBType::B16x16(mode) => {
+ let mv0 = sstate.get_cur_blk4(0).mv[0];
+ let rpic0 = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index());
+ let mv1 = sstate.get_cur_blk4(0).mv[1];
+ let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[0].index());
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, mb_info.ref_l0[0], mb_info.ref_l1[0]);
+ wait_b_mc(disp, sstate, frame_refs, [mv0, mv1], [mb_info.ref_l0[0], mb_info.ref_l1[0]], xpos, ypos, 16, 16)?;
+ do_b_mc(frm, mode, xpos, ypos, 16, 16, mv0, rpic0, &weight0, mv1, rpic1, &weight1, mc_dsp);
+ },
+ MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
+ let (pw, ph) = mb_info.mb_type.size();
+ // step to the second partition: non-zero only in the direction the macroblock is split
+ let (px, py) = (pw & 8, ph & 8);
+ let modes = [mode0, mode1];
+ let (mut bx, mut by) = (0, 0);
+ for part in 0..2 {
+ let blk = if part == 0 { 0 } else { (px / 4) + py };
+ let mv0 = sstate.get_cur_blk4(blk).mv[0];
+ let rpic0 = frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index());
+ let mv1 = sstate.get_cur_blk4(blk).mv[1];
+ let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[part].index());
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, modes[part], weight_mode, mb_info.ref_l0[part], mb_info.ref_l1[part]);
+ wait_b_mc(disp, sstate, frame_refs, [mv0, mv1], [mb_info.ref_l0[part], mb_info.ref_l1[part]], xpos + bx, ypos + by, pw, ph)?;
+ do_b_mc(frm, modes[part], xpos + bx, ypos + by, pw, ph, mv0, rpic0, &weight0, mv1, rpic1, &weight1, mc_dsp);
+ bx += px;
+ by += py;
+ }
+ },
+ MBType::Direct | MBType::BSkip => {
+ // wait for the macroblock at the same position in the list 1 reference before its info is read below
+ if let Some(ref_id) = frame_refs.get_ref_id(1, mb_info.ref_l1[0].index()) {
+ wait_for_mb(disp, sstate, xpos, ypos, ZERO_MV, ref_id)?;
+ }
+ let colo_mb_type = frame_refs.get_colocated_info(sstate.mb_x, sstate.mb_y).0.mb_type;
+ let is_16x16 = colo_mb_type.is_16x16_ref();
+
+ if is_16x16 {
+ // one vector pair for the whole macroblock
+ let mv = sstate.get_cur_blk4(0).mv;
+ let ref_idx = sstate.get_cur_blk8(0).ref_idx;
+ let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index());
+ let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index());
+ // NOTE(review): these two waits use mb_info.ref_l0/ref_l1 while the wait_b_mc call below uses ref_idx for the same area - confirm both are needed
+ if let Some(ref_id) = frame_refs.get_ref_id(0, mb_info.ref_l0[0].index()) {
+ wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv[0], ref_id)?;
+ }
+ if let Some(ref_id) = frame_refs.get_ref_id(1, mb_info.ref_l1[0].index()) {
+ wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv[1], ref_id)?;
+ }
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]);
+ wait_b_mc(disp, sstate, frame_refs, mv, ref_idx, xpos, ypos, 16, 16)?;
+ do_b_mc(frm, BMode::Bi, xpos, ypos, 16, 16, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
+ } else {
+ // every 4x4 block has its own vectors, references are stored per 8x8 block
+ for blk4 in 0..16 {
+ let mv = sstate.get_cur_blk4(blk4).mv;
+ let ref_idx = sstate.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx;
+ if let Some(ref_id) = frame_refs.get_ref_id(0, ref_idx[0].index()) {
+ wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv[0], ref_id)?;
+ }
+ if let Some(ref_id) = frame_refs.get_ref_id(1, ref_idx[1].index()) {
+ wait_for_mb(disp, sstate, xpos + 16, ypos + 16, mv[1], ref_id)?;
+ }
+ let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index());
+ let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index());
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]);
+ wait_b_mc(disp, sstate, frame_refs, mv, ref_idx, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, 4, 4)?;
+ do_b_mc(frm, BMode::Bi, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
+ }
+ }
+ // flag the references of all 8x8 blocks as coming from direct prediction
+ sstate.apply_to_blk8(|blk8| { blk8.ref_idx[0].set_direct(); blk8.ref_idx[1].set_direct(); });
+ },
+ MBType::B8x8 => {
+ for part in 0..4 {
+ let ridx = sstate.get_cur_blk8(part).ref_idx;
+ let rpic0 = frame_refs.select_ref_pic(0, ridx[0].index());
+ let rpic1 = frame_refs.select_ref_pic(1, ridx[1].index());
+ let subtype = mb_info.sub_mb_type[part];
+ // index of the top-left 4x4 block of this 8x8 partition
+ let blk8 = (part & 1) * 2 + (part & 2) * 4;
+ let mut bx = (part & 1) * 8;
+ let mut by = (part & 2) * 4;
+ match subtype {
+ SubMBType::Direct8x8 => {
+ for blk in 0..4 {
+ let mv = sstate.get_cur_blk4(bx / 4 + (by / 4) * 4).mv;
+ let ref_idx = sstate.get_cur_blk8(bx / 8 + (by / 8) * 2).ref_idx;
+ let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index());
+ let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index());
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]);
+ wait_b_mc(disp, sstate, frame_refs, mv, ref_idx, xpos + bx, ypos + by, 4, 4)?;
+ do_b_mc(frm, BMode::Bi, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
+ // advance in raster order inside the 8x8 partition
+ bx += 4;
+ if blk == 1 {
+ bx -= 8;
+ by += 4;
+ }
+ }
+ sstate.get_cur_blk8(part).ref_idx[0].set_direct();
+ sstate.get_cur_blk8(part).ref_idx[1].set_direct();
+ },
+ SubMBType::B8x8(mode) => {
+ let mv = sstate.get_cur_blk4(blk8).mv;
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]);
+ wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx, ypos + by, 8, 8)?;
+ do_b_mc(frm, mode, xpos + bx, ypos + by, 8, 8, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
+ },
+ SubMBType::B8x4(mode) | SubMBType::B4x8(mode) => {
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]);
+ let (pw, ph) = subtype.size();
+ let mv = sstate.get_cur_blk4(blk8).mv;
+ wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx, ypos + by, pw, ph)?;
+ do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
+ // second half: (pw & 4) and (ph & 4) are non-zero only for the dimension equal to four
+ let addr2 = blk8 + (pw & 4) / 4 + (ph & 4);
+ let mv = sstate.get_cur_blk4(addr2).mv;
+ wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph)?;
+ do_b_mc(frm, mode, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
+ },
+ SubMBType::B4x4(mode) => {
+ let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]);
+ for i in 0..4 {
+ let addr2 = blk8 + (i & 1) + (i & 2) * 2;
+ let mv = sstate.get_cur_blk4(addr2).mv;
+ wait_b_mc(disp, sstate, frame_refs, mv, ridx, xpos + bx, ypos + by, 4, 4)?;
+ do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, mc_dsp);
+ bx += 4;
+ if i == 1 {
+ bx -= 8;
+ by += 4;
+ }
+ }
+ },
+ _ => unreachable!(),
+ };
+ }
+ },
+ };
+ // skipped macroblocks get no add_luma/add_chroma call
+ if !mb_info.mb_type.is_skip() {
+ // add_luma is not called here for Intra4x4 and Intra8x8 macroblocks
+ if mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 {
+ add_luma(frm, sstate, mb_info);
+ }
+ add_chroma(frm, sstate, mb_info);
+ }
+ Ok(())
+}
--- /dev/null
+mod types;
+pub use types::*;
+mod pic_ref;
+pub use pic_ref::*;
+#[allow(clippy::identity_op)]
+#[allow(clippy::erasing_op)]
+#[allow(clippy::many_single_char_names)]
+#[allow(clippy::range_plus_one)]
+mod dsp;
+use dsp::*;
+mod cabac;
+use cabac::*;
+use super::cabac_coder::*;
+mod cavlc;
+use cavlc::*;
+mod loopfilter;
+use loopfilter::*;
+mod mb_recon;
+use mb_recon::*;
+use super::sets::*;
+use super::slice::*;
+
+pub mod decoder_st;
+mod dispatch;
+pub mod decoder_mt;
+
+use super::common_types::*;
+
--- /dev/null
+use nihav_core::codecs::DecoderResult;
+use nihav_core::frame::{FrameType, NAVideoBufferRef, NATimeInfo};
+use nihav_core::refs::*;
+use nihav_codec_support::codecs::MV;
+use super::super::common_types::*;
+use super::super::sets::SeqParameterSet;
+use super::super::slice::*;
+use super::types::*;
+
+/// Information about a decoded picture that is kept for use as a reference.
+#[derive(Clone)]
+pub struct PictureInfo {
+ // short ID; reference marking and list reordering look pictures up by this value
+ pub id: u16,
+ // wide ID; short-term references are kept sorted by it and it is compared against the current picture ID
+ pub full_id: u32,
+ pub time: NATimeInfo,
+ pub user_id: u32,
+ pub pic_type: FrameType,
+ // decoded picture data
+ pub buf: NAVideoBufferRef<u16>,
+ pub cur_mb: usize,
+ pub is_ref: bool,
+ pub is_idr: bool,
+ // `Some(..)` marks the picture as a long-term reference (presumably holding its long-term index - TODO confirm)
+ pub long_term: Option<usize>,
+
+ // per-macroblock motion data, read when co-located information is requested
+ pub mv_info: NABufferRef<FrameMV>,
+}
+
+/// Motion information stored for one macroblock of a decoded picture.
+#[derive(Clone,Copy,Default, Debug)]
+pub struct FrameMBInfo {
+ pub mb_type: CompactMBType,
+ // four entries with one pair of values each (presumably per 8x8 block and per reference list - TODO confirm)
+ pub ref_poc: [[u16; 2]; 4],
+ pub ref_idx: [[PicRef; 2]; 4],
+ // sixteen entries with one pair of vectors each (presumably per 4x4 block and per reference list - TODO confirm)
+ pub mv: [[MV; 2]; 16],
+}
+
+impl FrameMBInfo {
+    /// Creates macroblock information with all fields at their default values.
+    pub fn new() -> Self {
+        Default::default()
+    }
+}
+
+/// Motion information for all macroblocks of a picture.
+#[derive(Clone)]
+pub struct FrameMV {
+ // macroblock entries in raster order, `mb_stride` entries per row
+ pub mbs: Vec<FrameMBInfo>,
+ // number of entries per macroblock row
+ pub mb_stride: usize,
+}
+
+impl FrameMV {
+    /// Creates storage for a picture of `mb_w` by `mb_h` macroblocks filled with default entries.
+    pub fn new(mb_w: usize, mb_h: usize) -> Self {
+        let total_mbs = mb_w * mb_h;
+        let mbs = vec![FrameMBInfo::default(); total_mbs];
+        Self { mbs, mb_stride: mb_w }
+    }
+}
+
+/// Reference picture lists selected for a slice.
+#[derive(Clone)]
+pub struct SliceRefs {
+ // list 0 entries; `None` stands for a reference that could not be found
+ pub ref_list0: Vec<Option<PictureInfo>>,
+ // list 1 entries; `None` stands for a reference that could not be found
+ pub ref_list1: Vec<Option<PictureInfo>>,
+ // ID of the picture the lists were built for
+ pub cur_id: u32,
+}
+
+// Queries on the reference lists of a slice.
+#[allow(dead_code)]
+impl SliceRefs {
+    /// Returns the full ID of entry `ref_id` in list 0 (`list_id == 0`) or list 1 (any other value).
+    ///
+    /// `None` is returned for an index outside the list and for an empty entry.
+    pub fn get_ref_id(&self, list_id: u8, ref_id: usize) -> Option<u32> {
+        let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 };
+        ref_list.get(ref_id).and_then(|entry| entry.as_ref()).map(|pic| pic.full_id)
+    }
+    /// Returns a new handle to the frame buffer of entry `ref_id` in the selected list, if present.
+    pub fn select_ref_pic(&self, list_id: u8, ref_id: usize) -> Option<NAVideoBufferRef<u16>> {
+        let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 };
+        ref_list.get(ref_id).and_then(|entry| entry.as_ref()).map(|pic| pic.buf.clone())
+    }
+    /// Returns the stored information for macroblock (`mb_x`, `mb_y`) of the first list 1 picture
+    /// together with the low 16 bits of that picture's ID and its long-term flag.
+    ///
+    /// When list 1 is empty or its first entry is missing, default information is returned.
+    pub fn get_colocated_info(&self, mb_x: usize, mb_y: usize) -> (FrameMBInfo, u16, bool) {
+        // `first()` rather than `[0]`: the list may be empty and indexing would panic
+        if let Some(ref_pic) = self.ref_list1.first().and_then(|entry| entry.as_ref()) {
+            let mv_info = &ref_pic.mv_info;
+            let mb = mv_info.mbs[mb_x + mb_y * mv_info.mb_stride];
+            (mb, ref_pic.full_id as u16, ref_pic.long_term.is_some())
+        } else {
+            (FrameMBInfo::default(), 0, false)
+        }
+    }
+    /// Finds the first list 0 entry whose ID (low 16 bits) equals `ref0_id`.
+    ///
+    /// Returns its index and long-term flag; index zero and `false` when nothing matches.
+    pub fn map_ref0(&self, ref0_id: u16) -> (PicRef, bool) {
+        let found = self.ref_list0.iter().enumerate().find_map(|(i, entry)| match entry {
+            Some(pic) if (pic.full_id as u16) == ref0_id => Some((i as u8, pic.long_term.is_some())),
+            _ => None,
+        });
+        let (r0_idx, long) = found.unwrap_or((0, false));
+        (PicRef::new(r0_idx), long)
+    }
+    /// Converts a pair of reference indices into picture IDs (low 16 bits),
+    /// using `MISSING_POC` for entries that do not exist.
+    pub fn map_refs(&self, ref_idx: [PicRef; 2]) -> [u16; 2] {
+        let lookup = |list: &[Option<PictureInfo>], idx: usize| match list.get(idx) {
+            Some(Some(pic)) => pic.full_id as u16,
+            _ => MISSING_POC,
+        };
+        let ref0 = lookup(&self.ref_list0[..], ref_idx[0].index());
+        let ref1 = lookup(&self.ref_list1[..], ref_idx[1].index());
+        [ref0, ref1]
+    }
+    /// Tells whether two pairs of references point to the same pictures.
+    pub fn cmp_refs(&self, ref1: [PicRef; 2], ref2: [PicRef; 2]) -> bool {
+        ref1 == ref2 || (self.cmp_ref(ref1[0], ref2[0], 0) && self.cmp_ref(ref1[1], ref2[1], 1))
+    }
+    /// Compares two references from the same list.
+    ///
+    /// They match when they are equal, have the same index, or select entries with
+    /// the same picture ID; out-of-range indices and missing pictures never match.
+    fn cmp_ref(&self, ref1: PicRef, ref2: PicRef, list: u8) -> bool {
+        if ref1 == ref2 {
+            return true;
+        }
+        let idx0 = ref1.index();
+        let idx1 = ref2.index();
+        if idx0 == idx1 {
+            return true;
+        }
+        let src = if list == 0 { &self.ref_list0 } else { &self.ref_list1 };
+        match (src.get(idx0), src.get(idx1)) {
+            (Some(Some(pic0)), Some(Some(pic1))) => pic0.full_id == pic1.full_id,
+            _ => false,
+        }
+    }
+}
+
+/// Borrowed, reduced form of `PictureInfo`.
+#[derive(Clone)]
+pub struct SimplePictureInfo<'a> {
+ pub full_id: u32,
+ // borrowed view of the picture data
+ pub buf: SimpleFrame<'a>,
+ // set when the source picture has a long-term value
+ pub long_term: bool,
+ pub mv_info: &'a FrameMV,
+}
+
+/// Reference lists of a slice that borrow from a `SliceRefs` instead of owning the pictures.
+#[derive(Clone)]
+pub struct SimplifiedSliceRefs<'a> {
+ pub ref_list0: Vec<Option<SimplePictureInfo<'a>>>,
+ pub ref_list1: Vec<Option<SimplePictureInfo<'a>>>,
+ // copied from the source `SliceRefs`
+ pub cur_id: u32,
+}
+
+impl<'a> SimplifiedSliceRefs<'a> {
+    /// Builds borrowed reference lists from `srefs`, keeping the order and the empty entries.
+    pub fn new(srefs: &'a SliceRefs) -> Self {
+        // converts one list entry into its borrowed form
+        fn simplify<'b>(entry: &'b Option<PictureInfo>) -> Option<SimplePictureInfo<'b>> {
+            entry.as_ref().map(|pic| SimplePictureInfo {
+                full_id: pic.full_id,
+                buf: SimpleFrame::new(&pic.buf),
+                long_term: pic.long_term.is_some(),
+                mv_info: &pic.mv_info,
+            })
+        }
+        Self {
+            ref_list0: srefs.ref_list0.iter().map(simplify).collect(),
+            ref_list1: srefs.ref_list1.iter().map(simplify).collect(),
+            cur_id: srefs.cur_id,
+        }
+    }
+    /// Returns the full ID of entry `ref_id` in list 0 (`list_id == 0`) or list 1 (any other value).
+    ///
+    /// `None` is returned for an index outside the list and for an empty entry.
+    pub fn get_ref_id(&self, list_id: u8, ref_id: usize) -> Option<u32> {
+        let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 };
+        ref_list.get(ref_id).and_then(|entry| entry.as_ref()).map(|pic| pic.full_id)
+    }
+    /// Returns the frame view of entry `ref_id` in the selected list, if present.
+    pub fn select_ref_pic(&self, list_id: u8, ref_id: usize) -> Option<&SimpleFrame<'_>> {
+        let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 };
+        ref_list.get(ref_id).and_then(|entry| entry.as_ref()).map(|pic| &pic.buf)
+    }
+    /// Returns the stored information for macroblock (`mb_x`, `mb_y`) of the first list 1 picture
+    /// together with the low 16 bits of that picture's ID and its long-term flag.
+    ///
+    /// When list 1 is empty or its first entry is missing, default information is returned.
+    pub fn get_colocated_info(&self, mb_x: usize, mb_y: usize) -> (FrameMBInfo, u16, bool) {
+        // `first()` rather than `[0]`: the list may be empty and indexing would panic
+        if let Some(ref_pic) = self.ref_list1.first().and_then(|entry| entry.as_ref()) {
+            let mv_info = ref_pic.mv_info;
+            let mb = mv_info.mbs[mb_x + mb_y * mv_info.mb_stride];
+            (mb, ref_pic.full_id as u16, ref_pic.long_term)
+        } else {
+            (FrameMBInfo::default(), 0, false)
+        }
+    }
+    /// Finds the first list 0 entry whose ID (low 16 bits) equals `ref0_id`.
+    ///
+    /// Returns its index and long-term flag; index zero and `false` when nothing matches.
+    pub fn map_ref0(&self, ref0_id: u16) -> (PicRef, bool) {
+        let found = self.ref_list0.iter().enumerate().find_map(|(i, entry)| match entry {
+            Some(pic) if (pic.full_id as u16) == ref0_id => Some((i as u8, pic.long_term)),
+            _ => None,
+        });
+        let (r0_idx, long) = found.unwrap_or((0, false));
+        (PicRef::new(r0_idx), long)
+    }
+    /// Converts a pair of reference indices into picture IDs (low 16 bits),
+    /// using `MISSING_POC` for entries that do not exist.
+    pub fn map_refs(&self, ref_idx: [PicRef; 2]) -> [u16; 2] {
+        let lookup = |list: &[Option<SimplePictureInfo<'a>>], idx: usize| match list.get(idx) {
+            Some(Some(pic)) => pic.full_id as u16,
+            _ => MISSING_POC,
+        };
+        let ref0 = lookup(&self.ref_list0[..], ref_idx[0].index());
+        let ref1 = lookup(&self.ref_list1[..], ref_idx[1].index());
+        [ref0, ref1]
+    }
+    /// Tells whether two pairs of references point to the same pictures.
+    pub fn cmp_refs(&self, ref1: [PicRef; 2], ref2: [PicRef; 2]) -> bool {
+        ref1 == ref2 || (self.cmp_ref(ref1[0], ref2[0], 0) && self.cmp_ref(ref1[1], ref2[1], 1))
+    }
+    /// Compares two references from the same list.
+    ///
+    /// They match when they are equal, have the same index, or select entries with
+    /// the same picture ID; out-of-range indices and missing pictures never match.
+    fn cmp_ref(&self, ref1: PicRef, ref2: PicRef, list: u8) -> bool {
+        if ref1 == ref2 {
+            return true;
+        }
+        let idx0 = ref1.index();
+        let idx1 = ref2.index();
+        if idx0 == idx1 {
+            return true;
+        }
+        let src = if list == 0 { &self.ref_list0 } else { &self.ref_list1 };
+        match (src.get(idx0), src.get(idx1)) {
+            (Some(Some(pic0)), Some(Some(pic1))) => pic0.full_id == pic1.full_id,
+            _ => false,
+        }
+    }
+}
+
+/// Reference picture storage together with the state used to number pictures.
+pub struct FrameRefs {
+ // short-term reference pictures; `add_short_term` inserts them ordered by `full_id`
+ pub ref_pics: Vec<PictureInfo>,
+ // reference lists built by `select_refs`
+ pub cur_refs: SliceRefs,
+ // long-term reference slots, `None` for an unused slot
+ pub long_term: Vec<Option<PictureInfo>>,
+
+ // state kept between `calc_picture_num` calls
+ prev_poc_msb: u32,
+ prev_poc_lsb: u16,
+ prev_ref_poc_lsb: u16,
+ prev_frame_num: u16,
+ frame_num_offset: u32,
+ // set to `1 << log2_max_frame_num` by `calc_picture_num`
+ max_frame_num: i32,
+}
+
+impl FrameRefs {
+ /// Creates empty reference storage with zeroed numbering state.
+ pub fn new() -> Self {
+     let cur_refs = SliceRefs {
+         ref_list0: Vec::with_capacity(3),
+         ref_list1: Vec::with_capacity(3),
+         cur_id: 0,
+     };
+     Self {
+         ref_pics: Vec::with_capacity(16),
+         cur_refs,
+         long_term: Vec::new(),
+
+         prev_poc_msb: 0,
+         prev_poc_lsb: 0,
+         prev_ref_poc_lsb: 0,
+         prev_frame_num: 0,
+         frame_num_offset: 0,
+         max_frame_num: 0,
+     }
+ }
+ /// Appends the IDs of all short-term and then all present long-term
+ /// reference pictures to `dst`, skipping IDs that are already there.
+ pub fn fill_ref_nums(&self, dst: &mut Vec<u32>) {
+     let all_ids = self.ref_pics.iter()
+         .chain(self.long_term.iter().flatten())
+         .map(|pic| pic.full_id);
+     for id in all_ids {
+         if !dst.contains(&id) {
+             dst.push(id);
+         }
+     }
+ }
+ /// Calculates the number used to order the current picture and updates the
+ /// state kept for the next call.
+ ///
+ /// The calculation depends on `sps.pic_order_cnt_type`:
+ /// * 0 - the number is built from the LSB part in the slice header and an MSB
+ ///   part tracked here, which changes when the LSB part wraps around;
+ /// * 1 - the number is derived from the frame number and the offsets in `sps`;
+ /// * other values - the number is the frame number plus the accumulated
+ ///   frame number wraparounds.
+ ///
+ /// `ref_id` equal to zero marks a picture that is not used for reference.
+ pub fn calc_picture_num(&mut self, slice_hdr: &SliceHeader, is_idr: bool, ref_id: u8, sps: &SeqParameterSet) -> u32 {
+     self.max_frame_num = 1 << sps.log2_max_frame_num;
+     match sps.pic_order_cnt_type {
+         0 => {
+             // NOTE(review): only the LSB part is reset on IDR, a reset of the MSB part
+             // was commented out in the original code - presumably on purpose, confirm
+             if is_idr {
+                 self.prev_poc_lsb = 0;
+             } else {
+                 self.prev_poc_lsb = self.prev_ref_poc_lsb;
+             }
+             let max_poc_lsb = 1 << sps.log2_max_pic_order_cnt_lsb;
+             let half_max_poc_lsb = 1 << (sps.log2_max_pic_order_cnt_lsb - 1);
+             let cur_lsb = slice_hdr.pic_order_cnt_lsb;
+             // The MSB part may go below zero and is stored wrapped around, so both
+             // directions have to use wrapping arithmetic: a plain addition overflows
+             // once a backward step has been taken.
+             let poc_msb = if cur_lsb < self.prev_poc_lsb && (self.prev_poc_lsb - cur_lsb >= half_max_poc_lsb) {
+                 self.prev_poc_msb.wrapping_add(max_poc_lsb)
+             } else if cur_lsb > self.prev_poc_lsb && (cur_lsb - self.prev_poc_lsb > half_max_poc_lsb) {
+                 self.prev_poc_msb.wrapping_sub(max_poc_lsb)
+             } else {
+                 self.prev_poc_msb
+             };
+             let poc = poc_msb.wrapping_add(u32::from(cur_lsb));
+             // only reference pictures update the prediction state
+             if ref_id != 0 {
+                 self.prev_ref_poc_lsb = slice_hdr.pic_order_cnt_lsb;
+                 self.prev_poc_msb = poc_msb;
+             }
+             poc
+         },
+         1 => {
+             // a decreasing frame number means it has wrapped around
+             let off = if self.prev_frame_num > slice_hdr.frame_num {
+                 self.frame_num_offset + (1 << sps.log2_max_frame_num)
+             } else {
+                 self.frame_num_offset
+             };
+             let mut anum = if sps.num_ref_frames_in_pic_order_cnt_cycle != 0 {
+                 (off as i32) + i32::from(slice_hdr.frame_num)
+             } else {
+                 0
+             };
+             if ref_id == 0 && anum > 0 {
+                 anum -= 1;
+             }
+             // `anum > 0` implies a non-zero cycle length, so the division is safe
+             let (poc_cycle_cnt, fno_in_poc_cycle) = if anum > 0 {
+                 let nrf = sps.num_ref_frames_in_pic_order_cnt_cycle as i32;
+                 ((anum - 1) / nrf, (anum - 1) % nrf)
+             } else {
+                 (0, 0)
+             };
+             // sum of the offsets over one full cycle
+             let mut expected_delta = 0;
+             for &offset in sps.offset_for_ref_frame[..sps.num_ref_frames_in_pic_order_cnt_cycle].iter() {
+                 expected_delta += offset;
+             }
+             let mut expected_poc = if anum > 0 {
+                 let mut sum = poc_cycle_cnt * expected_delta;
+                 for &offset in sps.offset_for_ref_frame[..=fno_in_poc_cycle as usize].iter() {
+                     sum += offset;
+                 }
+                 sum
+             } else {
+                 0
+             };
+             if ref_id == 0 {
+                 expected_poc += sps.offset_for_non_ref_pic;
+             }
+             // only the top field value is used as the result
+             let (top_id, _bottom_id) = if !slice_hdr.field_pic {
+                 let top_id = expected_poc + slice_hdr.delta_pic_order_cnt[0];
+                 let bot_id = top_id + sps.offset_for_top_to_bottom_field + slice_hdr.delta_pic_order_cnt[1];
+                 (top_id, bot_id)
+             } else if !slice_hdr.bottom_field {
+                 (expected_poc + slice_hdr.delta_pic_order_cnt[0], 0)
+             } else {
+                 (0, sps.offset_for_top_to_bottom_field + slice_hdr.delta_pic_order_cnt[1])
+             };
+             self.prev_frame_num = slice_hdr.frame_num;
+             self.frame_num_offset = off;
+             top_id as u32
+         },
+         _ => {
+             if slice_hdr.frame_num < self.prev_frame_num {
+                 self.frame_num_offset += 1 << sps.log2_max_frame_num;
+             }
+             self.prev_frame_num = slice_hdr.frame_num;
+             self.frame_num_offset + u32::from(slice_hdr.frame_num)
+         },
+     }
+ }
+ /// Executes the first `marking.num_ops` memory management operations.
+ ///
+ /// `cur_id` is the short ID of the current picture and `max_id_mask` the mask
+ /// applied to short IDs after subtraction. Always returns `Ok(())`.
+ pub fn apply_adaptive_marking(&mut self, marking: &AdaptiveMarking, cur_id: u16, max_id_mask: u16) -> DecoderResult<()> {
+     // snapshot taken before any removal so that operation 3 can still find
+     // pictures dropped by an earlier operation
+     let all_ref_pics = self.ref_pics.clone();
+
+     let args = marking.operation_arg.iter().zip(marking.operation_arg2.iter());
+     let ops = marking.memory_management_control_op.iter().zip(args).take(marking.num_ops);
+     for (&op, (&arg1, &arg2)) in ops {
+         match op {
+             1 => { // drop a short-term picture
+                 let src_id = cur_id.wrapping_sub(arg1) & max_id_mask;
+                 if let Some(idx) = self.ref_pics.iter().position(|pic| pic.id == src_id) {
+                     self.ref_pics.remove(idx);
+                 }
+             },
+             2 => { // mark long term picture as unused
+                 if let Some(slot) = self.long_term.get_mut(arg1 as usize) {
+                     *slot = None;
+                 }
+             },
+             3 => { // copy a short-term picture into a long-term slot
+                 let src_id = cur_id.wrapping_sub(arg1) & max_id_mask;
+
+                 let didx = arg2 as usize;
+                 if let Some(pic) = all_ref_pics.iter().find(|pic| pic.id == src_id) {
+                     if let Some(slot) = self.long_term.get_mut(didx) {
+                         *slot = Some(pic.clone());
+                     }
+                 }
+             },
+             4 => { // change the number of long-term slots
+                 self.long_term.resize(arg1 as usize, None);
+             },
+             5 => { // forget all references
+                 self.ref_pics.clear();
+                 self.long_term.clear();
+             },
+             6 => {
+                 // assign a long term index to current pic - done elsewhere
+             },
+             _ => {},
+         };
+     }
+     Ok(())
+ }
+ /// Forgets all long-term and short-term reference pictures.
+ pub fn clear_refs(&mut self) {
+     self.long_term.clear();
+     self.ref_pics.clear();
+ }
+ /// Builds the reference lists in `cur_refs` for a slice of picture `cur_id`.
+ ///
+ /// Intra slices get empty lists. When the slice header requests reordering the
+ /// list is formed by `form_ref_list`, otherwise the default order is used:
+ /// * P slices - short-term pictures from the end of `ref_pics` backwards;
+ /// * B slices, list 0 - pictures not after the current one in descending order
+ ///   followed by the pictures after it in ascending order;
+ /// * B slices, list 1 - the same two groups in the opposite order.
+ ///
+ /// Lists shorter than the active reference count are topped up from the
+ /// long-term slots. If list 1 has more than one entry and equals list 0,
+ /// its first two entries are swapped.
+ #[allow(clippy::cognitive_complexity)] // one long routine following the list construction steps
+ pub fn select_refs(&mut self, sps: &SeqParameterSet, slice_hdr: &SliceHeader, cur_id: u32) {
+     self.cur_refs.cur_id = cur_id;
+     self.cur_refs.ref_list0.clear();
+     self.cur_refs.ref_list1.clear();
+     // the mask for a 16-bit frame number cannot be calculated with a 16-bit shift
+     let pic_num_mask = if sps.log2_max_frame_num == 16 {
+             0xFFFF
+         } else {
+             (1 << sps.log2_max_frame_num) - 1
+         };
+
+     if slice_hdr.slice_type.is_intra() {
+         return;
+     }
+
+     // Index of the first short-term picture after the current one. It has to be
+     // the list length when there is no such picture, otherwise the nearest
+     // preceding picture would be treated as a following one.
+     let pivot = self.ref_pics.iter()
+         .position(|pic| pic.full_id > cur_id)
+         .unwrap_or(self.ref_pics.len());
+
+     if !slice_hdr.ref_pic_list_reordering_l0 {
+         let num_ref = slice_hdr.num_ref_idx_l0_active;
+         if slice_hdr.slice_type.is_p() {
+             self.cur_refs.ref_list0.extend(
+                 self.ref_pics.iter().rev().take(num_ref).map(|pic| Some(pic.clone())));
+         } else {
+             let before = self.ref_pics[..pivot].iter().rev();
+             let after = self.ref_pics[pivot..].iter();
+             self.cur_refs.ref_list0.extend(
+                 before.chain(after).take(num_ref).map(|pic| Some(pic.clone())));
+         }
+         // fill what is left with long-term slots (empty slots included)
+         let missing = num_ref.saturating_sub(self.cur_refs.ref_list0.len());
+         self.cur_refs.ref_list0.extend(self.long_term.iter().take(missing).cloned());
+     } else {
+         form_ref_list(&mut self.cur_refs.ref_list0,
+                       &self.ref_pics, &self.long_term,
+                       &slice_hdr.reordering_list_l0,
+                       slice_hdr.frame_num, pic_num_mask);
+     }
+
+     if !slice_hdr.slice_type.is_b() {
+         return;
+     }
+
+     if !slice_hdr.ref_pic_list_reordering_l1 {
+         let num_ref = slice_hdr.num_ref_idx_l1_active;
+         let after = self.ref_pics[pivot..].iter();
+         let before = self.ref_pics[..pivot].iter().rev();
+         self.cur_refs.ref_list1.extend(
+             after.chain(before).take(num_ref).map(|pic| Some(pic.clone())));
+         let missing = num_ref.saturating_sub(self.cur_refs.ref_list1.len());
+         self.cur_refs.ref_list1.extend(self.long_term.iter().take(missing).cloned());
+
+         let list0 = &self.cur_refs.ref_list0;
+         let list1 = &self.cur_refs.ref_list1;
+         let identical = list1.len() > 1 && list0.len() == list1.len()
+             && list0.iter().zip(list1.iter()).all(|pair| match pair {
+                     (Some(p1), Some(p2)) => p1.full_id == p2.full_id,
+                     (None, None) => true,
+                     _ => false,
+                 });
+         if identical {
+             self.cur_refs.ref_list1.swap(0, 1);
+         }
+     } else {
+         form_ref_list(&mut self.cur_refs.ref_list1,
+                       &self.ref_pics, &self.long_term,
+                       &slice_hdr.reordering_list_l1,
+                       slice_hdr.frame_num, pic_num_mask);
+     }
+ }
+ /// Adds `cpic` to the short-term references, keeping them ordered by `full_id`.
+ ///
+ /// When `num_ref_frames` pictures (or more) are already stored, one picture is
+ /// removed first: the one with the smallest short ID, where IDs above the ID
+ /// of `cpic` are counted as coming from before the frame number wraparound.
+ pub fn add_short_term(&mut self, cpic: PictureInfo, num_ref_frames: usize) {
+     if !self.ref_pics.is_empty() && self.ref_pics.len() >= num_ref_frames {
+         let base_id = i32::from(cpic.id);
+         let mut min_id = base_id;
+         let mut min_idx = 0;
+         for (i, pic) in self.ref_pics.iter().enumerate() {
+             let mut pic_id = i32::from(pic.id);
+             if pic_id > base_id {
+                 pic_id -= self.max_frame_num;
+             }
+             if pic_id < min_id {
+                 min_id = pic_id;
+                 min_idx = i;
+             }
+         }
+         self.ref_pics.remove(min_idx);
+     }
+     // Insert before the first picture that is not smaller. This also covers the
+     // empty list, appending at the end, and a picture smaller than all stored
+     // ones, which must go to the very beginning to keep the order.
+     let ins_pos = self.ref_pics.iter()
+         .position(|pic| pic.full_id >= cpic.full_id)
+         .unwrap_or(self.ref_pics.len());
+     self.ref_pics.insert(ins_pos, cpic);
+ }
+ /// Stores `cpic` in long-term slot `lt_idx`; does nothing when there is no such slot.
+ pub fn add_long_term(&mut self, lt_idx: usize, cpic: PictureInfo) {
+     if let Some(slot) = self.long_term.get_mut(lt_idx) {
+         *slot = Some(cpic);
+     }
+ }
+}
+
+/// Appends one entry to `ref_list` for each of the first `reord_info.num_ops`
+/// reordering operations.
+///
+/// Operations below two move a running short ID (starting at `cur_id`) down
+/// (operation 0) or up (operation 1) by the operation argument, mask it with
+/// `pic_num_mask` and pick the short-term picture with that ID. Other operations
+/// take the long-term slot with the given number. `None` is appended when the
+/// picture or the slot does not exist.
+fn form_ref_list(ref_list: &mut Vec<Option<PictureInfo>>, ref_pics: &[PictureInfo], long_term: &[Option<PictureInfo>], reord_info: &ReorderingInfo, cur_id: u16, pic_num_mask: u16) {
+    let mut ref_pic_id = cur_id;
+    let ops = reord_info.reordering_of_pic_nums_idc.iter()
+        .zip(reord_info.abs_diff_or_num.iter())
+        .take(reord_info.num_ops);
+    for (&op, &num) in ops {
+        let entry = if op < 2 {
+            let moved_id = if op == 0 {
+                ref_pic_id.wrapping_sub(num)
+            } else {
+                ref_pic_id.wrapping_add(num)
+            };
+            ref_pic_id = moved_id & pic_num_mask;
+            ref_pics.iter().find(|pic| pic.id == ref_pic_id).cloned()
+        } else {
+            long_term.get(num as usize).and_then(|slot| slot.clone())
+        };
+        ref_list.push(entry);
+    }
+}
--- /dev/null
+use nihav_core::frame::{NAVideoBuffer, NASimpleVideoFrame};
+use nihav_codec_support::codecs::{MV, ZERO_MV};
+use super::SimplifiedSliceRefs;
+use super::pic_ref::FrameMBInfo;
+use super::super::common_types::*;
+
+/// Read-only view of a three-plane video buffer.
+#[derive(Clone,Copy)]
+pub struct SimpleFrame<'a> {
+ // sample data of the whole buffer
+ pub data: &'a [u16],
+ // per-plane offsets as reported by the source buffer
+ pub offset: [usize; 3],
+ // per-plane strides as reported by the source buffer
+ pub stride: [usize; 3],
+}
+
+impl<'a> SimpleFrame<'a> {
+    /// Creates a view of `buf`, recording the offset and stride of its first three planes.
+    pub fn new(buf: &'a NAVideoBuffer<u16>) -> Self {
+        Self {
+            data: buf.get_data(),
+            offset: [buf.get_offset(0), buf.get_offset(1), buf.get_offset(2)],
+            stride: [buf.get_stride(0), buf.get_stride(1), buf.get_stride(2)],
+        }
+    }
+}
+
+pub type SliceState = SliceStateCommon<u16>;
+
+impl SliceState {
+ /// Creates the default slice state with `def_fill` set to zero.
+ pub fn new() -> Self {
+     let mut state = Self::new_default();
+     state.def_fill = 0;
+     state
+ }
+ /// Saves the border samples of the current macroblock from `frm`.
+ ///
+ /// For luma (16x16) and both chroma planes (8x8) the bottom row of the
+ /// macroblock goes into the top line buffer and its rightmost column into the
+ /// left buffer. The first left buffer entry receives the value the top line
+ /// held at the last column before being overwritten.
+ pub fn save_ipred_context(&mut self, frm: &NASimpleVideoFrame<u16>) {
+     let luma_stride = frm.stride[0];
+     let top_pos = self.mb_x * 16;
+     let mb_start = frm.offset[0] + top_pos + self.mb_y * 16 * luma_stride;
+     // has to be read before the top line is replaced below
+     self.left_y[0] = self.top_line_y[top_pos + 15];
+     self.top_line_y[top_pos..][..16].copy_from_slice(&frm.data[mb_start + luma_stride * 15..][..16]);
+     self.left_y[1..].iter_mut()
+         .zip(frm.data[mb_start..].chunks(luma_stride))
+         .for_each(|(dst, row)| *dst = row[15]);
+     for chroma in 0..2 {
+         let plane = chroma + 1;
+         let cstride = frm.stride[plane];
+         let top_pos = self.mb_x * 8;
+         let mb_start = frm.offset[plane] + top_pos + self.mb_y * 8 * cstride;
+         self.left_c[chroma][0] = self.top_line_c[chroma][top_pos + 7];
+         self.top_line_c[chroma][top_pos..][..8].copy_from_slice(&frm.data[mb_start + cstride * 7..][..8]);
+         self.left_c[chroma][1..].iter_mut()
+             .zip(frm.data[mb_start..].chunks(cstride))
+             .for_each(|(dst, row)| *dst = row[7]);
+     }
+ }
+ /// Fills `self.deblock` with edge masks for the sixteen 4x4 blocks of the current macroblock.
+ ///
+ /// Each entry combines two values: the upper nibble (0x10, 0x20, 0x30 or 0x40)
+ /// describes the top edge of the block and the lower nibble (1, 2, 3 or 4)
+ /// its left edge. The largest value is used on macroblock borders when either
+ /// side is intra, the next one for inner edges of intra macroblocks, then
+ /// edges where either block has coded coefficients, and the smallest one for
+ /// edges where only motion vectors or references differ. A zero nibble
+ /// means no mask was set for that edge.
+ /// (Presumably these are the deblocking filter strengths - confirm against the filter code.)
+ ///
+ /// * `frefs` - used to compare the reference pairs of neighbouring blocks
+ /// * `deblock_mode` - 1 makes the function return without touching `self.deblock`;
+ /// 2 additionally requires `has_top`/`has_left` for macroblock border edges
+ /// * `is_s` - when set, all edges are rated like intra ones
+ /// (presumably set for switching slices - confirm against the caller)
+ pub fn fill_deblock(&mut self, frefs: &SimplifiedSliceRefs, deblock_mode: u8, is_s: bool) {
+ if deblock_mode == 1 {
+ return;
+ }
+
+ self.deblock = [0; 16];
+
+ let tx8x8 = self.get_cur_mb().transform_8x8;
+
+ let cur_intra = self.get_cur_mb().mb_type.is_intra();
+ let left_intra = self.get_left_mb().mb_type.is_intra();
+ let mut top_intra = self.get_top_mb().mb_type.is_intra();
+
+ // all caches are 5x5 grids: row 0 holds the top neighbours, column 0 the left
+ // neighbours and the remaining 4x4 area the blocks of the current macroblock
+ let mut coded_cache = [false; 25];
+ let mut mvc = MVCache::default();
+ let mv_cache = &mut mvc.data;
+ let mut ref_cache = [[INVALID_REF; 2]; 25];
+
+ if self.mb_y != 0 || self.has_top {
+ for (x, (cc, mv)) in coded_cache[1..5].iter_mut().zip(mv_cache[1..5].iter_mut()).enumerate() {
+ let blk4 = self.get_top_blk4(x);
+ *cc = blk4.ncoded != 0;
+ *mv = blk4.mv;
+ // references are stored per 8x8 block, so odd columns reuse the entry to their left
+ if (x & 1) == 0 {
+ let blk8 = self.get_top_blk8(x / 2);
+ ref_cache[x + 1] = blk8.ref_idx;
+ } else {
+ ref_cache[x + 1] = ref_cache[x];
+ }
+ }
+ }
+ for (y, (ccs, mvs)) in coded_cache[5..].chunks_exact_mut(5).zip(
+ mv_cache[5..].chunks_exact_mut(5)).enumerate() {
+ if self.has_left || self.mb_x != 0 {
+ let blk4 = self.get_left_blk4(y * 4);
+ ccs[0] = blk4.ncoded != 0;
+ mvs[0] = blk4.mv;
+ // odd rows copy the left-neighbour reference from the row above
+ if (y & 1) == 0 {
+ let blk8 = self.get_left_blk8(y);
+ ref_cache[y * 5 + 5] = blk8.ref_idx;
+ } else {
+ ref_cache[y * 5 + 5] = ref_cache[y * 5];
+ }
+ }
+ for (x, (cc, mv)) in ccs[1..].iter_mut().zip(mvs[1..].iter_mut()).enumerate() {
+ let blk4 = self.get_cur_blk4(x + y * 4);
+ *cc = blk4.ncoded != 0;
+ *mv = blk4.mv;
+ // only the top-left 4x4 block of each 8x8 block reads the reference,
+ // the other three copy it from that cache position
+ ref_cache[x + 1 + (y + 1) * 5] = if ((x & 1) == 0) && ((y & 1) == 0) {
+ self.get_cur_blk8(x / 2 + y).ref_idx
+ } else {
+ ref_cache[(x & !1) + 1 + ((y & !1) + 1) * 5]
+ };
+ }
+ }
+
+ // walk over the four block rows, pairing each cache row with the one above it
+ for (y, (((top_ccs, cur_ccs), (top_mvs, cur_mvs)), (cur_refs, top_refs))) in
+ coded_cache.chunks_exact(5).take(4).zip(coded_cache[5..].chunks_exact(5)).zip(
+ mv_cache.chunks_exact(5).zip(mv_cache[5..].chunks_exact(5))).zip(
+ ref_cache[5..].chunks_exact(5).zip(ref_cache.chunks_exact(5))).enumerate() {
+ // top edges: inner rows are always eligible, the macroblock border only
+ // when there is a row above and it is usable in the current deblock mode
+ let can_do_top = y != 0 || (self.mb_y != 0 && (self.has_top || deblock_mode != 2));
+ // with the 8x8 transform only every second row has an edge to rate
+ if can_do_top && (!tx8x8 || (y & 1) == 0) {
+ if is_s || cur_intra || top_intra {
+ let val = if y == 0 { 0x40 } else { 0x30 };
+ for el in self.deblock[y * 4..][..4].iter_mut() { *el |= val; }
+ } else {
+ for (x, (((&cur_cc, &top_cc), (cur_mv, top_mv)), (&cur_ref, &top_ref))) in
+ cur_ccs[1..].iter().zip(top_ccs[1..].iter()).zip(
+ cur_mvs[1..].iter().zip(top_mvs[1..].iter())).zip(
+ cur_refs[1..].iter().zip(
+ top_refs[1..].iter())).take(4).enumerate() {
+ let mut mask = 0;
+ if cur_cc || top_cc {
+ mask = 0x20;
+ } else {
+ if mvdiff4(cur_mv, top_mv) || !frefs.cmp_refs(cur_ref, top_ref) {
+ mask = 0x10;
+ }
+ }
+ if mask != 0 {
+ self.deblock[y * 4 + x] = mask;
+ }
+ }
+ }
+ }
+ // left edges: each block is compared with its left neighbour in the same cache row
+ let mut lleft_intra = left_intra;
+ for (x, (((&cur_cc, &left_cc), (cur_mv, left_mv)), (&cur_ref, &left_ref))) in
+ cur_ccs[1..].iter().zip(cur_ccs.iter()).zip(
+ cur_mvs[1..].iter().zip(cur_mvs.iter())).zip(
+ cur_refs[1..].iter().zip(cur_refs.iter())).enumerate() {
+ let skip_8 = tx8x8 && (x & 1) != 0;
+ let can_do_left = x > 0 || self.has_left || (self.mb_x != 0 && deblock_mode != 2);
+ if !can_do_left {
+ continue;
+ }
+ let mut mask = 0;
+ if skip_8 {
+ // odd columns have no edge to rate with the 8x8 transform
+ } else if is_s || cur_intra || lleft_intra {
+ mask = if x == 0 { 4 } else { 3 };
+ } else if cur_cc || left_cc {
+ mask = 2;
+ } else {
+ if mvdiff4(cur_mv, left_mv) || !frefs.cmp_refs(cur_ref, left_ref) {
+ mask = 1;
+ }
+ }
+ if mask != 0 {
+ self.deblock[y * 4 + x] |= mask;
+ }
+ // after the first column the left neighbour belongs to the current macroblock
+ // NOTE(review): when the x == 0 edge is skipped by the `continue` above this
+ // update is skipped as well, so the x == 1 edge still tests `left_intra` - confirm this is intended
+ lleft_intra = cur_intra;
+ }
+ // rows below the first one have the current macroblock above them
+ top_intra = cur_intra;
+ }
+ }
+ /// Fills motion vectors and reference indices of the whole current macroblock using direct prediction.
+ ///
+ /// With `direct_8x8` set every 4x4 block is predicted from the block that
+ /// `BLK4_TO_D8` maps it to. Otherwise one prediction (made for block 0) is
+ /// shared by the whole macroblock when `temporal_mv` is not set or the
+ /// co-located macroblock reports `is_16x16_ref()`, and every 4x4 block is
+ /// predicted from its own position in the remaining case.
+ pub fn predict_direct_mb(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) {
+ let (col_mb, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
+ let single_pred = !direct_8x8 && (col_mb.mb_type.is_16x16_ref() || !temporal_mv);
+ if single_pred {
+ let (mv_l0, ref_l0, mv_l1, ref_l1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, 0);
+ self.apply_to_blk4(|blk4| blk4.mv = [mv_l0, mv_l1]);
+ self.apply_to_blk8(|blk8| blk8.ref_idx = [ref_l0, ref_l1]);
+ return;
+ }
+ for blk4 in 0..16 {
+ // the source block differs from the destination only in direct 8x8 mode
+ let src_blk = if direct_8x8 { BLK4_TO_D8[blk4] } else { blk4 };
+ let (mv_l0, ref_l0, mv_l1, ref_l1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, src_blk);
+ self.get_cur_blk4(blk4).mv = [mv_l0, mv_l1];
+ self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref_l0, ref_l1];
+ }
+ }
+ /// Applies direct prediction to a single 4x4 block of the current macroblock.
+ ///
+ /// The motion vectors are stored in block `blk4` and the references in the
+ /// 8x8 block containing it. With `direct8x8` set the prediction source is
+ /// the block `BLK4_TO_D8` maps `blk4` to, otherwise `blk4` itself.
+ pub fn predict_direct_sub(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) {
+ let src_blk = if direct8x8 { BLK4_TO_D8[blk4] } else { blk4 };
+ let (col_mb, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
+ let (mv_l0, ref_l0, mv_l1, ref_l1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, src_blk);
+ let dst_blk8 = blk4_to_blk8(blk4);
+ self.get_cur_blk4(blk4).mv = [mv_l0, mv_l1];
+ self.get_cur_blk8(dst_blk8).ref_idx = [ref_l0, ref_l1];
+ }
+ /// Derives the pair of motion vectors and references for a directly predicted block.
+ ///
+ /// * `frame_refs` - maps the POC of the co-located reference to a list 0 reference
+ /// * `mbi` - information about the co-located macroblock
+ /// * `r1_poc`, `r1_long` - POC and long-term flag obtained together with `mbi`
+ /// * `temporal_mv` - selects scaling of the co-located motion vector instead of
+ /// prediction from the neighbours of the current macroblock
+ /// * `cur_id` - identifier of the current picture, used as its POC in the scaling
+ /// * `blk4` - 4x4 block inside the co-located macroblock to take the data from
+ ///
+ /// Returns `(mv0, ref0, mv1, ref1)`.
+ #[allow(clippy::nonminimal_bool)]
+ pub fn get_direct_mv(&self, frame_refs: &SimplifiedSliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) {
+ let blk8 = blk4_to_blk8(blk4);
+ // take the co-located data from the first list if it is present there,
+ // from the second list otherwise and use zero MV when neither has it
+ let (col_mv, r0_poc, col_idx) = if mbi.ref_poc[blk8] == [MISSING_POC; 2] {
+ (ZERO_MV, MISSING_POC, MISSING_REF)
+ } else if mbi.ref_poc[blk8][0] != MISSING_POC {
+ (mbi.mv[blk4][0], mbi.ref_poc[blk8][0], mbi.ref_idx[blk8][0])
+ } else {
+ (mbi.mv[blk4][1], mbi.ref_poc[blk8][1], mbi.ref_idx[blk8][1])
+ };
+ let (col_ref, r0_long) = frame_refs.map_ref0(r0_poc);
+ if temporal_mv {
+ // distance between the two references
+ let td = (i32::from(r1_poc) - i32::from(r0_poc)).clamp(-128, 127);
+ if r0_long || td == 0 {
+ // no scaling for long-term references or when the distance is zero
+ (col_mv, col_ref, ZERO_MV, ZERO_REF)
+ } else {
+ let tx = (16384 + (td / 2).abs()) / td;
+ // distance between the current picture and the first reference
+ let tb = (i32::from(cur_id) - i32::from(r0_poc)).clamp(-128, 127);
+ // fixed-point scale factor, applied below with eight fractional bits
+ let scale = ((tb * tx + 32) >> 6).clamp(-1024, 1023);
+ let mv0 = MV {
+ x: ((i32::from(col_mv.x) * scale + 128) >> 8) as i16,
+ y: ((i32::from(col_mv.y) * scale + 128) >> 8) as i16,
+ };
+ let mv1 = mv0 - col_mv;
+ (mv0, col_ref, mv1, ZERO_REF)
+ }
+ } else {
+ let blk4 = 0; // we generate the same MV prediction for the whole MB
+ let blk8 = blk4_to_blk8(blk4);
+ let midx = self.get_cur_blk4_idx(blk4);
+ let ridx = self.get_cur_blk8_idx(blk8);
+ // position one row up and two 8x8 blocks to the right of the first block
+ let ridx_c = self.get_cur_blk8_idx(blk8) + 16 / 8 - self.blk8.stride;
+
+ // neighbours: A is to the left, B is above, C is above and four 4x4 blocks to the right
+ let mv_a = self.blk4.data[midx - 1].mv;
+ let mv_b = self.blk4.data[midx - self.blk4.stride].mv;
+ let mut mv_c = self.blk4.data[midx - self.blk4.stride + 16 / 4].mv;
+
+ let ref_a = self.blk8.data[ridx - 1].ref_idx;
+ let ref_b = self.blk8.data[ridx - self.blk8.stride].ref_idx;
+ let mut ref_c = self.blk8.data[ridx_c].ref_idx;
+
+ // fall back to the block above and to the left when C has no references at all
+ if ref_c == [MISSING_REF; 2] {
+ mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv;
+ ref_c = self.blk8.data[ridx - self.blk8.stride - 1].ref_idx;
+ }
+ // combine the neighbour references separately for each list using `min_pos()`
+ let mut refs = [INVALID_REF; 2];
+ for cur_ref in [ref_a, ref_b, ref_c].iter() {
+ refs[0] = refs[0].min_pos(cur_ref[0]);
+ refs[1] = refs[1].min_pos(cur_ref[1]);
+ }
+ // no neighbour provided a reference: use zero MVs with the zero reference in both lists
+ if refs == [INVALID_REF; 2] {
+ return (ZERO_MV, ZERO_REF, ZERO_MV, ZERO_REF);
+ }
+
+ // the co-located block counts as static when `r1_long` is not set, it uses
+ // the zero reference and both MV components are within -1..=1
+ let mut col_zero = true;
+ if r1_long || col_idx != ZERO_REF {
+ col_zero = false;
+ }
+ if col_mv.x.abs() > 1 || col_mv.y.abs() > 1 {
+ col_zero = false;
+ }
+ let mut mvs = [ZERO_MV; 2];
+ for ref_l in 0..2 {
+ // the MV stays zero when the list has no reference or when it uses
+ // the zero reference with a static co-located block (unless that block is intra)
+ if mbi.mb_type.is_intra() || (!refs[ref_l].not_avail() && !(refs[ref_l] == ZERO_REF && col_zero)) {
+ let ref_idx = refs[ref_l];
+ mvs[ref_l] = if ref_b[ref_l] == MISSING_REF && ref_c[ref_l] == MISSING_REF {
+ // only A can contribute
+ mv_a[ref_l]
+ } else {
+ // a single neighbour with the chosen reference supplies the MV directly,
+ // otherwise all three are combined with `MV::pred()`
+ let count = ((ref_a[ref_l] == ref_idx) as u8) + ((ref_b[ref_l] == ref_idx) as u8) + ((ref_c[ref_l] == ref_idx) as u8);
+ if count == 1 {
+ if ref_a[ref_l] == ref_idx {
+ mv_a[ref_l]
+ } else if ref_b[ref_l] == ref_idx {
+ mv_b[ref_l]
+ } else {
+ mv_c[ref_l]
+ }
+ } else {
+ MV::pred(mv_a[ref_l], mv_b[ref_l], mv_c[ref_l])
+ }
+ };
+ }
+ }
+ (mvs[0], refs[0], mvs[1], refs[1])
+ }
+ }
+}
use nihav_codec_support::codecs::{MV, ZERO_MV};
mod baseline;
+mod high;
+
mod cabac_coder;
mod common_types;
use common_types::*;
off
}
+/// Tells whether `avcC`-formatted extradata describes a stream with more than 8 bits per luma sample.
+///
+/// Every SPS stored in the record is parsed and `Ok(true)` is returned as soon
+/// as one of them reports `bit_depth_luma > 8`. The PPS entries and the
+/// optional trailer read for profiles 100, 110, 122 and 144 are only walked
+/// through and validated.
+///
+/// Extradata that is too short or does not start with the `avcC` tag yields
+/// `Ok(false)`. Read failures, failed validation (including NAL lengths that
+/// do not fit into the extradata) and SPS parsing errors are returned as errors.
+fn is_high_bitdepth(edata: &[u8]) -> DecoderResult<bool> {
+ if edata.len() > 11 && &edata[0..4] == b"avcC" {
+ let mut br = MemoryReader::new_read(edata);
+ let mut nal_buf = Vec::new();
+
+ br.read_skip(4)?;
+ let version = br.read_byte()?;
+ validate!(version == 1);
+ let profile = br.read_byte()?;
+ let _compatibility = br.read_byte()?;
+ let _level = br.read_byte()?;
+ let _b = br.read_byte()?;
+ //validate!((b & 0xFC) == 0xFC);
+ //self.nal_len = (b & 3) + 1;
+ let b = br.read_byte()?;
+ //validate!((b & 0xE0) == 0xE0);
+ let num_sps = (b & 0x1F) as usize;
+ for _ in 0..num_sps {
+ let len = br.read_u16be()? as usize;
+ let offset = br.tell() as usize;
+ // `len` comes from the stream, so it has to be checked before slicing to avoid a panic
+ validate!(offset <= edata.len() && len <= edata.len() - offset);
+ validate!((br.peek_byte()? & 0x1F) == 7);
+ let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
+ br.read_skip(len)?;
+ // the first byte is skipped below, so the unescaped NAL must not be empty
+ validate!(!nal_buf.is_empty());
+ let sps = sets::parse_sps(&nal_buf[1..])?;
+ if sps.bit_depth_luma > 8 {
+ return Ok(true);
+ }
+ }
+ let num_pps = br.read_byte()? as usize;
+ for _ in 0..num_pps {
+ let len = br.read_u16be()? as usize;
+ let offset = br.tell() as usize;
+ // same length check as for the SPS entries above
+ validate!(offset <= edata.len() && len <= edata.len() - offset);
+ validate!((br.peek_byte()? & 0x1F) == 8);
+ let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
+ br.read_skip(len)?;
+ }
+ if br.left() > 0 {
+ match profile {
+ 100 | 110 | 122 | 144 => {
+ let b = br.read_byte()?;
+ // some encoders put something different here
+ if (b & 0xFC) != 0xFC {
+ return Ok(false);
+ }
+ // b & 3 -> chroma format
+ let b = br.read_byte()?;
+ validate!((b & 0xF8) == 0xF8);
+ // b & 7 -> luma depth minus 8
+ let b = br.read_byte()?;
+ validate!((b & 0xF8) == 0xF8);
+ // b & 7 -> chroma depth minus 8
+ let num_spsext = br.read_byte()? as usize;
+ for _ in 0..num_spsext {
+ let len = br.read_u16be()? as usize;
+ // parse spsext
+ br.read_skip(len)?;
+ }
+ },
+ _ => {},
+ };
+ }
+ }
+ Ok(false)
+}
+
 struct STDecoderWrapper {
 h264: Box<baseline::decoder_st::H264Decoder>,
+ // decoder for streams with more than 8 bits per luma sample;
+ // stays `None` unless `init()` detects such a stream
+ h264_hi: Option<Box<high::decoder_st::H264Decoder>>,
 }
impl NADecoder for STDecoderWrapper {
fn init(&mut self, supp: &mut NADecoderSupport, info: NACodecInfoRef) -> DecoderResult<()> {
- self.h264.init(supp, info)
+ let is_high_depth = if let Some(ref edata) = info.get_extradata() {
+ is_high_bitdepth(edata).unwrap_or(false)
+ } else {
+ false
+ };
+ if !is_high_depth {
+ self.h264.init(supp, info)
+ } else {
+ let mut h264_hi = Box::new(high::decoder_st::H264Decoder::new());
+ h264_hi.init(supp, info)?;
+ self.h264_hi = Some(h264_hi);
+ Ok(())
+ }
}
fn decode(&mut self, supp: &mut NADecoderSupport, pkt: &NAPacket) -> DecoderResult<NAFrameRef> {
- self.h264.decode(supp, pkt)
+ if let Some(ref mut h264_hi) = self.h264_hi {
+ h264_hi.decode(supp, pkt)
+ } else {
+ self.h264.decode(supp, pkt)
+ }
}
fn flush(&mut self) {
+ if let Some(ref mut h264_hi) = self.h264_hi {
+ h264_hi.flush()
+ }
self.h264.flush();
}
}
impl NAOptionHandler for STDecoderWrapper {
fn get_supported_options(&self) -> &[NAOptionDefinition] {
- self.h264.get_supported_options()
+ if let Some(ref h264_hi) = self.h264_hi {
+ h264_hi.get_supported_options()
+ } else {
+ self.h264.get_supported_options()
+ }
}
fn set_options(&mut self, options: &[NAOption]) {
- self.h264.set_options(options);
+ if let Some(ref mut h264_hi) = self.h264_hi {
+ h264_hi.set_options(options);
+ } else {
+ self.h264.set_options(options);
+ }
}
fn query_option_value(&self, name: &str) -> Option<NAValue> {
- self.h264.query_option_value(name)
+ if let Some(ref h264_hi) = self.h264_hi {
+ h264_hi.query_option_value(name)
+ } else {
+ self.h264.query_option_value(name)
+ }
}
}
 pub fn get_decoder() -> Box<dyn NADecoder + Send> {
 Box::new(STDecoderWrapper {
 h264: Box::new(baseline::decoder_st::H264Decoder::new()),
+ // the high bit depth decoder is created in `init()` only when the stream needs it
+ h264_hi: None,
 })
 }
 struct MTDecoderWrapper {
 h264: Box<baseline::decoder_mt::H264MTDecoder>,
+ // multi-threaded decoder for streams with more than 8 bits per luma sample;
+ // stays `None` unless `init()` detects such a stream
+ h264_hi: Option<Box<high::decoder_mt::H264MTDecoder>>,
 }
impl NADecoderMT for MTDecoderWrapper {
fn init(&mut self, supp: &mut NADecoderSupport, info: NACodecInfoRef, nthreads: usize) -> DecoderResult<()> {
- self.h264.init(supp, info, nthreads)
+ let is_high_depth = if let Some(ref edata) = info.get_extradata() {
+ is_high_bitdepth(edata).unwrap_or(false)
+ } else {
+ false
+ };
+ if !is_high_depth {
+ self.h264.init(supp, info, nthreads)
+ } else {
+ let mut h264_hi = Box::new(high::decoder_mt::H264MTDecoder::new());
+ h264_hi.init(supp, info, nthreads)?;
+ self.h264_hi = Some(h264_hi);
+ Ok(())
+ }
}
fn can_take_input(&mut self) -> bool {
- self.h264.can_take_input()
+ if let Some(ref mut h264_hi) = self.h264_hi {
+ h264_hi.can_take_input()
+ } else {
+ self.h264.can_take_input()
+ }
}
fn queue_pkt(&mut self, supp: &mut NADecoderSupport, pkt: &NAPacket, user_id: u32) -> DecoderResult<bool> {
- self.h264.queue_pkt(supp, pkt, user_id)
+ if let Some(ref mut h264_hi) = self.h264_hi {
+ h264_hi.queue_pkt(supp, pkt, user_id)
+ } else {
+ self.h264.queue_pkt(supp, pkt, user_id)
+ }
}
fn has_output(&mut self) -> bool {
- self.h264.has_output()
+ if let Some(ref mut h264_hi) = self.h264_hi {
+ h264_hi.has_output()
+ } else {
+ self.h264.has_output()
+ }
}
fn get_frame(&mut self) -> (DecoderResult<NAFrameRef>, u32) {
- self.h264.get_frame()
+ if let Some(ref mut h264_hi) = self.h264_hi {
+ h264_hi.get_frame()
+ } else {
+ self.h264.get_frame()
+ }
}
fn flush(&mut self) {
+ if let Some(ref mut h264_hi) = self.h264_hi {
+ h264_hi.flush();
+ }
self.h264.flush();
}
}
impl NAOptionHandler for MTDecoderWrapper {
fn get_supported_options(&self) -> &[NAOptionDefinition] {
- self.h264.get_supported_options()
+ if let Some(ref h264_hi) = self.h264_hi {
+ h264_hi.get_supported_options()
+ } else {
+ self.h264.get_supported_options()
+ }
}
fn set_options(&mut self, options: &[NAOption]) {
- self.h264.set_options(options);
+ if let Some(ref mut h264_hi) = self.h264_hi {
+ h264_hi.set_options(options);
+ } else {
+ self.h264.set_options(options);
+ }
}
fn query_option_value(&self, name: &str) -> Option<NAValue> {
- self.h264.query_option_value(name)
+ if let Some(ref h264_hi) = self.h264_hi {
+ h264_hi.query_option_value(name)
+ } else {
+ self.h264.query_option_value(name)
+ }
}
}
 pub fn get_decoder_mt() -> Box<dyn NADecoderMT + Send> {
 Box::new(MTDecoderWrapper {
 h264: Box::new(baseline::decoder_mt::H264MTDecoder::new()),
+ // the high bit depth decoder is created in `init()` only when the stream needs it
+ h264_hi: None,
 })
 }
[0x26078d38, 0xf6a59d57, 0xcd14eaf8, 0x8eb08259],
[0x31494337, 0x6f8d3f52, 0x4bc9ff92, 0x0c601b1c]]));
}
+ // a sample from libav FATE suite
+ #[test]
+ fn test_h264_10_bit() {
+ let mut dec_reg = RegisteredDecoders::new();
+ itu_register_all_decoders(&mut dec_reg);
+ let mut dmx_reg = RegisteredDemuxers::new();
+ dmx_reg.add_demuxer(&RawH264DemuxerCreator{});
+ generic_register_all_demuxers(&mut dmx_reg);
+ // expected MD5 values, one entry per frame
+ let frame_md5 = vec![
+ [0x50aa3b23, 0x62021b6d, 0x5a4ed176, 0x309bbeb2],
+ [0xdc632b62, 0x5a588c84, 0x7a5f8537, 0xc82e356b],
+ [0x0be7e536, 0xf54f8a6f, 0xe6ed1bb5, 0xf10ee116],
+ [0xcb69cc4d, 0xfe8cccab, 0x161c3687, 0x7bf5de8b],
+ [0x36667f83, 0x78468cab, 0xa378f86b, 0x9358a3f2],
+ [0x31a31140, 0xb8069e3e, 0x22d90a05, 0x0b3f8bff],
+ [0xb5f144a8, 0xb56ac2d6, 0x0bcae310, 0xebc3042e],
+ [0x324a14ce, 0x87bc9e89, 0x312bc02f, 0x9c6e2b0b],
+ [0xa8e10c16, 0xa85de23e, 0xd8e8e47e, 0xd00a9fd9],
+ [0x0b626c0f, 0x9ab5212d, 0x98728b97, 0xb8ce84a0]];
+ test_decoding("rawh264", "h264", "assets/ITU/normal-10.h264",
+ None, &dmx_reg, &dec_reg,
+ ExpectedTestResult::MD5Frames(frame_md5));
+ }
+ // the same sample and expected hashes as in the single-threaded 10-bit test
+ #[test]
+ fn test_h264_10bit_mt() {
+ let mut dec_reg = RegisteredMTDecoders::new();
+ itu_register_all_mt_decoders(&mut dec_reg);
+ let mut dmx_reg = RegisteredDemuxers::new();
+ dmx_reg.add_demuxer(&RawH264DemuxerCreator{});
+ generic_register_all_demuxers(&mut dmx_reg);
+ // expected MD5 values, one entry per frame
+ let frame_md5 = vec![
+ [0x50aa3b23, 0x62021b6d, 0x5a4ed176, 0x309bbeb2],
+ [0xdc632b62, 0x5a588c84, 0x7a5f8537, 0xc82e356b],
+ [0x0be7e536, 0xf54f8a6f, 0xe6ed1bb5, 0xf10ee116],
+ [0xcb69cc4d, 0xfe8cccab, 0x161c3687, 0x7bf5de8b],
+ [0x36667f83, 0x78468cab, 0xa378f86b, 0x9358a3f2],
+ [0x31a31140, 0xb8069e3e, 0x22d90a05, 0x0b3f8bff],
+ [0xb5f144a8, 0xb56ac2d6, 0x0bcae310, 0xebc3042e],
+ [0x324a14ce, 0x87bc9e89, 0x312bc02f, 0x9c6e2b0b],
+ [0xa8e10c16, 0xa85de23e, 0xd8e8e47e, 0xd00a9fd9],
+ [0x0b626c0f, 0x9ab5212d, 0x98728b97, 0xb8ce84a0]];
+ test_mt_decoding("rawh264", "h264", "assets/ITU/normal-10.h264",
+ None, &dmx_reg, &dec_reg,
+ ExpectedTestResult::MD5Frames(frame_md5));
+ }
}
pub const I4X4_SCAN: [(u8, u8); 16] = [