diff --git a/nihav-itu/src/codecs/h264/types.rs b/nihav-itu/src/codecs/h264/types.rs
index 6b5d010..4bcdb49 100644
--- a/nihav-itu/src/codecs/h264/types.rs
+++ b/nihav-itu/src/codecs/h264/types.rs
@@ -1,6 +1,30 @@
+use nihav_core::frame::{NAVideoBuffer, NASimpleVideoFrame};
 use nihav_codec_support::codecs::{MV, ZERO_MV};
 use nihav_codec_support::data::GenericCache;
-use super::FrameRefs;
+use super::SimplifiedSliceRefs;
+use super::pic_ref::FrameMBInfo;
+
+#[derive(Clone,Copy)]
+pub struct SimpleFrame<'a> {
+    pub data: &'a [u8],
+    pub offset: [usize; 3],
+    pub stride: [usize; 3],
+}
+
+impl<'a> SimpleFrame<'a> {
+    pub fn new(buf: &'a NAVideoBuffer<u8>) -> Self {
+        let mut offset = [0; 3];
+        let mut stride = [0; 3];
+        for (plane, (offs, strd)) in offset.iter_mut().zip(stride.iter_mut()).enumerate() {
+            *offs = buf.get_offset(plane);
+            *strd = buf.get_stride(plane);
+        }
+        Self {
+            data: buf.get_data(),
+            offset, stride
+        }
+    }
+}
 
 #[repr(u8)]
 #[derive(Clone,Copy,Debug,PartialEq)]
@@ -10,8 +34,9 @@ pub enum BMode {
     Bi,
 }
 
-#[derive(Clone,Copy,Debug,PartialEq)]
+#[derive(Clone,Copy,Debug,PartialEq,Default)]
 pub enum MBType {
+    #[default]
     Intra4x4,
     Intra8x8,
     Intra16x16(u8, u8, u8),
@@ -34,23 +59,13 @@ pub enum MBType {
 
 impl MBType {
     pub fn is_intra(self) -> bool {
-        match self {
-            MBType::Intra4x4 | MBType::Intra8x8 | MBType::Intra16x16(_, _, _) | MBType::PCM => true,
-            _ => false,
-        }
+        matches!(self, MBType::Intra4x4 | MBType::Intra8x8 | MBType::Intra16x16(_, _, _) | MBType::PCM)
     }
     pub fn is_intra16x16(self) -> bool {
-        if let MBType::Intra16x16(_, _, _) = self {
-            true
-        } else {
-            false
-        }
+        matches!(self, MBType::Intra16x16(_, _, _))
     }
     pub fn is_skip(self) -> bool {
-        match self {
-            MBType::PSkip | MBType::BSkip => true,
-            _ => false,
-        }
+        matches!(self, MBType::PSkip | MBType::BSkip)
     }
     pub fn is_4x4(self) -> bool { self.num_parts() == 4 }
     pub fn is_l0(self, part: usize) -> bool {
@@ -114,16 +129,13 @@ impl MBType {
     }
 }
 
-impl Default for MBType {
-    fn default() -> Self { MBType::Intra4x4 }
-}
-
-#[derive(Clone,Copy,Debug,PartialEq)]
+#[derive(Clone,Copy,Debug,PartialEq,Default)]
 pub enum SubMBType {
     P8x8,
     P8x4,
     P4x8,
     P4x4,
+    #[default]
     Direct8x8,
     B8x8(BMode),
     B8x4(BMode),
@@ -167,12 +179,8 @@ impl SubMBType {
     }
 }
 
-impl Default for SubMBType {
-    fn default() -> Self { SubMBType::Direct8x8 }
-}
-
 #[repr(u8)]
-#[derive(Clone,Copy,Debug,PartialEq)]
+#[derive(Clone,Copy,Debug,PartialEq,Default)]
 pub enum CompactMBType {
     Intra4x4,
     Intra8x8,
@@ -193,52 +201,37 @@ pub enum CompactMBType {
     B8x8,
     BSkip,
 
+    #[default]
     None,
 }
 
 impl CompactMBType {
     pub fn is_intra(self) -> bool {
-        match self {
-            CompactMBType::Intra4x4 | CompactMBType::Intra8x8 | CompactMBType::Intra16x16 => true,
-            _ => false,
-        }
+        matches!(self, CompactMBType::Intra4x4 | CompactMBType::Intra8x8 | CompactMBType::Intra16x16)
     }
     pub fn is_intra16orpcm(self) -> bool {
-        match self {
-            CompactMBType::Intra16x16 | CompactMBType::PCM => true,
-            _ => false,
-        }
+        matches!(self, CompactMBType::Intra16x16 | CompactMBType::PCM)
     }
     pub fn is_skip(self) -> bool {
-        match self {
-            CompactMBType::PSkip | CompactMBType::BSkip => true,
-            _ => false,
-        }
+        matches!(self, CompactMBType::PSkip | CompactMBType::BSkip)
     }
     pub fn is_direct(self) -> bool {
-        match self {
-            CompactMBType::BSkip | CompactMBType::Direct | CompactMBType::None => true,
-            _ => false,
-        }
+        matches!(self, CompactMBType::BSkip | CompactMBType::Direct | CompactMBType::None)
     }
     pub fn is_inter(self) -> bool {
         !self.is_intra() && !self.is_skip() && self != CompactMBType::PCM
     }
-    pub fn is_16x16(self) -> bool {
-        match self {
-            CompactMBType::P16x8 | CompactMBType::P8x16 |
-            CompactMBType::P8x8 | CompactMBType::P8x8Ref0 |
-            CompactMBType::B16x8 | CompactMBType::B8x16 |
-            CompactMBType::B8x8 => false,
-            _ => true,
-        }
+    pub fn is_16x16_ref(self) -> bool {
+        matches!(self,
+            CompactMBType::Intra4x4 |
+            CompactMBType::Intra8x8 |
+            CompactMBType::Intra16x16 |
+            CompactMBType::PCM |
+            CompactMBType::P16x16 |
+            CompactMBType::B16x16)
     }
 }
 
-impl Default for CompactMBType {
-    fn default() -> Self { CompactMBType::None }
-}
-
 impl From<MBType> for CompactMBType {
     fn from(mbtype: MBType) -> Self {
         match mbtype {
@@ -263,7 +256,7 @@ impl From<MBType> for CompactMBType {
 }
 
 #[repr(u8)]
-#[derive(Clone,Copy,Debug,PartialEq)]
+#[derive(Clone,Copy,Debug,PartialEq,Default)]
 pub enum IntraPredMode {
     Vertical,
     Horizontal,
@@ -274,6 +267,7 @@ pub enum IntraPredMode {
     HorDown,
     VerLeft,
     HorUp,
+    #[default]
     None,
 }
 
@@ -288,10 +282,6 @@ impl IntraPredMode {
     }
 }
 
-impl Default for IntraPredMode {
-    fn default() -> Self { IntraPredMode::None }
-}
-
 impl From<u8> for IntraPredMode {
     fn from(val: u8) -> Self {
         match val {
@@ -309,9 +299,9 @@ impl From<u8> for IntraPredMode {
     }
 }
 
-impl Into<u8> for IntraPredMode {
-    fn into(self) -> u8 {
-        match self {
+impl From<IntraPredMode> for u8 {
+    fn from(val: IntraPredMode) -> Self {
+        match val {
             IntraPredMode::Vertical => 0,
             IntraPredMode::Horizontal => 1,
             IntraPredMode::DC => 2,
@@ -393,8 +383,9 @@ pub struct MBData {
 }
 
 pub fn blk4_to_blk8(blk4: usize) -> usize {
-    const MAP: [usize; 16] = [ 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 ];
-    MAP[blk4 & 0xF]
+    /*const MAP: [usize; 16] = [ 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 ];
+    MAP[blk4 & 0xF]*/
+    ((blk4 & 2) >> 1) | ((blk4 & 8) >> 2)
 }
 
 #[derive(Clone,Copy)]
@@ -431,12 +422,19 @@ pub struct SliceState {
 
     pub blk8: GenericCache<Blk8Data>,
     pub blk4: GenericCache<Blk4Data>,
 
-    pub deblock: GenericCache<u8>,
+    pub deblock: [u8; 16],
 
     pub has_top: bool,
     pub has_left: bool,
+
+    pub top_line_y: Vec<u8>,
+    pub left_y: [u8; 17], // first element is top-left
+    pub top_line_c: [Vec<u8>; 2],
+    pub left_c: [[u8; 9]; 2],
 }
 
+const BLK4_TO_D8: [usize; 16] = [ 0, 0, 3, 3, 0, 0, 3, 3, 12, 12, 15, 15, 12, 12, 15, 15 ];
+
 impl SliceState {
     pub fn new() -> Self {
         Self {
@@ -449,10 +447,15 @@ impl SliceState {
             blk8: GenericCache::new(0, 0, Blk8Data::default()),
             blk4: GenericCache::new(0, 0, Blk4Data::default()),
 
-            deblock: GenericCache::new(0, 0, 0),
+            deblock: [0; 16],
 
             has_top: false,
             has_left: false,
+
+            top_line_y: Vec::new(),
+            left_y: [0; 17],
+            top_line_c: [Vec::new(), Vec::new()],
+            left_c: [[0; 9]; 2],
         }
     }
     pub fn reset(&mut self, mb_w: usize, mb_h: usize, mb_pos: usize) {
@@ -470,75 +473,137 @@ impl SliceState {
         self.blk8 = GenericCache::new(2, mb_w * 2 + 2, Blk8Data::default());
         self.blk4 = GenericCache::new(4, mb_w * 4 + 2, Blk4Data::default());
 
-        self.deblock = GenericCache::new(4, mb_w * 4 + 1, 0);
-
         self.has_top = false;
         self.has_left = false;
+
+        self.top_line_y.resize(mb_w * 16 + 1, 0x80);
+        self.top_line_c[0].resize(mb_w * 8 + 1, 0x80);
+        self.top_line_c[1].resize(mb_w * 8 + 1, 0x80);
+        self.left_y = [0x80; 17];
+        self.left_c = [[0x80; 9]; 2];
+    }
+    pub fn save_ipred_context(&mut self, frm: &NASimpleVideoFrame<u8>) {
+        let dstoff = self.mb_x * 16;
+        let srcoff = frm.offset[0] + self.mb_x * 16 + self.mb_y * 16 * frm.stride[0];
+        self.left_y[0] = self.top_line_y[dstoff + 15];
+        self.top_line_y[dstoff..][..16].copy_from_slice(&frm.data[srcoff + frm.stride[0] * 15..][..16]);
+        for (dst, src) in self.left_y[1..].iter_mut().zip(frm.data[srcoff..].chunks(frm.stride[0])) {
+            *dst = src[15];
+        }
+        for chroma in 0..2 {
+            let cstride = frm.stride[chroma + 1];
+            let dstoff = self.mb_x * 8;
+            let srcoff = frm.offset[chroma + 1] + self.mb_x * 8 + self.mb_y * 8 * cstride;
+            self.left_c[chroma][0] = self.top_line_c[chroma][dstoff + 7];
+            self.top_line_c[chroma][dstoff..][..8].copy_from_slice(&frm.data[srcoff + cstride * 7..][..8]);
+            for (dst, src) in self.left_c[chroma][1..].iter_mut().zip(frm.data[srcoff..].chunks(cstride)) {
+                *dst = src[7];
+            }
+        }
     }
-    pub fn fill_deblock(&mut self, deblock_mode: u8, is_s: bool) {
+    pub fn fill_deblock(&mut self, frefs: &SimplifiedSliceRefs, deblock_mode: u8, is_s: bool) {
         if deblock_mode == 1 {
             return;
         }
+        self.deblock = [0; 16];
+
         let tx8x8 = self.get_cur_mb().transform_8x8;
-        let mut idx = self.deblock.xpos + self.mb_x * 4;
-        let cur_mbt = self.get_cur_mb().mb_type;
-        let left_mbt = self.get_left_mb().mb_type;
-        let mut top_mbt = self.get_top_mb().mb_type;
-        for y in 0..4 {
-            if tx8x8 && (y & 1) != 0 {
-                continue;
+        let cur_intra = self.get_cur_mb().mb_type.is_intra();
+        let left_intra = self.get_left_mb().mb_type.is_intra();
+        let mut top_intra = self.get_top_mb().mb_type.is_intra();
+
+        let mut coded_cache = [false; 25];
+        let mut mv_cache = [[ZERO_MV; 2]; 25];
+        let mut ref_cache = [[INVALID_REF; 2]; 25];
+
+        if self.mb_y != 0 || self.has_top {
+            for (x, (cc, mv)) in coded_cache[1..5].iter_mut().zip(mv_cache[1..5].iter_mut()).enumerate() {
+                let blk4 = self.get_top_blk4(x);
+                *cc = blk4.ncoded != 0;
+                *mv = blk4.mv;
+                if (x & 1) == 0 {
+                    let blk8 = self.get_top_blk8(x / 2);
+                    ref_cache[x + 1] = blk8.ref_idx;
+                } else {
+                    ref_cache[x + 1] = ref_cache[x];
+                }
+            }
+        }
+        for (y, (ccs, mvs)) in coded_cache[5..].chunks_exact_mut(5).zip(
+                mv_cache[5..].chunks_exact_mut(5)).enumerate() {
+            if self.has_left || self.mb_x != 0 {
+                let blk4 = self.get_left_blk4(y * 4);
+                ccs[0] = blk4.ncoded != 0;
+                mvs[0] = blk4.mv;
+                if (y & 1) == 0 {
+                    let blk8 = self.get_left_blk8(y);
+                    ref_cache[y * 5 + 5] = blk8.ref_idx;
+                } else {
+                    ref_cache[y * 5 + 5] = ref_cache[y * 5];
+                }
+            }
+            for (x, (cc, mv)) in ccs[1..].iter_mut().zip(mvs[1..].iter_mut()).enumerate() {
+                let blk4 = self.get_cur_blk4(x + y * 4);
+                *cc = blk4.ncoded != 0;
+                *mv = blk4.mv;
+                ref_cache[x + 1 + (y + 1) * 5] = if ((x & 1) == 0) && ((y & 1) == 0) {
+                        self.get_cur_blk8(x / 2 + y).ref_idx
+                    } else {
+                        ref_cache[(x & !1) + 1 + ((y & !1) + 1) * 5]
+                    };
+            }
+        }
+
+        for (y, (((top_ccs, cur_ccs), (top_mvs, cur_mvs)), (cur_refs, top_refs))) in
+                coded_cache.chunks_exact(5).take(4).zip(coded_cache[5..].chunks_exact(5)).zip(
+                    mv_cache.chunks_exact(5).zip(mv_cache[5..].chunks_exact(5))).zip(
+                    ref_cache[5..].chunks_exact(5).zip(ref_cache.chunks_exact(5))).enumerate() {
             let can_do_top = y != 0 || (self.mb_y != 0 && (self.has_top || deblock_mode != 2));
-            if can_do_top {
-                if is_s || cur_mbt.is_intra() || top_mbt.is_intra() {
+            if can_do_top && (!tx8x8 || (y & 1) == 0) {
+                if is_s || cur_intra || top_intra {
                     let val = if y == 0 { 0x40 } else { 0x30 };
-                    for el in self.deblock.data[idx..][..4].iter_mut() { *el |= val; }
+                    for el in self.deblock[y * 4..][..4].iter_mut() { *el |= val; }
                 } else {
-                    for x in 0..4 {
-                        if self.get_cur_blk4(x).ncoded != 0 || self.get_top_blk4(x).ncoded != 0 {
-                            self.deblock.data[idx + x] |= 0x20;
+                    for (x, (((&cur_cc, &top_cc), (cur_mv, top_mv)), (&cur_ref, &top_ref))) in
+                            cur_ccs[1..].iter().zip(top_ccs[1..].iter()).zip(
+                            cur_mvs[1..].iter().zip(top_mvs[1..].iter())).zip(
+                            cur_refs[1..].iter().zip(
+                                top_refs[1..].iter())).take(4).enumerate() {
+                        if cur_cc || top_cc {
+                            self.deblock[y * 4 + x] |= 0x20;
                         } else {
-                            let cur_mv = self.get_cur_blk4(x).mv;
-                            let top_mv = self.get_top_blk4(x).mv;
-                            let cur_ref = self.get_cur_blk8(x / 2).ref_idx;
-                            let top_ref = self.get_top_blk8(x / 2).ref_idx;
-                            if mvdiff4(cur_mv[0], top_mv[0]) || mvdiff4(cur_mv[1], top_mv[1]) || cur_ref != top_ref {
-                                self.deblock.data[idx + x] |= 0x10;
+                            if mvdiff4(cur_mv, top_mv) || !frefs.cmp_refs(cur_ref, top_ref) {
+                                self.deblock[y * 4 + x] |= 0x10;
                             }
                         }
                     }
                 }
             }
-            let mut lleft_mbt = left_mbt;
-            for x in 0..4 {
-                if tx8x8 && (x & 1) != 0 {
-                    continue;
+            let mut lleft_intra = left_intra;
+            for (x, (((&cur_cc, &left_cc), (cur_mv, left_mv)), (&cur_ref, &left_ref))) in
+                    cur_ccs[1..].iter().zip(cur_ccs.iter()).zip(
+                    cur_mvs[1..].iter().zip(cur_mvs.iter())).zip(
+                    cur_refs[1..].iter().zip(cur_refs.iter())).enumerate() {
+                let skip_8 = tx8x8 && (x & 1) != 0;
                 let can_do_left = x > 0 || self.has_left || (self.mb_x != 0 && deblock_mode != 2);
                 if !can_do_left {
                     continue;
                 }
-                let blk4 = x + y * 4;
-                let blk8 = x / 2 + (y / 2) * 2;
-                if is_s || cur_mbt.is_intra() || lleft_mbt.is_intra() {
-                    self.deblock.data[idx + x] |= if x == 0 { 4 } else { 3 };
-                } else if self.get_cur_blk4(blk4).ncoded != 0 || self.get_top_blk4(blk4).ncoded != 0 {
-                    self.deblock.data[idx + x] |= 2;
+                if skip_8 {
+                } else if is_s || cur_intra || lleft_intra {
+                    self.deblock[y * 4 + x] |= if x == 0 { 4 } else { 3 };
+                } else if cur_cc || left_cc {
+                    self.deblock[y * 4 + x] |= 2;
                 } else {
-                    let cur_mv = self.get_cur_blk4(blk4).mv;
-                    let left_mv = self.get_left_blk4(blk4).mv;
-                    let cur_ref = self.get_cur_blk8(blk8).ref_idx;
-                    let left_ref = self.get_left_blk8(blk8).ref_idx;
-                    if mvdiff4(cur_mv[0], left_mv[0]) || mvdiff4(cur_mv[1], left_mv[1]) || cur_ref != left_ref {
-                        self.deblock.data[idx + x] |= 1;
+                    if mvdiff4(cur_mv, left_mv) || !frefs.cmp_refs(cur_ref, left_ref) {
+                        self.deblock[y * 4 + x] |= 1;
                     }
                 }
-                lleft_mbt = cur_mbt;
+                lleft_intra = cur_intra;
             }
-            top_mbt = cur_mbt;
-            idx += self.deblock.stride;
+            top_intra = cur_intra;
         }
     }
     pub fn next_mb(&mut self) {
@@ -551,8 +616,6 @@ impl SliceState {
             self.blk8.update_row();
             self.blk4.update_row();
 
-            self.deblock.update_row();
-
             self.has_left = false;
         }
         self.has_top = self.mb_x + self.mb_y * self.mb_w >= self.mb_start + self.mb_w;
@@ -733,27 +796,35 @@ impl SliceState {
         self.fill_mv (0, 0, 16, 16, 0, mv);
         self.fill_ref(0, 0, 16, 16, 0, ref_idx);
     }
-    pub fn predict_direct_mb(&mut self, frame_refs: &FrameRefs, temporal_mv: bool, cur_id: u16) {
-        let (col_mb, _, _) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
-        if col_mb.mb_type.is_16x16() || !temporal_mv {
-            let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, temporal_mv, cur_id, 0);
+    pub fn predict_direct_mb(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) {
+        let (col_mb, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
+        if direct_8x8 {
+            for blk4 in 0..16 {
+                let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, BLK4_TO_D8[blk4]);
+                self.get_cur_blk4(blk4).mv = [mv0, mv1];
+                self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
+            }
+        } else if col_mb.mb_type.is_16x16_ref() || !temporal_mv {
+            let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, 0);
             self.apply_to_blk4(|blk4| blk4.mv = [mv0, mv1]);
             self.apply_to_blk8(|blk8| blk8.ref_idx = [ref0, ref1]);
         } else {
             for blk4 in 0..16 {
-                let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, temporal_mv, cur_id, blk4);
+                let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, blk4);
                 self.get_cur_blk4(blk4).mv = [mv0, mv1];
                 self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
             }
         }
     }
-    pub fn predict_direct_sub(&mut self, frame_refs: &FrameRefs, temporal_mv: bool, cur_id: u16, blk4: usize) {
-        let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, temporal_mv, cur_id, blk4);
+    pub fn predict_direct_sub(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) {
+        let src_blk = if !direct8x8 { blk4 } else { BLK4_TO_D8[blk4] };
+        let (mbi, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
+        let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &mbi, r1_poc, r1_long, temporal_mv, cur_id, src_blk);
         self.get_cur_blk4(blk4).mv = [mv0, mv1];
         self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
     }
-    pub fn get_direct_mv(&self, frame_refs: &FrameRefs, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) {
-        let (mbi, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
+    #[allow(clippy::nonminimal_bool)]
+    pub fn get_direct_mv(&self, frame_refs: &SimplifiedSliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) {
        let blk8 = blk4_to_blk8(blk4);
        let (col_mv, r0_poc, col_idx) = if mbi.ref_poc[blk8] == [MISSING_POC; 2] {
                (ZERO_MV, MISSING_POC, MISSING_REF)
@@ -869,7 +940,27 @@ impl SliceState {
     }
 }
 
-fn mvdiff4(mv1: MV, mv2: MV) -> bool {
-    let mv = mv1 - mv2;
-    (mv.x.abs() >= 4) || (mv.y.abs() >= 4)
+#[cfg(not(target_arch="x86_64"))]
+fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
+    let mvd0 = mv1[0] - mv2[0];
+    let mvd1 = mv1[1] - mv2[1];
+    (mvd0.x.abs() >= 4) || (mvd0.y.abs() >= 4) || (mvd1.x.abs() >= 4) || (mvd1.y.abs() >= 4)
+}
+
+#[cfg(target_arch="x86_64")]
+fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
+    unsafe {
+        let mut flag = false;
+        let ptr = std::mem::transmute::<*const MV, *const u64>(mv1.as_ptr());
+        let mut m0 = *ptr;
+        let ptr = std::mem::transmute::<*const MV, *const u64>(mv2.as_ptr());
+        let mut m1 = *ptr;
+        for _ in 0..4 {
+            let tmp = m0.wrapping_sub(m1) as u16;
+            flag |= tmp.wrapping_add(3) > 6;
+            m0 >>= 16;
+            m1 >>= 16;
+        }
+        flag
+    }
 }
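
A note on the two bit tricks this revision introduces (commentary, not part of the patch). blk4_to_blk8() now computes the 8x8 block index directly: bit 1 of the 4x4 index is the high bit of its column and bit 3 the high bit of its row, which is exactly the mapping the old lookup table spelled out. mvdiff4() now compares both motion vectors of a partition pair at once by treating [MV; 2] as four i16 lanes packed into a u64: the low 16 bits of a full-width subtraction equal the wrapping subtraction of the low lanes, and for a lane difference d, d.wrapping_add(3) > 6 (computed as u16) holds exactly when |d| >= 4. The standalone sketch below checks both rewrites against their table/scalar counterparts; the local MV struct is a stand-in for nihav_codec_support::codecs::MV, and the packed variant uses explicit shifts instead of the pointer transmute, so unlike the committed x86_64 version it makes no assumption about [MV; 2] being laid out as four consecutive i16 values.

// Standalone sanity check for the bit tricks above (not part of the patch).
// `MV` is a hypothetical stand-in for nihav_codec_support::codecs::MV.
#[derive(Clone, Copy, Debug, PartialEq)]
struct MV { x: i16, y: i16 }

// The old table-based blk4 -> blk8 mapping, kept as the reference.
const MAP: [usize; 16] = [ 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 ];

fn blk4_to_blk8(blk4: usize) -> usize {
    // bit 1 = high bit of the 4x4 column -> blk8 bit 0,
    // bit 3 = high bit of the 4x4 row    -> blk8 bit 1
    ((blk4 & 2) >> 1) | ((blk4 & 8) >> 2)
}

// Reference semantics: any component of either MV pair differs by 4 or more.
fn mvdiff4_scalar(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
    let dx0 = mv1[0].x - mv2[0].x;
    let dy0 = mv1[0].y - mv2[0].y;
    let dx1 = mv1[1].x - mv2[1].x;
    let dy1 = mv1[1].y - mv2[1].y;
    dx0.abs() >= 4 || dy0.abs() >= 4 || dx1.abs() >= 4 || dy1.abs() >= 4
}

// Packed version: four i16 lanes in one u64. Shifting both operands right by
// 16 per iteration makes each lane in turn the low one, and the low 16 bits
// of a full-width subtraction equal the wrapping subtraction of the low
// lanes, so `tmp` is the lane difference. d in [-3, 3] maps to
// d.wrapping_add(3) in [0, 6]; every other difference exceeds 6.
fn mvdiff4_packed(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
    let pack = |mv: &[MV; 2]| -> u64 {
          (mv[0].x as u16 as u64)
        | ((mv[0].y as u16 as u64) << 16)
        | ((mv[1].x as u16 as u64) << 32)
        | ((mv[1].y as u16 as u64) << 48)
    };
    let (mut m0, mut m1) = (pack(mv1), pack(mv2));
    let mut flag = false;
    for _ in 0..4 {
        let tmp = m0.wrapping_sub(m1) as u16;
        flag |= tmp.wrapping_add(3) > 6;
        m0 >>= 16;
        m1 >>= 16;
    }
    flag
}

fn main() {
    assert!((0..16usize).all(|blk4| blk4_to_blk8(blk4) == MAP[blk4]));

    let a = [MV { x: 1, y: -2 }, MV { x: 100, y: 7 }];
    let cases = [
        [MV { x: 4, y: -2 }, MV { x: 100, y:  7 }], // |dx0| == 3 -> false
        [MV { x: 1, y: -2 }, MV { x: 100, y: 11 }], // |dy1| == 4 -> true
        [MV { x: -3, y: 2 }, MV { x:  96, y:  3 }], // |dx0| == 4 -> true
    ];
    for b in &cases {
        assert_eq!(mvdiff4_scalar(&a, b), mvdiff4_packed(&a, b));
    }
    println!("bit tricks match the reference implementations");
}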