X-Git-Url: https://git.nihav.org/?a=blobdiff_plain;f=nihav-itu%2Fsrc%2Fcodecs%2Fh264%2Ftypes.rs;h=4bcdb4943547318d09df4c858f7412fa7aedd080;hb=fe64781def821c3900abf44bdfbb38f3b3d21345;hp=6fe5aea9f00f40f4612e38dece654d24ccb4a029;hpb=e6aaad5c5273cd814b5748b7faf3751835a37217;p=nihav.git diff --git a/nihav-itu/src/codecs/h264/types.rs b/nihav-itu/src/codecs/h264/types.rs index 6fe5aea..4bcdb49 100644 --- a/nihav-itu/src/codecs/h264/types.rs +++ b/nihav-itu/src/codecs/h264/types.rs @@ -1,9 +1,31 @@ -use nihav_core::frame::NASimpleVideoFrame; +use nihav_core::frame::{NAVideoBuffer, NASimpleVideoFrame}; use nihav_codec_support::codecs::{MV, ZERO_MV}; use nihav_codec_support::data::GenericCache; -use super::SliceRefs; +use super::SimplifiedSliceRefs; use super::pic_ref::FrameMBInfo; +#[derive(Clone,Copy)] +pub struct SimpleFrame<'a> { + /* Borrowed view of a video buffer: the data slice plus three offsets and three strides, indexed by plane. */ pub data: &'a [u8], + pub offset: [usize; 3], + pub stride: [usize; 3], +} + +impl<'a> SimpleFrame<'a> { + /* Builds the view by asking the buffer for the offset and stride of planes 0, 1 and 2 and borrowing its data. */ pub fn new(buf: &'a NAVideoBuffer) -> Self { + let mut offset = [0; 3]; + let mut stride = [0; 3]; + for (plane, (offs, strd)) in offset.iter_mut().zip(stride.iter_mut()).enumerate() { + *offs = buf.get_offset(plane); + *strd = buf.get_stride(plane); + } + Self { + data: buf.get_data(), + offset, stride + } + } +} + #[repr(u8)] #[derive(Clone,Copy,Debug,PartialEq)] pub enum BMode { @@ -361,8 +383,9 @@ pub struct MBData { } pub fn blk4_to_blk8(blk4: usize) -> usize { - const MAP: [usize; 16] = [ 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 ]; - MAP[blk4 & 0xF] + /*const MAP: [usize; 16] = [ 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 ]; + MAP[blk4 & 0xF]*/ + /* Bit 1 of blk4 becomes the low result bit and bit 3 the high one; this yields the same values as the commented-out table lookup. */ ((blk4 & 2) >> 1) | ((blk4 & 8) >> 2) } #[derive(Clone,Copy)] @@ -478,7 +501,7 @@ impl SliceState { } } } - pub fn fill_deblock(&mut self, frefs: &SliceRefs, deblock_mode: u8, is_s: bool) { + pub fn fill_deblock(&mut self, frefs: &SimplifiedSliceRefs, deblock_mode: u8, is_s: bool) { if deblock_mode == 1 { return; } @@ -490,24 +513,68 @@ impl SliceState { let cur_intra = 
self.get_cur_mb().mb_type.is_intra(); let left_intra = self.get_left_mb().mb_type.is_intra(); let mut top_intra = self.get_top_mb().mb_type.is_intra(); - for y in 0..4 { + + /* Three 5x5 caches with a row stride of 5: row 0 is filled from the top neighbour and column 0 from the left neighbour (each behind its has_top / has_left and mb_y / mb_x guard), the inner 4x4 from the current macroblock. Entries that are never filled keep their false / ZERO_MV / INVALID_REF initial values. */ let mut coded_cache = [false; 25]; + let mut mv_cache = [[ZERO_MV; 2]; 25]; + let mut ref_cache = [[INVALID_REF; 2]; 25]; + + if self.mb_y != 0 || self.has_top { + for (x, (cc, mv)) in coded_cache[1..5].iter_mut().zip(mv_cache[1..5].iter_mut()).enumerate() { + let blk4 = self.get_top_blk4(x); + *cc = blk4.ncoded != 0; + *mv = blk4.mv; + if (x & 1) == 0 { + let blk8 = self.get_top_blk8(x / 2); + ref_cache[x + 1] = blk8.ref_idx; + } else { + ref_cache[x + 1] = ref_cache[x]; + } + } + } + for (y, (ccs, mvs)) in coded_cache[5..].chunks_exact_mut(5).zip( + mv_cache[5..].chunks_exact_mut(5)).enumerate() { + if self.has_left || self.mb_x != 0 { + let blk4 = self.get_left_blk4(y * 4); + ccs[0] = blk4.ncoded != 0; + mvs[0] = blk4.mv; + if (y & 1) == 0 { + let blk8 = self.get_left_blk8(y); + ref_cache[y * 5 + 5] = blk8.ref_idx; + } else { + ref_cache[y * 5 + 5] = ref_cache[y * 5]; + } + } + for (x, (cc, mv)) in ccs[1..].iter_mut().zip(mvs[1..].iter_mut()).enumerate() { + let blk4 = self.get_cur_blk4(x + y * 4); + *cc = blk4.ncoded != 0; + *mv = blk4.mv; + ref_cache[x + 1 + (y + 1) * 5] = if ((x & 1) == 0) && ((y & 1) == 0) { + self.get_cur_blk8(x / 2 + y).ref_idx + } else { + ref_cache[(x & !1) + 1 + ((y & !1) + 1) * 5] + }; + } + } + + /* Visit the four block rows; each iteration receives one cache row together with the cache row directly above it. */ for (y, (((top_ccs, cur_ccs), (top_mvs, cur_mvs)), (cur_refs, top_refs))) in + coded_cache.chunks_exact(5).take(4).zip(coded_cache[5..].chunks_exact(5)).zip( + mv_cache.chunks_exact(5).zip(mv_cache[5..].chunks_exact(5))).zip( + ref_cache[5..].chunks_exact(5).zip(ref_cache.chunks_exact(5))).enumerate() { let can_do_top = y != 0 || (self.mb_y != 0 && (self.has_top || deblock_mode != 2)); if can_do_top && (!tx8x8 || (y & 1) == 0) { if is_s || cur_intra || top_intra { let val = if y == 0 { 0x40 } else { 0x30 }; for el in self.deblock[y * 4..][..4].iter_mut() { *el |= val; } } 
else { - for x in 0..4 { - let blk4 = x + y * 4; - let blk8 = x / 2 + (y / 2) * 2; - if self.get_cur_blk4(blk4).ncoded != 0 || self.get_top_blk4(blk4).ncoded != 0 { + /* Top edge: pair every block of this row with the cache entry directly above it, skipping cache column 0. */ for (x, (((&cur_cc, &top_cc), (cur_mv, top_mv)), (&cur_ref, &top_ref))) in + cur_ccs[1..].iter().zip(top_ccs[1..].iter()).zip( + cur_mvs[1..].iter().zip(top_mvs[1..].iter())).zip( + cur_refs[1..].iter().zip( + top_refs[1..].iter())).take(4).enumerate() { + if cur_cc || top_cc { self.deblock[y * 4 + x] |= 0x20; } else { - let cur_mv = self.get_cur_blk4(blk4).mv; - let top_mv = self.get_top_blk4(blk4).mv; - let cur_ref = self.get_cur_blk8(blk8).ref_idx; - let top_ref = if (y & 1) == 0 { self.get_top_blk8(blk8).ref_idx } else { cur_ref }; - if mvdiff4(cur_mv[0], top_mv[0]) || mvdiff4(cur_mv[1], top_mv[1]) || !frefs.cmp_refs(cur_ref, top_ref) { + if mvdiff4(cur_mv, top_mv) || !frefs.cmp_refs(cur_ref, top_ref) { self.deblock[y * 4 + x] |= 0x10; } } @@ -515,25 +582,22 @@ impl SliceState { } } let mut lleft_intra = left_intra; - for x in 0..4 { + /* Left edge: pair every block with the cache entry one column to its left; for x equal to 0 that entry is cache column 0. */ for (x, (((&cur_cc, &left_cc), (cur_mv, left_mv)), (&cur_ref, &left_ref))) in + cur_ccs[1..].iter().zip(cur_ccs.iter()).zip( + cur_mvs[1..].iter().zip(cur_mvs.iter())).zip( + cur_refs[1..].iter().zip(cur_refs.iter())).enumerate() { let skip_8 = tx8x8 && (x & 1) != 0; let can_do_left = x > 0 || self.has_left || (self.mb_x != 0 && deblock_mode != 2); if !can_do_left { continue; } - let blk4 = x + y * 4; - let blk8 = x / 2 + (y / 2) * 2; if skip_8 { } else if is_s || cur_intra || lleft_intra { self.deblock[y * 4 + x] |= if x == 0 { 4 } else { 3 }; - } else if self.get_cur_blk4(blk4).ncoded != 0 || self.get_left_blk4(blk4).ncoded != 0 { + } else if cur_cc || left_cc { self.deblock[y * 4 + x] |= 2; } else { - let cur_mv = self.get_cur_blk4(blk4).mv; - let left_mv = self.get_left_blk4(blk4).mv; - let cur_ref = self.get_cur_blk8(blk8).ref_idx; - let left_ref = if (x & 1) == 0 { self.get_left_blk8(blk8).ref_idx } else { cur_ref }; - if mvdiff4(cur_mv[0], left_mv[0]) || 
mvdiff4(cur_mv[1], left_mv[1]) || !frefs.cmp_refs(cur_ref, left_ref) { + if mvdiff4(cur_mv, left_mv) || !frefs.cmp_refs(cur_ref, left_ref) { self.deblock[y * 4 + x] |= 1; } } @@ -732,7 +796,7 @@ impl SliceState { self.fill_mv (0, 0, 16, 16, 0, mv); self.fill_ref(0, 0, 16, 16, 0, ref_idx); } - pub fn predict_direct_mb(&mut self, frame_refs: &SliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) { + pub fn predict_direct_mb(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) { let (col_mb, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y); if direct_8x8 { for blk4 in 0..16 { @@ -752,7 +816,7 @@ impl SliceState { } } } - pub fn predict_direct_sub(&mut self, frame_refs: &SliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) { + pub fn predict_direct_sub(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) { let src_blk = if !direct8x8 { blk4 } else { BLK4_TO_D8[blk4] }; let (mbi, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y); let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &mbi, r1_poc, r1_long, temporal_mv, cur_id, src_blk); @@ -760,7 +824,7 @@ impl SliceState { self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1]; } #[allow(clippy::nonminimal_bool)] - pub fn get_direct_mv(&self, frame_refs: &SliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) { + pub fn get_direct_mv(&self, frame_refs: &SimplifiedSliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) { let blk8 = blk4_to_blk8(blk4); let (col_mv, r0_poc, col_idx) = if mbi.ref_poc[blk8] == [MISSING_POC; 2] { (ZERO_MV, MISSING_POC, MISSING_REF) @@ -876,7 +940,27 @@ impl SliceState { } } -fn mvdiff4(mv1: MV, mv2: MV) -> bool { - let mv = mv1 - mv2; - (mv.x.abs() >= 4) || (mv.y.abs() 
>= 4) +#[cfg(not(target_arch="x86_64"))] +fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool { + /* Portable variant: subtracts the two vector pairs element by element and returns true when any x or y of the differences has an absolute value of 4 or more. */ let mvd0 = mv1[0] - mv2[0]; + let mvd1 = mv1[1] - mv2[1]; + (mvd0.x.abs() >= 4) || (mvd0.y.abs() >= 4) || (mvd1.x.abs() >= 4) || (mvd1.y.abs() >= 4) +} + +#[cfg(target_arch="x86_64")] +fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool { + /* x86_64 variant: loads each pair as a single u64 and tests it as four 16-bit lanes. NOTE(review): the u64 is read through a pointer cast from MV to u64; this presumably relies on [MV; 2] being exactly 8 bytes of 16-bit fields and on the address being acceptable for a u64 load. MV is declared elsewhere, so its layout and alignment are not visible here - TODO confirm, or switch to an unaligned read. */ unsafe { + let mut flag = false; + let ptr = std::mem::transmute::<*const MV, *const u64>(mv1.as_ptr()); + let mut m0 = *ptr; + let ptr = std::mem::transmute::<*const MV, *const u64>(mv2.as_ptr()); + let mut m1 = *ptr; + for _ in 0..4 { + /* The low 16 bits of the wrapping 64-bit difference equal the wrapping difference of the two low lanes, since borrows only travel upwards. */ let tmp = m0.wrapping_sub(m1) as u16; + /* After adding 3, the values 0..=6 correspond to a lane difference within -3..=3, so anything above 6 means a magnitude of 4 or more. */ flag |= tmp.wrapping_add(3) > 6; + /* Move the next 16-bit lane of both words into the low position. */ m0 >>= 16; + m1 >>= 16; + } + flag + } }