-use nihav_core::frame::NASimpleVideoFrame;
+use nihav_core::frame::{NAVideoBuffer, NASimpleVideoFrame};
use nihav_codec_support::codecs::{MV, ZERO_MV};
use nihav_codec_support::data::GenericCache;
-use super::SliceRefs;
+use super::SimplifiedSliceRefs;
use super::pic_ref::FrameMBInfo;
+/// Lightweight borrowed view of a planar 8-bit video frame.
+///
+/// Caches the per-plane offsets and strides of an `NAVideoBuffer` once, so
+/// hot loops can index `data` directly instead of calling buffer accessors.
+#[derive(Clone,Copy)]
+pub struct SimpleFrame<'a> {
+ /// Raw frame bytes shared by all planes.
+ pub data: &'a [u8],
+ /// Starting byte offset of each of the three planes inside `data`.
+ pub offset: [usize; 3],
+ /// Line stride (bytes per row) of each of the three planes.
+ pub stride: [usize; 3],
+}
+
+impl<'a> SimpleFrame<'a> {
+ /// Builds a view over `buf`, caching offset and stride for planes 0..3.
+ pub fn new(buf: &'a NAVideoBuffer<u8>) -> Self {
+ let mut offset = [0; 3];
+ let mut stride = [0; 3];
+ for (plane, (offs, strd)) in offset.iter_mut().zip(stride.iter_mut()).enumerate() {
+ *offs = buf.get_offset(plane);
+ *strd = buf.get_stride(plane);
+ }
+ Self {
+ data: buf.get_data(),
+ offset, stride
+ }
+ }
+}
+
#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq)]
pub enum BMode {
Bi,
}
-#[derive(Clone,Copy,Debug,PartialEq)]
+#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum MBType {
+ #[default]
Intra4x4,
Intra8x8,
Intra16x16(u8, u8, u8),
}
}
-impl Default for MBType {
- fn default() -> Self { MBType::Intra4x4 }
-}
-
-#[derive(Clone,Copy,Debug,PartialEq)]
+#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum SubMBType {
P8x8,
P8x4,
P4x8,
P4x4,
+ #[default]
Direct8x8,
B8x8(BMode),
B8x4(BMode),
}
}
-impl Default for SubMBType {
- fn default() -> Self { SubMBType::Direct8x8 }
-}
-
#[repr(u8)]
-#[derive(Clone,Copy,Debug,PartialEq)]
+#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum CompactMBType {
Intra4x4,
Intra8x8,
B8x8,
BSkip,
+ #[default]
None,
}
}
}
-impl Default for CompactMBType {
- fn default() -> Self { CompactMBType::None }
-}
-
impl From<MBType> for CompactMBType {
fn from(mbtype: MBType) -> Self {
match mbtype {
}
#[repr(u8)]
-#[derive(Clone,Copy,Debug,PartialEq)]
+#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum IntraPredMode {
Vertical,
Horizontal,
HorDown,
VerLeft,
HorUp,
+ #[default]
None,
}
}
}
-impl Default for IntraPredMode {
- fn default() -> Self { IntraPredMode::None }
-}
-
impl From<u8> for IntraPredMode {
fn from(val: u8) -> Self {
match val {
}
}
}
- pub fn fill_deblock(&mut self, frefs: &SliceRefs, deblock_mode: u8, is_s: bool) {
+ pub fn fill_deblock(&mut self, frefs: &SimplifiedSliceRefs, deblock_mode: u8, is_s: bool) {
if deblock_mode == 1 {
return;
}
let cur_intra = self.get_cur_mb().mb_type.is_intra();
let left_intra = self.get_left_mb().mb_type.is_intra();
let mut top_intra = self.get_top_mb().mb_type.is_intra();
- for y in 0..4 {
+
+ let mut coded_cache = [false; 25];
+ let mut mv_cache = [[ZERO_MV; 2]; 25];
+ let mut ref_cache = [[INVALID_REF; 2]; 25];
+
+ if self.mb_y != 0 || self.has_top {
+ for (x, (cc, mv)) in coded_cache[1..5].iter_mut().zip(mv_cache[1..5].iter_mut()).enumerate() {
+ let blk4 = self.get_top_blk4(x);
+ *cc = blk4.ncoded != 0;
+ *mv = blk4.mv;
+ if (x & 1) == 0 {
+ let blk8 = self.get_top_blk8(x / 2);
+ ref_cache[x + 1] = blk8.ref_idx;
+ } else {
+ ref_cache[x + 1] = ref_cache[x];
+ }
+ }
+ }
+ for (y, (ccs, mvs)) in coded_cache[5..].chunks_exact_mut(5).zip(
+ mv_cache[5..].chunks_exact_mut(5)).enumerate() {
+ if self.has_left || self.mb_x != 0 {
+ let blk4 = self.get_left_blk4(y * 4);
+ ccs[0] = blk4.ncoded != 0;
+ mvs[0] = blk4.mv;
+ if (y & 1) == 0 {
+ let blk8 = self.get_left_blk8(y);
+ ref_cache[y * 5 + 5] = blk8.ref_idx;
+ } else {
+ ref_cache[y * 5 + 5] = ref_cache[y * 5];
+ }
+ }
+ for (x, (cc, mv)) in ccs[1..].iter_mut().zip(mvs[1..].iter_mut()).enumerate() {
+ let blk4 = self.get_cur_blk4(x + y * 4);
+ *cc = blk4.ncoded != 0;
+ *mv = blk4.mv;
+ ref_cache[x + 1 + (y + 1) * 5] = if ((x & 1) == 0) && ((y & 1) == 0) {
+ self.get_cur_blk8(x / 2 + y).ref_idx
+ } else {
+ ref_cache[(x & !1) + 1 + ((y & !1) + 1) * 5]
+ };
+ }
+ }
+
+ for (y, (((top_ccs, cur_ccs), (top_mvs, cur_mvs)), (cur_refs, top_refs))) in
+ coded_cache.chunks_exact(5).take(4).zip(coded_cache[5..].chunks_exact(5)).zip(
+ mv_cache.chunks_exact(5).zip(mv_cache[5..].chunks_exact(5))).zip(
+ ref_cache[5..].chunks_exact(5).zip(ref_cache.chunks_exact(5))).enumerate() {
let can_do_top = y != 0 || (self.mb_y != 0 && (self.has_top || deblock_mode != 2));
if can_do_top && (!tx8x8 || (y & 1) == 0) {
if is_s || cur_intra || top_intra {
let val = if y == 0 { 0x40 } else { 0x30 };
for el in self.deblock[y * 4..][..4].iter_mut() { *el |= val; }
} else {
- for x in 0..4 {
- let blk4 = x + y * 4;
- let blk8 = x / 2 + (y / 2) * 2;
- if self.get_cur_blk4(blk4).ncoded != 0 || self.get_top_blk4(blk4).ncoded != 0 {
+ for (x, (((&cur_cc, &top_cc), (cur_mv, top_mv)), (&cur_ref, &top_ref))) in
+ cur_ccs[1..].iter().zip(top_ccs[1..].iter()).zip(
+ cur_mvs[1..].iter().zip(top_mvs[1..].iter())).zip(
+ cur_refs[1..].iter().zip(
+ top_refs[1..].iter())).take(4).enumerate() {
+ if cur_cc || top_cc {
self.deblock[y * 4 + x] |= 0x20;
} else {
- let cur_mv = self.get_cur_blk4(blk4).mv;
- let top_mv = self.get_top_blk4(blk4).mv;
- let cur_ref = self.get_cur_blk8(blk8).ref_idx;
- let top_ref = if (y & 1) == 0 { self.get_top_blk8(blk8).ref_idx } else { cur_ref };
- if mvdiff4(cur_mv[0], top_mv[0]) || mvdiff4(cur_mv[1], top_mv[1]) || !frefs.cmp_refs(cur_ref, top_ref) {
+ if mvdiff4(cur_mv, top_mv) || !frefs.cmp_refs(cur_ref, top_ref) {
self.deblock[y * 4 + x] |= 0x10;
}
}
}
}
let mut lleft_intra = left_intra;
- for x in 0..4 {
+ for (x, (((&cur_cc, &left_cc), (cur_mv, left_mv)), (&cur_ref, &left_ref))) in
+ cur_ccs[1..].iter().zip(cur_ccs.iter()).zip(
+ cur_mvs[1..].iter().zip(cur_mvs.iter())).zip(
+ cur_refs[1..].iter().zip(cur_refs.iter())).enumerate() {
let skip_8 = tx8x8 && (x & 1) != 0;
let can_do_left = x > 0 || self.has_left || (self.mb_x != 0 && deblock_mode != 2);
if !can_do_left {
continue;
}
- let blk4 = x + y * 4;
- let blk8 = x / 2 + (y / 2) * 2;
if skip_8 {
} else if is_s || cur_intra || lleft_intra {
self.deblock[y * 4 + x] |= if x == 0 { 4 } else { 3 };
- } else if self.get_cur_blk4(blk4).ncoded != 0 || self.get_left_blk4(blk4).ncoded != 0 {
+ } else if cur_cc || left_cc {
self.deblock[y * 4 + x] |= 2;
} else {
- let cur_mv = self.get_cur_blk4(blk4).mv;
- let left_mv = self.get_left_blk4(blk4).mv;
- let cur_ref = self.get_cur_blk8(blk8).ref_idx;
- let left_ref = if (x & 1) == 0 { self.get_left_blk8(blk8).ref_idx } else { cur_ref };
- if mvdiff4(cur_mv[0], left_mv[0]) || mvdiff4(cur_mv[1], left_mv[1]) || !frefs.cmp_refs(cur_ref, left_ref) {
+ if mvdiff4(cur_mv, left_mv) || !frefs.cmp_refs(cur_ref, left_ref) {
self.deblock[y * 4 + x] |= 1;
}
}
self.fill_mv (0, 0, 16, 16, 0, mv);
self.fill_ref(0, 0, 16, 16, 0, ref_idx);
}
- pub fn predict_direct_mb(&mut self, frame_refs: &SliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) {
+ pub fn predict_direct_mb(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) {
let (col_mb, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
if direct_8x8 {
for blk4 in 0..16 {
}
}
}
- pub fn predict_direct_sub(&mut self, frame_refs: &SliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) {
+ pub fn predict_direct_sub(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) {
let src_blk = if !direct8x8 { blk4 } else { BLK4_TO_D8[blk4] };
let (mbi, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &mbi, r1_poc, r1_long, temporal_mv, cur_id, src_blk);
self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
}
#[allow(clippy::nonminimal_bool)]
- pub fn get_direct_mv(&self, frame_refs: &SliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) {
+ pub fn get_direct_mv(&self, frame_refs: &SimplifiedSliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) {
let blk8 = blk4_to_blk8(blk4);
let (col_mv, r0_poc, col_idx) = if mbi.ref_poc[blk8] == [MISSING_POC; 2] {
(ZERO_MV, MISSING_POC, MISSING_REF)
}
}
-fn mvdiff4(mv1: MV, mv2: MV) -> bool {
-    let mv = mv1 - mv2;
-    (mv.x.abs() >= 4) || (mv.y.abs() >= 4)
+/// Reports whether any component of the two motion-vector pairs differs by
+/// 4 or more (quarter-pel) units — the deblock-strength threshold.
+#[cfg(not(target_arch="x86_64"))]
+fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
+ let mvd0 = mv1[0] - mv2[0];
+ let mvd1 = mv1[1] - mv2[1];
+ (mvd0.x.abs() >= 4) || (mvd0.y.abs() >= 4) || (mvd1.x.abs() >= 4) || (mvd1.y.abs() >= 4)
+}
+
+/// Same check, but loads each `[MV; 2]` (four 16-bit components) as one u64
+/// and tests the four lanes with branch-free wrapping arithmetic.
+#[cfg(target_arch="x86_64")]
+fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
+ // NOTE(review): assumes MV is exactly two i16s with no padding (8 bytes
+ // per [MV; 2]) — TODO confirm against MV's definition.
+ // SAFETY: we read 8 bytes from a reference to an 8-byte [MV; 2];
+ // read_unaligned copes with MV's 2-byte alignment, unlike dereferencing
+ // a *const u64 directly, which is UB on a misaligned pointer.
+ let mut m0 = unsafe { (mv1.as_ptr() as *const u64).read_unaligned() };
+ let mut m1 = unsafe { (mv2.as_ptr() as *const u64).read_unaligned() };
+ let mut flag = false;
+ for _ in 0..4 {
+ // lane difference d is "small" iff (d + 3) as u16 <= 6, i.e. |d| < 4;
+ // shifting both operands by 16 each pass isolates the next lane.
+ let tmp = m0.wrapping_sub(m1) as u16;
+ flag |= tmp.wrapping_add(3) > 6;
+ m0 >>= 16;
+ m1 >>= 16;
+ }
+ flag
+}