From 495b7ec009b39e925ba204a61014ab316883cf66 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Wed, 27 Jul 2022 10:27:16 +0200 Subject: [PATCH] h264: reword MB reconstruction and add weighted MC --- nihav-itu/src/codecs/h264/dsp/mod.rs | 83 ++++ nihav-itu/src/codecs/h264/mb_recon.rs | 589 ++++++++++++++++++++++++++ nihav-itu/src/codecs/h264/mod.rs | 426 +------------------ nihav-itu/src/codecs/h264/slice.rs | 54 ++- nihav-itu/src/codecs/h264/types.rs | 16 +- 5 files changed, 749 insertions(+), 419 deletions(-) create mode 100644 nihav-itu/src/codecs/h264/mb_recon.rs diff --git a/nihav-itu/src/codecs/h264/dsp/mod.rs b/nihav-itu/src/codecs/h264/dsp/mod.rs index 19d260a..ef706be 100644 --- a/nihav-itu/src/codecs/h264/dsp/mod.rs +++ b/nihav-itu/src/codecs/h264/dsp/mod.rs @@ -943,6 +943,60 @@ pub fn do_mc(frm: &mut NASimpleVideoFrame, refpic: NAVideoBufferRef, xpo } } +pub fn mc_blocks(ydst: &mut [u8], udst: &mut [u8], vdst: &mut [u8], refpic: NAVideoBufferRef, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) { + let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize; + + let pre = if mode != 0 { 2 } else { 0 }; + let post = if mode != 0 { 3 } else { 0 }; + let (width, height) = refpic.get_dimensions(0); + let sx = (xpos as isize) + ((mv.x >> 2) as isize); + let sy = (ypos as isize) + ((mv.y >> 2) as isize); + + const EBUF_STRIDE: usize = 32; + let mut ebuf = [0u8; EBUF_STRIDE * (16 + 2 + 3)]; + + if (sx - pre < 0) || (sx + (w as isize) + post > (width as isize)) || + (sy - pre < 0) || (sy + (h as isize) + post > (height as isize)) { + let edge = (pre + post) as usize; + edge_emu(&refpic, sx - pre, sy - pre, w + edge, h + edge, + &mut ebuf, EBUF_STRIDE, 0, 0); + (H264_LUMA_INTERP[mode])(ydst, 16, &ebuf, EBUF_STRIDE, w, h); + } else { + let sstride = refpic.get_stride(0); + let soff = refpic.get_offset(0); + let sdta = refpic.get_data(); + let sbuf: &[u8] = sdta.as_slice(); + let saddr = soff + ((sx - pre) as usize) + ((sy - pre) as usize) * sstride; + (H264_LUMA_INTERP[mode])(ydst, 16, &sbuf[saddr..], sstride, w, h); + } + + let (cw, ch) = refpic.get_dimensions(1); + let mvx = mv.x >> 3; + let mvy = mv.y >> 3; + let dx = (mv.x & 7) as u16; + let dy = (mv.y & 7) as u16; + let src_x = ((xpos >> 1) as isize) + (mvx as isize); + let src_y = ((ypos >> 1) as isize) + (mvy as isize); + let suoff = refpic.get_offset(1); + let svoff = refpic.get_offset(2); + let sustride = refpic.get_stride(1); + let svstride = refpic.get_stride(2); + let src = refpic.get_data(); + let cbw = w / 2; + let cbh = h / 2; + let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) || (src_y + (cbh as isize) + 1 > (ch as isize)) { + edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf, 18, 1, 4); + edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf[9..], 18, 2, 4); + ([&ebuf, &ebuf[9..]], [18, 18]) + } else { + ([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..], + &src[svoff + (src_x as usize) + (src_y as usize) * svstride..]], + [sustride, svstride]) + }; + chroma_interp(udst, 16, csrc[0], cstride[0], dx, dy, cbw, cbh); + chroma_interp(vdst, 16, csrc[1], cstride[1], dx, dy, cbw, cbh); +} + pub fn gray_block(frm: &mut NASimpleVideoFrame, x: usize, y: usize, w: usize, h: usize) { let yoff = frm.offset[0] + x + y * frm.stride[0]; let coff = [frm.offset[1] + x / 2 + y / 2 * frm.stride[1], @@ -983,6 +1037,35 @@ pub fn do_mc_avg(frm: &mut NASimpleVideoFrame, refpic: NAVideoBufferRef, } } +pub fn put_block_weighted(dst: &mut [u8], stride: usize, src: &[u8], w: usize, h: usize, wparams: [i8; 3]) { + let weight = i16::from(wparams[0]); + let offset = i16::from(wparams[1]); + let wshift = wparams[2] as u8; + let bias = (1 << wshift) >> 1; + + for (drow, srow) in dst.chunks_mut(stride).zip(src.chunks(16)).take(h) { + for (dst, &src) in drow[..w].iter_mut().zip(srow.iter()) { + *dst = clip8(((i16::from(src) * weight + bias) >> wshift) + offset); + } + } +} + +pub fn put_block_weighted2(dst: &mut [u8], stride: usize, src0: &[u8], src1: &[u8], w: usize, h: usize, wparams: [i8; 5]) { + let weight0 = i16::from(wparams[0]); + let offset0 = i16::from(wparams[1]); + let weight1 = i16::from(wparams[2]); + let offset1 = i16::from(wparams[3]); + let wshift = (wparams[4] as u8) + 1; + let offset = (offset0 + offset1 + 1) >> 1; + let bias = (1 << wshift) >> 1; + + for (drow, (srow0, srow1)) in dst.chunks_mut(stride).zip(src0.chunks(16).zip(src1.chunks(16))).take(h) { + for (dst, (&src0, &src1)) in drow[..w].iter_mut().zip(srow0.iter().zip(srow1.iter())) { + *dst = clip8(((i16::from(src0) * weight0 + i16::from(src1) * weight1 + bias) >> wshift) + offset); + } + } +} + macro_rules! loop_filter { (lumaedge; $buf: expr, $off: expr, $step: expr, $alpha: expr, $beta: expr) => { let p2 = i16::from($buf[$off - $step * 3]); diff --git a/nihav-itu/src/codecs/h264/mb_recon.rs b/nihav-itu/src/codecs/h264/mb_recon.rs new file mode 100644 index 0000000..4f3b7cd --- /dev/null +++ b/nihav-itu/src/codecs/h264/mb_recon.rs @@ -0,0 +1,589 @@ +use nihav_core::frame::*; +use nihav_codec_support::codecs::MV; +use super::{CurrentMBInfo, I4X4_SCAN}; +use super::dsp::*; +use super::pic_ref::FrameRefs; +use super::slice::{SliceHeader, WeightInfo, DEF_WEIGHT_INFO}; +use super::types::*; + +fn pred_intra(frm: &mut NASimpleVideoFrame, sstate: &SliceState, mb_info: &CurrentMBInfo) { + let yoff = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * frm.stride[0]; + match mb_info.mb_type { + MBType::Intra16x16(imode, _, _) => { + let id = if imode != 2 || (sstate.has_top && sstate.has_left) { + imode as usize + } else if !sstate.has_top && !sstate.has_left { + IPRED8_DC128 + } else if !sstate.has_left { + IPRED8_DC_TOP + } else { + IPRED8_DC_LEFT + }; + IPRED_FUNCS16X16[id](&mut frm.data[yoff..], frm.stride[0], &sstate.top_line_y[sstate.mb_x * 16..], &sstate.left_y); + }, + MBType::Intra8x8 => { + let mut ictx = IPred8Context::new(); + for part in 0..4 { + let x = (part & 1) * 2; + let y = part & 2; + let blk4 = x + y * 4; + + let cur_yoff = yoff + x * 4 + y * 4 * frm.stride[0]; + let has_top = y > 0 || sstate.has_top; + let has_left = x > 0 || sstate.has_left; + let imode = mb_info.ipred[blk4]; + let id = if imode != IntraPredMode::DC || (has_top && has_left) { + let im_id: u8 = imode.into(); + im_id as usize + } else if !has_top && !has_left { + IPRED4_DC128 + } else if !has_left { + IPRED4_DC_TOP + } else { + IPRED4_DC_LEFT + }; + let mb_idx = sstate.mb_x + sstate.mb_y * sstate.mb_w; + let noright = (y == 2 || sstate.mb_x == sstate.mb_w - 1 || mb_idx < sstate.mb_start + sstate.mb_w) && (x == 2); + let has_tl = (has_top && x > 0) || (has_left && y > 0) || (x == 0 && y == 0 && sstate.mb_x > 0 && mb_idx > sstate.mb_start + sstate.mb_w); + if id != IPRED4_DC128 { + let top = if y == 0 { + &sstate.top_line_y[sstate.mb_x * 16 + x * 4..] + } else { + &frm.data[cur_yoff - frm.stride[0]..] + }; + let mut left_buf = [0; 9]; + let left = if x == 0 { + &sstate.left_y[y * 4..] + } else { + if has_tl { + if y == 0 { + left_buf[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1]; + } else { + left_buf[0] = frm.data[cur_yoff - 1 - frm.stride[0]]; + } + } + if has_left { + for (dst, src) in left_buf[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) { + *dst = src[0]; + } + } + &left_buf + }; + ictx.fill(top, left, has_top, has_top && !noright, has_left, has_tl); + } + IPRED_FUNCS8X8_LUMA[id](&mut frm.data[cur_yoff..], frm.stride[0], &ictx); + if mb_info.coded[blk4] { + add_coeffs8(frm.data, cur_yoff, frm.stride[0], &mb_info.coeffs8x8[part].coeffs); + } + } + }, + MBType::Intra4x4 => { + for &(x,y) in I4X4_SCAN.iter() { + let x = x as usize; + let y = y as usize; + let cur_yoff = yoff + x * 4 + y * 4 * frm.stride[0]; + let has_top = y > 0 || sstate.has_top; + let has_left = x > 0 || sstate.has_left; + let imode = mb_info.ipred[x + y * 4]; + let id = if imode != IntraPredMode::DC || (has_top && has_left) { + let im_id: u8 = imode.into(); + im_id as usize + } else if !has_top && !has_left { + IPRED4_DC128 + } else if !has_left { + IPRED4_DC_TOP + } else { + IPRED4_DC_LEFT + }; + let noright = (sstate.mb_x == sstate.mb_w - 1 || sstate.mb_x + sstate.mb_y * sstate.mb_w < sstate.mb_start + sstate.mb_w) && (x == 3); + let tr: [u8; 4] = if y == 0 { + let tsrc = &sstate.top_line_y[sstate.mb_x * 16 + x * 4..]; + if has_top && !noright { + [tsrc[4], tsrc[5], tsrc[6], tsrc[7]] + } else if has_top { + [tsrc[3]; 4] + } else { + [0; 4] + } + } else if (x & 1) == 0 || (x == 1 && y == 2) { + let i = cur_yoff - frm.stride[0]; + [frm.data[i + 4], frm.data[i + 5], frm.data[i + 6], frm.data[i + 7]] + } else { + let i = cur_yoff - frm.stride[0]; + [frm.data[i + 3], frm.data[i + 3], frm.data[i + 3], frm.data[i + 3]] + }; + let mut top = [128; 4]; + let mut left = [128; 9]; + if y == 0 { + if has_top { + top.copy_from_slice(&sstate.top_line_y[sstate.mb_x * 16 + x * 4..][..4]); + } + } else { + top.copy_from_slice(&frm.data[cur_yoff - frm.stride[0]..][..4]); + } + if x == 0 { + if has_left { + for (dst, &src) in left.iter_mut().zip(sstate.left_y[y * 4..].iter()) { + *dst = src; + } + } + } else { + if y == 0 { + if x == 0 { + left[0] = sstate.left_y[y * 4]; + } else if has_top { + left[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1]; + } + } else { + left[0] = frm.data[cur_yoff - frm.stride[0] - 1]; + } + for (dst, row) in left[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) { + *dst = row[0]; + } + } + IPRED_FUNCS4X4[id](&mut frm.data[cur_yoff..], frm.stride[0], &top, &left, &tr); + if mb_info.coded[x + y * 4] { + add_coeffs(frm.data, cur_yoff, frm.stride[0], &mb_info.coeffs[x + y * 4]); + } + } + }, + _ => unreachable!(), + }; + let id = if mb_info.chroma_ipred != 0 || (sstate.has_top && sstate.has_left) { + mb_info.chroma_ipred as usize + } else if !sstate.has_top && !sstate.has_left { + IPRED8_DC128 + } else if !sstate.has_left { + IPRED8_DC_TOP + } else { + IPRED8_DC_LEFT + }; + for chroma in 1..3 { + let off = frm.offset[chroma] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[chroma]; + let top = &sstate.top_line_c[chroma - 1][sstate.mb_x * 8..]; + IPRED_FUNCS8X8_CHROMA[id](&mut frm.data[off..], frm.stride[chroma], top, &sstate.left_c[chroma - 1]); + } +} + +fn add_luma(frm: &mut NASimpleVideoFrame, sstate: &SliceState, mb_info: &CurrentMBInfo) { + let mut yoff = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * frm.stride[0]; + if !mb_info.transform_size_8x8 { + for y in 0..4 { + for x in 0..4 { + if mb_info.coded[x + y * 4] { + add_coeffs(frm.data, yoff + x * 4, frm.stride[0], &mb_info.coeffs[x + y * 4]); + } + } + yoff += frm.stride[0] * 4; + } + } else { + for y in 0..2 { + for x in 0..2 { + if mb_info.coded[x * 2 + y * 2 * 4] { + add_coeffs8(frm.data, yoff + x * 8, frm.stride[0], &mb_info.coeffs8x8[x + y * 2].coeffs); + } + } + yoff += frm.stride[0] * 8; + } + } +} + +fn add_chroma(frm: &mut NASimpleVideoFrame, sstate: &SliceState, mb_info: &CurrentMBInfo) { + for chroma in 1..3 { + let mut off = frm.offset[chroma] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[chroma]; + for y in 0..2 { + for x in 0..2 { + let blk_no = 16 + (chroma - 1) * 4 + x + y * 2; + if mb_info.coded[blk_no] || mb_info.coeffs[blk_no][0] != 0 { + add_coeffs(frm.data, off + x * 4, frm.stride[chroma], &mb_info.coeffs[blk_no]); + } + } + off += frm.stride[chroma] * 4; + } + } +} + +fn do_p_mc(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV, ref_pic: Option>, weight: &WeightInfo) { + if let Some(buf) = ref_pic { + if !weight.is_weighted() { + do_mc(frm, buf, xpos, ypos, w, h, mv); + } else { + let mut ytmp = [0; 16 * 16]; + let mut utmp = [0; 16 * 16]; + let mut vtmp = [0; 16 * 16]; + mc_blocks(&mut ytmp, &mut utmp, &mut vtmp, buf, xpos, ypos, w, h, mv); + + let yoff = frm.offset[0] + xpos + ypos * frm.stride[0]; + let yw = if weight.luma_weighted { + [weight.luma_weight, weight.luma_offset, weight.luma_shift as i8] + } else { + [1, 0, 0] + }; + put_block_weighted(&mut frm.data[yoff..], frm.stride[0], &ytmp, w, h, yw); + + for chroma in 0..2 { + let cstride = frm.stride[chroma + 1]; + let coff = frm.offset[chroma + 1] + xpos / 2 + ypos / 2 * cstride; + let cw = if weight.chroma_weighted { + [weight.chroma_weight[chroma], weight.chroma_offset[chroma], weight.chroma_shift as i8] + } else { + [1, 0, 0] + }; + let csrc = if chroma == 0 { &utmp } else { &vtmp }; + put_block_weighted(&mut frm.data[coff..], cstride, csrc, w / 2, h / 2, cw); + } + } + } else { + gray_block(frm, xpos, ypos, w, h); + } +} + +fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usize, w: usize, h: usize, mv0: MV, ref_pic0: Option>, weight0: &WeightInfo, mv1: MV, ref_pic1: Option>, weight1: &WeightInfo, avg_buf: &mut NAVideoBufferRef) { + let do_weight = match (mode, weight0.is_weighted(), weight1.is_weighted()) { + (BMode::L0, true, _) => true, + (BMode::L1, _, true) => true, + (BMode::Bi, true, true) => true, + _ => false, + }; + if !do_weight { + match mode { + BMode::L0 => { + if let Some(buf) = ref_pic0 { + do_mc(frm, buf, xpos, ypos, w, h, mv0); + } else { + gray_block(frm, xpos, ypos, w, h); + } + }, + BMode::L1 => { + if let Some(buf) = ref_pic1 { + do_mc(frm, buf, xpos, ypos, w, h, mv1); + } else { + gray_block(frm, xpos, ypos, w, h); + } + }, + BMode::Bi => { + match (ref_pic0, ref_pic1) { + (Some(buf0), Some(buf1)) => { + do_mc(frm, buf0, xpos, ypos, w, h, mv0); + do_mc_avg(frm, buf1, xpos, ypos, w, h, mv1, avg_buf); + }, + (Some(buf0), None) => { + do_mc(frm, buf0, xpos, ypos, w, h, mv0); + }, + (None, Some(buf1)) => { + do_mc(frm, buf1, xpos, ypos, w, h, mv1); + }, + (None, None) => { + gray_block(frm, xpos, ypos, w, h); + }, + }; + }, + }; + } else { + let mut ytmp0 = [0x80; 16 * 16]; + let mut utmp0 = [0x80; 16 * 16]; + let mut vtmp0 = [0x80; 16 * 16]; + let mut ytmp1 = [0x80; 16 * 16]; + let mut utmp1 = [0x80; 16 * 16]; + let mut vtmp1 = [0x80; 16 * 16]; + match (mode, ref_pic0, ref_pic1) { + (BMode::L0, Some(buf), _) | (BMode::L1, _, Some(buf)) => { + let (mv, weight) = if mode == BMode::L0 { (mv0, weight0) } else { (mv1, weight1) }; + mc_blocks(&mut ytmp0, &mut utmp0, &mut vtmp0, buf, xpos, ypos, w, h, mv); + + let yoff = frm.offset[0] + xpos + ypos * frm.stride[0]; + let yw = if weight.luma_weighted { + [weight.luma_weight, weight.luma_offset, weight.luma_shift as i8] + } else { + [1, 0, 0] + }; + put_block_weighted(&mut frm.data[yoff..], frm.stride[0], &ytmp0, w, h, yw); + + for chroma in 0..2 { + let cstride = frm.stride[chroma + 1]; + let coff = frm.offset[chroma + 1] + xpos / 2 + ypos / 2 * cstride; + let cw = if weight.chroma_weighted { + [weight.chroma_weight[chroma], weight.chroma_offset[chroma], weight.chroma_shift as i8] + } else { + [1, 0, 0] + }; + let csrc = if chroma == 0 { &utmp0 } else { &vtmp0 }; + put_block_weighted(&mut frm.data[coff..], cstride, csrc, w / 2, h / 2, cw); + } + }, + (BMode::Bi, Some(buf0), Some(buf1)) => { // do both and avg + mc_blocks(&mut ytmp0, &mut utmp0, &mut vtmp0, buf0, xpos, ypos, w, h, mv0); + mc_blocks(&mut ytmp1, &mut utmp1, &mut vtmp1, buf1, xpos, ypos, w, h, mv1); + + let yoff = frm.offset[0] + xpos + ypos * frm.stride[0]; + let yw = match (weight0.luma_weighted, weight1.luma_weighted) { + (true, true) => [weight0.luma_weight, weight0.luma_offset, weight1.luma_weight, weight1.luma_offset, weight0.luma_shift as i8], + (true, false) => [weight0.luma_weight, weight0.luma_offset, 1 << weight0.luma_shift, 0, weight0.luma_shift as i8], + (false, true) => [1 << weight1.luma_shift, 0, weight1.luma_weight, weight1.luma_offset, weight1.luma_shift as i8], + (false, false) => [1, 0, 1, 0, 0], + }; + put_block_weighted2(&mut frm.data[yoff..], frm.stride[0], &ytmp0, &ytmp1, w, h, yw); + + for chroma in 0..2 { + let cstride = frm.stride[chroma + 1]; + let coff = frm.offset[chroma + 1] + xpos / 2 + ypos / 2 * cstride; + let cw0 = weight0.chroma_weight[chroma]; + let co0 = weight0.chroma_offset[chroma]; + let cw1 = weight1.chroma_weight[chroma]; + let co1 = weight1.chroma_offset[chroma]; + let cw = match (weight0.chroma_weighted, weight1.chroma_weighted) { + (true, true) => [cw0, co0, cw1, co1, weight0.luma_shift as i8], + (true, false) => [cw0, co0, 1 << weight0.luma_shift, 0, weight0.luma_shift as i8], + (false, true) => [1 << weight1.luma_shift, 0, cw1, co1, weight1.luma_shift as i8], + (false, false) => [1, 0, 1, 0, 0], + }; + let csrc0 = if chroma == 0 { &utmp0 } else { &vtmp0 }; + let csrc1 = if chroma == 0 { &utmp1 } else { &vtmp1 }; + put_block_weighted2(&mut frm.data[coff..], cstride, csrc0, csrc1, w / 2, h / 2, cw); + } + }, + _ => { + gray_block(frm, xpos, ypos, w, h); + }, + }; + } +} + +fn get_weights(slice_hdr: &SliceHeader, frame_refs: &FrameRefs, mode: BMode, weight_mode: u8, ref_l0: PicRef, ref_l1: PicRef) -> (WeightInfo, WeightInfo) { + let idx_l0 = ref_l0.index(); + let idx_l1 = ref_l1.index(); + if mode != BMode::Bi || weight_mode != 2 { + (slice_hdr.get_weight(0, idx_l0), slice_hdr.get_weight(1, idx_l1)) + } else if let (Some(Some(ref pic0)), Some(Some(ref pic1))) = (frame_refs.ref_list0.get(idx_l0), frame_refs.ref_list1.get(idx_l1)) { + let r0_poc = pic0.full_id as u16; + let r1_poc = pic1.full_id as u16; + let cur_id = frame_refs.cur_id as u16; + if (r0_poc == r1_poc) || pic0.long_term.is_some() || pic1.long_term.is_some() { + return (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO); + } + + let td = (i32::from(r1_poc) - i32::from(r0_poc)).max(-128).min(127); + let tx = (16384 + (td / 2).abs()) / td; + let tb = (i32::from(cur_id) - i32::from(r0_poc)).max(-128).min(127); + let scale = ((tb * tx + 32) >> 6).max(-1024).min(1023); + if scale == 128 || (scale >> 2) < -64 || (scale >> 2) > 128 { + return (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO); + } + let w1 = (scale >> 2) as i8; + let w0 = 64 - w1; + + let weight0 = WeightInfo { + luma_weighted: true, + luma_weight: w0, + luma_offset: 0, + luma_shift: 5, + chroma_weighted: true, + chroma_weight: [w0; 2], + chroma_offset: [0; 2], + chroma_shift: 5, + }; + let weight1 = WeightInfo { + luma_weighted: true, + luma_weight: w1, + luma_offset: 0, + luma_shift: 5, + chroma_weighted: true, + chroma_weight: [w1; 2], + chroma_offset: [0; 2], + chroma_shift: 5, + }; + + (weight0, weight1) + } else { + (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO) + } +} + +pub fn recon_mb(frm: &mut NASimpleVideoFrame, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &FrameRefs, avg_buf: &mut NAVideoBufferRef, weight_mode: u8) { + let xpos = sstate.mb_x * 16; + let ypos = sstate.mb_y * 16; + + match mb_info.mb_type { + MBType::Intra16x16(_, _, _) => { + pred_intra(frm, &sstate, &mb_info); + }, + MBType::Intra4x4 | MBType::Intra8x8 => { + pred_intra(frm, &sstate, &mb_info); + }, + MBType::PCM => {}, + MBType::PSkip => { + let mv = sstate.get_cur_blk4(0).mv[0]; + let rpic = frame_refs.select_ref_pic(0, 0); + let weight = &slice_hdr.get_weight(0, 0); + do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight); + }, + MBType::P16x16 => { + let mv = sstate.get_cur_blk4(0).mv[0]; + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index()); + do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight); + }, + MBType::P16x8 | MBType::P8x16 => { + let (bw, bh, bx, by) = if mb_info.mb_type == MBType::P16x8 { + (16, 8, 0, 8) + } else { + (8, 16, 8, 0) + }; + let mv = sstate.get_cur_blk4(0).mv[0]; + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index()); + do_p_mc(frm, xpos, ypos, bw, bh, mv, rpic, weight); + let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0]; + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[1].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[1].index()); + do_p_mc(frm, xpos + bx, ypos + by, bw, bh, mv, rpic, weight); + }, + MBType::P8x8 | MBType::P8x8Ref0 => { + for part in 0..4 { + let bx = (part & 1) * 8; + let by = (part & 2) * 4; + let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0]; + let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index()); + let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[part].index()); + + match mb_info.sub_mb_type[part] { + SubMBType::P8x8 => { + do_p_mc(frm, xpos + bx, ypos + by, 8, 8, mv, rpic, weight); + }, + SubMBType::P8x4 => { + do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic.clone(), weight); + let mv = sstate.get_cur_blk4(bx / 4 + by + 4).mv[0]; + do_p_mc(frm, xpos + bx, ypos + by + 4, 8, 4, mv, rpic, weight); + }, + SubMBType::P4x8 => { + do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic.clone(), weight); + let mv = sstate.get_cur_blk4(bx / 4 + by + 1).mv[0]; + do_p_mc(frm, xpos + bx + 4, ypos + by, 4, 8, mv, rpic, weight); + }, + SubMBType::P4x4 => { + for sb_no in 0..4 { + let sxpos = xpos + bx + (sb_no & 1) * 4; + let sypos = ypos + by + (sb_no & 2) * 2; + let sblk_no = (bx / 4 + (sb_no & 1)) + ((by / 4) + (sb_no >> 1)) * 4; + let mv = sstate.get_cur_blk4(sblk_no).mv[0]; + do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic.clone(), weight); + } + }, + _ => unreachable!(), + }; + } + }, + MBType::B16x16(mode) => { + let mv0 = sstate.get_cur_blk4(0).mv[0]; + let rpic0 = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); + let mv1 = sstate.get_cur_blk4(0).mv[1]; + let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[0].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, mb_info.ref_l0[0], mb_info.ref_l1[0]); + do_b_mc(frm, mode, xpos, ypos, 16, 16, mv0, rpic0, &weight0, mv1, rpic1, &weight1, avg_buf); + }, + MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => { + let (pw, ph) = mb_info.mb_type.size(); + let (px, py) = (pw & 8, ph & 8); + let modes = [mode0, mode1]; + let (mut bx, mut by) = (0, 0); + for part in 0..2 { + let blk = if part == 0 { 0 } else { (px / 4) + py }; + let mv0 = sstate.get_cur_blk4(blk).mv[0]; + let rpic0 = frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index()); + let mv1 = sstate.get_cur_blk4(blk).mv[1]; + let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[part].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, modes[part], weight_mode, mb_info.ref_l0[part], mb_info.ref_l1[part]); + do_b_mc(frm, modes[part], xpos + bx, ypos + by, pw, ph, mv0, rpic0, &weight0, mv1, rpic1, &weight1, avg_buf); + bx += px; + by += py; + } + }, + MBType::Direct | MBType::BSkip => { + let colo_mb_type = frame_refs.get_colocated_info(sstate.mb_x, sstate.mb_y).0.mb_type; + let is_16x16 = colo_mb_type.is_16x16_ref(); + + if is_16x16 { + let mv = sstate.get_cur_blk4(0).mv; + let ref_idx = sstate.get_cur_blk8(0).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); + do_b_mc(frm, BMode::Bi, xpos, ypos, 16, 16, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf); + } else { + for blk4 in 0..16 { + let mv = sstate.get_cur_blk4(blk4).mv; + let ref_idx = sstate.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); + do_b_mc(frm, BMode::Bi, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf); + } + } + sstate.apply_to_blk8(|blk8| { blk8.ref_idx[0].set_direct(); blk8.ref_idx[1].set_direct(); }); + }, + MBType::B8x8 => { + for part in 0..4 { + let ridx = sstate.get_cur_blk8(part).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ridx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ridx[1].index()); + let subtype = mb_info.sub_mb_type[part]; + let blk8 = (part & 1) * 2 + (part & 2) * 4; + let mut bx = (part & 1) * 8; + let mut by = (part & 2) * 4; + match subtype { + SubMBType::Direct8x8 => { + for blk in 0..4 { + let mv = sstate.get_cur_blk4(bx / 4 + (by / 4) * 4).mv; + let ref_idx = sstate.get_cur_blk8(bx / 8 + (by / 8) * 2).ref_idx; + let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index()); + let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index()); + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]); + do_b_mc(frm, BMode::Bi, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf); + bx += 4; + if blk == 1 { + bx -= 8; + by += 4; + } + } + sstate.get_cur_blk8(part).ref_idx[0].set_direct(); + sstate.get_cur_blk8(part).ref_idx[1].set_direct(); + }, + SubMBType::B8x8(mode) => { + let mv = sstate.get_cur_blk4(blk8).mv; + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); + do_b_mc(frm, mode, xpos + bx, ypos + by, 8, 8, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf); + }, + SubMBType::B8x4(mode) | SubMBType::B4x8(mode) => { + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); + let (pw, ph) = subtype.size(); + let mv = sstate.get_cur_blk4(blk8).mv; + do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, avg_buf); + let addr2 = blk8 + (pw & 4) / 4 + (ph & 4); + let mv = sstate.get_cur_blk4(addr2).mv; + do_b_mc(frm, mode, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf); + }, + SubMBType::B4x4(mode) => { + let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]); + for i in 0..4 { + let addr2 = blk8 + (i & 1) + (i & 2) * 2; + let mv = sstate.get_cur_blk4(addr2).mv; + do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, avg_buf); + bx += 4; + if i == 1 { + bx -= 8; + by += 4; + } + } + }, + _ => unreachable!(), + }; + } + }, + }; + if !mb_info.mb_type.is_skip() { + if mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 { + add_luma(frm, &sstate, &mb_info); + } + add_chroma(frm, &sstate, &mb_info); + } +} diff --git a/nihav-itu/src/codecs/h264/mod.rs b/nihav-itu/src/codecs/h264/mod.rs index b6f6dd5..59fec0c 100644 --- a/nihav-itu/src/codecs/h264/mod.rs +++ b/nihav-itu/src/codecs/h264/mod.rs @@ -1,6 +1,5 @@ /* known bugs and limitations: - * weighted motion compensation is not implemented * wrong slice boundary filtering * not fully correct deblock strength selection for P/B-macroblocks * scaling lists for 4x4 blocks @@ -29,6 +28,8 @@ mod cavlc; use cavlc::*; mod loopfilter; use loopfilter::*; +mod mb_recon; +use mb_recon::*; mod sets; use sets::*; mod slice; @@ -447,200 +448,6 @@ println!("PAFF?"); Ok(()) } - fn pred_intra(frm: &mut NASimpleVideoFrame, sstate: &SliceState, mb_info: &CurrentMBInfo) { - let yoff = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * frm.stride[0]; - match mb_info.mb_type { - MBType::Intra16x16(imode, _, _) => { - let id = if imode != 2 || (sstate.has_top && sstate.has_left) { - imode as usize - } else if !sstate.has_top && !sstate.has_left { - IPRED8_DC128 - } else if !sstate.has_left { - IPRED8_DC_TOP - } else { - IPRED8_DC_LEFT - }; - IPRED_FUNCS16X16[id](&mut frm.data[yoff..], frm.stride[0], &sstate.top_line_y[sstate.mb_x * 16..], &sstate.left_y); - }, - MBType::Intra8x8 => { - let mut ictx = IPred8Context::new(); - for part in 0..4 { - let x = (part & 1) * 2; - let y = part & 2; - let blk4 = x + y * 4; - - let cur_yoff = yoff + x * 4 + y * 4 * frm.stride[0]; - let has_top = y > 0 || sstate.has_top; - let has_left = x > 0 || sstate.has_left; - let imode = mb_info.ipred[blk4]; - let id = if imode != IntraPredMode::DC || (has_top && has_left) { - let im_id: u8 = imode.into(); - im_id as usize - } else if !has_top && !has_left { - IPRED4_DC128 - } else if !has_left { - IPRED4_DC_TOP - } else { - IPRED4_DC_LEFT - }; - let mb_idx = sstate.mb_x + sstate.mb_y * sstate.mb_w; - let noright = (y == 2 || sstate.mb_x == sstate.mb_w - 1 || mb_idx < sstate.mb_start + sstate.mb_w) && (x == 2); - let has_tl = (has_top && x > 0) || (has_left && y > 0) || (x == 0 && y == 0 && sstate.mb_x > 0 && mb_idx > sstate.mb_start + sstate.mb_w); - if id != IPRED4_DC128 { - let top = if y == 0 { - &sstate.top_line_y[sstate.mb_x * 16 + x * 4..] - } else { - &frm.data[cur_yoff - frm.stride[0]..] - }; - let mut left_buf = [0; 9]; - let left = if x == 0 { - &sstate.left_y[y * 4..] - } else { - if has_tl { - if y == 0 { - left_buf[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1]; - } else { - left_buf[0] = frm.data[cur_yoff - 1 - frm.stride[0]]; - } - } - if has_left { - for (dst, src) in left_buf[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) { - *dst = src[0]; - } - } - &left_buf - }; - ictx.fill(top, left, has_top, has_top && !noright, has_left, has_tl); - } - IPRED_FUNCS8X8_LUMA[id](&mut frm.data[cur_yoff..], frm.stride[0], &ictx); - if mb_info.coded[blk4] { - add_coeffs8(frm.data, cur_yoff, frm.stride[0], &mb_info.coeffs8x8[part].coeffs); - } - } - }, - MBType::Intra4x4 => { - for &(x,y) in I4X4_SCAN.iter() { - let x = x as usize; - let y = y as usize; - let cur_yoff = yoff + x * 4 + y * 4 * frm.stride[0]; - let has_top = y > 0 || sstate.has_top; - let has_left = x > 0 || sstate.has_left; - let imode = mb_info.ipred[x + y * 4]; - let id = if imode != IntraPredMode::DC || (has_top && has_left) { - let im_id: u8 = imode.into(); - im_id as usize - } else if !has_top && !has_left { - IPRED4_DC128 - } else if !has_left { - IPRED4_DC_TOP - } else { - IPRED4_DC_LEFT - }; - let noright = (sstate.mb_x == sstate.mb_w - 1 || sstate.mb_x + sstate.mb_y * sstate.mb_w < sstate.mb_start + sstate.mb_w) && (x == 3); - let tr: [u8; 4] = if y == 0 { - let tsrc = &sstate.top_line_y[sstate.mb_x * 16 + x * 4..]; - if has_top && !noright { - [tsrc[4], tsrc[5], tsrc[6], tsrc[7]] - } else if has_top { - [tsrc[3]; 4] - } else { - [0; 4] - } - } else if (x & 1) == 0 || (x == 1 && y == 2) { - let i = cur_yoff - frm.stride[0]; - [frm.data[i + 4], frm.data[i + 5], frm.data[i + 6], frm.data[i + 7]] - } else { - let i = cur_yoff - frm.stride[0]; - [frm.data[i + 3], frm.data[i + 3], frm.data[i + 3], frm.data[i + 3]] - }; - let mut top = [128; 4]; - let mut left = [128; 9]; - if y == 0 { - if has_top { - top.copy_from_slice(&sstate.top_line_y[sstate.mb_x * 16 + x * 4..][..4]); - } - } else { - top.copy_from_slice(&frm.data[cur_yoff - frm.stride[0]..][..4]); - } - if x == 0 { - if has_left { - for (dst, &src) in left.iter_mut().zip(sstate.left_y[y * 4..].iter()) { - *dst = src; - } - } - } else { - if y == 0 { - if x == 0 { - left[0] = sstate.left_y[y * 4]; - } else if has_top { - left[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1]; - } - } else { - left[0] = frm.data[cur_yoff - frm.stride[0] - 1]; - } - for (dst, row) in left[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) { - *dst = row[0]; - } - } - IPRED_FUNCS4X4[id](&mut frm.data[cur_yoff..], frm.stride[0], &top, &left, &tr); - if mb_info.coded[x + y * 4] { - add_coeffs(frm.data, cur_yoff, frm.stride[0], &mb_info.coeffs[x + y * 4]); - } - } - }, - _ => unreachable!(), - }; - let id = if mb_info.chroma_ipred != 0 || (sstate.has_top && sstate.has_left) { - mb_info.chroma_ipred as usize - } else if !sstate.has_top && !sstate.has_left { - IPRED8_DC128 - } else if !sstate.has_left { - IPRED8_DC_TOP - } else { - IPRED8_DC_LEFT - }; - for chroma in 1..3 { - let off = frm.offset[chroma] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[chroma]; - let top = &sstate.top_line_c[chroma - 1][sstate.mb_x * 8..]; - IPRED_FUNCS8X8_CHROMA[id](&mut frm.data[off..], frm.stride[chroma], top, &sstate.left_c[chroma - 1]); - } - } - fn add_luma(frm: &mut NASimpleVideoFrame, sstate: &SliceState, mb_info: &CurrentMBInfo) { - let mut yoff = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * frm.stride[0]; - if !mb_info.transform_size_8x8 { - for y in 0..4 { - for x in 0..4 { - if mb_info.coded[x + y * 4] { - add_coeffs(frm.data, yoff + x * 4, frm.stride[0], &mb_info.coeffs[x + y * 4]); - } - } - yoff += frm.stride[0] * 4; - } - } else { - for y in 0..2 { - for x in 0..2 { - if mb_info.coded[x * 2 + y * 2 * 4] { - add_coeffs8(frm.data, yoff + x * 8, frm.stride[0], &mb_info.coeffs8x8[x + y * 2].coeffs); - } - } - yoff += frm.stride[0] * 8; - } - } - } - fn add_chroma(frm: &mut NASimpleVideoFrame, sstate: &SliceState, mb_info: &CurrentMBInfo) { - for chroma in 1..3 { - let mut off = frm.offset[chroma] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[chroma]; - for y in 0..2 { - for x in 0..2 { - let blk_no = 16 + (chroma - 1) * 4 + x + y * 2; - if mb_info.coded[blk_no] || mb_info.coeffs[blk_no][0] != 0 { - add_coeffs(frm.data, off + x * 4, frm.stride[chroma], &mb_info.coeffs[blk_no]); - } - } - off += frm.stride[chroma] * 4; - } - } - } fn pred_mv(sstate: &mut SliceState, frame_refs: &FrameRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool) { let mb_type = mb_info.mb_type; if !mb_type.is_4x4() { @@ -701,7 +508,7 @@ println!("PAFF?"); } } #[allow(clippy::cognitive_complexity)] - fn handle_macroblock(&mut self, mb_info: &mut CurrentMBInfo) { + fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo) { let pps = &self.pps[self.cur_pps]; let qp_y = mb_info.qp_y; @@ -778,172 +585,16 @@ println!("PAFF?"); let ypos = self.sstate.mb_y * 16; if let Some(ref mut pic) = self.cur_pic { let mut frm = NASimpleVideoFrame::from_video_buf(&mut pic.buf).unwrap(); - match mb_info.mb_type { - MBType::Intra16x16(_, _, _) => { - Self::pred_intra(&mut frm, &self.sstate, &mb_info); - }, - MBType::Intra4x4 | MBType::Intra8x8 => { - Self::pred_intra(&mut frm, &self.sstate, &mb_info); - }, - MBType::PCM => {}, - MBType::PSkip => { - let mv = self.sstate.get_cur_blk4(0).mv[0]; - let rpic = self.frame_refs.select_ref_pic(0, 0); - Self::do_p_mc(&mut frm, xpos, ypos, 16, 16, mv, rpic); - }, - MBType::P16x16 => { - let mv = self.sstate.get_cur_blk4(0).mv[0]; - let rpic = self.frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); - Self::do_p_mc(&mut frm, xpos, ypos, 16, 16, mv, rpic); - }, - MBType::P16x8 | MBType::P8x16 => { - let (bw, bh, bx, by) = if mb_info.mb_type == MBType::P16x8 { - (16, 8, 0, 8) - } else { - (8, 16, 8, 0) - }; - let mv = self.sstate.get_cur_blk4(0).mv[0]; - let rpic = self.frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); - Self::do_p_mc(&mut frm, xpos, ypos, bw, bh, mv, rpic); - let mv = self.sstate.get_cur_blk4(bx / 4 + by).mv[0]; - let rpic = self.frame_refs.select_ref_pic(0, mb_info.ref_l0[1].index()); - Self::do_p_mc(&mut frm, xpos + bx, ypos + by, bw, bh, mv, rpic); - }, - MBType::P8x8 | MBType::P8x8Ref0 => { - for part in 0..4 { - let bx = (part & 1) * 8; - let by = (part & 2) * 4; - if let Some(buf) = self.frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index()) { - let mv = self.sstate.get_cur_blk4(bx / 4 + by).mv[0]; - - match mb_info.sub_mb_type[part] { - SubMBType::P8x8 => { - do_mc(&mut frm, buf, xpos + bx, ypos + by, 8, 8, mv); - }, - SubMBType::P8x4 => { - do_mc(&mut frm, buf.clone(), xpos + bx, ypos + by, 8, 4, mv); - let mv = self.sstate.get_cur_blk4(bx / 4 + by + 4).mv[0]; - do_mc(&mut frm, buf, xpos + bx, ypos + by + 4, 8, 4, mv); - }, - SubMBType::P4x8 => { - do_mc(&mut frm, buf.clone(), xpos + bx, ypos + by, 4, 8, mv); - let mv = self.sstate.get_cur_blk4(bx / 4 + by + 1).mv[0]; - do_mc(&mut frm, buf, xpos + bx + 4, ypos + by, 4, 8, mv); - }, - SubMBType::P4x4 => { - for sb_no in 0..4 { - let sxpos = xpos + bx + (sb_no & 1) * 4; - let sypos = ypos + by + (sb_no & 2) * 2; - let sblk_no = (bx / 4 + (sb_no & 1)) + ((by / 4) + (sb_no >> 1)) * 4; - let mv = self.sstate.get_cur_blk4(sblk_no).mv[0]; - do_mc(&mut frm, buf.clone(), sxpos, sypos, 4, 4, mv); - } - }, - _ => unreachable!(), - }; - } else { - gray_block(&mut frm, xpos + bx, ypos + by, 8, 8); - } - } - }, - MBType::B16x16(mode) => { - let mv0 = self.sstate.get_cur_blk4(0).mv[0]; - let rpic0 = self.frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index()); - let mv1 = self.sstate.get_cur_blk4(0).mv[1]; - let rpic1 = self.frame_refs.select_ref_pic(1, mb_info.ref_l1[0].index()); - Self::do_b_mc(&mut frm, mode, xpos, ypos, 16, 16, mv0, rpic0, mv1, rpic1, &mut self.avg_buf); - }, - MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => { - let (pw, ph) = mb_info.mb_type.size(); - let (px, py) = (pw & 8, ph & 8); - let modes = [mode0, mode1]; - let (mut bx, mut by) = (0, 0); - for part in 0..2 { - let blk = if part == 0 { 0 } else { (px / 4) + py }; - let mv0 = self.sstate.get_cur_blk4(blk).mv[0]; - let rpic0 = self.frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index()); - let mv1 = self.sstate.get_cur_blk4(blk).mv[1]; - let rpic1 = self.frame_refs.select_ref_pic(1, mb_info.ref_l1[part].index()); - Self::do_b_mc(&mut frm, modes[part], xpos + bx, ypos + by, pw, ph, mv0, rpic0, mv1, rpic1, &mut self.avg_buf); - bx += px; - by += py; - } - }, - MBType::Direct | MBType::BSkip => { - let is_16x16 = self.frame_refs.get_colocated_info(self.sstate.mb_x, self.sstate.mb_y).0.mb_type.is_16x16(); - if is_16x16 || !self.temporal_mv { - let mv = self.sstate.get_cur_blk4(0).mv; - let ref_idx = self.sstate.get_cur_blk8(0).ref_idx; - let rpic0 = self.frame_refs.select_ref_pic(0, ref_idx[0].index()); - let rpic1 = self.frame_refs.select_ref_pic(1, ref_idx[1].index()); - Self::do_b_mc(&mut frm, BMode::Bi, xpos, ypos, 16, 16, mv[0], rpic0, mv[1], rpic1, &mut self.avg_buf); + if mb_info.mb_type != MBType::PCM { + let weight_mode = if self.pps[self.cur_pps].weighted_pred && slice_hdr.slice_type.is_p() { + 1 + } else if slice_hdr.slice_type.is_b() { + self.pps[self.cur_pps].weighted_bipred_idc } else { - for blk4 in 0..16 { - let mv = self.sstate.get_cur_blk4(blk4).mv; - let ref_idx = self.sstate.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx; - let rpic0 = self.frame_refs.select_ref_pic(0, ref_idx[0].index()); - let rpic1 = self.frame_refs.select_ref_pic(1, ref_idx[1].index()); - Self::do_b_mc(&mut frm, BMode::Bi, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, 4, 4, mv[0], rpic0, mv[1], rpic1, &mut self.avg_buf); - } - } - self.sstate.apply_to_blk8(|blk8| { blk8.ref_idx[0].set_direct(); blk8.ref_idx[1].set_direct(); }); - }, - MBType::B8x8 => { - for part in 0..4 { - let ridx = self.sstate.get_cur_blk8(part).ref_idx; - let rpic0 = self.frame_refs.select_ref_pic(0, ridx[0].index()); - let rpic1 = self.frame_refs.select_ref_pic(1, ridx[1].index()); - let subtype = mb_info.sub_mb_type[part]; - let blk8 = (part & 1) * 2 + (part & 2) * 4; - let mut bx = (part & 1) * 8; - let mut by = (part & 2) * 4; - match subtype { - SubMBType::Direct8x8 => { - for blk in 0..4 { - let mv = self.sstate.get_cur_blk4(bx / 4 + (by / 4) * 4).mv; - let ref_idx = self.sstate.get_cur_blk8(bx / 8 + (by / 8) * 2).ref_idx; - let rpic0 = self.frame_refs.select_ref_pic(0, ref_idx[0].index()); - let rpic1 = self.frame_refs.select_ref_pic(1, ref_idx[1].index()); - Self::do_b_mc(&mut frm, BMode::Bi, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, mv[1], rpic1, &mut self.avg_buf); - bx += 4; - if blk == 1 { - bx -= 8; - by += 4; - } - } - self.sstate.get_cur_blk8(part).ref_idx[0].set_direct(); - self.sstate.get_cur_blk8(part).ref_idx[1].set_direct(); - }, - SubMBType::B8x8(mode) => { - let mv = self.sstate.get_cur_blk4(blk8).mv; - Self::do_b_mc(&mut frm, mode, xpos + bx, ypos + by, 8, 8, mv[0], rpic0, mv[1], rpic1, &mut self.avg_buf); - }, - SubMBType::B8x4(mode) | SubMBType::B4x8(mode) => { - let (pw, ph) = subtype.size(); - let mv = self.sstate.get_cur_blk4(blk8).mv; - Self::do_b_mc(&mut frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0.clone(), mv[1], rpic1.clone(), &mut self.avg_buf); - let addr2 = blk8 + (pw & 4) / 4 + (ph & 4); - let mv = self.sstate.get_cur_blk4(addr2).mv; - Self::do_b_mc(&mut frm, mode, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph, mv[0], rpic0, mv[1], rpic1, &mut self.avg_buf); - }, - SubMBType::B4x4(mode) => { - for i in 0..4 { - let addr2 = blk8 + (i & 1) + (i & 2) * 2; - let mv = self.sstate.get_cur_blk4(addr2).mv; - Self::do_b_mc(&mut frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0.clone(), mv[1], rpic1.clone(), &mut self.avg_buf); - bx += 4; - if i == 1 { - bx -= 8; - by += 4; - } - } - }, - _ => unreachable!(), - }; - } - }, - }; - if mb_info.mb_type == MBType::PCM { + 0 + }; + recon_mb(&mut frm, slice_hdr, &mb_info, &mut self.sstate, &self.frame_refs, &mut self.avg_buf, weight_mode); + } else { for (dline, src) in frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..].chunks_mut(frm.stride[0]).take(16).zip(self.ipcm_buf.chunks(16)) { dline[..16].copy_from_slice(src); } @@ -953,11 +604,6 @@ println!("PAFF?"); for (dline, src) in frm.data[frm.offset[2] + xpos/2 + ypos/2 * frm.stride[2]..].chunks_mut(frm.stride[2]).take(8).zip(self.ipcm_buf[256 + 64..].chunks(8)) { dline[..8].copy_from_slice(src); } - } else if !mb_info.mb_type.is_skip() { - if mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 { - Self::add_luma(&mut frm, &self.sstate, &mb_info); - } - Self::add_chroma(&mut frm, &self.sstate, &mb_info); } /*match mb_info.mb_type { MBType::BSkip | MBType::Direct | MBType::B16x16(_) | MBType::B16x8(_, _) | MBType::B8x16(_, _) | MBType::B8x8 => { @@ -995,48 +641,6 @@ _ => {}, } self.sstate.next_mb(); } - fn do_p_mc(frm: &mut NASimpleVideoFrame, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV, ref_pic: Option>) { - if let Some(buf) = ref_pic { - do_mc(frm, buf, xpos, ypos, w, h, mv); - } else { - gray_block(frm, xpos, ypos, w, h); - } - } - fn do_b_mc(frm: &mut NASimpleVideoFrame, mode: BMode, xpos: usize, ypos: usize, w: usize, h: usize, mv0: MV, ref_pic0: Option>, mv1: MV, ref_pic1: Option>, avg_buf: &mut NAVideoBufferRef) { - match mode { - BMode::L0 => { - if let Some(buf) = ref_pic0 { - do_mc(frm, buf, xpos, ypos, w, h, mv0); - } else { - gray_block(frm, xpos, ypos, w, h); - } - }, - BMode::L1 => { - if let Some(buf) = ref_pic1 { - do_mc(frm, buf, xpos, ypos, w, h, mv1); - } else { - gray_block(frm, xpos, ypos, w, h); - } - }, - BMode::Bi => { - match (ref_pic0, ref_pic1) { - (Some(buf0), Some(buf1)) => { - do_mc(frm, buf0, xpos, ypos, w, h, mv0); - do_mc_avg(frm, buf1, xpos, ypos, w, h, mv1, avg_buf); - }, - (Some(buf0), None) => { - do_mc(frm, buf0, xpos, ypos, w, h, mv0); - }, - (None, Some(buf1)) => { - do_mc(frm, buf1, xpos, ypos, w, h, mv1); - }, - (None, None) => { - gray_block(frm, xpos, ypos, w, h); - }, - }; - }, - }; - } fn decode_slice_cavlc(&mut self, br: &mut BitReader, slice_hdr: &SliceHeader, full_size: usize) -> DecoderResult { const INTRA_CBP: [u8; 48] = [ 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, @@ -1068,7 +672,7 @@ _ => {}, validate!(mb_idx + mb_skip_run <= self.num_mbs); mb_info.mb_type = skip_type; for _ in 0..mb_skip_run { - self.handle_macroblock(&mut mb_info); + self.handle_macroblock(slice_hdr, &mut mb_info); mb_idx += 1; } if mb_idx == self.num_mbs || br.tell() >= full_size { @@ -1134,7 +738,7 @@ _ => {}, decode_residual_cavlc(br, &mut self.sstate, &mut mb_info, &self.cavlc_cb)?; } } - self.handle_macroblock(&mut mb_info); + self.handle_macroblock(slice_hdr, &mut mb_info); } mb_idx += 1; } @@ -1240,7 +844,7 @@ _ => {}, mb_info.transform_size_8x8 = false; last_qp_diff = false; } - self.handle_macroblock(&mut mb_info); + self.handle_macroblock(slice_hdr, &mut mb_info); prev_mb_skipped = mb_skip; if !(self.is_mbaff && ((mb_idx & 1) == 0)) && cabac.decode_terminate() { if let Some(ref mut pic) = self.cur_pic { diff --git a/nihav-itu/src/codecs/h264/slice.rs b/nihav-itu/src/codecs/h264/slice.rs index 864eec7..5c70729 100644 --- a/nihav-itu/src/codecs/h264/slice.rs +++ b/nihav-itu/src/codecs/h264/slice.rs @@ -50,14 +50,22 @@ const SLICE_TYPES: [SliceType; 10] = [ SliceType::P, SliceType::B, SliceType::I, SliceType::SP, SliceType::SI, ]; -#[derive(Clone,Copy)] +#[derive(Clone,Copy,Default)] pub struct WeightInfo { pub luma_weighted: bool, pub luma_weight: i8, pub luma_offset: i8, + pub luma_shift: u8, pub chroma_weighted: bool, pub chroma_weight: [i8; 2], pub chroma_offset: [i8; 2], + pub chroma_shift: u8, +} + +impl WeightInfo { + pub fn is_weighted(&self) -> bool { + self.luma_weighted || self.chroma_weighted + } } #[derive(Clone,Copy)] @@ -117,6 +125,35 @@ pub struct SliceHeader { pub slice_group_change_cycle: u32, } +pub const DEF_WEIGHT_INFO: WeightInfo = WeightInfo { + luma_weighted: false, + luma_weight: 0, + luma_offset: 0, + luma_shift: 0, + chroma_weighted: false, + chroma_weight: [0; 2], + chroma_offset: [0; 2], + chroma_shift: 0, +}; + +impl SliceHeader { + pub fn get_weight(&self, list_id: u8, idx: usize) -> WeightInfo { + if list_id == 0 { + if idx < self.num_ref_idx_l0_active { + self.weights_l0[idx] + } else { + DEF_WEIGHT_INFO + } + } else { + if idx < self.num_ref_idx_l1_active { + self.weights_l1[idx] + } else { + DEF_WEIGHT_INFO + } + } + } +} + pub fn parse_slice_header_minimal(br: &mut BitReader) -> DecoderResult<(usize, SliceType)> { let first_mb_in_slice = br.read_ue()? as usize; let stype = br.read_ue_lim(SLICE_TYPES.len() as u32 - 1)?; @@ -199,6 +236,15 @@ pub fn parse_slice_header(br: &mut BitReader, sps_arr: &[SeqParameterSet], pps_a if (pps.weighted_pred && hdr.slice_type.is_p()) || (pps.weighted_bipred_idc == 1 && hdr.slice_type.is_b()) { parse_pred_weight_table(&mut hdr, br)?; + } else { + for weight in hdr.weights_l0[..hdr.num_ref_idx_l0_active].iter_mut() { + weight.luma_weighted = false; + weight.chroma_weighted = false; + } + for weight in hdr.weights_l1[..hdr.num_ref_idx_l1_active].iter_mut() { + weight.luma_weighted = false; + weight.chroma_weighted = false; + } } if nal_ref_idc != 0 { if is_idr { @@ -346,6 +392,8 @@ fn parse_pred_weight_table(hdr: &mut SliceHeader, br: &mut BitReader) -> Decoder validate!(offset >= -128 && offset <= 127); weight.luma_offset = offset as i8; } + weight.luma_shift = hdr.luma_log2_weight_denom; + weight.chroma_weighted = br.read_bool()?; if weight.chroma_weighted { for i in 0..2 { @@ -357,6 +405,7 @@ fn parse_pred_weight_table(hdr: &mut SliceHeader, br: &mut BitReader) -> Decoder weight.chroma_offset[i] = offset as i8; } } + weight.chroma_shift = hdr.chroma_log2_weight_denom; } for weight in hdr.weights_l1[..hdr.num_ref_idx_l1_active].iter_mut() { weight.luma_weighted = br.read_bool()?; @@ -368,6 +417,8 @@ fn parse_pred_weight_table(hdr: &mut SliceHeader, br: &mut BitReader) -> Decoder validate!(offset >= -128 && offset <= 127); weight.luma_offset = offset as i8; } + weight.luma_shift = hdr.luma_log2_weight_denom; + weight.chroma_weighted = br.read_bool()?; if weight.chroma_weighted { for i in 0..2 { @@ -379,6 +430,7 @@ fn parse_pred_weight_table(hdr: &mut SliceHeader, br: &mut BitReader) -> Decoder weight.chroma_offset[i] = offset as i8; } } + weight.chroma_shift = hdr.chroma_log2_weight_denom; } Ok(()) diff --git a/nihav-itu/src/codecs/h264/types.rs b/nihav-itu/src/codecs/h264/types.rs index 0d97e02..0a0cc64 100644 --- a/nihav-itu/src/codecs/h264/types.rs +++ b/nihav-itu/src/codecs/h264/types.rs @@ -225,13 +225,15 @@ impl CompactMBType { pub fn is_inter(self) -> bool { !self.is_intra() && !self.is_skip() && self != CompactMBType::PCM } - pub fn is_16x16(self) -> bool { + pub fn is_16x16_ref(self) -> bool { match self { - CompactMBType::P16x8 | CompactMBType::P8x16 | - CompactMBType::P8x8 | CompactMBType::P8x8Ref0 | - CompactMBType::B16x8 | CompactMBType::B8x16 | - CompactMBType::B8x8 => false, - _ => true, + CompactMBType::Intra4x4 | + CompactMBType::Intra8x8 | + CompactMBType::Intra16x16 | + CompactMBType::PCM | + CompactMBType::P16x16 | + CompactMBType::B16x16 => true, + _ => false, } } } @@ -765,7 +767,7 @@ impl SliceState { } pub fn predict_direct_mb(&mut self, frame_refs: &FrameRefs, temporal_mv: bool, cur_id: u16) { let (col_mb, _, _) = frame_refs.get_colocated_info(self.mb_x, self.mb_y); - if col_mb.mb_type.is_16x16() || !temporal_mv { + if col_mb.mb_type.is_16x16_ref() || !temporal_mv { let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, temporal_mv, cur_id, 0); self.apply_to_blk4(|blk4| blk4.mv = [mv0, mv1]); self.apply_to_blk8(|blk8| blk8.ref_idx = [ref0, ref1]); -- 2.30.2