h264: reword MB reconstruction and add weighted MC
author Kostya Shishkov <kostya.shishkov@gmail.com>
Wed, 27 Jul 2022 08:27:16 +0000 (10:27 +0200)
committer Kostya Shishkov <kostya.shishkov@gmail.com>
Wed, 27 Jul 2022 08:27:16 +0000 (10:27 +0200)
nihav-itu/src/codecs/h264/dsp/mod.rs
nihav-itu/src/codecs/h264/mb_recon.rs [new file with mode: 0644]
nihav-itu/src/codecs/h264/mod.rs
nihav-itu/src/codecs/h264/slice.rs
nihav-itu/src/codecs/h264/types.rs

index 19d260a3c1831fbc220f16079bb1ab7950be94e6..ef706be01af2cb59aa66cc8ab7dcbae70728e5bc 100644 (file)
@@ -943,6 +943,60 @@ pub fn do_mc(frm: &mut NASimpleVideoFrame<u8>, refpic: NAVideoBufferRef<u8>, xpo
     }
 }
 
+/// Performs motion compensation for one block into standalone output buffers.
+///
+/// Luma is interpolated into `ydst` and the two chroma planes into `udst` and
+/// `vdst`; every output buffer is written with a row stride of 16.
+/// `xpos`/`ypos` give the block position and `w`/`h` its size in luma samples,
+/// the chroma block is `w / 2` x `h / 2` located at half the luma position.
+/// The low two bits of each `mv` component select the luma interpolation
+/// routine, the low three bits are handed to `chroma_interp` as the fractional
+/// offset, and the remaining high bits are the integer displacement.
+pub fn mc_blocks(ydst: &mut [u8], udst: &mut [u8], vdst: &mut [u8], refpic: NAVideoBufferRef<u8>, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV) {
+    // index into H264_LUMA_INTERP built from the fractional parts of the vector
+    let mode = ((mv.x & 3) + (mv.y & 3) * 4) as usize;
+
+    // extra source samples required around the block when the position is
+    // fractional (presumably the footprint of the interpolation filter)
+    let pre  = if mode != 0 { 2 } else { 0 };
+    let post = if mode != 0 { 3 } else { 0 };
+    let (width, height) = refpic.get_dimensions(0);
+    let sx = (xpos as isize) + ((mv.x >> 2) as isize);
+    let sy = (ypos as isize) + ((mv.y >> 2) as isize);
+
+    // scratch area shared by luma and chroma edge emulation
+    const EBUF_STRIDE: usize = 32;
+    let mut ebuf = [0u8; EBUF_STRIDE * (16 + 2 + 3)];
+
+    // source area (including the filter margin) crosses the picture border:
+    // build it in the scratch buffer first
+    if (sx - pre < 0) || (sx + (w as isize) + post > (width as isize)) ||
+       (sy - pre < 0) || (sy + (h as isize) + post > (height as isize)) {
+        let edge = (pre + post) as usize;
+        edge_emu(&refpic, sx - pre, sy - pre, w + edge, h + edge,
+                 &mut ebuf, EBUF_STRIDE, 0, 0);
+        (H264_LUMA_INTERP[mode])(ydst, 16, &ebuf, EBUF_STRIDE, w, h);
+    } else {
+        // source area is fully inside the picture: read the reference directly
+        let sstride = refpic.get_stride(0);
+        let soff    = refpic.get_offset(0);
+        let sdta    = refpic.get_data();
+        let sbuf: &[u8] = sdta.as_slice();
+        let saddr = soff + ((sx - pre) as usize) + ((sy - pre) as usize) * sstride;
+        (H264_LUMA_INTERP[mode])(ydst, 16, &sbuf[saddr..], sstride, w, h);
+    }
+
+    // chroma: the same vector is split into a three-bit fraction and an
+    // integer part applied at half the luma position
+    let (cw, ch) = refpic.get_dimensions(1);
+    let mvx = mv.x >> 3;
+    let mvy = mv.y >> 3;
+    let dx = (mv.x & 7) as u16;
+    let dy = (mv.y & 7) as u16;
+    let src_x = ((xpos >> 1) as isize) + (mvx as isize);
+    let src_y = ((ypos >> 1) as isize) + (mvy as isize);
+    let suoff = refpic.get_offset(1);
+    let svoff = refpic.get_offset(2);
+    let sustride = refpic.get_stride(1);
+    let svstride = refpic.get_stride(2);
+    let src = refpic.get_data();
+    let cbw = w / 2;
+    let cbh = h / 2;
+    // one extra column and row of source samples is needed for chroma
+    let (csrc, cstride) = if (src_x < 0) || (src_x + (cbw as isize) + 1 > (cw as isize)) || (src_y < 0) || (src_y + (cbh as isize) + 1 > (ch as isize)) {
+            // both planes share the scratch buffer: stride 18, second plane
+            // starts at column 9
+            edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf,      18, 1, 4);
+            edge_emu(&refpic, src_x, src_y, cbw+1, cbh+1, &mut ebuf[9..], 18, 2, 4);
+            ([&ebuf, &ebuf[9..]], [18, 18])
+        } else {
+            ([&src[suoff + (src_x as usize) + (src_y as usize) * sustride..],
+             &src[svoff + (src_x as usize) + (src_y as usize) * svstride..]],
+             [sustride, svstride])
+        };
+    chroma_interp(udst, 16, csrc[0], cstride[0], dx, dy, cbw, cbh);
+    chroma_interp(vdst, 16, csrc[1], cstride[1], dx, dy, cbw, cbh);
+}
+
 pub fn gray_block(frm: &mut NASimpleVideoFrame<u8>, x: usize, y: usize, w: usize, h: usize) {
     let yoff = frm.offset[0] + x + y * frm.stride[0];
     let coff = [frm.offset[1] + x / 2 + y / 2 * frm.stride[1],
@@ -983,6 +1037,35 @@ pub fn do_mc_avg(frm: &mut NASimpleVideoFrame<u8>, refpic: NAVideoBufferRef<u8>,
     }
 }
 
+/// Writes a weighted copy of a single prediction block.
+///
+/// `src` holds the prediction with a row stride of 16, `dst` is the output
+/// with row stride `stride`; the top-left `w`x`h` area is processed.
+/// `wparams` is `[weight, offset, shift]` and each output sample is
+/// `clip8(((src * weight + round) >> shift) + offset)` with
+/// `round = (1 << shift) >> 1`.
+pub fn put_block_weighted(dst: &mut [u8], stride: usize, src: &[u8], w: usize, h: usize, wparams: [i8; 3]) {
+    let scale = i16::from(wparams[0]);
+    let add = i16::from(wparams[1]);
+    let shift = wparams[2] as u8;
+    let round: i16 = (1 << shift) >> 1;
+
+    let lines = dst.chunks_mut(stride).zip(src.chunks(16)).take(h);
+    for (out_line, in_line) in lines {
+        let out_line = &mut out_line[..w];
+        for (pix, &sample) in out_line.iter_mut().zip(in_line.iter()) {
+            let scaled = (i16::from(sample) * scale + round) >> shift;
+            *pix = clip8(scaled + add);
+        }
+    }
+}
+
+/// Writes the weighted combination of two prediction blocks.
+///
+/// `src0` and `src1` hold the two predictions with a row stride of 16, `dst`
+/// is the output with row stride `stride`; the top-left `w`x`h` area is
+/// processed. `wparams` is `[weight0, offset0, weight1, offset1, shift]` and
+/// each output sample is
+/// `clip(((src0 * weight0 + src1 * weight1 + round) >> (shift + 1)) + offset)`
+/// where `offset = (offset0 + offset1 + 1) >> 1` and `round = 1 << shift`.
+///
+/// The sum is accumulated in `i32`: two products of an 8-bit sample and an
+/// 8-bit weight can reach 2 * 255 * 127 = 64770 which does not fit into `i16`.
+pub fn put_block_weighted2(dst: &mut [u8], stride: usize, src0: &[u8], src1: &[u8], w: usize, h: usize, wparams: [i8; 5]) {
+    let weight0 = i32::from(wparams[0]);
+    let offset0 = i32::from(wparams[1]);
+    let weight1 = i32::from(wparams[2]);
+    let offset1 = i32::from(wparams[3]);
+    // one extra bit of shift performs the averaging of the two predictions
+    let wshift = (wparams[4] as u8) + 1;
+    let offset = (offset0 + offset1 + 1) >> 1;
+    let bias: i32 = (1 << wshift) >> 1;
+
+    for (drow, (srow0, srow1)) in dst.chunks_mut(stride).zip(src0.chunks(16).zip(src1.chunks(16))).take(h) {
+        for (dst, (&src0, &src1)) in drow[..w].iter_mut().zip(srow0.iter().zip(srow1.iter())) {
+            let sum = i32::from(src0) * weight0 + i32::from(src1) * weight1 + bias;
+            // clamp to the 8-bit sample range before narrowing
+            *dst = ((sum >> wshift) + offset).max(0).min(255) as u8;
+        }
+    }
+}
+
 macro_rules! loop_filter {
     (lumaedge; $buf: expr, $off: expr, $step: expr, $alpha: expr, $beta: expr) => {
         let p2 = i16::from($buf[$off - $step * 3]);
diff --git a/nihav-itu/src/codecs/h264/mb_recon.rs b/nihav-itu/src/codecs/h264/mb_recon.rs
new file mode 100644 (file)
index 0000000..4f3b7cd
--- /dev/null
@@ -0,0 +1,589 @@
+use nihav_core::frame::*;
+use nihav_codec_support::codecs::MV;
+use super::{CurrentMBInfo, I4X4_SCAN};
+use super::dsp::*;
+use super::pic_ref::FrameRefs;
+use super::slice::{SliceHeader, WeightInfo, DEF_WEIGHT_INFO};
+use super::types::*;
+
+/// Performs intra prediction for the current macroblock directly inside `frm`.
+///
+/// Luma is predicted according to `mb_info.mb_type` (16x16, 8x8 or 4x4 blocks),
+/// then both chroma planes are predicted with `mb_info.chroma_ipred`.
+/// Neighbouring samples come from `sstate` (`top_line_*`, `left_*`) at the
+/// macroblock border and from already written samples of `frm` inside it.
+/// For Intra8x8 and Intra4x4 the residual of every coded block is added right
+/// after that block is predicted.
+///
+/// Panics via `unreachable!()` when called for a non-intra macroblock type.
+fn pred_intra(frm: &mut NASimpleVideoFrame<u8>, sstate: &SliceState, mb_info: &CurrentMBInfo) {
+    let yoff = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * frm.stride[0];
+    match mb_info.mb_type {
+        MBType::Intra16x16(imode, _, _) => {
+            // mode 2 (presumably DC) is replaced with a variant that only uses
+            // the neighbours that are actually available
+            let id = if imode != 2 || (sstate.has_top && sstate.has_left) {
+                    imode as usize
+                } else if !sstate.has_top && !sstate.has_left {
+                    IPRED8_DC128
+                } else if !sstate.has_left {
+                    IPRED8_DC_TOP
+                } else {
+                    IPRED8_DC_LEFT
+                };
+            IPRED_FUNCS16X16[id](&mut frm.data[yoff..], frm.stride[0], &sstate.top_line_y[sstate.mb_x * 16..], &sstate.left_y);
+        },
+        MBType::Intra8x8 => {
+            let mut ictx = IPred8Context::new();
+            for part in 0..4 {
+                // x and y are the partition position in units of 4x4 blocks
+                let x = (part & 1) * 2;
+                let y = part & 2;
+                let blk4 = x + y * 4;
+
+                let cur_yoff = yoff + x * 4 + y * 4 * frm.stride[0];
+                let has_top = y > 0 || sstate.has_top;
+                let has_left = x > 0 || sstate.has_left;
+                let imode = mb_info.ipred[blk4];
+                // DC prediction falls back to the variant matching the
+                // available neighbours
+                let id = if imode != IntraPredMode::DC || (has_top && has_left) {
+                        let im_id: u8 = imode.into();
+                        im_id as usize
+                    } else if !has_top && !has_left {
+                        IPRED4_DC128
+                    } else if !has_left {
+                        IPRED4_DC_TOP
+                    } else {
+                        IPRED4_DC_LEFT
+                    };
+                let mb_idx = sstate.mb_x + sstate.mb_y * sstate.mb_w;
+                // top-right samples are not usable for the right-hand partitions
+                // in the bottom row, in the last macroblock column or in the
+                // first macroblock row after `mb_start`
+                let noright = (y == 2 || sstate.mb_x == sstate.mb_w - 1 || mb_idx < sstate.mb_start + sstate.mb_w) && (x == 2);
+                let has_tl = (has_top && x > 0) || (has_left && y > 0) || (x == 0 && y == 0 && sstate.mb_x > 0 && mb_idx > sstate.mb_start + sstate.mb_w);
+                if id != IPRED4_DC128 {
+                    // top row: saved line above the macroblock or the previous
+                    // row of the frame
+                    let top = if y == 0 {
+                            &sstate.top_line_y[sstate.mb_x * 16 + x * 4..]
+                        } else {
+                            &frm.data[cur_yoff - frm.stride[0]..]
+                        };
+                    // left column: element 0 is the top-left sample, the rest
+                    // is the column to the left of the block
+                    let mut left_buf = [0; 9];
+                    let left = if x == 0 {
+                            &sstate.left_y[y * 4..]
+                        } else {
+                            if has_tl {
+                                if y == 0 {
+                                    left_buf[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1];
+                                } else {
+                                    left_buf[0] = frm.data[cur_yoff - 1 - frm.stride[0]];
+                                }
+                            }
+                            if has_left {
+                                for (dst, src) in left_buf[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) {
+                                    *dst = src[0];
+                                }
+                            }
+                            &left_buf
+                        };
+                    ictx.fill(top, left, has_top, has_top && !noright, has_left, has_tl);
+                }
+                IPRED_FUNCS8X8_LUMA[id](&mut frm.data[cur_yoff..], frm.stride[0], &ictx);
+                // residual is added immediately so that the following
+                // partitions read reconstructed samples from the frame
+                if mb_info.coded[blk4] {
+                    add_coeffs8(frm.data, cur_yoff, frm.stride[0], &mb_info.coeffs8x8[part].coeffs);
+                }
+            }
+        },
+        MBType::Intra4x4 => {
+            // blocks are visited in the order given by I4X4_SCAN
+            for &(x,y) in I4X4_SCAN.iter() {
+                let x = x as usize;
+                let y = y as usize;
+                let cur_yoff = yoff + x * 4 + y * 4 * frm.stride[0];
+                let has_top = y > 0 || sstate.has_top;
+                let has_left = x > 0 || sstate.has_left;
+                let imode = mb_info.ipred[x + y * 4];
+                let id = if imode != IntraPredMode::DC || (has_top && has_left) {
+                        let im_id: u8 = imode.into();
+                        im_id as usize
+                    } else if !has_top && !has_left {
+                        IPRED4_DC128
+                    } else if !has_left {
+                        IPRED4_DC_TOP
+                    } else {
+                        IPRED4_DC_LEFT
+                    };
+                let noright = (sstate.mb_x == sstate.mb_w - 1 || sstate.mb_x + sstate.mb_y * sstate.mb_w < sstate.mb_start + sstate.mb_w) && (x == 3);
+                // top-right samples: real data where available, otherwise the
+                // last top sample repeated (or zeroes without a top row)
+                let tr: [u8; 4] = if y == 0 {
+                        let tsrc = &sstate.top_line_y[sstate.mb_x * 16 + x * 4..];
+                        if has_top && !noright {
+                            [tsrc[4], tsrc[5], tsrc[6], tsrc[7]]
+                        } else if has_top {
+                            [tsrc[3]; 4]
+                        } else {
+                            [0; 4]
+                        }
+                    } else if (x & 1) == 0 || (x == 1 && y == 2) {
+                        let i = cur_yoff - frm.stride[0];
+                        [frm.data[i + 4], frm.data[i + 5], frm.data[i + 6], frm.data[i + 7]]
+                    } else {
+                        let i = cur_yoff - frm.stride[0];
+                        [frm.data[i + 3], frm.data[i + 3], frm.data[i + 3], frm.data[i + 3]]
+                    };
+                // unavailable neighbours keep the default value of 128
+                let mut top = [128; 4];
+                let mut left = [128; 9];
+                if y == 0 {
+                    if has_top {
+                        top.copy_from_slice(&sstate.top_line_y[sstate.mb_x * 16 + x * 4..][..4]);
+                    }
+                } else {
+                    top.copy_from_slice(&frm.data[cur_yoff - frm.stride[0]..][..4]);
+                }
+                if x == 0 {
+                    if has_left {
+                        for (dst, &src) in left.iter_mut().zip(sstate.left_y[y * 4..].iter()) {
+                            *dst = src;
+                        }
+                    }
+                } else {
+                    // left[0] is the top-left sample, left[1..] the column to
+                    // the left of the block taken from the frame
+                    if y == 0 {
+                        // NOTE(review): this inner `x == 0` test can never be
+                        // true because the enclosing branch is taken only for
+                        // x != 0
+                        if x == 0 {
+                            left[0] = sstate.left_y[y * 4];
+                        } else if has_top {
+                            left[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1];
+                        }
+                    } else {
+                        left[0] = frm.data[cur_yoff - frm.stride[0] - 1];
+                    }
+                    for (dst, row) in left[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) {
+                        *dst = row[0];
+                    }
+                }
+                IPRED_FUNCS4X4[id](&mut frm.data[cur_yoff..], frm.stride[0], &top, &left, &tr);
+                // residual is added immediately so that the following blocks
+                // read reconstructed samples from the frame
+                if mb_info.coded[x + y * 4] {
+                    add_coeffs(frm.data, cur_yoff, frm.stride[0], &mb_info.coeffs[x + y * 4]);
+                }
+            }
+        },
+        _ => unreachable!(),
+    };
+    // chroma: mode 0 gets the same neighbour-availability fallback as luma DC
+    let id = if mb_info.chroma_ipred != 0 || (sstate.has_top && sstate.has_left) {
+            mb_info.chroma_ipred as usize
+        } else if !sstate.has_top && !sstate.has_left {
+            IPRED8_DC128
+        } else if !sstate.has_left {
+            IPRED8_DC_TOP
+        } else {
+            IPRED8_DC_LEFT
+        };
+    for chroma in 1..3 {
+        let off = frm.offset[chroma] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[chroma];
+        let top = &sstate.top_line_c[chroma - 1][sstate.mb_x * 8..];
+        IPRED_FUNCS8X8_CHROMA[id](&mut frm.data[off..], frm.stride[chroma], top, &sstate.left_c[chroma - 1]);
+    }
+}
+
+/// Adds the luma residual of the current macroblock to `frm`.
+///
+/// Depending on `mb_info.transform_size_8x8` the residual is stored either as
+/// four 8x8 blocks or as sixteen 4x4 blocks; only blocks flagged in
+/// `mb_info.coded` are added.
+fn add_luma(frm: &mut NASimpleVideoFrame<u8>, sstate: &SliceState, mb_info: &CurrentMBInfo) {
+    let stride = frm.stride[0];
+    let mb_off = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * stride;
+    if mb_info.transform_size_8x8 {
+        for blk8 in 0..4 {
+            let (bx, by) = (blk8 & 1, blk8 >> 1);
+            // the coded flag of an 8x8 block is kept at its top-left 4x4 block
+            if mb_info.coded[bx * 2 + by * 2 * 4] {
+                let pos = mb_off + bx * 8 + by * 8 * stride;
+                add_coeffs8(frm.data, pos, stride, &mb_info.coeffs8x8[blk8].coeffs);
+            }
+        }
+    } else {
+        for blk4 in 0..16 {
+            let (bx, by) = (blk4 & 3, blk4 >> 2);
+            if mb_info.coded[blk4] {
+                let pos = mb_off + bx * 4 + by * 4 * stride;
+                add_coeffs(frm.data, pos, stride, &mb_info.coeffs[blk4]);
+            }
+        }
+    }
+}
+
+/// Adds the chroma residual of the current macroblock to `frm`.
+///
+/// Each chroma plane has four 4x4 blocks stored after the sixteen luma blocks
+/// in `mb_info.coeffs`; a block is added when it is flagged as coded or when
+/// its first coefficient is non-zero.
+fn add_chroma(frm: &mut NASimpleVideoFrame<u8>, sstate: &SliceState, mb_info: &CurrentMBInfo) {
+    for chroma in 1..3 {
+        let stride = frm.stride[chroma];
+        let mb_off = frm.offset[chroma] + sstate.mb_x * 8 + sstate.mb_y * 8 * stride;
+        let first_blk = 16 + (chroma - 1) * 4;
+        for sub in 0..4 {
+            let blk_no = first_blk + sub;
+            let has_data = mb_info.coded[blk_no] || mb_info.coeffs[blk_no][0] != 0;
+            if has_data {
+                let pos = mb_off + (sub & 1) * 4 + (sub >> 1) * 4 * stride;
+                add_coeffs(frm.data, pos, stride, &mb_info.coeffs[blk_no]);
+            }
+        }
+    }
+}
+
+/// Motion-compensates one block predicted from a single reference picture.
+///
+/// Without a reference picture the block is filled by `gray_block`. When
+/// `weight` requests no weighting the block is produced by `do_mc`; otherwise
+/// the prediction is built in temporary buffers and written to `frm` through
+/// `put_block_weighted`, separately for luma and for both chroma planes.
+fn do_p_mc(frm: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV, ref_pic: Option<NAVideoBufferRef<u8>>, weight: &WeightInfo) {
+    let buf = match ref_pic {
+        Some(buf) => buf,
+        None => {
+            gray_block(frm, xpos, ypos, w, h);
+            return;
+        },
+    };
+    if !weight.is_weighted() {
+        do_mc(frm, buf, xpos, ypos, w, h, mv);
+        return;
+    }
+
+    let mut luma = [0u8; 16 * 16];
+    let mut cb = [0u8; 16 * 16];
+    let mut cr = [0u8; 16 * 16];
+    mc_blocks(&mut luma, &mut cb, &mut cr, buf, xpos, ypos, w, h, mv);
+
+    // a component that is not weighted gets the identity parameters
+    let luma_params = if weight.luma_weighted {
+            [weight.luma_weight, weight.luma_offset, weight.luma_shift as i8]
+        } else {
+            [1, 0, 0]
+        };
+    let luma_pos = frm.offset[0] + xpos + ypos * frm.stride[0];
+    put_block_weighted(&mut frm.data[luma_pos..], frm.stride[0], &luma, w, h, luma_params);
+
+    for (chroma, plane_src) in [&cb[..], &cr[..]].iter().enumerate() {
+        let comp = chroma + 1;
+        let cstride = frm.stride[comp];
+        let chroma_pos = frm.offset[comp] + xpos / 2 + ypos / 2 * cstride;
+        let chroma_params = if weight.chroma_weighted {
+                [weight.chroma_weight[chroma], weight.chroma_offset[chroma], weight.chroma_shift as i8]
+            } else {
+                [1, 0, 0]
+            };
+        put_block_weighted(&mut frm.data[chroma_pos..], cstride, plane_src, w / 2, h / 2, chroma_params);
+    }
+}
+
+/// Motion-compensates one block of a B-macroblock.
+///
+/// `mode` selects prediction from list 0, list 1 or both. When no weighting is
+/// requested for the references in use the block is produced with `do_mc` /
+/// `do_mc_avg` (falling back to the single available reference or to
+/// `gray_block`). Otherwise the predictions are built in temporary buffers and
+/// written to `frm` with `put_block_weighted` (one reference) or
+/// `put_block_weighted2` (two references).
+///
+/// In the bi-predicted case a component that is weighted for only one of the
+/// references uses the unit weight `1 << shift` for the other one. Chroma
+/// always takes its shift from `chroma_shift`, the same field the
+/// single-reference chroma paths use.
+fn do_b_mc(frm: &mut NASimpleVideoFrame<u8>, mode: BMode, xpos: usize, ypos: usize, w: usize, h: usize, mv0: MV, ref_pic0: Option<NAVideoBufferRef<u8>>, weight0: &WeightInfo, mv1: MV, ref_pic1: Option<NAVideoBufferRef<u8>>, weight1: &WeightInfo, avg_buf: &mut NAVideoBufferRef<u8>) {
+    let do_weight = match (mode, weight0.is_weighted(), weight1.is_weighted()) {
+            (BMode::L0, true, _) => true,
+            (BMode::L1, _, true) => true,
+            (BMode::Bi, true, true) => true,
+            _ => false,
+        };
+    if !do_weight {
+        match mode {
+            BMode::L0 => {
+                if let Some(buf) = ref_pic0 {
+                    do_mc(frm, buf, xpos, ypos, w, h, mv0);
+                } else {
+                    gray_block(frm, xpos, ypos, w, h);
+                }
+            },
+            BMode::L1 => {
+                if let Some(buf) = ref_pic1 {
+                    do_mc(frm, buf, xpos, ypos, w, h, mv1);
+                } else {
+                    gray_block(frm, xpos, ypos, w, h);
+                }
+            },
+            BMode::Bi => {
+                match (ref_pic0, ref_pic1) {
+                    (Some(buf0), Some(buf1)) => {
+                        do_mc(frm, buf0, xpos, ypos, w, h, mv0);
+                        do_mc_avg(frm, buf1, xpos, ypos, w, h, mv1, avg_buf);
+                    },
+                    (Some(buf0), None) => {
+                        do_mc(frm, buf0, xpos, ypos, w, h, mv0);
+                    },
+                    (None, Some(buf1)) => {
+                        do_mc(frm, buf1, xpos, ypos, w, h, mv1);
+                    },
+                    (None, None) => {
+                        gray_block(frm, xpos, ypos, w, h);
+                    },
+                };
+            },
+        };
+    } else {
+        let mut ytmp0 = [0x80; 16 * 16];
+        let mut utmp0 = [0x80; 16 * 16];
+        let mut vtmp0 = [0x80; 16 * 16];
+        let mut ytmp1 = [0x80; 16 * 16];
+        let mut utmp1 = [0x80; 16 * 16];
+        let mut vtmp1 = [0x80; 16 * 16];
+        match (mode, ref_pic0, ref_pic1) {
+            (BMode::L0, Some(buf), _) | (BMode::L1, _, Some(buf)) => {
+                let (mv, weight) = if mode == BMode::L0 { (mv0, weight0) } else { (mv1, weight1) };
+                mc_blocks(&mut ytmp0, &mut utmp0, &mut vtmp0, buf, xpos, ypos, w, h, mv);
+
+                let yoff = frm.offset[0] + xpos + ypos * frm.stride[0];
+                let yw = if weight.luma_weighted {
+                        [weight.luma_weight, weight.luma_offset, weight.luma_shift as i8]
+                    } else {
+                        [1, 0, 0]
+                    };
+                put_block_weighted(&mut frm.data[yoff..], frm.stride[0], &ytmp0, w, h, yw);
+
+                for chroma in 0..2 {
+                    let cstride = frm.stride[chroma + 1];
+                    let coff = frm.offset[chroma + 1] + xpos / 2 + ypos / 2 * cstride;
+                    let cw = if weight.chroma_weighted {
+                            [weight.chroma_weight[chroma], weight.chroma_offset[chroma], weight.chroma_shift as i8]
+                        } else {
+                            [1, 0, 0]
+                        };
+                    let csrc = if chroma == 0 { &utmp0 } else { &vtmp0 };
+                    put_block_weighted(&mut frm.data[coff..], cstride, csrc, w / 2, h / 2, cw);
+                }
+            },
+            (BMode::Bi, Some(buf0), Some(buf1)) => { // do both and avg
+                mc_blocks(&mut ytmp0, &mut utmp0, &mut vtmp0, buf0, xpos, ypos, w, h, mv0);
+                mc_blocks(&mut ytmp1, &mut utmp1, &mut vtmp1, buf1, xpos, ypos, w, h, mv1);
+
+                let yoff = frm.offset[0] + xpos + ypos * frm.stride[0];
+                let yw = match (weight0.luma_weighted, weight1.luma_weighted) {
+                        (true, true) => [weight0.luma_weight, weight0.luma_offset, weight1.luma_weight, weight1.luma_offset, weight0.luma_shift as i8],
+                        (true, false) => [weight0.luma_weight, weight0.luma_offset, 1 << weight0.luma_shift, 0, weight0.luma_shift as i8],
+                        (false, true) => [1 << weight1.luma_shift, 0, weight1.luma_weight, weight1.luma_offset, weight1.luma_shift as i8],
+                        (false, false) => [1, 0, 1, 0, 0],
+                    };
+                put_block_weighted2(&mut frm.data[yoff..], frm.stride[0], &ytmp0, &ytmp1, w, h, yw);
+
+                for chroma in 0..2 {
+                    let cstride = frm.stride[chroma + 1];
+                    let coff = frm.offset[chroma + 1] + xpos / 2 + ypos / 2 * cstride;
+                    let cw0 = weight0.chroma_weight[chroma];
+                    let co0 = weight0.chroma_offset[chroma];
+                    let cw1 = weight1.chroma_weight[chroma];
+                    let co1 = weight1.chroma_offset[chroma];
+                    // chroma weights are scaled by the chroma shift, not the luma one
+                    let cw = match (weight0.chroma_weighted, weight1.chroma_weighted) {
+                            (true, true) => [cw0, co0, cw1, co1, weight0.chroma_shift as i8],
+                            (true, false) => [cw0, co0, 1 << weight0.chroma_shift, 0, weight0.chroma_shift as i8],
+                            (false, true) => [1 << weight1.chroma_shift, 0, cw1, co1, weight1.chroma_shift as i8],
+                            (false, false) => [1, 0, 1, 0, 0],
+                        };
+                    let csrc0 = if chroma == 0 { &utmp0 } else { &vtmp0 };
+                    let csrc1 = if chroma == 0 { &utmp1 } else { &vtmp1 };
+                    put_block_weighted2(&mut frm.data[coff..], cstride, csrc0, csrc1, w / 2, h / 2, cw);
+                }
+            },
+            _ => {
+                // a required reference picture is missing
+                gray_block(frm, xpos, ypos, w, h);
+            },
+        };
+    }
+}
+
+/// Selects the prediction weights for a block referencing `ref_l0` / `ref_l1`.
+///
+/// Unless the block is bi-predicted and `weight_mode` is 2 the weights are
+/// taken from the slice header. Otherwise they are derived from the distances
+/// between the identifiers of the current picture and of the two reference
+/// pictures; the default weights are returned when a reference is missing or
+/// long-term, when both references have the same identifier, or when the
+/// derived scale is outside the accepted range.
+fn get_weights(slice_hdr: &SliceHeader, frame_refs: &FrameRefs, mode: BMode, weight_mode: u8, ref_l0: PicRef, ref_l1: PicRef) -> (WeightInfo, WeightInfo) {
+    let idx_l0 = ref_l0.index();
+    let idx_l1 = ref_l1.index();
+    if mode != BMode::Bi || weight_mode != 2 {
+        return (slice_hdr.get_weight(0, idx_l0), slice_hdr.get_weight(1, idx_l1));
+    }
+
+    let (pic0, pic1) = match (frame_refs.ref_list0.get(idx_l0), frame_refs.ref_list1.get(idx_l1)) {
+            (Some(Some(ref pic0)), Some(Some(ref pic1))) => (pic0, pic1),
+            _ => return (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO),
+        };
+
+    let r0_poc = pic0.full_id as u16;
+    let r1_poc = pic1.full_id as u16;
+    let cur_id = frame_refs.cur_id as u16;
+    let same_ref = r0_poc == r1_poc;
+    if same_ref || pic0.long_term.is_some() || pic1.long_term.is_some() {
+        return (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO);
+    }
+
+    // td is never zero here because equal identifiers were rejected above
+    let td = (i32::from(r1_poc) - i32::from(r0_poc)).max(-128).min(127);
+    let tx = (16384 + (td / 2).abs()) / td;
+    let tb = (i32::from(cur_id) - i32::from(r0_poc)).max(-128).min(127);
+    let scale = ((tb * tx + 32) >> 6).max(-1024).min(1023);
+    let scaled = scale >> 2;
+    if scale == 128 || scaled < -64 || scaled > 128 {
+        return (DEF_WEIGHT_INFO, DEF_WEIGHT_INFO);
+    }
+    // NOTE(review): `scaled` may be anything in -64..=128 at this point, so
+    // the cast wraps for 128 and `64 - w1` exceeds the i8 range for -64 and
+    // for the wrapped value - confirm whether a wider weight type is needed.
+    let w1 = scaled as i8;
+    let w0 = 64 - w1;
+
+    // the same weight is applied to luma and to both chroma planes
+    let implicit = |wt: i8| WeightInfo {
+            luma_weighted:      true,
+            luma_weight:        wt,
+            luma_offset:        0,
+            luma_shift:         5,
+            chroma_weighted:    true,
+            chroma_weight:      [wt; 2],
+            chroma_offset:      [0; 2],
+            chroma_shift:       5,
+        };
+
+    (implicit(w0), implicit(w1))
+}
+
+/// Reconstructs the current macroblock into `frm`.
+///
+/// Intra macroblocks are predicted with `pred_intra`, inter macroblocks are
+/// motion-compensated partition by partition with `do_p_mc` / `do_b_mc` using
+/// the motion vectors and reference indices stored in `sstate` and `mb_info`.
+/// Nothing is predicted here for PCM macroblocks. Afterwards the residual is
+/// added for every macroblock type that is not a skip type.
+///
+/// `weight_mode` is forwarded to `get_weights` for B-macroblocks and `avg_buf`
+/// to `do_b_mc`. Direct and skipped B-partitions additionally get their
+/// reference indices in `sstate` marked as direct.
+pub fn recon_mb(frm: &mut NASimpleVideoFrame<u8>, slice_hdr: &SliceHeader, mb_info: &CurrentMBInfo, sstate: &mut SliceState, frame_refs: &FrameRefs, avg_buf: &mut NAVideoBufferRef<u8>, weight_mode: u8) {
+    // top-left corner of the macroblock in luma samples
+    let xpos = sstate.mb_x * 16;
+    let ypos = sstate.mb_y * 16;
+
+    match mb_info.mb_type {
+        MBType::Intra16x16(_, _, _) => {
+            pred_intra(frm, &sstate, &mb_info);
+        },
+        MBType::Intra4x4 | MBType::Intra8x8 => {
+            pred_intra(frm, &sstate, &mb_info);
+        },
+        MBType::PCM => {},
+        MBType::PSkip => {
+            // skipped P-macroblock always uses the first list 0 reference
+            let mv = sstate.get_cur_blk4(0).mv[0];
+            let rpic = frame_refs.select_ref_pic(0, 0);
+            let weight = &slice_hdr.get_weight(0, 0);
+            do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight);
+        },
+        MBType::P16x16 => {
+            let mv = sstate.get_cur_blk4(0).mv[0];
+            let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index());
+            let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index());
+            do_p_mc(frm, xpos, ypos, 16, 16, mv, rpic, weight);
+        },
+        MBType::P16x8 | MBType::P8x16 => {
+            // partition size and the offset of the second partition
+            let (bw, bh, bx, by) = if mb_info.mb_type == MBType::P16x8 {
+                    (16, 8, 0, 8)
+                } else {
+                    (8, 16, 8, 0)
+                };
+            let mv = sstate.get_cur_blk4(0).mv[0];
+            let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index());
+            let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[0].index());
+            do_p_mc(frm, xpos, ypos, bw, bh, mv, rpic, weight);
+            // 4x4 block index is x / 4 + (y / 4) * 4, i.e. bx / 4 + by
+            let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0];
+            let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[1].index());
+            let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[1].index());
+            do_p_mc(frm, xpos + bx, ypos + by, bw, bh, mv, rpic, weight);
+        },
+        MBType::P8x8 | MBType::P8x8Ref0 => {
+            for part in 0..4 {
+                let bx = (part & 1) * 8;
+                let by = (part & 2) * 4;
+                let mv = sstate.get_cur_blk4(bx / 4 + by).mv[0];
+                let rpic = frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index());
+                let weight = &slice_hdr.get_weight(0, mb_info.ref_l0[part].index());
+
+                // all sub-partitions of an 8x8 partition share its reference
+                match mb_info.sub_mb_type[part] {
+                    SubMBType::P8x8 => {
+                        do_p_mc(frm, xpos + bx, ypos + by, 8, 8, mv, rpic, weight);
+                    },
+                    SubMBType::P8x4 => {
+                        do_p_mc(frm, xpos + bx, ypos + by, 8, 4, mv, rpic.clone(), weight);
+                        let mv = sstate.get_cur_blk4(bx / 4 + by + 4).mv[0];
+                        do_p_mc(frm, xpos + bx, ypos + by + 4, 8, 4, mv, rpic, weight);
+                    },
+                    SubMBType::P4x8 => {
+                        do_p_mc(frm, xpos + bx, ypos + by, 4, 8, mv, rpic.clone(), weight);
+                        let mv = sstate.get_cur_blk4(bx / 4 + by + 1).mv[0];
+                        do_p_mc(frm, xpos + bx + 4, ypos + by, 4, 8, mv, rpic, weight);
+                    },
+                    SubMBType::P4x4 => {
+                        for sb_no in 0..4 {
+                            let sxpos = xpos + bx + (sb_no & 1) * 4;
+                            let sypos = ypos + by + (sb_no & 2) * 2;
+                            let sblk_no = (bx / 4 + (sb_no & 1)) + ((by / 4) + (sb_no >> 1)) * 4;
+                            let mv = sstate.get_cur_blk4(sblk_no).mv[0];
+                            do_p_mc(frm, sxpos, sypos, 4, 4, mv, rpic.clone(), weight);
+                        }
+                    },
+                    _ => unreachable!(),
+                };
+            }
+        },
+        MBType::B16x16(mode) => {
+            let mv0 = sstate.get_cur_blk4(0).mv[0];
+            let rpic0 = frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index());
+            let mv1 = sstate.get_cur_blk4(0).mv[1];
+            let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[0].index());
+            let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, mb_info.ref_l0[0], mb_info.ref_l1[0]);
+            do_b_mc(frm, mode, xpos, ypos, 16, 16, mv0, rpic0, &weight0, mv1, rpic1, &weight1, avg_buf);
+        },
+        MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
+            let (pw, ph) = mb_info.mb_type.size();
+            // step to the second partition: 8 in the dimension that is split
+            let (px, py) = (pw & 8, ph & 8);
+            let modes = [mode0, mode1];
+            let (mut bx, mut by) = (0, 0);
+            for part in 0..2 {
+                let blk = if part == 0 { 0 } else { (px / 4) + py };
+                let mv0 = sstate.get_cur_blk4(blk).mv[0];
+                let rpic0 = frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index());
+                let mv1 = sstate.get_cur_blk4(blk).mv[1];
+                let rpic1 = frame_refs.select_ref_pic(1, mb_info.ref_l1[part].index());
+                let (weight0, weight1) = get_weights(slice_hdr, frame_refs, modes[part], weight_mode, mb_info.ref_l0[part], mb_info.ref_l1[part]);
+                do_b_mc(frm, modes[part], xpos + bx, ypos + by, pw, ph, mv0, rpic0, &weight0, mv1, rpic1, &weight1, avg_buf);
+                bx += px;
+                by += py;
+            }
+        },
+        MBType::Direct | MBType::BSkip => {
+            let colo_mb_type = frame_refs.get_colocated_info(sstate.mb_x, sstate.mb_y).0.mb_type;
+            let is_16x16 = colo_mb_type.is_16x16_ref();
+
+            // the whole macroblock is compensated at once when the co-located
+            // macroblock reports a 16x16 reference, otherwise per 4x4 block
+            if is_16x16 {
+                let mv = sstate.get_cur_blk4(0).mv;
+                let ref_idx = sstate.get_cur_blk8(0).ref_idx;
+                let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index());
+                let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index());
+                let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]);
+                do_b_mc(frm, BMode::Bi, xpos, ypos, 16, 16, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf);
+            } else {
+                for blk4 in 0..16 {
+                    let mv = sstate.get_cur_blk4(blk4).mv;
+                    let ref_idx = sstate.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx;
+                    let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index());
+                    let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index());
+                    let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]);
+                    do_b_mc(frm, BMode::Bi, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf);
+                }
+            }
+            // mark the stored reference indices as direct once they were used
+            sstate.apply_to_blk8(|blk8| { blk8.ref_idx[0].set_direct(); blk8.ref_idx[1].set_direct(); });
+        },
+        MBType::B8x8 => {
+            for part in 0..4 {
+                let ridx = sstate.get_cur_blk8(part).ref_idx;
+                let rpic0 = frame_refs.select_ref_pic(0, ridx[0].index());
+                let rpic1 = frame_refs.select_ref_pic(1, ridx[1].index());
+                let subtype = mb_info.sub_mb_type[part];
+                // index of the top-left 4x4 block of this 8x8 partition
+                let blk8 = (part & 1) * 2 + (part & 2) * 4;
+                let mut bx = (part & 1) * 8;
+                let mut by = (part & 2) * 4;
+                match subtype {
+                    SubMBType::Direct8x8 => {
+                        // four 4x4 blocks visited row by row
+                        for blk in 0..4 {
+                            let mv = sstate.get_cur_blk4(bx / 4 + (by / 4) * 4).mv;
+                            let ref_idx = sstate.get_cur_blk8(bx / 8 + (by / 8) * 2).ref_idx;
+                            let rpic0 = frame_refs.select_ref_pic(0, ref_idx[0].index());
+                            let rpic1 = frame_refs.select_ref_pic(1, ref_idx[1].index());
+                            let (weight0, weight1) = get_weights(slice_hdr, frame_refs, BMode::Bi, weight_mode, ref_idx[0], ref_idx[1]);
+                            do_b_mc(frm, BMode::Bi, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf);
+                            bx += 4;
+                            if blk == 1 {
+                                bx -= 8;
+                                by += 4;
+                            }
+                        }
+                        sstate.get_cur_blk8(part).ref_idx[0].set_direct();
+                        sstate.get_cur_blk8(part).ref_idx[1].set_direct();
+                    },
+                    SubMBType::B8x8(mode) => {
+                        let mv = sstate.get_cur_blk4(blk8).mv;
+                        let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]);
+                        do_b_mc(frm, mode, xpos + bx, ypos + by, 8, 8, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf);
+                    },
+                    SubMBType::B8x4(mode) | SubMBType::B4x8(mode) => {
+                        let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]);
+                        let (pw, ph) = subtype.size();
+                        let mv = sstate.get_cur_blk4(blk8).mv;
+                        do_b_mc(frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, avg_buf);
+                        // the second half lies 4 samples away in the dimension
+                        // whose size is 4
+                        let addr2 = blk8 + (pw & 4) / 4 + (ph & 4);
+                        let mv = sstate.get_cur_blk4(addr2).mv;
+                        do_b_mc(frm, mode, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph, mv[0], rpic0, &weight0, mv[1], rpic1, &weight1, avg_buf);
+                    },
+                    SubMBType::B4x4(mode) => {
+                        let (weight0, weight1) = get_weights(slice_hdr, frame_refs, mode, weight_mode, ridx[0], ridx[1]);
+                        for i in 0..4 {
+                            let addr2 = blk8 + (i & 1) + (i & 2) * 2;
+                            let mv = sstate.get_cur_blk4(addr2).mv;
+                            do_b_mc(frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0.clone(), &weight0, mv[1], rpic1.clone(), &weight1, avg_buf);
+                            bx += 4;
+                            if i == 1 {
+                                bx -= 8;
+                                by += 4;
+                            }
+                        }
+                    },
+                    _ => unreachable!(),
+                };
+            }
+        },
+    };
+    if !mb_info.mb_type.is_skip() {
+        // Intra4x4/Intra8x8 luma residual was already added by pred_intra
+        if mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 {
+            add_luma(frm, &sstate, &mb_info);
+        }
+        add_chroma(frm, &sstate, &mb_info);
+    }
+}
index b6f6dd510f024c14c9e66f1be091cb4871b48091..59fec0cff22ba0eb8f8aae1065abb4ff8760e56b 100644 (file)
@@ -1,6 +1,5 @@
 /*
  known bugs and limitations:
-  * weighted motion compensation is not implemented
   * wrong slice boundary filtering
   * not fully correct deblock strength selection for P/B-macroblocks
   * scaling lists for 4x4 blocks
@@ -29,6 +28,8 @@ mod cavlc;
 use cavlc::*;
 mod loopfilter;
 use loopfilter::*;
+mod mb_recon;
+use mb_recon::*;
 mod sets;
 use sets::*;
 mod slice;
@@ -447,200 +448,6 @@ println!("PAFF?");
 
         Ok(())
     }
-    fn pred_intra(frm: &mut NASimpleVideoFrame<u8>, sstate: &SliceState, mb_info: &CurrentMBInfo) {
-        let yoff = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * frm.stride[0];
-        match mb_info.mb_type {
-            MBType::Intra16x16(imode, _, _) => {
-                let id = if imode != 2 || (sstate.has_top && sstate.has_left) {
-                        imode as usize
-                    } else if !sstate.has_top && !sstate.has_left {
-                        IPRED8_DC128
-                    } else if !sstate.has_left {
-                        IPRED8_DC_TOP
-                    } else {
-                        IPRED8_DC_LEFT
-                    };
-                    IPRED_FUNCS16X16[id](&mut frm.data[yoff..], frm.stride[0], &sstate.top_line_y[sstate.mb_x * 16..], &sstate.left_y);
-            },
-            MBType::Intra8x8 => {
-                let mut ictx = IPred8Context::new();
-                for part in 0..4 {
-                    let x = (part & 1) * 2;
-                    let y = part & 2;
-                    let blk4 = x + y * 4;
-
-                    let cur_yoff = yoff + x * 4 + y * 4 * frm.stride[0];
-                    let has_top = y > 0 || sstate.has_top;
-                    let has_left = x > 0 || sstate.has_left;
-                    let imode = mb_info.ipred[blk4];
-                    let id = if imode != IntraPredMode::DC || (has_top && has_left) {
-                            let im_id: u8 = imode.into();
-                            im_id as usize
-                        } else if !has_top && !has_left {
-                            IPRED4_DC128
-                        } else if !has_left {
-                            IPRED4_DC_TOP
-                        } else {
-                            IPRED4_DC_LEFT
-                        };
-                    let mb_idx = sstate.mb_x + sstate.mb_y * sstate.mb_w;
-                    let noright = (y == 2 || sstate.mb_x == sstate.mb_w - 1 || mb_idx < sstate.mb_start + sstate.mb_w) && (x == 2);
-                    let has_tl = (has_top && x > 0) || (has_left && y > 0) || (x == 0 && y == 0 && sstate.mb_x > 0 && mb_idx > sstate.mb_start + sstate.mb_w);
-                    if id != IPRED4_DC128 {
-                        let top = if y == 0 {
-                                &sstate.top_line_y[sstate.mb_x * 16 + x * 4..]
-                            } else {
-                                &frm.data[cur_yoff - frm.stride[0]..]
-                            };
-                        let mut left_buf = [0; 9];
-                        let left = if x == 0 {
-                                &sstate.left_y[y * 4..]
-                            } else {
-                                if has_tl {
-                                    if y == 0 {
-                                        left_buf[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1];
-                                    } else {
-                                        left_buf[0] = frm.data[cur_yoff - 1 - frm.stride[0]];
-                                    }
-                                }
-                                if has_left {
-                                    for (dst, src) in left_buf[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) {
-                                        *dst = src[0];
-                                    }
-                                }
-                                &left_buf
-                            };
-                        ictx.fill(top, left, has_top, has_top && !noright, has_left, has_tl);
-                    }
-                    IPRED_FUNCS8X8_LUMA[id](&mut frm.data[cur_yoff..], frm.stride[0], &ictx);
-                    if mb_info.coded[blk4] {
-                        add_coeffs8(frm.data, cur_yoff, frm.stride[0], &mb_info.coeffs8x8[part].coeffs);
-                    }
-                }
-            },
-            MBType::Intra4x4 => {
-                for &(x,y) in I4X4_SCAN.iter() {
-                    let x = x as usize;
-                    let y = y as usize;
-                    let cur_yoff = yoff + x * 4 + y * 4 * frm.stride[0];
-                    let has_top = y > 0 || sstate.has_top;
-                    let has_left = x > 0 || sstate.has_left;
-                    let imode = mb_info.ipred[x + y * 4];
-                    let id = if imode != IntraPredMode::DC || (has_top && has_left) {
-                            let im_id: u8 = imode.into();
-                            im_id as usize
-                        } else if !has_top && !has_left {
-                            IPRED4_DC128
-                        } else if !has_left {
-                            IPRED4_DC_TOP
-                        } else {
-                            IPRED4_DC_LEFT
-                        };
-                    let noright = (sstate.mb_x == sstate.mb_w - 1 || sstate.mb_x + sstate.mb_y * sstate.mb_w < sstate.mb_start + sstate.mb_w) && (x == 3);
-                    let tr: [u8; 4] = if y == 0 {
-                            let tsrc = &sstate.top_line_y[sstate.mb_x * 16 + x * 4..];
-                            if has_top && !noright {
-                                [tsrc[4], tsrc[5], tsrc[6], tsrc[7]]
-                            } else if has_top {
-                                [tsrc[3]; 4]
-                            } else {
-                                [0; 4]
-                            }
-                        } else if (x & 1) == 0 || (x == 1 && y == 2) {
-                            let i = cur_yoff - frm.stride[0];
-                            [frm.data[i + 4], frm.data[i + 5], frm.data[i + 6], frm.data[i + 7]]
-                        } else {
-                            let i = cur_yoff - frm.stride[0];
-                            [frm.data[i + 3], frm.data[i + 3], frm.data[i + 3], frm.data[i + 3]]
-                        };
-                    let mut top = [128; 4];
-                    let mut left = [128; 9];
-                    if y == 0 {
-                        if has_top {
-                            top.copy_from_slice(&sstate.top_line_y[sstate.mb_x * 16 + x * 4..][..4]);
-                        }
-                    } else {
-                        top.copy_from_slice(&frm.data[cur_yoff - frm.stride[0]..][..4]);
-                    }
-                    if x == 0 {
-                        if has_left {
-                            for (dst, &src) in left.iter_mut().zip(sstate.left_y[y * 4..].iter()) {
-                                *dst = src;
-                            }
-                        }
-                    } else {
-                        if y == 0 {
-                            if x == 0 {
-                                left[0] = sstate.left_y[y * 4];
-                            } else if has_top {
-                                left[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1];
-                            }
-                        } else {
-                            left[0] = frm.data[cur_yoff - frm.stride[0] - 1];
-                        }
-                        for (dst, row) in left[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) {
-                            *dst = row[0];
-                        }
-                    }
-                    IPRED_FUNCS4X4[id](&mut frm.data[cur_yoff..], frm.stride[0], &top, &left, &tr);
-                    if mb_info.coded[x + y * 4] {
-                        add_coeffs(frm.data, cur_yoff, frm.stride[0], &mb_info.coeffs[x + y * 4]);
-                    }
-                }
-            },
-            _ => unreachable!(),
-        };
-        let id = if mb_info.chroma_ipred != 0 || (sstate.has_top && sstate.has_left) {
-                mb_info.chroma_ipred as usize
-            } else if !sstate.has_top && !sstate.has_left {
-                IPRED8_DC128
-            } else if !sstate.has_left {
-                IPRED8_DC_TOP
-            } else {
-                IPRED8_DC_LEFT
-            };
-        for chroma in 1..3 {
-            let off = frm.offset[chroma] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[chroma];
-            let top = &sstate.top_line_c[chroma - 1][sstate.mb_x * 8..];
-            IPRED_FUNCS8X8_CHROMA[id](&mut frm.data[off..], frm.stride[chroma], top, &sstate.left_c[chroma - 1]);
-        }
-    }
-    fn add_luma(frm: &mut NASimpleVideoFrame<u8>, sstate: &SliceState, mb_info: &CurrentMBInfo) {
-        let mut yoff = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * frm.stride[0];
-        if !mb_info.transform_size_8x8 {
-            for y in 0..4 {
-                for x in 0..4 {
-                    if mb_info.coded[x + y * 4] {
-                        add_coeffs(frm.data, yoff + x * 4, frm.stride[0], &mb_info.coeffs[x + y * 4]);
-                    }
-                }
-                yoff += frm.stride[0] * 4;
-            }
-        } else {
-            for y in 0..2 {
-                for x in 0..2 {
-                    if mb_info.coded[x * 2 + y * 2 * 4] {
-                        add_coeffs8(frm.data, yoff + x * 8, frm.stride[0], &mb_info.coeffs8x8[x + y * 2].coeffs);
-                    }
-                }
-                yoff += frm.stride[0] * 8;
-            }
-        }
-    }
-    fn add_chroma(frm: &mut NASimpleVideoFrame<u8>, sstate: &SliceState, mb_info: &CurrentMBInfo) {
-        for chroma in 1..3 {
-            let mut off = frm.offset[chroma] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[chroma];
-            for y in 0..2 {
-                for x in 0..2 {
-                    let blk_no = 16 + (chroma - 1) * 4 + x + y * 2;
-                    if mb_info.coded[blk_no] || mb_info.coeffs[blk_no][0] != 0 {
-                        add_coeffs(frm.data, off + x * 4, frm.stride[chroma], &mb_info.coeffs[blk_no]);
-                    }
-                }
-                off += frm.stride[chroma] * 4;
-            }
-        }
-    }
     fn pred_mv(sstate: &mut SliceState, frame_refs: &FrameRefs, mb_info: &mut CurrentMBInfo, cur_id: u16, temporal_mv: bool) {
         let mb_type = mb_info.mb_type;
         if !mb_type.is_4x4() {
@@ -701,7 +508,7 @@ println!("PAFF?");
         }
     }
     #[allow(clippy::cognitive_complexity)]
-    fn handle_macroblock(&mut self, mb_info: &mut CurrentMBInfo) {
+    fn handle_macroblock(&mut self, slice_hdr: &SliceHeader, mb_info: &mut CurrentMBInfo) {
         let pps = &self.pps[self.cur_pps];
 
         let qp_y = mb_info.qp_y;
@@ -778,172 +585,16 @@ println!("PAFF?");
         let ypos = self.sstate.mb_y * 16;
         if let Some(ref mut pic) = self.cur_pic {
             let mut frm = NASimpleVideoFrame::from_video_buf(&mut pic.buf).unwrap();
-            match mb_info.mb_type {
-                MBType::Intra16x16(_, _, _) => {
-                    Self::pred_intra(&mut frm, &self.sstate, &mb_info);
-                },
-                MBType::Intra4x4 | MBType::Intra8x8 => {
-                    Self::pred_intra(&mut frm, &self.sstate, &mb_info);
-                },
-                MBType::PCM => {},
-                MBType::PSkip => {
-                    let mv = self.sstate.get_cur_blk4(0).mv[0];
-                    let rpic = self.frame_refs.select_ref_pic(0, 0);
-                    Self::do_p_mc(&mut frm, xpos, ypos, 16, 16, mv, rpic);
-                },
-                MBType::P16x16 => {
-                    let mv = self.sstate.get_cur_blk4(0).mv[0];
-                    let rpic = self.frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index());
-                    Self::do_p_mc(&mut frm, xpos, ypos, 16, 16, mv, rpic);
-                },
-                MBType::P16x8 | MBType::P8x16 => {
-                    let (bw, bh, bx, by) = if mb_info.mb_type == MBType::P16x8 {
-                            (16, 8, 0, 8)
-                        } else {
-                            (8, 16, 8, 0)
-                        };
-                    let mv = self.sstate.get_cur_blk4(0).mv[0];
-                    let rpic = self.frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index());
-                    Self::do_p_mc(&mut frm, xpos, ypos, bw, bh, mv, rpic);
-                    let mv = self.sstate.get_cur_blk4(bx / 4 + by).mv[0];
-                    let rpic = self.frame_refs.select_ref_pic(0, mb_info.ref_l0[1].index());
-                    Self::do_p_mc(&mut frm, xpos + bx, ypos + by, bw, bh, mv, rpic);
-                },
-                MBType::P8x8 | MBType::P8x8Ref0 => {
-                    for part in 0..4 {
-                        let bx = (part & 1) * 8;
-                        let by = (part & 2) * 4;
-                        if let Some(buf) = self.frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index()) {
-                            let mv = self.sstate.get_cur_blk4(bx / 4 + by).mv[0];
-
-                            match mb_info.sub_mb_type[part] {
-                                SubMBType::P8x8 => {
-                                    do_mc(&mut frm, buf, xpos + bx, ypos + by, 8, 8, mv);
-                                },
-                                SubMBType::P8x4 => {
-                                    do_mc(&mut frm, buf.clone(), xpos + bx, ypos + by, 8, 4, mv);
-                                    let mv = self.sstate.get_cur_blk4(bx / 4 + by + 4).mv[0];
-                                    do_mc(&mut frm, buf, xpos + bx, ypos + by + 4, 8, 4, mv);
-                                },
-                                SubMBType::P4x8 => {
-                                    do_mc(&mut frm, buf.clone(), xpos + bx, ypos + by, 4, 8, mv);
-                                    let mv = self.sstate.get_cur_blk4(bx / 4 + by + 1).mv[0];
-                                    do_mc(&mut frm, buf, xpos + bx + 4, ypos + by, 4, 8, mv);
-                                },
-                                SubMBType::P4x4 => {
-                                    for sb_no in 0..4 {
-                                        let sxpos = xpos + bx + (sb_no & 1) * 4;
-                                        let sypos = ypos + by + (sb_no & 2) * 2;
-                                        let sblk_no = (bx / 4 + (sb_no & 1)) + ((by / 4) + (sb_no >> 1)) * 4;
-                                        let mv = self.sstate.get_cur_blk4(sblk_no).mv[0];
-                                        do_mc(&mut frm, buf.clone(), sxpos, sypos, 4, 4, mv);
-                                    }
-                                },
-                                _ => unreachable!(),
-                            };
-                        } else {
-                            gray_block(&mut frm, xpos + bx, ypos + by, 8, 8);
-                        }
-                    }
-                },
-                MBType::B16x16(mode) => {
-                    let mv0 = self.sstate.get_cur_blk4(0).mv[0];
-                    let rpic0 = self.frame_refs.select_ref_pic(0, mb_info.ref_l0[0].index());
-                    let mv1 = self.sstate.get_cur_blk4(0).mv[1];
-                    let rpic1 = self.frame_refs.select_ref_pic(1, mb_info.ref_l1[0].index());
-                    Self::do_b_mc(&mut frm, mode, xpos, ypos, 16, 16, mv0, rpic0, mv1, rpic1, &mut self.avg_buf);
-                },
-                MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
-                    let (pw, ph) = mb_info.mb_type.size();
-                    let (px, py) = (pw & 8, ph & 8);
-                    let modes = [mode0, mode1];
-                    let (mut bx, mut by) = (0, 0);
-                    for part in 0..2 {
-                        let blk = if part == 0 { 0 } else { (px / 4) + py };
-                        let mv0 = self.sstate.get_cur_blk4(blk).mv[0];
-                        let rpic0 = self.frame_refs.select_ref_pic(0, mb_info.ref_l0[part].index());
-                        let mv1 = self.sstate.get_cur_blk4(blk).mv[1];
-                        let rpic1 = self.frame_refs.select_ref_pic(1, mb_info.ref_l1[part].index());
-                        Self::do_b_mc(&mut frm, modes[part], xpos + bx, ypos + by, pw, ph, mv0, rpic0, mv1, rpic1, &mut self.avg_buf);
-                        bx += px;
-                        by += py;
-                    }
-                },
-                MBType::Direct | MBType::BSkip => {
-                    let is_16x16 = self.frame_refs.get_colocated_info(self.sstate.mb_x, self.sstate.mb_y).0.mb_type.is_16x16();
-                    if is_16x16 || !self.temporal_mv {
-                        let mv = self.sstate.get_cur_blk4(0).mv;
-                        let ref_idx = self.sstate.get_cur_blk8(0).ref_idx;
-                        let rpic0 = self.frame_refs.select_ref_pic(0, ref_idx[0].index());
-                        let rpic1 = self.frame_refs.select_ref_pic(1, ref_idx[1].index());
-                        Self::do_b_mc(&mut frm, BMode::Bi, xpos, ypos, 16, 16, mv[0], rpic0, mv[1], rpic1, &mut self.avg_buf);
+            if mb_info.mb_type != MBType::PCM {
+                let weight_mode = if self.pps[self.cur_pps].weighted_pred && slice_hdr.slice_type.is_p() {
+                        1
+                    } else if slice_hdr.slice_type.is_b() {
+                        self.pps[self.cur_pps].weighted_bipred_idc
                     } else {
-                        for blk4 in 0..16 {
-                            let mv = self.sstate.get_cur_blk4(blk4).mv;
-                            let ref_idx = self.sstate.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx;
-                            let rpic0 = self.frame_refs.select_ref_pic(0, ref_idx[0].index());
-                            let rpic1 = self.frame_refs.select_ref_pic(1, ref_idx[1].index());
-                            Self::do_b_mc(&mut frm, BMode::Bi, xpos + (blk4 & 3) * 4, ypos + (blk4 >> 2) * 4, 4, 4, mv[0], rpic0, mv[1], rpic1, &mut self.avg_buf);
-                        }
-                    }
-                    self.sstate.apply_to_blk8(|blk8| { blk8.ref_idx[0].set_direct(); blk8.ref_idx[1].set_direct(); });
-                },
-                MBType::B8x8 => {
-                    for part in 0..4 {
-                        let ridx = self.sstate.get_cur_blk8(part).ref_idx;
-                        let rpic0 = self.frame_refs.select_ref_pic(0, ridx[0].index());
-                        let rpic1 = self.frame_refs.select_ref_pic(1, ridx[1].index());
-                        let subtype = mb_info.sub_mb_type[part];
-                        let blk8 = (part & 1) * 2 + (part & 2) * 4;
-                        let mut bx = (part & 1) * 8;
-                        let mut by = (part & 2) * 4;
-                        match subtype {
-                            SubMBType::Direct8x8 => {
-                                for blk in 0..4 {
-                                    let mv = self.sstate.get_cur_blk4(bx / 4 + (by / 4) * 4).mv;
-                                    let ref_idx = self.sstate.get_cur_blk8(bx / 8 + (by / 8) * 2).ref_idx;
-                                    let rpic0 = self.frame_refs.select_ref_pic(0, ref_idx[0].index());
-                                    let rpic1 = self.frame_refs.select_ref_pic(1, ref_idx[1].index());
-                                    Self::do_b_mc(&mut frm, BMode::Bi, xpos + bx, ypos + by, 4, 4, mv[0], rpic0, mv[1], rpic1, &mut self.avg_buf);
-                                    bx += 4;
-                                    if blk == 1 {
-                                        bx -= 8;
-                                        by += 4;
-                                    }
-                                }
-                                self.sstate.get_cur_blk8(part).ref_idx[0].set_direct();
-                                self.sstate.get_cur_blk8(part).ref_idx[1].set_direct();
-                            },
-                            SubMBType::B8x8(mode) => {
-                                let mv = self.sstate.get_cur_blk4(blk8).mv;
-                                Self::do_b_mc(&mut frm, mode, xpos + bx, ypos + by, 8, 8, mv[0], rpic0, mv[1], rpic1, &mut self.avg_buf);
-                            },
-                            SubMBType::B8x4(mode) | SubMBType::B4x8(mode) => {
-                                let (pw, ph) = subtype.size();
-                                let mv = self.sstate.get_cur_blk4(blk8).mv;
-                                Self::do_b_mc(&mut frm, mode, xpos + bx, ypos + by, pw, ph, mv[0], rpic0.clone(), mv[1], rpic1.clone(), &mut self.avg_buf);
-                                let addr2 = blk8 + (pw & 4) / 4 + (ph & 4);
-                                let mv = self.sstate.get_cur_blk4(addr2).mv;
-                                Self::do_b_mc(&mut frm, mode, xpos + bx + (pw & 4), ypos + by + (ph & 4), pw, ph, mv[0], rpic0, mv[1], rpic1, &mut self.avg_buf);
-                            },
-                            SubMBType::B4x4(mode) => {
-                                for i in 0..4 {
-                                    let addr2 = blk8 + (i & 1) + (i & 2) * 2;
-                                    let mv = self.sstate.get_cur_blk4(addr2).mv;
-                                    Self::do_b_mc(&mut frm, mode, xpos + bx, ypos + by, 4, 4, mv[0], rpic0.clone(), mv[1], rpic1.clone(), &mut self.avg_buf);
-                                    bx += 4;
-                                    if i == 1 {
-                                        bx -= 8;
-                                        by += 4;
-                                    }
-                                }
-                            },
-                            _ => unreachable!(),
-                        };
-                    }
-                },
-            };
-            if mb_info.mb_type == MBType::PCM {
+                        0
+                    };
+                recon_mb(&mut frm, slice_hdr, &mb_info, &mut self.sstate, &self.frame_refs, &mut self.avg_buf, weight_mode);
+            } else {
                 for (dline, src) in frm.data[frm.offset[0] + xpos + ypos * frm.stride[0]..].chunks_mut(frm.stride[0]).take(16).zip(self.ipcm_buf.chunks(16)) {
                     dline[..16].copy_from_slice(src);
                 }
@@ -953,11 +604,6 @@ println!("PAFF?");
                 for (dline, src) in frm.data[frm.offset[2] + xpos/2 + ypos/2 * frm.stride[2]..].chunks_mut(frm.stride[2]).take(8).zip(self.ipcm_buf[256 + 64..].chunks(8)) {
                     dline[..8].copy_from_slice(src);
                 }
-            } else if !mb_info.mb_type.is_skip() {
-                if mb_info.mb_type != MBType::Intra4x4 && mb_info.mb_type != MBType::Intra8x8 {
-                    Self::add_luma(&mut frm, &self.sstate, &mb_info);
-                }
-                Self::add_chroma(&mut frm, &self.sstate, &mb_info);
             }
 /*match mb_info.mb_type {
 MBType::BSkip | MBType::Direct | MBType::B16x16(_) | MBType::B16x8(_, _) | MBType::B8x16(_, _) | MBType::B8x8 => {
@@ -995,48 +641,6 @@ _ => {},
         }
         self.sstate.next_mb();
     }
-    fn do_p_mc(frm: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize, w: usize, h: usize, mv: MV, ref_pic: Option<NAVideoBufferRef<u8>>) {
-        if let Some(buf) = ref_pic {
-            do_mc(frm, buf, xpos, ypos, w, h, mv);
-        } else {
-            gray_block(frm, xpos, ypos, w, h);
-        }
-    }
-    fn do_b_mc(frm: &mut NASimpleVideoFrame<u8>, mode: BMode, xpos: usize, ypos: usize, w: usize, h: usize, mv0: MV, ref_pic0: Option<NAVideoBufferRef<u8>>, mv1: MV, ref_pic1: Option<NAVideoBufferRef<u8>>, avg_buf: &mut NAVideoBufferRef<u8>) {
-        match mode {
-            BMode::L0 => {
-                if let Some(buf) = ref_pic0 {
-                    do_mc(frm, buf, xpos, ypos, w, h, mv0);
-                } else {
-                    gray_block(frm, xpos, ypos, w, h);
-                }
-            },
-            BMode::L1 => {
-                if let Some(buf) = ref_pic1 {
-                    do_mc(frm, buf, xpos, ypos, w, h, mv1);
-                } else {
-                    gray_block(frm, xpos, ypos, w, h);
-                }
-            },
-            BMode::Bi => {
-                match (ref_pic0, ref_pic1) {
-                    (Some(buf0), Some(buf1)) => {
-                        do_mc(frm, buf0, xpos, ypos, w, h, mv0);
-                        do_mc_avg(frm, buf1, xpos, ypos, w, h, mv1, avg_buf);
-                    },
-                    (Some(buf0), None) => {
-                        do_mc(frm, buf0, xpos, ypos, w, h, mv0);
-                    },
-                    (None, Some(buf1)) => {
-                        do_mc(frm, buf1, xpos, ypos, w, h, mv1);
-                    },
-                    (None, None) => {
-                        gray_block(frm, xpos, ypos, w, h);
-                    },
-                };
-            },
-        };
-    }
     fn decode_slice_cavlc(&mut self, br: &mut BitReader, slice_hdr: &SliceHeader, full_size: usize) -> DecoderResult<bool> {
         const INTRA_CBP: [u8; 48] = [
             47, 31, 15,  0, 23, 27, 29, 30,  7, 11, 13, 14, 39, 43, 45, 46,
@@ -1068,7 +672,7 @@ _ => {},
                 validate!(mb_idx + mb_skip_run <= self.num_mbs);
                 mb_info.mb_type = skip_type;
                 for _ in 0..mb_skip_run {
-                    self.handle_macroblock(&mut mb_info);
+                    self.handle_macroblock(slice_hdr, &mut mb_info);
                     mb_idx += 1;
                 }
                 if mb_idx == self.num_mbs || br.tell() >= full_size {
@@ -1134,7 +738,7 @@ _ => {},
                         decode_residual_cavlc(br, &mut self.sstate, &mut mb_info, &self.cavlc_cb)?;
                     }
                 }
-                self.handle_macroblock(&mut mb_info);
+                self.handle_macroblock(slice_hdr, &mut mb_info);
             }
             mb_idx += 1;
         }
@@ -1240,7 +844,7 @@ _ => {},
                 mb_info.transform_size_8x8 = false;
                 last_qp_diff = false;
             }
-            self.handle_macroblock(&mut mb_info);
+            self.handle_macroblock(slice_hdr, &mut mb_info);
             prev_mb_skipped = mb_skip;
             if !(self.is_mbaff && ((mb_idx & 1) == 0)) && cabac.decode_terminate() {
                 if let Some(ref mut pic) = self.cur_pic {
index 864eec7829ada5c272795b156f5d45efa54e4afa..5c7072908951084788299a2d35177ff1ce31ae6c 100644 (file)
@@ -50,14 +50,22 @@ const SLICE_TYPES: [SliceType; 10] = [
     SliceType::P, SliceType::B, SliceType::I, SliceType::SP, SliceType::SI,
 ];
 
-#[derive(Clone,Copy)]
+#[derive(Clone,Copy,Default)]
 pub struct WeightInfo {
     pub luma_weighted:                      bool,
     pub luma_weight:                        i8,
     pub luma_offset:                        i8,
+    pub luma_shift:                         u8, // copied from the slice header's luma_log2_weight_denom in parse_pred_weight_table()
     pub chroma_weighted:                    bool,
     pub chroma_weight:                      [i8; 2],
     pub chroma_offset:                      [i8; 2],
+    pub chroma_shift:                       u8, // copied from the slice header's chroma_log2_weight_denom in parse_pred_weight_table()
+}
+
+impl WeightInfo {
+    pub fn is_weighted(&self) -> bool { // true when the luma or the chroma weighted flag (or both) is set
+        self.luma_weighted || self.chroma_weighted
+    }
 }
 
 #[derive(Clone,Copy)]
@@ -117,6 +125,35 @@ pub struct SliceHeader {
     pub slice_group_change_cycle:           u32,
 }
 
+pub const DEF_WEIGHT_INFO: WeightInfo = WeightInfo { // all-off entry that SliceHeader::get_weight() returns for indices outside the active list
+    luma_weighted:                      false,
+    luma_weight:                        0,
+    luma_offset:                        0,
+    luma_shift:                         0,
+    chroma_weighted:                    false,
+    chroma_weight:                      [0; 2],
+    chroma_offset:                      [0; 2],
+    chroma_shift:                       0,
+};
+
+impl SliceHeader {
+    pub fn get_weight(&self, list_id: u8, idx: usize) -> WeightInfo {
+        if list_id == 0 {
+            if idx < self.num_ref_idx_l0_active {
+                self.weights_l0[idx]
+            } else {
+                DEF_WEIGHT_INFO
+            }
+        } else {
+            if idx < self.num_ref_idx_l1_active {
+                self.weights_l1[idx]
+            } else {
+                DEF_WEIGHT_INFO
+            }
+        }
+    }
+}
+
 pub fn parse_slice_header_minimal(br: &mut BitReader) -> DecoderResult<(usize, SliceType)> {
     let first_mb_in_slice                           = br.read_ue()? as usize;
     let stype                                       = br.read_ue_lim(SLICE_TYPES.len() as u32 - 1)?;
@@ -199,6 +236,15 @@ pub fn parse_slice_header(br: &mut BitReader, sps_arr: &[SeqParameterSet], pps_a
     if (pps.weighted_pred && hdr.slice_type.is_p()) ||
         (pps.weighted_bipred_idc == 1 && hdr.slice_type.is_b()) {
         parse_pred_weight_table(&mut hdr, br)?;
+    } else {
+        for weight in hdr.weights_l0[..hdr.num_ref_idx_l0_active].iter_mut() {
+            weight.luma_weighted = false;
+            weight.chroma_weighted = false;
+        }
+        for weight in hdr.weights_l1[..hdr.num_ref_idx_l1_active].iter_mut() {
+            weight.luma_weighted = false;
+            weight.chroma_weighted = false;
+        }
     }
     if nal_ref_idc != 0 {
         if is_idr {
@@ -346,6 +392,8 @@ fn parse_pred_weight_table(hdr: &mut SliceHeader, br: &mut BitReader) -> Decoder
             validate!(offset >= -128 && offset <= 127);
             weight.luma_offset = offset as i8;
         }
+        weight.luma_shift = hdr.luma_log2_weight_denom;
+
         weight.chroma_weighted                      = br.read_bool()?;
         if weight.chroma_weighted {
             for i in 0..2 {
@@ -357,6 +405,7 @@ fn parse_pred_weight_table(hdr: &mut SliceHeader, br: &mut BitReader) -> Decoder
                 weight.chroma_offset[i] = offset as i8;
             }
         }
+        weight.chroma_shift = hdr.chroma_log2_weight_denom;
     }
     for weight in hdr.weights_l1[..hdr.num_ref_idx_l1_active].iter_mut() {
         weight.luma_weighted                        = br.read_bool()?;
@@ -368,6 +417,8 @@ fn parse_pred_weight_table(hdr: &mut SliceHeader, br: &mut BitReader) -> Decoder
             validate!(offset >= -128 && offset <= 127);
             weight.luma_offset = offset as i8;
         }
+        weight.luma_shift = hdr.luma_log2_weight_denom;
+
         weight.chroma_weighted                      = br.read_bool()?;
         if weight.chroma_weighted {
             for i in 0..2 {
@@ -379,6 +430,7 @@ fn parse_pred_weight_table(hdr: &mut SliceHeader, br: &mut BitReader) -> Decoder
                 weight.chroma_offset[i] = offset as i8;
             }
         }
+        weight.chroma_shift = hdr.chroma_log2_weight_denom;
     }
 
     Ok(())
index 0d97e023921b3ff2977e99f9fb5b68e8c017f2ed..0a0cc64cb934fee76d984082144995c81cf4f800 100644 (file)
@@ -225,13 +225,15 @@ impl CompactMBType {
     pub fn is_inter(self) -> bool {
         !self.is_intra() && !self.is_skip() && self != CompactMBType::PCM
     }
-    pub fn is_16x16(self) -> bool {
+    pub fn is_16x16_ref(self) -> bool { // true only for the intra types, PCM, P16x16 and B16x16; every other type yields false
         match self {
-            CompactMBType::P16x8 | CompactMBType::P8x16 |
-            CompactMBType::P8x8 | CompactMBType::P8x8Ref0 |
-            CompactMBType::B16x8 | CompactMBType::B8x16 |
-            CompactMBType::B8x8 => false,
-            _ => true,
+            CompactMBType::Intra4x4 |
+            CompactMBType::Intra8x8 |
+            CompactMBType::Intra16x16 |
+            CompactMBType::PCM |
+            CompactMBType::P16x16 |
+            CompactMBType::B16x16 => true,
+            _ => false,
         }
     }
 }
@@ -765,7 +767,7 @@ impl SliceState {
     }
     pub fn predict_direct_mb(&mut self, frame_refs: &FrameRefs, temporal_mv: bool, cur_id: u16) {
         let (col_mb, _, _) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
-        if col_mb.mb_type.is_16x16() || !temporal_mv {
+        if col_mb.mb_type.is_16x16_ref() || !temporal_mv {
             let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, temporal_mv, cur_id, 0);
             self.apply_to_blk4(|blk4| blk4.mv = [mv0, mv1]);
             self.apply_to_blk8(|blk8| blk8.ref_idx = [ref0, ref1]);