[nihav.git] / nihav-duck / src / codecs / vpcommon.rs

use nihav_core::codecs::*;
use nihav_codec_support::codecs::blockdsp;
use nihav_codec_support::codecs::blockdsp::*;

/// Pixel format description with four planar 8-bit components and the alpha flag set.
///
/// Components 1 and 2 carry `h_ss: 1, v_ss: 1` while components 0 and 3 carry
/// zeroes there, and each component is stored unpacked with `next_elem: 1`.
/// NOTE(review): going by the constant name this is YUV 4:2:0 plus a
/// full-resolution alpha plane, with `h_ss`/`v_ss` acting as subsampling
/// shifts — confirm against the `NAPixelChromaton` definition.
pub const VP_YUVA420_FORMAT: NAPixelFormaton = NAPixelFormaton{
        model:      ColorModel::YUV(YUVSubmodel::YUVJ),
        components: 4,
        comp_info:  [
                Some(NAPixelChromaton{ h_ss: 0, v_ss: 0, packed: false, depth: 8, shift: 0, comp_offs: 0, next_elem: 1}),
                Some(NAPixelChromaton{ h_ss: 1, v_ss: 1, packed: false, depth: 8, shift: 0, comp_offs: 1, next_elem: 1}),
                Some(NAPixelChromaton{ h_ss: 1, v_ss: 1, packed: false, depth: 8, shift: 0, comp_offs: 2, next_elem: 1}),
                Some(NAPixelChromaton{ h_ss: 0, v_ss: 0, packed: false, depth: 8, shift: 0, comp_offs: 3, next_elem: 1}),
                // fifth component slot is unused
                None ],
        elem_size:  0,
        be:         false,
        alpha:      true,
        palette:    false
    };

/// Macroblock coding type.
///
/// `Intra` is the default value; the `Inter*` variants map to the
/// [`VP_REF_INTER`] reference and the `Golden*` variants map to the
/// [`VP_REF_GOLDEN`] reference (see [`VPMBType::get_ref_id`]).
#[derive(Clone, Copy, Debug, Default, PartialEq)]
#[allow(dead_code)]
pub enum VPMBType {
    #[default]
    Intra,
    InterNoMV,
    InterMV,
    InterNearest,
    InterNear,
    InterFourMV,
    GoldenNoMV,
    GoldenMV,
    GoldenNearest,
    GoldenNear,
}

/// Reference identifier returned for the `Inter*` macroblock types.
pub const VP_REF_INTER: u8 = 1;
/// Reference identifier returned for the `Golden*` macroblock types.
pub const VP_REF_GOLDEN: u8 = 2;

#[allow(dead_code)]
impl VPMBType {
    /// Tells whether this is the `Intra` type.
    pub fn is_intra(self) -> bool {
        matches!(self, Self::Intra)
    }

    /// Returns the reference identifier for this type: `0` for `Intra`,
    /// [`VP_REF_INTER`] for the `Inter*` types and [`VP_REF_GOLDEN`] for
    /// the `Golden*` types.
    pub fn get_ref_id(self) -> u8 {
        match self {
            Self::Intra => 0,
            Self::InterNoMV
            | Self::InterMV
            | Self::InterNearest
            | Self::InterNear
            | Self::InterFourMV => VP_REF_INTER,
            Self::GoldenNoMV
            | Self::GoldenMV
            | Self::GoldenNearest
            | Self::GoldenNear => VP_REF_GOLDEN,
        }
    }
}

/// Holder for the two reference frames kept between decoded frames:
/// the last frame and the golden frame.
#[derive(Default)]
pub struct VPShuffler {
    lastframe: Option<NAVideoBufferRef<u8>>,
    goldframe: Option<NAVideoBufferRef<u8>>,
}

impl VPShuffler {
    /// Creates a shuffler holding no frames.
    pub fn new() -> Self {
        Self::default()
    }

    /// Drops both stored references.
    pub fn clear(&mut self) {
        drop(self.lastframe.take());
        drop(self.goldframe.take());
    }

    /// Stores `buf` as the last frame, replacing any previous one.
    pub fn add_frame(&mut self, buf: NAVideoBufferRef<u8>) {
        self.lastframe.replace(buf);
    }

    /// Stores `buf` as the golden frame, replacing any previous one.
    pub fn add_golden_frame(&mut self, buf: NAVideoBufferRef<u8>) {
        self.goldframe.replace(buf);
    }

    /// Returns a clone of the last-frame reference, if one is stored.
    pub fn get_last(&mut self) -> Option<NAVideoBufferRef<u8>> {
        self.lastframe.clone()
    }

    /// Returns a clone of the golden-frame reference, if one is stored.
    pub fn get_golden(&mut self) -> Option<NAVideoBufferRef<u8>> {
        self.goldframe.clone()
    }

    /// Reports whether a last frame is stored (the golden frame is not checked).
    pub fn has_refs(&self) -> bool {
        self.lastframe.is_some()
    }
}

/// Six base values; consecutive entries differ by 2, 4, 8, 16 and 32.
/// NOTE(review): presumably the smallest magnitude of each VP5/VP6 "long"
/// coefficient category — confirm against the decoders that use this table.
pub const VP56_COEF_BASE: [i16; 6] = [ 5, 7, 11, 19, 35, 67 ];
/// Six rows of probabilities, one per entry of `VP56_COEF_BASE`.
///
/// The used part of every row ends with 128; rows 0..=4 are padded with
/// zeroes up to twelve entries, the last row uses all twelve.
/// NOTE(review): presumably these are the probabilities for reading the
/// additional bits of a coefficient in the matching category — confirm.
pub const VP56_COEF_ADD_PROBS: [[u8; 12]; 6] = [
    [ 159, 128,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0 ],
    [ 165, 145, 128,   0,   0,   0,   0,   0,   0,   0,   0,   0 ],
    [ 173, 148, 140, 128,   0,   0,   0,   0,   0,   0,   0,   0 ],
    [ 176, 155, 140, 135, 128,   0,   0,   0,   0,   0,   0,   0 ],
    [ 180, 157, 141, 134, 130, 128,   0,   0,   0,   0,   0,   0 ],
    [ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 128 ],
];

/// Binary arithmetic ("bool") decoder that reads probability-coded bits
/// from a byte slice.
#[allow(dead_code)]
pub struct BoolCoder<'a> {
    /// Input data being decoded.
    pub src:    &'a [u8],
    /// Index of the next byte of `src` to feed into `value`.
    pos:    usize,
    /// Decoding window; its top byte is compared against the split point.
    value:  u32,
    /// Current interval size (starts at 255).
    range:  u32,
    /// Bit counter used by `renorm` to decide when and where the next
    /// input byte is merged into `value`.
    bits:   i32,
}

#[allow(dead_code)]
impl<'a> BoolCoder<'a> {
    /// Creates a decoder over `src`, preloading its first four bytes
    /// (big-endian) into the decoding window.
    ///
    /// # Errors
    ///
    /// Returns `DecoderError::ShortData` when `src` holds fewer than the
    /// four bytes consumed here.
    pub fn new(src: &'a [u8]) -> DecoderResult<Self> {
        // Four bytes are read below, so anything shorter must be rejected
        // (a three-byte input used to pass the check and panic on `src[3]`).
        if src.len() < 4 { return Err(DecoderError::ShortData); }
        let value = u32::from_be_bytes([src[0], src[1], src[2], src[3]]);
        Ok(Self { src, pos: 4, value, range: 255, bits: 8 })
    }

    /// Reads one bit with probability 128 (i.e. an even split).
    pub fn read_bool(&mut self) -> bool {
        self.read_prob(128)
    }

    /// Reads one bit; `prob` (out of 256) scales the part of the interval
    /// that decodes as `false`.
    pub fn read_prob(&mut self, prob: u8) -> bool {
        self.renorm();
        let split = 1 + (((self.range - 1) * u32::from(prob)) >> 8);
        let threshold = split << 24;
        if self.value < threshold {
            self.range = split;
            false
        } else {
            self.range -= split;
            self.value -= threshold;
            true
        }
    }

    /// Reads `bits` even-probability bits, most significant first.
    pub fn read_bits(&mut self, bits: u8) -> u32 {
        let mut val = 0u32;
        for _ in 0..bits {
            val = (val << 1) | u32::from(self.read_prob(128));
        }
        val
    }

    /// Reads eight even-probability bits as a byte, most significant first.
    pub fn read_byte(&mut self) -> u8 {
        let mut val = 0u8;
        for _ in 0..8 {
            val = (val << 1) | u8::from(self.read_prob(128));
        }
        val
    }

    /// Reads a signed value: the first bit selects the initial value
    /// (`-1` when set, `0` otherwise) and the remaining `bits - 1` bits are
    /// shifted in below it. At least one bit is always consumed.
    pub fn read_sbits(&mut self, bits: u8) -> i32 {
        let mut val = if self.read_prob(128) { -1i32 } else { 0i32 };
        for _ in 1..bits {
            val = (val << 1) | i32::from(self.read_prob(128));
        }
        val
    }

    /// Reads a 7-bit value and turns it into a probability: zero maps to 1,
    /// anything else is doubled.
    pub fn read_probability(&mut self) -> u8 {
        match self.read_bits(7) as u8 {
            0   => 1,
            val => val << 1,
        }
    }

    /// Rescales `range` so that its highest set bit becomes bit 7 and shifts
    /// `value` along; once the bit counter runs out, the next input byte (if
    /// any is left) is merged into `value` at the position that was vacated.
    fn renorm(&mut self) {
        // For a range of 1..=255 this is the distance of the top set bit from bit 7.
        let shift = self.range.leading_zeros() & 7;
        self.range <<= shift;
        self.value <<= shift;
        self.bits   -= shift as i32;
        if (self.bits <= 0) && (self.pos < self.src.len()) {
            self.value |= u32::from(self.src[self.pos]) << (-self.bits as u8);
            self.pos += 1;
            self.bits += 8;
        }
    }

    /// Shifts `nbytes` bytes of input into the decoding window, consuming
    /// source bytes while any are left and shifting in zeroes afterwards.
    pub fn skip_bytes(&mut self, nbytes: usize) {
        for _ in 0..nbytes {
            self.value <<= 8;
            if self.pos < self.src.len() {
                self.value |= u32::from(self.src[self.pos]);
                self.pos += 1;
            }
        }
    }
}

/// Rescales a probability: `prob` is multiplied by `weights[0]` as an 8.8
/// fixed-point factor (with rounding), `weights[1]` is added, and the result
/// is limited to at most `maxval` and at least 1 before being cast to `u8`.
#[allow(dead_code)]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub fn rescale_prob(prob: u8, weights: &[i16; 2], maxval: i32) -> u8 {
    let [scale, bias] = *weights;
    let scaled = (i32::from(prob) * i32::from(scale) + 128) >> 8;
    let shifted = scaled + i32::from(bias);
    // The upper limit is applied first so that the lower bound of 1 always wins.
    let upper_limited = shifted.min(maxval);
    upper_limited.max(1) as u8
}

/// Builds a binary decision tree expression.
///
/// The four-argument form calls `read_prob($prob)` on `$bc` and evaluates to
/// `$node1` when the decoded bit is `false` and to `$node2` when it is `true`;
/// the single-argument form is a leaf and evaluates to its argument.
macro_rules! vp_tree {
    ($bc: expr, $prob: expr, $node1: expr, $node2: expr) => {
        if $bc.read_prob($prob) {
            $node2
        } else {
            $node1
        }
    };
    ($leaf: expr) => {
        $leaf
    };
}

// IDCT multipliers in 16.16 fixed point (consumed through `mul16`).
// Each `CkSj` value equals cos(k * pi / 16) * 65536 rounded to the nearest
// integer, which is also sin(j * pi / 16) * 65536 since k + j = 8.
const C1S7: i32 = 64277;
const C2S6: i32 = 60547;
const C3S5: i32 = 54491;
const C4S4: i32 = 46341;
const C5S3: i32 = 36410;
const C6S2: i32 = 25080;
const C7S1: i32 = 12785;

/// Multiplies two values and drops the low 16 bits of the product
/// (16.16 fixed-point multiplication).
///
/// The product is formed with wrapping arithmetic: large IDCT intermediates
/// can push `a * b` past the `i32` range, which would abort builds with
/// overflow checks enabled. Wrapping yields the same value that builds
/// without overflow checks already produced.
fn mul16(a: i32, b: i32) -> i32 {
    a.wrapping_mul(b) >> 16
}

/// One 8-point inverse DCT pass.
///
/// Takes eight source expressions (`$s0`..`$s7`), eight destination places
/// (`$d0`..`$d7`), a rounding `$bias` added to the even part, a final right
/// `$shift` and the output type `$otype` the results are cast to.
/// Every source value is widened to `i32` before any arithmetic is done on it.
macro_rules! idct_step {
    ($s0:expr, $s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr,
     $d0:expr, $d1:expr, $d2:expr, $d3:expr, $d4:expr, $d5:expr, $d6:expr, $d7:expr,
     $bias:expr, $shift:expr, $otype:ty) => {
        // odd part
        let t_a  = mul16(C1S7, i32::from($s1)) + mul16(C7S1, i32::from($s7));
        let t_b  = mul16(C7S1, i32::from($s1)) - mul16(C1S7, i32::from($s7));
        let t_c  = mul16(C3S5, i32::from($s3)) + mul16(C5S3, i32::from($s5));
        let t_d  = mul16(C3S5, i32::from($s5)) - mul16(C5S3, i32::from($s3));
        let t_a1 = mul16(C4S4, t_a - t_c);
        let t_b1 = mul16(C4S4, t_b - t_d);
        let t_c  = t_a + t_c;
        let t_d  = t_b + t_d;
        // even part; the operands are widened before being combined so that
        // the sum/difference of two 16-bit inputs cannot overflow
        let t_e  = mul16(C4S4, i32::from($s0) + i32::from($s4)) + $bias;
        let t_f  = mul16(C4S4, i32::from($s0) - i32::from($s4)) + $bias;
        let t_g  = mul16(C2S6, i32::from($s2)) + mul16(C6S2, i32::from($s6));
        let t_h  = mul16(C6S2, i32::from($s2)) - mul16(C2S6, i32::from($s6));
        let t_e1 = t_e  - t_g;
        let t_g  = t_e  + t_g;
        let t_a  = t_f  + t_a1;
        let t_f  = t_f  - t_a1;
        let t_b  = t_b1 - t_h;
        let t_h  = t_b1 + t_h;

        // final butterflies
        $d0 = ((t_g  + t_c) >> $shift) as $otype;
        $d7 = ((t_g  - t_c) >> $shift) as $otype;
        $d1 = ((t_a  + t_h) >> $shift) as $otype;
        $d2 = ((t_a  - t_h) >> $shift) as $otype;
        $d3 = ((t_e1 + t_d) >> $shift) as $otype;
        $d4 = ((t_e1 - t_d) >> $shift) as $otype;
        $d5 = ((t_f  + t_b) >> $shift) as $otype;
        $d6 = ((t_f  - t_b) >> $shift) as $otype;
    }
}

/// In-place 8x8 inverse DCT.
///
/// The first pass transforms each row of `coeffs` into a 32-bit scratch
/// block without bias or shift; the second pass transforms each column of the
/// scratch block with a bias of 8 and a right shift by 4, writing 16-bit
/// results back into `coeffs`.
pub fn vp_idct(coeffs: &mut [i16; 64]) {
    let mut scratch = [0i32; 64];

    // rows: coeffs -> scratch
    for (row, out) in coeffs.chunks_exact(8).zip(scratch.chunks_exact_mut(8)) {
        idct_step!(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7],
                   out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7],
                   0, 0, i32);
    }

    // columns: scratch -> coeffs
    for col in 0..8 {
        idct_step!(scratch[col],      scratch[8 + col],  scratch[16 + col], scratch[24 + col],
                   scratch[32 + col], scratch[40 + col], scratch[48 + col], scratch[56 + col],
                   coeffs[col],       coeffs[8 + col],   coeffs[16 + col],  coeffs[24 + col],
                   coeffs[32 + col],  coeffs[40 + col],  coeffs[48 + col],  coeffs[56 + col],
                   8, 4, i16);
    }
}

/// DC-only inverse DCT: scales `coeffs[0]` by `C4S4` twice, rounds it with
/// a bias of 8 and a right shift by 4, and stores the result in all 64 entries.
pub fn vp_idct_dc(coeffs: &mut [i16; 64]) {
    let scaled = mul16(C4S4, mul16(C4S4, i32::from(coeffs[0])));
    let dc = ((scaled + 8) >> 4) as i16;
    coeffs.fill(dc);
}

/// Multiplies every coefficient except the first one by the matching
/// `qmat` entry, using wrapping 16-bit multiplication. `coeffs[0]` is left
/// untouched.
pub fn unquant(coeffs: &mut [i16; 64], qmat: &[i16; 64]) {
    for (coef, &quant) in coeffs.iter_mut().zip(qmat.iter()).skip(1) {
        *coef = coef.wrapping_mul(quant);
    }
}

/// Runs the inverse DCT on `coeffs` and stores the result, offset by 128 and
/// limited to 0..=255, as the 8x8 block at block coordinates (`bx`, `by`)
/// of the given plane.
pub fn vp_put_block(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
    vp_idct(coeffs);
    let base = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
    let stride = frm.stride[plane];
    for (y, row) in coeffs.chunks(8).enumerate() {
        let line = base + y * stride;
        for (x, &coef) in row.iter().enumerate() {
            frm.data[line + x] = (coef + 128).clamp(0, 255) as u8;
        }
    }
}

/// Interlaced variant of block output: runs the inverse DCT on `coeffs` and
/// stores the result, offset by 128 and limited to 0..=255, on every second
/// line. The first line is `(by & !1) * 8 + (by & 1)`, so an even/odd pair
/// of `by` values covers alternating lines of the same 16-line area.
pub fn vp_put_block_ilace(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
    vp_idct(coeffs);
    let first_line = (by & !1) * 8 + (by & 1);
    let base = frm.offset[plane] + bx * 8 + first_line * frm.stride[plane];
    let step = frm.stride[plane] * 2;
    for (y, row) in coeffs.chunks(8).enumerate() {
        let line = base + y * step;
        for (x, &coef) in row.iter().enumerate() {
            frm.data[line + x] = (coef + 128).clamp(0, 255) as u8;
        }
    }
}

/// Runs the DC-only inverse DCT on `coeffs` and fills the 8x8 block at block
/// coordinates (`bx`, `by`) of the given plane with the resulting value,
/// offset by 128 and limited to 0..=255.
pub fn vp_put_block_dc(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
    vp_idct_dc(coeffs);
    let pix = (coeffs[0] + 128).clamp(0, 255) as u8;
    let base = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
    let stride = frm.stride[plane];
    for y in 0..8 {
        let line = base + y * stride;
        for x in 0..8 {
            frm.data[line + x] = pix;
        }
    }
}

/// Runs the inverse DCT on `coeffs` and adds the result to the 8x8 block at
/// block coordinates (`bx`, `by`) of the given plane, limiting every sum to
/// 0..=255.
pub fn vp_add_block(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
    vp_idct(coeffs);
    let base = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
    let stride = frm.stride[plane];
    for (y, row) in coeffs.chunks(8).enumerate() {
        let line = base + y * stride;
        for (x, &coef) in row.iter().enumerate() {
            let pos = line + x;
            let sum = coef + i16::from(frm.data[pos]);
            frm.data[pos] = sum.clamp(0, 255) as u8;
        }
    }
}

/// Interlaced variant of block addition: runs the inverse DCT on `coeffs`
/// and adds the result to every second line starting at line
/// `(by & !1) * 8 + (by & 1)`, limiting every sum to 0..=255.
pub fn vp_add_block_ilace(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
    vp_idct(coeffs);
    let first_line = (by & !1) * 8 + (by & 1);
    let base = frm.offset[plane] + bx * 8 + first_line * frm.stride[plane];
    let step = frm.stride[plane] * 2;
    for (y, row) in coeffs.chunks(8).enumerate() {
        let line = base + y * step;
        for (x, &coef) in row.iter().enumerate() {
            let pos = line + x;
            let sum = coef + i16::from(frm.data[pos]);
            frm.data[pos] = sum.clamp(0, 255) as u8;
        }
    }
}

/// Runs the DC-only inverse DCT on `coeffs` and adds the resulting value to
/// every pixel of the 8x8 block at block coordinates (`bx`, `by`) of the
/// given plane, limiting every sum to 0..=255.
pub fn vp_add_block_dc(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
    vp_idct_dc(coeffs);
    let delta = coeffs[0];
    let base = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
    let stride = frm.stride[plane];
    for y in 0..8 {
        let line = base + y * stride;
        for x in 0..8 {
            let pos = line + x;
            let sum = delta + i16::from(frm.data[pos]);
            frm.data[pos] = sum.clamp(0, 255) as u8;
        }
    }
}

/// Filters `len` positions along an edge.
///
/// For every position the four samples at `off - 2 * step`, `off - step`,
/// `off` and `off + step` produce a correction value; the correction is
/// zeroed when its magnitude reaches `2 * loop_str`, folded back towards zero
/// when it lies between `loop_str` and `2 * loop_str`, and used as is below
/// `loop_str`. It is added to the sample before the edge and subtracted from
/// the sample at `off`, both limited to 0..=255. `off` then advances by
/// `stride`.
pub fn vp31_loop_filter(data: &mut [u8], mut off: usize, step: usize, stride: usize,
                        len: usize, loop_str: i16) {
    for _ in 0..len {
        let before2 = i16::from(data[off - step * 2]);
        let before1 = i16::from(data[off - step]);
        let here    = i16::from(data[off]);
        let after   = i16::from(data[off + step]);

        let raw = ((before2 - after) + 3 * (here - before1) + 4) >> 3;
        let magnitude = raw.abs();
        let adjust = if magnitude >= 2 * loop_str {
                0
            } else if magnitude < loop_str {
                raw
            } else if raw < 0 {
                -raw - 2 * loop_str
            } else {
                -raw + 2 * loop_str
            };

        if adjust != 0 {
            data[off - step] = (before1 + adjust).clamp(0, 255) as u8;
            data[off]        = (here - adjust).clamp(0, 255) as u8;
        }

        off += stride;
    }
}

/// Motion-compensates one 8x8 block of component `comp` from `src` into `dst`
/// at (`dx`, `dy`) using the motion vector (`mv_x`, `mv_y`).
///
/// When the source position (`dx + mv_x`, `dy + mv_y`) is a multiple of 8 in
/// both directions the block is handed straight to `copy_block`. Otherwise the
/// source area, enlarged by the borders, is first fetched into `mc_buf`, the
/// 8-aligned edges crossing that area are run through `vp31_loop_filter` with
/// strength `loop_str`, and `copy_block` then reads from `mc_buf` instead.
///
/// `preborder`, `postborder`, `mode` and `interp` are forwarded to
/// `copy_block`. Panics if `mc_buf` cannot be turned into a simple frame
/// (the `unwrap()` below).
pub fn vp_copy_block(dst: &mut NASimpleVideoFrame<u8>, src: NAVideoBufferRef<u8>, comp: usize,
                     dx: usize, dy: usize, mv_x: i16, mv_y: i16,
                     preborder: usize, postborder: usize, loop_str: i16,
                     mode: usize, interp: &[BlkInterpFunc], mut mc_buf: NAVideoBufferRef<u8>)
{
    // source position of the block
    let sx = (dx as isize) + (mv_x as isize);
    let sy = (dy as isize) + (mv_y as isize);
    if ((sx | sy) & 7) == 0 {
        // source block is 8-aligned in both directions: no edge to filter
        copy_block(dst, src, comp, dx, dy, mv_x, mv_y, 8, 8, preborder, postborder, mode, interp);
        return;
    }
    // the filter reads two samples before and one sample after the edge,
    // hence the minimum border sizes
    let pre = preborder.max(2);
    let post = postborder.max(1);
    let bsize = 8 + pre + post;
    let src_x = sx - (pre as isize);
    let src_y = sy - (pre as isize);
    {
        let tmp_buf = NASimpleVideoFrame::from_video_buf(&mut mc_buf).unwrap();
        // NOTE(review): presumably fetches the bsize x bsize area starting at
        // (src_x, src_y) with out-of-frame samples emulated — confirm against `edge_emu`
        edge_emu(src.as_ref(), src_x, src_y, bsize, bsize, &mut tmp_buf.data[tmp_buf.offset[comp]..], tmp_buf.stride[comp], comp, 0);
//        copy_block(&mut tmp_buf, src, comp, 0, 0, src_x as i16, src_y as i16,
//                   bsize, bsize, 0, 0, 0, interp);
        if (sx & 7) != 0 {
            // distance from the block start to the next multiple of 8, i.e. the
            // column of the vertical edge; filtered across for all bsize rows
            let foff = (8 - (sx & 7)) as usize;
            let off = pre + foff + tmp_buf.offset[comp];
            vp31_loop_filter(tmp_buf.data, off, 1, tmp_buf.stride[comp], bsize, loop_str);
        }
        if (sy & 7) != 0 {
            // same for the horizontal edge: step over rows, advance along columns
            let foff = (8 - (sy & 7)) as usize;
            let off = (pre + foff) * tmp_buf.stride[comp] + tmp_buf.offset[comp];
            vp31_loop_filter(tmp_buf.data, off, tmp_buf.stride[comp], 1, bsize, loop_str);
        }
    }
    // motion vector chosen so that dx + dxoff == pre and dy + dyoff == pre,
    // i.e. the block start inside the temporary buffer
    let dxoff = (pre as i16) - (dx as i16);
    let dyoff = (pre as i16) - (dy as i16);
    copy_block(dst, mc_buf, comp, dx, dy, dxoff, dyoff, 8, 8, preborder, postborder, mode, interp);
}

/// Copies a `bw` x `bh` block from `src` to `dst` without interpolation;
/// `sstride` and `dstride` are the distances between consecutive rows.
fn vp3_interp00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let d = row * dstride;
        let s = row * sstride;
        dst[d..d + bw].copy_from_slice(&src[s..s + bw]);
    }
}

/// Writes a `bw` x `bh` block where every pixel is the truncated average of
/// the source pixel and its right neighbour.
fn vp3_interp01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let d = row * dstride;
        let s = row * sstride;
        for x in 0..bw {
            let left  = u16::from(src[s + x]);
            let right = u16::from(src[s + x + 1]);
            dst[d + x] = ((left + right) >> 1) as u8;
        }
    }
}

/// Writes a `bw` x `bh` block where every pixel is the truncated average of
/// the source pixel and the pixel one row below it.
fn vp3_interp10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let d = row * dstride;
        let s = row * sstride;
        for x in 0..bw {
            let upper = u16::from(src[s + x]);
            let lower = u16::from(src[s + x + sstride]);
            dst[d + x] = ((upper + lower) >> 1) as u8;
        }
    }
}

/// Writes a `bw` x `bh` block where every pixel is the truncated average of
/// the source pixel and the pixel diagonally below and to the right of it.
fn vp3_interp1x(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let d = row * dstride;
        let s = row * sstride;
        for x in 0..bw {
            let here = u16::from(src[s + x]);
            let diag = u16::from(src[s + x + sstride + 1]);
            dst[d + x] = ((here + diag) >> 1) as u8;
        }
    }
}

/// Writes a `bw` x `bh` block where every pixel is the truncated average of
/// the right neighbour of the source pixel and the pixel one row below it.
fn vp3_interp1y(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let d = row * dstride;
        let s = row * sstride;
        for x in 0..bw {
            let right = u16::from(src[s + x + 1]);
            let below = u16::from(src[s + x + sstride]);
            dst[d + x] = ((right + below) >> 1) as u8;
        }
    }
}

/// Table of the VP3 block interpolation functions, indexed by mode:
/// 0 copies the block, 1 averages each pixel with its right neighbour,
/// 2 averages it with the pixel below, 3 averages it with the pixel below
/// and to the right, 4 averages the right neighbour with the pixel below.
pub const VP3_INTERP_FUNCS: &[blockdsp::BlkInterpFunc] = &[ vp3_interp00, vp3_interp01, vp3_interp10, vp3_interp1x, vp3_interp1y ];
Commit	Line	Data
5b24175d	1	use nihav_core::codecs::*;
b4d5b851 KS	2	use nihav_codec_support::codecs::blockdsp;
b4d5b851 KS	3	use nihav_codec_support::codecs::blockdsp::*;
5b24175d	4
93bbc2b0 KS	5	pub const VP_YUVA420_FORMAT: NAPixelFormaton = NAPixelFormaton{
	6	model: ColorModel::YUV(YUVSubmodel::YUVJ),
	7	components: 4,
	8	comp_info: [
	9	Some(NAPixelChromaton{ h_ss: 0, v_ss: 0, packed: false, depth: 8, shift: 0, comp_offs: 0, next_elem: 1}),
	10	Some(NAPixelChromaton{ h_ss: 1, v_ss: 1, packed: false, depth: 8, shift: 0, comp_offs: 1, next_elem: 1}),
	11	Some(NAPixelChromaton{ h_ss: 1, v_ss: 1, packed: false, depth: 8, shift: 0, comp_offs: 2, next_elem: 1}),
	12	Some(NAPixelChromaton{ h_ss: 0, v_ss: 0, packed: false, depth: 8, shift: 0, comp_offs: 3, next_elem: 1}),
	13	None ],
	14	elem_size: 0,
	15	be: false,
	16	alpha: true,
	17	palette: false
	18	};
	19
e6aaad5c	20	#[derive(Clone,Copy,Debug,PartialEq,Default)]
5b24175d KS	21	#[allow(dead_code)]
5b24175d KS	22	pub enum VPMBType {
e6aaad5c	23	#[default]
5b24175d KS	24	Intra,
	25	InterNoMV,
	26	InterMV,
	27	InterNearest,
	28	InterNear,
	29	InterFourMV,
	30	GoldenNoMV,
	31	GoldenMV,
	32	GoldenNearest,
	33	GoldenNear,
	34	}
	35
3584b223 KS	36	pub const VP_REF_INTER: u8 = 1;
	37	pub const VP_REF_GOLDEN: u8 = 2;
	38
5b24175d KS	39	#[allow(dead_code)]
	40	impl VPMBType {
	41	pub fn is_intra(self) -> bool { self == VPMBType::Intra }
	42	pub fn get_ref_id(self) -> u8 {
	43	match self {
	44	VPMBType::Intra => 0,
	45	VPMBType::InterNoMV \|
	46	VPMBType::InterMV \|
	47	VPMBType::InterNearest \|
	48	VPMBType::InterNear \|
3584b223 KS	49	VPMBType::InterFourMV => VP_REF_INTER,
3584b223 KS	50	_ => VP_REF_GOLDEN,
5b24175d KS	51	}
	52	}
	53	}
	54
5b24175d KS	55	#[derive(Default)]
	56	pub struct VPShuffler {
	57	lastframe: Option<NAVideoBufferRef<u8>>,
	58	goldframe: Option<NAVideoBufferRef<u8>>,
	59	}
	60
	61	impl VPShuffler {
	62	pub fn new() -> Self { VPShuffler { lastframe: None, goldframe: None } }
	63	pub fn clear(&mut self) { self.lastframe = None; self.goldframe = None; }
	64	pub fn add_frame(&mut self, buf: NAVideoBufferRef<u8>) {
	65	self.lastframe = Some(buf);
	66	}
	67	pub fn add_golden_frame(&mut self, buf: NAVideoBufferRef<u8>) {
	68	self.goldframe = Some(buf);
	69	}
	70	pub fn get_last(&mut self) -> Option<NAVideoBufferRef<u8>> {
e6aaad5c	71	self.lastframe.as_ref().cloned()
5b24175d KS	72	}
5b24175d KS	73	pub fn get_golden(&mut self) -> Option<NAVideoBufferRef<u8>> {
e6aaad5c	74	self.goldframe.as_ref().cloned()
5b24175d	75	}
6e24ec0b KS	76	pub fn has_refs(&self) -> bool {
	77	self.lastframe.is_some()
	78	}
5b24175d KS	79	}
5b24175d KS	80
3f67638d KS	81	pub const VP56_COEF_BASE: [i16; 6] = [ 5, 7, 11, 19, 35, 67 ];
	82	pub const VP56_COEF_ADD_PROBS: [[u8; 12]; 6] = [
	83	[ 159, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
	84	[ 165, 145, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0 ],
	85	[ 173, 148, 140, 128, 0, 0, 0, 0, 0, 0, 0, 0 ],
	86	[ 176, 155, 140, 135, 128, 0, 0, 0, 0, 0, 0, 0 ],
	87	[ 180, 157, 141, 134, 130, 128, 0, 0, 0, 0, 0, 0 ],
	88	[ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 128 ],
	89	];
	90
3584b223 KS	91	#[allow(dead_code)]
	92	pub struct BoolCoder<'a> {
	93	pub src: &'a [u8],
	94	pos: usize,
	95	value: u32,
	96	range: u32,
	97	bits: i32,
	98	}
	99
	100	#[allow(dead_code)]
	101	impl<'a> BoolCoder<'a> {
	102	pub fn new(src: &'a [u8]) -> DecoderResult<Self> {
	103	if src.len() < 3 { return Err(DecoderError::ShortData); }
47933c6d	104	let value = (u32::from(src[0]) << 24) \| (u32::from(src[1]) << 16) \| (u32::from(src[2]) << 8) \| u32::from(src[3]);
3584b223 KS	105	Ok(Self { src, pos: 4, value, range: 255, bits: 8 })
	106	}
	107	pub fn read_bool(&mut self) -> bool {
	108	self.read_prob(128)
	109	}
	110	pub fn read_prob(&mut self, prob: u8) -> bool {
	111	self.renorm();
47933c6d	112	let split = 1 + (((self.range - 1) * u32::from(prob)) >> 8);
3584b223 KS	113	let bit;
	114	if self.value < (split << 24) {
	115	self.range = split;
	116	bit = false;
	117	} else {
	118	self.range -= split;
	119	self.value -= split << 24;
	120	bit = true;
	121	}
	122	bit
	123	}
	124	pub fn read_bits(&mut self, bits: u8) -> u32 {
	125	let mut val = 0u32;
	126	for _ in 0..bits {
	127	val = (val << 1) \| (self.read_prob(128) as u32);
	128	}
	129	val
	130	}
587a6d78 KS	131	pub fn read_byte(&mut self) -> u8 {
	132	let mut val = 0u8;
	133	for _ in 0..8 {
	134	val = (val << 1) \| (self.read_prob(128) as u8);
	135	}
	136	val
	137	}
	138	pub fn read_sbits(&mut self, bits: u8) -> i32 {
	139	let mut val = if self.read_prob(128) { -1i32 } else { 0i32 };
	140	for _ in 1..bits {
	141	val = (val << 1) \| (self.read_prob(128) as i32);
	142	}
	143	val
	144	}
3584b223 KS	145	pub fn read_probability(&mut self) -> u8 {
	146	let val = self.read_bits(7) as u8;
	147	if val == 0 {
	148	1
	149	} else {
	150	val << 1
	151	}
	152	}
	153	fn renorm(&mut self) {
	154	let shift = self.range.leading_zeros() & 7;
	155	self.range <<= shift;
	156	self.value <<= shift;
	157	self.bits -= shift as i32;
	158	if (self.bits <= 0) && (self.pos < self.src.len()) {
47933c6d	159	self.value \|= u32::from(self.src[self.pos]) << (-self.bits as u8);
3584b223 KS	160	self.pos += 1;
	161	self.bits += 8;
	162	}
	163	/* while self.range < 0x80 {
	164	self.range <<= 1;
	165	self.value <<= 1;
	166	self.bits -= 1;
	167	if (self.bits <= 0) && (self.pos < self.src.len()) {
47933c6d	168	self.value \|= u32::from(self.src[self.pos]);
3584b223 KS	169	self.pos += 1;
	170	self.bits = 8;
	171	}
	172	}*/
	173	}
	174	pub fn skip_bytes(&mut self, nbytes: usize) {
	175	for _ in 0..nbytes {
	176	self.value <<= 8;
	177	if self.pos < self.src.len() {
47933c6d	178	self.value \|= u32::from(self.src[self.pos]);
3584b223 KS	179	self.pos += 1;
	180	}
	181	}
	182	}
	183	}
	184
	185	#[allow(dead_code)]
47933c6d	186	#[allow(clippy::trivially_copy_pass_by_ref)]
3584b223	187	pub fn rescale_prob(prob: u8, weights: &[i16; 2], maxval: i32) -> u8 {
47933c6d	188	(((i32::from(prob) * i32::from(weights[0]) + 128) >> 8) + i32::from(weights[1])).min(maxval).max(1) as u8
3584b223 KS	189	}
3584b223 KS	190
3584b223 KS	191	macro_rules! vp_tree {
	192	($bc: expr, $prob: expr, $node1: expr, $node2: expr) => {
	193	if !$bc.read_prob($prob) {
	194	$node1
	195	} else {
	196	$node2
	197	}
	198	};
	199	($leaf: expr) => { $leaf }
	200	}
	201
5b24175d KS	202	const C1S7: i32 = 64277;
	203	const C2S6: i32 = 60547;
	204	const C3S5: i32 = 54491;
	205	const C4S4: i32 = 46341;
	206	const C5S3: i32 = 36410;
	207	const C6S2: i32 = 25080;
	208	const C7S1: i32 = 12785;
	209
	210	fn mul16(a: i32, b: i32) -> i32 {
	211	(a * b) >> 16
	212	}
	213
	214	macro_rules! idct_step {
	215	($s0:expr, $s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr,
	216	$d0:expr, $d1:expr, $d2:expr, $d3:expr, $d4:expr, $d5:expr, $d6:expr, $d7:expr,
	217	$bias:expr, $shift:expr, $otype:ty) => {
	218	let t_a = mul16(C1S7, i32::from($s1)) + mul16(C7S1, i32::from($s7));
	219	let t_b = mul16(C7S1, i32::from($s1)) - mul16(C1S7, i32::from($s7));
	220	let t_c = mul16(C3S5, i32::from($s3)) + mul16(C5S3, i32::from($s5));
	221	let t_d = mul16(C3S5, i32::from($s5)) - mul16(C5S3, i32::from($s3));
	222	let t_a1 = mul16(C4S4, t_a - t_c);
	223	let t_b1 = mul16(C4S4, t_b - t_d);
	224	let t_c = t_a + t_c;
	225	let t_d = t_b + t_d;
	226	let t_e = mul16(C4S4, i32::from($s0 + $s4)) + $bias;
	227	let t_f = mul16(C4S4, i32::from($s0 - $s4)) + $bias;
	228	let t_g = mul16(C2S6, i32::from($s2)) + mul16(C6S2, i32::from($s6));
	229	let t_h = mul16(C6S2, i32::from($s2)) - mul16(C2S6, i32::from($s6));
	230	let t_e1 = t_e - t_g;
	231	let t_g = t_e + t_g;
	232	let t_a = t_f + t_a1;
	233	let t_f = t_f - t_a1;
	234	let t_b = t_b1 - t_h;
	235	let t_h = t_b1 + t_h;
	236
	237	$d0 = ((t_g + t_c) >> $shift) as $otype;
	238	$d7 = ((t_g - t_c) >> $shift) as $otype;
	239	$d1 = ((t_a + t_h) >> $shift) as $otype;
	240	$d2 = ((t_a - t_h) >> $shift) as $otype;
	241	$d3 = ((t_e1 + t_d) >> $shift) as $otype;
	242	$d4 = ((t_e1 - t_d) >> $shift) as $otype;
	243	$d5 = ((t_f + t_b) >> $shift) as $otype;
	244	$d6 = ((t_f - t_b) >> $shift) as $otype;
	245	}
	246	}
	247
	248	pub fn vp_idct(coeffs: &mut [i16; 64]) {
	249	let mut tmp = [0i32; 64];
	250	for (src, dst) in coeffs.chunks(8).zip(tmp.chunks_mut(8)) {
	251	idct_step!(src[0], src[1], src[2], src[3], src[4], src[5], src[6], src[7],
	252	dst[0], dst[1], dst[2], dst[3], dst[4], dst[5], dst[6], dst[7], 0, 0, i32);
	253	}
	254	let src = &tmp;
	255	let dst = coeffs;
	256	for i in 0..8 {
	257	idct_step!(src[0 * 8 + i], src[1 * 8 + i], src[2 * 8 + i], src[3 * 8 + i],
	258	src[4 * 8 + i], src[5 * 8 + i], src[6 * 8 + i], src[7 * 8 + i],
	259	dst[0 * 8 + i], dst[1 * 8 + i], dst[2 * 8 + i], dst[3 * 8 + i],
	260	dst[4 * 8 + i], dst[5 * 8 + i], dst[6 * 8 + i], dst[7 * 8 + i], 8, 4, i16);
	261	}
	262	}
	263
	264	pub fn vp_idct_dc(coeffs: &mut [i16; 64]) {
	265	let dc = ((mul16(C4S4, mul16(C4S4, i32::from(coeffs[0]))) + 8) >> 4) as i16;
266	for i in 0..64 {
267	coeffs[i] = dc;
268	}
269	}
270
271	pub fn unquant(coeffs: &mut [i16; 64], qmat: &[i16; 64]) {
272	for i in 1..64 {
273	coeffs[i] = coeffs[i].wrapping_mul(qmat[i]);
274	}
275	}
276
277	pub fn vp_put_block(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
278	vp_idct(coeffs);
279	let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
280	for y in 0..8 {
281	for x in 0..8 {
282	frm.data[off + x] = (coeffs[x + y * 8] + 128).min(255).max(0) as u8;
283	}
284	off += frm.stride[plane];
285	}
286	}
287
3584b223 KS	288	pub fn vp_put_block_ilace(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
	289	vp_idct(coeffs);
	290	let mut off = frm.offset[plane] + bx * 8 + ((by & !1) * 8 + (by & 1)) * frm.stride[plane];
	291	for y in 0..8 {
	292	for x in 0..8 {
	293	frm.data[off + x] = (coeffs[x + y * 8] + 128).min(255).max(0) as u8;
	294	}
	295	off += frm.stride[plane] * 2;
	296	}
	297	}
	298
5b24175d KS	299	pub fn vp_put_block_dc(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
	300	vp_idct_dc(coeffs);
	301	let dc = (coeffs[0] + 128).min(255).max(0) as u8;
	302	let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
	303	for _ in 0..8 {
	304	for x in 0..8 {
	305	frm.data[off + x] = dc;
	306	}
	307	off += frm.stride[plane];
	308	}
	309	}
	310
	311	pub fn vp_add_block(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
	312	vp_idct(coeffs);
	313	let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
	314	for y in 0..8 {
	315	for x in 0..8 {
47933c6d	316	frm.data[off + x] = (coeffs[x + y * 8] + i16::from(frm.data[off + x])).min(255).max(0) as u8;
5b24175d KS	317	}
	318	off += frm.stride[plane];
	319	}
	320	}
	321
3584b223 KS	322	pub fn vp_add_block_ilace(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
	323	vp_idct(coeffs);
	324	let mut off = frm.offset[plane] + bx * 8 + ((by & !1) * 8 + (by & 1)) * frm.stride[plane];
	325	for y in 0..8 {
	326	for x in 0..8 {
47933c6d	327	frm.data[off + x] = (coeffs[x + y * 8] + i16::from(frm.data[off + x])).min(255).max(0) as u8;
3584b223 KS	328	}
	329	off += frm.stride[plane] * 2;
	330	}
	331	}
	332
5b24175d KS	333	pub fn vp_add_block_dc(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
	334	vp_idct_dc(coeffs);
	335	let dc = coeffs[0];
	336	let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
	337	for _ in 0..8 {
	338	for x in 0..8 {
47933c6d	339	frm.data[off + x] = (dc + i16::from(frm.data[off + x])).min(255).max(0) as u8;
5b24175d KS	340	}
	341	off += frm.stride[plane];
	342	}
	343	}
8d8ddfe1 KS	344
	345	pub fn vp31_loop_filter(data: &mut [u8], mut off: usize, step: usize, stride: usize,
	346	len: usize, loop_str: i16) {
	347	for _ in 0..len {
47933c6d KS	348	let a = i16::from(data[off - step * 2]);
	349	let b = i16::from(data[off - step]);
	350	let c = i16::from(data[off]);
	351	let d = i16::from(data[off + step]);
8d8ddfe1 KS	352	let mut diff = ((a - d) + 3 * (c - b) + 4) >> 3;
	353	if diff.abs() >= 2 * loop_str {
	354	diff = 0;
	355	} else if diff.abs() >= loop_str {
	356	if diff < 0 {
	357	diff = -diff - 2 * loop_str;
	358	} else {
	359	diff = -diff + 2 * loop_str;
	360	}
	361	}
	362	if diff != 0 {
	363	data[off - step] = (b + diff).max(0).min(255) as u8;
	364	data[off] = (c - diff).max(0).min(255) as u8;
	365	}
	366
	367	off += stride;
	368	}
	369	}
	370
8e4b2f44 KS	371	pub fn vp_copy_block(dst: &mut NASimpleVideoFrame<u8>, src: NAVideoBufferRef<u8>, comp: usize,
	372	dx: usize, dy: usize, mv_x: i16, mv_y: i16,
	373	preborder: usize, postborder: usize, loop_str: i16,
	374	mode: usize, interp: &[BlkInterpFunc], mut mc_buf: NAVideoBufferRef<u8>)
	375	{
	376	let sx = (dx as isize) + (mv_x as isize);
	377	let sy = (dy as isize) + (mv_y as isize);
	378	if ((sx \| sy) & 7) == 0 {
	379	copy_block(dst, src, comp, dx, dy, mv_x, mv_y, 8, 8, preborder, postborder, mode, interp);
	380	return;
	381	}
	382	let pre = preborder.max(2);
	383	let post = postborder.max(1);
	384	let bsize = 8 + pre + post;
	385	let src_x = sx - (pre as isize);
	386	let src_y = sy - (pre as isize);
	387	{
900c9c57	388	let tmp_buf = NASimpleVideoFrame::from_video_buf(&mut mc_buf).unwrap();
86081fed	389	edge_emu(src.as_ref(), src_x, src_y, bsize, bsize, &mut tmp_buf.data[tmp_buf.offset[comp]..], tmp_buf.stride[comp], comp, 0);
900c9c57 KS	390	// copy_block(&mut tmp_buf, src, comp, 0, 0, src_x as i16, src_y as i16,
	391	// bsize, bsize, 0, 0, 0, interp);
	392	if (sx & 7) != 0 {
8e4b2f44	393	let foff = (8 - (sx & 7)) as usize;
900c9c57	394	let off = pre + foff + tmp_buf.offset[comp];
8e4b2f44 KS	395	vp31_loop_filter(tmp_buf.data, off, 1, tmp_buf.stride[comp], bsize, loop_str);
8e4b2f44 KS	396	}
900c9c57	397	if (sy & 7) != 0 {
547a8074	398	let foff = (8 - (sy & 7)) as usize;
900c9c57	399	let off = (pre + foff) * tmp_buf.stride[comp] + tmp_buf.offset[comp];
547a8074 KS	400	vp31_loop_filter(tmp_buf.data, off, tmp_buf.stride[comp], 1, bsize, loop_str);
547a8074 KS	401	}
8e4b2f44 KS	402	}
	403	let dxoff = (pre as i16) - (dx as i16);
	404	let dyoff = (pre as i16) - (dy as i16);
547a8074	405	copy_block(dst, mc_buf, comp, dx, dy, dxoff, dyoff, 8, 8, preborder, postborder, mode, interp);
8e4b2f44	406	}
3584b223 KS	407
	408	fn vp3_interp00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
	409	{
	410	let mut didx = 0;
	411	let mut sidx = 0;
	412	for _ in 0..bh {
47933c6d	413	dst[didx..][..bw].copy_from_slice(&src[sidx..][..bw]);
3584b223 KS	414	didx += dstride;
	415	sidx += sstride;
	416	}
	417	}
	418
	419	fn vp3_interp01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
	420	{
	421	let mut didx = 0;
	422	let mut sidx = 0;
	423	for _ in 0..bh {
47933c6d	424	for x in 0..bw { dst[didx + x] = ((u16::from(src[sidx + x]) + u16::from(src[sidx + x + 1])) >> 1) as u8; }
3584b223 KS	425	didx += dstride;
	426	sidx += sstride;
	427	}
	428	}
	429
	430	fn vp3_interp10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
	431	{
	432	let mut didx = 0;
	433	let mut sidx = 0;
	434	for _ in 0..bh {
47933c6d	435	for x in 0..bw { dst[didx + x] = ((u16::from(src[sidx + x]) + u16::from(src[sidx + x + sstride])) >> 1) as u8; }
3584b223 KS	436	didx += dstride;
	437	sidx += sstride;
	438	}
	439	}
	440
3cc76ad5	441	fn vp3_interp1x(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
3584b223 KS	442	{
	443	let mut didx = 0;
	444	let mut sidx = 0;
	445	for _ in 0..bh {
	446	for x in 0..bw {
47933c6d KS	447	dst[didx + x] = ((u16::from(src[sidx + x]) +
47933c6d KS	448	u16::from(src[sidx + x + sstride + 1])) >> 1) as u8;
3584b223 KS	449	}
	450	didx += dstride;
	451	sidx += sstride;
	452	}
	453	}
	454
3cc76ad5 KS	455	fn vp3_interp1y(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
	456	{
	457	let mut didx = 0;
	458	let mut sidx = 0;
	459	for _ in 0..bh {
	460	for x in 0..bw {
47933c6d KS	461	dst[didx + x] = ((u16::from(src[sidx + x + 1]) +
47933c6d KS	462	u16::from(src[sidx + x + sstride])) >> 1) as u8;
3cc76ad5 KS	463	}
	464	didx += dstride;
	465	sidx += sstride;
	466	}
	467	}
	468
	469	pub const VP3_INTERP_FUNCS: &[blockdsp::BlkInterpFunc] = &[ vp3_interp00, vp3_interp01, vp3_interp10, vp3_interp1x, vp3_interp1y ];
3584b223	470