From 3952bfd9d2d5c2a64d50c2a89b02e93d9b97d541 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Sun, 5 Sep 2021 17:32:38 +0200 Subject: [PATCH] VP6 encoder --- nihav-allstuff/src/lib.rs | 2 + nihav-duck/Cargo.toml | 10 +- nihav-duck/src/codecs/mod.rs | 20 +- nihav-duck/src/codecs/vp6.rs | 120 +--- nihav-duck/src/codecs/vp6dsp.rs | 120 ++++ nihav-duck/src/codecs/vp6enc/coder.rs | 856 ++++++++++++++++++++++ nihav-duck/src/codecs/vp6enc/dsp.rs | 508 +++++++++++++ nihav-duck/src/codecs/vp6enc/huff.rs | 177 +++++ nihav-duck/src/codecs/vp6enc/mb.rs | 710 +++++++++++++++++++ nihav-duck/src/codecs/vp6enc/mod.rs | 907 ++++++++++++++++++++++++ nihav-duck/src/codecs/vp6enc/models.rs | 438 ++++++++++++ nihav-duck/src/codecs/vp6enc/ratectl.rs | 97 +++ nihav-duck/src/codecs/vp6enc/rdo.rs | 167 +++++ nihav-duck/src/lib.rs | 1 + 14 files changed, 4011 insertions(+), 122 deletions(-) create mode 100644 nihav-duck/src/codecs/vp6dsp.rs create mode 100644 nihav-duck/src/codecs/vp6enc/coder.rs create mode 100644 nihav-duck/src/codecs/vp6enc/dsp.rs create mode 100644 nihav-duck/src/codecs/vp6enc/huff.rs create mode 100644 nihav-duck/src/codecs/vp6enc/mb.rs create mode 100644 nihav-duck/src/codecs/vp6enc/mod.rs create mode 100644 nihav-duck/src/codecs/vp6enc/models.rs create mode 100644 nihav-duck/src/codecs/vp6enc/ratectl.rs create mode 100644 nihav-duck/src/codecs/vp6enc/rdo.rs diff --git a/nihav-allstuff/src/lib.rs b/nihav-allstuff/src/lib.rs index 79badd6..36d5d50 100644 --- a/nihav-allstuff/src/lib.rs +++ b/nihav-allstuff/src/lib.rs @@ -23,6 +23,7 @@ use nihav_commonfmt::generic_register_all_encoders; use nihav_commonfmt::generic_register_all_muxers; use nihav_duck::duck_register_all_decoders; +use nihav_duck::duck_register_all_encoders; use nihav_game::game_register_all_decoders; use nihav_game::game_register_all_demuxers; @@ -76,6 +77,7 @@ pub fn nihav_register_all_demuxers(rd: &mut RegisteredDemuxers) { /// Registers all known encoders. 
pub fn nihav_register_all_encoders(re: &mut RegisteredEncoders) { generic_register_all_encoders(re); + duck_register_all_encoders(re); ms_register_all_encoders(re); } diff --git a/nihav-duck/Cargo.toml b/nihav-duck/Cargo.toml index ef8a474..69aa800 100644 --- a/nihav-duck/Cargo.toml +++ b/nihav-duck/Cargo.toml @@ -12,10 +12,10 @@ path = "../nihav-codec-support" features = ["fft", "dsp_window", "blockdsp"] [dev-dependencies] -nihav_commonfmt = { path = "../nihav-commonfmt", default-features=false, features = ["all_demuxers"] } +nihav_commonfmt = { path = "../nihav-commonfmt", default-features=false, features = ["all_demuxers", "all_muxers"] } [features] -default = ["all_decoders"] +default = ["all_decoders", "all_encoders"] all_decoders = ["all_video_decoders", "all_audio_decoders"] all_video_decoders = ["decoder_truemotion1", "decoder_truemotionrt", "decoder_truemotion2", "decoder_truemotion2x", "decoder_vp3", "decoder_vp4", "decoder_vp5", "decoder_vp6", "decoder_vp7"] @@ -34,3 +34,9 @@ decoder_vp7 = ["decoders"] decoder_dk3_adpcm = ["decoders"] decoder_dk4_adpcm = ["decoders"] decoder_on2avc = ["decoders"] + +all_encoders = ["all_video_encoders"] +all_video_encoders = ["encoder_vp6"] +encoders = [] + +encoder_vp6 = ["encoders"] \ No newline at end of file diff --git a/nihav-duck/src/codecs/mod.rs b/nihav-duck/src/codecs/mod.rs index 8464c30..821334a 100644 --- a/nihav-duck/src/codecs/mod.rs +++ b/nihav-duck/src/codecs/mod.rs @@ -32,8 +32,10 @@ mod vp56; #[cfg(feature="decoder_vp5")] #[allow(clippy::needless_range_loop)] mod vp5; -#[cfg(feature="decoder_vp6")] +#[cfg(any(feature="decoder_vp6", feature="encoder_vp6"))] mod vp6data; +#[cfg(any(feature="decoder_vp6", feature="encoder_vp6"))] +mod vp6dsp; #[cfg(feature="decoder_vp6")] #[allow(clippy::needless_range_loop)] mod vp6; @@ -98,3 +100,19 @@ pub fn duck_register_all_decoders(rd: &mut RegisteredDecoders) { rd.add_decoder(*decoder); } } + +#[cfg(feature="encoder_vp6")] +#[allow(clippy::needless_range_loop)] +mod 
vp6enc; + +const DUCK_ENCODERS: &[EncoderInfo] = &[ +#[cfg(feature="encoder_vp6")] + EncoderInfo { name: "vp6", get_encoder: vp6enc::get_encoder }, +]; + +/// Registers all available encoders provided by this crate. +pub fn duck_register_all_encoders(re: &mut RegisteredEncoders) { + for encoder in DUCK_ENCODERS.iter() { + re.add_encoder(*encoder); + } +} diff --git a/nihav-duck/src/codecs/vp6.rs b/nihav-duck/src/codecs/vp6.rs index 08ebb87..15bd832 100644 --- a/nihav-duck/src/codecs/vp6.rs +++ b/nihav-duck/src/codecs/vp6.rs @@ -1,10 +1,10 @@ use nihav_core::codecs::*; use nihav_core::io::bitreader::*; use nihav_codec_support::codecs::{MV, ZIGZAG}; -use nihav_codec_support::codecs::blockdsp::edge_emu; use super::vpcommon::*; use super::vp56::*; use super::vp6data::*; +use super::vp6dsp::*; #[derive(Default)] struct VP6BR { @@ -473,124 +473,6 @@ fn decode_zero_run_huff(br: &mut BitReader, huff: &VP6Huff) -> DecoderResult, comp: usize, - dx: usize, dy: usize, mv_x: i16, mv_y: i16) -{ - let (w, h) = src.get_dimensions(comp); - let sx = (dx as isize) + (mv_x as isize); - let sy = (dy as isize) + (mv_y as isize); - - if (sx - 2 < 0) || (sx + 8 + 2 > (w as isize)) || - (sy - 2 < 0) || (sy + 8 + 2 > (h as isize)) { - edge_emu(&src, sx - 2, sy - 2, 8 + 2 + 2, 8 + 2 + 2, - dst, dstride, comp, 0); - } else { - let sstride = src.get_stride(comp); - let soff = src.get_offset(comp); - let sdta = src.get_data(); - let sbuf: &[u8] = sdta.as_slice(); - let saddr = soff + ((sx - 2) as usize) + ((sy - 2) as usize) * sstride; - let src = &sbuf[saddr..]; - for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(12) { - dline[..12].copy_from_slice(&sline[..12]); - } - } -} - -fn calc_variance(src: &[u8], stride: usize) -> u16 { - let mut sum = 0; - let mut ssum = 0; - for line in src.chunks(stride * 2).take(4) { - for el in line.iter().take(8).step_by(2) { - let pix = u32::from(*el); - sum += pix; - ssum += pix * pix; - } - } - ((ssum * 16 - sum * sum) >> 8) as u16 
-} - -macro_rules! mc_filter { - (bilinear; $a: expr, $b: expr, $c: expr) => { - ((u16::from($a) * (8 - $c) + u16::from($b) * $c + 4) >> 3) as u8 - }; - (bicubic; $src: expr, $off: expr, $step: expr, $coeffs: expr) => { - ((i32::from($src[$off - $step] ) * i32::from($coeffs[0]) + - i32::from($src[$off] ) * i32::from($coeffs[1]) + - i32::from($src[$off + $step] ) * i32::from($coeffs[2]) + - i32::from($src[$off + $step * 2]) * i32::from($coeffs[3]) + 64) >> 7).min(255).max(0) as u8 - } -} - -//#[allow(snake_case)] -fn mc_bilinear(dst: &mut [u8], dstride: usize, src: &[u8], mut soff: usize, sstride: usize, mx: u16, my: u16) { - if my == 0 { - for dline in dst.chunks_mut(dstride).take(8) { - for i in 0..8 { - dline[i] = mc_filter!(bilinear; src[soff + i], src[soff + i + 1], mx); - } - soff += sstride; - } - } else if mx == 0 { - for dline in dst.chunks_mut(dstride).take(8) { - for i in 0..8 { - dline[i] = mc_filter!(bilinear; src[soff + i], src[soff + i + sstride], my); - } - soff += sstride; - } - } else { - let mut tmp = [0u8; 8]; - for i in 0..8 { - tmp[i] = mc_filter!(bilinear; src[soff + i], src[soff + i + 1], mx); - } - soff += sstride; - for dline in dst.chunks_mut(dstride).take(8) { - for i in 0..8 { - let cur = mc_filter!(bilinear; src[soff + i], src[soff + i + 1], mx); - dline[i] = mc_filter!(bilinear; tmp[i], cur, my); - tmp[i] = cur; - } - soff += sstride; - } - } -} - -#[allow(clippy::trivially_copy_pass_by_ref)] -fn mc_bicubic(dst: &mut [u8], dstride: usize, src: &[u8], mut soff: usize, sstride: usize, coeffs_w: &[i16; 4], coeffs_h: &[i16; 4]) { - if coeffs_h[1] == 128 { - for dline in dst.chunks_mut(dstride).take(8) { - for i in 0..8 { - dline[i] = mc_filter!(bicubic; src, soff + i, 1, coeffs_w); - } - soff += sstride; - } - } else if coeffs_w[1] == 128 { // horizontal-only interpolation - for dline in dst.chunks_mut(dstride).take(8) { - for i in 0..8 { - dline[i] = mc_filter!(bicubic; src, soff + i, sstride, coeffs_h); - } - soff += sstride; - } - } 
/// Estimates the variance of a block from a sparse sample of its pixels.
///
/// `src` is cut into chunks of `2 * stride` bytes; only the first four chunks
/// are visited and in each of them the elements at offsets 0, 2, 4 and 6 are
/// read, so at most 16 samples contribute.
///
/// Returns `(16 * sum(p^2) - sum(p)^2) >> 8`, which is the population variance
/// when all 16 samples are present.
///
/// # Panics
///
/// Panics if `stride` is zero (zero-sized chunks are not allowed).
pub fn calc_variance(src: &[u8], stride: usize) -> u16 {
    let (total, sq_total) = src
        .chunks(stride * 2)
        .take(4)
        .flat_map(|rows| rows.iter().take(8).step_by(2))
        .map(|&px| u32::from(px))
        .fold((0u32, 0u32), |(acc, sq_acc), px| (acc + px, sq_acc + px * px));

    ((sq_total * 16 - total * total) >> 8) as u16
}
// Shared interpolation kernels for the motion compensation routines below.
macro_rules! mc_filter {
    // Two-tap blend of `$a` and `$b`; `$c` is the weight of `$b` in eighths.
    (bilinear; $a: expr, $b: expr, $c: expr) => {
        ((u16::from($a) * (8 - $c) + u16::from($b) * $c + 4) >> 3) as u8
    };
    // Four-tap filter around `$src[$off]` with taps `$step` apart,
    // rounded, scaled down by 128 and clipped to the 0..=255 range.
    (bicubic; $src: expr, $off: expr, $step: expr, $coeffs: expr) => {
        ((i32::from($src[$off - $step]    ) * i32::from($coeffs[0]) +
          i32::from($src[$off]            ) * i32::from($coeffs[1]) +
          i32::from($src[$off + $step]    ) * i32::from($coeffs[2]) +
          i32::from($src[$off + $step * 2]) * i32::from($coeffs[3]) + 64) >> 7).min(255).max(0) as u8
    }
}

/// Bilinear interpolation of an 8x8 block.
///
/// Reads `src` starting at `soff` with line pitch `sstride` and writes up to
/// eight lines of eight pixels into `dst` (line pitch `dstride`).
/// `mx` and `my` are the horizontal and vertical blend weights in eighths
/// and must not exceed 8.
///
/// The source must provide one extra column to the right of the block and,
/// when `my` is non-zero, one extra line below it.
pub fn mc_bilinear(dst: &mut [u8], dstride: usize, src: &[u8], mut soff: usize, sstride: usize, mx: u16, my: u16) {
    let rows = dst.chunks_mut(dstride).take(8);
    match (mx, my) {
        // no vertical fraction: blend each pixel with its right neighbour
        (_, 0) => {
            for dline in rows {
                for (i, out) in dline[..8].iter_mut().enumerate() {
                    *out = mc_filter!(bilinear; src[soff + i], src[soff + i + 1], mx);
                }
                soff += sstride;
            }
        }
        // no horizontal fraction: blend each pixel with the one below it
        (0, _) => {
            for dline in rows {
                for (i, out) in dline[..8].iter_mut().enumerate() {
                    *out = mc_filter!(bilinear; src[soff + i], src[soff + i + sstride], my);
                }
                soff += sstride;
            }
        }
        // both fractions: filter horizontally, then blend consecutive filtered lines
        _ => {
            let mut prev = [0u8; 8];
            for (i, p) in prev.iter_mut().enumerate() {
                *p = mc_filter!(bilinear; src[soff + i], src[soff + i + 1], mx);
            }
            soff += sstride;
            for dline in rows {
                for (i, (out, p)) in dline[..8].iter_mut().zip(prev.iter_mut()).enumerate() {
                    let cur = mc_filter!(bilinear; src[soff + i], src[soff + i + 1], mx);
                    *out = mc_filter!(bilinear; *p, cur, my);
                    *p = cur;
                }
                soff += sstride;
            }
        }
    }
}

/// Bicubic (four-tap) interpolation of an 8x8 block.
///
/// `coeffs_w` are the horizontal taps and `coeffs_h` the vertical ones.
/// A centre tap of 128 is taken to mean that no filtering is needed in
/// that direction, so only the other direction is filtered.
///
/// The source must provide one extra pixel before and two after the block
/// in every direction that gets filtered.
#[allow(clippy::trivially_copy_pass_by_ref)]
pub fn mc_bicubic(dst: &mut [u8], dstride: usize, src: &[u8], mut soff: usize, sstride: usize, coeffs_w: &[i16; 4], coeffs_h: &[i16; 4]) {
    let rows = dst.chunks_mut(dstride).take(8);
    match (coeffs_h[1], coeffs_w[1]) {
        // horizontal-only interpolation
        (128, _) => {
            for dline in rows {
                for (i, out) in dline[..8].iter_mut().enumerate() {
                    *out = mc_filter!(bicubic; src, soff + i, 1, coeffs_w);
                }
                soff += sstride;
            }
        }
        // vertical-only interpolation
        (_, 128) => {
            for dline in rows {
                for (i, out) in dline[..8].iter_mut().enumerate() {
                    *out = mc_filter!(bicubic; src, soff + i, sstride, coeffs_h);
                }
                soff += sstride;
            }
        }
        // two passes: 11 horizontally filtered lines (one above the block and
        // two below it) are stored with a pitch of 16, then filtered vertically
        _ => {
            let mut tmp = [0u8; 16 * 11];
            let mut spos = soff - sstride;
            for tline in tmp.chunks_mut(16) {
                for (i, t) in tline[..8].iter_mut().enumerate() {
                    *t = mc_filter!(bicubic; src, spos + i, 1, coeffs_w);
                }
                spos += sstride;
            }
            for (row, dline) in rows.enumerate() {
                // line 0 of `tmp` is the one above the block
                let tpos = 16 * (row + 1);
                for (i, out) in dline[..8].iter_mut().enumerate() {
                    *out = mc_filter!(bicubic; tmp, tpos + i, 16, coeffs_h);
                }
            }
        }
    }
}
bit_seq { + ($val: expr; $( $bit:tt),* ; $( $idx:expr),* ) => { + TokenSeq { + val: $val, + seq: + &[ + $( + bit_entry!($bit; $idx), + )* + ] + } + }; +} + +pub const MODE_TREE: &[TokenSeq] = &[ + bit_seq!(VPMBType::Intra; T, F, F; 0, 2, 5), + bit_seq!(VPMBType::InterFourMV; T, F, T; 0, 2, 5), + bit_seq!(VPMBType::InterNoMV; F, F, F; 0, 1, 3), + bit_seq!(VPMBType::InterMV; F, F, T; 0, 1, 3), + bit_seq!(VPMBType::InterNearest; F, T, F; 0, 1, 4), + bit_seq!(VPMBType::InterNear; F, T, T; 0, 1, 4), + bit_seq!(VPMBType::GoldenNoMV; T, T, F, F; 0, 2, 6, 7), + bit_seq!(VPMBType::GoldenMV; T, T, F, T; 0, 2, 6, 7), + bit_seq!(VPMBType::GoldenNearest; T, T, T, F; 0, 2, 6, 8), + bit_seq!(VPMBType::GoldenNear; T, T, T, T; 0, 2, 6, 8), +]; + +const MODE_TREE_DIFF: &[TokenSeq] = &[ + bit_seq!(1; F, T; 0, 1), + bit_seq!(2; F, F; 0, 1), + bit_seq!(3; T, F, T; 0, 2, 3), + bit_seq!(4; T, F, F, T; 0, 2, 3, 4), + bit_seq!(5; T, F, F, F, T; 0, 2, 3, 4, 5), + bit_seq!(6; T, F, F, F, F; 0, 2, 3, 4, 5), + bit_seq!(7; T, T; 0, 2), +]; + +const MODE_TREE_DIFF_PROBS: &[u8; 6] = &[171, 83, 199, 140, 125, 104]; + +const SHORT_MV_TREE: &[TokenSeq] = &[ + bit_seq!(0; F, F, F; 0, 1, 2), + bit_seq!(1; F, F, T; 0, 1, 2), + bit_seq!(2; F, T, F; 0, 1, 3), + bit_seq!(3; F, T, T; 0, 1, 3), + bit_seq!(4; T, F, F; 0, 4, 5), + bit_seq!(5; T, F, T; 0, 4, 5), + bit_seq!(6; T, T, F; 0, 4, 6), + bit_seq!(7; T, T, T; 0, 4, 6), +]; + +const EOB: i8 = 42; + +const DC_TREE: &[TokenSeq] = &[ + bit_seq!( 0; F; 0), + bit_seq!( 1; T, F; 0, 2), + bit_seq!( 2; T, T, F, F; 0, 2, 3, 4), + bit_seq!( 3; T, T, F, T, F; 0, 2, 3, 4, 5), + bit_seq!( 4; T, T, F, T, T; 0, 2, 3, 4, 5), + bit_seq!( -1; T, T, T, F, F; 0, 2, 3, 6, 7), + bit_seq!( -2; T, T, T, F, T; 0, 2, 3, 6, 7), + bit_seq!( -3; T, T, T, T, F, F; 0, 2, 3, 6, 8, 9), + bit_seq!( -4; T, T, T, T, F, T; 0, 2, 3, 6, 8, 9), + bit_seq!( -5; T, T, T, T, T, F; 0, 2, 3, 6, 8, 10), + bit_seq!( -6; T, T, T, T, T, T; 0, 2, 3, 6, 8, 10), +]; + +const NZ_COEF_TREE: &[TokenSeq] = 
/// Maps a coefficient to the token category used by the coefficient trees.
///
/// Magnitudes 0 to 4 are returned as they are. Larger magnitudes are grouped
/// into six ranges reported as the negative values -1 to -6
/// (5-6, 7-10, 11-18, 19-34, 35-66 and everything above 66).
/// The sign of the coefficient is not part of the category.
fn coef_to_cat(coef: i16) -> i8 {
    // Widen before taking the magnitude: `i16::MIN.abs()` does not fit into
    // `i16` and panics when overflow checks are enabled.
    let magnitude = i32::from(coef).abs();
    match magnitude {
        0..=4 => magnitude as i8,
        5..=6 => -1,
        7..=10 => -2,
        11..=18 => -3,
        19..=34 => -4,
        35..=66 => -5,
        _ => -6,
    }
}
split; + self.val += split; + } else { + self.range = split; + } + + if self.range < 128 { + self.renorm()?; + } + Ok(()) + } + fn flush_run(&mut self, overflow: bool) -> EncoderResult<()> { + if self.run > 0 { + self.bw.write_byte(self.saved + (overflow as u8))?; + if !overflow { + for _ in 1..self.run { + self.bw.write_byte(0xFF)?; + } + } else { + for _ in 1..self.run { + self.bw.write_byte(0)?; + } + } + self.run = 0; + } + Ok(()) + } + fn renorm(&mut self) -> EncoderResult<()> { + let bits = (self.range.leading_zeros() & 7) as u8; + self.range <<= bits; + if self.bits + bits < 23 { + self.bits += bits; + self.val <<= bits; + } else { + for _ in 0..bits { + if (self.bits == 23) && ((self.val >> 31) != 0) { + self.flush_run(true)?; + } + self.val <<= 1; + self.bits += 1; + if self.bits == 24 { + let tbyte = (self.val >> 24) as u8; + let nbyte = (self.val >> 16) as u8; + if tbyte < 0xFF { + self.flush_run(false)?; + if nbyte < 0xFE { + self.bw.write_byte(tbyte)?; + } else { + self.saved = tbyte; + self.run = 1; + } + } else { + self.run += 1; + } + self.val &= 0xFFFFFF; + self.bits -= 8; + } + } + } + Ok(()) + } + pub fn flush(mut self) -> EncoderResult<()> { + self.flush_run(false)?; + self.val <<= 24 - self.bits; + self.bw.write_u32be(self.val)?; + Ok(()) + } + + pub fn put_bits(&mut self, val: u32, len: u8) -> EncoderResult<()> { + let mut mask = 1 << (len - 1); + while mask != 0 { + self.put_bool((val & mask) != 0, 128)?; + mask >>= 1; + } + Ok(()) + } + fn put_probability(&mut self, prob: u8) -> EncoderResult<()> { + self.put_bits(u32::from(prob >> 1), 7) + } + fn encode_probability(&mut self, new: u8, old: u8, prob: u8) -> EncoderResult<()> { + self.put_bool(new != old, prob)?; + if new != old { + self.put_probability(new)?; + } + Ok(()) + } + pub fn write_el(&mut self, el: T, tree: &[TokenSeq], probs: &[u8]) -> EncoderResult<()> { + for entry in tree.iter() { + if entry.val == el { + for seq in entry.seq.iter() { + self.put_bool(seq.bit, probs[seq.idx as 
/// Scales the weight `prob` against `total` to the 0..255 probability range.
///
/// The divisor is `total + 1`, so a zero total is safe and the result stays
/// below 255 whenever `prob` does not exceed `total`. The quotient is
/// truncated to its low eight bits.
fn rescale_mb_mode_prob(prob: u32, total: u32) -> u8 {
    let divisor = total + 1;
    let scaled = prob * 255 / divisor;
    scaled as u8
}
mbtype_models[mode]; + let mut cnt = [0u32; 10]; + let mut total = 0; + for i in 0..10 { + if i == mode { continue; } + cnt[i] = 100 * u32::from(prob_xmitted[i * 2]); + total += cnt[i]; + } + let sum = u32::from(prob_xmitted[mode * 2]) + u32::from(prob_xmitted[mode * 2 + 1]); + mdl.probs[9] = 255 - rescale_mb_mode_prob(u32::from(prob_xmitted[mode * 2 + 1]), sum); + + let inter_mv0_weight = (cnt[0] as u32) + (cnt[2] as u32); + let inter_mv1_weight = (cnt[3] as u32) + (cnt[4] as u32); + let gold_mv0_weight = (cnt[5] as u32) + (cnt[6] as u32); + let gold_mv1_weight = (cnt[8] as u32) + (cnt[9] as u32); + let mix_weight = (cnt[1] as u32) + (cnt[7] as u32); + mdl.probs[0] = 1 + rescale_mb_mode_prob(inter_mv0_weight + inter_mv1_weight, total); + mdl.probs[1] = 1 + rescale_mb_mode_prob(inter_mv0_weight, inter_mv0_weight + inter_mv1_weight); + mdl.probs[2] = 1 + rescale_mb_mode_prob(mix_weight, mix_weight + gold_mv0_weight + gold_mv1_weight); + mdl.probs[3] = 1 + rescale_mb_mode_prob(cnt[0] as u32, inter_mv0_weight); + mdl.probs[4] = 1 + rescale_mb_mode_prob(cnt[3] as u32, inter_mv1_weight); + mdl.probs[5] = 1 + rescale_mb_mode_prob(cnt[1], mix_weight); + mdl.probs[6] = 1 + rescale_mb_mode_prob(gold_mv0_weight, gold_mv0_weight + gold_mv1_weight); + mdl.probs[7] = 1 + rescale_mb_mode_prob(cnt[5], gold_mv0_weight); + mdl.probs[8] = 1 + rescale_mb_mode_prob(cnt[8], gold_mv1_weight); + } +} + +fn calc_mbtype_bits(prob_xmitted: &[u8; 20], stats: &[[usize; 10]; 10], mdl: &mut [VP56MBTypeModel; 10]) -> u32 { + const MB_TYPES: [VPMBType; 10] = [ + VPMBType::InterNoMV, + VPMBType::Intra, + VPMBType::InterMV, + VPMBType::InterNearest, + VPMBType::InterNear, + VPMBType::GoldenNoMV, + VPMBType::GoldenMV, + VPMBType::InterFourMV, + VPMBType::GoldenNearest, + VPMBType::GoldenNear + ]; + + calc_mb_model_probs(prob_xmitted, mdl); + let mut nits = 0; + for (last, (srow, mdl)) in stats.iter().zip(mdl.iter()).enumerate() { + for (cur, &ccount) in srow.iter().enumerate() { + let ccount = 
/// Picks the codebook row closest to the given probability set.
///
/// The distance is the sum of absolute differences between `vq[row][i]` and
/// `prob_xmitted[i ^ 1]`, i.e. the two entries of every pair are swapped
/// before the comparison. An exact match is returned immediately; otherwise
/// the first row with the smallest distance wins.
fn find_model_vq(prob_xmitted: &[u8; 20], vq: &[[u8; 20]; 16]) -> usize {
    let mut winner = (0usize, i16::MAX);

    for (idx, row) in vq.iter().enumerate() {
        let dist: i16 = row
            .iter()
            .enumerate()
            .map(|(i, &entry)| (i16::from(prob_xmitted[i ^ 1]) - i16::from(entry)).abs())
            .sum();
        if dist == 0 {
            return idx;
        }
        if dist < winner.1 {
            winner = (idx, dist);
        }
    }

    winner.0
}
} else if old + diff < 0 || (old + diff == 0 && new != 0) { + diff += 4; + } + tprobs[i] = (old + diff) as u8; + deltas[i] = diff; + nits += u32::from(DELTA_PROBS[(diff.abs() >> 2).min(7) as usize]); + } + + Estimator::nits_to_bits(nits) + calc_mbtype_bits(&tprobs, stats, tmp) + 5 +} + +pub fn encode_mode_prob_models(bc: &mut BoolEncoder, models: &mut VP56Models, pmodels: &VP56Models, stats: &[[[usize; 10]; 10]; 3]) -> EncoderResult<()> { + let mut tmp = [VP56MBTypeModel::default(); 10]; + let mut tprob = [0; 20]; + for ctx in 0..3 { + let mut models_changed = models.prob_xmitted[ctx] != pmodels.prob_xmitted[ctx]; + if models_changed { + let old_bits = calc_mbtype_bits(&pmodels.prob_xmitted[ctx], &stats[ctx], &mut tmp); + let new_bits = calc_mbtype_bits(&models.prob_xmitted[ctx], &stats[ctx], &mut tmp) + 4; + if new_bits < old_bits { + let idx = find_model_vq(&models.prob_xmitted[ctx], &VP56_MODE_VQ[ctx]); + for i in 0..20 { + tprob[i ^ 1] = VP56_MODE_VQ[ctx][idx][i]; + } + let vq_bits = calc_mbtype_bits(&tprob, &stats[ctx], &mut tmp) + 4; + if vq_bits < old_bits { + bc.put_bool(true, 174)?; + bc.put_bits(idx as u32, 4)?; + let mut diffs_present = tprob != models.prob_xmitted[ctx]; + let mut deltas = [0; 20]; + let delta_cost = deltas_bits(&models.prob_xmitted[ctx], &tprob, &stats[ctx], &mut tmp, &mut deltas); + if delta_cost + 1 >= new_bits { + diffs_present = false; + } + if diffs_present { + bc.put_bool(true, 254)?; + for i in 0..20 { + let diff = deltas[i ^ 1] >> 2; + bc.put_bool(diff != 0, 205)?; + if diff != 0 { + let d0 = diff.abs().min(7) as u8; + bc.put_bool(diff < 0, 128)?; + bc.write_el(d0, MODE_TREE_DIFF, MODE_TREE_DIFF_PROBS)?; + if d0 == 7 { + bc.put_bits(diff.abs() as u32, 7)?; + } + tprob[i ^ 1] = (i16::from(tprob[i ^ 1]) + deltas[i ^ 1]) as u8; + } + } + } + if !diffs_present { + bc.put_bool(false, 254)?; + } + } else { + models_changed = false; + } + } else { + models_changed = false; + } + } + if !models_changed { + bc.put_bool(false, 174)?; + 
bc.put_bool(false, 254)?; + models.prob_xmitted[ctx] = pmodels.prob_xmitted[ctx]; + } else { + models.prob_xmitted[ctx] = tprob; + } + } + for ctx in 0..3 { + let prob_xmitted = &models.prob_xmitted[ctx]; + calc_mb_model_probs(prob_xmitted, &mut models.mbtype_models[ctx]); + } + Ok(()) +} + +pub fn encode_mv_models(bc: &mut BoolEncoder, models: &[VP56MVModel; 2], pmodels: &[VP56MVModel; 2]) -> EncoderResult<()> { + for (i, (mdl, pmdl)) in models.iter().zip(pmodels.iter()).enumerate() { + bc.encode_probability(mdl.nz_prob, pmdl.nz_prob, HAS_NZ_PROB[i])?; + bc.encode_probability(mdl.sign_prob, pmdl.sign_prob, HAS_SIGN_PROB[i])?; + } + for (i, (mdl, pmdl)) in models.iter().zip(pmodels.iter()).enumerate() { + for (&coded_prob, (&prob, &pprob)) in HAS_TREE_PROB[i].iter().zip(mdl.tree_probs.iter().zip(pmdl.tree_probs.iter())) { + bc.encode_probability(prob, pprob, coded_prob)?; + } + } + for (i, (mdl, pmdl)) in models.iter().zip(pmodels.iter()).enumerate() { + for (&coded_prob, (&prob, &pprob)) in HAS_RAW_PROB[i].iter().zip(mdl.raw_probs.iter().zip(pmdl.raw_probs.iter())) { + bc.encode_probability(prob, pprob, coded_prob)?; + } + } + Ok(()) +} + +pub fn encode_coeff_models(bc: &mut BoolEncoder, models: &mut VP56Models, pmodels: &VP56Models, is_intra: bool, interlaced: bool) -> EncoderResult<()> { + let mut def_prob = [128u8; 11]; + for plane in 0..2 { + for i in 0..11 { + let pprob = pmodels.coeff_models[plane].dc_value_probs[i]; + let prob = models.coeff_models[plane].dc_value_probs[i]; + let changed = (is_intra && prob != def_prob[i]) || (!is_intra && prob != pprob); + bc.put_bool(changed, HAS_COEF_PROBS[plane][i])?; + if changed { + bc.put_probability(prob)?; + def_prob[i] = prob; + } + } + } + + bc.put_bool(false, 128)?; + reset_scan(&mut models.vp6models, interlaced); + /* for scan + for i in 1..64 { + if bc.read_prob(HAS_SCAN_UPD_PROBS[i]) { + models.vp6models.scan_order[i] = bc.read_bits(4) as usize; + } + } + update_scan(&mut models.vp6models); + */ + + for comp 
in 0..2 { + for i in 0..14 { + bc.encode_probability(models.vp6models.zero_run_probs[comp][i], pmodels.vp6models.zero_run_probs[comp][i], HAS_ZERO_RUN_PROBS[comp][i])?; + } + } + + for ctype in 0..3 { + for plane in 0..2 { + for group in 0..6 { + for i in 0..11 { + let pprob = pmodels.coeff_models[plane].ac_val_probs[ctype][group][i]; + let prob = models.coeff_models[plane].ac_val_probs[ctype][group][i]; + let changed = (is_intra && prob != def_prob[i]) || (!is_intra && prob != pprob); + bc.put_bool(changed, VP6_AC_PROBS[ctype][plane][group][i])?; + if changed { + bc.put_probability(prob)?; + def_prob[i] = prob; + } + } + } + } + } + + for plane in 0..2 { + let mdl = &mut models.coeff_models[plane]; + for i in 0..3 { + for k in 0..5 { + mdl.dc_token_probs[0][i][k] = rescale_prob(mdl.dc_value_probs[k], &VP6_DC_WEIGHTS[k][i], 255); + } + } + } + Ok(()) +} + +pub fn encode_block(bc: &mut BoolEncoder, blk: &[i16; 64], dc_mode: usize, model: &VP56CoeffModel, vp6model: &VP6Models) -> EncoderResult<()> { + let mut last = 64; + for i in (0..64).rev() { + if blk[vp6model.zigzag[i]] != 0 { + last = i; + break; + } + } + if last < 64 { + bc.write_dc(blk[0], &model.dc_token_probs[0][dc_mode], &model.dc_value_probs)?; + let mut idx = 1; + let mut last_idx = 0; + let mut last_val = blk[0]; + while idx <= last { + let val = blk[vp6model.zigzag[idx]]; + let has_nnz = (idx == 1) || (last_val != 0); + if (val != 0) || has_nnz { + if last_val == 0 && idx != 1 { + let zrun = idx - last_idx; + bc.write_zero_run(zrun, &vp6model.zero_run_probs[if last_idx + 1 >= 7 { 1 } else { 0 }])?; + } + let ac_band = VP6_IDX_TO_AC_BAND[idx]; + let ac_mode = last_val.abs().min(2) as usize; + let tree = if has_nnz { COEF_TREE } else { NZ_COEF_TREE }; + bc.write_ac(val, tree, &model.ac_val_probs[ac_mode][ac_band])?; + last_val = val; + last_idx = idx; + } + idx += 1; + } + if idx < 64 { + let ac_band = VP6_IDX_TO_AC_BAND[idx]; + let ac_mode = last_val.abs().min(2) as usize; + bc.write_el(EOB, COEF_TREE, 
&model.ac_val_probs[ac_mode][ac_band])?; + } + } else { + bc.write_cat(0, DC_TREE, &model.dc_token_probs[0][dc_mode], &model.dc_value_probs)?; + let ac_band = VP6_IDX_TO_AC_BAND[1]; + bc.write_el(EOB, COEF_TREE, &model.ac_val_probs[0][ac_band])?; + } + Ok(()) +} + +fn map_mb_type(mbtype: VPMBType) -> usize { + match mbtype { + VPMBType::InterNoMV => 0, + VPMBType::Intra => 1, + VPMBType::InterMV => 2, + VPMBType::InterNearest => 3, + VPMBType::InterNear => 4, + VPMBType::GoldenNoMV => 5, + VPMBType::GoldenMV => 6, + VPMBType::InterFourMV => 7, + VPMBType::GoldenNearest => 8, + VPMBType::GoldenNear => 9, + } +} + +pub fn encode_mb_type(bc: &mut BoolEncoder, mb_type: VPMBType, last_mb_type: VPMBType, ctx: usize, model: &VP56Models) -> EncoderResult<()> { + let probs = &model.mbtype_models[ctx][map_mb_type(last_mb_type)].probs; + bc.put_bool(mb_type == last_mb_type, probs[9])?; + if mb_type != last_mb_type { + bc.write_el(mb_type, MODE_TREE, probs)?; + } + Ok(()) +} + +fn encode_mv_component(bc: &mut BoolEncoder, mv: i16, model: &VP56MVModel) -> EncoderResult<()> { + let aval = mv.abs(); + bc.put_bool(aval >= 8, model.nz_prob)?; + if aval < 8 { + bc.write_el(aval as u8, SHORT_MV_TREE, &model.tree_probs)?; + } else { + for &ord in LONG_VECTOR_ORDER.iter() { + bc.put_bool(((aval >> ord) & 1) != 0, model.raw_probs[ord])?; + } + if (aval & 0xF0) != 0 { + bc.put_bool((aval & (1 << 3)) != 0, model.raw_probs[3])?; + } + } + if aval != 0 { + bc.put_bool(mv < 0, model.sign_prob)?; + } + Ok(()) +} + +pub fn encode_mv(bc: &mut BoolEncoder, mv: MV, model: &VP56Models) -> EncoderResult<()> { + encode_mv_component(bc, mv.x, &model.mv_models[0])?; + encode_mv_component(bc, mv.y, &model.mv_models[1])?; + Ok(()) +} + +struct Estimator {} + +impl Estimator { + fn new() -> Self { Self{} } + fn write_el(&self, el: T, tree: &[TokenSeq], probs: &mut [ProbCounter]) { + for entry in tree.iter() { + if entry.val == el { + for seq in entry.seq.iter() { + probs[seq.idx as usize].add(seq.bit); + 
} + return; + } + } + } + fn write_cat(&self, cat: i8, tree: &[TokenSeq], probs: &mut [ProbCounter; 11]) { + for entry in tree.iter() { + if entry.val == cat { + for seq in entry.seq.iter() { + probs[seq.idx as usize].add(seq.bit); + } + return; + } + } + } + fn write_dc(&self, val: i16, probs: &mut [ProbCounter; 11]) { + self.write_cat(coef_to_cat(val), DC_TREE, probs); + } + fn write_ac(&self, val: i16, tree: &[TokenSeq], probs: &mut [ProbCounter; 11]) { + self.write_cat(coef_to_cat(val), tree, probs); + } + fn write_zero_run(&self, val: usize, probs: &mut [ProbCounter; 14]) { + self.write_el(val.min(9) as u8, ZERO_RUN_TREE, probs); + if val >= 9 { + let add = val - 9; + for i in 0..6 { + probs[i + 8].add(((add >> i) & 1) != 0); + } + } + } + fn est_nits(bit: bool, prob: u8) -> u32 { + if !bit { + u32::from(PROB_BITS[prob as usize]) + } else { + u32::from(PROB_BITS[256 - (prob as usize)]) + } + } + fn nits_to_bits(nits: u32) -> u32 { (nits + 7) >> 3 } +} + +pub fn estimate_block(blk: &[i16; 64], _dc_mode: usize, model: &mut VP56CoeffModelStat, vp6model: &mut VP6ModelsStat, scan: &[usize; 64]) { + let bc = Estimator::new(); + + let mut last = 64; + for i in (0..64).rev() { + if blk[scan[i]] != 0 { + last = i; + break; + } + } + if last < 64 { + bc.write_dc(blk[0], &mut model.dc_value_probs); + let mut idx = 1; + let mut last_idx = 0; + let mut last_val = blk[0]; + while idx <= last { + let val = blk[scan[idx]]; + let has_nnz = (idx == 1) || (last_val != 0); + if (val != 0) || has_nnz { + if last_val == 0 && idx != 1 { + let zrun = idx - last_idx; + bc.write_zero_run(zrun, &mut vp6model.zero_run_probs[if last_idx + 1 >= 7 { 1 } else { 0 }]); + } + let ac_band = VP6_IDX_TO_AC_BAND[idx]; + let ac_mode = last_val.abs().min(2) as usize; + let tree = if has_nnz { COEF_TREE } else { NZ_COEF_TREE }; + bc.write_ac(val, tree, &mut model.ac_val_probs[ac_mode][ac_band]); + last_val = val; + last_idx = idx; + } + idx += 1; + } + if idx < 64 { + let ac_band = 
VP6_IDX_TO_AC_BAND[idx]; + let ac_mode = last_val.abs().min(2) as usize; + bc.write_el(EOB, COEF_TREE, &mut model.ac_val_probs[ac_mode][ac_band]); + } + } else { + bc.write_cat(0, DC_TREE, &mut model.dc_value_probs); + let ac_band = VP6_IDX_TO_AC_BAND[1]; + bc.write_el(EOB, COEF_TREE, &mut model.ac_val_probs[0][ac_band]); + } +} + +pub fn estimate_mb_type(mb_type: VPMBType, last_mb_type: VPMBType, ctx: usize, model: &mut VP56ModelsStat) { + model.mbtype_models[ctx][map_mb_type(last_mb_type)][map_mb_type(mb_type)] += 1; +} + +fn estimate_mv_component(mv: i16, model: &mut VP56MVModelStat) { + let bc = Estimator::new(); + let aval = mv.abs(); + model.nz_prob.add(aval >= 8); + if aval < 8 { + bc.write_el(aval as u8, SHORT_MV_TREE, &mut model.tree_probs); + } else { + for &ord in LONG_VECTOR_ORDER.iter() { + model.raw_probs[ord].add(((aval >> ord) & 1) != 0); + } + if (aval & 0xF0) != 0 { + model.raw_probs[3].add((aval & (1 << 3)) != 0); + } + } + if aval != 0 { + model.sign_prob.add(mv < 0); + } +} + +pub fn estimate_mv(mv: MV, model: &mut VP56ModelsStat) { + estimate_mv_component(mv.x, &mut model.mv_models[0]); + estimate_mv_component(mv.y, &mut model.mv_models[1]); +} + +const VP56_MODE_VQ: [[[u8; 20]; 16]; 3] = [ + [ + [ 9, 15, 32, 25, 7, 19, 9, 21, 1, 12, 14, 12, 3, 18, 14, 23, 3, 10, 0, 4 ], + [ 48, 39, 1, 2, 11, 27, 29, 44, 7, 27, 1, 4, 0, 3, 1, 6, 1, 2, 0, 0 ], + [ 21, 32, 1, 2, 4, 10, 32, 43, 6, 23, 2, 3, 1, 19, 1, 6, 12, 21, 0, 7 ], + [ 69, 83, 0, 0, 0, 2, 10, 29, 3, 12, 0, 1, 0, 3, 0, 3, 2, 2, 0, 0 ], + [ 11, 20, 1, 4, 18, 36, 43, 48, 13, 35, 0, 2, 0, 5, 3, 12, 1, 2, 0, 0 ], + [ 70, 44, 0, 1, 2, 10, 37, 46, 8, 26, 0, 2, 0, 2, 0, 2, 0, 1, 0, 0 ], + [ 8, 15, 0, 1, 8, 21, 74, 53, 22, 42, 0, 1, 0, 2, 0, 3, 1, 2, 0, 0 ], + [ 141, 42, 0, 0, 1, 4, 11, 24, 1, 11, 0, 1, 0, 1, 0, 2, 0, 0, 0, 0 ], + [ 8, 19, 4, 10, 24, 45, 21, 37, 9, 29, 0, 3, 1, 7, 11, 25, 0, 2, 0, 1 ], + [ 46, 42, 0, 1, 2, 10, 54, 51, 10, 30, 0, 2, 0, 2, 0, 1, 0, 1, 0, 0 ], + [ 28, 32, 0, 0, 3, 10, 
75, 51, 14, 33, 0, 1, 0, 2, 0, 1, 1, 2, 0, 0 ], + [ 100, 46, 0, 1, 3, 9, 21, 37, 5, 20, 0, 1, 0, 2, 1, 2, 0, 1, 0, 0 ], + [ 27, 29, 0, 1, 9, 25, 53, 51, 12, 34, 0, 1, 0, 3, 1, 5, 0, 2, 0, 0 ], + [ 80, 38, 0, 0, 1, 4, 69, 33, 5, 16, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0 ], + [ 16, 20, 0, 0, 2, 8, 104, 49, 15, 33, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0 ], + [ 194, 16, 0, 0, 1, 1, 1, 9, 1, 3, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0 ], + ], [ + [ 41, 22, 1, 0, 1, 31, 0, 0, 0, 0, 0, 1, 1, 7, 0, 1, 98, 25, 4, 10 ], + [ 123, 37, 6, 4, 1, 27, 0, 0, 0, 0, 5, 8, 1, 7, 0, 1, 12, 10, 0, 2 ], + [ 26, 14, 14, 12, 0, 24, 0, 0, 0, 0, 55, 17, 1, 9, 0, 36, 5, 7, 1, 3 ], + [ 209, 5, 0, 0, 0, 27, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0 ], + [ 2, 5, 4, 5, 0, 121, 0, 0, 0, 0, 0, 3, 2, 4, 1, 4, 2, 2, 0, 1 ], + [ 175, 5, 0, 1, 0, 48, 0, 0, 0, 0, 0, 2, 0, 1, 0, 2, 0, 1, 0, 0 ], + [ 83, 5, 2, 3, 0, 102, 0, 0, 0, 0, 1, 3, 0, 2, 0, 1, 0, 0, 0, 0 ], + [ 233, 6, 0, 0, 0, 8, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0 ], + [ 34, 16, 112, 21, 1, 28, 0, 0, 0, 0, 6, 8, 1, 7, 0, 3, 2, 5, 0, 2 ], + [ 159, 35, 2, 2, 0, 25, 0, 0, 0, 0, 3, 6, 0, 5, 0, 1, 4, 4, 0, 1 ], + [ 75, 39, 5, 7, 2, 48, 0, 0, 0, 0, 3, 11, 2, 16, 1, 4, 7, 10, 0, 2 ], + [ 212, 21, 0, 1, 0, 9, 0, 0, 0, 0, 1, 2, 0, 2, 0, 0, 2, 2, 0, 0 ], + [ 4, 2, 0, 0, 0, 172, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 2, 0, 0, 0 ], + [ 187, 22, 1, 1, 0, 17, 0, 0, 0, 0, 3, 6, 0, 4, 0, 1, 4, 4, 0, 1 ], + [ 133, 6, 1, 2, 1, 70, 0, 0, 0, 0, 0, 2, 0, 4, 0, 3, 1, 1, 0, 0 ], + [ 251, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], + ], [ + [ 2, 3, 2, 3, 0, 2, 0, 2, 0, 0, 11, 4, 1, 4, 0, 2, 3, 2, 0, 4 ], + [ 49, 46, 3, 4, 7, 31, 42, 41, 0, 0, 2, 6, 1, 7, 1, 4, 2, 4, 0, 1 ], + [ 26, 25, 1, 1, 2, 10, 67, 39, 0, 0, 1, 1, 0, 14, 0, 2, 31, 26, 1, 6 ], + [ 103, 46, 1, 2, 2, 10, 33, 42, 0, 0, 1, 4, 0, 3, 0, 1, 1, 3, 0, 0 ], + [ 14, 31, 9, 13, 14, 54, 22, 29, 0, 0, 2, 6, 4, 18, 6, 13, 1, 5, 0, 1 ], + [ 85, 39, 0, 0, 1, 9, 69, 40, 0, 0, 0, 1, 0, 3, 0, 1, 2, 3, 0, 0 ], + [ 31, 28, 0, 0, 3, 14, 130, 
34, 0, 0, 0, 1, 0, 3, 0, 1, 3, 3, 0, 1 ], + [ 171, 25, 0, 0, 1, 5, 25, 21, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0 ], + [ 17, 21, 68, 29, 6, 15, 13, 22, 0, 0, 6, 12, 3, 14, 4, 10, 1, 7, 0, 3 ], + [ 51, 39, 0, 1, 2, 12, 91, 44, 0, 0, 0, 2, 0, 3, 0, 1, 2, 3, 0, 1 ], + [ 81, 25, 0, 0, 2, 9, 106, 26, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0 ], + [ 140, 37, 0, 1, 1, 8, 24, 33, 0, 0, 1, 2, 0, 2, 0, 1, 1, 2, 0, 0 ], + [ 14, 23, 1, 3, 11, 53, 90, 31, 0, 0, 0, 3, 1, 5, 2, 6, 1, 2, 0, 0 ], + [ 123, 29, 0, 0, 1, 7, 57, 30, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0 ], + [ 13, 14, 0, 0, 4, 20, 175, 20, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0 ], + [ 202, 23, 0, 0, 1, 3, 2, 9, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0 ], + ] +]; diff --git a/nihav-duck/src/codecs/vp6enc/dsp.rs b/nihav-duck/src/codecs/vp6enc/dsp.rs new file mode 100644 index 0000000..c1e8653 --- /dev/null +++ b/nihav-duck/src/codecs/vp6enc/dsp.rs @@ -0,0 +1,508 @@ +use nihav_core::frame::*; +use nihav_codec_support::codecs::{MV, ZERO_MV}; +use super::super::vpcommon::*; +use super::super::vp6dsp::*; +use super::super::vp6data::*; +use super::ResidueMB; + +use std::str::FromStr; + +#[derive(Debug,Clone,Copy,PartialEq)] +pub enum MVSearchMode { + Full, + Diamond, + Hexagon, +} + +impl Default for MVSearchMode { + fn default() -> Self { MVSearchMode::Hexagon } +} + +pub struct ParseError{} + +impl FromStr for MVSearchMode { + type Err = ParseError; + + #[allow(clippy::single_match)] + fn from_str(s: &str) -> Result { + match s { + "full" => Ok(MVSearchMode::Full), + "dia" => Ok(MVSearchMode::Diamond), + "hex" => Ok(MVSearchMode::Hexagon), + _ => Err(ParseError{}), + } + } +} + +impl std::fmt::Display for MVSearchMode { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match *self { + MVSearchMode::Full => write!(f, "full"), + MVSearchMode::Diamond => write!(f, "dia"), + MVSearchMode::Hexagon => write!(f, "hex"), + } + } +} + + +const C1S7: i32 = 64277; +const C2S6: i32 = 60547; +const C3S5: i32 = 54491; +const C4S4: i32 = 
46341; +const C5S3: i32 = 36410; +const C6S2: i32 = 25080; +const C7S1: i32 = 12785; + +fn mul16(a: i32, b: i32) -> i32 { + let res = a * b; + (res + if res < 0 { 0xFFFF } else { 0 }) >> 16 +} + +macro_rules! fdct_step { + ($s0:expr, $s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, + $d0:expr, $d1:expr, $d2:expr, $d3:expr, $d4:expr, $d5:expr, $d6:expr, $d7:expr) => { + let t_g = i32::from($s0) + i32::from($s7); + let t_c = i32::from($s0) - i32::from($s7); + let t_a = i32::from($s1) + i32::from($s2); + let t_h = i32::from($s1) - i32::from($s2); + let t_e1 = i32::from($s3) + i32::from($s4); + let t_d = i32::from($s3) - i32::from($s4); + let t_f = i32::from($s5) + i32::from($s6); + let t_b = i32::from($s5) - i32::from($s6); + + let t_b1 = t_h + t_b; + let t_h = t_h - t_b; + let t_a1 = t_a - t_f; + let t_f = t_a + t_f; + let t_e = t_g + t_e1; + let t_g = t_g - t_e1; + + $d2 = (mul16(C2S6, t_g) + mul16(C6S2, t_h)).max(-32768).min(32767) as i16; + $d6 = (mul16(C6S2, t_g) - mul16(C2S6, t_h)).max(-32768).min(32767) as i16; + $d0 = mul16(C4S4, t_e + t_f).max(-32768).min(32767) as i16; + $d4 = mul16(C4S4, t_e - t_f).max(-32768).min(32767) as i16; + let t_a = t_c + mul16(C4S4, t_a1); + let t_c = t_c - mul16(C4S4, t_a1); + let t_b = t_d + mul16(C4S4, t_b1); + let t_d = t_d - mul16(C4S4, t_b1); + $d3 = (mul16(C3S5, t_c) - mul16(C5S3, t_d)).max(-32768).min(32767) as i16; + $d5 = (mul16(C5S3, t_c) + mul16(C3S5, t_d)).max(-32768).min(32767) as i16; + $d1 = (mul16(C1S7, t_a) + mul16(C7S1, t_b)).max(-32768).min(32767) as i16; + $d7 = (mul16(C7S1, t_a) - mul16(C1S7, t_b)).max(-32768).min(32767) as i16; + } +} + +#[allow(clippy::erasing_op)] +pub fn vp_fdct(blk: &mut [i16; 64]) { + for row in blk.chunks_mut(8) { + fdct_step!(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], + row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7]); + } + for i in 0..8 { + fdct_step!(blk[0 * 8 + i], blk[1 * 8 + i], blk[2 * 8 + i], blk[3 * 8 + i], + blk[4 * 8 + 
i], blk[5 * 8 + i], blk[6 * 8 + i], blk[7 * 8 + i], + blk[0 * 8 + i], blk[1 * 8 + i], blk[2 * 8 + i], blk[3 * 8 + i], + blk[4 * 8 + i], blk[5 * 8 + i], blk[6 * 8 + i], blk[7 * 8 + i]); + } +} + +const MAX_DIST: u32 = std::u32::MAX; +const DIST_THRESH: u32 = 256; + +trait FromPixels { + fn from_pixels(self) -> Self; +} + +impl FromPixels for MV { + fn from_pixels(self) -> MV { + MV { x: self.x * 4, y: self.y * 4 } + } +} + +pub trait MVSearch { + fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32); + fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32); +} + +pub struct FullMVSearch {} + +impl FullMVSearch { + pub fn new() -> Self { Self{} } +} + +impl MVSearch for FullMVSearch { + fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32) { + let mut best_dist = MAX_DIST; + let mut best_mv = ZERO_MV; + + let mut cur_mv = ZERO_MV; + for ytry in 0..mv_est.mv_range * 2 + 1 { + let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) }; + cur_mv.y = dy * 4; + for xtry in 0..mv_est.mv_range * 2 + 1 { + let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) }; + cur_mv.x = dx * 4; + + let dist = mv_est.sad_mb(cur_blk, mb_x, mb_y, cur_mv, best_dist); + + if dist < best_dist { + best_dist = dist; + best_mv = cur_mv; + } + } + } + (best_mv, best_dist) + } + fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32) { + let mut best_dist = MAX_DIST; + let mut best_mv = ZERO_MV; + + let mut cur_mv = ZERO_MV; + for ytry in 0..mv_est.mv_range * 2 + 1 { + let dy = if (ytry & 1) == 0 { ytry >> 1 } else { -((ytry + 1) >> 1) }; + cur_mv.y = dy * 4; + for xtry in 0..mv_est.mv_range * 2 + 1 { + let dx = if (xtry & 1) == 0 { xtry >> 1 } else { -((xtry + 1) >> 1) }; + cur_mv.x = dx * 4; + + let dist = mv_est.sad_blk(cur_blk, xpos, ypos, 
cur_mv, best_dist); + + if dist < best_dist { + best_dist = dist; + best_mv = cur_mv; + } + } + } + (best_mv, best_dist) + } +} + +const DIA_PATTERN: [MV; 9] = [ + ZERO_MV, + MV {x: -2, y: 0}, + MV {x: -1, y: 1}, + MV {x: 0, y: 2}, + MV {x: 1, y: 1}, + MV {x: 2, y: 0}, + MV {x: 1, y: -1}, + MV {x: 0, y: -2}, + MV {x: -1, y: -1} +]; + +const HEX_PATTERN: [MV; 7] = [ + ZERO_MV, + MV {x: -2, y: 0}, + MV {x: -1, y: 2}, + MV {x: 1, y: 2}, + MV {x: 2, y: 0}, + MV {x: 1, y: -2}, + MV {x: -1, y: -2} +]; + +const REFINEMENT: [MV; 4] = [ + MV {x: -1, y: 0}, + MV {x: 0, y: 1}, + MV {x: 1, y: 0}, + MV {x: 0, y: -1} +]; + +macro_rules! search_template { + ($self: expr, $mv_est: expr, $cur_blk: expr, $mb_x: expr, $mb_y: expr, $sad_func: ident) => ({ + let mut best_dist = MAX_DIST; + let mut best_mv; + + let mut min_dist; + let mut min_idx; + + $self.reset(); + loop { + let mut cur_best_dist = best_dist; + for (dist, &point) in $self.dist.iter_mut().zip($self.point.iter()) { + if *dist == MAX_DIST { + *dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, point.from_pixels(), cur_best_dist); + cur_best_dist = cur_best_dist.min(*dist); + if *dist <= DIST_THRESH { + break; + } + } + } + min_dist = $self.dist[0]; + min_idx = 0; + for (i, &dist) in $self.dist.iter().enumerate().skip(1) { + if dist < min_dist { + min_dist = dist; + min_idx = i; + if dist <= DIST_THRESH { + break; + } + } + } + if min_dist <= DIST_THRESH || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range || $self.point[min_idx].y.abs() >= $mv_est.mv_range { + break; + } + best_dist = min_dist; + $self.update($self.steps[min_idx]); + } + best_dist = min_dist; + best_mv = $self.point[min_idx]; + if best_dist <= DIST_THRESH { + return (best_mv.from_pixels(), best_dist); + } + for &step in REFINEMENT.iter() { + let mv = best_mv + step; + let dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, mv.from_pixels(), MAX_DIST); + if best_dist > dist { + best_dist = dist; + best_mv = mv; + } + } + 
best_mv = best_mv.from_pixels(); + if best_dist <= DIST_THRESH { + return (best_mv, best_dist); + } + + // subpel refinement + $self.set_new_point(best_mv, best_dist); + loop { + let mut cur_best_dist = best_dist; + for (dist, &point) in $self.dist.iter_mut().zip($self.point.iter()) { + if *dist == MAX_DIST { + *dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, point, cur_best_dist); + cur_best_dist = cur_best_dist.min(*dist); + if *dist <= DIST_THRESH { + break; + } + } + } + min_dist = $self.dist[0]; + min_idx = 0; + for (i, &dist) in $self.dist.iter().enumerate().skip(1) { + if dist < min_dist { + min_dist = dist; + min_idx = i; + if dist <= DIST_THRESH { + break; + } + } + } + if min_dist <= DIST_THRESH || min_idx == 0 || best_dist == min_dist || $self.point[min_idx].x.abs() >= $mv_est.mv_range * 4 || $self.point[min_idx].y.abs() >= $mv_est.mv_range * 4 { + break; + } + best_dist = min_dist; + $self.update($self.steps[min_idx]); + } + best_dist = min_dist; + best_mv = $self.point[min_idx]; + if best_dist <= DIST_THRESH { + return (best_mv, best_dist); + } + for &step in REFINEMENT.iter() { + let mv = best_mv + step; + let dist = $mv_est.$sad_func($cur_blk, $mb_x, $mb_y, mv, MAX_DIST); + if best_dist > dist { + best_dist = dist; + best_mv = mv; + } + } + (best_mv, best_dist) + }) +} + +macro_rules! 
pattern_search { + ($struct_name: ident, $patterns: expr) => { + pub struct $struct_name { + point: [MV; $patterns.len()], + dist: [u32; $patterns.len()], + steps: &'static [MV; $patterns.len()], + } + + impl $struct_name { + pub fn new() -> Self { + Self { + point: $patterns, + dist: [MAX_DIST; $patterns.len()], + steps: &$patterns, + } + } + fn reset(&mut self) { + self.point = $patterns; + self.dist = [MAX_DIST; $patterns.len()]; + } + fn set_new_point(&mut self, start: MV, dist: u32) { + for (dst, &src) in self.point.iter_mut().zip(self.steps.iter()) { + *dst = src + start; + } + self.dist = [MAX_DIST; $patterns.len()]; + self.dist[0] = dist; + } + fn update(&mut self, step: MV) { + let mut new_point = self.point; + let mut new_dist = [MAX_DIST; $patterns.len()]; + + for point in new_point.iter_mut() { + *point += step; + } + + for (new_point, new_dist) in new_point.iter_mut().zip(new_dist.iter_mut()) { + for (&old_point, &old_dist) in self.point.iter().zip(self.dist.iter()) { + if *new_point == old_point { + *new_dist = old_dist; + break; + } + } + } + self.point = new_point; + self.dist = new_dist; + } + } + + impl MVSearch for $struct_name { + fn search_mb(&mut self, mv_est: &mut MVEstimator, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize) -> (MV, u32) { + search_template!(self, mv_est, cur_blk, mb_x, mb_y, sad_mb) + } + fn search_blk(&mut self, mv_est: &mut MVEstimator, cur_blk: &[u8; 64], xpos: usize, ypos: usize) -> (MV, u32) { + search_template!(self, mv_est, cur_blk, xpos, ypos, sad_blk) + } + } + } +} + +pattern_search!(DiaSearch, DIA_PATTERN); +pattern_search!(HexSearch, HEX_PATTERN); + +pub struct MVEstimator { + pub ref_blk: [[u8; 64]; 6], + mc_buf: NAVideoBufferRef, + ref_frame: NAVideoBufferRef, + adv_profile: bool, + bicubic: bool, + autosel_pm: bool, + mv_thresh: u8, + var_thresh: u16, + filter_alpha: usize, + loop_thr: i16, + mv_range: i16, +pub count: usize, +pub count2: usize, +} + +impl MVEstimator { + pub fn new(ref_frame: 
NAVideoBufferRef, mc_buf: NAVideoBufferRef, loop_thr: i16, mv_range: i16) -> Self { + Self { + ref_blk: [[0; 64]; 6], + ref_frame, mc_buf, + adv_profile: false, + bicubic: false, + autosel_pm: false, + mv_thresh: 0, + var_thresh: 0, + filter_alpha: 0, + loop_thr, + mv_range, +count: 0, +count2: 0, + } + } + pub fn mc_block(&mut self, dst_idx: usize, plane: usize, x: usize, y: usize, mv: MV) { + let is_luma = (plane != 1) && (plane != 2); + let (sx, sy, mx, my, msx, msy) = if is_luma { + (mv.x >> 2, mv.y >> 2, (mv.x & 3) << 1, (mv.y & 3) << 1, mv.x / 4, mv.y / 4) + } else { + (mv.x >> 3, mv.y >> 3, mv.x & 7, mv.y & 7, mv.x / 8, mv.y / 8) + }; + let tmp_blk = self.mc_buf.get_data_mut().unwrap(); + get_block(tmp_blk, 16, self.ref_frame.clone(), plane, x, y, sx, sy); + if (msx & 7) != 0 { + let foff = (8 - (sx & 7)) as usize; + let off = 2 + foff; + vp31_loop_filter(tmp_blk, off, 1, 16, 12, self.loop_thr); + } + if (msy & 7) != 0 { + let foff = (8 - (sy & 7)) as usize; + let off = (2 + foff) * 16; + vp31_loop_filter(tmp_blk, off, 16, 1, 12, self.loop_thr); + } + let copy_mode = (mx == 0) && (my == 0); + let mut bicubic = !copy_mode && is_luma && self.bicubic; + if is_luma && !copy_mode && self.adv_profile { + if !self.autosel_pm { + bicubic = true; + } else { + let mv_limit = 1 << (self.mv_thresh + 1); + if (mv.x.abs() <= mv_limit) && (mv.y.abs() <= mv_limit) { + let mut var_off = 16 * 2 + 2; + if mv.x < 0 { var_off += 1; } + if mv.y < 0 { var_off += 16; } + let var = calc_variance(&tmp_blk[var_off..], 16); + if var >= self.var_thresh { + bicubic = true; + } + } + } + } + let dst = &mut self.ref_blk[dst_idx]; + if copy_mode { + let src = &tmp_blk[2 * 16 + 2..]; + for (dline, sline) in dst.chunks_mut(8).zip(src.chunks(16)).take(8) { + dline.copy_from_slice(&sline[..8]); + } + } else if bicubic { + let coeff_h = &VP6_BICUBIC_COEFFS[self.filter_alpha][mx as usize]; + let coeff_v = &VP6_BICUBIC_COEFFS[self.filter_alpha][my as usize]; + mc_bicubic(dst, 8, tmp_blk, 16 * 2 + 
2, 16, coeff_h, coeff_v); + } else { + mc_bilinear(dst, 8, tmp_blk, 16 * 2 + 2, 16, mx as u16, my as u16); + } + } + fn sad_mb(&mut self, cur_blk: &[[u8; 64]; 6], mb_x: usize, mb_y: usize, cur_mv: MV, best_dist: u32) -> u32 { + let mut dist = 0; + for i in 0..4 { + self.mc_block(i, 0, mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >> 1) * 8, cur_mv); + dist += sad(&cur_blk[i], &self.ref_blk[i]); +self.count2 += 1; + if dist > best_dist { + break; + } + } + if dist <= best_dist { + for plane in 1..3 { + self.mc_block(plane + 3, plane, mb_x * 8, mb_y * 8, cur_mv); + dist += sad(&cur_blk[plane + 3], &self.ref_blk[plane + 3]); +self.count2 += 1; + if dist > best_dist { + break; + } + } + } +self.count += 1; + dist + } + fn sad_blk(&mut self, cur_blk: &[u8; 64], xpos: usize, ypos: usize, cur_mv: MV, _: u32) -> u32 { + self.mc_block(0, 0, xpos, ypos, cur_mv); + sad(cur_blk, &self.ref_blk[0]) + } +} + +fn sad(src1: &[u8; 64], src2: &[u8; 64]) -> u32 { + let mut sum = 0; + for (&p1, &p2) in src1.iter().zip(src2.iter()) { + sum += (i32::from(p1) - i32::from(p2)).abs() as u32; + } + sum +} + +pub fn sub_blk(dst: &mut [i16; 64], src1: &[u8; 64], src2: &[u8; 64]) { + for (dst, (&p1, &p2)) in dst.iter_mut().zip(src1.iter().zip(src2.iter())) { + *dst = i16::from(p1) - i16::from(p2); + } +} + +pub fn calc_mb_dist(mb1: &ResidueMB, mb2: &ResidueMB) -> u32 { + let mut sum = 0; + for (blk1, blk2) in mb1.coeffs.iter().zip(mb2.coeffs.iter()) { + for (&c1, &c2) in blk1.iter().zip(blk2.iter()) { + sum += (i32::from(c1) - i32::from(c2)).abs() as u32; + } + } + sum +} diff --git a/nihav-duck/src/codecs/vp6enc/huff.rs b/nihav-duck/src/codecs/vp6enc/huff.rs new file mode 100644 index 0000000..43b6d65 --- /dev/null +++ b/nihav-duck/src/codecs/vp6enc/huff.rs @@ -0,0 +1,177 @@ +use nihav_core::io::byteio::*; +use nihav_core::codecs::EncoderResult; +use super::super::vpcommon::*; +use super::super::vp6data::*; +use super::models::{VP6HuffModels, VP6Huff}; + +#[derive(Default)] +pub struct HuffState { + 
pub dc_zero_run: [usize; 2], + pub dc_zr_coded: [bool; 2], + pub ac_zero_run: [usize; 2], + pub ac_zr_coded: [bool; 2], +} + +impl HuffState { + pub fn new() -> Self { Self::default() } +} + +pub const MAX_EOB_RUN: usize = 63 + 10; + +pub struct HuffEncoder<'a, 'b> { + bw: &'a mut ByteWriter<'b>, + bitbuf: u32, + bits: u8, +} + +impl<'a, 'b> HuffEncoder<'a, 'b> { + pub fn new(bw: &'a mut ByteWriter<'b>) -> Self { + Self { + bitbuf: 0, + bits: 0, + bw + } + } + pub fn flush(mut self) -> EncoderResult<()> { + while self.bits > 0 { + self.bw.write_byte((self.bitbuf >> 24) as u8)?; + self.bitbuf <<= 8; + self.bits = self.bits.saturating_sub(8); + } + Ok(()) + } + fn put_bits(&mut self, val: u16, bits: u8) -> EncoderResult<()> { + self.bitbuf |= u32::from(val) << (32 - self.bits - bits); + self.bits += bits; + while self.bits >= 8 { + self.bw.write_byte((self.bitbuf >> 24) as u8)?; + self.bitbuf <<= 8; + self.bits -= 8; + } + Ok(()) + } + fn encode_eob(&mut self, mdl: &VP6Huff) -> EncoderResult<()> { + self.put_bits(mdl.codes[11], mdl.bits[11]) + } + fn encode_val(&mut self, val: i16, mdl: &VP6Huff) -> EncoderResult<()> { + let idx = match val.abs() { + 0 => 0, + 1 => 1, + 2 => 2, + 3 => 3, + 4 => 4, + 5..= 6 => 5, + 7..=10 => 6, + 11..=18 => 7, + 19..=34 => 8, + 35..=66 => 9, + _ => 10, + }; + self.put_bits(mdl.codes[idx], mdl.bits[idx])?; + if idx >= 5 { + self.put_bits((val.abs() - VP56_COEF_BASE[idx - 5]) as u16, VP6_COEF_ADD_BITS[idx - 5])?; + } + if idx > 0 { + self.put_bits((val < 0) as u16, 1)?; + } + Ok(()) + } + fn encode_eob_run(&mut self, val: usize) -> EncoderResult<()> { + match val { + 0 => { self.put_bits(0, 2)?; }, + 1 => { self.put_bits(1, 2)?; }, + 2..=5 => { + self.put_bits(2, 2)?; + self.put_bits((val - 2) as u16, 2)?; + }, + 6..=9 => { + self.put_bits(3, 2)?; + self.put_bits(0, 1)?; + self.put_bits((val - 6) as u16, 2)?; + }, + _ => { + self.put_bits(3, 2)?; + self.put_bits(1, 1)?; + self.put_bits((val - 10) as u16, 6)?; + }, + }; + Ok(()) + } + fn 
encode_zero_run(&mut self, val: usize, mdl: &VP6Huff) -> EncoderResult<()> { + self.put_bits(mdl.codes[val.min(8)], mdl.bits[val.min(8)])?; + if val >= 8 { + self.put_bits((val - 8) as u16, 6)?; + } + Ok(()) + } +} + +pub fn encode_block_huff(huff: &mut HuffEncoder, scan: &[usize; 64], coeffs: &[i16; 64], plane: usize, hstate: &mut HuffState, model: &VP6HuffModels) -> EncoderResult<()> { + let mut last_idx = 64; + for i in (0..64).rev() { + if coeffs[scan[i]] != 0 { + last_idx = i; + break; + } + } + + if !hstate.dc_zr_coded[plane] { + let mdl = &model.dc_token_tree[plane]; + huff.encode_val(coeffs[0], mdl)?; + if coeffs[0] == 0 { + huff.encode_eob_run(hstate.dc_zero_run[plane])?; + hstate.dc_zr_coded[plane] = hstate.dc_zero_run[plane] > 0; + } + } else { + hstate.dc_zero_run[plane] -= 1; + if hstate.dc_zero_run[plane] == 0 { + hstate.dc_zr_coded[plane] = false; + } + } + if hstate.ac_zr_coded[plane] { + hstate.ac_zero_run[plane] -= 1; + if hstate.ac_zero_run[plane] == 0 { + hstate.ac_zr_coded[plane] = false; + } + return Ok(()); + } + + let mut last_val = coeffs[0]; + + if last_idx == 0 || last_idx == 64 { + let ac_band = VP6_IDX_TO_AC_BAND[1].min(3); + let ac_mode = last_val.abs().min(2) as usize; + let mdl = &model.ac_token_tree[plane][ac_mode][ac_band]; + huff.encode_eob(mdl)?; + huff.encode_eob_run(hstate.ac_zero_run[plane])?; + hstate.ac_zr_coded[plane] = hstate.ac_zero_run[plane] > 0; + return Ok(()); + } + + let mut idx = 1; + while idx < 64 { + let ac_band = VP6_IDX_TO_AC_BAND[idx].min(3); + let ac_mode = last_val.abs().min(2) as usize; + let mdl = &model.ac_token_tree[plane][ac_mode][ac_band]; + if idx > last_idx { + huff.encode_eob(mdl)?; + break; + } + let val = coeffs[scan[idx]]; + huff.encode_val(val, mdl)?; + + idx += 1; + last_val = val; + + if val == 0 { + let first_idx = idx; + while idx < 64 && coeffs[scan[idx]] == 0 { + idx += 1; + } + let zrun = idx - first_idx; + huff.encode_zero_run(zrun, &model.zero_run_tree[if first_idx >= 7 { 1 } else { 0 
}])?; + } + } + + Ok(()) +} diff --git a/nihav-duck/src/codecs/vp6enc/mb.rs b/nihav-duck/src/codecs/vp6enc/mb.rs new file mode 100644 index 0000000..de480ca --- /dev/null +++ b/nihav-duck/src/codecs/vp6enc/mb.rs @@ -0,0 +1,710 @@ +use nihav_core::frame::*; +use nihav_codec_support::codecs::{MV, ZERO_MV}; +use super::super::vpcommon::*; +use super::VP56DCPred; +use super::dsp::*; +use super::rdo::*; + +/*#[cfg(debug_assertions)] +use std::io::Write; +#[cfg(debug_assertions)] +use std::fs::File; +#[cfg(debug_assertions)] +pub fn dump_pgm(vbuf: &NAVideoBuffer, name: &str) { + let dst = vbuf.get_data(); + let (w, h) = vbuf.get_dimensions(0); + let mut file = File::create(name).unwrap(); + file.write_all(format!("P5\n{} {}\n255\n", w, h * 3 / 2).as_bytes()).unwrap(); + for row in dst[vbuf.get_offset(0)..].chunks(vbuf.get_stride(0)).take(h).rev() { + file.write_all(row).unwrap(); + } + for (row1, row2) in dst[vbuf.get_offset(1)..].chunks(vbuf.get_stride(1)).take(h / 2).zip(dst[vbuf.get_offset(2)..].chunks(vbuf.get_stride(2))).rev() { + file.write_all(row1).unwrap(); + file.write_all(row2).unwrap(); + } +}*/ + +pub type Coeffs = [[i16; 64]; 6]; + +#[derive(Clone)] +pub struct ResidueMB { + pub coeffs: Coeffs, +} + +impl ResidueMB { + fn new() -> Self { + Self { + coeffs: [[0; 64]; 6], + } + } + fn fdct(&mut self) { + for blk in self.coeffs.iter_mut() { + vp_fdct(blk); + } + } + fn idct(&mut self) { + for blk in self.coeffs.iter_mut() { + vp_idct(blk); + } + } + fn quant(&mut self, q: usize) { + for blk in self.coeffs.iter_mut() { + if blk[0] != 0 { + blk[0] /= VP56_DC_QUANTS[q] * 4; + } + for coef in blk[1..].iter_mut() { + if *coef != 0 { + *coef /= VP56_AC_QUANTS[q] * 4; + } + } + } + } + fn dequant(&mut self, q: usize) { + for blk in self.coeffs.iter_mut() { + if blk[0] != 0 { + blk[0] *= VP56_DC_QUANTS[q] * 4; + } + for coef in blk[1..].iter_mut() { + if *coef != 0 { + *coef *= VP56_AC_QUANTS[q] * 4; + } + } + } + } + fn dequant_from(&mut self, src: &Self, q: usize) { 
+ for (dblk, sblk) in self.coeffs.iter_mut().zip(src.coeffs.iter()) { + dblk[0] = if sblk[0] != 0 { sblk[0] * VP56_DC_QUANTS[q] * 4 } else { 0 }; + for (dcoef, &scoef) in dblk[1..].iter_mut().zip(sblk[1..].iter()) { + *dcoef = if scoef != 0 { scoef * VP56_AC_QUANTS[q] * 4 } else { 0 }; + } + } + } + fn fill(&self, dst: &mut [[u8; 64]; 6]) { + for (dblk, sblk) in dst.iter_mut().zip(self.coeffs.iter()) { + for (dcoef, &scoef) in dblk.iter_mut().zip(sblk.iter()) { + *dcoef = scoef as u8; + } + } + } +} + +#[derive(Clone)] +pub struct InterMB { + pub residue: ResidueMB, + pub reference: Coeffs, + pub mv: [MV; 4], +} + +impl InterMB { + fn new() -> Self { + Self { + residue: ResidueMB::new(), + reference: [[0; 64]; 6], + mv: [ZERO_MV; 4], + } + } +} + +const VP56_DC_QUANTS: [i16; 64] = [ + 47, 47, 47, 47, 45, 43, 43, 43, + 43, 43, 42, 41, 41, 40, 40, 40, + 40, 35, 35, 35, 35, 33, 33, 33, + 33, 32, 32, 32, 27, 27, 26, 26, + 25, 25, 24, 24, 23, 23, 19, 19, + 19, 19, 18, 18, 17, 16, 16, 16, + 16, 16, 15, 11, 11, 11, 10, 10, + 9, 8, 7, 5, 3, 3, 2, 2 +]; +const VP56_AC_QUANTS: [i16; 64] = [ + 94, 92, 90, 88, 86, 82, 78, 74, + 70, 66, 62, 58, 54, 53, 52, 51, + 50, 49, 48, 47, 46, 45, 44, 43, + 42, 40, 39, 37, 36, 35, 34, 33, + 32, 31, 30, 29, 28, 27, 26, 25, + 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, + 8, 7, 6, 5, 4, 3, 2, 1 +]; + +const VP56_FILTER_LIMITS: [u8; 64] = [ + 14, 14, 13, 13, 12, 12, 10, 10, + 10, 10, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 7, 7, 7, 7, + 7, 7, 6, 6, 6, 6, 6, 6, + 5, 5, 5, 5, 4, 4, 4, 4, + 4, 4, 4, 3, 3, 3, 3, 2 +]; + +#[derive(Default)] +pub struct FrameEncoder { + pub quant: usize, + pub src_mbs: Vec, + pub intra_mbs: Vec, + pub inter_mbs: Vec, + pub fourmv_mbs: Vec, + pub golden_mbs: Vec, + + pub mb_types: Vec, + pub num_mv: Vec, + pub coded_mv: Vec<[MV; 4]>, + pub fmv_sub: Vec<[VPMBType; 4]>, + + pub mb_w: usize, + pub mb_h: usize, + + pub me_mode: MVSearchMode, + pub me_range: 
i16, +} + +macro_rules! read_block { + ($dst: expr, $src: expr, $stride: expr) => { + for (drow, srow) in $dst.chunks_mut(8).zip($src.chunks($stride).take(8)) { + for (dst, &src) in drow.iter_mut().zip(srow.iter()) { + *dst = i16::from(src); + } + } + } +} + +macro_rules! write_block { + ($dst: expr, $src: expr, $stride: expr) => { + for (drow, srow) in $dst.chunks_mut($stride).take(8).zip($src.chunks(8)) { + drow[..8].copy_from_slice(srow); + } + } +} + +impl FrameEncoder { + pub fn new() -> Self { Self::default() } + pub fn resize(&mut self, mb_w: usize, mb_h: usize) { + self.mb_w = mb_w; + self.mb_h = mb_h; + + let num_mbs = self.mb_w * self.mb_h; + self.src_mbs.clear(); + self.src_mbs.reserve(num_mbs); + self.intra_mbs.clear(); + self.intra_mbs.reserve(num_mbs); + self.inter_mbs.clear(); + self.inter_mbs.reserve(num_mbs); + self.fourmv_mbs.clear(); + self.fourmv_mbs.reserve(num_mbs); + self.golden_mbs.clear(); + self.golden_mbs.reserve(num_mbs); + + self.mb_types.clear(); + self.mb_types.reserve(num_mbs); + self.num_mv.clear(); + self.num_mv.reserve(num_mbs); + self.coded_mv.clear(); + self.coded_mv.reserve(num_mbs); + self.fmv_sub.clear(); + self.fmv_sub.reserve(num_mbs); + } + pub fn set_quant(&mut self, quant: usize) { self.quant = quant; } + pub fn read_mbs(&mut self, vbuf: &NAVideoBuffer) { + let src = vbuf.get_data(); + let y = &src[vbuf.get_offset(0)..]; + let ystride = vbuf.get_stride(0); + let u = &src[vbuf.get_offset(1)..]; + let ustride = vbuf.get_stride(1); + let v = &src[vbuf.get_offset(2)..]; + let vstride = vbuf.get_stride(2); + let (w, _) = vbuf.get_dimensions(0); + + self.src_mbs.clear(); + for (ys, (us, vs)) in y.chunks(ystride * 16).zip(u.chunks(ustride * 8).zip(v.chunks(vstride * 8))) { + for x in (0..w).step_by(16) { + let mut mb = ResidueMB::new(); + for (i, blk) in mb.coeffs[..4].iter_mut().enumerate() { + read_block!(blk, ys[x + (i & 1) * 8 + (i >> 1) * 8 * ystride..], ystride); + } + read_block!(mb.coeffs[4], us[x/2..], ustride); + 
read_block!(mb.coeffs[5], vs[x/2..], vstride); + self.src_mbs.push(mb); + } + } + } + pub fn reconstruct_frame(&mut self, dc_pred: &mut VP56DCPred, mut vbuf: NAVideoBufferRef) { + let mut blocks = [[0u8; 64]; 6]; + + let mut yoff = vbuf.get_offset(0); + let mut uoff = vbuf.get_offset(1); + let mut voff = vbuf.get_offset(2); + let ystride = vbuf.get_stride(0); + let ustride = vbuf.get_stride(1); + let vstride = vbuf.get_stride(2); + let dst = vbuf.get_data_mut().unwrap(); + + dc_pred.reset(); + + let quant = self.quant; + let mut mb_pos = 0; + for _mb_y in 0..self.mb_h { + for mb_x in 0..self.mb_w { + let mb_type = self.mb_types[mb_pos]; + let mb = self.get_mb_mut(mb_pos); + for (i, blk) in mb.coeffs.iter_mut().enumerate() { + dc_pred.predict_dc(mb_type, i, blk, false); + } + mb.dequant(quant); + mb.idct(); + let mb = self.get_mb(mb_pos); + if mb_type.is_intra() { + for (dblk, sblk) in blocks.iter_mut().zip(mb.coeffs.iter()) { + for (dcoef, &scoef) in dblk.iter_mut().zip(sblk.iter()) { + *dcoef = (scoef + 128).max(0).min(255) as u8; + } + } + } else { + let res_mb = match mb_type.get_ref_id() { + 0 => unreachable!(), + 1 => if mb_type != VPMBType::InterFourMV { + &self.inter_mbs[mb_pos].reference + } else { + &self.fourmv_mbs[mb_pos].reference + }, + _ => &self.golden_mbs[mb_pos].reference, + }; + + for (dblk, (sblk1, sblk2)) in blocks.iter_mut().zip(mb.coeffs.iter().zip(res_mb.iter())) { + for (dcoef, (&scoef1, &scoef2)) in dblk.iter_mut().zip(sblk1.iter().zip(sblk2.iter())) { + *dcoef = (scoef1 + scoef2).max(0).min(255) as u8; + } + } + } + + for i in 0..4 { + write_block!(&mut dst[yoff + mb_x * 16 + (i & 1) * 8 + (i >> 1) * 8 * ystride..], + blocks[i], ystride); + } + write_block!(&mut dst[uoff + mb_x * 8..], blocks[4], ustride); + write_block!(&mut dst[voff + mb_x * 8..], blocks[5], vstride); + + dc_pred.next_mb(); + mb_pos += 1; + } + yoff += ystride * 16; + uoff += ustride * 8; + voff += vstride * 8; + dc_pred.update_row(); + } + /*#[cfg(debug_assertions)] + 
dump_pgm(&vbuf, "/home/kst/devel/NihAV-rust/assets/test_out/debug.pgm");*/ + } + pub fn get_mb(&self, mb_pos: usize) -> &ResidueMB { + let mb_type = self.mb_types[mb_pos]; + match mb_type.get_ref_id() { + 0 => &self.intra_mbs[mb_pos], + 1 => if mb_type != VPMBType::InterFourMV { + &self.inter_mbs[mb_pos].residue + } else { + &self.fourmv_mbs[mb_pos].residue + }, + _ => &self.golden_mbs[mb_pos].residue, + } + } + fn get_mb_mut(&mut self, mb_pos: usize) -> &mut ResidueMB { + let mb_type = self.mb_types[mb_pos]; + match mb_type.get_ref_id() { + 0 => &mut self.intra_mbs[mb_pos], + 1 => if mb_type != VPMBType::InterFourMV { + &mut self.inter_mbs[mb_pos].residue + } else { + &mut self.fourmv_mbs[mb_pos].residue + }, + _ => &mut self.golden_mbs[mb_pos].residue, + } + } + pub fn prepare_intra_blocks(&mut self) { + self.intra_mbs.clear(); + self.mb_types.clear(); + for smb in self.src_mbs.iter() { + let mut dmb = smb.clone(); + dmb.fdct(); + for blk in dmb.coeffs.iter_mut() { + blk[0] -= 4096; + } + dmb.quant(self.quant); + self.mb_types.push(VPMBType::Intra); + self.intra_mbs.push(dmb); + } + } + pub fn prepare_inter_blocks(&mut self, golden: bool) { + let inter_mbs = if !golden { &mut self.inter_mbs } else { &mut self.golden_mbs }; + for (mb_idx, mb) in inter_mbs.iter_mut().enumerate() { + mb.residue.fdct(); + mb.residue.quant(self.quant); + self.mb_types[mb_idx] = VPMBType::InterMV; + } + } + pub fn estimate_mvs(&mut self, ref_frame: NAVideoBufferRef, mc_buf: NAVideoBufferRef, golden: bool) { + let loop_thr = i16::from(VP56_FILTER_LIMITS[self.quant as usize]); + + let inter_mbs = if !golden { &mut self.inter_mbs } else { &mut self.golden_mbs }; + + if inter_mbs.is_empty() { + for _ in 0..self.mb_w * self.mb_h { + inter_mbs.push(InterMB::new()); + } + } + + let mut cur_blk = [[0u8; 64]; 6]; + + let mut mv_est = MVEstimator::new(ref_frame, mc_buf, loop_thr, self.me_range); + + let mut mv_search: Box = match self.me_mode { + MVSearchMode::Full => 
Box::new(FullMVSearch::new()), + MVSearchMode::Diamond => Box::new(DiaSearch::new()), + MVSearchMode::Hexagon => Box::new(HexSearch::new()), + }; + let mut mb_pos = 0; + for (mb_y, row) in inter_mbs.chunks_mut(self.mb_w).enumerate() { + for (mb_x, mb) in row.iter_mut().enumerate() { + self.src_mbs[mb_pos].fill(&mut cur_blk); + + let (best_mv, _best_dist) = mv_search.search_mb(&mut mv_est, &cur_blk, mb_x, mb_y); + mb.mv[3] = best_mv; + + for i in 0..4 { + mv_est.mc_block(i, 0, mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >> 1) * 8, best_mv); + sub_blk(&mut mb.residue.coeffs[i], &cur_blk[i], &mv_est.ref_blk[i]); + } + for plane in 1..3 { + mv_est.mc_block(plane + 3, plane, mb_x * 8, mb_y * 8, best_mv); + sub_blk(&mut mb.residue.coeffs[plane + 3], &cur_blk[plane + 3], &mv_est.ref_blk[plane + 3]); + } + + for (dblk, sblk) in mb.reference.iter_mut().zip(mv_est.ref_blk.iter()) { + for (dst, &src) in dblk.iter_mut().zip(sblk.iter()) { + *dst = i16::from(src); + } + } + mb_pos += 1; + } + } + } + fn estimate_fourmv(&mut self, ref_frame: NAVideoBufferRef, mc_buf: NAVideoBufferRef, mb_x: usize, mb_y: usize) -> bool { + let loop_thr = i16::from(VP56_FILTER_LIMITS[self.quant as usize]); + + if self.fourmv_mbs.is_empty() { + for _ in 0..self.mb_w * self.mb_h { + self.fourmv_mbs.push(InterMB::new()); + } + } + if self.fmv_sub.is_empty() { + self.fmv_sub.resize(self.mb_w * self.mb_h, [VPMBType::Intra; 4]); + } + + let mb_pos = mb_x + mb_y * self.mb_w; + let mb = &mut self.fourmv_mbs[mb_pos]; + + let mut cur_blk = [[0u8; 64]; 6]; + self.src_mbs[mb_pos].fill(&mut cur_blk); + + let mut mv_est = MVEstimator::new(ref_frame, mc_buf, loop_thr, self.me_range); + + let mut mv_search: Box = match self.me_mode { + MVSearchMode::Full => Box::new(FullMVSearch::new()), + MVSearchMode::Diamond => Box::new(DiaSearch::new()), + MVSearchMode::Hexagon => Box::new(HexSearch::new()), + }; + + for i in 0..4 { + let xpos = mb_x * 16 + (i & 1) * 8; + let ypos = mb_y * 16 + (i >> 1) * 8; + let (best_mv, 
_best_dist) = mv_search.search_blk(&mut mv_est, &cur_blk[i], xpos, ypos); + mb.mv[i] = best_mv; + } + let mvsum = mb.mv[0] + mb.mv[1] + mb.mv[2] + mb.mv[3]; + let chroma_mv = MV{ x: mvsum.x / 4, y: mvsum.y / 4}; + + for (i, blk) in mb.residue.coeffs[..4].iter_mut().enumerate() { + let xpos = mb_x * 16 + (i & 1) * 8; + let ypos = mb_y * 16 + (i >> 1) * 8; + mv_est.mc_block(i, 0, xpos, ypos, mb.mv[i]); + sub_blk(blk, &cur_blk[i], &mv_est.ref_blk[i]); + } + for plane in 1..3 { + mv_est.mc_block(plane + 3, plane, mb_x * 8, mb_y * 8, chroma_mv); + sub_blk(&mut mb.residue.coeffs[plane + 3], &cur_blk[plane + 3], &mv_est.ref_blk[plane + 3]); + } + + for (dblk, sblk) in mb.reference.iter_mut().zip(mv_est.ref_blk.iter()) { + for (dst, &src) in dblk.iter_mut().zip(sblk.iter()) { + *dst = i16::from(src); + } + } + + (mb.mv[0] != mb.mv[1]) || (mb.mv[0] != mb.mv[2]) || (mb.mv[0] != mb.mv[3]) + } + pub fn select_inter_blocks(&mut self, ref_frame: NAVideoBufferRef, mc_buf: NAVideoBufferRef, has_golden_frame: bool, lambda: f32) { + let mut tmp_mb = ResidueMB::new(); + for mb_idx in 0..self.mb_w * self.mb_h { + tmp_mb.dequant_from(&self.intra_mbs[mb_idx], self.quant); + tmp_mb.idct(); + for blk in tmp_mb.coeffs.iter_mut() { + for coef in blk.iter_mut() { + *coef = (*coef + 128).max(0).min(255); + } + } + let intra_dist = calc_mb_dist(&self.src_mbs[mb_idx], &tmp_mb); + let intra_nits = estimate_intra_mb_nits(&self.intra_mbs[mb_idx].coeffs, self.quant); + let intra_cost = (intra_dist as f32) + lambda * (intra_nits as f32); + + tmp_mb.dequant_from(&self.inter_mbs[mb_idx].residue, self.quant); + tmp_mb.idct(); + for (blk, res) in tmp_mb.coeffs.iter_mut().zip(self.inter_mbs[mb_idx].reference.iter()) { + for (coef, add) in blk.iter_mut().zip(res.iter()) { + *coef = (*coef + add).max(0).min(255); + } + } + let inter_dist = calc_mb_dist(&self.src_mbs[mb_idx], &tmp_mb); + let mut inter_nits = estimate_inter_mb_nits(&self.inter_mbs[mb_idx], self.quant, false); + if 
self.inter_mbs[mb_idx].mv[3] != ZERO_MV { + inter_nits += estimate_mv_nits(self.inter_mbs[mb_idx].mv[3]); + } + let mut inter_cost = (inter_dist as f32) + lambda * (inter_nits as f32); + + if inter_cost < intra_cost { + self.mb_types[mb_idx] = VPMBType::InterMV; + + if inter_dist > 512 { + self.estimate_fourmv(ref_frame.clone(), mc_buf.clone(), mb_idx % self.mb_w, mb_idx / self.mb_w); + self.fourmv_mbs[mb_idx].residue.fdct(); + self.fourmv_mbs[mb_idx].residue.quant(self.quant); + + tmp_mb.dequant_from(&self.fourmv_mbs[mb_idx].residue, self.quant); + tmp_mb.idct(); + for (blk, res) in tmp_mb.coeffs.iter_mut().zip(self.fourmv_mbs[mb_idx].reference.iter()) { + for (coef, add) in blk.iter_mut().zip(res.iter()) { + *coef = (*coef + add).max(0).min(255); + } + } + let fourmv_dist = calc_mb_dist(&self.src_mbs[mb_idx], &tmp_mb); + let fourmv_nits = estimate_inter_mb_nits(&self.fourmv_mbs[mb_idx], self.quant, true); + let fourmv_cost = (fourmv_dist as f32) + lambda * (fourmv_nits as f32); + if fourmv_cost < inter_cost { + self.mb_types[mb_idx] = VPMBType::InterFourMV; + inter_cost = fourmv_cost; + } + } + } + + if has_golden_frame { + tmp_mb.dequant_from(&self.golden_mbs[mb_idx].residue, self.quant); + tmp_mb.idct(); + for (blk, res) in tmp_mb.coeffs.iter_mut().zip(self.golden_mbs[mb_idx].reference.iter()) { + for (coef, add) in blk.iter_mut().zip(res.iter()) { + *coef = (*coef + add).max(0).min(255); + } + } + let golden_dist = calc_mb_dist(&self.src_mbs[mb_idx], &tmp_mb); + let golden_nits = estimate_inter_mb_nits(&self.golden_mbs[mb_idx], self.quant, false); + let golden_cost = (golden_dist as f32) + lambda * (golden_nits as f32); + + if (self.mb_types[mb_idx].is_intra() && golden_cost < intra_cost) || + (!self.mb_types[mb_idx].is_intra() && golden_cost < inter_cost) { + self.mb_types[mb_idx] = VPMBType::GoldenMV; + } + } + } + } + pub fn decide_frame_type(&self) -> (bool, bool) { + let mut intra_count = 0usize; + let mut non_intra = 0usize; + for mb_type in 
self.mb_types.iter() { + if mb_type.is_intra() { + intra_count += 1; + } else { + non_intra += 1; + } + } + (intra_count > non_intra * 3, intra_count > non_intra) + } + fn find_mv_pred(&self, mb_x: usize, mb_y: usize, ref_id: u8) -> (usize, MV, MV, MV) { + const CAND_POS: [(i8, i8); 12] = [ + (-1, 0), ( 0, -1), + (-1, -1), (-1, 1), + (-2, 0), ( 0, -2), + (-1, -2), (-2, -1), + (-2, 1), (-1, 2), + (-2, -2), (-2, 2) + ]; + + let mut nearest_mv = ZERO_MV; + let mut near_mv = ZERO_MV; + let mut pred_mv = ZERO_MV; + let mut num_mv: usize = 0; + + for (i, (yoff, xoff)) in CAND_POS.iter().enumerate() { + let cx = (mb_x as isize) + (*xoff as isize); + let cy = (mb_y as isize) + (*yoff as isize); + if (cx < 0) || (cy < 0) { + continue; + } + let cx = cx as usize; + let cy = cy as usize; + if (cx >= self.mb_w) || (cy >= self.mb_h) { + continue; + } + let mb_pos = cx + cy * self.mb_w; + let mv = match self.mb_types[mb_pos].get_ref_id() { + 0 => ZERO_MV, + 1 => if self.mb_types[mb_pos] != VPMBType::InterFourMV { + self.inter_mbs[mb_pos].mv[3] + } else { + self.fourmv_mbs[mb_pos].mv[3] + }, + _ => self.golden_mbs[mb_pos].mv[3], + }; + if (self.mb_types[mb_pos].get_ref_id() != ref_id) || (mv == ZERO_MV) { + continue; + } + if num_mv == 0 { + nearest_mv = mv; + num_mv += 1; + if i < 2 { + pred_mv = mv; + } + } else if mv != nearest_mv { + near_mv = mv; + num_mv += 1; + break; + } + } + + (num_mv, nearest_mv, near_mv, pred_mv) + } + pub fn predict_mvs(&mut self) { + let mut mb_idx = 0; + self.num_mv.clear(); + if self.coded_mv.is_empty() { + self.coded_mv.resize(self.mb_w * self.mb_h, [ZERO_MV; 4]); + } + for mb_y in 0..self.mb_h { + for mb_x in 0..self.mb_w { + let (num_mv, nearest_mv, near_mv, pred_mv) = self.find_mv_pred(mb_x, mb_y, VP_REF_INTER); + let mb_type = self.mb_types[mb_idx]; + self.num_mv.push(num_mv as u8); + let golden = mb_type.get_ref_id() == VP_REF_GOLDEN; + let mv = if !golden { self.inter_mbs[mb_idx].mv[3] } else { self.golden_mbs[mb_idx].mv[3] }; + + let 
mb_type = if mb_type == VPMBType::Intra { + VPMBType::Intra + } else if mb_type == VPMBType::InterFourMV { + for i in 0..4 { + let mv = self.fourmv_mbs[mb_idx].mv[i]; + self.coded_mv[mb_idx][i] = ZERO_MV; + if mv == ZERO_MV { + self.fmv_sub[mb_idx][i] = VPMBType::InterNoMV; + } else { + self.fmv_sub[mb_idx][i] = match num_mv { + 0 => { + self.coded_mv[mb_idx][i] = mv - pred_mv; + VPMBType::InterMV + }, + 1 => { + if nearest_mv == mv { + VPMBType::InterNearest + } else { + self.coded_mv[mb_idx][i] = mv - pred_mv; + VPMBType::InterMV + } + }, + _ => { + if nearest_mv == mv { + VPMBType::InterNearest + } else if near_mv == mv { + VPMBType::InterNear + } else { + self.coded_mv[mb_idx][i] = mv - pred_mv; + VPMBType::InterMV + } + }, + }; + } + } + VPMBType::InterFourMV + } else if mv == ZERO_MV { + if !golden { + VPMBType::InterNoMV + } else { + VPMBType::GoldenNoMV + } + } else if mb_type.get_ref_id() == VP_REF_INTER { + self.coded_mv[mb_idx][3] = mv; + match num_mv { + 0 => VPMBType::InterMV, + 1 => { + if nearest_mv == mv { + VPMBType::InterNearest + } else { + self.coded_mv[mb_idx][3] = mv - pred_mv; + VPMBType::InterMV + } + }, + _ => { + if nearest_mv == mv { + VPMBType::InterNearest + } else if near_mv == mv { + VPMBType::InterNear + } else { + self.coded_mv[mb_idx][3] = mv - pred_mv; + VPMBType::InterMV + } + }, + } + } else { + let (num_mv, nearest_mv, near_mv, pred_mv) = self.find_mv_pred(mb_x, mb_y, VP_REF_GOLDEN); + self.coded_mv[mb_idx][3] = ZERO_MV; + match num_mv { + 0 => { + self.coded_mv[mb_idx][3] = mv - pred_mv; + VPMBType::GoldenMV + }, + 1 => { + if nearest_mv == mv { + VPMBType::GoldenNearest + } else { + self.coded_mv[mb_idx][3] = mv - pred_mv; + VPMBType::GoldenMV + } + }, + _ => { + if nearest_mv == mv { + VPMBType::GoldenNearest + } else if near_mv == mv { + VPMBType::GoldenNear + } else { + self.coded_mv[mb_idx][3] = mv - pred_mv; + VPMBType::GoldenMV + } + }, + } + }; + self.mb_types[mb_idx] = mb_type; + mb_idx += 1; + } + } + } + pub fn 
apply_dc_prediction(&mut self, dc_pred: &mut VP56DCPred) { + dc_pred.reset(); + + let mut mb_idx = 0; + for _mb_y in 0..self.mb_h { + for _mb_x in 0..self.mb_w { + let mb_type = self.mb_types[mb_idx]; + let mb = self.get_mb_mut(mb_idx); + for (i, blk) in mb.coeffs.iter_mut().enumerate() { + dc_pred.predict_dc(mb_type, i, blk, true); + } + dc_pred.next_mb(); + mb_idx += 1; + } + dc_pred.update_row(); + } + } +} diff --git a/nihav-duck/src/codecs/vp6enc/mod.rs b/nihav-duck/src/codecs/vp6enc/mod.rs new file mode 100644 index 0000000..0ffceb2 --- /dev/null +++ b/nihav-duck/src/codecs/vp6enc/mod.rs @@ -0,0 +1,907 @@ +use nihav_core::codecs::*; +use nihav_core::io::byteio::*; +use super::vp6data::*; +use super::vpcommon::*; + +mod coder; +use coder::*; +mod dsp; +use dsp::MVSearchMode; +mod huff; +use huff::*; +mod mb; +use mb::*; +mod models; +use models::*; +mod ratectl; +use ratectl::*; +mod rdo; + +enum VP6Writer<'a, 'b> { + BoolCoder(BoolEncoder<'a, 'b>), + Huffman(HuffEncoder<'a, 'b>), +} + +#[derive(Default)] +pub struct VP56DCPred { + dc_y: Vec, + dc_u: Vec, + dc_v: Vec, + ldc_y: [i16; 2], + ldc_u: i16, + ldc_v: i16, + ref_y: Vec, + ref_c: Vec, + ref_left: u8, + y_idx: usize, + c_idx: usize, + + last_dc: [[i16; 4]; 3], +} + +const INVALID_REF: u8 = 42; + +impl VP56DCPred { + fn new() -> Self { Self::default() } + fn resize(&mut self, mb_w: usize) { + self.dc_y.resize(mb_w * 2 + 2, 0); + self.dc_u.resize(mb_w + 2, 0); + self.dc_v.resize(mb_w + 2, 0); + self.ref_y.resize(mb_w * 2 + 2, INVALID_REF); + self.ref_c.resize(mb_w + 2, INVALID_REF); + self.ref_c[0] = 0; + } + fn reset(&mut self) { + self.update_row(); + for el in self.ref_y.iter_mut().skip(1) { *el = INVALID_REF; } + for el in self.ref_c.iter_mut().skip(1) { *el = INVALID_REF; } + + self.last_dc = [[0; 4]; 3]; + self.last_dc[0][1] = 0x80; + self.last_dc[0][2] = 0x80; + } + fn update_row(&mut self) { + self.y_idx = 1; + self.c_idx = 1; + self.ldc_y = [0; 2]; + self.ldc_u = 0; + self.ldc_v = 0; + 
self.ref_left = INVALID_REF; + } + fn next_mb(&mut self) { + self.y_idx += 2; + self.c_idx += 1; + } + fn predict_dc(&mut self, mb_type: VPMBType, blk_no: usize, coeffs: &mut [i16; 64], fwd: bool) { + let is_luma = blk_no < 4; + let (plane, dcs) = match blk_no { + 4 => (1, &mut self.dc_u), + 5 => (2, &mut self.dc_v), + _ => (0, &mut self.dc_y), + }; + let (dc_ref, dc_idx) = if is_luma { + (&mut self.ref_y, self.y_idx + (blk_no & 1)) + } else { + (&mut self.ref_c, self.c_idx) + }; + let ref_id = mb_type.get_ref_id(); + let mut dc_pred = 0; + let mut count = 0; + let has_left_blk = is_luma && ((blk_no & 1) == 1); + if has_left_blk || self.ref_left == ref_id { + dc_pred += match blk_no { + 0 | 1 => self.ldc_y[0], + 2 | 3 => self.ldc_y[1], + 4 => self.ldc_u, + _ => self.ldc_v, + }; + count += 1; + } + if dc_ref[dc_idx] == ref_id { + dc_pred += dcs[dc_idx]; + count += 1; + } + if count == 0 { + dc_pred = self.last_dc[ref_id as usize][plane]; + } else if count == 2 { + dc_pred /= 2; + } + if !fwd { + coeffs[0] += dc_pred; + } + + let dc = coeffs[0]; + if blk_no != 4 { // update top block reference only for the second chroma component + dc_ref[dc_idx] = ref_id; + } + match blk_no { + 0 | 1 => { + self.ldc_y[0] = dc; + }, + 2 | 3 => { + self.ldc_y[1] = dc; + }, + 4 => { + self.ldc_u = dc; + }, + _ => { + self.ldc_v = dc; + self.ref_left = ref_id; + }, + }; + dcs[dc_idx] = dc; + + self.last_dc[ref_id as usize][plane] = dc; + if fwd { + coeffs[0] -= dc_pred; + } + } +} + +/// Zero tests on a 64-coefficient block, split into coefficient 0 and the remaining 63. +trait ZeroBlock { + /// Returns `true` when coefficient 0 is zero. + fn no_dc(&self) -> bool; + /// Returns `true` when every coefficient after the first is zero. + fn no_ac(&self) -> bool; +} + +impl ZeroBlock for [i16; 64] { + fn no_dc(&self) -> bool { + self[0] == 0 + } + fn no_ac(&self) -> bool { + // stops at the first non-zero coefficient, same as the explicit early-return loop + self[1..].iter().all(|&el| el == 0) + } +} + +struct VP6Encoder { + stream: Option, + pkt: Option, + key_int: u8, + frmcount: u8, + mb_w: usize, + mb_h: usize, + dc_pred: VP56DCPred, + top_ctx: [Vec; 4], + mc_buf: NAVideoBufferRef, + fenc: FrameEncoder, + ratectl: RateControl, + +
huffman: bool, + + version: u8, + profile: u8, + + models: VP56Models, + stats: VP56ModelsStat, + pmodels: VP56Models, + + last_frame: NABufferType, + gold_frame: NABufferType, + last_gold: bool, + me_mode: MVSearchMode, + me_range: i16, + + force_q: Option, +} + +impl VP6Encoder { + fn new() -> Self { + let vt = alloc_video_buffer(NAVideoInfo::new(24, 24, false, VP_YUVA420_FORMAT), 4).unwrap(); + let mc_buf = vt.get_vbuf().unwrap(); + Self { + stream: None, + pkt: None, + key_int: 10, + frmcount: 0, + mb_w: 0, + mb_h: 0, + dc_pred: VP56DCPred::new(), + top_ctx: [Vec::new(), Vec::new(), Vec::new(), Vec::new()], + fenc: FrameEncoder::new(), + ratectl: RateControl::new(), + mc_buf, + + huffman: false, + + version: VERSION_VP60, + profile: VP6_SIMPLE_PROFILE, + + models: VP56Models::new(), + pmodels: VP56Models::new(), + stats: VP56ModelsStat::new(), + + last_frame: NABufferType::None, + gold_frame: NABufferType::None, + last_gold: false, + me_mode: MVSearchMode::default(), + me_range: 16, + + force_q: None, + } + } + fn decide_encoding(&mut self) -> bool { + false + } + fn estimate_blocks(&mut self, is_intra: bool) { + for top_ctx in self.top_ctx.iter_mut() { + for el in top_ctx.iter_mut() { + *el = false; + } + } + let mut last_mbt = VPMBType::InterNoMV; + let mut mb_idx = 0; + for _mb_y in 0..self.mb_h { + let mut left_dc = [false; 4]; + for mb_x in 0..self.mb_w { + let mb_type = self.fenc.mb_types[mb_idx]; + if !is_intra { + estimate_mb_type(mb_type, last_mbt, ((self.fenc.num_mv[mb_idx] + 1) % 3) as usize, &mut self.stats); + last_mbt = mb_type; + match mb_type { + VPMBType::InterMV | VPMBType::GoldenMV => { + estimate_mv(self.fenc.coded_mv[mb_idx][3], &mut self.stats); + }, + VPMBType::InterFourMV => { + for (&sub_type, &mv) in self.fenc.fmv_sub[mb_idx].iter().zip(self.fenc.coded_mv[mb_idx].iter()) { + if sub_type == VPMBType::InterMV { + estimate_mv(mv, &mut self.stats); + } + } + }, + _ => {}, + }; + } + let mb = self.fenc.get_mb(mb_idx); + for i in 0..4 { + 
let cur_idx = mb_x * 2 + (i & 1); + let mut dc_mode = 0; + if self.top_ctx[0][cur_idx] { + dc_mode += 1; + } + if left_dc[i >> 1] { + dc_mode += 1; + } + self.top_ctx[0][cur_idx] = mb.coeffs[i][0] != 0; + left_dc[i >> 1] = mb.coeffs[i][0] != 0; + estimate_block(&mb.coeffs[i], dc_mode, &mut self.stats.coeff_models[0], &mut self.stats.vp6models, &self.models.vp6models.zigzag); + } + + let mut dc_mode = 0; + if self.top_ctx[1][mb_x] { + dc_mode += 1; + } + if left_dc[2] { + dc_mode += 1; + } + self.top_ctx[1][mb_x] = mb.coeffs[4][0] != 0; + left_dc[2] = mb.coeffs[4][0] != 0; + estimate_block(&mb.coeffs[4], dc_mode, &mut self.stats.coeff_models[1], &mut self.stats.vp6models, &self.models.vp6models.zigzag); + + let mut dc_mode = 0; + if self.top_ctx[2][mb_x] { + dc_mode += 1; + } + if left_dc[3] { + dc_mode += 1; + } + self.top_ctx[2][mb_x] = mb.coeffs[5][0] != 0; + left_dc[3] = mb.coeffs[5][0] != 0; + estimate_block(&mb.coeffs[5], dc_mode, &mut self.stats.coeff_models[1], &mut self.stats.vp6models, &self.models.vp6models.zigzag); + + mb_idx += 1; + } + } + } + fn prepare_huff_models(&mut self) { + for i in 0..2 { + self.models.vp6huff.dc_token_tree[i].build_codes(&self.models.coeff_models[i].dc_value_probs); + } + for i in 0..2 { + for mode in 0..3 { + for band in 0..6 { + self.models.vp6huff.ac_token_tree[i][mode][band].build_codes(&self.models.coeff_models[i].ac_val_probs[mode][band]); + } + } + } + for i in 0..2 { + self.models.vp6huff.zero_run_tree[i].build_codes_zero_run(&self.models.vp6models.zero_run_probs[i]); + } + } + fn determine_coeff_runs_luma(&self, hstate: &mut HuffState, mb_pos: usize, blk: usize) { + let mb = self.fenc.get_mb(mb_pos); + + if !hstate.dc_zr_coded[0] { + if mb.coeffs[blk].no_dc() { + hstate.dc_zero_run[0] = 0; + let mut blk_no = (blk + 1) & 3; + let mut mb_no = mb_pos + ((blk + 1) >> 2); + let mut cmb = mb; + let mut last_mb_no = mb_pos; + while (hstate.dc_zero_run[0] < MAX_EOB_RUN) && (mb_no < self.mb_w * self.mb_h) { + if mb_no != 
last_mb_no { + cmb = self.fenc.get_mb(mb_no); + last_mb_no = mb_no; + } + if !cmb.coeffs[blk_no].no_dc() { + break; + } + hstate.dc_zero_run[0] += 1; + blk_no += 1; + if blk_no == 4 { + blk_no = 0; + mb_no += 1; + } + } + } + } + if !hstate.ac_zr_coded[0] { + if mb.coeffs[blk].no_ac() { + hstate.ac_zero_run[0] = 0; + let mut blk_no = (blk + 1) & 3; + let mut mb_no = mb_pos + ((blk + 1) >> 2); + let mut cmb = mb; + let mut last_mb_no = mb_pos; + while (hstate.ac_zero_run[0] < MAX_EOB_RUN) && (mb_no < self.mb_w * self.mb_h) { + if mb_no != last_mb_no { + cmb = self.fenc.get_mb(mb_no); + last_mb_no = mb_no; + } + if !cmb.coeffs[blk_no].no_ac() { + break; + } + hstate.ac_zero_run[0] += 1; + blk_no += 1; + if blk_no == 4 { + blk_no = 0; + mb_no += 1; + } + } + } + } + } + fn determine_coeff_runs_chroma(&self, hstate: &mut HuffState, mb_pos: usize, plane: usize) { + let mb = self.fenc.get_mb(mb_pos); + let blk = plane + 3; + + if !hstate.dc_zr_coded[1] { + if mb.coeffs[blk].no_dc() { + hstate.dc_zero_run[1] = 0; + let mut blk_no = if blk == 4 { 5 } else { 4 }; + let mut mb_no = mb_pos + if blk == 4 { 0 } else { 1 }; + while (hstate.dc_zero_run[1] < MAX_EOB_RUN) && (mb_no < self.mb_w * self.mb_h) { + let mb = self.fenc.get_mb(mb_no); + if !mb.coeffs[blk_no].no_dc() { + break; + } + hstate.dc_zero_run[1] += 1; + blk_no += 1; + if blk_no == 6 { + blk_no = 4; + mb_no += 1; + } + } + } + } + if !hstate.ac_zr_coded[1] { + if mb.coeffs[blk].no_ac() { + hstate.ac_zero_run[1] = 0; + let mut blk_no = if blk == 4 { 5 } else { 4 }; + let mut mb_no = mb_pos + if blk == 4 { 0 } else { 1 }; + while (hstate.ac_zero_run[1] < MAX_EOB_RUN) && (mb_no < self.mb_w * self.mb_h) { + let mb = self.fenc.get_mb(mb_no); + if !mb.coeffs[blk_no].no_ac() { + break; + } + hstate.ac_zero_run[1] += 1; + blk_no += 1; + if blk_no == 6 { + blk_no = 4; + mb_no += 1; + } + } + } + } + } + fn encode_intra(&mut self, bw: &mut ByteWriter, quant: usize) -> EncoderResult { + self.models.reset(false); + 
self.models.reset_mbtype_models(); + self.stats.reset(); + + self.pmodels.reset(false); + self.pmodels.reset_mbtype_models(); + + let multistream = self.huffman; + + self.fenc.prepare_intra_blocks(); + self.fenc.apply_dc_prediction(&mut self.dc_pred); + self.estimate_blocks(true); + self.stats.generate(&mut self.models, true); + + // header + bw.write_byte(((quant as u8) << 1) | (multistream as u8))?; + bw.write_byte((self.version << 3) | (self.profile << 1))?; + bw.write_u16be(0)?; // part 2 offset placeholder + + let mut bc = BoolEncoder::new(bw); + + bc.put_bits(self.mb_h as u32, 8)?; + bc.put_bits(self.mb_w as u32, 8)?; + bc.put_bits(self.mb_h as u32, 8)?; // display MB height + bc.put_bits(self.mb_w as u32, 8)?; // display MB width + bc.put_bits(0, 2)?; // scaling mode + // todo other advanced profile bits + bc.put_bits(self.huffman as u32, 1)?; // Huffman mode + + encode_coeff_models(&mut bc, &mut self.models, &self.pmodels, true, false)?; + self.pmodels = self.models.clone(); + + if multistream || (self.profile == VP6_SIMPLE_PROFILE) { + bc.flush()?; + + // patch coefficient offset + let offset = bw.tell(); + if offset >= 65535 { + return Err(EncoderError::Bug); + } + bw.seek(SeekFrom::Start(2))?; + bw.write_u16be(offset as u16)?; + bw.seek(SeekFrom::End(0))?; + + bc = BoolEncoder::new(bw); + } + let writer = if !self.huffman { + VP6Writer::BoolCoder(bc) + } else { + VP6Writer::Huffman(HuffEncoder::new(bw)) + }; + self.encode_coeffs(writer)?; + Ok(true) + } + fn encode_inter(&mut self, bw: &mut ByteWriter, quant: usize) -> EncoderResult { + self.stats.reset(); + + let multistream = !self.huffman; // NOTE(review): encode_intra sets this flag to self.huffman (no negation) - confirm which polarity is intended + let loop_filter = false; + + self.fenc.prepare_intra_blocks(); + self.fenc.prepare_inter_blocks(false); + if !self.last_gold { + self.fenc.prepare_inter_blocks(true); + } + let lambda = if self.force_q.is_some() { 1.0 } else { self.ratectl.lambda }; + self.fenc.select_inter_blocks(self.last_frame.get_vbuf().unwrap(), self.mc_buf.clone(), !self.last_gold, lambda);
+ // todo implement forced intra + let (_force_intra, golden_frame) = self.fenc.decide_frame_type(); + self.fenc.apply_dc_prediction(&mut self.dc_pred); + self.fenc.predict_mvs(); + self.estimate_blocks(false); + + self.stats.generate(&mut self.models, false); + + // header + bw.write_byte(0x80 | ((quant as u8) << 1) | (multistream as u8))?; + bw.write_u16be(0)?; // part 2 offset placeholder + + let mut bc = BoolEncoder::new(bw); + + bc.put_bits(golden_frame as u32, 1)?; // refresh golden frame + if self.profile == VP6_ADVANCED_PROFILE { + bc.put_bits(loop_filter as u32, 1)?; // use loop filter + if loop_filter { + bc.put_bits(0, 1)?; // loop filter selector + } + if self.version == VERSION_VP62 { + bc.put_bits(0, 1)?; // auto select PM + } + } + // todo other advanced profile bits + bc.put_bits(self.huffman as u32, 1)?; + + encode_mode_prob_models(&mut bc, &mut self.models, &self.pmodels, &self.stats.mbtype_models)?; + encode_mv_models(&mut bc, &self.models.mv_models, &self.pmodels.mv_models)?; + encode_coeff_models(&mut bc, &mut self.models, &self.pmodels, false, false)?; + self.pmodels = self.models.clone(); + + let mut last_mbt = VPMBType::InterNoMV; + for mb_idx in 0..self.mb_w * self.mb_h { + let mb_type = self.fenc.mb_types[mb_idx]; + encode_mb_type(&mut bc, self.fenc.mb_types[mb_idx], last_mbt, ((self.fenc.num_mv[mb_idx] + 1) % 3) as usize, &self.models)?; + last_mbt = mb_type; + match mb_type { + VPMBType::InterMV | VPMBType::GoldenMV => { + encode_mv(&mut bc, self.fenc.coded_mv[mb_idx][3], &self.models)?; + }, + VPMBType::InterFourMV => { + for &sub_type in self.fenc.fmv_sub[mb_idx].iter() { + let id = match sub_type { + VPMBType::InterNoMV => 0, + VPMBType::InterMV => 1, + VPMBType::InterNearest => 2, + VPMBType::InterNear => 3, + _ => unreachable!(), + }; + bc.put_bits(id, 2)?; + } + for (&sub_type, &mv) in self.fenc.fmv_sub[mb_idx].iter().zip(self.fenc.coded_mv[mb_idx].iter()) { + if sub_type == VPMBType::InterMV { + encode_mv(&mut bc, mv, 
&self.models)?; + } + } + }, + _ => {}, + }; + } + + if multistream || (self.profile == VP6_SIMPLE_PROFILE) { + bc.flush()?; + + // patch coefficient offset + let offset = bw.tell(); + if offset >= 65535 { + return Err(EncoderError::Bug); + } + bw.seek(SeekFrom::Start(1))?; + bw.write_u16be(offset as u16)?; + bw.seek(SeekFrom::End(0))?; + + bc = BoolEncoder::new(bw); + } + let writer = if !self.huffman { + VP6Writer::BoolCoder(bc) + } else { + VP6Writer::Huffman(HuffEncoder::new(bw)) + }; + self.encode_coeffs(writer)?; + Ok(golden_frame) + } + fn encode_coeffs(&mut self, mut writer: VP6Writer) -> EncoderResult<()> { + if self.huffman { + self.prepare_huff_models(); + } + + let mut hstate = HuffState::new(); + for top_ctx in self.top_ctx.iter_mut() { + for el in top_ctx.iter_mut() { + *el = false; + } + } + let mut mb_pos = 0; + for _mb_y in 0..self.mb_h { + let mut left_dc = [false; 4]; + for mb_x in 0..self.mb_w { + let mb = self.fenc.get_mb(mb_pos); + for i in 0..4 { + let cur_idx = mb_x * 2 + (i & 1); + let mut dc_mode = 0; + if self.top_ctx[0][cur_idx] { + dc_mode += 1; + } + if left_dc[i >> 1] { + dc_mode += 1; + } + self.top_ctx[0][cur_idx] = mb.coeffs[i][0] != 0; + left_dc[i >> 1] = mb.coeffs[i][0] != 0; + if self.huffman { + self.determine_coeff_runs_luma(&mut hstate, mb_pos, i); + } + match writer { + VP6Writer::BoolCoder(ref mut bc) => encode_block(bc, &mb.coeffs[i], dc_mode, &self.models.coeff_models[0], &self.models.vp6models)?, + VP6Writer::Huffman(ref mut huff) => encode_block_huff(huff, &self.models.vp6models.zigzag, &mb.coeffs[i], 0, &mut hstate, &self.models.vp6huff)?, + }; + } + + for plane in 1..3 { + let mut dc_mode = 0; + if self.top_ctx[plane][mb_x] { + dc_mode += 1; + } + if left_dc[plane + 1] { + dc_mode += 1; + } + self.top_ctx[plane][mb_x] = mb.coeffs[plane + 3][0] != 0; + left_dc[plane + 1] = mb.coeffs[plane + 3][0] != 0; + if self.huffman { + self.determine_coeff_runs_chroma(&mut hstate, mb_pos, plane); + } + match writer { + 
VP6Writer::BoolCoder(ref mut bc) => encode_block(bc, &mb.coeffs[plane + 3], dc_mode, &self.models.coeff_models[1], &self.models.vp6models)?, + VP6Writer::Huffman(ref mut huff) => encode_block_huff(huff, &self.models.vp6models.zigzag, &mb.coeffs[plane + 3], 1, &mut hstate, &self.models.vp6huff)?, + }; + } + + mb_pos += 1; + } + } + + match writer { + VP6Writer::BoolCoder(bc) => bc.flush()?, + VP6Writer::Huffman(huff) => huff.flush()?, + }; + + Ok(()) + } +} + +impl NAEncoder for VP6Encoder { + /// Proposes output parameters for the requested input: flipped YUV 4:2:0 video, FormatError for audio. + fn negotiate_format(&self, encinfo: &EncodeParameters) -> EncoderResult { + match encinfo.format { + NACodecTypeInfo::None => { + let mut ofmt = EncodeParameters::default(); + ofmt.format = NACodecTypeInfo::Video(NAVideoInfo::new(0, 0, true, YUV420_FORMAT)); + Ok(ofmt) + }, + NACodecTypeInfo::Audio(_) => Err(EncoderError::FormatError), + NACodecTypeInfo::Video(vinfo) => { + // init() rejects any width or height that is not a multiple of 16, so round up to 16 here (not 4) to keep the negotiated format acceptable + let outinfo = NAVideoInfo::new((vinfo.width + 15) & !15, (vinfo.height + 15) & !15, true, YUV420_FORMAT); + let mut ofmt = *encinfo; + ofmt.format = NACodecTypeInfo::Video(outinfo); + Ok(ofmt) + } + } + } + fn init(&mut self, stream_id: u32, encinfo: EncodeParameters) -> EncoderResult { + match encinfo.format { + NACodecTypeInfo::None => Err(EncoderError::FormatError), + NACodecTypeInfo::Audio(_) => Err(EncoderError::FormatError), + NACodecTypeInfo::Video(vinfo) => { + if vinfo.format != YUV420_FORMAT { + return Err(EncoderError::FormatError); + } + if ((vinfo.width | vinfo.height) & 15) != 0 { + return Err(EncoderError::FormatError); + } + if (vinfo.width | vinfo.height) >= (1 << 12) { + return Err(EncoderError::FormatError); + } + + let out_info = NAVideoInfo::new(vinfo.width, vinfo.height, true, vinfo.format); + let info = NACodecInfo::new("vp6", NACodecTypeInfo::Video(out_info), None); + let mut stream = NAStream::new(StreamType::Video, stream_id, info, encinfo.tb_num, encinfo.tb_den, 0); + stream.set_num(stream_id as usize); + let stream = stream.into_ref(); + + self.last_frame =
alloc_video_buffer(out_info, 4)?; + self.gold_frame = alloc_video_buffer(out_info, 4)?; + + self.stream = Some(stream.clone()); + + self.mb_w = (vinfo.width + 15) >> 4; + self.mb_h = (vinfo.height + 15) >> 4; + self.fenc.resize(self.mb_w, self.mb_h); + self.ratectl.init(self.mb_w, self.mb_h, encinfo.bitrate, encinfo.tb_num, encinfo.tb_den); + + self.dc_pred.resize(self.mb_w); + self.top_ctx = [vec![false; self.mb_w * 2], vec![false; self.mb_w], vec![false; self.mb_w], vec![false; self.mb_w * 2]]; + + self.version = VERSION_VP60; + self.profile = VP6_SIMPLE_PROFILE; + + Ok(stream) + }, + } + } + fn encode(&mut self, frm: &NAFrame) -> EncoderResult<()> { + let buf = frm.get_buffer(); + if let Some(ref vbuf) = buf.get_vbuf() { + let mut dbuf = Vec::with_capacity(4); + let mut gw = GrowableMemoryWriter::new_write(&mut dbuf); + let mut bw = ByteWriter::new(&mut gw); + + // todo integrate with rate control + let is_intra = (self.frmcount == 0) || self.decide_encoding(); + let quant = if let Some(q) = self.force_q { + q + } else { + self.ratectl.guess_quant(is_intra, self.huffman) + }; + + self.fenc.read_mbs(vbuf); + self.fenc.set_quant(quant); + self.fenc.me_mode = self.me_mode; + self.fenc.me_range = self.me_range; + let golden_frame = if is_intra { + self.encode_intra(&mut bw, quant)? + } else { + self.fenc.estimate_mvs(self.last_frame.get_vbuf().unwrap(), self.mc_buf.clone(), false); + if !self.last_gold { + self.fenc.estimate_mvs(self.gold_frame.get_vbuf().unwrap(), self.mc_buf.clone(), true); + } + self.encode_inter(&mut bw, quant)? 
+ }; + self.fenc.reconstruct_frame(&mut self.dc_pred, self.last_frame.get_vbuf().unwrap()); + self.last_gold = golden_frame; + if golden_frame { + let mut dfrm = self.gold_frame.get_vbuf().unwrap(); + let src = self.last_frame.get_vbuf().unwrap(); + + let dst = dfrm.get_data_mut().unwrap(); + dst.copy_from_slice(src.get_data()); + } + + if self.force_q.is_none() { + self.ratectl.update(dbuf.len() * 8); + } + + self.pkt = Some(NAPacket::new(self.stream.clone().unwrap(), frm.ts, is_intra, dbuf)); + if self.key_int > 0 { + self.frmcount += 1; + } + if self.frmcount == self.key_int { + self.frmcount = 0; + } + Ok(()) + } else { + Err(EncoderError::InvalidParameters) + } + } + fn get_packet(&mut self) -> EncoderResult> { + let mut npkt = None; + std::mem::swap(&mut self.pkt, &mut npkt); + Ok(npkt) + } + fn flush(&mut self) -> EncoderResult<()> { + self.frmcount = 0; + Ok(()) + } +} + +const HUFFMAN_OPTION: &str = "huffman"; +const QUANT_OPTION: &str = "quant"; +const MV_SEARCH_OPTION: &str = "mv_mode"; +const MV_RANGE_OPTION: &str = "mv_range"; + +const ENCODER_OPTS: &[NAOptionDefinition] = &[ + NAOptionDefinition { + name: KEYFRAME_OPTION, description: KEYFRAME_OPTION_DESC, + opt_type: NAOptionDefinitionType::Int(Some(0), Some(128)) }, + NAOptionDefinition { + name: HUFFMAN_OPTION, description: "use Huffman encoding", + opt_type: NAOptionDefinitionType::Bool }, + NAOptionDefinition { + name: QUANT_OPTION, description: "force fixed quantiser for encoding", + opt_type: NAOptionDefinitionType::Int(Some(-1), Some(63)) }, + NAOptionDefinition { + name: MV_SEARCH_OPTION, description: "motion search mode", + opt_type: NAOptionDefinitionType::String(Some(&["full", "dia", "hex"])) }, + NAOptionDefinition { + name: MV_RANGE_OPTION, description: "motion search range (in pixels)", + opt_type: NAOptionDefinitionType::Int(Some(0), Some(30)) }, +]; + +impl NAOptionHandler for VP6Encoder { + fn get_supported_options(&self) -> &[NAOptionDefinition] { ENCODER_OPTS } + fn 
set_options(&mut self, options: &[NAOption]) { + for option in options.iter() { + for opt_def in ENCODER_OPTS.iter() { + if opt_def.check(option).is_ok() { + match option.name { + KEYFRAME_OPTION => { + if let NAValue::Int(intval) = option.value { + self.key_int = intval as u8; + } + }, + HUFFMAN_OPTION => { + if let NAValue::Bool(bval) = option.value { + self.huffman = bval; + } + }, + QUANT_OPTION => { + if let NAValue::Int(intval) = option.value { + self.force_q = if intval < 0 { None } else { Some(intval as usize) }; + } + }, + MV_SEARCH_OPTION => { + if let NAValue::String(ref string) = option.value { + if let Ok(mv_mode) = string.parse::() { + self.me_mode = mv_mode; + } + } + }, + MV_RANGE_OPTION => { + if let NAValue::Int(intval) = option.value { + self.me_range = intval as i16; + } + }, + _ => {}, + }; + } + } + } + } + fn query_option_value(&self, name: &str) -> Option { + match name { + KEYFRAME_OPTION => Some(NAValue::Int(i64::from(self.key_int))), + HUFFMAN_OPTION => Some(NAValue::Bool(self.huffman)), + QUANT_OPTION => if let Some(q) = self.force_q { + Some(NAValue::Int(q as i64)) + } else { + Some(NAValue::Int(-1)) + }, + MV_SEARCH_OPTION => Some(NAValue::String(self.me_mode.to_string())), + MV_RANGE_OPTION => Some(NAValue::Int(i64::from(self.me_range))), + _ => None, + } + } +} + +pub fn get_encoder() -> Box { + Box::new(VP6Encoder::new()) +} + +#[cfg(test)] +mod test { + use nihav_core::codecs::*; + use nihav_core::demuxers::*; + use nihav_core::muxers::*; + use crate::*; + use nihav_commonfmt::*; + use nihav_codec_support::test::enc_video::*; + + fn encode_test(out_name: &'static str, enc_options: &[NAOption], hash: &[u32; 4]) { + let mut dmx_reg = RegisteredDemuxers::new(); + generic_register_all_demuxers(&mut dmx_reg); + let mut dec_reg = RegisteredDecoders::new(); + duck_register_all_decoders(&mut dec_reg); + let mut mux_reg = RegisteredMuxers::new(); + generic_register_all_muxers(&mut mux_reg); + let mut enc_reg = RegisteredEncoders::new(); + 
    // Forces quantiser 42 and leaves the Huffman option untouched, then
    // compares the MD5 of the encoded output with the stored reference.
    // NOTE(review): the "bc"/"bool" naming suggests the bool-coder path -
    // confirm that `huffman` defaults to false.
    #[test]
    fn test_vp6_encoder_bc() {
        let enc_options = &[
                NAOption { name: super::QUANT_OPTION, value: NAValue::Int(42) },
            ];
        encode_test("vp6-bool.avi", enc_options, &[0x3649ebc5, 0x4ed1cd7d, 0x1ad40c7b, 0xadd30276]);
    }
    // Passes no options at all, so the encoder runs with its default settings.
    // NOTE(review): presumably meant to exercise rate control, but
    // `encode_test` supplies a zero timebase - verify rate control is active.
    #[test]
    fn test_vp6_encoder_rc() {
        let enc_options = &[
            ];
        encode_test("vp6-rc.avi", enc_options, &[0x97f3ea9d, 0x5374d30f, 0xf900a594, 0xbfa34b0f]);
    }
    // Same fixed quantiser as the first test, but with the Huffman option
    // switched on.
    #[test]
    fn test_vp6_encoder_huff() {
        let enc_options = &[
                NAOption { name: super::HUFFMAN_OPTION, value: NAValue::Bool(true) },
                NAOption { name: super::QUANT_OPTION, value: NAValue::Int(42) },
            ];
        encode_test("vp6-huff.avi", enc_options, &[0x4558af0a, 0x4d260b6b, 0x16b7c501, 0x178f42c5]);
    }
/// Maximum number of symbols a `VP6Huff` table can hold.
const MAX_HUFF_ELEMS: usize = 12;

/// Huffman code table: `codes[sym]` holds the code for a symbol and
/// `bits[sym]` its length in bits.
#[derive(Clone,Copy,Default)]
pub struct VP6Huff {
    pub codes:  [u16; MAX_HUFF_ELEMS],
    pub bits:   [u8; MAX_HUFF_ELEMS],
}

/// Tree node used while building codes; `sym` is negative for inner nodes,
/// whose children live at indices `ch0` and `ch1`.
#[derive(Clone,Copy,Default)]
struct Node {
    weight:     u16,
    sym:        i8,
    ch0:        usize,
    ch1:        usize,
}

/// Multiplies two 8-bit probabilities (scaled by 256) and never returns zero,
/// so that every symbol keeps a non-zero weight.
fn prob2weight(a: u8, b: u8) -> u8 {
    let w = ((u16::from(a) * u16::from(b)) >> 8) as u8;
    w.max(1)
}

impl VP6Huff {
    /// Builds the 12-symbol code table from 11 binary tree probabilities.
    ///
    /// Each leaf weight is the product of the branch probabilities on the way
    /// to it (`!p` stands for the complementary branch).
    pub fn build_codes(&mut self, probs: &[u8; 11]) {
        let mut weights = [0u8; 12];

        weights[11] = prob2weight( probs[0],  probs[ 1]);
        weights[ 0] = prob2weight( probs[0], !probs[ 1]);
        weights[ 1] = prob2weight(!probs[0],  probs[ 2]);
        let lvroot  = prob2weight(!probs[0], !probs[ 2]);
        let tworoot = prob2weight( lvroot,    probs[ 3]);
        let hlroot  = prob2weight( lvroot,   !probs[ 3]);
        weights[ 2] = prob2weight( tworoot,   probs[ 4]);
        let root34  = prob2weight( tworoot,  !probs[ 4]);
        weights[ 3] = prob2weight( root34,    probs[ 5]);
        weights[ 4] = prob2weight( root34,   !probs[ 5]);
        let c1root  = prob2weight( hlroot,    probs[ 6]);
        let c34root = prob2weight( hlroot,   !probs[ 6]);
        weights[ 5] = prob2weight( c1root,    probs[ 7]);
        weights[ 6] = prob2weight( c1root,   !probs[ 7]);
        let c3root  = prob2weight( c34root,   probs[ 8]);
        let c4root  = prob2weight( c34root,  !probs[ 8]);
        weights[ 7] = prob2weight( c3root,    probs[ 9]);
        weights[ 8] = prob2weight( c3root,   !probs[ 9]);
        weights[ 9] = prob2weight( c4root,    probs[10]);
        weights[10] = prob2weight( c4root,   !probs[10]);

        self.build(&weights);
    }
    /// Builds the 9-symbol zero-run code table; only the first eight entries
    /// of `probs` are read.
    pub fn build_codes_zero_run(&mut self, probs: &[u8; 14]) {
        let mut weights = [0u8; 9];

        let root   = prob2weight( probs[0],  probs[1]);
        weights[0] = prob2weight( root,      probs[2]);
        weights[1] = prob2weight( root,     !probs[2]);

        let root   = prob2weight( probs[0], !probs[1]);
        weights[2] = prob2weight( root,      probs[3]);
        weights[3] = prob2weight( root,     !probs[3]);

        let root   = prob2weight(!probs[0],  probs[4]);
        weights[8] = prob2weight(!probs[0], !probs[4]);
        let root1  = prob2weight( root,      probs[5]);
        let root2  = prob2weight( root,     !probs[5]);
        weights[4] = prob2weight( root1,     probs[6]);
        weights[5] = prob2weight( root1,    !probs[6]);
        weights[6] = prob2weight( root2,     probs[7]);
        weights[7] = prob2weight( root2,    !probs[7]);

        self.build(&weights);
    }
    /// Builds the tree for the given leaf weights and fills `codes`/`bits`.
    ///
    /// The insertion and merge order (including how ties are resolved) decides
    /// the resulting codes, so it is deliberately left exactly as it was.
    fn build(&mut self, weights: &[u8]) {
        debug_assert!(!weights.is_empty() && weights.len() <= MAX_HUFF_ELEMS);

        let mut nodes = [Node::default(); MAX_HUFF_ELEMS * 2];
        let mut nlen = 0;

        // Insert the leaves, last symbol first, keeping the list sorted by
        // ascending weight (a new leaf goes after leaves of equal weight).
        for w in weights.iter().rev() {
            let weight = u16::from(*w);
            let mut pos = nlen;
            for i in 0..nlen {
                if nodes[i].weight > weight {
                    pos = i;
                    break;
                }
            }
            for j in (pos..nlen).rev() {
                nodes[j + 1] = nodes[j];
            }
            nodes[pos].weight = weight;
            nodes[pos].sym    = (weights.len() - nlen - 1) as i8;
            nlen += 1;
        }

        // Repeatedly merge the two lightest unmerged nodes; already consumed
        // nodes stay below `low`, so child indices remain valid.
        let mut low = 0;
        for _ in 0..nlen-1 {
            let nnode = Node {
                    weight: nodes[low].weight + nodes[low + 1].weight,
                    sym:    -1,
                    ch0:    low,
                    ch1:    low + 1,
                };
            low += 2;
            let mut pos = low;
            while (pos < nlen) && (nodes[pos].weight < nnode.weight) {
                pos += 1;
            }
            for j in (pos..nlen).rev() {
                nodes[j + 1] = nodes[j];
            }
            nodes[pos] = nnode;
            nlen += 1;
        }
        // The root is the last node created.
        self.get_codes(&nodes, nlen - 1, 0, 0);

        // Pad the unused tail of the table with a copy of entry 0. This used
        // to start at `nlen`, which by now counts inner nodes as well, so the
        // padding never ran and stale entries were left behind.
        for i in weights.len()..self.codes.len() {
            self.codes[i] = self.codes[0];
            self.bits[i]  = self.bits[0];
        }
    }
    /// Walks the tree from `pos`, appending a 0 bit for `ch0` and a 1 bit for
    /// `ch1`, and records the code of every leaf reached.
    fn get_codes(&mut self, nodes: &[Node], pos: usize, code: u16, len: u8) {
        if nodes[pos].sym >= 0 {
            self.codes[nodes[pos].sym as usize] = code;
            self.bits [nodes[pos].sym as usize] = len;
        } else {
            self.get_codes(nodes, nodes[pos].ch0, code << 1,       len + 1);
            self.get_codes(nodes, nodes[pos].ch1, (code << 1) | 1, len + 1);
        }
    }
}
+ +#[derive(Clone,Copy,Default)] +pub struct VP6HuffModels { + pub dc_token_tree: [VP6Huff; 2], + pub ac_token_tree: [[[VP6Huff; 6]; 3]; 2], + pub zero_run_tree: [VP6Huff; 2], +} + +impl VP6Models { + fn new() -> Self { + Self { + scan_order: [0; 64], + scan: [0; 64], + zigzag: [0; 64], + zero_run_probs: [[0; 14]; 2], + } + } +} + +#[derive(Clone)] +pub struct VP56Models { + pub mv_models: [VP56MVModel; 2], + pub mbtype_models: [[VP56MBTypeModel; 10]; 3], + pub coeff_models: [VP56CoeffModel; 2], + pub prob_xmitted: [[u8; 20]; 3], + pub vp6models: VP6Models, + pub vp6huff: VP6HuffModels, +} + +impl VP56Models { + pub fn new() -> Self { + Self { + mv_models: [VP56MVModel::default(); 2], + mbtype_models: [[VP56MBTypeModel::default(); 10]; 3], + coeff_models: [VP56CoeffModel::default(); 2], + prob_xmitted: [[0; 20]; 3], + vp6models: VP6Models::new(), + vp6huff: VP6HuffModels::default(), + } + } + pub fn reset(&mut self, interlaced: bool) { + for (i, mdl) in self.mv_models.iter_mut().enumerate() { + mdl.nz_prob = NZ_PROBS[i]; + mdl.sign_prob = 128; + mdl.raw_probs.copy_from_slice(&RAW_PROBS[i]); + mdl.tree_probs.copy_from_slice(&TREE_PROBS[i]); + } + + for mdl in self.coeff_models.iter_mut() { + mdl.dc_value_probs = [128; 11]; + mdl.ac_val_probs = [[[128; 11]; 6]; 3]; + } + self.vp6models.zero_run_probs.copy_from_slice(&ZERO_RUN_PROBS); + reset_scan(&mut self.vp6models, interlaced); + } + pub fn reset_mbtype_models(&mut self) { + const DEFAULT_XMITTED_PROBS: [[u8; 20]; 3] = [ + [ 42, 69, 2, 1, 7, 1, 42, 44, 22, 6, 3, 1, 2, 0, 5, 1, 1, 0, 0, 0 ], + [ 8, 229, 1, 1, 8, 0, 0, 0, 0, 0, 2, 1, 1, 0, 0, 0, 1, 1, 0, 0 ], + [ 35, 122, 1, 1, 6, 1, 34, 46, 0, 0, 2, 1, 1, 0, 1, 0, 1, 1, 0, 0 ] + ]; + self.prob_xmitted.copy_from_slice(&DEFAULT_XMITTED_PROBS); + } +} + +pub fn reset_scan(model: &mut VP6Models, interlaced: bool) { + if !interlaced { + model.scan_order.copy_from_slice(&VP6_DEFAULT_SCAN_ORDER); + } else { + model.scan_order.copy_from_slice(&VP6_INTERLACED_SCAN_ORDER); + 
/// Counts binary decisions in order to derive the probability of a zero.
#[derive(Clone,Copy,Default)]
pub struct ProbCounter {
    zeroes: u32,
    total:  u32,
}

// bits to code zero probability multiplied by eight
pub const PROB_BITS: [u8; 256] = [
     0, 64, 56, 51, 48, 45, 43, 42,
    40, 39, 37, 36, 35, 34, 34, 33,
    32, 31, 31, 30, 29, 29, 28, 28,
    27, 27, 26, 26, 26, 25, 25, 24,
    24, 24, 23, 23, 23, 22, 22, 22,
    21, 21, 21, 21, 20, 20, 20, 20,
    19, 19, 19, 19, 18, 18, 18, 18,
    18, 17, 17, 17, 17, 17, 16, 16,
    16, 16, 16, 15, 15, 15, 15, 15,
    15, 14, 14, 14, 14, 14, 14, 14,
    13, 13, 13, 13, 13, 13, 13, 12,
    12, 12, 12, 12, 12, 12, 12, 11,
    11, 11, 11, 11, 11, 11, 11, 11,
    10, 10, 10, 10, 10, 10, 10, 10,
    10,  9,  9,  9,  9,  9,  9,  9,
     9,  9,  9,  8,  8,  8,  8,  8,
     8,  8,  8,  8,  8,  8,  7,  7,
     7,  7,  7,  7,  7,  7,  7,  7,
     7,  7,  6,  6,  6,  6,  6,  6,
     6,  6,  6,  6,  6,  6,  6,  5,
     5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  4,  4,
     4,  4,  4,  4,  4,  4,  4,  4,
     4,  4,  4,  4,  4,  4,  3,  3,
     3,  3,  3,  3,  3,  3,  3,  3,
     3,  3,  3,  3,  3,  3,  3,  2,
     2,  2,  2,  2,  2,  2,  2,  2,
     2,  2,  2,  2,  2,  2,  2,  2,
     2,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0
];

impl ProbCounter {
    /// Records one decision; `false` counts as a zero.
    pub fn add(&mut self, b: bool) {
        if !b {
            self.zeroes += 1;
        }
        self.total += 1;
    }
    /// Converts the counts into an 8-bit probability of a zero.
    ///
    /// The result is even and capped at 254, except that 0 is raised to 1;
    /// 128 is returned when nothing was counted.
    pub fn to_prob(self) -> u8 {
        if self.total > 0 {
            // widened so that `zeroes << 8` cannot lose its top bits
            let scaled = (u64::from(self.zeroes) << 8) / u64::from(self.total);
            ((scaled.min(254) as u8) & !1).max(1)
        } else {
            128
        }
    }
    /// Returns the measured probability if switching to it is estimated to
    /// save more than seven bits over keeping `old_prob`, and `old_prob`
    /// otherwise (also when nothing was counted).
    pub fn to_prob_worthy(&self, old_prob: u8) -> u8 {
        if self.total == 0 {
            return old_prob;
        }
        let new_prob = self.to_prob();
        let new_bits = Self::est_bits(new_prob, self.zeroes, self.total);
        let old_bits = Self::est_bits(old_prob, self.zeroes, self.total);

        if new_bits + 7 < old_bits {
            new_prob
        } else {
            old_prob
        }
    }
    /// Estimates the number of bits needed to code the counted decisions with
    /// the given zero probability.
    fn est_bits(prob: u8, zeroes: u32, total: u32) -> u32 {
        // A probability of 0 would index one past the end of the table for
        // the "one" branch, so it is costed as 1.
        let prob = usize::from(prob.max(1));
        let ones = u64::from(total - zeroes);
        let nits = u64::from(PROB_BITS[prob]) * u64::from(zeroes)
                 + u64::from(PROB_BITS[256 - prob]) * ones;
        ((nits + 7) >> 3).min(u64::from(u32::MAX)) as u32
    }
}
+ for (dp, sp) in dmv.ac_val_probs.iter_mut().zip(smv.ac_val_probs.iter()) { + for (dp, sp) in dp.iter_mut().zip(sp.iter()) { + for (dp, sp) in dp.iter_mut().zip(sp.iter()) { + *dp = sp.to_prob_worthy(*dp); + } + } + } + } + for (dp, sp) in dst.vp6models.zero_run_probs.iter_mut().zip(self.vp6models.zero_run_probs.iter()) { + for (dp, sp) in dp.iter_mut().zip(sp.iter()) { + *dp = sp.to_prob_worthy(*dp); + } + } + } + /* + VPMBType::InterNoMV => 0, + VPMBType::Intra => 1, + VPMBType::InterMV => 2, + VPMBType::InterNearest => 3, + VPMBType::InterNear => 4, + VPMBType::GoldenNoMV => 5, + VPMBType::GoldenMV => 6, + VPMBType::InterFourMV => 7, + VPMBType::GoldenNearest => 8, + VPMBType::GoldenNear => 9, + */ + fn generate_prob_xmitted(probs: &mut [u8; 20], mbtype: &[[usize; 10]; 10]) { + let mut sums = [0; 20]; + let mut total = 0; + for (last, row) in mbtype.iter().enumerate() { + for (cur, &count) in row.iter().enumerate() { + if last == cur { + sums[cur * 2 + 1] = count; + } else { + sums[cur * 2] += count; + } + total += count; + } + } + if total != 0 { + let mut sum = 0; + for (dprob, &sprob) in probs.iter_mut().zip(sums.iter()) { + if sprob != 0 { + *dprob = ((sprob * 256 + total - 1) / total).min(255) as u8; + sum += u16::from(*dprob); + } else { + *dprob = 0; + } + } + while sum > 256 { + for prob in probs.iter_mut() { + if *prob > 1 { + *prob -= 1; + sum -= 1; + if sum == 256 { + break; + } + } + } + } + } else { + *probs = [0; 20]; + } + } +} diff --git a/nihav-duck/src/codecs/vp6enc/ratectl.rs b/nihav-duck/src/codecs/vp6enc/ratectl.rs new file mode 100644 index 0000000..9fb6701 --- /dev/null +++ b/nihav-duck/src/codecs/vp6enc/ratectl.rs @@ -0,0 +1,97 @@ +use super::rdo::*; + +pub struct RateControl { + pub lambda: f32, + tgt_br: u32, + budget: isize, + cur_time: u32, + ts_num: u32, + ts_den: u32, + mb_w: usize, + mb_h: usize, + projected: usize, +} + +// todo intra/inter decision, better allocation for intra frames +impl RateControl { + pub fn new() -> Self { 
+ Self { + lambda: 1.0, + tgt_br: 0, + budget: 0, + cur_time: 0, + ts_num: 0, + ts_den: 0, + mb_w: 0, + mb_h: 0, + projected: 0, + } + } + pub fn init(&mut self, mb_w: usize, mb_h: usize, bitrate: u32, ts_num: u32, ts_den: u32) { + self.mb_w = mb_w; + self.mb_h = mb_h; + self.lambda = 1.0; + self.cur_time = 0; + if bitrate == 0 || ts_num == 0 || ts_den == 0 { + self.tgt_br = 0; + self.budget = 0; + } else { + self.tgt_br = bitrate; + self.budget = bitrate as isize; + self.ts_num = ts_num; + self.ts_den = ts_den; + } + } + pub fn guess_quant(&mut self, intra: bool, huffman: bool) -> usize { + let fsize = self.get_target_frame_size(intra); + self.projected = fsize; + if fsize > 0 { + for q in 0..64 { + let est_fsize = estimate_frame_size(intra, huffman, q, self.mb_w, self.mb_h); + if fsize < est_fsize - est_fsize / 10 { + return q.saturating_sub(1); + } + if fsize < est_fsize + est_fsize / 10 { + return q; + } + } + 63 + } else { + 42 + } + } + pub fn update(&mut self, dsize: usize) { + const LAMBDA_STEP: f32 = 1.0 / 32.0; + + if self.tgt_br == 0 { + return; + } + if (self.projected > dsize + dsize / 10) && self.lambda > LAMBDA_STEP { + self.lambda -= LAMBDA_STEP; + } else if self.projected < dsize - dsize / 10 { + self.lambda += LAMBDA_STEP; + } + self.budget -= dsize as isize; + self.cur_time += self.ts_num; + while self.cur_time >= self.ts_den { + self.cur_time -= self.ts_den; + self.budget += self.tgt_br as isize; + } + } + fn get_target_frame_size(&self, intra: bool) -> usize { + if self.tgt_br == 0 { + 0 + } else { + let mut avg_fsize = self.budget / ((self.ts_den - self.cur_time) as isize); + if avg_fsize > 0 { + // todo better intra/inter selection + if intra { + avg_fsize *= 3; + } + avg_fsize as usize + } else { + (self.tgt_br as usize) * (self.ts_num as usize) / (self.ts_den as usize) / 2 + } + } + } +} diff --git a/nihav-duck/src/codecs/vp6enc/rdo.rs b/nihav-duck/src/codecs/vp6enc/rdo.rs new file mode 100644 index 0000000..d5f252b --- /dev/null +++ 
b/nihav-duck/src/codecs/vp6enc/rdo.rs @@ -0,0 +1,167 @@ +use nihav_codec_support::codecs::{MV, ZERO_MV}; +use super::mb::{Coeffs, InterMB}; +//use super::super::vpcommon::VPMBType; + +// todo replace with an approximation formula +const AVG_NITS_PER_MB_INTRA: [u32; 64] = [ + 143, 146, 148, 150, 154, 163, 169, 177, + 185, 193, 206, 218, 232, 238, 242, 246, + 250, 260, 265, 270, 275, 283, 290, 297, + 304, 319, 327, 343, 358, 368, 381, 393, + 405, 417, 432, 445, 460, 476, 501, 520, + 543, 564, 591, 617, 645, 678, 711, 746, + 786, 833, 892, 966, 1031, 1102, 1187, 1281, + 1395, 1539, 1711, 1938, 2247, 2655, 3314, 4625 +]; +const AVG_NITS_PER_MB_INTER: [u32; 64] = [ + 112, 112, 112, 112, 110, 111, 111, 112, + 112, 113, 112, 118, 121, 117, 118, 118, + 119, 118, 118, 119, 120, 122, 123, 124, + 126, 126, 128, 132, 138, 141, 139, 141, + 148, 151, 152, 155, 158, 162, 172, 177, + 183, 189, 195, 203, 210, 220, 231, 243, + 256, 272, 289, 313, 337, 363, 396, 435, + 481, 539, 613, 710, 864, 1079, 1430, 2225 +]; +const AVG_NITS_PER_MB_INTRA_HUFF: [u32; 64] = [ + 176, 179, 181, 184, 188, 198, 205, 213, + 223, 233, 247, 260, 276, 283, 288, 292, + 297, 307, 313, 318, 324, 334, 341, 349, + 357, 374, 383, 401, 417, 429, 441, 455, + 469, 482, 499, 513, 530, 547, 572, 594, + 621, 644, 673, 703, 734, 771, 807, 847, + 893, 948, 1016, 1097, 1168, 1244, 1335, 1436, + 1555, 1708, 1890, 2130, 2461, 2897, 3604, 5014 +]; +const AVG_NITS_PER_MB_INTER_HUFF: [u32; 64] = [ + 126, 126, 126, 126, 125, 125, 126, 127, + 127, 129, 127, 134, 137, 134, 134, 135, + 136, 135, 136, 137, 138, 141, 142, 143, + 145, 146, 147, 152, 160, 163, 161, 164, + 171, 174, 176, 180, 183, 188, 199, 206, + 212, 219, 227, 236, 244, 255, 268, 281, + 296, 315, 334, 362, 389, 419, 456, 500, + 550, 613, 695, 799, 971, 1205, 1581, 2437 +]; + +// todo replace with an approximation formula +const HDR_SIZES: [(u32, u32); 64] = [ + (5867, 797), (5971, 802), (5975, 798), (5986, 814), + (6152, 802), (6269, 821), (6457, 827), (6620, 855), 
+ (6865, 865), (6852, 878), (7103, 885), (7295, 924), + (7658, 952), (7710, 950), (7851, 959), (8119, 963), + (8119, 962), (8292, 914), (8238, 920), (8260, 949), + (8174, 943), (8515, 952), (8625, 976), (8784, 979), + (8687, 987), (8853, 1007), (8934, 998), (9270, 1013), + (9524, 1082), (9595, 1117), (9889, 1060), (10187, 1070), + (10241, 1143), (10527, 1164), (10691, 1151), (10694, 1188), + (10959, 1132), (11092, 1168), (11197, 1237), (11443, 1267), + (11573, 1295), (11856, 1309), (12165, 1340), (12299, 1364), + (12767, 1344), (13108, 1373), (13262, 1407), (13497, 1420), + (14019, 1458), (14442, 1525), (14935, 1574), (15155, 1626), + (15682, 1691), (15936, 1754), (16394, 1859), (16953, 1944), + (17407, 2078), (18143, 2195), (18850, 2339), (19677, 2525), + (20546, 2786), (21228, 3093), (22549, 3469), (24164, 4144) +]; + +pub fn estimate_frame_size(intra: bool, huffman: bool, q: usize, mb_w: usize, mb_h: usize) -> usize { + let hdr_nits = if intra { HDR_SIZES[q].0 } else { HDR_SIZES[q].1 } as usize; + let avg_mb_nits = match (huffman, intra) { + (false, true) => AVG_NITS_PER_MB_INTRA[q], + (false, false) => AVG_NITS_PER_MB_INTER[q], + (true, true) => AVG_NITS_PER_MB_INTRA_HUFF[q], + (true, false) => AVG_NITS_PER_MB_INTER_HUFF[q], + } as usize; + (hdr_nits + avg_mb_nits * mb_w * mb_h + 7) >> 3 +} + +pub fn estimate_mv_nits(mv: MV) -> u32 { + (mv.x.abs() + mv.y.abs()).min(32) as u32 +} + +// todo make an approximation +const NITS_PER_BLOCK: [[u32; 65]; 16] = [ + [ + 0, 42, 103, 150, 197, 245, 290, 332, 362, 399, 438, 485, 516, 555, 568, 615, 645, 704, 752, 775, 829, 807, 850, 847, 847, 424, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938, 938 + ], [ + 0, 44, 103, 151, 198, 245, 292, 340, 373, 405, 452, 498, 527, 570, 604, 638, 658, 697, 727, 764, 807, 842, 887, 986, 893, 985, 1048, 1018, 1018, 1013, 1013, 1013, 1013, 1013, 1013, 
1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013, 1013 + ], [ + 0, 47, 105, 154, 200, 246, 295, 342, 380, 419, 458, 498, 547, 585, 626, 664, 693, 734, 772, 809, 842, 853, 864, 920, 955, 981, 1031, 1045, 1096, 1152, 1184, 1188, 1206, 1338, 1270, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175, 1175 + ], [ + 0, 49, 105, 155, 203, 248, 294, 342, 383, 416, 464, 510, 544, 595, 620, 668, 714, 747, 773, 824, 865, 889, 919, 934, 969, 1029, 1010, 1063, 1088, 1158, 1175, 1204, 1207, 1303, 1283, 1358, 1512, 1405, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337, 1337 + ], [ + 0, 50, 106, 156, 203, 252, 297, 345, 387, 423, 463, 509, 552, 594, 627, 668, 722, 758, 788, 824, 870, 898, 936, 963, 983, 1019, 1023, 1100, 1101, 1140, 1199, 1206, 1240, 1278, 1301, 1368, 1405, 1365, 1445, 1436, 1483, 1562, 1562, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620, 1620 + ], [ + 1, 51, 107, 156, 204, 252, 298, 347, 391, 424, 465, 509, 553, 591, 634, 685, 709, 747, 805, 826, 881, 914, 937, 997, 1023, 1024, 1056, 1091, 1108, 1145, 1214, 1207, 1246, 1260, 1332, 1400, 1391, 1426, 1486, 1552, 1482, 1573, 1626, 1610, 1610, 1692, 1692, 1692, 1692, 1692, 1692, 1692, 1692, 1692, 1692, 1692, 1692, 1692, 1692, 1692, 1692, 1692, 1692, 1692, 1692 + ], [ + 1, 53, 109, 158, 205, 253, 301, 352, 397, 425, 470, 505, 557, 595, 642, 682, 720, 755, 805, 849, 878, 930, 964, 995, 1034, 1072, 1093, 1142, 1180, 1194, 1219, 1243, 1286, 1318, 1325, 1379, 1403, 1471, 1477, 1473, 1554, 1585, 1593, 1646, 1677, 1692, 1763, 1825, 1817, 1817, 1817, 1817, 1817, 1817, 1817, 1817, 1817, 1817, 1817, 
1817, 1817, 1817, 1817, 1817, 1817 + ], [ + 1, 54, 111, 159, 206, 255, 305, 355, 406, 428, 472, 516, 555, 593, 636, 688, 730, 765, 807, 842, 892, 933, 987, 1001, 1050, 1097, 1111, 1167, 1175, 1223, 1255, 1294, 1297, 1342, 1361, 1404, 1408, 1471, 1495, 1560, 1553, 1611, 1635, 1692, 1783, 1695, 1765, 1764, 1906, 1906, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018 + ], [ + 1, 55, 113, 160, 209, 255, 306, 357, 408, 435, 477, 521, 557, 598, 641, 685, 730, 773, 814, 844, 889, 940, 982, 1008, 1065, 1102, 1124, 1188, 1228, 1251, 1287, 1297, 1350, 1399, 1380, 1436, 1484, 1514, 1537, 1590, 1599, 1642, 1634, 1726, 1762, 1755, 1799, 1862, 1919, 2086, 1712, 1984, 1984, 1984, 1984, 1984, 1984, 1984, 1984, 1984, 1984, 1984, 1984, 1984, 1984 + ], [ + 1, 56, 116, 164, 211, 259, 307, 359, 414, 437, 481, 525, 564, 608, 645, 685, 734, 769, 808, 858, 906, 943, 987, 1000, 1058, 1101, 1145, 1183, 1211, 1274, 1274, 1357, 1384, 1412, 1469, 1454, 1505, 1543, 1563, 1617, 1639, 1667, 1681, 1762, 1770, 1797, 1869, 1853, 1909, 1978, 2077, 2034, 1967, 2139, 2139, 2139, 2139, 2139, 2139, 2139, 2139, 2139, 2139, 2139, 2139 + ], [ + 1, 59, 118, 166, 213, 261, 309, 360, 416, 437, 483, 524, 570, 611, 651, 686, 732, 768, 806, 844, 895, 938, 983, 1025, 1071, 1103, 1149, 1193, 1226, 1258, 1312, 1340, 1405, 1425, 1479, 1547, 1555, 1588, 1612, 1641, 1662, 1723, 1739, 1797, 1841, 1854, 1888, 1879, 1947, 2025, 2065, 2104, 2158, 2077, 2380, 2177, 2336, 2078, 2078, 2078, 2078, 2078, 2078, 2078, 2078 + ], [ + 1, 61, 120, 170, 219, 265, 314, 366, 425, 439, 483, 528, 573, 615, 656, 693, 737, 772, 807, 852, 897, 929, 976, 1008, 1053, 1111, 1140, 1192, 1219, 1279, 1314, 1356, 1387, 1449, 1484, 1513, 1552, 1592, 1673, 1704, 1773, 1784, 1809, 1843, 1903, 1902, 1962, 2002, 2014, 2054, 2109, 2161, 2148, 2198, 2235, 2353, 2360, 2511, 2282, 2282, 2282, 2282, 2282, 2282, 2282 + ], [ + 2, 65, 123, 173, 225, 273, 322, 373, 438, 448, 488, 533, 580, 616, 661, 703, 739, 779, 819, 
855, 897, 943, 983, 1013, 1040, 1086, 1132, 1167, 1217, 1258, 1299, 1345, 1391, 1452, 1474, 1526, 1574, 1602, 1646, 1721, 1764, 1803, 1825, 1921, 1919, 2030, 2049, 2067, 2134, 2152, 2209, 2225, 2274, 2299, 2375, 2379, 2374, 2462, 2536, 2584, 2822, 2822, 2822, 2822, 2822 + ], [ + 2, 74, 126, 177, 230, 284, 332, 386, 454, 462, 507, 542, 592, 630, 673, 717, 753, 799, 841, 874, 913, 955, 996, 1031, 1073, 1103, 1138, 1172, 1219, 1249, 1300, 1325, 1383, 1428, 1446, 1504, 1550, 1585, 1636, 1702, 1728, 1784, 1841, 1905, 1934, 1978, 2055, 2130, 2165, 2231, 2291, 2417, 2405, 2494, 2491, 2525, 2587, 2626, 2658, 2658, 2708, 2713, 2667, 2867, 2867 + ], [ + 4, 88, 137, 193, 244, 302, 353, 416, 495, 490, 534, 579, 615, 663, 716, 757, 792, 833, 884, 918, 962, 1006, 1058, 1087, 1125, 1169, 1203, 1258, 1286, 1326, 1365, 1405, 1437, 1477, 1520, 1554, 1597, 1642, 1664, 1709, 1759, 1800, 1842, 1894, 1941, 2002, 2062, 2124, 2138, 2225, 2310, 2377, 2448, 2584, 2628, 2731, 2824, 2846, 2960, 3016, 3147, 3099, 3197, 3107, 3107 + ], [ + 18, 145, 182, 239, 297, 360, 415, 485, 584, 579, 638, 684, 737, 782, 834, 878, 924, 967, 1016, 1062, 1111, 1145, 1206, 1258, 1302, 1357, 1388, 1460, 1493, 1538, 1577, 1619, 1684, 1732, 1770, 1823, 1879, 1909, 1959, 2022, 2053, 2113, 2176, 2207, 2272, 2316, 2367, 2428, 2479, 2548, 2622, 2671, 2748, 2813, 2894, 2984, 3064, 3153, 3258, 3392, 3507, 3669, 3870, 4082, 4255 + ] +]; + +pub fn estimate_intra_mb_nits(mb: &Coeffs, q: usize) -> u32 { + let mut count = 0; + for blk in mb.iter() { + let mut nz = 0; + for &el in blk.iter() { + if el != 0 { + nz += 1; + } + } + count += NITS_PER_BLOCK[q >> 2][nz]; + } + count +} + +pub fn estimate_inter_mb_nits(mb: &InterMB, q: usize, fourmv: bool) -> u32 { + let mut count = 0; + for blk in mb.residue.coeffs.iter() { + let mut nz = 0; + for &el in blk.iter() { + if el != 0 { + nz += 1; + } + } + count += NITS_PER_BLOCK[q >> 2][nz]; + } + if !fourmv { + if mb.mv[3] != ZERO_MV { + count += estimate_mv_nits(mb.mv[3]); + } + } 
else { + count += 4 * 2 * u32::from(super::models::PROB_BITS[128]); + for &mv in mb.mv.iter() { + if mv != ZERO_MV { + count += estimate_mv_nits(mv); + } + } + } + count +} + +/*pub fn estimate_mb_hdr_size(mb_type: VPMBType) -> u32 { + match mb_type { + VPMBType::Intra => 60, + VPMBType::InterFourMV => 34, + VPMBType::InterNoMV => 3, + VPMBType::InterMV | VPMBType::InterNearest | VPMBType::InterNear => 20, + _ => 50, + } +}*/ diff --git a/nihav-duck/src/lib.rs b/nihav-duck/src/lib.rs index 366a2db..eb8560e 100644 --- a/nihav-duck/src/lib.rs +++ b/nihav-duck/src/lib.rs @@ -10,6 +10,7 @@ extern crate nihav_codec_support; mod codecs; pub use crate::codecs::duck_register_all_decoders; +pub use crate::codecs::duck_register_all_encoders; #[cfg(test)] extern crate nihav_commonfmt; -- 2.39.5