X-Git-Url: https://git.nihav.org/?p=nihav.git;a=blobdiff_plain;f=nihav-rad%2Fsrc%2Fcodecs%2Fbink2.rs;h=9ae02a710468ddaa349d0f004dc1e2d526557424;hp=e200a0c3835afd53ca55566597236a7163ce7e2b;hb=78fb6560c73965d834b215fb0b49505ae5443288;hpb=5de1b1183a7a086f03fa5de407bb5df2445a3524 diff --git a/nihav-rad/src/codecs/bink2.rs b/nihav-rad/src/codecs/bink2.rs index e200a0c..9ae02a7 100644 --- a/nihav-rad/src/codecs/bink2.rs +++ b/nihav-rad/src/codecs/bink2.rs @@ -4,6 +4,16 @@ use nihav_core::io::byteio::*; use nihav_core::io::bitreader::*; use nihav_core::io::codebook::*; use nihav_core::io::intcode::*; +use nihav_codec_support::codecs::{IPShuffler, MV, ZERO_MV}; + +macro_rules! mktag { + ($a:expr, $b:expr, $c:expr, $d:expr) => ({ + (($a as u32) << 24) | (($b as u32) << 16) | (($c as u32) << 8) | ($d as u32) + }); + ($arr:expr) => ({ + (($arr[0] as u32) << 24) | (($arr[1] as u32) << 16) | (($arr[2] as u32) << 8) | ($arr[3] as u32) + }); +} macro_rules! idct_mul { (a; $val: expr) => ($val + ($val >> 2)); @@ -44,7 +54,7 @@ macro_rules! idct { }; (float; $src: expr, $sstep: expr, $off: expr, $dst: expr, $dstep: expr, $doff: expr, $bias: expr, $shift: expr) => { let t00 = $src[$off + $sstep * 2] + $src[$off + $sstep * 6]; - let t01 = ($src[$off + $sstep * 2] - $src[$off + $sstep * 6]) * 1.4142135 - t00; + let t01 = ($src[$off + $sstep * 2] - $src[$off + $sstep * 6]) * std::f32::consts::SQRT_2 - t00; let t02 = $src[$off + $sstep * 0] + $src[$off + $sstep * 4]; let t03 = $src[$off + $sstep * 0] - $src[$off + $sstep * 4]; let t04 = $src[$off + $sstep * 3] + $src[$off + $sstep * 5]; @@ -56,7 +66,7 @@ macro_rules! idct { let t10 = t03 + t01; let t11 = t03 - t01; let t12 = t06 + t04; - let t13 = (t06 - t04) * 1.4142135; + let t13 = (t06 - t04) * std::f32::consts::SQRT_2; let t14 = (t07 - t05) * 1.847759; let t15 = t05 * 2.613126 + t14 - t12; let t16 = t13 - t15; @@ -73,6 +83,7 @@ macro_rules! idct { }; } +#[allow(clippy::erasing_op)] fn bink2_idct(coeffs: &mut [i32; 64]) { let mut tmp: [i32; 64] = [0; 64]; for i in 0..8 { @@ -83,8 +94,10 @@ fn bink2_idct(coeffs: &mut [i32; 64]) { } } +#[allow(clippy::erasing_op)] fn bink2_idct_old(coeffs: &mut [f32; 64]) { let mut tmp: [f32; 64] = [0.0; 64]; + coeffs[0] += 512.5; for i in 0..8 { idct!(float; coeffs, 8, i, tmp, 8, i, 0, 0); } @@ -110,7 +123,7 @@ macro_rules! luma_filter { let t0 = el!($src, $off - 0 * $step) + el!($src, $off + 1 * $step); let t1 = el!($src, $off - 1 * $step) + el!($src, $off + 2 * $step); let t2 = el!($src, $off - 2 * $step) + el!($src, $off + 3 * $step); - clip8((((t0 * 19) >> 1) - t1 * 2 + (t2 >> 1) + 8) >> 4) + (((t0 * 19) >> 1) - t1 * 2 + (t2 >> 1) + 8) >> 4 }); } @@ -138,6 +151,7 @@ macro_rules! avg_tree { ($a: expr, $b: expr, $c: expr, $d: expr) => (avg_tree!(avg_tree!($a, $b), avg_tree!($c, $d))); } +#[allow(clippy::erasing_op)] impl Bink2DSP { fn calc_dc(src: &[u8], stride: usize) -> i32 { let mut sums = [0u16; 8]; @@ -208,16 +222,16 @@ impl Bink2DSP { { let dout = &mut dst[off..]; for (row, (b0, b1)) in dout.chunks_mut(stride).zip(blk[0].chunks(8).zip(blk[1].chunks(8))) { - for i in 0..8 { row[i + 0] = clip8(b0[i] as i32); } - for i in 0..8 { row[i + 8] = clip8(b1[i] as i32); } + for i in 0..8 { row[i + 0] = clip8((b0[i] as i32) - 512); } + for i in 0..8 { row[i + 8] = clip8((b1[i] as i32) - 512); } } } off += stride * 8; { let dout = &mut dst[off..]; for (row, (b2, b3)) in dout.chunks_mut(stride).zip(blk[2].chunks(8).zip(blk[3].chunks(8))) { - for i in 0..8 { row[i + 0] = clip8(b2[i] as i32); } - for i in 0..8 { row[i + 8] = clip8(b3[i] as i32); } + for i in 0..8 { row[i + 0] = clip8((b2[i] as i32) - 512); } + for i in 0..8 { row[i + 8] = clip8((b3[i] as i32) - 512); } } } } @@ -229,16 +243,16 @@ impl Bink2DSP { { let dout = &mut dst[off..]; for (row, (b0, b1)) in dout.chunks_mut(stride).zip(blk[0].chunks(8).zip(blk[1].chunks(8))) { - for i in 0..8 { row[i + 0] = clip8((row[i + 0] as i32) + (b0[i] as i32)); } - for i in 0..8 { row[i + 8] = clip8((row[i + 8] as i32) + (b1[i] as i32)); } + for i in 0..8 { row[i + 0] = clip8((row[i + 0] as i32) + (b0[i] as i32) - 512); } + for i in 0..8 { row[i + 8] = clip8((row[i + 8] as i32) + (b1[i] as i32) - 512); } } } off += stride * 8; { let dout = &mut dst[off..]; for (row, (b2, b3)) in dout.chunks_mut(stride).zip(blk[2].chunks(8).zip(blk[3].chunks(8))) { - for i in 0..8 { row[i + 0] = clip8((row[i + 0] as i32) + (b2[i] as i32)); } - for i in 0..8 { row[i + 8] = clip8((row[i + 8] as i32) + (b3[i] as i32)); } + for i in 0..8 { row[i + 0] = clip8((row[i + 0] as i32) + (b2[i] as i32) - 512); } + for i in 0..8 { row[i + 8] = clip8((row[i + 8] as i32) + (b3[i] as i32) - 512); } } } } @@ -250,8 +264,10 @@ impl Bink2DSP { let (d_y, add_y) = if (mv.y & 1) != 0 { (2, 5) } else { (0, 0) }; let (w, h) = ref_pic.get_dimensions(plane); - validate!((sx - d_x >= 0) && (sx - d_x + add_x + 16 <= (w as isize))); - validate!((sy - d_y >= 0) && (sy - d_y + add_y + 16 <= (h as isize))); + let align_w = ((w + 31) & !31) as isize; + let align_h = ((h + 31) & !31) as isize; + validate!((sx - d_x >= 0) && (sx - d_x + add_x + 16 <= align_w)); + validate!((sy - d_y >= 0) && (sy - d_y + add_y + 16 <= align_h)); let pstride = ref_pic.get_stride(plane); let mut poff = ref_pic.get_offset(plane) + (sx as usize) + (sy as usize) * pstride; let pdata = ref_pic.get_data(); @@ -263,13 +279,13 @@ impl Bink2DSP { 0 => { let src = &ppix[poff..]; for (out, row) in dst.chunks_mut(stride).take(16).zip(src.chunks(pstride)) { - for i in 0..16 { out[i] = row[i]; } + out[..16].copy_from_slice(&row[..16]); } }, 1 => { for out in dst.chunks_mut(stride).take(16) { for i in 0..16 { - out[i] = luma_filter!(ppix, poff + i, 1); + out[i] = clip8(luma_filter!(ppix, poff + i, 1)); } poff += pstride; } @@ -277,22 +293,22 @@ impl Bink2DSP { 2 => { for out in dst.chunks_mut(stride).take(16) { for i in 0..16 { - out[i] = luma_filter!(ppix, poff + i, pstride); + out[i] = clip8(luma_filter!(ppix, poff + i, pstride)); } poff += pstride; } }, 3 => { - let mut tmp = [0u8; 21 * 16]; + let mut tmp = [0i16; 21 * 16]; for out in tmp.chunks_mut(16) { for i in 0..16 { - out[i] = luma_filter!(ppix, poff - 2 * pstride + i, 1); + out[i] = luma_filter!(ppix, poff - 2 * pstride + i, 1) as i16; } poff += pstride; } for (row, out) in dst.chunks_mut(stride).take(16).enumerate() { for i in 0..16 { - out[i] = luma_filter!(tmp, (row + 2) * 16 + i, 16); + out[i] = clip8(luma_filter!(tmp, (row + 2) * 16 + i, 16)); } } }, @@ -309,8 +325,10 @@ impl Bink2DSP { let add_y = if my != 0 { 1 } else { 0 }; let (w, h) = ref_pic.get_dimensions(plane); - validate!((sx >= 0) && (sx + add_x + 8 <= (w as isize))); - validate!((sy >= 0) && (sy + add_y + 8 <= (h as isize))); + let align_w = ((w + 15) & !15) as isize; + let align_h = ((h + 15) & !15) as isize; + validate!((sx >= 0) && (sx + add_x + 8 <= align_w)); + validate!((sy >= 0) && (sy + add_y + 8 <= align_h)); let pstride = ref_pic.get_stride(plane); let poff = ref_pic.get_offset(plane) + (sx as usize) + (sy as usize) * pstride; let pdata = ref_pic.get_data(); @@ -320,7 +338,7 @@ impl Bink2DSP { if (mx == 0) && (my == 0) { let inpix = &ppix[poff..]; for (out, src) in dst.chunks_mut(stride).take(8).zip(inpix.chunks(pstride)) { - for i in 0..8 { out[i] = src[i]; } + out[..8].copy_from_slice(&src[..8]); } } else if my == 0 { chroma_interp!(dst, stride, 8, u8, &ppix[poff..], pstride, 1, mx, 2); @@ -956,7 +974,7 @@ struct Bink2Codes { } fn map_ac(idx: usize) -> u8 { idx as u8 } -fn map_mv(idx: usize) -> i8 { BINK2_MV_SYMS[idx] } +fn map_mv(idx: usize) -> i8 { BINK2_MV_SYMS[idx] } impl Default for Bink2Codes { fn default() -> Self { @@ -982,10 +1000,14 @@ impl Default for Bink2Codes { #[derive(Default)] struct Bink2Decoder { - info: Rc, + info: NACodecInfoRef, ips: IPShuffler, version: u32, + has_alpha: bool, + slice_h: [usize; 8], + num_slices: usize, + key_frame: bool, cur_w: usize, cur_h: usize, @@ -993,54 +1015,71 @@ struct Bink2Decoder { y_dcs: YDCInfo, u_dcs: CDCInfo, v_dcs: CDCInfo, + a_dcs: YDCInfo, mvs: MVInfo, codes: Bink2Codes, } +#[allow(clippy::erasing_op)] impl Bink2Decoder { fn new() -> Self { Self::default() } + #[allow(clippy::cyclomatic_complexity)] fn decode_frame_new(&mut self, br: &mut BitReader, buf: &mut NAVideoBuffer, is_intra: bool) -> DecoderResult<()> { - let (stride_y, stride_u, stride_v) = (buf.get_stride(0), buf.get_stride(1), buf.get_stride(2)); - let (mut off_y, mut off_u, mut off_v) = (buf.get_offset(0), buf.get_offset(1), buf.get_offset(2)); - let (ooff_y, ooff_u, ooff_v) = (off_y, off_u, off_v); + let (stride_y, stride_u, stride_v, stride_a) = (buf.get_stride(0), buf.get_stride(1), buf.get_stride(2), buf.get_stride(3)); + let (mut off_y, mut off_u, mut off_v, mut off_a) = (buf.get_offset(0), buf.get_offset(1), buf.get_offset(2), buf.get_offset(3)); + let (ooff_y, ooff_u, ooff_v, ooff_a) = (off_y, off_u, off_v, off_a); let (width, height) = buf.get_dimensions(0); - let mut data = buf.get_data_mut(); + let data = buf.get_data_mut().unwrap(); let dst = data.as_mut_slice(); let bw = (width + 31) >> 5; - let bh = (height + 31) >> 6; + let bheight = (height + 31) >> 5; self.cur_w = (width + 7) & !7; self.cur_h = ((height + 7) & !7) >> 1; let frame_flags = br.read(32)?; - let offset2 = br.read(32)?; - if (frame_flags & 0x80000) != 0 { -println!("fill {:X}", frame_flags); -unimplemented!(); + let mut offsets: [u32; 7] = [0; 7]; + for i in 0..self.num_slices-1 { + offsets[i] = br.read(32)?; } - for slice_no in 0..2 { - if slice_no == 1 { - br.seek(offset2 * 8)?; - off_y = ooff_y + stride_y * bh * 32; - off_u = ooff_u + stride_u * bh * 16; - off_v = ooff_v + stride_v * bh * 16; + let mut do_alpha = self.has_alpha; + if (frame_flags & 0x80000) != 0 && self.has_alpha { + do_alpha = false; + let fillval = (frame_flags >> 24) as u8; + let aplane = &mut dst[off_a..][..stride_a * bheight * 32]; + for el in aplane.iter_mut() { + *el = fillval; } - let mut row_flags: Vec = Vec::with_capacity(height >> 3); - let mut col_flags: Vec = Vec::with_capacity(width >> 3); - if (frame_flags & 0x1000) != 0 { - if (frame_flags & 0x8000) != 0 { - decode_flags(br, &mut row_flags, 1, (height >> 3) - 1)?; - } - if (frame_flags & 0x4000) != 0 { - decode_flags(br, &mut col_flags, 1, (width >> 3) - 1)?; - } + } + let mut row_flags: Vec = Vec::with_capacity(bheight * 4); + let mut col_flags: Vec = Vec::with_capacity(bw * 4); + if (frame_flags & 0x10000) != 0 { + if (frame_flags & 0x8000) == 0 { + let len = (height + 15) >> 4; + decode_flags(br, &mut row_flags, 1, len * 2 - 1)?; + } + if (frame_flags & 0x4000) == 0 { + let len = (width + 15) >> 4; + decode_flags(br, &mut col_flags, 1, len * 2 - 1)?; + } + } + row_flags.resize(bheight * 4, false); + col_flags.resize(bw * 4, false); + //store frame_flags * 8 & 0x7F8 + + let mut start_by = 0; + for slice_no in 0..self.num_slices { + let end_by = self.slice_h[slice_no]; + if slice_no != 0 { + br.seek(offsets[slice_no - 1] * 8)?; } - row_flags.resize(height >> 3, 0); - col_flags.resize(width >> 3, 0); - //store frame_flags * 8 & 0x7F8 + off_y = ooff_y + stride_y * start_by * 32; + off_u = ooff_u + stride_u * start_by * 16; + off_v = ooff_v + stride_v * start_by * 16; + off_a = ooff_a + stride_a * start_by * 32; let mut row_state = frame_flags & 0x2E000; if is_intra { @@ -1050,34 +1089,39 @@ unimplemented!(); self.y_dcs.resize(bw); self.u_dcs.resize(bw); self.v_dcs.resize(bw); + self.a_dcs.resize(bw); self.mvs.resize(bw); - for by in 0..bh { + for by in start_by..end_by { let mut cbp_y = 0; let mut cbp_u = 0; let mut cbp_v = 0; + let mut cbp_a = 0; let mut cbp_y_p = 0; let mut cbp_u_p = 0; let mut cbp_v_p = 0; + let mut cbp_a_p = 0; let mut q_y = 8; let mut q_u = 8; let mut q_v = 8; + let mut q_a = 8; let mut q_y_p = 8; let mut q_u_p = 8; let mut q_v_p = 8; - let rflags = (row_flags[by >> 1] >> (if (by & 1) != 0 { 4 } else { 0 })) as u32; + let mut q_a_p = 8; + let rflags = (row_flags[by] as u32) * 4; row_state = (row_state & 0x3FFFFFF) | ((row_state >> 4) & 0xC000000) | (rflags << 28); - if by == 0 { + if by == start_by { row_state |= 0x80; // } else { // row_state |= 0x8; } - if by + 2 >= bh { + if by + 2 >= end_by { row_state |= 0x100; } let mut btype_lru: [u8; 4] = [ 2, 3, 1, 0 ]; let mut edge_state = 0; - let is_top = by == 0; + let is_top = by == start_by; for bx in 0..bw { let mut blk_state = row_state | (edge_state & 0x3FC0000); if bx == 0 { @@ -1089,7 +1133,7 @@ unimplemented!(); if (bx & 1) != 0 { blk_state |= 0x200; } - let clflags = (col_flags[bx >> 1] >> (if (bx & 1) != 0 { 4 } else { 0 })) as u32; + let clflags = (col_flags[bx] as u32) * 4; let edge_state_c = ((blk_state >> 4) & 0x3C0000) | (blk_state & 0xFC03FFFF) | ((clflags & 0xF) << 22); let edge_state_y = (frame_flags & 0x40000) | (blk_state & 0x3FFFF); edge_state = edge_state_c; @@ -1145,7 +1189,14 @@ unimplemented!(); cbp_y = decode_luma_intra(br, &self.codes, cbp_y, q, &mut yblk, edge_state_y, &mut self.y_dcs, bx)?; cbp_v = decode_chroma_intra(br, &self.codes, cbp_v, q, &mut vblk, edge_state_c, &mut self.v_dcs, bx)?; cbp_u = decode_chroma_intra(br, &self.codes, cbp_u, q, &mut ublk, edge_state_c, &mut self.u_dcs, bx)?; -//if smth decode one more y + if do_alpha { + let mut ablk: [[[i32; 64]; 4]; 4] = [[[0; 64]; 4]; 4]; + cbp_a = decode_luma_intra(br, &self.codes, cbp_a, q, &mut ablk, edge_state_y, &mut self.a_dcs, bx)?; + Bink2DSP::put_mb4(dst, off_a + bx * 32 + 0 + 0 * stride_a, stride_a, &mut ablk[0]); + Bink2DSP::put_mb4(dst, off_a + bx * 32 + 16 + 0 * stride_a, stride_a, &mut ablk[1]); + Bink2DSP::put_mb4(dst, off_a + bx * 32 + 0 + 16 * stride_a, stride_a, &mut ablk[2]); + Bink2DSP::put_mb4(dst, off_a + bx * 32 + 16 + 16 * stride_a, stride_a, &mut ablk[3]); + } //if smth else decode one more y Bink2DSP::put_mb4(dst, off_y + bx * 32 + 0 + 0 * stride_y, stride_y, &mut yblk[0]); Bink2DSP::put_mb4(dst, off_y + bx * 32 + 16 + 0 * stride_y, stride_y, &mut yblk[1]); @@ -1160,6 +1211,14 @@ unimplemented!(); cbp_y = decode_luma_intra_old(br, &self.codes, cbp_y, &mut yblk, edge_state_y, &mut self.y_dcs, bx, &mut q_y)?; cbp_v = decode_chroma_intra_old(br, &self.codes, cbp_v, &mut vblk, edge_state_c, &mut self.v_dcs, bx, &mut q_v)?; cbp_u = decode_chroma_intra_old(br, &self.codes, cbp_u, &mut ublk, edge_state_c, &mut self.u_dcs, bx, &mut q_u)?; + if do_alpha { + let mut ablk: [[[f32; 64]; 4]; 4] = [[[0.0; 64]; 4]; 4]; + cbp_a = decode_luma_intra_old(br, &self.codes, cbp_a, &mut ablk, edge_state_y, &mut self.a_dcs, bx, &mut q_a)?; + Bink2DSP::put_mb4_old(dst, off_a + bx * 32 + 0 + 0 * stride_a, stride_a, &mut ablk[0]); + Bink2DSP::put_mb4_old(dst, off_a + bx * 32 + 16 + 0 * stride_a, stride_a, &mut ablk[1]); + Bink2DSP::put_mb4_old(dst, off_a + bx * 32 + 0 + 16 * stride_a, stride_a, &mut ablk[2]); + Bink2DSP::put_mb4_old(dst, off_a + bx * 32 + 16 + 16 * stride_a, stride_a, &mut ablk[3]); + } Bink2DSP::put_mb4_old(dst, off_y + bx * 32 + 0 + 0 * stride_y, stride_y, &mut yblk[0]); Bink2DSP::put_mb4_old(dst, off_y + bx * 32 + 16 + 0 * stride_y, stride_y, &mut yblk[1]); Bink2DSP::put_mb4_old(dst, off_y + bx * 32 + 0 + 16 * stride_y, stride_y, &mut yblk[2]); @@ -1175,10 +1234,13 @@ unimplemented!(); if let Some(ref ref_pic) = self.ips.get_ref() { for blk_no in 0..4 { let xoff = bx * 32 + (blk_no & 1) * 16; - let yoff = slice_no * bh * 32 + by * 32 + (blk_no & 2) * 8; + let yoff = by * 32 + (blk_no & 2) * 8; Bink2DSP::mc_luma(&mut dst[off_y..], stride_y, ref_pic, xoff, yoff, ZERO_MV, 0)?; Bink2DSP::mc_chroma(&mut dst[off_u..], stride_u, ref_pic, xoff >> 1, yoff >> 1, ZERO_MV, 1)?; Bink2DSP::mc_chroma(&mut dst[off_v..], stride_v, ref_pic, xoff >> 1, yoff >> 1, ZERO_MV, 2)?; + if do_alpha { + Bink2DSP::mc_luma(&mut dst[off_a..], stride_a, ref_pic, xoff, yoff, ZERO_MV, 3)?; + } } } else { return Err(DecoderError::MissingReference); @@ -1194,11 +1256,14 @@ unimplemented!(); if let Some(ref ref_pic) = self.ips.get_ref() { for blk_no in 0..4 { let xoff = bx * 32 + (blk_no & 1) * 16; - let yoff = slice_no * bh * 32 + by * 32 + (blk_no & 2) * 8; + let yoff = by * 32 + (blk_no & 2) * 8; let mv = self.mvs.get_mv(bx, blk_no); Bink2DSP::mc_luma(&mut dst[off_y..], stride_y, ref_pic, xoff, yoff, mv, 0)?; Bink2DSP::mc_chroma(&mut dst[off_u..], stride_u, ref_pic, xoff >> 1, yoff >> 1, mv, 1)?; Bink2DSP::mc_chroma(&mut dst[off_v..], stride_v, ref_pic, xoff >> 1, yoff >> 1, mv, 2)?; + if do_alpha { + Bink2DSP::mc_luma(&mut dst[off_a..], stride_a, ref_pic, xoff, yoff, mv, 3)?; + } } } else { return Err(DecoderError::MissingReference); @@ -1213,11 +1278,14 @@ unimplemented!(); if let Some(ref ref_pic) = self.ips.get_ref() { for blk_no in 0..4 { let xoff = bx * 32 + (blk_no & 1) * 16; - let yoff = slice_no * bh * 32 + by * 32 + (blk_no & 2) * 8; + let yoff = by * 32 + (blk_no & 2) * 8; let mv = self.mvs.get_mv(bx, blk_no); Bink2DSP::mc_luma(&mut dst[off_y..], stride_y, ref_pic, xoff, yoff, mv, 0)?; Bink2DSP::mc_chroma(&mut dst[off_u..], stride_u, ref_pic, xoff >> 1, yoff >> 1, mv, 1)?; Bink2DSP::mc_chroma(&mut dst[off_v..], stride_v, ref_pic, xoff >> 1, yoff >> 1, mv, 2)?; + if do_alpha { + Bink2DSP::mc_luma(&mut dst[off_a..], stride_a, ref_pic, xoff, yoff, mv, 3)?; + } } } else { return Err(DecoderError::MissingReference); @@ -1236,6 +1304,14 @@ unimplemented!(); cbp_v_p = 0; cbp_u_p = 0; } + if do_alpha { + let mut ablk: [[[i32; 64]; 4]; 4] = [[[0; 64]; 4]; 4]; + cbp_a_p = decode_luma_inter(br, &self.codes, cbp_a_p, q, &mut ablk, edge_state_y, &mut self.a_dcs)?; + Bink2DSP::add_mb4(dst, off_a + bx * 32 + 0 + 0 * stride_a, stride_a, &mut ablk[0]); + Bink2DSP::add_mb4(dst, off_a + bx * 32 + 16 + 0 * stride_a, stride_a, &mut ablk[1]); + Bink2DSP::add_mb4(dst, off_a + bx * 32 + 0 + 16 * stride_a, stride_a, &mut ablk[2]); + Bink2DSP::add_mb4(dst, off_a + bx * 32 + 16 + 16 * stride_a, stride_a, &mut ablk[3]); + } Bink2DSP::add_mb4(dst, off_y + bx * 32 + 0 + 0 * stride_y, stride_y, &mut yblk[0]); Bink2DSP::add_mb4(dst, off_y + bx * 32 + 16 + 0 * stride_y, stride_y, &mut yblk[1]); Bink2DSP::add_mb4(dst, off_y + bx * 32 + 0 + 16 * stride_y, stride_y, &mut yblk[2]); @@ -1249,6 +1325,14 @@ unimplemented!(); cbp_y_p = decode_luma_inter_old(br, &self.codes, cbp_y_p, &mut yblk, edge_state_y, &mut self.y_dcs, &mut q_y_p)?; cbp_v_p = decode_chroma_inter_old(br, &self.codes, cbp_v_p, &mut vblk, edge_state_y, &mut self.v_dcs, &mut q_v_p)?; cbp_u_p = decode_chroma_inter_old(br, &self.codes, cbp_u_p, &mut ublk, edge_state_y, &mut self.u_dcs, &mut q_u_p)?; + if do_alpha { + let mut ablk: [[[f32; 64]; 4]; 4] = [[[0.0; 64]; 4]; 4]; + cbp_a_p = decode_luma_inter_old(br, &self.codes, cbp_a_p, &mut ablk, edge_state_y, &mut self.a_dcs, &mut q_a_p)?; + Bink2DSP::add_mb4_old(dst, off_a + bx * 32 + 0 + 0 * stride_a, stride_a, &mut ablk[0]); + Bink2DSP::add_mb4_old(dst, off_a + bx * 32 + 16 + 0 * stride_a, stride_a, &mut ablk[1]); + Bink2DSP::add_mb4_old(dst, off_a + bx * 32 + 0 + 16 * stride_a, stride_a, &mut ablk[2]); + Bink2DSP::add_mb4_old(dst, off_a + bx * 32 + 16 + 16 * stride_a, stride_a, &mut ablk[3]); + } Bink2DSP::add_mb4_old(dst, off_y + bx * 32 + 0 + 0 * stride_y, stride_y, &mut yblk[0]); Bink2DSP::add_mb4_old(dst, off_y + bx * 32 + 16 + 0 * stride_y, stride_y, &mut yblk[1]); Bink2DSP::add_mb4_old(dst, off_y + bx * 32 + 0 + 16 * stride_y, stride_y, &mut yblk[2]); @@ -1279,60 +1363,64 @@ unimplemented!(); let dc2 = Bink2DSP::calc_dc(&src[0 + stride_v * 8..], stride_v); let dc3 = Bink2DSP::calc_dc(&src[8 + stride_v * 8..], stride_v); self.v_dcs.set_dcs(bx, dc1, dc2, dc3); + if do_alpha { + let src = &dst[off_a + bx * 32..]; + let dc5 = Bink2DSP::calc_dc(&src[24..], stride_a); + let dc7 = Bink2DSP::calc_dc(&src[24 + stride_y * 8..], stride_a); + let dc13 = Bink2DSP::calc_dc(&src[24 + stride_y * 16..], stride_a); + let dc10 = Bink2DSP::calc_dc(&src[ 0 + stride_y * 24..], stride_a); + let dc11 = Bink2DSP::calc_dc(&src[ 8 + stride_y * 24..], stride_a); + let dc14 = Bink2DSP::calc_dc(&src[16 + stride_y * 24..], stride_a); + let dc15 = Bink2DSP::calc_dc(&src[24 + stride_y * 24..], stride_a); + self.a_dcs.set_dcs(bx, dc5, dc7, dc13, dc10, dc11, dc14, dc15); + } } } self.qinfo.update_line(); self.y_dcs.update_line(); self.u_dcs.update_line(); self.v_dcs.update_line(); + self.a_dcs.update_line(); self.mvs.update_line(); off_y += stride_y * 32; off_u += stride_u * 16; off_v += stride_v * 16; + off_a += stride_a * 32; row_state = (row_state & !0x190) | ((row_state & 4) << 2); } + start_by = self.slice_h[slice_no]; } Ok(()) } } -fn decode_flags(_br: &mut BitReader, _dst: &mut Vec, _start: usize, _nbits: usize) -> DecoderResult { -unimplemented!(); -/* if !br.read_bool()? { // read bits into byte array? - if nbits == 0 { return Ok(()); } - if nbits < 9 { - shift = in_shift; - pfx = 0; - } else { - shift = in_shift; - loop { - pfx |= br.read(8)? << shift; - dst.push((pfx & 0xFF) as u8); - pfx >>= 8; - shift -= 8; - } +fn decode_flags(br: &mut BitReader, dst: &mut Vec, start: usize, nbits: usize) -> DecoderResult<()> { + if start > 0 { + dst.push(false); + } + if !br.read_bool()? { + for _ in 0..nbits { + let bit = br.read_bool()?; + dst.push(bit); } - let val = br.read(cur_nbits)?; - dst.push(pfx | (val << shift)) } else { let mut cur_bits = nbits; let mut mode = 0; - let mut lastbit = 0; + let mut lastbit = false; while cur_bits > 0 { if !br.read_bool()? { - lastbit = if mode == 3 { lastbit ^ 1 } else { br.read(1)? }; - let val1 = lastval | (lastbit << shift); - let val2 = br.read(if cur_bits > 4 { 4 } else { cur_bits }); - let val = lastval | (lastbit << shift) | (val2 << (shift + 1)); - mode = 2; - if oshift >= 8 { - dst.push((val & 0xFF) as u8); - oshift -= 8; - val >>= 8; + lastbit = if mode == 3 { !lastbit } else { br.read_bool()? }; + dst.push(lastbit); + cur_bits -= 1; + let len = cur_bits.min(4); + for _ in 0..len { + let bit = br.read_bool()?; + dst.push(bit); } - lastval = val; + cur_bits -= len; + mode = 2; } else { - let bread; + let bread: u8; if cur_bits < 4 { bread = 2; } else if cur_bits < 16 { @@ -1340,20 +1428,21 @@ unimplemented!(); } else { bread = 4 | 1; } - lastbit = if mode == 3 { lastbit ^ 1 } else { br.read(1)? }; - run = (if mode == 3 { bread + 1 } else { bread + 2 }).min(cur_bits); - if run == cur_bits { - output lastbit x run - } else { + lastbit = if mode == 3 { !lastbit } else { br.read_bool()? }; + let mut run = (if mode == 3 { bread + 1 } else { bread + 2 } as usize).min(cur_bits); + if run != cur_bits { let add_run = br.read(bread)? as usize; run += add_run; - output lastbit x run - mode = if add_run == (1 << bread) - 1 { 3 } else { 1 }; + mode = if add_run == (1 << bread) - 1 { 1 } else { 3 }; } + for _ in 0..run { + dst.push(lastbit); + } + cur_bits -= run; } } } - Ok(())*/ + Ok(()) } fn get_new_quant(br: &mut BitReader, prev_q: u8) -> DecoderResult { @@ -1409,7 +1498,7 @@ fn decode_luma_inter(br: &mut BitReader, codes: &Bink2Codes, prev_cbp: u32, q: u let dcs = &dcinfo.dcs; for i in 0..4 { decode_acs_4blocks(br, codes, &mut dst[i], BINK2_QUANT_INTER, q, cbp >> (i * 4))?; - for j in 0..4 { dst[i][j][0] = dcs[i * 4 + j]; } + for j in 0..4 { dst[i][j][0] = dcs[i * 4 + j] * 8; } } Ok(cbp) } @@ -1421,7 +1510,7 @@ fn decode_chroma_inter(br: &mut BitReader, codes: &Bink2Codes, prev_cbp: u32, q: dcinfo.predict_inter(min_dc, max_dc); let dcs = &dcinfo.dcs; decode_acs_4blocks(br, codes, dst, BINK2_QUANT_INTER, q, cbp)?; - for i in 0..4 { dst[i][0] = dcs[i]; } + for i in 0..4 { dst[i][0] = dcs[i] * 8; } Ok(cbp) } @@ -1626,11 +1715,11 @@ fn decode_cbp_luma_old(br: &mut BitReader, prev_cbp: u32) -> DecoderResult if !br.read_bool()? { nib1 = br.read(4)?; } - new_cbp = new_cbp | (nib1 << 4); + new_cbp |= nib1 << 4; if !br.read_bool()? { nib1 = br.read(4)?; } - new_cbp = new_cbp | (nib1 << 8); + new_cbp |= nib1 << 8; if !br.read_bool()? { nib1 = br.read(4)?; } @@ -1749,7 +1838,7 @@ fn decode_acs_4blocks_old(br: &mut BitReader, codes: &Bink2Codes, dst: &mut [[f3 level = -level; } let pos = scan[idx]; - dst[blk_no][pos] = (level as f32) * quant_mat[idx] * quant; + dst[blk_no][pos] = (level as f32) * quant_mat[(pos & 7) * 8 + (pos >> 3)] * quant; } idx += 1; if idx >= 64 { break; } @@ -1772,8 +1861,10 @@ fn decode_acs_4blocks_old(br: &mut BitReader, codes: &Bink2Codes, dst: &mut [[f3 Ok(()) } +const KB2H_NUM_SLICES: [usize; 4] = [ 2, 3, 4, 8 ]; + impl NADecoder for Bink2Decoder { - fn init(&mut self, info: Rc) -> DecoderResult<()> { + fn init(&mut self, _supp: &mut NADecoderSupport, info: NACodecInfoRef) -> DecoderResult<()> { if let NACodecTypeInfo::Video(vinfo) = info.get_properties() { let w = vinfo.get_width(); let h = vinfo.get_height(); @@ -1784,36 +1875,60 @@ impl NADecoder for Bink2Decoder { let mut mr = MemoryReader::new_read(&edata); let mut br = ByteReader::new(&mut mr); let magic = br.read_u32be()?; - let _flags = br.read_u32le()?; + let flags = br.read_u32le()?; self.version = magic; + self.has_alpha = (flags & 0x100000) != 0; + + let height_a = (h + 31) & !31; + if self.version <= mktag!(b"KB2f") { + self.num_slices = 2; + self.slice_h[0] = (h + 32) >> 6; + } else if self.version == mktag!(b"KB2g") { + if height_a < 128 { + self.num_slices = 1; + } else { + self.num_slices = 2; + self.slice_h[0] = (h + 31) >> 6; + } + } else { + self.num_slices = KB2H_NUM_SLICES[(flags & 3) as usize]; + let mut start = 0; + let mut end = height_a + 32 * self.num_slices - 1; + for i in 0..self.num_slices - 1 { + start += ((end - start) / (self.num_slices - i)) & !31; + end -= 32; + self.slice_h[i] = start >> 5; + } + } + self.slice_h[self.num_slices - 1] = height_a >> 5; let fmt; + let aplane = if self.has_alpha { Some(NAPixelChromaton::new(0, 0, false, 8, 0, 3, 1)) } else { None }; fmt = NAPixelFormaton::new(ColorModel::YUV(YUVSubmodel::YUVJ), Some(NAPixelChromaton::new(0, 0, false, 8, 0, 0, 1)), Some(NAPixelChromaton::new(1, 1, false, 8, 0, 1, 1)), Some(NAPixelChromaton::new(1, 1, false, 8, 0, 2, 1)), - None, None, - 0, 3); + aplane, None, + if self.has_alpha { FORMATON_FLAG_ALPHA } else { 0 }, + if self.has_alpha { 4 } else { 3 }); let myinfo = NACodecTypeInfo::Video(NAVideoInfo::new(w, h, false, fmt)); - self.info = Rc::new(NACodecInfo::new_ref(info.get_name(), myinfo, info.get_extradata())); + self.info = NACodecInfo::new_ref(info.get_name(), myinfo, info.get_extradata()).into_ref(); Ok(()) } else { Err(DecoderError::InvalidData) } } - fn decode(&mut self, pkt: &NAPacket) -> DecoderResult { + fn decode(&mut self, _supp: &mut NADecoderSupport, pkt: &NAPacket) -> DecoderResult { let src = pkt.get_buffer(); - let mut br = BitReader::new(&src, src.len(), BitReaderMode::LE); + let mut br = BitReader::new(&src, BitReaderMode::LE); let mut buf; self.key_frame = pkt.is_keyframe(); - let bufret = alloc_video_buffer(self.info.get_properties().get_video_info().unwrap(), 4); - if let Err(_) = bufret { return Err(DecoderError::InvalidData); } - let bufinfo = bufret.unwrap(); + let bufinfo = alloc_video_buffer(self.info.get_properties().get_video_info().unwrap(), 5)?; buf = bufinfo.get_vbuf().unwrap(); self.decode_frame_new(&mut br, &mut buf, pkt.is_keyframe())?; @@ -1822,11 +1937,20 @@ impl NADecoder for Bink2Decoder { let mut frm = NAFrame::new_from_pkt(pkt, self.info.clone(), bufinfo); frm.set_frame_type(if self.key_frame { FrameType::I } else { FrameType::P }); - Ok(Rc::new(RefCell::new(frm))) + Ok(frm.into_ref()) } + fn flush(&mut self) { + self.ips.clear(); + } +} + +impl NAOptionHandler for Bink2Decoder { + fn get_supported_options(&self) -> &[NAOptionDefinition] { &[] } + fn set_options(&mut self, _options: &[NAOption]) { } + fn query_option_value(&self, _name: &str) -> Option { None } } -pub fn get_decoder() -> Box { +pub fn get_decoder() -> Box { Box::new(Bink2Decoder::new()) } @@ -1834,15 +1958,15 @@ pub fn get_decoder() -> Box { mod test { use nihav_core::codecs::RegisteredDecoders; use nihav_core::demuxers::RegisteredDemuxers; - use nihav_core::test::dec_video::*; - use crate::codecs::rad_register_all_codecs; - use crate::demuxers::rad_register_all_demuxers; + use nihav_codec_support::test::dec_video::*; + use crate::rad_register_all_decoders; + use crate::rad_register_all_demuxers; #[test] fn test_bink2() { let mut dmx_reg = RegisteredDemuxers::new(); rad_register_all_demuxers(&mut dmx_reg); let mut dec_reg = RegisteredDecoders::new(); - rad_register_all_codecs(&mut dec_reg); + rad_register_all_decoders(&mut dec_reg); //let file = "assets/RAD/Open_Logos_partial.bik"; //let file = "assets/RAD/sc13_01_partial.bk2";