X-Git-Url: https://git.nihav.org/?a=blobdiff_plain;f=nihav-itu%2Fsrc%2Fcodecs%2Fh264%2Fdsp%2Fmod.rs;h=a2a58a45dbf8a14b702598d06834eafa28413d56;hb=HEAD;hp=05b46e20e5f34903c14f0f367336eb2d4be1a77a;hpb=932ae27bc58abef098a4be6f05fdd731d47a7653;p=nihav.git diff --git a/nihav-itu/src/codecs/h264/dsp/mod.rs b/nihav-itu/src/codecs/h264/dsp/mod.rs index 05b46e2..a2a58a4 100644 --- a/nihav-itu/src/codecs/h264/dsp/mod.rs +++ b/nihav-itu/src/codecs/h264/dsp/mod.rs @@ -1,5 +1,5 @@ mod mc; -pub use mc::H264MC; +pub use mc::{H264MC, McBlock}; #[cfg(target_arch="x86_64")] use std::arch::asm; @@ -130,12 +130,12 @@ pub fn idct_luma_dc(blk: &mut [i16; 16], qp: u8) { for i in 0..4 { transform!(luma_dc; blk[i], blk[i + 4], blk[i + 8], blk[i + 12]); } - for row in blk.chunks_mut(4) { + for row in blk.chunks_exact_mut(4) { transform!(luma_dc; row[0], row[1], row[2], row[3]); } } -pub fn idct(blk: &mut [i16; 16], qp: u8, quant_dc: bool) { +pub fn idct_skip_dc(blk: &mut [i16; 16], qp: u8) { const BLK_INDEX: [usize; 16] = [ 0, 2, 0, 2, 2, 1, 2, 1, @@ -144,11 +144,30 @@ pub fn idct(blk: &mut [i16; 16], qp: u8, quant_dc: bool) { ]; let qidx = (qp % 6) as usize; let shift = qp / 6; - let start = if quant_dc { 0 } else { 1 }; - for (el, &idx) in blk.iter_mut().zip(BLK_INDEX.iter()).skip(start) { + for (el, &idx) in blk.iter_mut().zip(BLK_INDEX.iter()).skip(1) { *el = (*el * LEVEL_SCALE[idx][qidx]) << shift; } - for row in blk.chunks_mut(4) { + for row in blk.chunks_exact_mut(4) { + transform!(row[0], row[1], row[2], row[3], 0); + } + for i in 0..4 { + transform!(blk[i], blk[i + 4], blk[i + 8], blk[i + 12], 6); + } +} + +pub fn idct(blk: &mut [i16; 16], qp: u8) { + const BLK_INDEX: [usize; 16] = [ + 0, 2, 0, 2, + 2, 1, 2, 1, + 0, 2, 0, 2, + 2, 1, 2, 1 + ]; + let qidx = (qp % 6) as usize; + let shift = qp / 6; + for (el, &idx) in blk.iter_mut().zip(BLK_INDEX.iter()) { + *el = (*el * LEVEL_SCALE[idx][qidx]) << shift; + } + for row in blk.chunks_exact_mut(4) { transform!(row[0], row[1], row[2], row[3], 0); } for i in 0..4 { @@ -228,7 +247,7 @@ pub fn idct8x8(blk: &mut [i16; 64], qp: u8) { *dst = i32::from(src).wrapping_mul(i32::from(qmat[idx])).wrapping_add(bias) >> shift; } } - for row in tmp.chunks_mut(8) { + for row in tmp.chunks_exact_mut(8) { transform!(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7]); } for col in 0..8 { @@ -242,7 +261,7 @@ pub fn idct8x8(blk: &mut [i16; 64], qp: u8) { pub fn add_coeffs(dst: &mut [u8], offset: usize, stride: usize, coeffs: &[i16]) { let out = &mut dst[offset..][..stride * 3 + 4]; - for (line, src) in out.chunks_mut(stride).take(4).zip(coeffs.chunks(4)) { + for (line, src) in out.chunks_mut(stride).take(4).zip(coeffs.chunks_exact(4)) { for (dst, src) in line.iter_mut().take(4).zip(src.iter()) { *dst = (i32::from(*dst) + i32::from(*src)).max(0).min(255) as u8; } @@ -251,7 +270,7 @@ pub fn add_coeffs(dst: &mut [u8], offset: usize, stride: usize, coeffs: &[i16]) pub fn add_coeffs8(dst: &mut [u8], offset: usize, stride: usize, coeffs: &[i16; 64]) { let out = &mut dst[offset..]; - for (line, src) in out.chunks_mut(stride).take(8).zip(coeffs.chunks(8)) { + for (line, src) in out.chunks_mut(stride).take(8).zip(coeffs.chunks_exact(8)) { for (dst, src) in line.iter_mut().take(8).zip(src.iter()) { *dst = (i32::from(*dst) + i32::from(*src)).max(0).min(255) as u8; }