X-Git-Url: https://git.nihav.org/?a=blobdiff_plain;f=nihav-qt%2Fsrc%2Fcodecs%2Fsvq3dsp.rs;fp=nihav-qt%2Fsrc%2Fcodecs%2Fsvq3dsp.rs;h=0c4540222fc550640268764aa6ab651215d1e327;hb=4c1582cf2e275af7c0f4a2c1a397fed5b68d31d5;hp=0000000000000000000000000000000000000000;hpb=d341f57a0caf409d7dcc258b396cdee2080be399;p=nihav.git diff --git a/nihav-qt/src/codecs/svq3dsp.rs b/nihav-qt/src/codecs/svq3dsp.rs new file mode 100644 index 0000000..0c45402 --- /dev/null +++ b/nihav-qt/src/codecs/svq3dsp.rs @@ -0,0 +1,620 @@ +use nihav_codec_support::codecs::blockdsp::*; + +#[allow(dead_code)] +#[derive(Debug,Clone,Copy)] +pub enum PredType4x4 { + Ver, + Hor, + DC, + DiagDownLeft, + DiagDownRight, + VerRight, + HorDown, + VerLeft, + HorUp, + LeftDC, + TopDC, + DC128, +} + +#[allow(dead_code)] +#[derive(Debug,Clone,Copy)] +pub enum PredType8x8 { + DC, + Hor, + Ver, + Plane, + LeftDC, + TopDC, + DC128 +} + +pub const INTRA_PRED16: [PredType8x8; 4] = [ + PredType8x8::DC, PredType8x8::Hor, PredType8x8::Ver, PredType8x8::Plane +]; +pub const INTRA_PRED4: [PredType4x4; 9] = [ + PredType4x4::Ver, PredType4x4::Hor, PredType4x4::DC, + PredType4x4::DiagDownLeft, PredType4x4::DiagDownRight, + PredType4x4::VerRight, PredType4x4::HorDown, + PredType4x4::VerLeft, PredType4x4::HorUp +]; + + +const SVQ3_QUANTS: [i32; 32] = [ + 3881, 4351, 4890, 5481, 6154, 6914, 7761, 8718, + 9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873, + 24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683, + 61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533 +]; + +pub fn chroma_transform(blk: &mut [i16; 4]) { + let t0 = blk[0] + blk[2]; + let t1 = blk[0] - blk[2]; + let t2 = blk[1] + blk[3]; + let t3 = blk[1] - blk[3]; + blk[0] = t0 + t2; + blk[1] = t0 - t2; + blk[2] = t1 + t3; + blk[3] = t1 - t3; +} + +pub fn idct_dc_coeffs(blk: &mut [i16; 16], q: u8) { + let quant = SVQ3_QUANTS[q as usize]; + let mut tmp = [0i32; 16]; + for (src, dst) in blk.chunks(4).zip(tmp.chunks_mut(4)) { + let s0 = i32::from(src[0]); + let s1 = i32::from(src[1]); + let s2 = i32::from(src[2]); + let s3 = i32::from(src[3]); + let t0 = 13 * (s0 + s2); + let t1 = 13 * (s0 - s2); + let t2 = 17 * s1 + 7 * s3; + let t3 = 7 * s1 - 17 * s3; + dst[0] = t0 + t2; + dst[1] = t1 + t3; + dst[2] = t1 - t3; + dst[3] = t0 - t2; + } + for i in 0..4 { + let s0 = tmp[i]; + let s1 = tmp[i + 4]; + let s2 = tmp[i + 4 * 2]; + let s3 = tmp[i + 4 * 3]; + let t0 = 13 * (s0 + s2); + let t1 = 13 * (s0 - s2); + let t2 = 17 * s1 + 7 * s3; + let t3 = 7 * s1 - 17 * s3; + blk[i] = (((t0 + t2).wrapping_mul(quant) + (1 << 19)) >> 20) as i16; + blk[i + 4] = (((t1 + t3).wrapping_mul(quant) + (1 << 19)) >> 20) as i16; + blk[i + 4 * 2] = (((t1 - t3).wrapping_mul(quant) + (1 << 19)) >> 20) as i16; + blk[i + 4 * 3] = (((t0 - t2).wrapping_mul(quant) + (1 << 19)) >> 20) as i16; + } +} + +pub fn idct(blk: &mut [i16; 16], q: u8, chroma: bool) { + let quant = SVQ3_QUANTS[q as usize]; + let mut tmp = [0i32; 16]; + let dc = 13 * 13 * if chroma { quant * i32::from(blk[0]) / 2 } else { i32::from(blk[0]) * 1538 }; + blk[0] = 0; + for (src, dst) in blk.chunks(4).zip(tmp.chunks_mut(4)) { + let s0 = i32::from(src[0]); + let s1 = i32::from(src[1]); + let s2 = i32::from(src[2]); + let s3 = i32::from(src[3]); + let t0 = 13 * (s0 + s2); + let t1 = 13 * (s0 - s2); + let t2 = 17 * s1 + 7 * s3; + let t3 = 7 * s1 - 17 * s3; + dst[0] = t0 + t2; + dst[1] = t1 + t3; + dst[2] = t1 - t3; + dst[3] = t0 - t2; + } + for i in 0..4 { + let s0 = tmp[i]; + let s1 = tmp[i + 4]; + let s2 = tmp[i + 4 * 2]; + let s3 = tmp[i + 4 * 3]; + let t0 = 13 * (s0 + s2); + let t1 = 13 * (s0 - s2); + let t2 = 17 * s1 + 7 * s3; + let t3 = 7 * s1 - 17 * s3; + blk[i] = (((t0 + t2).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16; + blk[i + 4] = (((t1 + t3).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16; + blk[i + 4 * 2] = (((t1 - t3).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16; + blk[i + 4 * 3] = (((t0 - t2).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16; + } +} + +pub fn add_coeffs(dst: &mut [u8], offset: usize, stride: usize, coeffs: &[i16]) { + let out = &mut dst[offset..][..stride * 3 + 4]; + for (line, src) in out.chunks_mut(stride).take(4).zip(coeffs.chunks(4)) { + for (dst, src) in line.iter_mut().take(4).zip(src.iter()) { + *dst = (i32::from(*dst) + i32::from(*src)).max(0).min(255) as u8; + } + } +} + +pub fn avg(dst: &mut [u8], dstride: usize, + src: &[u8], sstride: usize, bw: usize, bh: usize) { + for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) { + for (dst, src) in dline.iter_mut().zip(sline.iter()).take(bw) { + *dst = ((u16::from(*dst) + u16::from(*src) + 1) >> 1) as u8; + } + } +} + +fn clip8(val: i16) -> u8 { val.max(0).min(255) as u8 } + +fn ipred_dc128(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize) { + for _ in 0..bsize { + for x in 0..bsize { buf[idx + x] = 128; } + idx += stride; + } +} +fn ipred_ver(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize) { + let oidx = idx - stride; + for _ in 0..bsize { + for x in 0..bsize { buf[idx + x] = buf[oidx + x]; } + idx += stride; + } +} +fn ipred_hor(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize) { + for _ in 0..bsize { + for x in 0..bsize { buf[idx + x] = buf[idx - 1]; } + idx += stride; + } +} +fn ipred_dc(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize, shift: u8) { + let mut adc: u16 = 0; + for i in 0..bsize { adc += u16::from(buf[idx - stride + i]); } + for i in 0..bsize { adc += u16::from(buf[idx - 1 + i * stride]); } + let dc = ((adc + (1 << (shift - 1))) >> shift) as u8; + + for _ in 0..bsize { + for x in 0..bsize { buf[idx + x] = dc; } + idx += stride; + } +} +fn ipred_left_dc(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize, shift: u8) { + let mut adc: u16 = 0; + for i in 0..bsize { adc += u16::from(buf[idx - 1 + i * stride]); } + let dc = ((adc + (1 << (shift - 1))) >> shift) as u8; + + for _ in 0..bsize { + for x in 0..bsize { buf[idx + x] = dc; } + idx += stride; + } +} +fn ipred_top_dc(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize, shift: u8) { + let mut adc: u16 = 0; + for i in 0..bsize { adc += u16::from(buf[idx - stride + i]); } + let dc = ((adc + (1 << (shift - 1))) >> shift) as u8; + + for _ in 0..bsize { + for x in 0..bsize { buf[idx + x] = dc; } + idx += stride; + } +} + +fn load_top(dst: &mut [u16], buf: &mut [u8], idx: usize, stride: usize, len: usize) { + for i in 0..len { dst[i] = u16::from(buf[idx - stride + i]); } +} +fn load_left(dst: &mut [u16], buf: &mut [u8], idx: usize, stride: usize, len: usize) { + for i in 0..len { dst[i] = u16::from(buf[idx - 1 + i * stride]); } +} + +fn ipred_4x4_ver(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { + ipred_ver(buf, idx, stride, 4); +} +fn ipred_4x4_hor(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { + ipred_hor(buf, idx, stride, 4); +} +fn ipred_4x4_diag_down_left(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { + let mut t: [u16; 8] = [0; 8]; + let mut l: [u16; 8] = [0; 8]; + load_top(&mut t, buf, idx, stride, 4); + load_left(&mut l, buf, idx, stride, 4); + let a = ((l[1] + t[1]) >> 1) as u8; + let b = ((l[2] + t[2]) >> 1) as u8; + let c = ((l[3] + t[3]) >> 1) as u8; + + let dst = &mut buf[idx..]; + dst[0] = a; dst[1] = b; dst[2] = c; dst[3] = c; + let dst = &mut buf[idx + stride..]; + dst[0] = b; dst[1] = c; dst[2] = c; dst[3] = c; + let dst = &mut buf[idx + stride * 2..]; + dst[0] = c; dst[1] = c; dst[2] = c; dst[3] = c; + let dst = &mut buf[idx + stride * 3..]; + dst[0] = c; dst[1] = c; dst[2] = c; dst[3] = c; +} +fn ipred_4x4_diag_down_right(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { + let mut t: [u16; 5] = [0; 5]; + let mut l: [u16; 5] = [0; 5]; + load_top(&mut t, buf, idx - 1, stride, 5); + load_left(&mut l, buf, idx - stride, stride, 5); + let dst = &mut buf[idx..]; + + for j in 0..4 { + for i in 0..j { + dst[i + j * stride] = ((l[j - i - 1] + 2 * l[j - i] + l[j - i + 1] + 2) >> 2) as u8; + } + dst[j + j * stride] = ((l[1] + 2 * l[0] + t[1] + 2) >> 2) as u8; + for i in (j+1)..4 { + dst[i + j * stride] = ((t[i - j - 1] + 2 * t[i - j] + t[i - j + 1] + 2) >> 2) as u8; + } + } +} +fn ipred_4x4_ver_right(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { + let mut t: [u16; 5] = [0; 5]; + let mut l: [u16; 5] = [0; 5]; + load_top(&mut t, buf, idx - 1, stride, 5); + load_left(&mut l, buf, idx - stride, stride, 5); + let dst = &mut buf[idx..]; + + for j in 0..4 { + for i in 0..4 { + let zvr = ((2 * i) as i8) - (j as i8); + let pix; + if zvr >= 0 { + if (zvr & 1) == 0 { + pix = (t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 1) >> 1; + } else { + pix = (t[i - (j >> 1) - 1] + 2 * t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 2) >> 2; + } + } else { + if zvr == -1 { + pix = (l[1] + 2 * l[0] + t[1] + 2) >> 2; + } else { + pix = (l[j] + 2 * l[j - 1] + l[j - 2] + 2) >> 2; + } + } + dst[i + j * stride] = pix as u8; + } + } +} +fn ipred_4x4_ver_left(buf: &mut [u8], idx: usize, stride: usize, tr: &[u8]) { + let mut t: [u16; 8] = [0; 8]; + load_top(&mut t, buf, idx, stride, 4); + for i in 0..4 { t[i + 4] = u16::from(tr[i]); } + let dst = &mut buf[idx..]; + + dst[0 + 0 * stride] = ((t[0] + t[1] + 1) >> 1) as u8; + let pix = ((t[1] + t[2] + 1) >> 1) as u8; + dst[1 + 0 * stride] = pix; + dst[0 + 2 * stride] = pix; + let pix = ((t[2] + t[3] + 1) >> 1) as u8; + dst[2 + 0 * stride] = pix; + dst[1 + 2 * stride] = pix; + let pix = ((t[3] + t[4] + 1) >> 1) as u8; + dst[3 + 0 * stride] = pix; + dst[2 + 2 * stride] = pix; + dst[3 + 2 * stride] = ((t[4] + t[5] + 1) >> 1) as u8; + dst[0 + 1 * stride] = ((t[0] + 2*t[1] + t[2] + 2) >> 2) as u8; + let pix = ((t[1] + 2*t[2] + t[3] + 2) >> 2) as u8; + dst[1 + 1 * stride] = pix; + dst[0 + 3 * stride] = pix; + let pix = ((t[2] + 2*t[3] + t[4] + 2) >> 2) as u8; + dst[2 + 1 * stride] = pix; + dst[1 + 3 * stride] = pix; + let pix = ((t[3] + 2*t[4] + t[5] + 2) >> 2) as u8; + dst[3 + 1 * stride] = pix; + dst[2 + 3 * stride] = pix; + dst[3 + 3 * stride] = ((t[4] + 2*t[5] + t[6] + 2) >> 2) as u8; +} +fn ipred_4x4_hor_down(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { + let mut t: [u16; 5] = [0; 5]; + let mut l: [u16; 5] = [0; 5]; + load_top(&mut t, buf, idx - 1, stride, 5); + load_left(&mut l, buf, idx - stride, stride, 5); + let dst = &mut buf[idx..]; + + for j in 0..4 { + for i in 0..4 { + let zhd = ((2 * j) as i8) - (i as i8); + let pix; + if zhd >= 0 { + if (zhd & 1) == 0 { + pix = (l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 1) >> 1; + } else { + pix = (l[j - (i >> 1) - 1] + 2 * l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 2) >> 2; + } + } else { + if zhd == -1 { + pix = (l[1] + 2 * l[0] + t[1] + 2) >> 2; + } else { + pix = (t[i - 2] + 2 * t[i - 1] + t[i] + 2) >> 2; + } + } + dst[i + j * stride] = pix as u8; + } + } +} +fn ipred_4x4_hor_up(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { + let mut l: [u16; 8] = [0; 8]; + load_left(&mut l, buf, idx, stride, 8); + let dst = &mut buf[idx..]; + + dst[0 + 0 * stride] = ((l[0] + l[1] + 1) >> 1) as u8; + dst[1 + 0 * stride] = ((l[0] + 2*l[1] + l[2] + 2) >> 2) as u8; + let pix = ((l[1] + l[2] + 1) >> 1) as u8; + dst[2 + 0 * stride] = pix; + dst[0 + 1 * stride] = pix; + let pix = ((l[1] + 2*l[2] + l[3] + 2) >> 2) as u8; + dst[3 + 0 * stride] = pix; + dst[1 + 1 * stride] = pix; + let pix = ((l[2] + l[3] + 1) >> 1) as u8; + dst[2 + 1 * stride] = pix; + dst[0 + 2 * stride] = pix; + let pix = ((l[2] + 3*l[3] + 2) >> 2) as u8; + dst[3 + 1 * stride] = pix; + dst[1 + 2 * stride] = pix; + dst[3 + 2 * stride] = l[3] as u8; + dst[1 + 3 * stride] = l[3] as u8; + dst[0 + 3 * stride] = l[3] as u8; + dst[2 + 2 * stride] = l[3] as u8; + dst[2 + 3 * stride] = l[3] as u8; + dst[3 + 3 * stride] = l[3] as u8; +} +fn ipred_4x4_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { + ipred_dc(buf, idx, stride, 4, 3); +} +fn ipred_4x4_left_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { + ipred_left_dc(buf, idx, stride, 4, 2); +} +fn ipred_4x4_top_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { + ipred_top_dc(buf, idx, stride, 4, 2); +} +fn ipred_4x4_dc128(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { + ipred_dc128(buf, idx, stride, 4); +} + +fn ipred_8x8_ver(buf: &mut [u8], idx: usize, stride: usize) { + ipred_ver(buf, idx, stride, 8); +} +fn ipred_8x8_hor(buf: &mut [u8], idx: usize, stride: usize) { + ipred_hor(buf, idx, stride, 8); +} +fn ipred_8x8_dc(buf: &mut [u8], idx: usize, stride: usize) { + let mut t: [u16; 8] = [0; 8]; + load_top(&mut t, buf, idx, stride, 8); + let mut l: [u16; 8] = [0; 8]; + load_left(&mut l, buf, idx, stride, 8); + + let dc0 = ((t[0] + t[1] + t[2] + t[3] + l[0] + l[1] + l[2] + l[3] + 4) >> 3) as u8; + let sum1 = t[4] + t[5] + t[6] + t[7]; + let dc1 = ((sum1 + 2) >> 2) as u8; + let sum2 = l[4] + l[5] + l[6] + l[7]; + let dc2 = ((sum2 + 2) >> 2) as u8; + let dc3 = ((sum1 + sum2 + 4) >> 3) as u8; + + let dst = &mut buf[idx..]; + for row in dst.chunks_mut(stride).take(4) { + row[..4].copy_from_slice(&[dc0; 4]); + row[4..8].copy_from_slice(&[dc1; 4]); + } + for row in dst.chunks_mut(stride).skip(4).take(4) { + row[..4].copy_from_slice(&[dc2; 4]); + row[4..8].copy_from_slice(&[dc3; 4]); + } +} +fn ipred_8x8_left_dc(buf: &mut [u8], idx: usize, stride: usize) { + ipred_left_dc(buf, idx, stride, 8, 3); +} +fn ipred_8x8_top_dc(buf: &mut [u8], idx: usize, stride: usize) { + ipred_top_dc(buf, idx, stride, 8, 3); +} +fn ipred_8x8_dc128(buf: &mut [u8], idx: usize, stride: usize) { + ipred_dc128(buf, idx, stride, 8); +} +fn ipred_8x8_plane(_buf: &mut [u8], _idx: usize, _stride: usize) { + unreachable!(); +/* let mut h: i16 = 0; + let mut v: i16 = 0; + let idx0 = idx + 3 - stride; + let mut idx1 = idx + 4 * stride - 1; + let mut idx2 = idx + 2 * stride - 1; + for i in 0..4 { + let i1 = (i + 1) as i16; + h += i1 * (i16::from(buf[idx0 + i + 1]) - i16::from(buf[idx0 - i - 1])); + v += i1 * (i16::from(buf[idx1]) - i16::from(buf[idx2])); + idx1 += stride; + idx2 -= stride; + } + let b = (17 * h + 16) >> 5; + let c = (17 * v + 16) >> 5; + let mut a = 16 * (i16::from(buf[idx - 1 + 7 * stride]) + i16::from(buf[idx + 7 - stride])) - 3 * (b + c) + 16; + for line in buf[idx..].chunks_mut(stride).take(8) { + let mut acc = a; + for el in line.iter_mut().take(8) { + *el = clip8(acc >> 5); + acc += b; + } + a += c; + }*/ +} + +fn ipred_16x16_ver(buf: &mut [u8], idx: usize, stride: usize) { + ipred_ver(buf, idx, stride, 16); +} +fn ipred_16x16_hor(buf: &mut [u8], idx: usize, stride: usize) { + ipred_hor(buf, idx, stride, 16); +} +fn ipred_16x16_dc(buf: &mut [u8], idx: usize, stride: usize) { + ipred_dc(buf, idx, stride, 16, 5); +} +fn ipred_16x16_left_dc(buf: &mut [u8], idx: usize, stride: usize) { + ipred_left_dc(buf, idx, stride, 16, 4); +} +fn ipred_16x16_top_dc(buf: &mut [u8], idx: usize, stride: usize) { + ipred_top_dc(buf, idx, stride, 16, 4); +} +fn ipred_16x16_dc128(buf: &mut [u8], idx: usize, stride: usize) { + ipred_dc128(buf, idx, stride, 16); +} +fn ipred_16x16_plane(buf: &mut [u8], mut idx: usize, stride: usize) { + let idx0 = idx + 7 - stride; + let mut idx1 = idx + 8 * stride - 1; + let mut idx2 = idx1 - 2 * stride; + + let mut h = i16::from(buf[idx0 + 1]) - i16::from(buf[idx0 - 1]); + let mut v = i16::from(buf[idx1]) - i16::from(buf[idx2]); + + for k in 2..9 { + idx1 += stride; + idx2 -= stride; + h += (k as i16) * (i16::from(buf[idx0 + k]) - i16::from(buf[idx0 - k])); + v += (k as i16) * (i16::from(buf[idx1]) - i16::from(buf[idx2])); + } + h = 5 * (h / 4) / 16; + v = 5 * (v / 4) / 16; + std::mem::swap(&mut h, &mut v); + + let mut a = 16 * (i16::from(buf[idx - 1 + 15 * stride]) + i16::from(buf[idx + 15 - stride]) + 1) - 7 * (v + h); + + for _ in 0..16 { + let mut b = a; + a += v; + + for dst in buf[idx..].chunks_mut(4).take(4) { + dst[0] = clip8((b ) >> 5); + dst[1] = clip8((b + h) >> 5); + dst[2] = clip8((b + 2*h) >> 5); + dst[3] = clip8((b + 3*h) >> 5); + b += h * 4; + } + idx += stride; + } +} + +pub type IPred4x4Func = fn(buf: &mut [u8], off: usize, stride: usize, tr: &[u8]); +pub type IPred8x8Func = fn(buf: &mut [u8], off: usize, stride: usize); + +pub const IPRED_FUNCS4X4: [IPred4x4Func; 12] = [ + ipred_4x4_ver, ipred_4x4_hor, ipred_4x4_dc, + ipred_4x4_diag_down_left, ipred_4x4_diag_down_right, + ipred_4x4_ver_right, ipred_4x4_hor_down, ipred_4x4_ver_left, ipred_4x4_hor_up, + ipred_4x4_left_dc, ipred_4x4_top_dc, ipred_4x4_dc128 +]; + +pub const IPRED_FUNCS8X8: [IPred8x8Func; 7] = [ + ipred_8x8_dc, ipred_8x8_hor, ipred_8x8_ver, ipred_8x8_plane, + ipred_8x8_left_dc, ipred_8x8_top_dc, ipred_8x8_dc128 +]; + +pub const IPRED_FUNCS16X16: [IPred8x8Func; 7] = [ + ipred_16x16_dc, ipred_16x16_hor, ipred_16x16_ver, ipred_16x16_plane, + ipred_16x16_left_dc, ipred_16x16_top_dc, ipred_16x16_dc128 +]; + +fn tpel_interp00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { + for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) { + dline[..bw].copy_from_slice(&sline[..bw]); + } +} + +fn interp2(val: u32) -> u8 { + (((val + 1) * 683) >> 11) as u8 +} + +fn interp4(val: u32) -> u8 { + (((val + 6) * 2731) >> 15) as u8 +} + +fn tpel_interp01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { + for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) { + let mut last = u32::from(sline[0]); + for (dst, src) in dline.iter_mut().take(bw).zip(sline[1..].iter()) { + let new = u32::from(*src); + *dst = interp2(last * 2 + new); + last = new; + } + } +} + +fn tpel_interp02(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { + for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) { + let mut last = u32::from(sline[0]); + for (dst, src) in dline.iter_mut().take(bw).zip(sline[1..].iter()) { + let new = u32::from(*src); + *dst = interp2(last + new * 2); + last = new; + } + } +} + +fn tpel_interp10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { + let src1 = &src[sstride..]; + for (dline, (sline0, sline1)) in dst.chunks_mut(dstride).zip(src.chunks(sstride).zip(src1.chunks(sstride))).take(bh) { + for (dst, (s0, s1)) in dline.iter_mut().zip(sline0.iter().zip(sline1.iter())).take(bw) { + *dst = interp2(u32::from(*s0) * 2 + u32::from(*s1)); + } + } +} + +fn tpel_interp11(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { + let mut sidx0 = 0; + let mut sidx1 = sstride; + for dline in dst.chunks_mut(dstride).take(bh) { + for (x, dst) in dline.iter_mut().take(bw).enumerate() { + *dst = interp4(u32::from(src[sidx0 + x]) * 4 + u32::from(src[sidx0 + x + 1]) * 3 + + u32::from(src[sidx1 + x]) * 3 + u32::from(src[sidx1 + x + 1]) * 2); + } + sidx0 += sstride; + sidx1 += sstride; + } +} + +fn tpel_interp12(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { + let mut sidx0 = 0; + let mut sidx1 = sstride; + for dline in dst.chunks_mut(dstride).take(bh) { + for (x, dst) in dline.iter_mut().take(bw).enumerate() { + *dst = interp4(u32::from(src[sidx0 + x]) * 3 + u32::from(src[sidx0 + x + 1]) * 4 + + u32::from(src[sidx1 + x]) * 2 + u32::from(src[sidx1 + x + 1]) * 3); + } + sidx0 += sstride; + sidx1 += sstride; + } +} + +fn tpel_interp20(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { + let src1 = &src[sstride..]; + for (dline, (sline0, sline1)) in dst.chunks_mut(dstride).zip(src.chunks(sstride).zip(src1.chunks(sstride))).take(bh) { + for (dst, (s0, s1)) in dline.iter_mut().zip(sline0.iter().zip(sline1.iter())).take(bw) { + *dst = interp2(u32::from(*s0) + u32::from(*s1) * 2); + } + } +} + +fn tpel_interp21(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { + let mut sidx0 = 0; + let mut sidx1 = sstride; + for dline in dst.chunks_mut(dstride).take(bh) { + for (x, dst) in dline.iter_mut().take(bw).enumerate() { + *dst = interp4(u32::from(src[sidx0 + x]) * 3 + u32::from(src[sidx0 + x + 1]) * 2 + + u32::from(src[sidx1 + x]) * 4 + u32::from(src[sidx1 + x + 1]) * 3); + } + sidx0 += sstride; + sidx1 += sstride; + } +} + +fn tpel_interp22(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { + let mut sidx0 = 0; + let mut sidx1 = sstride; + for dline in dst.chunks_mut(dstride).take(bh) { + for (x, dst) in dline.iter_mut().take(bw).enumerate() { + *dst = interp4(u32::from(src[sidx0 + x]) * 2 + u32::from(src[sidx0 + x + 1]) * 3 + + u32::from(src[sidx1 + x]) * 3 + u32::from(src[sidx1 + x + 1]) * 4); + } + sidx0 += sstride; + sidx1 += sstride; + } +} + +pub const THIRDPEL_INTERP_FUNCS: &[BlkInterpFunc] = &[ + tpel_interp00, tpel_interp01, tpel_interp02, + tpel_interp10, tpel_interp11, tpel_interp12, + tpel_interp20, tpel_interp21, tpel_interp22 +];