/// Block-level DSP context for the Vivo decoder.
///
/// The only state is the inverse-transform lookup table that `VivoBlockDSP::new`
/// precomputes and `idct` consumes.
struct VivoBlockDSP {
    /// Inverse-transform weights: row `i * 8 + j` belongs to the input coefficient at
    /// position `(i, j)`, column `k * 8 + l` to the output sample at position `(k, l)`.
    dct_tab: [[f64; 64]; 64],
}
+
/// Returns one entry of the 8-point cosine basis used to build the transform table.
///
/// `i` selects the basis row and `j` the position within that row. Row 0 is the
/// constant `1 / sqrt(8)`; every other row is `0.5 * cos((j + 0.5) * i * PI / 8)`.
fn gen_coef(i: usize, j: usize) -> f64 {
    use std::f64::consts::PI;

    match i {
        0 => 1.0 / 8.0f64.sqrt(),
        _ => {
            let phase = ((j as f64) + 0.5) * (i as f64) * PI / 8.0;
            phase.cos() * 0.5
        }
    }
}
+
+impl VivoBlockDSP {
+ fn new() -> Self {
+ let mut dct_tab = [[0.0; 64]; 64];
+ for i in 0..8 {
+ for j in 0..8 {
+ for k in 0..8 {
+ for l in 0..8 {
+ let c0 = gen_coef(i, k);
+ let c1 = gen_coef(j, l);
+ let c = c0 * c1 * 64.0;
+ dct_tab[i * 8 + j][k * 8 + l] = c;
+ }
+ }
+ }
+ }
+ Self {
+ dct_tab
+ }
+ }
+}
+
/// Filters a horizontal block edge across eight adjacent columns.
///
/// For every column the two samples above the edge and the two samples starting at
/// `off` are read and combined into a difference measure. When that measure is
/// non-zero and lies strictly between -32 and 32, `clip_tab` supplies a correction
/// that is added to the sample just above the edge and subtracted from the sample just
/// below it; both results are saturated to `0..=255`.
///
/// * `buf` - sample data, modified in place
/// * `stride` - distance in `buf` between vertically adjacent samples
/// * `off` - index of the first sample of the row just below the edge; the rows at
///   `off - 2 * stride` through `off + stride` must be addressable
/// * `clip_tab` - correction lookup indexed by `diff + 32`
fn deblock_hor(buf: &mut [u8], stride: usize, off: usize, clip_tab: &[i16; 64]) {
    let saturate = |v: i16| v.max(0).min(255) as u8;

    // Start indices of the four rows straddling the edge.
    let row_p1 = off - 2 * stride;
    let row_p0 = off - stride;
    let row_q0 = off;
    let row_q1 = off + stride;

    for col in 0..8 {
        let above_far = i16::from(buf[row_p1 + col]);
        let above = i16::from(buf[row_p0 + col]);
        let below = i16::from(buf[row_q0 + col]);
        let below_far = i16::from(buf[row_q1 + col]);

        let diff = (3 * (above_far - below_far) + 8 * (below - above)) >> 4;
        // Zero needs no correction; anything outside (-32, 32) is left untouched.
        if diff == 0 || diff <= -32 || diff >= 32 {
            continue;
        }

        let delta = clip_tab[(diff + 32) as usize];
        buf[row_p0 + col] = saturate(above + delta);
        buf[row_q0 + col] = saturate(below - delta);
    }
}
+
/// Filters a vertical block edge across eight consecutive rows.
///
/// For every row the two samples left of the edge and the two samples starting at
/// `off` are read and combined into a difference measure. When that measure is
/// non-zero and lies strictly between -32 and 32, `clip_tab` supplies a correction
/// that is added to the sample just left of the edge and subtracted from the sample
/// just right of it; both results are saturated to `0..=255`.
///
/// * `buf` - sample data, modified in place
/// * `stride` - distance in `buf` between vertically adjacent samples
/// * `off` - index of the first sample right of the edge in the topmost row; must be
///   at least 2
/// * `clip_tab` - correction lookup indexed by `diff + 32`
fn deblock_ver(buf: &mut [u8], stride: usize, off: usize, clip_tab: &[i16; 64]) {
    let saturate = |v: i16| v.max(0).min(255) as u8;

    for row in 0..8 {
        let line = row * stride;
        let idx_p1 = off - 2 + line;
        let idx_p0 = off - 1 + line;
        let idx_q0 = off + line;
        let idx_q1 = off + 1 + line;

        let left_far = i16::from(buf[idx_p1]);
        let left = i16::from(buf[idx_p0]);
        let right = i16::from(buf[idx_q0]);
        let right_far = i16::from(buf[idx_q1]);

        let diff = (3 * (left_far - right_far) + 8 * (right - left)) >> 4;
        // Zero needs no correction; anything outside (-32, 32) is left untouched.
        if diff == 0 || diff <= -32 || diff >= 32 {
            continue;
        }

        let delta = clip_tab[(diff + 32) as usize];
        buf[idx_p0] = saturate(left + delta);
        buf[idx_q0] = saturate(right - delta);
    }
}
+
/// Fills `clip_tab` with the deblocking correction curve for quantiser `q`.
///
/// The table is indexed by `diff + 32`, so slot 32 is the centre and the curve is
/// antisymmetric around it. For an offset `i` from the centre the stored magnitude is
///
/// * `i` while `i < (q + 2) / 2` (rising part),
/// * `q - i` while `(q + 2) / 2 <= i < q` (falling part),
/// * `0` for every other slot.
///
/// Only offsets below 32 have a slot, so both loops stop there. Without that bound any
/// `q >= 33` would index past the end of the table and panic; for `q <= 32` the bound
/// changes nothing.
fn gen_clip_tab(clip_tab: &mut [i16; 64], q: u8) {
    // Largest offset (exclusive) that still maps into the 64-entry table.
    const MAX_OFFSET: i16 = 32;

    let q = i16::from(q);
    *clip_tab = [0; 64];
    let lim = (q + 2) >> 1;

    // Rising part: the correction equals the difference itself.
    for i in 0..lim.min(MAX_OFFSET) {
        clip_tab[(32 - i) as usize] = -i;
        clip_tab[(32 + i) as usize] = i;
    }
    // Falling part: the correction shrinks back towards zero at offset `q`.
    // The range is empty when `lim` already exceeds the table.
    for i in lim..q.min(MAX_OFFSET) {
        let val = q - i;
        clip_tab[(32 - i) as usize] = -val;
        clip_tab[(32 + i) as usize] = val;
    }
}
+
+impl BlockDSP for VivoBlockDSP {
+ fn idct(&self, blk: &mut [i16; 64]) {
+ let mut tmp = [0i32; 64];
+ for (i, &el) in blk.iter().enumerate() {
+ if el != 0 {
+ let cmat = self.dct_tab[i];
+ for (dst, &src) in tmp.iter_mut().zip(cmat.iter()) {
+ *dst += (src * (el as f64)) as i32;
+ }
+ }
+ }
+ for (dst, &src) in blk.iter_mut().zip(tmp.iter()) {
+ *dst = ((src + 0x20) >> 6) as i16;
+ }
+ }
+ fn copy_blocks(&self, dst: &mut NAVideoBuffer<u8>, src: NAVideoBufferRef<u8>, xpos: usize, ypos: usize, mv: MV) {
+ let mode = ((mv.x & 1) + (mv.y & 1) * 2) as usize;
+ let cmode = (if (mv.x & 3) != 0 { 1 } else { 0 }) + (if (mv.y & 3) != 0 { 2 } else { 0 });
+
+ let mut dst = NASimpleVideoFrame::from_video_buf(dst).unwrap();
+
+ blockdsp::copy_block(&mut dst, src.clone(), 0, xpos, ypos, mv.x >> 1, mv.y >> 1, 16, 16, 0, 1, mode, H263_INTERP_FUNCS);
+ blockdsp::copy_block(&mut dst, src.clone(), 1, xpos >> 1, ypos >> 1, mv.x >> 2, mv.y >> 2, 8, 8, 0, 1, cmode, H263_INTERP_FUNCS);
+ blockdsp::copy_block(&mut dst, src, 2, xpos >> 1, ypos >> 1, mv.x >> 2, mv.y >> 2, 8, 8, 0, 1, cmode, H263_INTERP_FUNCS);
+ }
+ fn copy_blocks8x8(&self, dst: &mut NAVideoBuffer<u8>, src: NAVideoBufferRef<u8>, xpos: usize, ypos: usize, mvs: &[MV; 4]) {
+ let mut dst = NASimpleVideoFrame::from_video_buf(dst).unwrap();
+
+ for (i, mv) in mvs.iter().enumerate() {
+ let xadd = (i & 1) * 8;
+ let yadd = (i & 2) * 4;
+ let mode = ((mv.x & 1) + (mv.y & 1) * 2) as usize;
+
+ blockdsp::copy_block(&mut dst, src.clone(), 0, xpos + xadd, ypos + yadd, mv.x >> 1, mv.y >> 1, 8, 8, 0, 1, mode, H263_INTERP_FUNCS);
+ }
+
+ let sum_mv = mvs[0] + mvs[1] + mvs[2] + mvs[3];
+ let cmx = (sum_mv.x >> 3) + H263_CHROMA_ROUND[(sum_mv.x & 0xF) as usize];
+ let cmy = (sum_mv.y >> 3) + H263_CHROMA_ROUND[(sum_mv.y & 0xF) as usize];
+ let mode = ((cmx & 1) + (cmy & 1) * 2) as usize;
+ for plane in 1..3 {
+ blockdsp::copy_block(&mut dst, src.clone(), plane, xpos >> 1, ypos >> 1, cmx >> 1, cmy >> 1, 8, 8, 0, 1, mode, H263_INTERP_FUNCS);
+ }
+ }
+ fn avg_blocks(&self, dst: &mut NAVideoBuffer<u8>, src: NAVideoBufferRef<u8>, xpos: usize, ypos: usize, mv: MV) {
+ let mode = ((mv.x & 1) + (mv.y & 1) * 2) as usize;
+ let cmode = (if (mv.x & 3) != 0 { 1 } else { 0 }) + (if (mv.y & 3) != 0 { 2 } else { 0 });
+
+ let mut dst = NASimpleVideoFrame::from_video_buf(dst).unwrap();
+
+ blockdsp::copy_block(&mut dst, src.clone(), 0, xpos, ypos, mv.x >> 1, mv.y >> 1, 16, 16, 0, 1, mode, H263_INTERP_AVG_FUNCS);
+ blockdsp::copy_block(&mut dst, src.clone(), 1, xpos >> 1, ypos >> 1, mv.x >> 2, mv.y >> 2, 8, 8, 0, 1, cmode, H263_INTERP_AVG_FUNCS);
+ blockdsp::copy_block(&mut dst, src, 2, xpos >> 1, ypos >> 1, mv.x >> 2, mv.y >> 2, 8, 8, 0, 1, cmode, H263_INTERP_AVG_FUNCS);
+ }
+ fn avg_blocks8x8(&self, dst: &mut NAVideoBuffer<u8>, src: NAVideoBufferRef<u8>, xpos: usize, ypos: usize, mvs: &[MV; 4]) {
+ let mut dst = NASimpleVideoFrame::from_video_buf(dst).unwrap();
+
+ for (i, mv) in mvs.iter().enumerate() {
+ let xadd = (i & 1) * 8;
+ let yadd = (i & 2) * 4;
+ let mode = ((mv.x & 1) + (mv.y & 1) * 2) as usize;
+
+ blockdsp::copy_block(&mut dst, src.clone(), 0, xpos + xadd, ypos + yadd, mv.x >> 1, mv.y >> 1, 8, 8, 0, 1, mode, H263_INTERP_AVG_FUNCS);
+ }
+
+ let sum_mv = mvs[0] + mvs[1] + mvs[2] + mvs[3];
+ let cmx = (sum_mv.x >> 3) + H263_CHROMA_ROUND[(sum_mv.x & 0xF) as usize];
+ let cmy = (sum_mv.y >> 3) + H263_CHROMA_ROUND[(sum_mv.y & 0xF) as usize];
+ let mode = ((cmx & 1) + (cmy & 1) * 2) as usize;
+ for plane in 1..3 {
+ blockdsp::copy_block(&mut dst, src.clone(), plane, xpos >> 1, ypos >> 1, cmx >> 1, cmy >> 1, 8, 8, 0, 1, mode, H263_INTERP_AVG_FUNCS);
+ }
+ }
+ fn filter_row(&self, buf: &mut NAVideoBuffer<u8>, mb_y: usize, mb_w: usize, cbpi: &CBPInfo) {
+ let ystride = buf.get_stride(0);
+ let ustride = buf.get_stride(1);
+ let vstride = buf.get_stride(2);
+ let yoff = buf.get_offset(0) + mb_y * 16 * ystride;
+ let uoff = buf.get_offset(1) + mb_y * 8 * ustride;
+ let voff = buf.get_offset(2) + mb_y * 8 * vstride;
+ let buf = buf.get_data_mut().unwrap();
+
+ let mut clip_tab = [0i16; 64];
+ let mut last_q = 0;
+ let mut off = yoff;
+ for mb_x in 0..mb_w {
+ let q = cbpi.get_q(mb_w + mb_x);
+ if q != last_q {
+ gen_clip_tab(&mut clip_tab, q);
+ last_q = q;
+ }
+ if mb_y != 0 {
+ deblock_hor(buf, ystride, off, &clip_tab);
+ deblock_hor(buf, ystride, off + 8, &clip_tab);
+ }
+ deblock_hor(buf, ystride, off + 8 * ystride, &clip_tab);
+ deblock_hor(buf, ystride, off + 8 * ystride + 8, &clip_tab);
+ off += 16;
+ }
+ let mut off = yoff;
+ for mb_x in 0..mb_w {
+ let q = cbpi.get_q(mb_w + mb_x);
+ if q != last_q {
+ gen_clip_tab(&mut clip_tab, q);
+ last_q = q;
+ }
+ if mb_y != 0 {
+ let qtop = cbpi.get_q(mb_x);
+ if qtop != last_q {
+ gen_clip_tab(&mut clip_tab, qtop);
+ last_q = qtop;
+ }
+ if mb_x != 0 {
+ deblock_ver(buf, ystride, off - 8 * ystride, &clip_tab);
+ }
+ deblock_ver(buf, ystride, off - 8 * ystride + 8, &clip_tab);
+ }
+ if mb_x != 0 {
+ deblock_ver(buf, ystride, off, &clip_tab);
+ deblock_ver(buf, ystride, off + 8, &clip_tab);
+ }
+ off += 16;
+ }
+ if mb_y != 0 {
+ for mb_x in 0..mb_w {
+ let q = cbpi.get_q(mb_w + mb_x);
+ if q != last_q {
+ gen_clip_tab(&mut clip_tab, q);
+ last_q = q;
+ }
+ deblock_hor(buf, ustride, uoff + mb_x * 8, &clip_tab);
+ deblock_hor(buf, vstride, voff + mb_x * 8, &clip_tab);
+ }
+ let offu = uoff - 8 * ustride;
+ let offv = voff - 8 * vstride;
+ for mb_x in 1..mb_w {
+ let qt = cbpi.get_q(mb_x);
+ if qt != last_q {
+ gen_clip_tab(&mut clip_tab, qt);
+ last_q = qt;
+ }
+ deblock_ver(buf, ustride, offu + mb_x * 8, &clip_tab);
+ deblock_ver(buf, vstride, offv + mb_x * 8, &clip_tab);
+ }
+ }
+ }
+}
+