| 1 | use nihav_core::codecs::*; |
| 2 | use nihav_core::codecs::blockdsp::*; |
| 3 | |
/// Block coding type tag shared by the helpers in this file.
///
/// The name suggests "VP macroblock type" (NOTE(review): confirm against the
/// decoders that use it). `get_ref_id` groups the variants into three
/// classes: `Intra`, the `Inter*` variants and the `Golden*` variants.
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(dead_code)]
pub enum VPMBType {
    /// Reported as reference id 0 by `get_ref_id`; also the `Default` value.
    Intra,
    /// `Inter*` variants are reported as reference id 1 by `get_ref_id`.
    InterNoMV,
    InterMV,
    InterNearest,
    InterNear,
    InterFourMV,
    /// `Golden*` variants are reported as reference id 2 by `get_ref_id`.
    GoldenNoMV,
    GoldenMV,
    GoldenNearest,
    GoldenNear,
}
| 18 | |
| 19 | #[allow(dead_code)] |
| 20 | impl VPMBType { |
| 21 | pub fn is_intra(self) -> bool { self == VPMBType::Intra } |
| 22 | pub fn get_ref_id(self) -> u8 { |
| 23 | match self { |
| 24 | VPMBType::Intra => 0, |
| 25 | VPMBType::InterNoMV | |
| 26 | VPMBType::InterMV | |
| 27 | VPMBType::InterNearest | |
| 28 | VPMBType::InterNear | |
| 29 | VPMBType::InterFourMV => 1, |
| 30 | _ => 2, |
| 31 | } |
| 32 | } |
| 33 | } |
| 34 | |
| 35 | impl Default for VPMBType { |
| 36 | fn default() -> Self { VPMBType::Intra } |
| 37 | } |
| 38 | |
| 39 | #[derive(Default)] |
| 40 | pub struct VPShuffler { |
| 41 | lastframe: Option<NAVideoBufferRef<u8>>, |
| 42 | goldframe: Option<NAVideoBufferRef<u8>>, |
| 43 | } |
| 44 | |
| 45 | impl VPShuffler { |
| 46 | pub fn new() -> Self { VPShuffler { lastframe: None, goldframe: None } } |
| 47 | pub fn clear(&mut self) { self.lastframe = None; self.goldframe = None; } |
| 48 | pub fn add_frame(&mut self, buf: NAVideoBufferRef<u8>) { |
| 49 | self.lastframe = Some(buf); |
| 50 | } |
| 51 | pub fn add_golden_frame(&mut self, buf: NAVideoBufferRef<u8>) { |
| 52 | self.goldframe = Some(buf); |
| 53 | } |
| 54 | pub fn get_last(&mut self) -> Option<NAVideoBufferRef<u8>> { |
| 55 | if let Some(ref frm) = self.lastframe { |
| 56 | Some(frm.clone()) |
| 57 | } else { |
| 58 | None |
| 59 | } |
| 60 | } |
| 61 | pub fn get_golden(&mut self) -> Option<NAVideoBufferRef<u8>> { |
| 62 | if let Some(ref frm) = self.goldframe { |
| 63 | Some(frm.clone()) |
| 64 | } else { |
| 65 | None |
| 66 | } |
| 67 | } |
| 68 | } |
| 69 | |
// Fixed-point multipliers for the inverse DCT below.
// Each value equals round(65536 * cos(k * PI / 16)) for k = 1..=7, which is
// why products are shifted right by 16 bits in `mul16`.
/// round(65536 * cos(1 * PI / 16))
const C1S7: i32 = 64_277;
/// round(65536 * cos(2 * PI / 16))
const C2S6: i32 = 60_547;
/// round(65536 * cos(3 * PI / 16))
const C3S5: i32 = 54_491;
/// round(65536 * cos(4 * PI / 16))
const C4S4: i32 = 46_341;
/// round(65536 * cos(5 * PI / 16))
const C5S3: i32 = 36_410;
/// round(65536 * cos(6 * PI / 16))
const C6S2: i32 = 25_080;
/// round(65536 * cos(7 * PI / 16))
const C7S1: i32 = 12_785;
| 77 | |
/// Multiplies `a` by `b` and returns the product shifted right by 16 bits
/// (arithmetic shift, so negative results round towards negative infinity).
///
/// The product is computed with wrapping 32-bit arithmetic. IDCT
/// intermediates built from extreme coefficients can exceed the `i32` range;
/// a plain `a * b` would panic on them in overflow-checked builds while
/// silently wrapping in release builds. Wrapping explicitly gives every
/// build profile the release-build result.
fn mul16(a: i32, b: i32) -> i32 {
    a.wrapping_mul(b) >> 16
}
| 81 | |
/// One 8-point inverse DCT pass.
///
/// * `$s0..$s7` - input expressions (any integer type accepted by
///   `i32::from` that also provides `wrapping_add`/`wrapping_sub`; the
///   callers in this file pass `i16` and `i32` values).
/// * `$d0..$d7` - assignable output places.
/// * `$bias`    - rounding term added to the even part before the shift.
/// * `$shift`   - right shift applied to every output.
/// * `$otype`   - type each output is cast to with `as`.
///
/// Relies on `mul16` and the `C?S?` constants being in scope at the call
/// site.
///
/// The `$s0 +/- $s4` terms are formed in the source type before widening.
/// They use wrapping arithmetic so that large `i16` coefficients (which
/// `unquant` can legitimately produce through its own wrapping multiply)
/// do not panic in overflow-checked builds; the result is the same value a
/// release build produced with the plain operators.
macro_rules! idct_step {
    ($s0:expr, $s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr,
     $d0:expr, $d1:expr, $d2:expr, $d3:expr, $d4:expr, $d5:expr, $d6:expr, $d7:expr,
     $bias:expr, $shift:expr, $otype:ty) => {
        // Odd-indexed inputs.
        let t_a = mul16(C1S7, i32::from($s1)) + mul16(C7S1, i32::from($s7));
        let t_b = mul16(C7S1, i32::from($s1)) - mul16(C1S7, i32::from($s7));
        let t_c = mul16(C3S5, i32::from($s3)) + mul16(C5S3, i32::from($s5));
        let t_d = mul16(C3S5, i32::from($s5)) - mul16(C5S3, i32::from($s3));
        let t_a1 = mul16(C4S4, t_a - t_c);
        let t_b1 = mul16(C4S4, t_b - t_d);
        let t_c = t_a + t_c;
        let t_d = t_b + t_d;
        // Even-indexed inputs; the bias is injected here so that it reaches
        // every output exactly once.
        let t_e = mul16(C4S4, i32::from($s0.wrapping_add($s4))) + $bias;
        let t_f = mul16(C4S4, i32::from($s0.wrapping_sub($s4))) + $bias;
        let t_g = mul16(C2S6, i32::from($s2)) + mul16(C6S2, i32::from($s6));
        let t_h = mul16(C6S2, i32::from($s2)) - mul16(C2S6, i32::from($s6));
        let t_e1 = t_e - t_g;
        let t_g = t_e + t_g;
        let t_a = t_f + t_a1;
        let t_f = t_f - t_a1;
        let t_b = t_b1 - t_h;
        let t_h = t_b1 + t_h;

        // Final butterflies.
        $d0 = ((t_g + t_c) >> $shift) as $otype;
        $d7 = ((t_g - t_c) >> $shift) as $otype;
        $d1 = ((t_a + t_h) >> $shift) as $otype;
        $d2 = ((t_a - t_h) >> $shift) as $otype;
        $d3 = ((t_e1 + t_d) >> $shift) as $otype;
        $d4 = ((t_e1 - t_d) >> $shift) as $otype;
        $d5 = ((t_f + t_b) >> $shift) as $otype;
        $d6 = ((t_f - t_b) >> $shift) as $otype;
    }
}
| 115 | |
| 116 | pub fn vp_idct(coeffs: &mut [i16; 64]) { |
| 117 | let mut tmp = [0i32; 64]; |
| 118 | for (src, dst) in coeffs.chunks(8).zip(tmp.chunks_mut(8)) { |
| 119 | idct_step!(src[0], src[1], src[2], src[3], src[4], src[5], src[6], src[7], |
| 120 | dst[0], dst[1], dst[2], dst[3], dst[4], dst[5], dst[6], dst[7], 0, 0, i32); |
| 121 | } |
| 122 | let src = &tmp; |
| 123 | let dst = coeffs; |
| 124 | for i in 0..8 { |
| 125 | idct_step!(src[0 * 8 + i], src[1 * 8 + i], src[2 * 8 + i], src[3 * 8 + i], |
| 126 | src[4 * 8 + i], src[5 * 8 + i], src[6 * 8 + i], src[7 * 8 + i], |
| 127 | dst[0 * 8 + i], dst[1 * 8 + i], dst[2 * 8 + i], dst[3 * 8 + i], |
| 128 | dst[4 * 8 + i], dst[5 * 8 + i], dst[6 * 8 + i], dst[7 * 8 + i], 8, 4, i16); |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | pub fn vp_idct_dc(coeffs: &mut [i16; 64]) { |
| 133 | let dc = ((mul16(C4S4, mul16(C4S4, i32::from(coeffs[0]))) + 8) >> 4) as i16; |
| 134 | for i in 0..64 { |
| 135 | coeffs[i] = dc; |
| 136 | } |
| 137 | } |
| 138 | |
/// Multiplies every coefficient except the first by the matching entry of
/// `qmat`, in place.
///
/// `coeffs[0]` is left untouched. Products wrap around on `i16` overflow.
pub fn unquant(coeffs: &mut [i16; 64], qmat: &[i16; 64]) {
    for (coef, &quant) in coeffs.iter_mut().zip(qmat.iter()).skip(1) {
        *coef = coef.wrapping_mul(quant);
    }
}
| 144 | |
| 145 | pub fn vp_put_block(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) { |
| 146 | vp_idct(coeffs); |
| 147 | let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane]; |
| 148 | for y in 0..8 { |
| 149 | for x in 0..8 { |
| 150 | frm.data[off + x] = (coeffs[x + y * 8] + 128).min(255).max(0) as u8; |
| 151 | } |
| 152 | off += frm.stride[plane]; |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | pub fn vp_put_block_dc(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) { |
| 157 | vp_idct_dc(coeffs); |
| 158 | let dc = (coeffs[0] + 128).min(255).max(0) as u8; |
| 159 | let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane]; |
| 160 | for _ in 0..8 { |
| 161 | for x in 0..8 { |
| 162 | frm.data[off + x] = dc; |
| 163 | } |
| 164 | off += frm.stride[plane]; |
| 165 | } |
| 166 | } |
| 167 | |
| 168 | pub fn vp_add_block(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) { |
| 169 | vp_idct(coeffs); |
| 170 | let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane]; |
| 171 | for y in 0..8 { |
| 172 | for x in 0..8 { |
| 173 | frm.data[off + x] = (coeffs[x + y * 8] + (frm.data[off + x] as i16)).min(255).max(0) as u8; |
| 174 | } |
| 175 | off += frm.stride[plane]; |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | pub fn vp_add_block_dc(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) { |
| 180 | vp_idct_dc(coeffs); |
| 181 | let dc = coeffs[0]; |
| 182 | let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane]; |
| 183 | for _ in 0..8 { |
| 184 | for x in 0..8 { |
| 185 | frm.data[off + x] = (dc + (frm.data[off + x] as i16)).min(255).max(0) as u8; |
| 186 | } |
| 187 | off += frm.stride[plane]; |
| 188 | } |
| 189 | } |
| 190 | |
/// Filters `len` positions along one edge inside `data`.
///
/// For each position the four samples at `off - 2 * step`, `off - step`,
/// `off` and `off + step` (called a, b, c, d) are read and
/// `((a - d) + 3 * (c - b) + 4) >> 3` is computed. That value is then
/// limited by `loop_str`:
///
/// * magnitude `>= 2 * loop_str`: no change is applied;
/// * magnitude in `loop_str..2 * loop_str`: the correction is folded back
///   towards zero (`2 * loop_str - magnitude`, with the original sign);
/// * otherwise it is used as is.
///
/// A non-zero correction is added to `b` and subtracted from `c`, both
/// clamped to `0..=255`. `off` advances by `stride` after every position.
///
/// Panics if any of the accessed indices is outside `data` (including
/// `off < 2 * step`).
pub fn vp31_loop_filter(data: &mut [u8], mut off: usize, step: usize, stride: usize,
                        len: usize, loop_str: i16) {
    for _ in 0..len {
        let far = off - step * 2;
        let near = off - step;
        let a = i16::from(data[far]);
        let b = i16::from(data[near]);
        let c = i16::from(data[off]);
        let d = i16::from(data[off + step]);

        let raw = ((a - d) + 3 * (c - b) + 4) >> 3;
        let magnitude = raw.abs();
        let delta = if magnitude >= 2 * loop_str {
            0
        } else if magnitude < loop_str {
            raw
        } else if raw < 0 {
            -raw - 2 * loop_str
        } else {
            -raw + 2 * loop_str
        };

        if delta != 0 {
            data[near] = (b + delta).max(0).min(255) as u8;
            data[off] = (c - delta).max(0).min(255) as u8;
        }

        off += stride;
    }
}
| 216 | |
| 217 | pub fn vp_copy_block(dst: &mut NASimpleVideoFrame<u8>, src: NAVideoBufferRef<u8>, comp: usize, |
| 218 | dx: usize, dy: usize, mv_x: i16, mv_y: i16, |
| 219 | preborder: usize, postborder: usize, loop_str: i16, |
| 220 | mode: usize, interp: &[BlkInterpFunc], mut mc_buf: NAVideoBufferRef<u8>) |
| 221 | { |
| 222 | let sx = (dx as isize) + (mv_x as isize); |
| 223 | let sy = (dy as isize) + (mv_y as isize); |
| 224 | if ((sx | sy) & 7) == 0 { |
| 225 | copy_block(dst, src, comp, dx, dy, mv_x, mv_y, 8, 8, preborder, postborder, mode, interp); |
| 226 | return; |
| 227 | } |
| 228 | let pre = preborder.max(2); |
| 229 | let post = postborder.max(1); |
| 230 | let bsize = 8 + pre + post; |
| 231 | let src_x = sx - (pre as isize); |
| 232 | let src_y = sy - (pre as isize); |
| 233 | { |
| 234 | let mut tmp_buf = NASimpleVideoFrame::from_video_buf(&mut mc_buf).unwrap(); |
| 235 | copy_block(&mut tmp_buf, src, comp, 0, 0, src_x as i16, src_y as i16, |
| 236 | bsize, bsize, 0, 0, 0, interp); |
| 237 | if (sy & 7) != 0 { |
| 238 | let foff = (8 - (sy & 7)) as usize; |
| 239 | let off = (pre + foff) * tmp_buf.stride[comp]; |
| 240 | vp31_loop_filter(tmp_buf.data, off, tmp_buf.stride[comp], 1, bsize, loop_str); |
| 241 | } |
| 242 | if (sx & 7) != 0 { |
| 243 | let foff = (8 - (sx & 7)) as usize; |
| 244 | let off = pre + foff; |
| 245 | vp31_loop_filter(tmp_buf.data, off, 1, tmp_buf.stride[comp], bsize, loop_str); |
| 246 | } |
| 247 | } |
| 248 | let dxoff = (pre as i16) - (dx as i16); |
| 249 | let dyoff = (pre as i16) - (dy as i16); |
| 250 | copy_block(dst, mc_buf, comp, dx, dy, dxoff, dyoff, 8, 8, preborder, postborder, 0/* mode*/, interp); |
| 251 | } |