| 1 | use nihav_core::codecs::*; |
| 2 | |
/// Macroblock coding mode.
///
/// `get_ref_id` sorts the variants into three groups: `Intra` (id 0), the
/// `Inter*` modes (id 1) and the `Golden*` modes (id 2).
// NOTE(review): the ids presumably select "no reference", "previous frame" and
// "golden frame" respectively - confirm against the decoders using this type.
#[derive(Clone, Copy, Debug, PartialEq)]
#[allow(dead_code)]
pub enum VPMBType {
    /// The only mode for which `is_intra` returns `true`.
    Intra,
    // Modes mapped to reference id 1.
    InterNoMV,
    InterMV,
    InterNearest,
    InterNear,
    InterFourMV,
    // Modes mapped to reference id 2.
    GoldenNoMV,
    GoldenMV,
    GoldenNearest,
    GoldenNear,
}

#[allow(dead_code)]
impl VPMBType {
    /// Returns `true` only for `VPMBType::Intra`.
    pub fn is_intra(self) -> bool {
        match self {
            VPMBType::Intra => true,
            _ => false,
        }
    }

    /// Returns the reference id of this mode: 0 for `Intra`, 1 for every
    /// `Inter*` mode and 2 for every `Golden*` mode.
    pub fn get_ref_id(self) -> u8 {
        match self {
            VPMBType::Intra => 0,
            VPMBType::InterNoMV
            | VPMBType::InterMV
            | VPMBType::InterNearest
            | VPMBType::InterNear
            | VPMBType::InterFourMV => 1,
            VPMBType::GoldenNoMV
            | VPMBType::GoldenMV
            | VPMBType::GoldenNearest
            | VPMBType::GoldenNear => 2,
        }
    }
}

/// The default macroblock mode is `Intra`.
impl Default for VPMBType {
    fn default() -> Self {
        VPMBType::Intra
    }
}
| 37 | |
| 38 | #[derive(Default)] |
| 39 | pub struct VPShuffler { |
| 40 | lastframe: Option<NAVideoBufferRef<u8>>, |
| 41 | goldframe: Option<NAVideoBufferRef<u8>>, |
| 42 | } |
| 43 | |
| 44 | impl VPShuffler { |
| 45 | pub fn new() -> Self { VPShuffler { lastframe: None, goldframe: None } } |
| 46 | pub fn clear(&mut self) { self.lastframe = None; self.goldframe = None; } |
| 47 | pub fn add_frame(&mut self, buf: NAVideoBufferRef<u8>) { |
| 48 | self.lastframe = Some(buf); |
| 49 | } |
| 50 | pub fn add_golden_frame(&mut self, buf: NAVideoBufferRef<u8>) { |
| 51 | self.goldframe = Some(buf); |
| 52 | } |
| 53 | pub fn get_last(&mut self) -> Option<NAVideoBufferRef<u8>> { |
| 54 | if let Some(ref frm) = self.lastframe { |
| 55 | Some(frm.clone()) |
| 56 | } else { |
| 57 | None |
| 58 | } |
| 59 | } |
| 60 | pub fn get_golden(&mut self) -> Option<NAVideoBufferRef<u8>> { |
| 61 | if let Some(ref frm) = self.goldframe { |
| 62 | Some(frm.clone()) |
| 63 | } else { |
| 64 | None |
| 65 | } |
| 66 | } |
| 67 | } |
| 68 | |
// IDCT multipliers: `CkSj` is cos(k * pi / 16) (equivalently sin(j * pi / 16),
// with j = 8 - k) scaled by 65536 and rounded to the nearest integer.
// They are meant to be applied through `mul16`, which drops the scale again.
const C1S7: i32 = 64_277;
const C2S6: i32 = 60_547;
const C3S5: i32 = 54_491;
const C4S4: i32 = 46_341;
const C5S3: i32 = 36_410;
const C6S2: i32 = 25_080;
const C7S1: i32 = 12_785;
| 76 | |
/// Multiplies `a` by `b` and drops the low 16 bits of the product
/// (arithmetic shift, so negative results round towards negative infinity).
///
/// The product is formed with wrapping 32-bit arithmetic: a plain `a * b`
/// panics in builds with overflow checks once the IDCT is fed large
/// intermediates, whereas wrapping keeps the result identical to what
/// unchecked builds already produced and mirrors the `wrapping_mul` used by
/// `unquant`.
fn mul16(a: i32, b: i32) -> i32 {
    a.wrapping_mul(b) >> 16
}
| 80 | |
/// One 8-point inverse DCT pass.
///
/// * `$s0..$s7` - the eight inputs (any type `i32::from` accepts);
/// * `$d0..$d7` - the eight places the outputs are assigned to;
/// * `$bias`    - value added to the two even-part terms built from
///   `$s0`/`$s4` before the butterflies (use 0 for no rounding);
/// * `$shift`   - right shift applied to every output;
/// * `$otype`   - type every output is cast to with `as`.
///
/// Requires `mul16` and the `CkSj` constants to be in scope at the call site.
macro_rules! idct_step {
    ($s0:expr, $s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr,
     $d0:expr, $d1:expr, $d2:expr, $d3:expr, $d4:expr, $d5:expr, $d6:expr, $d7:expr,
     $bias:expr, $shift:expr, $otype:ty) => {
        // Odd part.
        let t_a = mul16(C1S7, i32::from($s1)) + mul16(C7S1, i32::from($s7));
        let t_b = mul16(C7S1, i32::from($s1)) - mul16(C1S7, i32::from($s7));
        let t_c = mul16(C3S5, i32::from($s3)) + mul16(C5S3, i32::from($s5));
        let t_d = mul16(C3S5, i32::from($s5)) - mul16(C5S3, i32::from($s3));
        let t_a1 = mul16(C4S4, t_a - t_c);
        let t_b1 = mul16(C4S4, t_b - t_d);
        let t_c = t_a + t_c;
        let t_d = t_b + t_d;
        // Even part. Widen each operand *before* adding/subtracting: with
        // 16-bit inputs the sum or difference may not fit the input type.
        let t_e = mul16(C4S4, i32::from($s0) + i32::from($s4)) + $bias;
        let t_f = mul16(C4S4, i32::from($s0) - i32::from($s4)) + $bias;
        let t_g = mul16(C2S6, i32::from($s2)) + mul16(C6S2, i32::from($s6));
        let t_h = mul16(C6S2, i32::from($s2)) - mul16(C2S6, i32::from($s6));
        let t_e1 = t_e - t_g;
        let t_g = t_e + t_g;
        let t_a = t_f + t_a1;
        let t_f = t_f - t_a1;
        let t_b = t_b1 - t_h;
        let t_h = t_b1 + t_h;

        // Final butterflies; note the output order is not sequential.
        $d0 = ((t_g + t_c) >> $shift) as $otype;
        $d7 = ((t_g - t_c) >> $shift) as $otype;
        $d1 = ((t_a + t_h) >> $shift) as $otype;
        $d2 = ((t_a - t_h) >> $shift) as $otype;
        $d3 = ((t_e1 + t_d) >> $shift) as $otype;
        $d4 = ((t_e1 - t_d) >> $shift) as $otype;
        $d5 = ((t_f + t_b) >> $shift) as $otype;
        $d6 = ((t_f - t_b) >> $shift) as $otype;
    }
}
| 114 | |
| 115 | pub fn vp_idct(coeffs: &mut [i16; 64]) { |
| 116 | let mut tmp = [0i32; 64]; |
| 117 | for (src, dst) in coeffs.chunks(8).zip(tmp.chunks_mut(8)) { |
| 118 | idct_step!(src[0], src[1], src[2], src[3], src[4], src[5], src[6], src[7], |
| 119 | dst[0], dst[1], dst[2], dst[3], dst[4], dst[5], dst[6], dst[7], 0, 0, i32); |
| 120 | } |
| 121 | let src = &tmp; |
| 122 | let dst = coeffs; |
| 123 | for i in 0..8 { |
| 124 | idct_step!(src[0 * 8 + i], src[1 * 8 + i], src[2 * 8 + i], src[3 * 8 + i], |
| 125 | src[4 * 8 + i], src[5 * 8 + i], src[6 * 8 + i], src[7 * 8 + i], |
| 126 | dst[0 * 8 + i], dst[1 * 8 + i], dst[2 * 8 + i], dst[3 * 8 + i], |
| 127 | dst[4 * 8 + i], dst[5 * 8 + i], dst[6 * 8 + i], dst[7 * 8 + i], 8, 4, i16); |
| 128 | } |
| 129 | } |
| 130 | |
| 131 | pub fn vp_idct_dc(coeffs: &mut [i16; 64]) { |
| 132 | let dc = ((mul16(C4S4, mul16(C4S4, i32::from(coeffs[0]))) + 8) >> 4) as i16; |
| 133 | for i in 0..64 { |
| 134 | coeffs[i] = dc; |
| 135 | } |
| 136 | } |
| 137 | |
/// Multiplies every coefficient except the first by the matching entry of
/// `qmat`, using wrapping 16-bit arithmetic. `coeffs[0]` is left untouched.
pub fn unquant(coeffs: &mut [i16; 64], qmat: &[i16; 64]) {
    for (coef, &q) in coeffs.iter_mut().zip(qmat.iter()).skip(1) {
        *coef = coef.wrapping_mul(q);
    }
}
| 143 | |
| 144 | pub fn vp_put_block(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) { |
| 145 | vp_idct(coeffs); |
| 146 | let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane]; |
| 147 | for y in 0..8 { |
| 148 | for x in 0..8 { |
| 149 | frm.data[off + x] = (coeffs[x + y * 8] + 128).min(255).max(0) as u8; |
| 150 | } |
| 151 | off += frm.stride[plane]; |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | pub fn vp_put_block_dc(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) { |
| 156 | vp_idct_dc(coeffs); |
| 157 | let dc = (coeffs[0] + 128).min(255).max(0) as u8; |
| 158 | let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane]; |
| 159 | for _ in 0..8 { |
| 160 | for x in 0..8 { |
| 161 | frm.data[off + x] = dc; |
| 162 | } |
| 163 | off += frm.stride[plane]; |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | pub fn vp_add_block(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) { |
| 168 | vp_idct(coeffs); |
| 169 | let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane]; |
| 170 | for y in 0..8 { |
| 171 | for x in 0..8 { |
| 172 | frm.data[off + x] = (coeffs[x + y * 8] + (frm.data[off + x] as i16)).min(255).max(0) as u8; |
| 173 | } |
| 174 | off += frm.stride[plane]; |
| 175 | } |
| 176 | } |
| 177 | |
| 178 | pub fn vp_add_block_dc(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) { |
| 179 | vp_idct_dc(coeffs); |
| 180 | let dc = coeffs[0]; |
| 181 | let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane]; |
| 182 | for _ in 0..8 { |
| 183 | for x in 0..8 { |
| 184 | frm.data[off + x] = (dc + (frm.data[off + x] as i16)).min(255).max(0) as u8; |
| 185 | } |
| 186 | off += frm.stride[plane]; |
| 187 | } |
| 188 | } |
| 189 | |
/// Filters `len` positions across an edge in `data`.
///
/// * `off`      - index of the first sample on the far side of the edge;
/// * `step`     - distance between neighbouring samples across the edge
///   (the four taps are `off - 2*step`, `off - step`, `off`, `off + step`);
/// * `stride`   - distance from one filtered position to the next;
/// * `len`      - number of positions to filter;
/// * `loop_str` - filter strength: corrections whose magnitude is below it
///   are applied as is, those in `loop_str..2*loop_str` are reduced and
///   sign-flipped to `-diff +/- 2*loop_str`, and those of `2*loop_str` or
///   more are dropped.
///
/// Only the two samples next to the edge (`off - step` and `off`) are
/// modified. Panics if any tap lies outside `data`.
pub fn vp31_loop_filter(data: &mut [u8], mut off: usize, step: usize, stride: usize,
                        len: usize, loop_str: i16) {
    for _ in 0..len {
        let a = i16::from(data[off - step * 2]);
        let b = i16::from(data[off - step]);
        let c = i16::from(data[off]);
        let d = i16::from(data[off + step]);

        let raw = ((a - d) + 3 * (c - b) + 4) >> 3;
        let magnitude = raw.abs();
        let double_str = 2 * loop_str;
        let diff = if magnitude >= double_str {
            0
        } else if magnitude >= loop_str {
            if raw < 0 {
                -raw - double_str
            } else {
                -raw + double_str
            }
        } else {
            raw
        };

        if diff != 0 {
            data[off - step] = (b + diff).max(0).min(255) as u8;
            data[off] = (c - diff).max(0).min(255) as u8;
        }

        off += stride;
    }
}
| 215 | |