| 1 | use nihav_core::frame::*; |
| 2 | |
/// Saturates a signed 16-bit value into the `0..=255` range and returns it as a byte.
///
/// Negative inputs map to `0`, inputs above `255` map to `255`.
fn clip_u8(val: i16) -> u8 {
    if val < 0 {
        0
    } else if val > 255 {
        255
    } else {
        val as u8
    }
}
| 6 | |
/// Sixteen integer transform constants laid out as four rows of four.
///
/// The three magnitudes are approximately `cos(pi/4)`, `cos(pi/8)` and
/// `sin(pi/8)` multiplied by 2^15.
const DCT_COEFFS: [i32; 16] = {
    const C4: i32 = 23170;
    const C2: i32 = 30274;
    const C6: i32 = 12540;
    [
        C4, C4, C4, C4,
        C2, C6, -C6, -C2,
        C4, -C4, -C4, C4,
        C6, -C2, C2, -C6,
    ]
};
| 13 | |
/// Performs the 4x4 inverse transform on `coeffs` in place.
///
/// The block is processed in two passes of the same 4-point butterfly:
/// first over each group of four consecutive elements (results are scaled
/// down by 14 bits), then over each column of the intermediate block
/// (a rounding bias of `0x20000` is added and results are scaled down by
/// 18 bits).
///
/// All intermediate sums use wrapping arithmetic, so extreme coefficient
/// values wrap around instead of triggering an overflow panic in builds
/// with overflow checks enabled. For inputs that do not overflow `i32`
/// the results are the same as with plain arithmetic.
pub fn idct4x4(coeffs: &mut [i16; 16]) {
    // One 4-point butterfly: `bias` is added to the even part before the
    // final right shift by `shift` bits.
    fn pass(s: [i16; 4], bias: i32, shift: u32) -> [i16; 4] {
        let (s0, s1, s2, s3) = (
            i32::from(s[0]),
            i32::from(s[1]),
            i32::from(s[2]),
            i32::from(s[3]),
        );

        // Even part (inputs 0 and 2) and odd part (inputs 1 and 3).
        let t0 = (s0 + s2).wrapping_mul(23170).wrapping_add(bias);
        let t1 = (s0 - s2).wrapping_mul(23170).wrapping_add(bias);
        let t2 = s1.wrapping_mul(30274).wrapping_add(s3.wrapping_mul(12540));
        let t3 = s1.wrapping_mul(12540).wrapping_sub(s3.wrapping_mul(30274));

        [
            (t0.wrapping_add(t2) >> shift) as i16,
            (t1.wrapping_add(t3) >> shift) as i16,
            (t1.wrapping_sub(t3) >> shift) as i16,
            (t0.wrapping_sub(t2) >> shift) as i16,
        ]
    }

    // First pass: groups of four consecutive elements, no rounding bias.
    let mut tmp = [0i16; 16];
    for (src, dst) in coeffs.chunks_exact(4).zip(tmp.chunks_exact_mut(4)) {
        dst.copy_from_slice(&pass([src[0], src[1], src[2], src[3]], 0, 14));
    }

    // Second pass: elements spaced four apart, with rounding before the shift.
    for col in 0..4 {
        let out = pass(
            [tmp[col], tmp[col + 4], tmp[col + 8], tmp[col + 12]],
            0x20000,
            18,
        );
        for (row, &v) in out.iter().enumerate() {
            coeffs[col + row * 4] = v;
        }
    }
}
| 49 | |
| 50 | pub fn idct4x4_dc(coeffs: &mut [i16; 16]) { |
| 51 | let dc = ((((i32::from(coeffs[0]) * DCT_COEFFS[0]) >> 14) * DCT_COEFFS[0] + 0x20000) >> 18) as i16; |
| 52 | for el in coeffs.iter_mut() { |
| 53 | *el = dc; |
| 54 | } |
| 55 | } |
| 56 | |
/// Returns `(p1 - q1) + 3 * (q0 - p0)`, the raw filter strength computed
/// from the four samples passed by the loop filters.
fn delta(p1: i16, p0: i16, q0: i16, q1: i16) -> i16 {
    let outer = p1 - q1;
    let inner = q0 - p0;
    outer + 3 * inner
}
| 60 | |
/// Function-pointer type matching the signature of `simple_loop_filter`,
/// `normal_loop_filter_inner` and `normal_loop_filter_edge`.
///
/// In those functions `off` is the index of the first filtered position,
/// `step` is the distance between samples across the filtered boundary,
/// `stride` is added to `off` after each position and `len` is the number of
/// positions processed.
pub type LoopFilterFunc = fn(
    buf: &mut [u8],
    off: usize,
    step: usize,
    stride: usize,
    len: usize,
    thr: i16,
    thr_inner: i16,
    thr_hev: i16,
);
| 62 | |
| 63 | pub fn simple_loop_filter(buf: &mut [u8], mut off: usize, step: usize, stride: usize, len: usize, thr: i16, _thr_inner: i16, _thr_hev: i16) { |
| 64 | for _ in 0..len { |
| 65 | let p1 = i16::from(buf[off - step * 2]); |
| 66 | let p0 = i16::from(buf[off - step * 1]); |
| 67 | let q0 = i16::from(buf[off + step * 0]); |
| 68 | let q1 = i16::from(buf[off + step * 1]); |
| 69 | let dpq = p0 - q0; |
| 70 | if dpq.abs() < thr { |
| 71 | let diff = delta(p1, p0, q0, q1); |
| 72 | let diffq0 = (diff.min(127) + 4) >> 3; |
| 73 | let diffp0 = diffq0 - if (diff & 7) == 4 { 1 } else { 0 }; |
| 74 | buf[off - step * 1] = clip_u8(p0 + diffp0); |
| 75 | buf[off + step * 0] = clip_u8(q0 - diffq0); |
| 76 | } |
| 77 | off += stride; |
| 78 | } |
| 79 | } |
| 80 | |
| 81 | fn normal_loop_filter(buf: &mut [u8], mut off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16, edge: bool) { |
| 82 | for _ in 0..len { |
| 83 | let p0 = i16::from(buf[off - step * 1]); |
| 84 | let q0 = i16::from(buf[off + step * 0]); |
| 85 | let dpq = p0 - q0; |
| 86 | if dpq.abs() <= thr { |
| 87 | let p3 = i16::from(buf[off - step * 4]); |
| 88 | let p2 = i16::from(buf[off - step * 3]); |
| 89 | let p1 = i16::from(buf[off - step * 2]); |
| 90 | let q1 = i16::from(buf[off + step * 1]); |
| 91 | let q2 = i16::from(buf[off + step * 2]); |
| 92 | let q3 = i16::from(buf[off + step * 3]); |
| 93 | let dp2 = p3 - p2; |
| 94 | let dp1 = p2 - p1; |
| 95 | let dp0 = p1 - p0; |
| 96 | let dq0 = q1 - q0; |
| 97 | let dq1 = q2 - q1; |
| 98 | let dq2 = q3 - q2; |
| 99 | if (dp0.abs() <= thr_inner) && (dp1.abs() <= thr_inner) && |
| 100 | (dp2.abs() <= thr_inner) && (dq0.abs() <= thr_inner) && |
| 101 | (dq1.abs() <= thr_inner) && (dq2.abs() <= thr_inner) { |
| 102 | let high_edge_variation = (dp0.abs() > thr_hev) || (dq0.abs() > thr_hev); |
| 103 | if high_edge_variation { |
| 104 | let diff = delta(p1, p0, q0, q1); |
| 105 | let diffq0 = (diff.min(127) + 4) >> 3; |
| 106 | let diffp0 = diffq0 - if (diff & 7) == 4 { 1 } else { 0 }; |
| 107 | buf[off - step * 1] = clip_u8(p0 + diffp0); |
| 108 | buf[off + step * 0] = clip_u8(q0 - diffq0); |
| 109 | } else if edge { |
| 110 | let d = delta(p1, p0, q0, q1); |
| 111 | let diff0 = (d * 27 + 63) >> 7; |
| 112 | buf[off - step * 1] = clip_u8(p0 + diff0); |
| 113 | buf[off + step * 0] = clip_u8(q0 - diff0); |
| 114 | let diff1 = (d * 18 + 63) >> 7; |
| 115 | buf[off - step * 2] = clip_u8(p1 + diff1); |
| 116 | buf[off + step * 1] = clip_u8(q1 - diff1); |
| 117 | let diff2 = (d * 9 + 63) >> 7; |
| 118 | buf[off - step * 3] = clip_u8(p2 + diff2); |
| 119 | buf[off + step * 2] = clip_u8(q2 - diff2); |
| 120 | } else { |
| 121 | let diff = 3 * (q0 - p0); |
| 122 | let diffq0 = (diff.min(127) + 4) >> 3; |
| 123 | let diffp0 = diffq0 - if (diff & 7) == 4 { 1 } else { 0 }; |
| 124 | buf[off - step * 1] = clip_u8(p0 + diffp0); |
| 125 | buf[off + step * 0] = clip_u8(q0 - diffq0); |
| 126 | let diff2 = (diffq0 + 1) >> 1; |
| 127 | buf[off - step * 2] = clip_u8(p1 + diff2); |
| 128 | buf[off + step * 1] = clip_u8(q1 - diff2); |
| 129 | } |
| 130 | } |
| 131 | } |
| 132 | off += stride; |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | pub fn normal_loop_filter_inner(buf: &mut [u8], off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16) { |
| 137 | normal_loop_filter(buf, off, step, stride, len, thr, thr_inner, thr_hev, false); |
| 138 | } |
| 139 | |
| 140 | pub fn normal_loop_filter_edge(buf: &mut [u8], off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16) { |
| 141 | normal_loop_filter(buf, off, step, stride, len, thr, thr_inner, thr_hev, true); |
| 142 | } |
| 143 | |
| 144 | pub fn fade_frame(srcfrm: NAVideoBufferRef<u8>, dstfrm: &mut NASimpleVideoFrame<u8>, alpha: u16, beta: u16) { |
| 145 | let mut fade_lut = [0u8; 256]; |
| 146 | for (i, el) in fade_lut.iter_mut().enumerate() { |
| 147 | let y = i as u16; |
| 148 | *el = (y + ((y * beta) >> 8) + alpha).max(0).min(255) as u8; |
| 149 | } |
| 150 | |
| 151 | let (w, h) = srcfrm.get_dimensions(0); |
| 152 | let (wa, ha) = ((w + 15) & !15, (h + 15) & !15); |
| 153 | let soff = srcfrm.get_offset(0); |
| 154 | let sstride = srcfrm.get_stride(0); |
| 155 | let sdata = srcfrm.get_data(); |
| 156 | let src = &sdata[soff..]; |
| 157 | let dstride = dstfrm.stride[0]; |
| 158 | let dst = &mut dstfrm.data[dstfrm.offset[0]..]; |
| 159 | for (src, dst) in src.chunks(sstride).zip(dst.chunks_mut(dstride)).take(ha) { |
| 160 | for (s, d) in src.iter().zip(dst.iter_mut()).take(wa) { |
| 161 | *d = fade_lut[*s as usize]; |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | for plane in 1..3 { |
| 166 | let (w, h) = srcfrm.get_dimensions(plane); |
| 167 | let (wa, ha) = ((w + 7) & !7, (h + 7) & !7); |
| 168 | let soff = srcfrm.get_offset(plane); |
| 169 | let sstride = srcfrm.get_stride(plane); |
| 170 | let sdata = srcfrm.get_data(); |
| 171 | let src = &sdata[soff..]; |
| 172 | let dstride = dstfrm.stride[plane]; |
| 173 | let dst = &mut dstfrm.data[dstfrm.offset[plane]..]; |
| 174 | for (src, dst) in src.chunks(sstride).zip(dst.chunks_mut(dstride)).take(ha) { |
| 175 | (&mut dst[0..wa]).copy_from_slice(&src[0..wa]); |
| 176 | } |
| 177 | } |
| 178 | } |