]>
Commit | Line | Data |
---|---|---|
1 | use nihav_core::frame::NAVideoBufferRef; | |
2 | use nihav_codec_support::codecs::blockdsp::edge_emu; | |
3 | ||
/// Saturates a signed 16-bit value into the `0..=255` range and returns it as a byte.
fn clip_u8(val: i16) -> u8 {
    if val < 0 {
        0
    } else if val > 255 {
        255
    } else {
        val as u8
    }
}
7 | ||
/// Computes the common filter adjustment for an edge.
///
/// The outer difference `p1 - q1` is saturated to `-128..=127`, three times the
/// inner difference `q0 - p0` is added, and the sum is saturated to the same range.
fn delta(p1: i16, p0: i16, q0: i16, q1: i16) -> i16 {
    // Saturation to the signed 8-bit range, kept in i16 storage.
    let sat8 = |v: i16| -> i16 { v.max(-128).min(127) };
    let outer = sat8(p1 - q1);
    let inner = 3 * (q0 - p0);
    sat8(outer + inner)
}
11 | ||
/// Function pointer type with the signature shared by the public loop filters in
/// this file (`simple_loop_filter`, `normal_loop_filter_inner` and
/// `normal_loop_filter_edge`), so that a caller can select one of them at run time.
pub type LoopFilterFunc = fn(
    buf: &mut [u8],
    off: usize,
    step: usize,
    stride: usize,
    len: usize,
    thr: i16,
    thr_inner: i16,
    thr_hev: i16,
);
13 | ||
14 | pub fn simple_loop_filter(buf: &mut [u8], mut off: usize, step: usize, stride: usize, len: usize, thr: i16, _thr_inner: i16, _thr_hev: i16) { | |
15 | for _ in 0..len { | |
16 | let p1 = i16::from(buf[off - step * 2]); | |
17 | let p0 = i16::from(buf[off - step * 1]); | |
18 | let q0 = i16::from(buf[off + step * 0]); | |
19 | let q1 = i16::from(buf[off + step * 1]); | |
20 | let diff = (p0 - q0).abs() * 2 + ((p1 - q1).abs() >> 1); | |
21 | if diff <= thr { | |
22 | let diff = delta(p1, p0, q0, q1); | |
23 | let diffq0 = (diff + 4).min(127) >> 3; | |
24 | let diffp0 = (diff + 3).min(127) >> 3; | |
25 | buf[off - step * 1] = clip_u8(p0 + diffp0); | |
26 | buf[off + step * 0] = clip_u8(q0 - diffq0); | |
27 | } | |
28 | off += stride; | |
29 | } | |
30 | } | |
31 | ||
32 | fn normal_loop_filter(buf: &mut [u8], mut off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16, edge: bool) { | |
33 | for _i in 0..len { | |
34 | let p1 = i16::from(buf[off - step * 2]); | |
35 | let p0 = i16::from(buf[off - step * 1]); | |
36 | let q0 = i16::from(buf[off + step * 0]); | |
37 | let q1 = i16::from(buf[off + step * 1]); | |
38 | let diff = (p0 - q0).abs() * 2 + ((p1 - q1).abs() >> 1); | |
39 | if diff <= thr { | |
40 | let p3 = i16::from(buf[off - step * 4]); | |
41 | let p2 = i16::from(buf[off - step * 3]); | |
42 | let p1 = i16::from(buf[off - step * 2]); | |
43 | let q1 = i16::from(buf[off + step * 1]); | |
44 | let q2 = i16::from(buf[off + step * 2]); | |
45 | let q3 = i16::from(buf[off + step * 3]); | |
46 | let dp2 = p3 - p2; | |
47 | let dp1 = p2 - p1; | |
48 | let dp0 = p1 - p0; | |
49 | let dq0 = q1 - q0; | |
50 | let dq1 = q2 - q1; | |
51 | let dq2 = q3 - q2; | |
52 | if (dp0.abs() <= thr_inner) && (dp1.abs() <= thr_inner) && | |
53 | (dp2.abs() <= thr_inner) && (dq0.abs() <= thr_inner) && | |
54 | (dq1.abs() <= thr_inner) && (dq2.abs() <= thr_inner) { | |
55 | let high_edge_variation = (dp0.abs() > thr_hev) || (dq0.abs() > thr_hev); | |
56 | if high_edge_variation { | |
57 | let diff = delta(p1, p0, q0, q1); | |
58 | let diffq0 = (diff + 4).min(127) >> 3; | |
59 | let diffp0 = (diff + 3).min(127) >> 3; | |
60 | buf[off - step * 1] = clip_u8(p0 + diffp0); | |
61 | buf[off + step * 0] = clip_u8(q0 - diffq0); | |
62 | } else if edge { | |
63 | let d = delta(p1, p0, q0, q1); | |
64 | let diff0 = (d * 27 + 63) >> 7; | |
65 | buf[off - step * 1] = clip_u8(p0 + diff0); | |
66 | buf[off + step * 0] = clip_u8(q0 - diff0); | |
67 | let diff1 = (d * 18 + 63) >> 7; | |
68 | buf[off - step * 2] = clip_u8(p1 + diff1); | |
69 | buf[off + step * 1] = clip_u8(q1 - diff1); | |
70 | let diff2 = (d * 9 + 63) >> 7; | |
71 | buf[off - step * 3] = clip_u8(p2 + diff2); | |
72 | buf[off + step * 2] = clip_u8(q2 - diff2); | |
73 | } else { | |
74 | let diff = (3 * (q0 - p0)).max(-128).min(127); | |
75 | let diffq0 = (diff + 4).min(127) >> 3; | |
76 | let diffp0 = (diff + 3).min(127) >> 3; | |
77 | buf[off - step * 1] = clip_u8(p0 + diffp0); | |
78 | buf[off + step * 0] = clip_u8(q0 - diffq0); | |
79 | let diff2 = (diffq0 + 1) >> 1; | |
80 | buf[off - step * 2] = clip_u8(p1 + diff2); | |
81 | buf[off + step * 1] = clip_u8(q1 - diff2); | |
82 | } | |
83 | } | |
84 | } | |
85 | off += stride; | |
86 | } | |
87 | } | |
88 | ||
89 | pub fn normal_loop_filter_inner(buf: &mut [u8], off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16) { | |
90 | normal_loop_filter(buf, off, step, stride, len, thr, thr_inner, thr_hev, false); | |
91 | } | |
92 | ||
93 | pub fn normal_loop_filter_edge(buf: &mut [u8], off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16) { | |
94 | normal_loop_filter(buf, off, step, stride, len, thr, thr_inner, thr_hev, true); | |
95 | } | |
96 | ||
/// In-place inverse 4x4 Walsh-Hadamard transform on a row-major coefficient block.
///
/// The first pass runs the butterfly down each column without scaling; the second
/// pass runs it along each row and scales every output by `(x + 3) >> 3`.
pub fn iwht4x4(coeffs: &mut [i16; 16]) {
    /// Four-point butterfly shared by both passes.
    fn butterfly(x0: i16, x1: i16, x2: i16, x3: i16) -> [i16; 4] {
        let outer_sum = x0 + x3;
        let inner_sum = x1 + x2;
        let inner_diff = x1 - x2;
        let outer_diff = x0 - x3;
        [
            outer_sum + inner_sum,
            inner_diff + outer_diff,
            outer_sum - inner_sum,
            outer_diff - inner_diff,
        ]
    }

    for col in 0..4 {
        let out = butterfly(coeffs[col], coeffs[col + 4], coeffs[col + 8], coeffs[col + 12]);
        for (k, &val) in out.iter().enumerate() {
            coeffs[col + k * 4] = val;
        }
    }
    for row in coeffs.chunks_mut(4) {
        let out = butterfly(row[0], row[1], row[2], row[3]);
        for (dst, &val) in row.iter_mut().zip(out.iter()) {
            *dst = (val + 3) >> 3;
        }
    }
}
123 | ||
/// Shortcut for the inverse Walsh-Hadamard transform of a block in which only the
/// first coefficient is used: every output becomes `(coeffs[0] + 3) >> 3`.
///
/// The other fifteen input values are ignored and overwritten.
pub fn iwht4x4_dc(coeffs: &mut [i16; 16]) {
    let fill = (coeffs[0] + 3) >> 3;
    for slot in coeffs.iter_mut() {
        *slot = fill;
    }
}
128 | ||
/// `sqrt(2) * cos(pi / 8) - 1` in 16-bit fixed point (used with `>> 16`).
const COS_PI8_SQRT2_MINUS1: i32 = 20091;
/// `sqrt(2) * sin(pi / 8)` in 16-bit fixed point (used with `>> 16`).
const SIN_PI8_SQRT2: i32 = 35468;

/// One-dimensional four-point inverse DCT performed in place on four `i16`
/// place expressions.
///
/// The arithmetic is done in `i32`; every result is rounded by adding half of
/// `1 << $shift`, shifted right by `$shift` and truncated back to `i16`.
macro_rules! idct4 {
    ($s0: expr, $s1: expr, $s2: expr, $s3: expr, $shift: expr) => {{
        // Widen all inputs once, before any of the outputs is overwritten.
        let in0 = i32::from($s0);
        let in1 = i32::from($s1);
        let in2 = i32::from($s2);
        let in3 = i32::from($s3);

        let even_sum = in0 + in2;
        let even_diff = in0 - in2;
        let odd_c = ((in1 * SIN_PI8_SQRT2) >> 16) - (in3 + ((in3 * COS_PI8_SQRT2_MINUS1) >> 16));
        let odd_d = (in1 + ((in1 * COS_PI8_SQRT2_MINUS1) >> 16)) + ((in3 * SIN_PI8_SQRT2) >> 16);

        let rounding = (1 << $shift) >> 1;
        $s0 = ((even_sum + odd_d + rounding) >> $shift) as i16;
        $s3 = ((even_sum - odd_d + rounding) >> $shift) as i16;
        $s1 = ((even_diff + odd_c + rounding) >> $shift) as i16;
        $s2 = ((even_diff - odd_c + rounding) >> $shift) as i16;
    }}
}
150 | ||
151 | pub fn idct4x4(coeffs: &mut [i16; 16]) { | |
152 | for i in 0..4 { | |
153 | idct4!(coeffs[i], coeffs[i + 4], coeffs[i + 8], coeffs[i + 12], 0); | |
154 | } | |
155 | for row in coeffs.chunks_mut(4) { | |
156 | idct4!(row[0], row[1], row[2], row[3], 3); | |
157 | } | |
158 | } | |
159 | ||
/// Shortcut for the inverse DCT of a block in which only the first coefficient is
/// used: every output becomes `(coeffs[0] + 4) >> 3`.
///
/// The other fifteen input values are ignored and overwritten.
pub fn idct4x4_dc(coeffs: &mut [i16; 16]) {
    let fill = (coeffs[0] + 4) >> 3;
    for slot in coeffs.iter_mut() {
        *slot = fill;
    }
}
/// Blends the samples at `$src[$off]` and `$src[$off + $step]`.
///
/// `$mode` is the weight of the second sample in eighths; the first sample gets
/// `8 - $mode`. The result is rounded (`+ 4`, `>> 3`), clamped to `0..=255` and
/// returned as `u8`.
macro_rules! interpolate {
    ($src: expr, $off: expr, $step: expr, $mode: expr) => {{
        let first = i32::from($src[$off]);
        let second = i32::from($src[$off + $step]);
        let w_first = (8 - $mode) as i32;
        let w_second = $mode as i32;
        let blended = (w_first * first + w_second * second + 4) >> 3;
        blended.max(0).min(255) as u8
    }}
}
173 | ||
174 | const TMP_STRIDE: usize = 16; | |
175 | ||
176 | fn mc_block_common(dst: &mut [u8], mut doff: usize, dstride: usize, src: &[u8], sstride: usize, size: usize, mx: usize, my: usize) { | |
177 | if (mx == 0) && (my == 0) { | |
178 | let dst = &mut dst[doff..]; | |
179 | for (out, src) in dst.chunks_mut(dstride).take(size).zip(src.chunks(sstride)) { | |
180 | (&mut out[0..size]).copy_from_slice(&src[0..size]); | |
181 | } | |
182 | } else if my == 0 { | |
183 | for src in src.chunks(sstride).take(size) { | |
184 | for x in 0..size { | |
185 | dst[doff + x] = interpolate!(src, x, 1, mx); | |
186 | } | |
187 | doff += dstride; | |
188 | } | |
189 | } else if mx == 0 { | |
190 | for y in 0..size { | |
191 | for x in 0..size { | |
192 | dst[doff + x] = interpolate!(src, x + y * sstride, sstride, my); | |
193 | } | |
194 | doff += dstride; | |
195 | } | |
196 | } else { | |
197 | let mut tmp = [0u8; TMP_STRIDE * (16 + 1)]; | |
198 | for (y, dst) in tmp.chunks_mut(TMP_STRIDE).take(size + 1).enumerate() { | |
199 | for x in 0..size { | |
200 | dst[x] = interpolate!(src, x + y * sstride, 1, mx); | |
201 | } | |
202 | } | |
203 | for y in 0..size { | |
204 | for x in 0..size { | |
205 | dst[doff + x] = interpolate!(tmp, x + y * TMP_STRIDE, TMP_STRIDE, my); | |
206 | } | |
207 | doff += dstride; | |
208 | } | |
209 | } | |
210 | } | |
211 | fn mc_block(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, | |
212 | mvx: i16, mvy: i16, reffrm: NAVideoBufferRef<u8>, plane: usize, | |
213 | mc_buf: &mut [u8], size: usize) { | |
214 | if (mvx == 0) && (mvy == 0) { | |
215 | let dst = &mut dst[doff..]; | |
216 | let sstride = reffrm.get_stride(plane); | |
217 | let srcoff = reffrm.get_offset(plane) + xpos + ypos * sstride; | |
218 | let src = &reffrm.get_data(); | |
219 | let src = &src[srcoff..]; | |
220 | for (out, src) in dst.chunks_mut(dstride).take(size).zip(src.chunks(sstride)) { | |
221 | (&mut out[0..size]).copy_from_slice(&src[0..size]); | |
222 | } | |
223 | return; | |
224 | } | |
225 | let (w, h) = reffrm.get_dimensions(plane); | |
226 | let wa = if plane == 0 { (w + 15) & !15 } else { (w + 7) & !7 } as isize; | |
227 | let ha = if plane == 0 { (h + 15) & !15 } else { (h + 7) & !7 } as isize; | |
228 | let bsize = (size as isize) + 1; | |
229 | let ref_x = (xpos as isize) + ((mvx >> 3) as isize); | |
230 | let ref_y = (ypos as isize) + ((mvy >> 3) as isize); | |
231 | ||
232 | let (src, sstride) = if (ref_x < 0) || (ref_x + bsize > wa) || (ref_y < 0) || (ref_y + bsize > ha) { | |
233 | edge_emu(&reffrm, ref_x, ref_y, bsize as usize, bsize as usize, mc_buf, 32, plane, 4); | |
234 | (mc_buf as &[u8], 32) | |
235 | } else { | |
236 | let off = reffrm.get_offset(plane); | |
237 | let stride = reffrm.get_stride(plane); | |
238 | let data = reffrm.get_data(); | |
239 | (&data[off + (ref_x as usize) + (ref_y as usize) * stride..], stride) | |
240 | }; | |
241 | let mx = (mvx & 7) as usize; | |
242 | let my = (mvy & 7) as usize; | |
243 | mc_block_common(dst, doff, dstride, src, sstride, size, mx, my); | |
244 | } | |
245 | pub fn mc_block16x16_bilin(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, | |
246 | mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) { | |
247 | mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 16); | |
248 | } | |
249 | pub fn mc_block8x8_bilin(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, | |
250 | mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) { | |
251 | mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 8); | |
252 | } | |
253 | pub fn mc_block4x4_bilin(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, | |
254 | mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) { | |
255 | mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 4); | |
256 | } |