]>
Commit | Line | Data |
---|---|---|
587a6d78 | 1 | use nihav_core::frame::*; |
b4d5b851 | 2 | use nihav_codec_support::codecs::blockdsp::edge_emu; |
587a6d78 KS |
3 | |
/// Saturates a signed 16-bit value into the `0..=255` range and returns it as a byte.
fn clip_u8(val: i16) -> u8 {
    if val < 0 {
        0
    } else if val > 255 {
        255
    } else {
        val as u8
    }
}
7 | ||
/// Neighbouring pixels gathered around a block for use by the intra predictors.
pub struct IPredContext {
    /// Pixels of the column immediately left of the block, top to bottom.
    pub left: [u8; 16],
    /// Whether a left neighbour exists; when `false`, `fill` does not read it.
    pub has_left: bool,
    /// Pixels of the row immediately above the block, left to right.
    pub top: [u8; 16],
    /// Whether a top neighbour exists; when `false`, `fill` does not read it.
    pub has_top: bool,
    /// Pixel diagonally above-left of the block.
    pub tl: u8,
}

impl IPredContext {
    /// Loads the neighbouring pixels of the block whose top-left pixel is `src[off]`.
    ///
    /// * `src` / `stride` - pixel plane and its line pitch.
    /// * `tsize` - number of pixels taken from the row above (`src[off - stride ..]`).
    /// * `lsize` - number of pixels taken from the column to the left
    ///   (`src[off - 1 + i * stride]`).
    ///
    /// Entries past `tsize`/`lsize`, and whole edges whose `has_*` flag is
    /// unset, are set to `0x80`. The corner pixel is read only when both
    /// edges are present.
    ///
    /// Panics if `tsize` or `lsize` exceeds 16 or if a read falls outside `src`.
    pub fn fill(&mut self, src: &[u8], off: usize, stride: usize, tsize: usize, lsize: usize) {
        const FILLER: u8 = 0x80;

        if !self.has_top {
            self.top = [FILLER; 16];
        } else {
            for (col, px) in self.top[..tsize].iter_mut().enumerate() {
                *px = src[off - stride + col];
            }
            for px in self.top[tsize..].iter_mut() {
                *px = FILLER;
            }
        }

        if !self.has_left {
            self.left = [FILLER; 16];
        } else {
            for (row, px) in self.left[..lsize].iter_mut().enumerate() {
                *px = src[off - 1 + row * stride];
            }
            for px in self.left[lsize..].iter_mut() {
                *px = FILLER;
            }
        }

        self.tl = if self.has_top && self.has_left {
            src[off - stride - 1]
        } else {
            FILLER
        };
    }
}

impl Default for IPredContext {
    /// Creates a context with no neighbours and every sample set to `0x80`.
    fn default() -> Self {
        Self {
            has_left: false,
            has_top: false,
            tl: 0x80,
            top: [0x80; 16],
            left: [0x80; 16],
        }
    }
}
57 | ||
/// Fixed-point 4x4 transform matrix, stored row by row.
///
/// The magnitudes 23170, 30274 and 12540 are the same constants that
/// `idct4x4` uses inline. Within this file only element 0 is read through
/// the table (by `idct4x4_dc`).
const DCT_COEFFS: [i32; 16] = [
    // row 0
    23170,  23170,  23170,  23170,
    // row 1
    30274,  12540, -12540, -30274,
    // row 2
    23170, -23170, -23170,  23170,
    // row 3
    12540, -30274,  30274, -12540,
];
64 | ||
/// In-place inverse 4x4 transform of `coeffs` (row-major).
///
/// The transform runs in two passes: first over each group of four
/// consecutive coefficients (results scaled down by 14 bits into a
/// temporary), then over each column of the temporary (results rounded
/// with `0x20000` and scaled down by 18 bits back into `coeffs`).
///
/// All intermediate arithmetic is wrapping. The multiplications already
/// were, but the additions and subtractions that combine the products were
/// not, so extreme coefficients (for example sixteen `i16::MAX` values)
/// overflowed `i32` and panicked in overflow-checked builds. Results in
/// builds without overflow checks are unchanged.
pub fn idct4x4(coeffs: &mut [i16; 16]) {
    /// One 4-point pass; returns the four outputs before the final shift.
    /// `bias` is the rounding term added to the even part.
    fn pass(s0: i32, s1: i32, s2: i32, s3: i32, bias: i32) -> [i32; 4] {
        // `s0 ± s2` cannot overflow: both operands originate from i16 values.
        let t0 = (s0 + s2).wrapping_mul(23170).wrapping_add(bias);
        let t1 = (s0 - s2).wrapping_mul(23170).wrapping_add(bias);
        let t2 = s1.wrapping_mul(30274).wrapping_add(s3.wrapping_mul(12540));
        let t3 = s1.wrapping_mul(12540).wrapping_sub(s3.wrapping_mul(30274));
        [
            t0.wrapping_add(t2),
            t1.wrapping_add(t3),
            t1.wrapping_sub(t3),
            t0.wrapping_sub(t2),
        ]
    }

    let mut tmp = [0i16; 16];

    // First pass: groups of four consecutive coefficients.
    for (src, dst) in coeffs.chunks(4).zip(tmp.chunks_mut(4)) {
        let out = pass(
            i32::from(src[0]),
            i32::from(src[1]),
            i32::from(src[2]),
            i32::from(src[3]),
            0,
        );
        for (d, v) in dst.iter_mut().zip(out.iter()) {
            *d = (*v >> 14) as i16;
        }
    }

    // Second pass: columns of the temporary, with rounding.
    for i in 0..4 {
        let out = pass(
            i32::from(tmp[i]),
            i32::from(tmp[i + 4]),
            i32::from(tmp[i + 8]),
            i32::from(tmp[i + 12]),
            0x20000,
        );
        for (k, v) in out.iter().enumerate() {
            coeffs[i + k * 4] = (*v >> 18) as i16;
        }
    }
}
100 | ||
101 | pub fn idct4x4_dc(coeffs: &mut [i16; 16]) { | |
47933c6d | 102 | let dc = ((((i32::from(coeffs[0]) * DCT_COEFFS[0]) >> 14) * DCT_COEFFS[0] + 0x20000) >> 18) as i16; |
587a6d78 KS |
103 | for el in coeffs.iter_mut() { |
104 | *el = dc; | |
105 | } | |
106 | } | |
107 | ||
108 | pub fn add_coeffs4x4(dst: &mut [u8], off: usize, stride: usize, coeffs: &[i16; 16]) { | |
109 | let dst = &mut dst[off..]; | |
110 | for (out, src) in dst.chunks_mut(stride).zip(coeffs.chunks(4)) { | |
111 | for (oel, iel) in out.iter_mut().take(4).zip(src.iter()) { | |
47933c6d | 112 | *oel = clip_u8(i16::from(*oel) + *iel); |
587a6d78 KS |
113 | } |
114 | } | |
115 | } | |
116 | pub fn add_coeffs16x1(dst: &mut [u8], off: usize, coeffs: &[i16; 16]) { | |
117 | let dst = &mut dst[off..]; | |
118 | for (oel, iel) in dst.iter_mut().take(16).zip(coeffs.iter()) { | |
47933c6d | 119 | *oel = clip_u8(i16::from(*oel) + *iel); |
587a6d78 KS |
120 | } |
121 | } | |
122 | ||
123 | pub trait IntraPred { | |
124 | const SIZE: usize; | |
125 | fn ipred_dc(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
126 | let dc; | |
127 | if !ipred.has_left && !ipred.has_top { | |
128 | dc = 0x80; | |
129 | } else { | |
130 | let mut dcsum = 0; | |
131 | let mut dcshift = match Self::SIZE { | |
132 | 16 => 3, | |
133 | _ => 2, | |
134 | }; | |
135 | if ipred.has_left { | |
136 | for el in ipred.left.iter().take(Self::SIZE) { | |
47933c6d | 137 | dcsum += u16::from(*el); |
587a6d78 KS |
138 | } |
139 | dcshift += 1; | |
140 | } | |
141 | if ipred.has_top { | |
142 | for el in ipred.top.iter().take(Self::SIZE) { | |
47933c6d | 143 | dcsum += u16::from(*el); |
587a6d78 KS |
144 | } |
145 | dcshift += 1; | |
146 | } | |
147 | dc = ((dcsum + (1 << (dcshift - 1))) >> dcshift) as u8; | |
148 | } | |
149 | for _ in 0..Self::SIZE { | |
150 | let out = &mut dst[off..][..Self::SIZE]; | |
151 | for el in out.iter_mut() { | |
152 | *el = dc; | |
153 | } | |
154 | off += stride; | |
155 | } | |
156 | } | |
157 | fn ipred_v(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
158 | for _ in 0..Self::SIZE { | |
159 | let out = &mut dst[off..][..Self::SIZE]; | |
160 | out.copy_from_slice(&ipred.top[0..Self::SIZE]); | |
161 | off += stride; | |
162 | } | |
163 | } | |
164 | fn ipred_h(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
165 | for leftel in ipred.left.iter().take(Self::SIZE) { | |
166 | let out = &mut dst[off..][..Self::SIZE]; | |
167 | for el in out.iter_mut() { | |
168 | *el = *leftel; | |
169 | } | |
170 | off += stride; | |
171 | } | |
172 | } | |
173 | fn ipred_tm(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
47933c6d | 174 | let tl = i16::from(ipred.tl); |
587a6d78 KS |
175 | for m in 0..Self::SIZE { |
176 | for n in 0..Self::SIZE { | |
47933c6d | 177 | dst[off + n] = clip_u8(i16::from(ipred.left[m]) + i16::from(ipred.top[n]) - tl); |
587a6d78 KS |
178 | } |
179 | off += stride; | |
180 | } | |
181 | } | |
182 | } | |
183 | ||
184 | pub struct IPred16x16 {} | |
185 | impl IntraPred for IPred16x16 { const SIZE: usize = 16; } | |
186 | ||
187 | pub struct IPred8x8 {} | |
188 | impl IntraPred for IPred8x8 { const SIZE: usize = 8; } | |
189 | ||
/// Computes smoothed neighbour values for the 4x4 intra predictors.
///
/// `$ipred` must expose `tl`, `top` and `left` as `u8` data. Selectors:
///
/// * `topleft` - 3-tap filter over `left[0]`, `tl`, `top[0]`; yields one `u8`.
/// * `top` / `left` - 3-tap filter centred on entries 0..=3 of the edge
///   (starting from `tl`); yields a 4-tuple.
/// * `top8` / `left8` - 3-tap filter centred on entries 4..=7, repeating
///   entry 7 for the last one; yields a 4-tuple.
/// * `topavg` / `leftavg` - rounded 2-tap averages of `tl` and entries
///   0..=3; yields a 4-tuple.
///
/// The `@f3` and `@avg` arms are internal helpers.
macro_rules! load_pred4 {
    (topleft; $ipred: expr) => {{
        let corner = u16::from($ipred.tl);
        let above = u16::from($ipred.top[0]);
        let beside = u16::from($ipred.left[0]);
        load_pred4!(@f3 beside, corner, above)
    }};
    (top; $ipred: expr) => {{
        let c = u16::from($ipred.tl);
        let e0 = u16::from($ipred.top[0]);
        let e1 = u16::from($ipred.top[1]);
        let e2 = u16::from($ipred.top[2]);
        let e3 = u16::from($ipred.top[3]);
        let e4 = u16::from($ipred.top[4]);
        (
            load_pred4!(@f3 c, e0, e1),
            load_pred4!(@f3 e0, e1, e2),
            load_pred4!(@f3 e1, e2, e3),
            load_pred4!(@f3 e2, e3, e4),
        )
    }};
    (top8; $ipred: expr) => {{
        let e3 = u16::from($ipred.top[3]);
        let e4 = u16::from($ipred.top[4]);
        let e5 = u16::from($ipred.top[5]);
        let e6 = u16::from($ipred.top[6]);
        let e7 = u16::from($ipred.top[7]);
        (
            load_pred4!(@f3 e3, e4, e5),
            load_pred4!(@f3 e4, e5, e6),
            load_pred4!(@f3 e5, e6, e7),
            load_pred4!(@f3 e6, e7, e7),
        )
    }};
    (topavg; $ipred: expr) => {{
        let c = u16::from($ipred.tl);
        let e0 = u16::from($ipred.top[0]);
        let e1 = u16::from($ipred.top[1]);
        let e2 = u16::from($ipred.top[2]);
        let e3 = u16::from($ipred.top[3]);
        (
            load_pred4!(@avg c, e0),
            load_pred4!(@avg e0, e1),
            load_pred4!(@avg e1, e2),
            load_pred4!(@avg e2, e3),
        )
    }};
    (left; $ipred: expr) => {{
        let c = u16::from($ipred.tl);
        let e0 = u16::from($ipred.left[0]);
        let e1 = u16::from($ipred.left[1]);
        let e2 = u16::from($ipred.left[2]);
        let e3 = u16::from($ipred.left[3]);
        let e4 = u16::from($ipred.left[4]);
        (
            load_pred4!(@f3 c, e0, e1),
            load_pred4!(@f3 e0, e1, e2),
            load_pred4!(@f3 e1, e2, e3),
            load_pred4!(@f3 e2, e3, e4),
        )
    }};
    (left8; $ipred: expr) => {{
        let e3 = u16::from($ipred.left[3]);
        let e4 = u16::from($ipred.left[4]);
        let e5 = u16::from($ipred.left[5]);
        let e6 = u16::from($ipred.left[6]);
        let e7 = u16::from($ipred.left[7]);
        (
            load_pred4!(@f3 e3, e4, e5),
            load_pred4!(@f3 e4, e5, e6),
            load_pred4!(@f3 e5, e6, e7),
            load_pred4!(@f3 e6, e7, e7),
        )
    }};
    (leftavg; $ipred: expr) => {{
        let c = u16::from($ipred.tl);
        let e0 = u16::from($ipred.left[0]);
        let e1 = u16::from($ipred.left[1]);
        let e2 = u16::from($ipred.left[2]);
        let e3 = u16::from($ipred.left[3]);
        (
            load_pred4!(@avg c, e0),
            load_pred4!(@avg e0, e1),
            load_pred4!(@avg e1, e2),
            load_pred4!(@avg e2, e3),
        )
    }};
    // Internal: rounded (a + 2*b + c) / 4.
    (@f3 $a: expr, $b: expr, $c: expr) => {
        (($a + $b * 2 + $c + 2) >> 2) as u8
    };
    // Internal: rounded (a + b) / 2.
    (@avg $a: expr, $b: expr) => {
        (($a + $b + 1) >> 1) as u8
    };
}
272 | ||
273 | pub struct IPred4x4 {} | |
274 | impl IPred4x4 { | |
275 | pub fn ipred_dc(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
276 | let dc; | |
277 | let mut dcsum = 0; | |
278 | for el in ipred.left.iter().take(4) { | |
47933c6d | 279 | dcsum += u16::from(*el); |
587a6d78 KS |
280 | } |
281 | for el in ipred.top.iter().take(4) { | |
47933c6d | 282 | dcsum += u16::from(*el); |
587a6d78 KS |
283 | } |
284 | dc = ((dcsum + (1 << 2)) >> 3) as u8; | |
285 | for _ in 0..4 { | |
286 | let out = &mut dst[off..][..4]; | |
287 | for el in out.iter_mut() { | |
288 | *el = dc; | |
289 | } | |
290 | off += stride; | |
291 | } | |
292 | } | |
293 | pub fn ipred_tm(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
47933c6d | 294 | let tl = i16::from(ipred.tl); |
587a6d78 KS |
295 | for m in 0..4 { |
296 | for n in 0..4 { | |
47933c6d | 297 | dst[off + n] = clip_u8(i16::from(ipred.left[m]) + i16::from(ipred.top[n]) - tl); |
587a6d78 KS |
298 | } |
299 | off += stride; | |
300 | } | |
301 | } | |
302 | pub fn ipred_ve(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
303 | let (v0, v1, v2, v3) = load_pred4!(top; ipred); | |
304 | let vert_pred = [v0, v1, v2, v3]; | |
305 | for _ in 0..4 { | |
306 | let out = &mut dst[off..][..4]; | |
307 | out.copy_from_slice(&vert_pred); | |
308 | off += stride; | |
309 | } | |
310 | } | |
311 | pub fn ipred_he(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
312 | let (p0, p1, p2, _) = load_pred4!(left; ipred); | |
47933c6d | 313 | let p3 = ((u16::from(ipred.left[2]) + u16::from(ipred.left[3]) * 3 + 2) >> 2) as u8; |
587a6d78 KS |
314 | let hor_pred = [p0, p1, p2, p3]; |
315 | for m in 0..4 { | |
316 | for n in 0..4 { | |
317 | dst[off + n] = hor_pred[m]; | |
318 | } | |
319 | off += stride; | |
320 | } | |
321 | } | |
322 | pub fn ipred_ld(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
323 | let (_, p0, p1, p2) = load_pred4!(top; ipred); | |
324 | let (p3, p4, p5, p6) = load_pred4!(top8; ipred); | |
325 | ||
326 | dst[off + 0] = p0; dst[off + 1] = p1; dst[off + 2] = p2; dst[off + 3] = p3; | |
327 | off += stride; | |
328 | dst[off + 0] = p1; dst[off + 1] = p2; dst[off + 2] = p3; dst[off + 3] = p4; | |
329 | off += stride; | |
330 | dst[off + 0] = p2; dst[off + 1] = p3; dst[off + 2] = p4; dst[off + 3] = p5; | |
331 | off += stride; | |
332 | dst[off + 0] = p3; dst[off + 1] = p4; dst[off + 2] = p5; dst[off + 3] = p6; | |
333 | } | |
334 | pub fn ipred_rd(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
335 | let tl = load_pred4!(topleft; ipred); | |
336 | let (l0, l1, l2, _) = load_pred4!(left; ipred); | |
337 | let (t0, t1, t2, _) = load_pred4!(top; ipred); | |
338 | ||
339 | dst[off + 0] = tl; dst[off + 1] = t0; dst[off + 2] = t1; dst[off + 3] = t2; | |
340 | off += stride; | |
341 | dst[off + 0] = l0; dst[off + 1] = tl; dst[off + 2] = t0; dst[off + 3] = t1; | |
342 | off += stride; | |
343 | dst[off + 0] = l1; dst[off + 1] = l0; dst[off + 2] = tl; dst[off + 3] = t0; | |
344 | off += stride; | |
345 | dst[off + 0] = l2; dst[off + 1] = l1; dst[off + 2] = l0; dst[off + 3] = tl; | |
346 | } | |
347 | pub fn ipred_vr(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
348 | let tl = load_pred4!(topleft; ipred); | |
349 | let (l0, l1, _, _) = load_pred4!(left; ipred); | |
350 | let (t0, t1, t2, _) = load_pred4!(top; ipred); | |
351 | let (m0, m1, m2, m3) = load_pred4!(topavg; ipred); | |
352 | ||
353 | dst[off + 0] = m0; dst[off + 1] = m1; dst[off + 2] = m2; dst[off + 3] = m3; | |
354 | off += stride; | |
355 | dst[off + 0] = tl; dst[off + 1] = t0; dst[off + 2] = t1; dst[off + 3] = t2; | |
356 | off += stride; | |
357 | dst[off + 0] = l0; dst[off + 1] = m0; dst[off + 2] = m1; dst[off + 3] = m2; | |
358 | off += stride; | |
359 | dst[off + 0] = l1; dst[off + 1] = tl; dst[off + 2] = t0; dst[off + 3] = t1; | |
360 | } | |
361 | pub fn ipred_vl(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
362 | let (_, t1, t2, t3) = load_pred4!(top; ipred); | |
363 | let (t4, t5, t6, _) = load_pred4!(top8; ipred); | |
364 | let (_, m1, m2, m3) = load_pred4!(topavg; ipred); | |
47933c6d | 365 | let m4 = ((u16::from(ipred.top[3]) + u16::from(ipred.top[4]) + 1) >> 1) as u8; |
587a6d78 KS |
366 | |
367 | dst[off + 0] = m1; dst[off + 1] = m2; dst[off + 2] = m3; dst[off + 3] = m4; | |
368 | off += stride; | |
369 | dst[off + 0] = t1; dst[off + 1] = t2; dst[off + 2] = t3; dst[off + 3] = t4; | |
370 | off += stride; | |
371 | dst[off + 0] = m2; dst[off + 1] = m3; dst[off + 2] = m4; dst[off + 3] = t5; | |
372 | off += stride; | |
373 | dst[off + 0] = t2; dst[off + 1] = t3; dst[off + 2] = t4; dst[off + 3] = t6; | |
374 | } | |
375 | pub fn ipred_hd(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
376 | let tl = load_pred4!(topleft; ipred); | |
377 | let (l0, l1, l2, _) = load_pred4!(left; ipred); | |
378 | let (m0, m1, m2, m3) = load_pred4!(leftavg; ipred); | |
379 | let (t0, t1, _, _) = load_pred4!(top; ipred); | |
380 | ||
381 | dst[off + 0] = m0; dst[off + 1] = tl; dst[off + 2] = t0; dst[off + 3] = t1; | |
382 | off += stride; | |
383 | dst[off + 0] = m1; dst[off + 1] = l0; dst[off + 2] = m0; dst[off + 3] = tl; | |
384 | off += stride; | |
385 | dst[off + 0] = m2; dst[off + 1] = l1; dst[off + 2] = m1; dst[off + 3] = l0; | |
386 | off += stride; | |
387 | dst[off + 0] = m3; dst[off + 1] = l2; dst[off + 2] = m2; dst[off + 3] = l1; | |
388 | } | |
389 | pub fn ipred_hu(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { | |
390 | let (_, m1, m2, m3) = load_pred4!(leftavg; ipred); | |
391 | let (_, l1, l2, _) = load_pred4!(left; ipred); | |
47933c6d | 392 | let l3 = ((u16::from(ipred.left[2]) + u16::from(ipred.left[3]) * 3 + 2) >> 2) as u8; |
587a6d78 KS |
393 | let p3 = ipred.left[3]; |
394 | ||
395 | dst[off + 0] = m1; dst[off + 1] = l1; dst[off + 2] = m2; dst[off + 3] = l2; | |
396 | off += stride; | |
397 | dst[off + 0] = m2; dst[off + 1] = l2; dst[off + 2] = m3; dst[off + 3] = l3; | |
398 | off += stride; | |
399 | dst[off + 0] = m3; dst[off + 1] = l3; dst[off + 2] = p3; dst[off + 3] = p3; | |
400 | off += stride; | |
401 | dst[off + 0] = p3; dst[off + 1] = p3; dst[off + 2] = p3; dst[off + 3] = p3; | |
402 | } | |
403 | } | |
404 | ||
/// Loop-filter difference term: `(p1 - q1) + 3 * (q0 - p0)`.
fn delta(p1: i16, p0: i16, q0: i16, q1: i16) -> i16 {
    let outer = p1 - q1;
    let inner = q0 - p0;
    outer + 3 * inner
}
408 | ||
/// Common signature of the loop-filter entry points in this file.
///
/// `off` is the index of the first sample on the far side of the edge,
/// `step` the distance between samples across the edge, `stride` the
/// distance between consecutive filtered positions and `len` their count.
pub type LoopFilterFunc = fn(
    buf: &mut [u8],
    off: usize,
    step: usize,
    stride: usize,
    len: usize,
    thr: i16,
    thr_inner: i16,
    thr_hev: i16,
);
410 | ||
411 | pub fn simple_loop_filter(buf: &mut [u8], mut off: usize, step: usize, stride: usize, len: usize, thr: i16, _thr_inner: i16, _thr_hev: i16) { | |
412 | for _ in 0..len { | |
47933c6d KS |
413 | let p1 = i16::from(buf[off - step * 2]); |
414 | let p0 = i16::from(buf[off - step * 1]); | |
415 | let q0 = i16::from(buf[off + step * 0]); | |
416 | let q1 = i16::from(buf[off + step * 1]); | |
587a6d78 KS |
417 | let dpq = p0 - q0; |
418 | if dpq.abs() < thr { | |
419 | let diff = delta(p1, p0, q0, q1); | |
420 | let diffq0 = (diff.min(127) + 4) >> 3; | |
421 | let diffp0 = diffq0 - if (diff & 7) == 4 { 1 } else { 0 }; | |
422 | buf[off - step * 1] = clip_u8(p0 + diffp0); | |
423 | buf[off + step * 0] = clip_u8(q0 - diffq0); | |
424 | } | |
425 | off += stride; | |
426 | } | |
427 | } | |
428 | ||
429 | fn normal_loop_filter(buf: &mut [u8], mut off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16, edge: bool) { | |
430 | for _ in 0..len { | |
47933c6d KS |
431 | let p0 = i16::from(buf[off - step * 1]); |
432 | let q0 = i16::from(buf[off + step * 0]); | |
587a6d78 KS |
433 | let dpq = p0 - q0; |
434 | if dpq.abs() <= thr { | |
47933c6d KS |
435 | let p3 = i16::from(buf[off - step * 4]); |
436 | let p2 = i16::from(buf[off - step * 3]); | |
437 | let p1 = i16::from(buf[off - step * 2]); | |
438 | let q1 = i16::from(buf[off + step * 1]); | |
439 | let q2 = i16::from(buf[off + step * 2]); | |
440 | let q3 = i16::from(buf[off + step * 3]); | |
587a6d78 KS |
441 | let dp2 = p3 - p2; |
442 | let dp1 = p2 - p1; | |
443 | let dp0 = p1 - p0; | |
444 | let dq0 = q1 - q0; | |
445 | let dq1 = q2 - q1; | |
446 | let dq2 = q3 - q2; | |
447 | if (dp0.abs() <= thr_inner) && (dp1.abs() <= thr_inner) && | |
448 | (dp2.abs() <= thr_inner) && (dq0.abs() <= thr_inner) && | |
449 | (dq1.abs() <= thr_inner) && (dq2.abs() <= thr_inner) { | |
450 | let high_edge_variation = (dp0.abs() > thr_hev) || (dq0.abs() > thr_hev); | |
451 | if high_edge_variation { | |
452 | let diff = delta(p1, p0, q0, q1); | |
453 | let diffq0 = (diff.min(127) + 4) >> 3; | |
454 | let diffp0 = diffq0 - if (diff & 7) == 4 { 1 } else { 0 }; | |
455 | buf[off - step * 1] = clip_u8(p0 + diffp0); | |
456 | buf[off + step * 0] = clip_u8(q0 - diffq0); | |
457 | } else if edge { | |
458 | let d = delta(p1, p0, q0, q1); | |
459 | let diff0 = (d * 27 + 63) >> 7; | |
460 | buf[off - step * 1] = clip_u8(p0 + diff0); | |
461 | buf[off + step * 0] = clip_u8(q0 - diff0); | |
462 | let diff1 = (d * 18 + 63) >> 7; | |
463 | buf[off - step * 2] = clip_u8(p1 + diff1); | |
464 | buf[off + step * 1] = clip_u8(q1 - diff1); | |
465 | let diff2 = (d * 9 + 63) >> 7; | |
466 | buf[off - step * 3] = clip_u8(p2 + diff2); | |
467 | buf[off + step * 2] = clip_u8(q2 - diff2); | |
468 | } else { | |
469 | let diff = 3 * (q0 - p0); | |
470 | let diffq0 = (diff.min(127) + 4) >> 3; | |
471 | let diffp0 = diffq0 - if (diff & 7) == 4 { 1 } else { 0 }; | |
472 | buf[off - step * 1] = clip_u8(p0 + diffp0); | |
473 | buf[off + step * 0] = clip_u8(q0 - diffq0); | |
474 | let diff2 = (diffq0 + 1) >> 1; | |
475 | buf[off - step * 2] = clip_u8(p1 + diff2); | |
476 | buf[off + step * 1] = clip_u8(q1 - diff2); | |
477 | } | |
478 | } | |
479 | } | |
480 | off += stride; | |
481 | } | |
482 | } | |
483 | ||
484 | pub fn normal_loop_filter_inner(buf: &mut [u8], off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16) { | |
485 | normal_loop_filter(buf, off, step, stride, len, thr, thr_inner, thr_hev, false); | |
486 | } | |
487 | ||
488 | pub fn normal_loop_filter_edge(buf: &mut [u8], off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16) { | |
489 | normal_loop_filter(buf, off, step, stride, len, thr, thr_inner, thr_hev, true); | |
490 | } | |
491 | ||
/// Six-tap interpolation kernels, one per value of the low three bits of a
/// motion vector component (the index passed to `interpolate!`).
///
/// Every kernel sums to 128, matching the `+ 64` / `>> 7` normalisation in
/// `interpolate!`.
const VP7_BICUBIC_FILTERS: [[i16; 6]; 8] = [
    [0,   0, 128,   0,   0, 0],
    [0,  -6, 123,  12,  -1, 0],
    [2, -11, 108,  36,  -8, 1],
    [0,  -9,  93,  50,  -6, 0],
    [3, -16,  77,  77, -16, 3],
    [0,  -6,  50,  93,  -9, 0],
    [1,  -8,  36, 108, -11, 2],
    [0,  -1,  12, 123,  -6, 0],
];
502 | ||
/// Applies kernel `$mode` of `VP7_BICUBIC_FILTERS` to six samples of `$src`
/// starting at `$off` and spaced `$step` apart, then rounds (`+ 64`),
/// normalises (`>> 7`) and clips the result to a byte.
macro_rules! interpolate {
    ($src: expr, $off: expr, $step: expr, $mode: expr) => {{
        let taps = [
            i32::from($src[$off + 0 * $step]),
            i32::from($src[$off + 1 * $step]),
            i32::from($src[$off + 2 * $step]),
            i32::from($src[$off + 3 * $step]),
            i32::from($src[$off + 4 * $step]),
            i32::from($src[$off + 5 * $step]),
        ];
        let kernel = &VP7_BICUBIC_FILTERS[$mode];
        let mut acc = 64;
        for (sample, weight) in taps.iter().zip(kernel.iter()) {
            acc += sample * i32::from(*weight);
        }
        clip_u8((acc >> 7) as i16)
    }}
}
520 | ||
/// Extra samples fetched before the block start for the six-tap filter.
const EDGE_PRE: usize = 2;
/// Extra samples fetched after the block end in the motion-compensation source area.
const EDGE_POST: usize = 4;
/// Line pitch of the intermediate buffer used for two-dimensional interpolation.
const TMP_STRIDE: usize = 16;
524 | ||
525 | fn mc_block_common(dst: &mut [u8], mut doff: usize, dstride: usize, src: &[u8], sstride: usize, size: usize, mx: usize, my: usize) { | |
526 | if (mx == 0) && (my == 0) { | |
527 | let dst = &mut dst[doff..]; | |
528 | let src = &src[EDGE_PRE + EDGE_PRE * sstride..]; | |
529 | for (out, src) in dst.chunks_mut(dstride).take(size).zip(src.chunks(sstride)) { | |
530 | (&mut out[0..size]).copy_from_slice(&src[0..size]); | |
531 | } | |
532 | } else if my == 0 { | |
533 | let src = &src[EDGE_PRE * sstride..]; | |
534 | for src in src.chunks(sstride).take(size) { | |
535 | for x in 0..size { | |
536 | dst[doff + x] = interpolate!(src, x, 1, mx); | |
537 | } | |
538 | doff += dstride; | |
539 | } | |
540 | } else if mx == 0 { | |
541 | let src = &src[EDGE_PRE..]; | |
542 | for y in 0..size { | |
543 | for x in 0..size { | |
544 | dst[doff + x] = interpolate!(src, x + y * sstride, sstride, my); | |
545 | } | |
546 | doff += dstride; | |
547 | } | |
548 | } else { | |
549 | let mut tmp = [0u8; TMP_STRIDE * (16 + EDGE_PRE + EDGE_POST)]; | |
550 | for (y, dst) in tmp.chunks_mut(TMP_STRIDE).take(size + EDGE_PRE + EDGE_POST).enumerate() { | |
551 | for x in 0..size { | |
552 | dst[x] = interpolate!(src, x + y * sstride, 1, mx); | |
553 | } | |
554 | } | |
555 | for y in 0..size { | |
556 | for x in 0..size { | |
557 | dst[doff + x] = interpolate!(tmp, x + y * TMP_STRIDE, TMP_STRIDE, my); | |
558 | } | |
559 | doff += dstride; | |
560 | } | |
561 | } | |
562 | } | |
563 | fn mc_block(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, | |
564 | mvx: i16, mvy: i16, reffrm: NAVideoBufferRef<u8>, plane: usize, | |
565 | mc_buf: &mut [u8], size: usize) { | |
566 | if (mvx == 0) && (mvy == 0) { | |
567 | let dst = &mut dst[doff..]; | |
568 | let sstride = reffrm.get_stride(plane); | |
569 | let srcoff = reffrm.get_offset(plane) + xpos + ypos * sstride; | |
570 | let src = &reffrm.get_data(); | |
571 | let src = &src[srcoff..]; | |
572 | for (out, src) in dst.chunks_mut(dstride).take(size).zip(src.chunks(sstride)) { | |
573 | (&mut out[0..size]).copy_from_slice(&src[0..size]); | |
574 | } | |
575 | return; | |
576 | } | |
577 | let (w, h) = reffrm.get_dimensions(plane); | |
ac818eac KS |
578 | let wa = if plane == 0 { (w + 15) & !15 } else { (w + 7) & !7 } as isize; |
579 | let ha = if plane == 0 { (h + 15) & !15 } else { (h + 7) & !7 } as isize; | |
587a6d78 KS |
580 | let bsize = (size as isize) + (EDGE_PRE as isize) + (EDGE_POST as isize); |
581 | let ref_x = (xpos as isize) + ((mvx >> 3) as isize) - (EDGE_PRE as isize); | |
582 | let ref_y = (ypos as isize) + ((mvy >> 3) as isize) - (EDGE_PRE as isize); | |
583 | ||
584 | let (src, sstride) = if (ref_x < 0) || (ref_x + bsize > wa) || (ref_y < 0) || (ref_y + bsize > ha) { | |
86081fed | 585 | edge_emu(&reffrm, ref_x, ref_y, bsize as usize, bsize as usize, mc_buf, 32, plane, 0); |
587a6d78 KS |
586 | (mc_buf as &[u8], 32) |
587 | } else { | |
588 | let off = reffrm.get_offset(plane); | |
589 | let stride = reffrm.get_stride(plane); | |
590 | let data = reffrm.get_data(); | |
591 | (&data[off + (ref_x as usize) + (ref_y as usize) * stride..], stride) | |
592 | }; | |
593 | let mx = (mvx & 7) as usize; | |
594 | let my = (mvy & 7) as usize; | |
595 | mc_block_common(dst, doff, dstride, src, sstride, size, mx, my); | |
596 | } | |
597 | pub fn mc_block16x16(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, | |
598 | mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) { | |
599 | mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 16); | |
600 | } | |
601 | pub fn mc_block8x8(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, | |
602 | mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) { | |
603 | mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 8); | |
604 | } | |
605 | pub fn mc_block4x4(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, | |
606 | mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) { | |
607 | mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 4); | |
608 | } | |
609 | pub fn mc_block_special(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, | |
610 | mvx: i16, mvy: i16, reffrm: NAVideoBufferRef<u8>, plane: usize, | |
611 | mc_buf: &mut [u8], size: usize, pitch_mode: u8) { | |
612 | const Y_MUL: [isize; 8] = [ 1, 0, 2, 4, 1, 1, 2, 2 ]; | |
613 | const Y_OFF: [isize; 8] = [ 0, 4, 0, 0, 1, -1, 1, -1 ]; | |
614 | const ILACE_CHROMA: [bool; 8] = [ false, false, true, true, false, false, true, true ]; // mode&2 != 0 | |
615 | ||
616 | let pitch_mode = (pitch_mode & 7) as usize; | |
617 | let (xstep, ymul) = if plane == 0 { | |
618 | (Y_OFF[pitch_mode], Y_MUL[pitch_mode]) | |
619 | } else { | |
620 | (0, if ILACE_CHROMA[pitch_mode] { 2 } else { 1 }) | |
621 | }; | |
622 | ||
623 | let (w, h) = reffrm.get_dimensions(plane); | |
ac818eac KS |
624 | let wa = if plane == 0 { (w + 15) & !15 } else { (w + 7) & !7 } as isize; |
625 | let ha = if plane == 0 { (h + 15) & !15 } else { (h + 7) & !7 } as isize; | |
00a2843d KS |
626 | let mut start_x = (xpos as isize) + ((mvx >> 3) as isize) - (EDGE_PRE as isize); |
627 | let mut end_x = (xpos as isize) + ((mvx >> 3) as isize) + ((size + EDGE_POST) as isize); | |
628 | if xstep < 0 { | |
629 | start_x -= (size + EDGE_POST) as isize; | |
630 | } else if xstep > 0 { | |
631 | end_x += (size as isize) * xstep; | |
632 | } | |
633 | let mut start_y = (ypos as isize) + ((mvy >> 3) as isize) - (EDGE_PRE as isize) * ymul; | |
634 | let mut end_y = (ypos as isize) + ((mvy >> 3) as isize) + ((size + EDGE_POST) as isize) * ymul; | |
635 | if ymul == 0 { | |
636 | start_y -= EDGE_PRE as isize; | |
637 | end_y += (EDGE_POST + 1) as isize; | |
638 | } | |
587a6d78 KS |
639 | let off = reffrm.get_offset(plane); |
640 | let stride = reffrm.get_stride(plane); | |
641 | let (src, sstride) = if (start_x >= 0) && (end_x <= wa) && (start_y >= 0) && (end_y <= ha) { | |
642 | let data = reffrm.get_data(); | |
643 | (&data[off + (start_x as usize) + (start_y as usize) * stride..], | |
644 | ((stride as isize) + xstep) as usize) | |
645 | } else { | |
646 | let add = (size + EDGE_PRE + EDGE_POST) * (xstep.abs() as usize); | |
647 | let bw = size + EDGE_PRE + EDGE_POST + add; | |
648 | let bh = (end_y - start_y) as usize; | |
649 | let bo = if xstep >= 0 { 0 } else { add }; | |
86081fed | 650 | edge_emu(&reffrm, start_x + (bo as isize), start_y, bw, bh, mc_buf, 128, plane, 0); |
587a6d78 KS |
651 | (&mc_buf[bo..], (128 + xstep) as usize) |
652 | }; | |
653 | let mx = (mvx & 7) as usize; | |
654 | let my = (mvy & 7) as usize; | |
655 | match ymul { | |
656 | 0 => unimplemented!(), | |
657 | 1 => mc_block_common(dst, doff, dstride, src, sstride, size, mx, my), | |
658 | 2 => { | |
659 | let hsize = size / 2; | |
660 | for y in 0..2 { | |
661 | for x in 0..2 { | |
662 | mc_block_common(dst, doff + x * hsize + y * hsize * dstride, dstride, | |
663 | &src[x * hsize + y * sstride..], sstride * 2, hsize, mx, my); | |
664 | } | |
665 | } | |
666 | }, | |
667 | 4 => { | |
668 | let qsize = size / 4; | |
669 | for y in 0..4 { | |
670 | for x in 0..4 { | |
671 | mc_block_common(dst, doff + x * qsize + y * qsize * dstride, dstride, | |
672 | &src[x * qsize + y * sstride..], sstride * 4, qsize, mx, my); | |
673 | } | |
674 | } | |
675 | }, | |
676 | _ => unreachable!(), | |
677 | }; | |
678 | } | |
679 | ||
680 | pub fn fade_frame(srcfrm: NAVideoBufferRef<u8>, dstfrm: &mut NASimpleVideoFrame<u8>, alpha: u16, beta: u16) { | |
681 | let mut fade_lut = [0u8; 256]; | |
682 | for (i, el) in fade_lut.iter_mut().enumerate() { | |
683 | let y = i as u16; | |
684 | *el = (y + ((y * beta) >> 8) + alpha).max(0).min(255) as u8; | |
685 | } | |
686 | ||
687 | let (w, h) = srcfrm.get_dimensions(0); | |
688 | let (wa, ha) = ((w + 15) & !15, (h + 15) & !15); | |
689 | let soff = srcfrm.get_offset(0); | |
690 | let sstride = srcfrm.get_stride(0); | |
691 | let sdata = srcfrm.get_data(); | |
692 | let src = &sdata[soff..]; | |
693 | let dstride = dstfrm.stride[0]; | |
694 | let dst = &mut dstfrm.data[dstfrm.offset[0]..]; | |
695 | for (src, dst) in src.chunks(sstride).zip(dst.chunks_mut(dstride)).take(ha) { | |
696 | for (s, d) in src.iter().zip(dst.iter_mut()).take(wa) { | |
697 | *d = fade_lut[*s as usize]; | |
698 | } | |
699 | } | |
700 | ||
701 | for plane in 1..3 { | |
702 | let (w, h) = srcfrm.get_dimensions(plane); | |
703 | let (wa, ha) = ((w + 7) & !7, (h + 7) & !7); | |
704 | let soff = srcfrm.get_offset(plane); | |
705 | let sstride = srcfrm.get_stride(plane); | |
706 | let sdata = srcfrm.get_data(); | |
707 | let src = &sdata[soff..]; | |
708 | let dstride = dstfrm.stride[plane]; | |
709 | let dst = &mut dstfrm.data[dstfrm.offset[plane]..]; | |
710 | for (src, dst) in src.chunks(sstride).zip(dst.chunks_mut(dstride)).take(ha) { | |
711 | (&mut dst[0..wa]).copy_from_slice(&src[0..wa]); | |
712 | } | |
713 | } | |
714 | } |