fix some warnings (unneeded parentheses, missing dyn keyword)
[nihav.git] / nihav-duck / src / codecs / vp7dsp.rs
CommitLineData
587a6d78 1use nihav_core::frame::*;
b4d5b851 2use nihav_codec_support::codecs::blockdsp::edge_emu;
587a6d78
KS
3
/// Clamps a signed 16-bit value to `0..=255` and returns it as a byte.
fn clip_u8(val: i16) -> u8 {
    val.max(0).min(255) as u8
}
7
/// Neighbouring pixels used as the reference for intra prediction.
///
/// Entries that are not available hold the neutral value `0x80`.
pub struct IPredContext {
    /// Pixels from the column just left of the block, top to bottom.
    pub left:       [u8; 16],
    /// Whether the left neighbours may be read by [`IPredContext::fill`].
    pub has_left:   bool,
    /// Pixels from the row just above the block, left to right.
    pub top:        [u8; 16],
    /// Whether the top neighbours may be read by [`IPredContext::fill`].
    pub has_top:    bool,
    /// Pixel diagonally above-left of the block.
    pub tl:         u8,
}

impl IPredContext {
    /// Reloads the neighbour arrays for the block whose first pixel is `src[off]`.
    ///
    /// * `src`, `stride` - pixel buffer and the distance between its rows;
    /// * `off` - index of the top-left pixel of the block;
    /// * `tsize` - number of pixels to read from the row above;
    /// * `lsize` - number of pixels to read from the column to the left.
    ///
    /// Entries past `tsize`/`lsize`, and whole arrays whose `has_*` flag is
    /// unset, are set to `0x80`. The corner pixel is read only when both
    /// flags are set.
    ///
    /// # Panics
    ///
    /// Panics if `tsize` or `lsize` exceeds 16 or if a requested neighbour
    /// lies outside `src`.
    pub fn fill(&mut self, src: &[u8], off: usize, stride: usize, tsize: usize, lsize: usize) {
        if self.has_top {
            let top_row = off - stride;
            self.top[..tsize].copy_from_slice(&src[top_row..][..tsize]);
            for el in self.top[tsize..].iter_mut() {
                *el = 0x80;
            }
        } else {
            self.top = [0x80; 16];
        }
        if self.has_left {
            for i in 0..lsize {
                self.left[i] = src[off - 1 + i * stride];
            }
            for el in self.left[lsize..].iter_mut() {
                *el = 0x80;
            }
        } else {
            self.left = [0x80; 16];
        }
        self.tl = if self.has_top && self.has_left {
            src[off - stride - 1]
        } else {
            0x80
        };
    }
}

impl Default for IPredContext {
    /// Creates a context with no neighbours available and every pixel at `0x80`.
    fn default() -> Self {
        Self {
            left:       [0x80; 16],
            top:        [0x80; 16],
            tl:         0x80,
            has_left:   false,
            has_top:    false,
        }
    }
}
57
/// 4x4 table of integer transform coefficients, one row of four per line.
///
/// Only entry 0 is read in this file (by `idct4x4_dc`); `idct4x4` spells the
/// same magnitudes (23170, 30274, 12540) out as literals.
const DCT_COEFFS: [i32; 16] = [
    23170,  23170,  23170,  23170,
    30274,  12540, -12540, -30274,
    23170, -23170, -23170,  23170,
    12540, -30274,  30274, -12540
];
64
/// Performs the 4x4 inverse transform on `coeffs` in place.
///
/// The first pass transforms each row of four coefficients into a temporary
/// block (results shifted right by 14); the second pass transforms each
/// column of that block, adds the rounding term `0x20000` and shifts right
/// by 18 before storing back into `coeffs`.
///
/// All intermediate arithmetic wraps on overflow, so extreme coefficient
/// values cannot trigger an overflow panic in debug builds.
pub fn idct4x4(coeffs: &mut [i16; 16]) {
    let mut tmp = [0i16; 16];
    for (src, dst) in coeffs.chunks(4).zip(tmp.chunks_mut(4)) {
        let s0 = i32::from(src[0]);
        let s1 = i32::from(src[1]);
        let s2 = i32::from(src[2]);
        let s3 = i32::from(src[3]);

        let t0 = (s0 + s2).wrapping_mul(23170);
        let t1 = (s0 - s2).wrapping_mul(23170);
        let t2 = s1.wrapping_mul(30274).wrapping_add(s3.wrapping_mul(12540));
        let t3 = s1.wrapping_mul(12540).wrapping_sub(s3.wrapping_mul(30274));

        dst[0] = (t0.wrapping_add(t2) >> 14) as i16;
        dst[1] = (t1.wrapping_add(t3) >> 14) as i16;
        dst[2] = (t1.wrapping_sub(t3) >> 14) as i16;
        dst[3] = (t0.wrapping_sub(t2) >> 14) as i16;
    }
    for i in 0..4 {
        let s0 = i32::from(tmp[i]);
        let s1 = i32::from(tmp[i + 4]);
        let s2 = i32::from(tmp[i + 8]);
        let s3 = i32::from(tmp[i + 12]);

        // The rounding term is folded into the even part so that it reaches
        // all four outputs of the column.
        let t0 = (s0 + s2).wrapping_mul(23170).wrapping_add(0x20000);
        let t1 = (s0 - s2).wrapping_mul(23170).wrapping_add(0x20000);
        let t2 = s1.wrapping_mul(30274).wrapping_add(s3.wrapping_mul(12540));
        let t3 = s1.wrapping_mul(12540).wrapping_sub(s3.wrapping_mul(30274));

        coeffs[i]      = (t0.wrapping_add(t2) >> 18) as i16;
        coeffs[i + 4]  = (t1.wrapping_add(t3) >> 18) as i16;
        coeffs[i + 8]  = (t1.wrapping_sub(t3) >> 18) as i16;
        coeffs[i + 12] = (t0.wrapping_sub(t2) >> 18) as i16;
    }
}
100
101pub fn idct4x4_dc(coeffs: &mut [i16; 16]) {
47933c6d 102 let dc = ((((i32::from(coeffs[0]) * DCT_COEFFS[0]) >> 14) * DCT_COEFFS[0] + 0x20000) >> 18) as i16;
587a6d78
KS
103 for el in coeffs.iter_mut() {
104 *el = dc;
105 }
106}
107
/// Adds a 4x4 block of residuals to the pixels starting at `dst[off]`.
///
/// `coeffs` is read row by row, four values per row; consecutive pixel rows
/// are `stride` bytes apart. Each sum is clamped to `0..=255`.
///
/// The addition is done in `i32`, so a pixel plus a residual near
/// `i16::MAX` saturates to 255 instead of overflowing `i16`.
pub fn add_coeffs4x4(dst: &mut [u8], off: usize, stride: usize, coeffs: &[i16; 16]) {
    let dst = &mut dst[off..];
    for (out, src) in dst.chunks_mut(stride).zip(coeffs.chunks(4)) {
        for (oel, iel) in out.iter_mut().take(4).zip(src.iter()) {
            let sum = i32::from(*oel) + i32::from(*iel);
            *oel = sum.max(0).min(255) as u8;
        }
    }
}
/// Adds 16 residuals to the 16 consecutive pixels starting at `dst[off]`.
///
/// Each sum is clamped to `0..=255`. The addition is done in `i32`, so a
/// pixel plus a residual near `i16::MAX` saturates instead of overflowing
/// `i16`.
pub fn add_coeffs16x1(dst: &mut [u8], off: usize, coeffs: &[i16; 16]) {
    let dst = &mut dst[off..];
    for (oel, iel) in dst.iter_mut().take(16).zip(coeffs.iter()) {
        let sum = i32::from(*oel) + i32::from(*iel);
        *oel = sum.max(0).min(255) as u8;
    }
}
122
123pub trait IntraPred {
124 const SIZE: usize;
125 fn ipred_dc(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
126 let dc;
127 if !ipred.has_left && !ipred.has_top {
128 dc = 0x80;
129 } else {
130 let mut dcsum = 0;
131 let mut dcshift = match Self::SIZE {
132 16 => 3,
133 _ => 2,
134 };
135 if ipred.has_left {
136 for el in ipred.left.iter().take(Self::SIZE) {
47933c6d 137 dcsum += u16::from(*el);
587a6d78
KS
138 }
139 dcshift += 1;
140 }
141 if ipred.has_top {
142 for el in ipred.top.iter().take(Self::SIZE) {
47933c6d 143 dcsum += u16::from(*el);
587a6d78
KS
144 }
145 dcshift += 1;
146 }
147 dc = ((dcsum + (1 << (dcshift - 1))) >> dcshift) as u8;
148 }
149 for _ in 0..Self::SIZE {
150 let out = &mut dst[off..][..Self::SIZE];
151 for el in out.iter_mut() {
152 *el = dc;
153 }
154 off += stride;
155 }
156 }
157 fn ipred_v(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
158 for _ in 0..Self::SIZE {
159 let out = &mut dst[off..][..Self::SIZE];
160 out.copy_from_slice(&ipred.top[0..Self::SIZE]);
161 off += stride;
162 }
163 }
164 fn ipred_h(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
165 for leftel in ipred.left.iter().take(Self::SIZE) {
166 let out = &mut dst[off..][..Self::SIZE];
167 for el in out.iter_mut() {
168 *el = *leftel;
169 }
170 off += stride;
171 }
172 }
173 fn ipred_tm(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
47933c6d 174 let tl = i16::from(ipred.tl);
587a6d78
KS
175 for m in 0..Self::SIZE {
176 for n in 0..Self::SIZE {
47933c6d 177 dst[off + n] = clip_u8(i16::from(ipred.left[m]) + i16::from(ipred.top[n]) - tl);
587a6d78
KS
178 }
179 off += stride;
180 }
181 }
182}
183
184pub struct IPred16x16 {}
185impl IntraPred for IPred16x16 { const SIZE: usize = 16; }
186
187pub struct IPred8x8 {}
188impl IntraPred for IPred8x8 { const SIZE: usize = 8; }
189
/// Computes smoothed neighbour values for the 4x4 intra predictors.
///
/// `$ipred` must expose `tl`, `top` and `left` the way `IPredContext` does.
/// The selector before the semicolon picks what is produced:
///
/// * `topleft` - one byte: 3-tap (1, 2, 1) filter over `left[0]`, `tl`, `top[0]`;
/// * `top` / `left` - four bytes: 3-tap filter centred on entries 0..=3 of
///   that side, with `tl` as the neighbour before entry 0;
/// * `top8` / `left8` - four bytes: 3-tap filter centred on entries 4..=7,
///   with entry 7 repeated as its own following neighbour;
/// * `topavg` / `leftavg` - four bytes: rounded averages of adjacent pairs,
///   starting with `tl` and entry 0.
macro_rules! load_pred4 {
    (topleft; $ipred: expr) => {{
        let tl = u16::from($ipred.tl);
        let a0 = u16::from($ipred.top[0]);
        let l0 = u16::from($ipred.left[0]);
        ((l0 + tl * 2 + a0 + 2) >> 2) as u8
    }};
    (top; $ipred: expr) => {{
        let tl = u16::from($ipred.tl);
        let a0 = u16::from($ipred.top[0]);
        let a1 = u16::from($ipred.top[1]);
        let a2 = u16::from($ipred.top[2]);
        let a3 = u16::from($ipred.top[3]);
        let a4 = u16::from($ipred.top[4]);
        let p0 = ((tl + a0 * 2 + a1 + 2) >> 2) as u8;
        let p1 = ((a0 + a1 * 2 + a2 + 2) >> 2) as u8;
        let p2 = ((a1 + a2 * 2 + a3 + 2) >> 2) as u8;
        let p3 = ((a2 + a3 * 2 + a4 + 2) >> 2) as u8;
        (p0, p1, p2, p3)
    }};
    (top8; $ipred: expr) => {{
        let t3 = u16::from($ipred.top[3]);
        let t4 = u16::from($ipred.top[4]);
        let t5 = u16::from($ipred.top[5]);
        let t6 = u16::from($ipred.top[6]);
        let t7 = u16::from($ipred.top[7]);
        let p4 = ((t3 + t4 * 2 + t5 + 2) >> 2) as u8;
        let p5 = ((t4 + t5 * 2 + t6 + 2) >> 2) as u8;
        let p6 = ((t5 + t6 * 2 + t7 + 2) >> 2) as u8;
        let p7 = ((t6 + t7 * 2 + t7 + 2) >> 2) as u8;
        (p4, p5, p6, p7)
    }};
    (topavg; $ipred: expr) => {{
        let tl = u16::from($ipred.tl);
        let a0 = u16::from($ipred.top[0]);
        let a1 = u16::from($ipred.top[1]);
        let a2 = u16::from($ipred.top[2]);
        let a3 = u16::from($ipred.top[3]);
        let p0 = ((tl + a0 + 1) >> 1) as u8;
        let p1 = ((a0 + a1 + 1) >> 1) as u8;
        let p2 = ((a1 + a2 + 1) >> 1) as u8;
        let p3 = ((a2 + a3 + 1) >> 1) as u8;
        (p0, p1, p2, p3)
    }};
    (left; $ipred: expr) => {{
        let tl = u16::from($ipred.tl);
        let l0 = u16::from($ipred.left[0]);
        let l1 = u16::from($ipred.left[1]);
        let l2 = u16::from($ipred.left[2]);
        let l3 = u16::from($ipred.left[3]);
        let l4 = u16::from($ipred.left[4]);
        let p0 = ((tl + l0 * 2 + l1 + 2) >> 2) as u8;
        let p1 = ((l0 + l1 * 2 + l2 + 2) >> 2) as u8;
        let p2 = ((l1 + l2 * 2 + l3 + 2) >> 2) as u8;
        let p3 = ((l2 + l3 * 2 + l4 + 2) >> 2) as u8;
        (p0, p1, p2, p3)
    }};
    (left8; $ipred: expr) => {{
        let l3 = u16::from($ipred.left[3]);
        let l4 = u16::from($ipred.left[4]);
        let l5 = u16::from($ipred.left[5]);
        let l6 = u16::from($ipred.left[6]);
        let l7 = u16::from($ipred.left[7]);
        let p4 = ((l3 + l4 * 2 + l5 + 2) >> 2) as u8;
        let p5 = ((l4 + l5 * 2 + l6 + 2) >> 2) as u8;
        let p6 = ((l5 + l6 * 2 + l7 + 2) >> 2) as u8;
        let p7 = ((l6 + l7 * 2 + l7 + 2) >> 2) as u8;
        (p4, p5, p6, p7)
    }};
    (leftavg; $ipred: expr) => {{
        let tl = u16::from($ipred.tl);
        let l0 = u16::from($ipred.left[0]);
        let l1 = u16::from($ipred.left[1]);
        let l2 = u16::from($ipred.left[2]);
        let l3 = u16::from($ipred.left[3]);
        let p0 = ((tl + l0 + 1) >> 1) as u8;
        let p1 = ((l0 + l1 + 1) >> 1) as u8;
        let p2 = ((l1 + l2 + 1) >> 1) as u8;
        let p3 = ((l2 + l3 + 1) >> 1) as u8;
        (p0, p1, p2, p3)
    }};
}
272
273pub struct IPred4x4 {}
274impl IPred4x4 {
275 pub fn ipred_dc(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
276 let dc;
277 let mut dcsum = 0;
278 for el in ipred.left.iter().take(4) {
47933c6d 279 dcsum += u16::from(*el);
587a6d78
KS
280 }
281 for el in ipred.top.iter().take(4) {
47933c6d 282 dcsum += u16::from(*el);
587a6d78
KS
283 }
284 dc = ((dcsum + (1 << 2)) >> 3) as u8;
285 for _ in 0..4 {
286 let out = &mut dst[off..][..4];
287 for el in out.iter_mut() {
288 *el = dc;
289 }
290 off += stride;
291 }
292 }
293 pub fn ipred_tm(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
47933c6d 294 let tl = i16::from(ipred.tl);
587a6d78
KS
295 for m in 0..4 {
296 for n in 0..4 {
47933c6d 297 dst[off + n] = clip_u8(i16::from(ipred.left[m]) + i16::from(ipred.top[n]) - tl);
587a6d78
KS
298 }
299 off += stride;
300 }
301 }
302 pub fn ipred_ve(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
303 let (v0, v1, v2, v3) = load_pred4!(top; ipred);
304 let vert_pred = [v0, v1, v2, v3];
305 for _ in 0..4 {
306 let out = &mut dst[off..][..4];
307 out.copy_from_slice(&vert_pred);
308 off += stride;
309 }
310 }
311 pub fn ipred_he(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
312 let (p0, p1, p2, _) = load_pred4!(left; ipred);
47933c6d 313 let p3 = ((u16::from(ipred.left[2]) + u16::from(ipred.left[3]) * 3 + 2) >> 2) as u8;
587a6d78
KS
314 let hor_pred = [p0, p1, p2, p3];
315 for m in 0..4 {
316 for n in 0..4 {
317 dst[off + n] = hor_pred[m];
318 }
319 off += stride;
320 }
321 }
322 pub fn ipred_ld(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
323 let (_, p0, p1, p2) = load_pred4!(top; ipred);
324 let (p3, p4, p5, p6) = load_pred4!(top8; ipred);
325
326 dst[off + 0] = p0; dst[off + 1] = p1; dst[off + 2] = p2; dst[off + 3] = p3;
327 off += stride;
328 dst[off + 0] = p1; dst[off + 1] = p2; dst[off + 2] = p3; dst[off + 3] = p4;
329 off += stride;
330 dst[off + 0] = p2; dst[off + 1] = p3; dst[off + 2] = p4; dst[off + 3] = p5;
331 off += stride;
332 dst[off + 0] = p3; dst[off + 1] = p4; dst[off + 2] = p5; dst[off + 3] = p6;
333 }
334 pub fn ipred_rd(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
335 let tl = load_pred4!(topleft; ipred);
336 let (l0, l1, l2, _) = load_pred4!(left; ipred);
337 let (t0, t1, t2, _) = load_pred4!(top; ipred);
338
339 dst[off + 0] = tl; dst[off + 1] = t0; dst[off + 2] = t1; dst[off + 3] = t2;
340 off += stride;
341 dst[off + 0] = l0; dst[off + 1] = tl; dst[off + 2] = t0; dst[off + 3] = t1;
342 off += stride;
343 dst[off + 0] = l1; dst[off + 1] = l0; dst[off + 2] = tl; dst[off + 3] = t0;
344 off += stride;
345 dst[off + 0] = l2; dst[off + 1] = l1; dst[off + 2] = l0; dst[off + 3] = tl;
346 }
347 pub fn ipred_vr(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
348 let tl = load_pred4!(topleft; ipred);
349 let (l0, l1, _, _) = load_pred4!(left; ipred);
350 let (t0, t1, t2, _) = load_pred4!(top; ipred);
351 let (m0, m1, m2, m3) = load_pred4!(topavg; ipred);
352
353 dst[off + 0] = m0; dst[off + 1] = m1; dst[off + 2] = m2; dst[off + 3] = m3;
354 off += stride;
355 dst[off + 0] = tl; dst[off + 1] = t0; dst[off + 2] = t1; dst[off + 3] = t2;
356 off += stride;
357 dst[off + 0] = l0; dst[off + 1] = m0; dst[off + 2] = m1; dst[off + 3] = m2;
358 off += stride;
359 dst[off + 0] = l1; dst[off + 1] = tl; dst[off + 2] = t0; dst[off + 3] = t1;
360 }
361 pub fn ipred_vl(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
362 let (_, t1, t2, t3) = load_pred4!(top; ipred);
363 let (t4, t5, t6, _) = load_pred4!(top8; ipred);
364 let (_, m1, m2, m3) = load_pred4!(topavg; ipred);
47933c6d 365 let m4 = ((u16::from(ipred.top[3]) + u16::from(ipred.top[4]) + 1) >> 1) as u8;
587a6d78
KS
366
367 dst[off + 0] = m1; dst[off + 1] = m2; dst[off + 2] = m3; dst[off + 3] = m4;
368 off += stride;
369 dst[off + 0] = t1; dst[off + 1] = t2; dst[off + 2] = t3; dst[off + 3] = t4;
370 off += stride;
371 dst[off + 0] = m2; dst[off + 1] = m3; dst[off + 2] = m4; dst[off + 3] = t5;
372 off += stride;
373 dst[off + 0] = t2; dst[off + 1] = t3; dst[off + 2] = t4; dst[off + 3] = t6;
374 }
375 pub fn ipred_hd(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
376 let tl = load_pred4!(topleft; ipred);
377 let (l0, l1, l2, _) = load_pred4!(left; ipred);
378 let (m0, m1, m2, m3) = load_pred4!(leftavg; ipred);
379 let (t0, t1, _, _) = load_pred4!(top; ipred);
380
381 dst[off + 0] = m0; dst[off + 1] = tl; dst[off + 2] = t0; dst[off + 3] = t1;
382 off += stride;
383 dst[off + 0] = m1; dst[off + 1] = l0; dst[off + 2] = m0; dst[off + 3] = tl;
384 off += stride;
385 dst[off + 0] = m2; dst[off + 1] = l1; dst[off + 2] = m1; dst[off + 3] = l0;
386 off += stride;
387 dst[off + 0] = m3; dst[off + 1] = l2; dst[off + 2] = m2; dst[off + 3] = l1;
388 }
389 pub fn ipred_hu(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
390 let (_, m1, m2, m3) = load_pred4!(leftavg; ipred);
391 let (_, l1, l2, _) = load_pred4!(left; ipred);
47933c6d 392 let l3 = ((u16::from(ipred.left[2]) + u16::from(ipred.left[3]) * 3 + 2) >> 2) as u8;
587a6d78
KS
393 let p3 = ipred.left[3];
394
395 dst[off + 0] = m1; dst[off + 1] = l1; dst[off + 2] = m2; dst[off + 3] = l2;
396 off += stride;
397 dst[off + 0] = m2; dst[off + 1] = l2; dst[off + 2] = m3; dst[off + 3] = l3;
398 off += stride;
399 dst[off + 0] = m3; dst[off + 1] = l3; dst[off + 2] = p3; dst[off + 3] = p3;
400 off += stride;
401 dst[off + 0] = p3; dst[off + 1] = p3; dst[off + 2] = p3; dst[off + 3] = p3;
402 }
403}
404
/// Returns `(p1 - q1) + 3 * (q0 - p0)`, the difference term the loop
/// filters derive their corrections from.
fn delta(p1: i16, p0: i16, q0: i16, q1: i16) -> i16 {
    (p1 - q1) + 3 * (q0 - p0)
}
408
/// Signature shared by the public loop filters in this file, so that they
/// can be used interchangeably through a function pointer.
pub type LoopFilterFunc = fn(buf: &mut [u8], off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16);
410
411pub fn simple_loop_filter(buf: &mut [u8], mut off: usize, step: usize, stride: usize, len: usize, thr: i16, _thr_inner: i16, _thr_hev: i16) {
412 for _ in 0..len {
47933c6d
KS
413 let p1 = i16::from(buf[off - step * 2]);
414 let p0 = i16::from(buf[off - step * 1]);
415 let q0 = i16::from(buf[off + step * 0]);
416 let q1 = i16::from(buf[off + step * 1]);
587a6d78
KS
417 let dpq = p0 - q0;
418 if dpq.abs() < thr {
419 let diff = delta(p1, p0, q0, q1);
420 let diffq0 = (diff.min(127) + 4) >> 3;
421 let diffp0 = diffq0 - if (diff & 7) == 4 { 1 } else { 0 };
422 buf[off - step * 1] = clip_u8(p0 + diffp0);
423 buf[off + step * 0] = clip_u8(q0 - diffq0);
424 }
425 off += stride;
426 }
427}
428
429fn normal_loop_filter(buf: &mut [u8], mut off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16, edge: bool) {
430 for _ in 0..len {
47933c6d
KS
431 let p0 = i16::from(buf[off - step * 1]);
432 let q0 = i16::from(buf[off + step * 0]);
587a6d78
KS
433 let dpq = p0 - q0;
434 if dpq.abs() <= thr {
47933c6d
KS
435 let p3 = i16::from(buf[off - step * 4]);
436 let p2 = i16::from(buf[off - step * 3]);
437 let p1 = i16::from(buf[off - step * 2]);
438 let q1 = i16::from(buf[off + step * 1]);
439 let q2 = i16::from(buf[off + step * 2]);
440 let q3 = i16::from(buf[off + step * 3]);
587a6d78
KS
441 let dp2 = p3 - p2;
442 let dp1 = p2 - p1;
443 let dp0 = p1 - p0;
444 let dq0 = q1 - q0;
445 let dq1 = q2 - q1;
446 let dq2 = q3 - q2;
447 if (dp0.abs() <= thr_inner) && (dp1.abs() <= thr_inner) &&
448 (dp2.abs() <= thr_inner) && (dq0.abs() <= thr_inner) &&
449 (dq1.abs() <= thr_inner) && (dq2.abs() <= thr_inner) {
450 let high_edge_variation = (dp0.abs() > thr_hev) || (dq0.abs() > thr_hev);
451 if high_edge_variation {
452 let diff = delta(p1, p0, q0, q1);
453 let diffq0 = (diff.min(127) + 4) >> 3;
454 let diffp0 = diffq0 - if (diff & 7) == 4 { 1 } else { 0 };
455 buf[off - step * 1] = clip_u8(p0 + diffp0);
456 buf[off + step * 0] = clip_u8(q0 - diffq0);
457 } else if edge {
458 let d = delta(p1, p0, q0, q1);
459 let diff0 = (d * 27 + 63) >> 7;
460 buf[off - step * 1] = clip_u8(p0 + diff0);
461 buf[off + step * 0] = clip_u8(q0 - diff0);
462 let diff1 = (d * 18 + 63) >> 7;
463 buf[off - step * 2] = clip_u8(p1 + diff1);
464 buf[off + step * 1] = clip_u8(q1 - diff1);
465 let diff2 = (d * 9 + 63) >> 7;
466 buf[off - step * 3] = clip_u8(p2 + diff2);
467 buf[off + step * 2] = clip_u8(q2 - diff2);
468 } else {
469 let diff = 3 * (q0 - p0);
470 let diffq0 = (diff.min(127) + 4) >> 3;
471 let diffp0 = diffq0 - if (diff & 7) == 4 { 1 } else { 0 };
472 buf[off - step * 1] = clip_u8(p0 + diffp0);
473 buf[off + step * 0] = clip_u8(q0 - diffq0);
474 let diff2 = (diffq0 + 1) >> 1;
475 buf[off - step * 2] = clip_u8(p1 + diff2);
476 buf[off + step * 1] = clip_u8(q1 - diff2);
477 }
478 }
479 }
480 off += stride;
481 }
482}
483
484pub fn normal_loop_filter_inner(buf: &mut [u8], off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16) {
485 normal_loop_filter(buf, off, step, stride, len, thr, thr_inner, thr_hev, false);
486}
487
488pub fn normal_loop_filter_edge(buf: &mut [u8], off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16) {
489 normal_loop_filter(buf, off, step, stride, len, thr, thr_inner, thr_hev, true);
490}
491
/// Six-tap interpolation filters, indexed by the fractional motion vector
/// position (0..=7).
///
/// Every row sums to 128, which matches the `+ 64` rounding and `>> 7`
/// normalisation in `interpolate!`. Row 0 passes the third sample through
/// unchanged.
const VP7_BICUBIC_FILTERS: [[i16; 6]; 8] = [
    [ 0,   0, 128,   0,   0, 0 ],
    [ 0,  -6, 123,  12,  -1, 0 ],
    [ 2, -11, 108,  36,  -8, 1 ],
    [ 0,  -9,  93,  50,  -6, 0 ],
    [ 3, -16,  77,  77, -16, 3 ],
    [ 0,  -6,  50,  93,  -9, 0 ],
    [ 1,  -8,  36, 108, -11, 2 ],
    [ 0,  -1,  12, 123,  -6, 0 ]
];
502
/// Applies one six-tap filter from `VP7_BICUBIC_FILTERS` and yields a byte.
///
/// Reads the six samples `$src[$off + k * $step]` for `k` in `0..6`,
/// weights them with row `$mode` of the table, adds 64 for rounding, shifts
/// right by 7 and clamps with `clip_u8`.
macro_rules! interpolate {
    ($src: expr, $off: expr, $step: expr, $mode: expr) => {{
        let s0 = i32::from($src[$off + 0 * $step]);
        let s1 = i32::from($src[$off + 1 * $step]);
        let s2 = i32::from($src[$off + 2 * $step]);
        let s3 = i32::from($src[$off + 3 * $step]);
        let s4 = i32::from($src[$off + 4 * $step]);
        let s5 = i32::from($src[$off + 5 * $step]);
        let filt = &VP7_BICUBIC_FILTERS[$mode];
        let src = [s0, s1, s2, s3, s4, s5];
        let mut val = 64;
        for (s, c) in src.iter().zip(filt.iter()) {
            val += s * i32::from(*c);
        }
        clip_u8((val >> 7) as i16)
    }}
}
520
/// Number of extra samples kept before the block in a motion compensation
/// source, both to the left and above.
const EDGE_PRE: usize = 2;
/// Number of extra samples kept after the block in a motion compensation
/// source, both to the right and below.
const EDGE_POST: usize = 4;
/// Row length of the temporary buffer used for two-dimensional interpolation.
const TMP_STRIDE: usize = 16;
524
525fn mc_block_common(dst: &mut [u8], mut doff: usize, dstride: usize, src: &[u8], sstride: usize, size: usize, mx: usize, my: usize) {
526 if (mx == 0) && (my == 0) {
527 let dst = &mut dst[doff..];
528 let src = &src[EDGE_PRE + EDGE_PRE * sstride..];
529 for (out, src) in dst.chunks_mut(dstride).take(size).zip(src.chunks(sstride)) {
530 (&mut out[0..size]).copy_from_slice(&src[0..size]);
531 }
532 } else if my == 0 {
533 let src = &src[EDGE_PRE * sstride..];
534 for src in src.chunks(sstride).take(size) {
535 for x in 0..size {
536 dst[doff + x] = interpolate!(src, x, 1, mx);
537 }
538 doff += dstride;
539 }
540 } else if mx == 0 {
541 let src = &src[EDGE_PRE..];
542 for y in 0..size {
543 for x in 0..size {
544 dst[doff + x] = interpolate!(src, x + y * sstride, sstride, my);
545 }
546 doff += dstride;
547 }
548 } else {
549 let mut tmp = [0u8; TMP_STRIDE * (16 + EDGE_PRE + EDGE_POST)];
550 for (y, dst) in tmp.chunks_mut(TMP_STRIDE).take(size + EDGE_PRE + EDGE_POST).enumerate() {
551 for x in 0..size {
552 dst[x] = interpolate!(src, x + y * sstride, 1, mx);
553 }
554 }
555 for y in 0..size {
556 for x in 0..size {
557 dst[doff + x] = interpolate!(tmp, x + y * TMP_STRIDE, TMP_STRIDE, my);
558 }
559 doff += dstride;
560 }
561 }
562}
563fn mc_block(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
564 mvx: i16, mvy: i16, reffrm: NAVideoBufferRef<u8>, plane: usize,
565 mc_buf: &mut [u8], size: usize) {
566 if (mvx == 0) && (mvy == 0) {
567 let dst = &mut dst[doff..];
568 let sstride = reffrm.get_stride(plane);
569 let srcoff = reffrm.get_offset(plane) + xpos + ypos * sstride;
570 let src = &reffrm.get_data();
571 let src = &src[srcoff..];
572 for (out, src) in dst.chunks_mut(dstride).take(size).zip(src.chunks(sstride)) {
573 (&mut out[0..size]).copy_from_slice(&src[0..size]);
574 }
575 return;
576 }
577 let (w, h) = reffrm.get_dimensions(plane);
ac818eac
KS
578 let wa = if plane == 0 { (w + 15) & !15 } else { (w + 7) & !7 } as isize;
579 let ha = if plane == 0 { (h + 15) & !15 } else { (h + 7) & !7 } as isize;
587a6d78
KS
580 let bsize = (size as isize) + (EDGE_PRE as isize) + (EDGE_POST as isize);
581 let ref_x = (xpos as isize) + ((mvx >> 3) as isize) - (EDGE_PRE as isize);
582 let ref_y = (ypos as isize) + ((mvy >> 3) as isize) - (EDGE_PRE as isize);
583
584 let (src, sstride) = if (ref_x < 0) || (ref_x + bsize > wa) || (ref_y < 0) || (ref_y + bsize > ha) {
86081fed 585 edge_emu(&reffrm, ref_x, ref_y, bsize as usize, bsize as usize, mc_buf, 32, plane, 0);
587a6d78
KS
586 (mc_buf as &[u8], 32)
587 } else {
588 let off = reffrm.get_offset(plane);
589 let stride = reffrm.get_stride(plane);
590 let data = reffrm.get_data();
591 (&data[off + (ref_x as usize) + (ref_y as usize) * stride..], stride)
592 };
593 let mx = (mvx & 7) as usize;
594 let my = (mvy & 7) as usize;
595 mc_block_common(dst, doff, dstride, src, sstride, size, mx, my);
596}
597pub fn mc_block16x16(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
598 mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) {
599 mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 16);
600}
601pub fn mc_block8x8(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
602 mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) {
603 mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 8);
604}
605pub fn mc_block4x4(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
606 mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) {
607 mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 4);
608}
609pub fn mc_block_special(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
610 mvx: i16, mvy: i16, reffrm: NAVideoBufferRef<u8>, plane: usize,
611 mc_buf: &mut [u8], size: usize, pitch_mode: u8) {
612 const Y_MUL: [isize; 8] = [ 1, 0, 2, 4, 1, 1, 2, 2 ];
613 const Y_OFF: [isize; 8] = [ 0, 4, 0, 0, 1, -1, 1, -1 ];
614 const ILACE_CHROMA: [bool; 8] = [ false, false, true, true, false, false, true, true ]; // mode&2 != 0
615
616 let pitch_mode = (pitch_mode & 7) as usize;
617 let (xstep, ymul) = if plane == 0 {
618 (Y_OFF[pitch_mode], Y_MUL[pitch_mode])
619 } else {
620 (0, if ILACE_CHROMA[pitch_mode] { 2 } else { 1 })
621 };
622
623 let (w, h) = reffrm.get_dimensions(plane);
ac818eac
KS
624 let wa = if plane == 0 { (w + 15) & !15 } else { (w + 7) & !7 } as isize;
625 let ha = if plane == 0 { (h + 15) & !15 } else { (h + 7) & !7 } as isize;
00a2843d
KS
626 let mut start_x = (xpos as isize) + ((mvx >> 3) as isize) - (EDGE_PRE as isize);
627 let mut end_x = (xpos as isize) + ((mvx >> 3) as isize) + ((size + EDGE_POST) as isize);
628 if xstep < 0 {
629 start_x -= (size + EDGE_POST) as isize;
630 } else if xstep > 0 {
631 end_x += (size as isize) * xstep;
632 }
633 let mut start_y = (ypos as isize) + ((mvy >> 3) as isize) - (EDGE_PRE as isize) * ymul;
634 let mut end_y = (ypos as isize) + ((mvy >> 3) as isize) + ((size + EDGE_POST) as isize) * ymul;
635 if ymul == 0 {
636 start_y -= EDGE_PRE as isize;
637 end_y += (EDGE_POST + 1) as isize;
638 }
587a6d78
KS
639 let off = reffrm.get_offset(plane);
640 let stride = reffrm.get_stride(plane);
641 let (src, sstride) = if (start_x >= 0) && (end_x <= wa) && (start_y >= 0) && (end_y <= ha) {
642 let data = reffrm.get_data();
643 (&data[off + (start_x as usize) + (start_y as usize) * stride..],
644 ((stride as isize) + xstep) as usize)
645 } else {
646 let add = (size + EDGE_PRE + EDGE_POST) * (xstep.abs() as usize);
647 let bw = size + EDGE_PRE + EDGE_POST + add;
648 let bh = (end_y - start_y) as usize;
649 let bo = if xstep >= 0 { 0 } else { add };
86081fed 650 edge_emu(&reffrm, start_x + (bo as isize), start_y, bw, bh, mc_buf, 128, plane, 0);
587a6d78
KS
651 (&mc_buf[bo..], (128 + xstep) as usize)
652 };
653 let mx = (mvx & 7) as usize;
654 let my = (mvy & 7) as usize;
655 match ymul {
656 0 => unimplemented!(),
657 1 => mc_block_common(dst, doff, dstride, src, sstride, size, mx, my),
658 2 => {
659 let hsize = size / 2;
660 for y in 0..2 {
661 for x in 0..2 {
662 mc_block_common(dst, doff + x * hsize + y * hsize * dstride, dstride,
663 &src[x * hsize + y * sstride..], sstride * 2, hsize, mx, my);
664 }
665 }
666 },
667 4 => {
668 let qsize = size / 4;
669 for y in 0..4 {
670 for x in 0..4 {
671 mc_block_common(dst, doff + x * qsize + y * qsize * dstride, dstride,
672 &src[x * qsize + y * sstride..], sstride * 4, qsize, mx, my);
673 }
674 }
675 },
676 _ => unreachable!(),
677 };
678}
679
680pub fn fade_frame(srcfrm: NAVideoBufferRef<u8>, dstfrm: &mut NASimpleVideoFrame<u8>, alpha: u16, beta: u16) {
681 let mut fade_lut = [0u8; 256];
682 for (i, el) in fade_lut.iter_mut().enumerate() {
683 let y = i as u16;
684 *el = (y + ((y * beta) >> 8) + alpha).max(0).min(255) as u8;
685 }
686
687 let (w, h) = srcfrm.get_dimensions(0);
688 let (wa, ha) = ((w + 15) & !15, (h + 15) & !15);
689 let soff = srcfrm.get_offset(0);
690 let sstride = srcfrm.get_stride(0);
691 let sdata = srcfrm.get_data();
692 let src = &sdata[soff..];
693 let dstride = dstfrm.stride[0];
694 let dst = &mut dstfrm.data[dstfrm.offset[0]..];
695 for (src, dst) in src.chunks(sstride).zip(dst.chunks_mut(dstride)).take(ha) {
696 for (s, d) in src.iter().zip(dst.iter_mut()).take(wa) {
697 *d = fade_lut[*s as usize];
698 }
699 }
700
701 for plane in 1..3 {
702 let (w, h) = srcfrm.get_dimensions(plane);
703 let (wa, ha) = ((w + 7) & !7, (h + 7) & !7);
704 let soff = srcfrm.get_offset(plane);
705 let sstride = srcfrm.get_stride(plane);
706 let sdata = srcfrm.get_data();
707 let src = &sdata[soff..];
708 let dstride = dstfrm.stride[plane];
709 let dst = &mut dstfrm.data[dstfrm.offset[plane]..];
710 for (src, dst) in src.chunks(sstride).zip(dst.chunks_mut(dstride)).take(ha) {
711 (&mut dst[0..wa]).copy_from_slice(&src[0..wa]);
712 }
713 }
714}