]>
Commit | Line | Data |
---|---|---|
cd830591 | 1 | use nihav_core::frame::{NAVideoBuffer, NASimpleVideoFrame}; |
b4d5b851 KS |
2 | use nihav_codec_support::codecs::MV; |
3 | use nihav_codec_support::codecs::blockdsp::edge_emu; | |
52aad9fe KS |
4 | |
/// Saturates a signed 16-bit value into the `0..=255` range and returns it as a byte.
fn clip8(val: i16) -> u8 {
    if val < 0 {
        0
    } else if val > 255 {
        255
    } else {
        val as u8
    }
}
6 | ||
/// Reads element `$idx` of the indexable `$buf` and converts it to `i16` with an `as` cast.
macro_rules! el {
    ($buf: ident, $idx: expr) => (
        $buf[$idx] as i16
    )
}
10 | ||
/// Six-tap interpolation of `$buf` around index `$pos`, with neighbouring taps `$stride` apart.
///
/// The leading selector picks the tap set:
/// * `01` - taps (1, -5, 52, 20, -5, 1), rounded and divided by 64;
/// * `02` - taps (1, -5, 20, 20, -5, 1), rounded and divided by 32;
/// * `03` - taps (1, -5, 20, 52, -5, 1), rounded and divided by 64.
///
/// The taps cover positions `$pos - 2 * $stride` through `$pos + 3 * $stride`
/// and the result is saturated to a byte with `clip8`.
macro_rules! filter {
    (01; $buf: ident, $pos: expr, $stride: expr) => (
        clip8((      el!($buf, $pos - 2 * $stride)
               -5  * el!($buf, $pos - 1 * $stride)
               +52 * el!($buf, $pos)
               +20 * el!($buf, $pos + 1 * $stride)
               -5  * el!($buf, $pos + 2 * $stride)
               +     el!($buf, $pos + 3 * $stride) + 32) >> 6)
    );
    (02; $buf: ident, $pos: expr, $stride: expr) => (
        clip8((      el!($buf, $pos - 2 * $stride)
               -5  * el!($buf, $pos - 1 * $stride)
               +20 * el!($buf, $pos)
               +20 * el!($buf, $pos + 1 * $stride)
               -5  * el!($buf, $pos + 2 * $stride)
               +     el!($buf, $pos + 3 * $stride) + 16) >> 5)
    );
    (03; $buf: ident, $pos: expr, $stride: expr) => (
        clip8((      el!($buf, $pos - 2 * $stride)
               -5  * el!($buf, $pos - 1 * $stride)
               +20 * el!($buf, $pos)
               +52 * el!($buf, $pos + 1 * $stride)
               -5  * el!($buf, $pos + 2 * $stride)
               +     el!($buf, $pos + 3 * $stride) + 32) >> 6)
    );
}
37 | ||
/// Filters `$count` consecutive samples with the `filter!` tap set chosen by `$phase`.
///
/// Output goes to `$out[$out_pos..]`, input is centred on `$inp[$inp_pos..]`, and
/// `$tap_step` is the distance between filter taps. Phases 1, 2 and 3 map to the
/// `01`, `02` and `03` filters; any other phase writes nothing.
macro_rules! filter_row {
    ($out: ident, $out_pos: expr, $inp: ident, $inp_pos: expr, $tap_step: expr, $count: expr, $phase: expr) => ({
        match $phase {
            1 => {
                for col in 0..$count {
                    $out[$out_pos + col] = filter!(01; $inp, $inp_pos + col, $tap_step);
                }
            },
            2 => {
                for col in 0..$count {
                    $out[$out_pos + col] = filter!(02; $inp, $inp_pos + col, $tap_step);
                }
            },
            3 => {
                for col in 0..$count {
                    $out[$out_pos + col] = filter!(03; $inp, $inp_pos + col, $tap_step);
                }
            },
            _ => {},
        };
    });
}
60 | ||
b7c882c1 | 61 | #[allow(clippy::cognitive_complexity)] |
52aad9fe KS |
62 | fn luma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, w: usize, h: usize, cx: usize, cy: usize) { |
63 | if (cx == 0) && (cy == 0) { | |
64 | for _ in 0..h { | |
fa57381e | 65 | dst[didx..][..w].copy_from_slice(&src[sidx..][..w]); |
52aad9fe KS |
66 | didx += dstride; |
67 | sidx += sstride; | |
68 | } | |
69 | } else if cy == 0 { | |
70 | for _ in 0..h { | |
71 | filter_row!(dst, didx, src, sidx, 1, w, cx); | |
72 | didx += dstride; | |
73 | sidx += sstride; | |
74 | } | |
75 | } else if cx == 0 { | |
76 | for _ in 0..h { | |
77 | filter_row!(dst, didx, src, sidx, sstride, w, cy); | |
78 | didx += dstride; | |
79 | sidx += sstride; | |
80 | } | |
81 | } else if (cx != 3) || (cy != 3) { | |
82 | let mut tmp: [u8; 70 * 64] = [0; 70 * 64]; | |
83 | for y in 0..h+5 { | |
84 | filter_row!(tmp, y * 64, src, sidx - sstride * 2, 1, w, cx); | |
85 | sidx += sstride; | |
86 | } | |
87 | for y in 0..h { | |
88 | filter_row!(dst, didx, tmp, (y + 2) * 64, 64, w, cy); | |
89 | didx += dstride; | |
90 | } | |
91 | } else { | |
92 | for _ in 0..h { | |
93 | for x in 0..w { | |
94 | dst[didx + x] = ((el!(src, sidx + x) + el!(src, sidx + x + 1) + | |
95 | el!(src, sidx + x + sstride) + el!(src, sidx + x + 1 + sstride) + 2) >> 2) as u8; | |
96 | } | |
97 | didx += dstride; | |
98 | sidx += sstride; | |
99 | } | |
100 | } | |
101 | } | |
102 | ||
/// Quarter-sample bilinear chroma motion compensation for a `w`x`h` block.
///
/// `x` and `y` are the fractional phases (0..=3). With both zero the block is
/// copied; with both non-zero a four-sample bilinear blend is used (phase
/// (3, 3) deliberately reuses the weights of (3, 2)); otherwise a two-sample
/// blend along the single fractional axis is used. Weights always sum to 16.
fn chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, w: usize, h: usize, x: usize, y: usize) {
    if x == 0 && y == 0 {
        for _ in 0..h {
            dst[didx..didx + w].copy_from_slice(&src[sidx..sidx + w]);
            didx += dstride;
            sidx += sstride;
        }
        return;
    }

    if x > 0 && y > 0 {
        // 3,3 case is the same as 3,2 for some reason
        let ymod = if x == 3 && y == 3 { 2 } else { y };
        let w_tl = ((4 - x) * (4 - ymod)) as u16;
        let w_tr = (x * (4 - ymod)) as u16;
        let w_bl = ((4 - x) * ymod) as u16;
        let w_br = (x * ymod) as u16;
        for _ in 0..h {
            for col in 0..w {
                let tl = src[sidx + col] as u16;
                let tr = src[sidx + col + 1] as u16;
                let bl = src[sidx + col + sstride] as u16;
                let br = src[sidx + col + 1 + sstride] as u16;
                dst[didx + col] = ((w_tl * tl + w_tr * tr + w_bl * bl + w_br * br + 8) >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    } else {
        // exactly one of the phases is non-zero: blend along that axis only
        let w_near = ((4 - x) * (4 - y)) as u16;
        let w_far = (x * (4 - y) + (4 - x) * y) as u16;
        let neighbour = if y > 0 { sstride } else { 1 };
        for _ in 0..h {
            for col in 0..w {
                let near = src[sidx + col] as u16;
                let far = src[sidx + col + neighbour] as u16;
                dst[didx + col] = ((w_near * near + w_far * far + 8) >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    }
}
143 | ||
/// Tells whether a displaced block plus its filter margins lies inside a `w`x`h` area.
///
/// The block is `cw`x`ch` at (`x` + `dx`, `y` + `dy`); `e0`/`e1` are the extra
/// samples needed to the left/right and `e2`/`e3` those needed above/below.
fn check_pos(x: usize, y: usize, cw: usize, ch: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let xn = (x as isize) + (dx as isize);
    let yn = (y as isize) + (dy as isize);

    let left_ok = xn - e0 >= 0;
    let right_ok = xn + (cw as isize) + e1 <= (w as isize);
    let top_ok = yn - e2 >= 0;
    let bottom_ok = yn + (ch as isize) + e3 <= (h as isize);

    left_ok && right_ok && top_ok && bottom_ok
}
150 | ||
/// Signed difference `$buf[$first] - $buf[$second]`, computed in `i16`.
macro_rules! diff {
    ($buf: ident, $first: expr, $second: expr) => (
        ($buf[$first] as i16) - ($buf[$second] as i16)
    )
}
/// Maps a gradient to a filter strength: 3 when its magnitude is below `$limit`, 1 otherwise.
macro_rules! strength {
    ($grad: expr, $limit: expr) => (
        if $grad.abs() < $limit { 3 } else { 1 }
    )
}
/// Limits `val` to the symmetric interval `[-lim, lim]`.
///
/// The lower bound is applied first and the upper bound last, so for a
/// negative `lim` the result is always `lim`.
fn clip_symm(val: i16, lim: i16) -> i16 {
    let floor = -lim;
    let raised = if val < floor { floor } else { val };
    if raised > lim { lim } else { raised }
}
160 | ||
161 | fn filter_luma_edge(dst: &mut [u8], mut offset: usize, step: usize, stride: usize, mode1: u8, mode2: u8, lim1: i16, lim2: i16) { | |
162 | let mut diff_q1q0: [i16; 4] = [0; 4]; | |
163 | let mut diff_p1p0: [i16; 4] = [0; 4]; | |
164 | for i in 0..4 { | |
165 | let off = offset + i * stride; | |
166 | diff_q1q0[i] = diff!(dst, off - 2 * step, off - step); | |
167 | diff_p1p0[i] = diff!(dst, off + step, off); | |
168 | } | |
169 | let str_p = strength!(diff_q1q0[0] + diff_q1q0[1] + diff_q1q0[2] + diff_q1q0[3], lim2); | |
170 | let str_q = strength!(diff_p1p0[0] + diff_p1p0[1] + diff_p1p0[2] + diff_p1p0[3], lim2); | |
171 | if str_p + str_q > 2 { | |
172 | let msum = ((mode1 + mode2 + str_q + str_p) >> 1) as i16; | |
9e4a63b2 | 173 | let (maxprod, weak) = if (str_q == 1) || (str_p == 1) { (384, true) } else { (256, false) }; |
52aad9fe KS |
174 | for y in 0..4 { |
175 | let diff_p0q0 = diff!(dst, offset, offset - step); | |
9e4a63b2 | 176 | if (diff_p0q0 != 0) && (((lim1 * diff_p0q0.abs()) & !0x7F) <= maxprod) { |
52aad9fe KS |
177 | let diff_q1q2 = diff!(dst, offset - 2 * step, offset - 3 * step); |
178 | let diff_p1p2 = diff!(dst, offset + step, offset + 2 * step); | |
179 | let delta = if weak { | |
180 | clip_symm((diff_p0q0 + 1) >> 1, msum >> 1) | |
181 | } else { | |
182 | let diff_strg = (diff!(dst, offset - 2 * step, offset + step) + 4 * diff_p0q0 + 4) >> 3; | |
183 | clip_symm(diff_strg, msum) | |
184 | }; | |
185 | dst[offset - step] = clip8((dst[offset - step] as i16) + delta); | |
186 | dst[offset] = clip8((dst[offset] as i16) - delta); | |
d649acc9 | 187 | if (str_q != 1) && (diff_q1q2.abs() <= (lim2 >> 2)) { |
52aad9fe KS |
188 | let diff = (diff_q1q0[y] + diff_q1q2 - delta) >> 1; |
189 | let delta_q1 = if weak { | |
190 | clip_symm(diff, (mode1 >> 1) as i16) | |
191 | } else { | |
192 | clip_symm(diff, mode1 as i16) | |
193 | }; | |
194 | dst[offset - 2 * step] = clip8((dst[offset - 2 * step] as i16) - delta_q1); | |
195 | } | |
d649acc9 | 196 | if (str_p != 1) && (diff_p1p2.abs() <= (lim2 >> 2)) { |
52aad9fe KS |
197 | let diff = (diff_p1p0[y] + diff_p1p2 + delta) >> 1; |
198 | let delta_p1 = if weak { | |
199 | clip_symm(diff, (mode2 >> 1) as i16) | |
200 | } else { | |
201 | clip_symm(diff, mode2 as i16) | |
202 | }; | |
203 | dst[offset + step] = clip8((dst[offset + step] as i16) - delta_p1); | |
204 | } | |
205 | } | |
206 | offset += stride; | |
207 | } | |
208 | } | |
209 | } | |
210 | fn filter_chroma_edge(dst: &mut [u8], mut offset: usize, step: usize, stride: usize, mode1: u8, mode2: u8, lim1: i16, lim2: i16) { | |
211 | let diff_q = 4 * diff!(dst, offset - 2 * step, offset - step).abs(); | |
212 | let diff_p = 4 * diff!(dst, offset + step, offset ).abs(); | |
213 | let str_q = strength!(diff_q, lim2); | |
214 | let str_p = strength!(diff_p, lim2); | |
215 | if str_p + str_q > 2 { | |
216 | let msum = ((mode1 + mode2 + str_q + str_p) >> 1) as i16; | |
9e4a63b2 | 217 | let (maxprod, weak) = if (str_q == 1) || (str_p == 1) { (384, true) } else { (256, false) }; |
52aad9fe KS |
218 | for _ in 0..2 { |
219 | let diff_pq = diff!(dst, offset, offset - step); | |
9e4a63b2 | 220 | if (diff_pq != 0) && (((lim1 * diff_pq.abs()) & !0x7F) <= maxprod) { |
52aad9fe KS |
221 | let delta = if weak { |
222 | clip_symm((diff_pq + 1) >> 1, msum >> 1) | |
223 | } else { | |
224 | let diff_strg = (diff!(dst, offset - 2 * step, offset + step) + 4 * diff_pq + 4) >> 3; | |
225 | clip_symm(diff_strg, msum) | |
226 | }; | |
227 | dst[offset - step] = clip8((dst[offset - step] as i16) + delta); | |
228 | dst[offset] = clip8((dst[offset] as i16) - delta); | |
229 | } | |
230 | offset += stride; | |
231 | } | |
232 | } | |
233 | } | |
234 | ||
/// Parameters shared by all deblocking calls.
pub struct RV60DeblockParams {
    /// When set, chroma planes are filtered in addition to luma.
    pub deblock_chroma: bool,
    /// Width compared against `xpos + 8` to detect the last block column.
    pub width: usize,
    /// Height compared against `ypos + 8` to detect the last block row.
    pub height: usize,
    /// Distance between rows in the deblock strength arrays.
    pub dblkstride: usize,
}
241 | ||
/// Stateless collection of the DSP routines (inverse transforms, motion
/// compensation and deblocking) implemented in this file.
pub struct RV60DSP {}
243 | /*pub fn rv6_transform4x4_dc(coeffs: &mut [i16]) { | |
244 | let dc = (((coeffs[0] * 13 + 0x10) >> 5) * 13 + 0x10) >> 5; | |
245 | for el in coeffs.iter_mut().take(16) { | |
246 | *el = dc; | |
247 | } | |
248 | }*/ | |
249 | ||
250 | impl RV60DSP { | |
251 | pub fn new() -> Self { Self{} } | |
252 | pub fn transform4x4(&self, blk: &mut [i16]) { | |
253 | let mut tmp: [i32; 4 * 4] = [0; 4 * 4]; | |
254 | ||
255 | for i in 0..4 { | |
256 | let a = blk[i + 0 * 4] as i32; | |
257 | let b = blk[i + 1 * 4] as i32; | |
258 | let c = blk[i + 2 * 4] as i32; | |
259 | let d = blk[i + 3 * 4] as i32; | |
260 | ||
261 | let t0 = 13 * (a + c); | |
262 | let t1 = 13 * (a - c); | |
263 | let t2 = 7 * b - 17 * d; | |
264 | let t3 = 7 * d + 17 * b; | |
265 | tmp[i + 0 * 4] = (t0 + t3 + 0x10) >> 5; | |
266 | tmp[i + 1 * 4] = (t1 + t2 + 0x10) >> 5; | |
267 | tmp[i + 2 * 4] = (t1 - t2 + 0x10) >> 5; | |
268 | tmp[i + 3 * 4] = (t0 - t3 + 0x10) >> 5; | |
269 | } | |
270 | for (dst, src) in blk.chunks_mut(4).zip(tmp.chunks(4)) { | |
271 | let a = src[0]; | |
272 | let b = src[1]; | |
273 | let c = src[2]; | |
274 | let d = src[3]; | |
275 | ||
276 | let t0 = 13 * (a + c); | |
277 | let t1 = 13 * (a - c); | |
278 | let t2 = 7 * b - 17 * d; | |
279 | let t3 = 7 * d + 17 * b; | |
280 | dst[0] = ((t0 + t3 + 0x10) >> 5) as i16; | |
281 | dst[1] = ((t1 + t2 + 0x10) >> 5) as i16; | |
282 | dst[2] = ((t1 - t2 + 0x10) >> 5) as i16; | |
283 | dst[3] = ((t0 - t3 + 0x10) >> 5) as i16; | |
284 | } | |
285 | } | |
286 | /*pub fn transform8x8_dc(&self, blk: &mut [i16]) { | |
287 | assert!(blk.len() >= 8 * 8); | |
288 | let dc = (((coeffs[0] * 37 + 0x40) >> 7) * 37 + 0x40) >> 7; | |
289 | for el in coeffs.iter_mut().take(8 * 8) { | |
290 | *el = dc; | |
291 | } | |
292 | }*/ | |
293 | pub fn transform8x8(&self, blk: &mut [i16]) { | |
294 | assert!(blk.len() >= 8 * 8); | |
295 | let mut tmp: [i32; 8 * 8] = [0; 8 * 8]; | |
296 | for i in 0..8 { | |
297 | let s0 = blk[i + 0 * 8] as i32; | |
298 | let s1 = blk[i + 1 * 8] as i32; | |
299 | let s2 = blk[i + 2 * 8] as i32; | |
300 | let s3 = blk[i + 3 * 8] as i32; | |
301 | let s4 = blk[i + 4 * 8] as i32; | |
302 | let s5 = blk[i + 5 * 8] as i32; | |
303 | let s6 = blk[i + 6 * 8] as i32; | |
304 | let s7 = blk[i + 7 * 8] as i32; | |
305 | ||
306 | let t0 = 37 * (s0 + s4); | |
307 | let t1 = 37 * (s0 - s4); | |
308 | let t2 = 48 * s2 + 20 * s6; | |
309 | let t3 = 20 * s2 - 48 * s6; | |
310 | let t4 = t0 + t2; | |
311 | let t5 = t0 - t2; | |
312 | let t6 = t1 + t3; | |
313 | let t7 = t1 - t3; | |
314 | let t8 = 51 * s1 + 43 * s3 + 29 * s5 + 10 * s7; | |
315 | let t9 = 43 * s1 - 10 * s3 - 51 * s5 - 29 * s7; | |
316 | let ta = 29 * s1 - 51 * s3 + 10 * s5 + 43 * s7; | |
317 | let tb = 10 * s1 - 29 * s3 + 43 * s5 - 51 * s7; | |
318 | tmp[i + 0 * 8] = (t4 + t8 + 0x40) >> 7; | |
319 | tmp[i + 1 * 8] = (t6 + t9 + 0x40) >> 7; | |
320 | tmp[i + 2 * 8] = (t7 + ta + 0x40) >> 7; | |
321 | tmp[i + 3 * 8] = (t5 + tb + 0x40) >> 7; | |
322 | tmp[i + 4 * 8] = (t5 - tb + 0x40) >> 7; | |
323 | tmp[i + 5 * 8] = (t7 - ta + 0x40) >> 7; | |
324 | tmp[i + 6 * 8] = (t6 - t9 + 0x40) >> 7; | |
325 | tmp[i + 7 * 8] = (t4 - t8 + 0x40) >> 7; | |
326 | } | |
327 | for (dst, src) in blk.chunks_mut(8).zip(tmp.chunks(8)) { | |
328 | let s0 = src[0]; | |
329 | let s1 = src[1]; | |
330 | let s2 = src[2]; | |
331 | let s3 = src[3]; | |
332 | let s4 = src[4]; | |
333 | let s5 = src[5]; | |
334 | let s6 = src[6]; | |
335 | let s7 = src[7]; | |
336 | ||
337 | let t0 = 37 * (s0 + s4); | |
338 | let t1 = 37 * (s0 - s4); | |
339 | let t2 = 48 * s2 + 20 * s6; | |
340 | let t3 = 20 * s2 - 48 * s6; | |
341 | let t4 = t0 + t2; | |
342 | let t5 = t0 - t2; | |
343 | let t6 = t1 + t3; | |
344 | let t7 = t1 - t3; | |
345 | let t8 = 51 * s1 + 43 * s3 + 29 * s5 + 10 * s7; | |
346 | let t9 = 43 * s1 - 10 * s3 - 51 * s5 - 29 * s7; | |
347 | let ta = 29 * s1 - 51 * s3 + 10 * s5 + 43 * s7; | |
348 | let tb = 10 * s1 - 29 * s3 + 43 * s5 - 51 * s7; | |
349 | dst[0] = ((t4 + t8 + 0x40) >> 7) as i16; | |
350 | dst[1] = ((t6 + t9 + 0x40) >> 7) as i16; | |
351 | dst[2] = ((t7 + ta + 0x40) >> 7) as i16; | |
352 | dst[3] = ((t5 + tb + 0x40) >> 7) as i16; | |
353 | dst[4] = ((t5 - tb + 0x40) >> 7) as i16; | |
354 | dst[5] = ((t7 - ta + 0x40) >> 7) as i16; | |
355 | dst[6] = ((t6 - t9 + 0x40) >> 7) as i16; | |
356 | dst[7] = ((t4 - t8 + 0x40) >> 7) as i16; | |
357 | } | |
358 | } | |
359 | /*pub fn transform16x16_dc(&self, blk: &mut [i16; 16 * 16]) { | |
360 | let dc = (((coeffs[0] * 26 + 0x40) >> 7) * 26 + 0x40) >> 7; | |
361 | for el in coeffs.iter_mut() { | |
362 | *el = dc; | |
363 | } | |
364 | }*/ | |
/// One in-place 16-point inverse transform pass over the samples
/// `blk[off + k * step]`, `k = 0..16`.
///
/// All sixteen inputs are read before anything is written. Outputs are
/// rounded with `+ 64` and shifted right by 7; output `15 - k` uses the same
/// even part as output `k` with the odd part subtracted instead of added.
fn transform16(blk: &mut [i16; 16 * 16], off: usize, step: usize) {
    // Weights of inputs 1, 3, ..., 15 for the odd part of outputs 0..8.
    const ODD: [[i32; 8]; 8] = [
        [ 37,  35,  32,  28,  23,  17,  11,   4 ],
        [ 35,  23,   4, -17, -32, -37, -28, -11 ],
        [ 32,   4, -28, -35, -11,  23,  37,  17 ],
        [ 28, -17, -35,   4,  37,  11, -32, -23 ],
        [ 23, -32, -11,  37,  -4, -35,  17,  28 ],
        [ 17, -37,  23,  11, -35,  28,   4, -32 ],
        [ 11, -28,  37, -32,  17,   4, -23,  35 ],
        [  4, -11,  17, -23,  28, -32,  35, -37 ],
    ];
    // Weights of inputs 2, 6, 10, 14 for the four intermediate even terms.
    const MID: [[i32; 4]; 4] = [
        [ 31,  -7, -36, -20 ],
        [ 36,  31,  20,   7 ],
        [ 20, -36,   7,  31 ],
        [  7, -20,  31, -36 ],
    ];

    let mut s = [0i32; 16];
    for (k, v) in s.iter_mut().enumerate() {
        *v = i32::from(blk[off + k * step]);
    }

    let t0 = 26 * (s[0] + s[8]);
    let t1 = 26 * (s[0] - s[8]);
    let t2 = 14 * s[4] - 34 * s[12];
    let t3 = 34 * s[4] + 14 * s[12];
    let (t4, t5, t6, t7) = (t0 + t3, t0 - t3, t1 + t2, t1 - t2);

    let mut q = [0i32; 4];
    for (acc, row) in q.iter_mut().zip(MID.iter()) {
        *acc = row[0] * s[2] + row[1] * s[6] + row[2] * s[10] + row[3] * s[14];
    }

    // Even part of outputs 0..8, in output order.
    let even = [
        t4 + q[1], t6 + q[0], t7 + q[2], t5 + q[3],
        t5 - q[3], t7 - q[2], t6 - q[0], t4 - q[1],
    ];

    for k in 0..8 {
        let odd: i32 = ODD[k].iter()
                             .zip(s.iter().skip(1).step_by(2))
                             .map(|(&c, &v)| c * v)
                             .sum();
        blk[off + k * step]        = ((even[k] + odd + 64) >> 7) as i16;
        blk[off + (15 - k) * step] = ((even[k] - odd + 64) >> 7) as i16;
    }
}
428 | pub fn transform16x16(&self, blk: &mut [i16; 16 * 16]) { | |
429 | for i in 0..16 { | |
430 | Self::transform16(blk, i, 16); | |
431 | } | |
432 | for i in 0..16 { | |
433 | Self::transform16(blk, i * 16, 1); | |
434 | } | |
435 | } | |
436 | ||
437 | pub fn add_block(&self, dst: &mut [u8], mut doff: usize, dstride: usize, blk: &[i16], size: usize) { | |
438 | for y in 0..size { | |
439 | for x in 0..size { | |
440 | dst[doff + x] = clip8((dst[doff + x] as i16) + blk[x + y * size]); | |
441 | } | |
442 | doff += dstride; | |
443 | } | |
444 | } | |
445 | fn avg(&self, dst: &mut [u8], mut didx: usize, dstride: usize, | |
446 | src: &[u8], mut sidx: usize, sstride: usize, | |
447 | w: usize, h: usize) { | |
448 | for _ in 0..h { | |
449 | for x in 0..w { | |
450 | dst[didx + x] = (((dst[didx + x] as u16) + (src[sidx + x] as u16)) >> 1) as u8; | |
451 | } | |
452 | didx += dstride; | |
453 | sidx += sstride; | |
454 | } | |
455 | } | |
cd830591 | 456 | pub fn do_avg(&self, frame: &mut NASimpleVideoFrame<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, w: usize, h: usize) { |
52aad9fe | 457 | for comp in 0..3 { |
cd830591 | 458 | let dstride = frame.stride[comp]; |
52aad9fe | 459 | let sstride = prev_frame.get_stride(comp); |
cd830591 | 460 | let doff = if comp == 0 { x + y * dstride } else { frame.offset[comp] + (x >> 1) + (y >> 1) * dstride }; |
52aad9fe | 461 | let soff = prev_frame.get_offset(comp); |
cd830591 | 462 | let dst = &mut frame.data; |
52aad9fe KS |
463 | let sdata = prev_frame.get_data(); |
464 | let src: &[u8] = sdata.as_slice(); | |
465 | ||
466 | if comp == 0 { | |
467 | self.avg(dst, doff, dstride, src, soff, sstride, w, h); | |
468 | } else { | |
469 | self.avg(dst, doff, dstride, src, soff, sstride, w >> 1, h >> 1); | |
470 | } | |
471 | } | |
472 | } | |
cd830591 | 473 | pub fn do_mc(&self, frame: &mut NASimpleVideoFrame<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, w: usize, h: usize, mv: MV, avg: bool) { |
52aad9fe | 474 | { // luma |
cd830591 KS |
475 | let dstride = frame.stride[0]; |
476 | let doffset = frame.offset[0] + (if !avg { x + y * dstride } else { 0 }); | |
477 | let dst = &mut frame.data; | |
52aad9fe KS |
478 | |
479 | let (w_, h_) = prev_frame.get_dimensions(0); | |
480 | let fw = (w_ + 15) & !15; | |
481 | let fh = (h_ + 15) & !15; | |
482 | ||
483 | let dx = mv.x >> 2; | |
484 | let cx = (mv.x & 3) as usize; | |
485 | let dy = mv.y >> 2; | |
486 | let cy = (mv.y & 3) as usize; | |
487 | ||
488 | if check_pos(x, y, w, h, fw, fh, dx, dy, RV60_EDGE1[cx], RV60_EDGE2[cx], RV60_EDGE1[cy], RV60_EDGE2[cy]) { | |
489 | let sstride = prev_frame.get_stride(0); | |
490 | let mut soffset = prev_frame.get_offset(0) + x + y * sstride; | |
491 | let data = prev_frame.get_data(); | |
492 | let src: &[u8] = data.as_slice(); | |
493 | soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize; | |
494 | luma_mc(dst, doffset, dstride, src, soffset, sstride, w, h, cx, cy); | |
495 | } else { | |
496 | let mut ebuf: [u8; 70*70] = [0; 70*70]; | |
70d30944 | 497 | edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, w+5, h+5, &mut ebuf, 70, 0, 4); |
52aad9fe KS |
498 | luma_mc(dst, doffset, dstride, &ebuf, 70*2 + 2, 70, w, h, cx, cy); |
499 | } | |
500 | } | |
501 | let (w_, h_) = prev_frame.get_dimensions(1); | |
502 | let fw = (w_ + 7) & !7; | |
503 | let fh = (h_ + 7) & !7; | |
504 | let mvx = mv.x / 2; | |
505 | let mvy = mv.y / 2; | |
506 | let dx = mvx >> 2; | |
507 | let cx = (mvx & 3) as usize; | |
508 | let dy = mvy >> 2; | |
509 | let cy = (mvy & 3) as usize; | |
510 | let cw = w >> 1; | |
511 | let ch = h >> 1; | |
512 | ||
513 | for comp in 1..3 { // chroma | |
cd830591 KS |
514 | let dstride = frame.stride[comp]; |
515 | let doffset = frame.offset[comp] + (if !avg { (x >> 1) + (y >> 1) * dstride } else { 0 }); | |
52aad9fe KS |
516 | if check_pos(x >> 1, y >> 1, cw, ch, fw, fh, dx, dy, 0, 1, 0, 1) { |
517 | let sstride = prev_frame.get_stride(comp); | |
518 | let mut soffset = prev_frame.get_offset(comp) + (x >> 1) + (y >> 1) * sstride; | |
519 | let data = prev_frame.get_data(); | |
520 | let src: &[u8] = data.as_slice(); | |
521 | soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize; | |
cd830591 | 522 | chroma_mc(frame.data, doffset, dstride, src, soffset, sstride, cw, ch, cx, cy); |
52aad9fe KS |
523 | } else { |
524 | let mut ebuf: [u8; 40*40] = [0; 40*40]; | |
70d30944 | 525 | edge_emu(prev_frame, ((x >> 1) as isize) + (dx as isize), ((y >> 1) as isize) + (dy as isize), cw+1, ch+1, &mut ebuf, 40, comp, 3); |
cd830591 | 526 | chroma_mc(frame.data, doffset, dstride, &ebuf, 0, 40, cw, ch, cx, cy); |
52aad9fe KS |
527 | } |
528 | } | |
529 | } | |
cd830591 | 530 | fn deblock_edge4_ver(&self, frame: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize, |
52aad9fe KS |
531 | dblk_l: u8, dblk_r: u8, deblock_chroma: bool) { |
532 | let qp_l = dblk_l >> 2; | |
533 | let str_l = dblk_l & 3; | |
534 | let qp_r = dblk_r >> 2; | |
535 | let str_r = dblk_r & 3; | |
536 | let dl_l = &RV60_DEB_LIMITS[qp_l as usize]; | |
537 | let dl_r = &RV60_DEB_LIMITS[qp_r as usize]; | |
538 | let mode_l = if str_l != 0 { dl_l[(str_l - 1) as usize] } else { 0 }; | |
539 | let mode_r = if str_r != 0 { dl_r[(str_r - 1) as usize] } else { 0 }; | |
540 | let lim1 = dl_r[2] as i16; | |
541 | let lim2 = (dl_r[3] * 4) as i16; | |
542 | { | |
cd830591 KS |
543 | let stride = frame.stride[0]; |
544 | let offset = frame.offset[0] + xpos + ypos * stride; | |
545 | filter_luma_edge(frame.data, offset, 1, stride, mode_l, mode_r, lim1, lim2); | |
52aad9fe KS |
546 | } |
547 | if ((str_l | str_r) >= 2) && deblock_chroma { | |
a15d97ad | 548 | for comp in 1..3 { |
cd830591 KS |
549 | let stride = frame.stride[comp]; |
550 | let offset = frame.offset[comp] + (xpos >> 1) + (ypos >> 1) * stride; | |
551 | filter_chroma_edge(frame.data, offset, 1, stride, mode_l, mode_r, lim1, lim2); | |
52aad9fe KS |
552 | } |
553 | } | |
554 | } | |
cd830591 | 555 | fn deblock_edge4_hor(&self, frame: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize, |
52aad9fe KS |
556 | dblk_t: u8, dblk_d: u8, deblock_chroma: bool) { |
557 | let qp_t = dblk_t >> 2; | |
558 | let str_t = dblk_t & 3; | |
559 | let qp_d = dblk_d >> 2; | |
560 | let str_d = dblk_d & 3; | |
561 | let dl_t = &RV60_DEB_LIMITS[qp_t as usize]; | |
562 | let dl_d = &RV60_DEB_LIMITS[qp_d as usize]; | |
563 | let mode_t = if str_t != 0 { dl_t[(str_t - 1) as usize] } else { 0 }; | |
564 | let mode_d = if str_d != 0 { dl_d[(str_d - 1) as usize] } else { 0 }; | |
565 | let lim1 = dl_d[2] as i16; | |
566 | let lim2 = (dl_d[3] * 4) as i16; | |
567 | { | |
cd830591 KS |
568 | let stride = frame.stride[0]; |
569 | let offset = frame.offset[0] + xpos + ypos * stride; | |
570 | filter_luma_edge(frame.data, offset, stride, 1, mode_t, mode_d, lim1, lim2); | |
52aad9fe KS |
571 | } |
572 | if ((str_t | str_d) >= 2) && deblock_chroma { | |
a15d97ad | 573 | for comp in 1..3 { |
cd830591 KS |
574 | let stride = frame.stride[comp]; |
575 | let offset = frame.offset[comp] + (xpos >> 1) + (ypos >> 1) * stride; | |
576 | filter_chroma_edge(frame.data, offset, stride, 1, mode_t, mode_d, lim1, lim2); | |
52aad9fe KS |
577 | } |
578 | } | |
579 | } | |
cd830591 | 580 | fn deblock8x8(&self, dparams: &RV60DeblockParams, frame: &mut NASimpleVideoFrame<u8>, |
52aad9fe KS |
581 | xpos: usize, ypos: usize, top_str: &[u8], left_str: &[u8], dblkpos: usize) { |
582 | if xpos > 0 { | |
583 | if ypos > 0 { | |
0091a508 KS |
584 | let str_l = left_str[dblkpos - dparams.dblkstride - 1]; |
585 | let str_r = left_str[dblkpos - dparams.dblkstride]; | |
586 | if ((str_l | str_r) & 3) != 0 { | |
52aad9fe KS |
587 | self.deblock_edge4_ver(frame, xpos, ypos - 4, str_l, str_r, dparams.deblock_chroma); |
588 | } | |
589 | } | |
590 | { | |
0091a508 KS |
591 | let str_l = left_str[dblkpos - 1]; |
592 | let str_r = left_str[dblkpos]; | |
593 | if ((str_l | str_r) & 3) != 0 { | |
52aad9fe KS |
594 | self.deblock_edge4_ver(frame, xpos, ypos + 0, str_l, str_r, dparams.deblock_chroma); |
595 | } | |
596 | } | |
0091a508 KS |
597 | if ypos + 8 >= dparams.height { |
598 | let str_l = left_str[dblkpos + dparams.dblkstride - 1]; | |
599 | let str_r = left_str[dblkpos + dparams.dblkstride]; | |
600 | if ((str_l | str_r) & 3) != 0 { | |
52aad9fe KS |
601 | self.deblock_edge4_ver(frame, xpos, ypos + 4, str_l, str_r, dparams.deblock_chroma); |
602 | } | |
603 | } | |
604 | } | |
605 | if ypos > 0 { | |
606 | if xpos > 0 { | |
0091a508 KS |
607 | let str_t = top_str[dblkpos - dparams.dblkstride - 1]; |
608 | let str_d = top_str[dblkpos - 1]; | |
609 | if ((str_t | str_d) & 3) != 0 { | |
52aad9fe KS |
610 | self.deblock_edge4_hor(frame, xpos - 4, ypos, str_t, str_d, dparams.deblock_chroma); |
611 | } | |
612 | } | |
613 | { | |
0091a508 KS |
614 | let str_t = top_str[dblkpos - dparams.dblkstride]; |
615 | let str_d = top_str[dblkpos]; | |
616 | if ((str_t | str_d) & 3) != 0 { | |
52aad9fe KS |
617 | self.deblock_edge4_hor(frame, xpos + 0, ypos, str_t, str_d, dparams.deblock_chroma); |
618 | } | |
619 | } | |
0091a508 KS |
620 | if xpos + 8 >= dparams.width { |
621 | let str_t = top_str[dblkpos - dparams.dblkstride + 1]; | |
622 | let str_d = top_str[dblkpos + 1]; | |
623 | if ((str_t | str_d) & 3) != 0 { | |
52aad9fe KS |
624 | self.deblock_edge4_hor(frame, xpos + 4, ypos, str_t, str_d, dparams.deblock_chroma); |
625 | } | |
626 | } | |
627 | } | |
628 | } | |
cd830591 | 629 | pub fn do_deblock(&self, dparams: &RV60DeblockParams, frame: &mut NASimpleVideoFrame<u8>, |
52aad9fe KS |
630 | xpos: usize, ypos: usize, size: usize, top_str: &[u8], left_str: &[u8], dpos: usize) { |
631 | for x in 0..(size >> 3) { | |
632 | self.deblock8x8(dparams, frame, xpos + x * 8, ypos, | |
633 | top_str, left_str, dpos + x * 2); | |
634 | } | |
635 | for y in 1..(size >> 3) { | |
636 | self.deblock8x8(dparams, frame, xpos, ypos + y * 8, | |
637 | top_str, left_str, dpos + y * 2 * dparams.dblkstride); | |
638 | } | |
639 | } | |
640 | } | |
641 | ||
642 | const RV60_DEB_LIMITS: [[u8; 4]; 32] = [ | |
643 | [ 0, 0, 128, 0 ], [ 0, 0, 128, 0 ], [ 0, 0, 128, 0 ], [ 0, 0, 128, 0 ], | |
644 | [ 0, 0, 128, 0 ], [ 0, 0, 128, 0 ], [ 0, 0, 128, 0 ], [ 0, 0, 128, 0 ], | |
645 | [ 0, 0, 128, 3 ], [ 0, 1, 128, 3 ], [ 0, 1, 122, 3 ], [ 1, 1, 96, 4 ], | |
646 | [ 1, 1, 75, 4 ], [ 1, 1, 59, 4 ], [ 1, 1, 47, 6 ], [ 1, 1, 37, 6 ], | |
647 | [ 1, 1, 29, 6 ], [ 1, 2, 23, 7 ], [ 1, 2, 18, 8 ], [ 1, 2, 15, 8 ], | |
648 | [ 1, 2, 13, 9 ], [ 2, 3, 11, 9 ], [ 2, 3, 10, 10 ], [ 2, 3, 9, 10 ], | |
649 | [ 2, 4, 8, 11 ], [ 3, 4, 7, 11 ], [ 3, 5, 6, 12 ], [ 3, 5, 5, 13 ], | |
650 | [ 3, 5, 4, 14 ], [ 4, 7, 3, 15 ], [ 5, 8, 2, 16 ], [ 5, 9, 1, 17 ] | |
651 | ]; | |
652 | ||
/// Neighbouring samples and availability flags used by intra prediction.
#[derive(Clone)]
pub struct IntraPredContext {
    /// Top neighbours: index 0 is the top-left sample (or 0x80), followed by
    /// two block sizes of samples (or the replicated last value of the first block).
    pub t: [u8; 129],
    /// Left neighbours, indexed the same way as `t`.
    pub l: [u8; 129],
    /// Top neighbours are available.
    pub has_t: bool,
    /// Top-right neighbours are available.
    pub has_tr: bool,
    /// Left neighbours are available.
    pub has_l: bool,
    /// Left-down neighbours are available.
    pub has_ld: bool,
}
662 | ||
663 | impl IntraPredContext { | |
664 | pub fn new() -> Self { | |
665 | Self { | |
666 | t: [0x80; 129], l: [0x80; 129], has_t: false, has_tr: false, has_l: false, has_ld: false, | |
667 | } | |
668 | } | |
669 | pub fn pred_dc(&self, dst: &mut [u8], mut doff: usize, dstride: usize, size: usize, filter: bool) { | |
670 | let dc; | |
671 | if !self.has_t && !self.has_l { | |
672 | dc = 0x80; | |
673 | } else { | |
674 | let mut sum = 0; | |
675 | if self.has_t { | |
676 | for x in 0..size { sum += self.t[x + 1] as u16; } | |
677 | } | |
678 | if self.has_l { | |
679 | for y in 0..size { sum += self.l[y + 1] as u16; } | |
680 | } | |
681 | if self.has_t && self.has_l { | |
682 | dc = ((sum + (size as u16)) / ((size as u16) * 2)) as u8; | |
683 | } else { | |
684 | dc = ((sum + ((size >> 1) as u16)) / (size as u16)) as u8; | |
685 | } | |
686 | } | |
687 | for _ in 0..size { | |
688 | for x in 0..size { dst[doff + x] = dc; } | |
689 | doff += dstride; | |
690 | } | |
691 | if filter && self.has_t && self.has_l { | |
692 | doff -= dstride * size; | |
693 | dst[doff] = (((self.t[1] as u16) + (self.l[1] as u16) + 2 * (dst[doff] as u16) + 2) >> 2) as u8; | |
694 | for x in 1..size { | |
695 | dst[doff + x] = (((self.t[x + 1] as u16) + 3 * (dst[doff + x] as u16) + 2) >> 2) as u8; | |
696 | } | |
697 | for y in 1..size { | |
698 | doff += dstride; | |
699 | dst[doff] = (((self.l[y + 1] as u16) + 3 * (dst[doff] as u16) + 2) >> 2) as u8; | |
700 | } | |
701 | } | |
702 | } | |
703 | pub fn pred_plane(&self, dst: &mut [u8], mut doff: usize, dstride: usize, size: usize) { | |
704 | let lastl = self.l[size + 1] as i32; | |
705 | let lastt = self.t[size + 1] as i32; | |
706 | let mut tmp1: [i32; 64] = [0; 64]; | |
707 | let mut tmp2: [i32; 64] = [0; 64]; | |
708 | for i in 0..size { | |
709 | tmp1[i] = lastl - (self.t[i + 1] as i32); | |
710 | tmp2[i] = lastt - (self.l[i + 1] as i32); | |
711 | } | |
712 | let shift = match size { | |
713 | 4 => 3, | |
714 | 8 => 4, | |
715 | 16 => 5, | |
716 | 32 => 6, | |
717 | _ => 7, | |
718 | }; | |
719 | let mut top_ref: [i32; 64] = [0; 64]; | |
720 | let mut left_ref:[i32; 64] = [0; 64]; | |
721 | for i in 0..size { | |
722 | top_ref [i] = (self.t[i + 1] as i32) << (shift - 1); | |
723 | left_ref[i] = (self.l[i + 1] as i32) << (shift - 1); | |
724 | } | |
725 | for y in 0..size { | |
726 | let add = tmp2[y]; | |
727 | let mut sum = left_ref[y] + (size as i32); | |
728 | for x in 0..size { | |
729 | let v = tmp1[x] + top_ref[x]; | |
730 | sum += add; | |
731 | top_ref[x] = v; | |
732 | dst[doff + x] = ((sum + v) >> shift) as u8; | |
733 | } | |
734 | doff += dstride; | |
735 | } | |
736 | } | |
/// Angular prediction that fills the block column by column.
///
/// For each column the position accumulator grows by `weight` (1/32 sample
/// units): the integer part plus 32 selects the starting index in `src`, and
/// the fractional part blends each sample with its successor.
fn pred_hor_angle(dst: &mut [u8], doff: usize, dstride: usize, size: usize, weight: i16, src: &[u8]) {
    let mut pos: i16 = 0;
    for col in 0..size {
        pos += weight;
        let base = ((pos >> 5) + 32) as usize;
        let frac = (pos & 0x1F) as u16;
        for row in 0..size {
            let value = if frac == 0 {
                src[base + row]
            } else {
                let a = src[base + row] as u16;
                let b = src[base + row + 1] as u16;
                (((32 - frac) * a + frac * b + 0x10) >> 5) as u8
            };
            dst[doff + col + row * dstride] = value;
        }
    }
}
/// Row-by-row angular prediction.
///
/// For every output row the reference position advances by `weight` (in
/// 1/32 sample units, may be negative). The integer part, offset by 32,
/// selects where in `src` the row starts; the low five bits give the blend
/// factor between two neighbouring reference samples. Rows are written to
/// `dst` starting at `doff`, `dstride` apart.
///
/// Panics if the computed reference range falls outside `src` or the
/// destination range falls outside `dst`.
fn pred_ver_angle(dst: &mut [u8], mut doff: usize, dstride: usize, size: usize, weight: i16, src: &[u8]) {
    let mut pos: i16 = 0;
    for _ in 0..size {
        pos += weight;
        let base = ((pos >> 5) + 32) as usize;
        match (pos & 0x1F) as u16 {
            // Whole-sample displacement: plain copy of the reference run.
            0 => dst[doff..][..size].copy_from_slice(&src[base..][..size]),
            frac => {
                let inv = 32 - frac;
                for x in 0..size {
                    let a = u16::from(src[base + x]);
                    let b = u16::from(src[base + x + 1]);
                    dst[doff + x] = ((inv * a + frac * b + 0x10) >> 5) as u8;
                }
            }
        }
        doff += dstride;
    }
}
/// Smooths the first `size` samples of `src` into `dst` with a `[1 2 1] / 4`
/// kernel (rounded). The first and last samples are copied unfiltered.
///
/// A `size` of zero is a no-op; previously it wrote `dst[0]` and then
/// underflowed on `size - 1`, which always ended in a panic.
///
/// Panics if `dst` or `src` holds fewer than `size` samples.
fn filter_weak(dst: &mut [u8], src: &[u8], size: usize) {
    if size == 0 {
        return;
    }
    let last = size - 1;
    dst[0] = src[0];
    // `windows(3)` yields `size - 2` triples centred on indices 1..last, so
    // the zip stops before the final sample, which is copied below.
    for (out, win) in dst[1..size].iter_mut().zip(src[..size].windows(3)) {
        let acc = u16::from(win[0]) + 2 * u16::from(win[1]) + u16::from(win[2]) + 2;
        *out = (acc >> 2) as u8;
    }
    dst[last] = src[last];
}
/// Writes `size` samples of a linear ramp into `dst`.
///
/// Sample `i` is `(32 * v0 + i * (v1 - v0) + 16) >> 5`, i.e. the ramp starts
/// at `v0` and would reach `v1` at index 32.
///
/// Panics if `dst` holds fewer than `size` samples.
fn filter_bilin32(dst: &mut [u8], v0: u8, v1: u8, size: usize) {
    let step = i16::from(v1) - i16::from(v0);
    // 5 fractional bits, pre-loaded with the rounding half.
    let mut acc = (i16::from(v0) << 5) + (1 << (5 - 1));
    for out in &mut dst[..size] {
        *out = (acc >> 5) as u8;
        acc += step;
    }
}
b7c882c1 | 789 | #[allow(clippy::cognitive_complexity)] |
52aad9fe KS |
790 | pub fn pred_angle(&self, dst: &mut [u8], mut doff: usize, dstride: usize, size: usize, angle: usize, filter: bool) { |
791 | let mut filtered1: [u8; 96] = [0; 96]; | |
792 | let mut filtered2: [u8; 96] = [0; 96]; | |
793 | if angle == 0 { | |
794 | self.pred_plane(dst, doff, dstride, size); | |
795 | } else if angle == 1 { | |
796 | self.pred_dc(dst, doff, dstride, size, filter); | |
797 | } else if angle <= 9 { | |
798 | let ang_weight = RV60_IPRED_ANGLE[10 - angle]; | |
799 | let add_size = (size * (ang_weight as usize) + 31) >> 5; | |
800 | if size <= 16 { | |
801 | Self::filter_weak(&mut filtered1[32..], &self.l[1..], size + add_size); | |
802 | } else { | |
803 | Self::filter_bilin32(&mut filtered1[32..], self.l[1], self.l[33], 32); | |
804 | Self::filter_bilin32(&mut filtered1[64..], self.l[32], self.l[64], add_size); | |
805 | } | |
806 | Self::pred_hor_angle(dst, doff, dstride, size, ang_weight as i16, &filtered1); | |
807 | } else if angle == 10 { | |
808 | if size <= 16 { | |
809 | Self::filter_weak(&mut filtered1[32..], &self.l[1..], size); | |
810 | } else { | |
811 | Self::filter_bilin32(&mut filtered1[32..], self.l[1], self.l[33], 32); | |
812 | } | |
813 | for y in 0..size { | |
814 | for x in 0..size { | |
815 | dst[doff + x] = filtered1[32 + y]; | |
816 | } | |
817 | doff += dstride; | |
818 | } | |
819 | if filter { | |
820 | doff -= dstride * size; | |
821 | let tl = self.t[0] as i16; | |
822 | for x in 0..size { | |
823 | dst[doff + x] = clip8((dst[doff + x] as i16) + (((self.t[x + 1] as i16) - tl) >> 1)); | |
824 | } | |
825 | } | |
826 | } else if angle <= 17 { | |
827 | let ang_weight = RV60_IPRED_ANGLE [angle - 10]; | |
828 | let inv_angle = RV60_IPRED_INV_ANGLE[angle - 10]; | |
829 | let add_size = (size * (ang_weight as usize) + 31) >> 5; | |
830 | if size <= 16 { | |
e07387c7 | 831 | for i in 0..=size { |
52aad9fe KS |
832 | filtered1[32-1 + i] = self.l[i]; |
833 | } | |
e07387c7 | 834 | for i in 0..=size { |
52aad9fe KS |
835 | filtered2[32-1 + i] = self.t[i]; |
836 | } | |
837 | } else { | |
838 | filtered1[32-1] = self.l[0]; | |
839 | Self::filter_bilin32(&mut filtered1[32..], self.l[0], self.l[32], 32); | |
840 | filtered2[32-1] = self.t[0]; | |
841 | Self::filter_bilin32(&mut filtered2[32..], self.t[0], self.t[32], 32); | |
842 | } | |
843 | if add_size > 1 { | |
844 | let mut sum = 0x80; | |
845 | for i in 1..add_size { | |
846 | sum += inv_angle; | |
847 | let pos = ((sum >> 8) + 32 - 1) as usize; | |
848 | filtered1[32 - 1 - i] = filtered2[pos]; | |
849 | } | |
850 | } | |
851 | Self::pred_hor_angle(dst, doff, dstride, size, -(ang_weight as i16), &filtered1); | |
852 | } else if angle <= 25 { | |
853 | let ang_weight = RV60_IPRED_ANGLE[26 - angle]; | |
854 | let inv_angle = RV60_IPRED_INV_ANGLE[26 - angle]; | |
855 | let add_size = (size * (ang_weight as usize) + 31) >> 5; | |
856 | if size <= 16 { | |
e07387c7 | 857 | for i in 0..=size { |
52aad9fe KS |
858 | filtered1[32-1 + i] = self.t[i]; |
859 | } | |
e07387c7 | 860 | for i in 0..=size { |
52aad9fe KS |
861 | filtered2[32-1 + i] = self.l[i]; |
862 | } | |
863 | } else { | |
864 | filtered1[32-1] = self.t[0]; | |
865 | Self::filter_bilin32(&mut filtered1[32..], self.t[0], self.t[32], 32); | |
866 | filtered2[32-1] = self.l[0]; | |
867 | Self::filter_bilin32(&mut filtered2[32..], self.l[0], self.l[32], 32); | |
868 | } | |
869 | if add_size > 1 { | |
870 | let mut sum = 0x80; | |
871 | for i in 1..add_size { | |
872 | sum += inv_angle; | |
873 | let pos = ((sum >> 8) + 32 - 1) as usize; | |
874 | filtered1[32 - 1 - i] = filtered2[pos]; | |
875 | } | |
876 | } | |
877 | Self::pred_ver_angle(dst, doff, dstride, size, -(ang_weight as i16), &filtered1); | |
878 | } else if angle == 26 { | |
879 | if size <= 16 { | |
880 | Self::filter_weak(&mut filtered1[32..], &self.t[1..], size); | |
881 | } else { | |
882 | Self::filter_bilin32(&mut filtered1[32..], self.t[1], self.t[33], 32); | |
883 | } | |
884 | for _ in 0..size { | |
fa57381e | 885 | dst[doff..][..size].copy_from_slice(&filtered1[32..][..size]); |
52aad9fe KS |
886 | doff += dstride; |
887 | } | |
888 | if filter { | |
889 | doff -= dstride * size; | |
890 | let tl = self.l[0] as i16; | |
891 | for y in 0..size { | |
892 | dst[doff] = clip8((dst[doff] as i16) + (((self.l[y + 1] as i16) - tl) >> 1)); | |
893 | doff += dstride; | |
894 | } | |
895 | } | |
896 | } else if angle <= 34 { | |
897 | let ang_weight = RV60_IPRED_ANGLE[angle - 26]; | |
898 | let add_size = (size * (ang_weight as usize) + 31) >> 5; | |
899 | if size <= 16 { | |
900 | Self::filter_weak(&mut filtered1[32..], &self.t[1..], size + add_size); | |
901 | } else { | |
902 | Self::filter_bilin32(&mut filtered1[32..], self.t[1], self.t[33], 32); | |
903 | Self::filter_bilin32(&mut filtered1[64..], self.t[32], self.t[64], add_size); | |
904 | } | |
905 | Self::pred_ver_angle(dst, doff, dstride, size, ang_weight as i16, &filtered1); | |
906 | } else { | |
907 | unreachable!(); | |
908 | } | |
909 | } | |
910 | } | |
911 | ||
/// Per-row/column displacement of the angular predictors, in 1/32 sample
/// units (consumed with `>> 5` / `& 0x1F`).
const RV60_IPRED_ANGLE: [u8; 9] = [
    0, 2, 5, 9, 13, 17, 21, 26, 32,
];
/// Inverse displacements with 8 fractional bits; each non-zero entry equals
/// `8192 / RV60_IPRED_ANGLE[i]` rounded to nearest. Entry 0 is a placeholder.
const RV60_IPRED_INV_ANGLE: [i16; 9] = [
    0, 4096, 1638, 910, 630, 482, 390, 315, 256,
];
// NOTE(review): the two edge tables are not referenced in this part of the
// file; presumably per-mode edge extents for motion compensation - confirm
// at the use site.
const RV60_EDGE1: [isize; 4] = [
    0, 2, 2, 2,
];
const RV60_EDGE2: [isize; 4] = [
    0, 3, 3, 3,
];
916 |