]>
Commit | Line | Data |
---|---|---|
4c1582cf KS |
1 | use nihav_codec_support::codecs::blockdsp::*; |
2 | ||
/// Intra prediction modes for 4x4 blocks (H.264-style set used by SVQ3).
#[allow(dead_code)]
#[derive(Debug,Clone,Copy)]
pub enum PredType4x4 {
    Ver,            // replicate the row above
    Hor,            // replicate the left column
    DC,             // average of top and left neighbours
    DiagDownLeft,
    DiagDownRight,
    VerRight,
    HorDown,
    VerLeft,
    HorUp,
    LeftDC,         // DC from the left neighbours only
    TopDC,          // DC from the top neighbours only
    DC128,          // no neighbours available: flat mid-grey fill
}
19 | ||
/// Intra prediction modes for 8x8 and 16x16 blocks.
#[allow(dead_code)]
#[derive(Debug,Clone,Copy)]
pub enum PredType8x8 {
    DC,
    Hor,
    Ver,
    Plane,          // gradient prediction
    LeftDC,         // DC from the left neighbours only
    TopDC,          // DC from the top neighbours only
    DC128           // no neighbours available: flat mid-grey fill
}
31 | ||
/// Maps a decoded 16x16 intra mode index (0..=3) to its prediction type.
pub const INTRA_PRED16: [PredType8x8; 4] = [
    PredType8x8::DC, PredType8x8::Hor, PredType8x8::Ver, PredType8x8::Plane
];
/// Maps a decoded 4x4 intra mode index (0..=8) to its prediction type.
pub const INTRA_PRED4: [PredType4x4; 9] = [
    PredType4x4::Ver, PredType4x4::Hor, PredType4x4::DC,
    PredType4x4::DiagDownLeft, PredType4x4::DiagDownRight,
    PredType4x4::VerRight, PredType4x4::HorDown,
    PredType4x4::VerLeft, PredType4x4::HorUp
];
41 | ||
42 | ||
/// Dequantiser scale factor per quantiser index (0..=31); the IDCT routines
/// below multiply by this and renormalise with a rounded `>> 20`.
const SVQ3_QUANTS: [i32; 32] = [
     3881,  4351,  4890,  5481,  6154,  6914,  7761,  8718,
     9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
    24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
    61694, 68745, 77615, 89113,100253,109366,126635,141533
];
49 | ||
/// In-place 2x2 Hadamard-style butterfly over the four chroma DC values.
pub fn chroma_transform(blk: &mut [i16; 4]) {
    let (a, b, c, d) = (blk[0], blk[1], blk[2], blk[3]);
    let sum02 = a + c;
    let dif02 = a - c;
    let sum13 = b + d;
    let dif13 = b - d;
    blk[0] = sum02 + sum13;
    blk[1] = sum02 - sum13;
    blk[2] = dif02 + dif13;
    blk[3] = dif02 - dif13;
}
60 | ||
/// Dequantises and inverse-transforms a 4x4 block of DC coefficients in place.
///
/// Both passes use SVQ3's integer approximation of the 4-point transform with
/// the 13/17/7 coefficient set; `q` selects the scale from `SVQ3_QUANTS` and
/// the result is renormalised with rounding by the final `>> 20`.
pub fn idct_dc_coeffs(blk: &mut [i16; 16], q: u8) {
    let quant = SVQ3_QUANTS[q as usize];
    let mut tmp = [0i32; 16];
    // horizontal pass: transform each row into a wider temporary buffer
    for (src, dst) in blk.chunks(4).zip(tmp.chunks_mut(4)) {
        let s0 = i32::from(src[0]);
        let s1 = i32::from(src[1]);
        let s2 = i32::from(src[2]);
        let s3 = i32::from(src[3]);
        let t0 = 13 * (s0 + s2);
        let t1 = 13 * (s0 - s2);
        let t2 = 17 * s1 + 7 * s3;
        let t3 = 7 * s1 - 17 * s3;
        dst[0] = t0 + t2;
        dst[1] = t1 + t3;
        dst[2] = t1 - t3;
        dst[3] = t0 - t2;
    }
    // vertical pass: transform each column, then scale and round back to i16;
    // wrapping_mul keeps the unchecked-overflow semantics of the reference math
    for i in 0..4 {
        let s0 = tmp[i];
        let s1 = tmp[i + 4];
        let s2 = tmp[i + 4 * 2];
        let s3 = tmp[i + 4 * 3];
        let t0 = 13 * (s0 + s2);
        let t1 = 13 * (s0 - s2);
        let t2 = 17 * s1 + 7 * s3;
        let t3 = 7 * s1 - 17 * s3;
        blk[i] = (((t0 + t2).wrapping_mul(quant) + (1 << 19)) >> 20) as i16;
        blk[i + 4] = (((t1 + t3).wrapping_mul(quant) + (1 << 19)) >> 20) as i16;
        blk[i + 4 * 2] = (((t1 - t3).wrapping_mul(quant) + (1 << 19)) >> 20) as i16;
        blk[i + 4 * 3] = (((t0 - t2).wrapping_mul(quant) + (1 << 19)) >> 20) as i16;
    }
}
93 | ||
/// Dequantises and inverse-transforms a 4x4 coefficient block in place,
/// folding in a separately scaled DC term.
///
/// The DC coefficient gets its own scale: `quant / 2` for chroma, the fixed
/// factor 1538 for luma. `blk[0]` is consumed by that DC term and zeroed so
/// it does not also go through the AC passes.
pub fn idct(blk: &mut [i16; 16], q: u8, chroma: bool) {
    let quant = SVQ3_QUANTS[q as usize];
    let mut tmp = [0i32; 16];
    let dc = 13 * 13 * if chroma { quant * i32::from(blk[0]) / 2 } else { i32::from(blk[0]) * 1538 };
    blk[0] = 0;
    // horizontal pass (same 13/17/7 basis as idct_dc_coeffs)
    for (src, dst) in blk.chunks(4).zip(tmp.chunks_mut(4)) {
        let s0 = i32::from(src[0]);
        let s1 = i32::from(src[1]);
        let s2 = i32::from(src[2]);
        let s3 = i32::from(src[3]);
        let t0 = 13 * (s0 + s2);
        let t1 = 13 * (s0 - s2);
        let t2 = 17 * s1 + 7 * s3;
        let t3 = 7 * s1 - 17 * s3;
        dst[0] = t0 + t2;
        dst[1] = t1 + t3;
        dst[2] = t1 - t3;
        dst[3] = t0 - t2;
    }
    // vertical pass: add the DC term to every output, scale and round
    for i in 0..4 {
        let s0 = tmp[i];
        let s1 = tmp[i + 4];
        let s2 = tmp[i + 4 * 2];
        let s3 = tmp[i + 4 * 3];
        let t0 = 13 * (s0 + s2);
        let t1 = 13 * (s0 - s2);
        let t2 = 17 * s1 + 7 * s3;
        let t3 = 7 * s1 - 17 * s3;
        blk[i] = (((t0 + t2).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16;
        blk[i + 4] = (((t1 + t3).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16;
        blk[i + 4 * 2] = (((t1 - t3).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16;
        blk[i + 4 * 3] = (((t0 - t2).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16;
    }
}
128 | ||
/// Adds a 4x4 residual block to the picture at `offset`, saturating each
/// resulting pixel to the 0..=255 range.
pub fn add_coeffs(dst: &mut [u8], offset: usize, stride: usize, coeffs: &[i16]) {
    let area = &mut dst[offset..][..stride * 3 + 4];
    for (row, residual) in area.chunks_mut(stride).take(4).zip(coeffs.chunks(4)) {
        for (pix, &delta) in row.iter_mut().take(4).zip(residual.iter()) {
            let val = i32::from(*pix) + i32::from(delta);
            *pix = val.max(0).min(255) as u8;
        }
    }
}
137 | ||
/// Rounded average of the `bw`x`bh` block in `src` with the one in `dst`,
/// stored back into `dst` (used for bidirectional/averaged motion comp).
pub fn avg(dst: &mut [u8], dstride: usize,
           src: &[u8], sstride: usize, bw: usize, bh: usize) {
    for (drow, srow) in dst.chunks_mut(dstride).take(bh).zip(src.chunks(sstride)) {
        for (d, &s) in drow.iter_mut().take(bw).zip(srow.iter()) {
            let sum = u16::from(*d) + u16::from(s) + 1;
            *d = (sum >> 1) as u8;
        }
    }
}
146 | ||
/// Saturates a 16-bit value into the 0..=255 pixel range.
fn clip8(val: i16) -> u8 {
    if val < 0 {
        0
    } else if val > 255 {
        255
    } else {
        val as u8
    }
}
148 | ||
/// Fills a `bsize`x`bsize` block with mid-grey (128) — used when no
/// neighbouring pixels are available for DC prediction.
fn ipred_dc128(buf: &mut [u8], idx: usize, stride: usize, bsize: usize) {
    for row in buf[idx..].chunks_mut(stride).take(bsize) {
        for pix in row[..bsize].iter_mut() {
            *pix = 128;
        }
    }
}
/// Vertical prediction: replicates the row directly above the block into
/// every row of the `bsize`x`bsize` block (bsize is at most 16).
fn ipred_ver(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize) {
    let mut top = [0u8; 16];
    top[..bsize].copy_from_slice(&buf[idx - stride..idx - stride + bsize]);
    for _ in 0..bsize {
        buf[idx..idx + bsize].copy_from_slice(&top[..bsize]);
        idx += stride;
    }
}
/// Horizontal prediction: fills each row of the block with the pixel
/// immediately to its left.
fn ipred_hor(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize) {
    for _ in 0..bsize {
        let left = buf[idx - 1];
        for pix in buf[idx..idx + bsize].iter_mut() {
            *pix = left;
        }
        idx += stride;
    }
}
/// DC prediction from both top and left neighbours: averages the `bsize`
/// pixels above and the `bsize` pixels to the left (rounded by `shift`),
/// then fills the block with that value. Sums fit in u16 (max 32 * 255).
fn ipred_dc(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize, shift: u8) {
    let mut sum: u16 = 0;
    for i in 0..bsize {
        sum += u16::from(buf[idx - stride + i]);
        sum += u16::from(buf[idx - 1 + i * stride]);
    }
    let dc = ((sum + (1 << (shift - 1))) >> shift) as u8;

    for _ in 0..bsize {
        for pix in buf[idx..idx + bsize].iter_mut() {
            *pix = dc;
        }
        idx += stride;
    }
}
/// DC prediction from the left neighbours only (top row unavailable).
fn ipred_left_dc(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize, shift: u8) {
    let mut sum: u16 = 0;
    for i in 0..bsize {
        sum += u16::from(buf[idx - 1 + i * stride]);
    }
    let dc = ((sum + (1 << (shift - 1))) >> shift) as u8;

    for _ in 0..bsize {
        for pix in buf[idx..idx + bsize].iter_mut() {
            *pix = dc;
        }
        idx += stride;
    }
}
/// DC prediction from the top neighbours only (left column unavailable).
fn ipred_top_dc(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize, shift: u8) {
    let mut sum: u16 = 0;
    for i in 0..bsize {
        sum += u16::from(buf[idx - stride + i]);
    }
    let dc = ((sum + (1 << (shift - 1))) >> shift) as u8;

    for _ in 0..bsize {
        for pix in buf[idx..idx + bsize].iter_mut() {
            *pix = dc;
        }
        idx += stride;
    }
}
199 | ||
/// Copies `len` pixels of the row above `idx` into `dst`, widened to u16.
fn load_top(dst: &mut [u16], buf: &mut [u8], idx: usize, stride: usize, len: usize) {
    let top = idx - stride;
    for (i, el) in dst[..len].iter_mut().enumerate() {
        *el = u16::from(buf[top + i]);
    }
}
/// Copies `len` pixels of the column left of `idx` into `dst`, widened to u16.
fn load_left(dst: &mut [u16], buf: &mut [u8], idx: usize, stride: usize, len: usize) {
    for (i, el) in dst[..len].iter_mut().enumerate() {
        *el = u16::from(buf[idx - 1 + i * stride]);
    }
}
206 | ||
/// 4x4 vertical prediction (top row replicated).
fn ipred_4x4_ver(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_ver(buf, idx, stride, 4);
}
/// 4x4 horizontal prediction (left column replicated).
fn ipred_4x4_hor(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_hor(buf, idx, stride, 4);
}
/// SVQ3's diagonal down-left 4x4 prediction: three diagonal values are built
/// by averaging the matching top and left neighbours (offsets 1..=3), and
/// every anti-diagonal past the third repeats the last value.
fn ipred_4x4_diag_down_left(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    let mut diag = [0u8; 4];
    for k in 1..4 {
        let top  = u16::from(buf[idx - stride + k]);
        let left = u16::from(buf[idx - 1 + k * stride]);
        diag[k - 1] = ((top + left) >> 1) as u8;
    }
    diag[3] = diag[2];
    // pixel (i, j) takes the value of anti-diagonal i + j, clamped to the last
    for j in 0..4 {
        for i in 0..4 {
            buf[idx + i + j * stride] = diag[(i + j).min(3)];
        }
    }
}
/// Diagonal down-right 4x4 prediction: each output is a 3-tap smoothed
/// neighbour value taken along the main diagonal from the left column,
/// the top-left corner or the top row.
fn ipred_4x4_diag_down_right(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    let mut t: [u16; 5] = [0; 5];
    let mut l: [u16; 5] = [0; 5];
    // t[0] and l[0] both hold the top-left corner pixel
    load_top(&mut t, buf, idx - 1, stride, 5);
    load_left(&mut l, buf, idx - stride, stride, 5);
    let dst = &mut buf[idx..];

    for j in 0..4 {
        // below the diagonal: smoothed left-column neighbours
        for i in 0..j {
            dst[i + j * stride] = ((l[j - i - 1] + 2 * l[j - i] + l[j - i + 1] + 2) >> 2) as u8;
        }
        // on the diagonal: smoothed top-left corner
        dst[j + j * stride] = ((l[1] + 2 * l[0] + t[1] + 2) >> 2) as u8;
        // above the diagonal: smoothed top-row neighbours
        for i in (j+1)..4 {
            dst[i + j * stride] = ((t[i - j - 1] + 2 * t[i - j] + t[i - j + 1] + 2) >> 2) as u8;
        }
    }
}
/// Vertical-right 4x4 prediction. `zvr = 2*i - j` classifies each pixel:
/// non-negative values interpolate the top row (2-tap for even, 3-tap for
/// odd), -1 uses the smoothed top-left corner, and anything below that
/// uses smoothed left-column neighbours.
fn ipred_4x4_ver_right(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    let mut t: [u16; 5] = [0; 5];
    let mut l: [u16; 5] = [0; 5];
    // t[0] and l[0] both hold the top-left corner pixel
    load_top(&mut t, buf, idx - 1, stride, 5);
    load_left(&mut l, buf, idx - stride, stride, 5);
    let dst = &mut buf[idx..];

    for j in 0..4 {
        for i in 0..4 {
            let zvr = ((2 * i) as i8) - (j as i8);
            let pix;
            if zvr >= 0 {
                if (zvr & 1) == 0 {
                    // even: two-tap average of adjacent top pixels
                    pix = (t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 1) >> 1;
                } else {
                    // odd: three-tap smoothing of top pixels
                    pix = (t[i - (j >> 1) - 1] + 2 * t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 2) >> 2;
                }
            } else {
                if zvr == -1 {
                    // smoothed top-left corner
                    pix = (l[1] + 2 * l[0] + t[1] + 2) >> 2;
                } else {
                    // smoothed left-column neighbours
                    pix = (l[j] + 2 * l[j - 1] + l[j - 2] + 2) >> 2;
                }
            }
            dst[i + j * stride] = pix as u8;
        }
    }
}
/// Vertical-left 4x4 prediction: interpolates between the four top
/// neighbours and the four top-right neighbours supplied in `tr`.
/// Even rows use two-tap averages, odd rows three-tap smoothing; rows 2/3
/// reuse the values of rows 0/1 shifted one pixel to the left.
fn ipred_4x4_ver_left(buf: &mut [u8], idx: usize, stride: usize, tr: &[u8]) {
    let mut t: [u16; 8] = [0; 8];
    load_top(&mut t, buf, idx, stride, 4);
    // extend the top row with the top-right neighbours
    for i in 0..4 { t[i + 4] = u16::from(tr[i]); }
    let dst = &mut buf[idx..];

    dst[0 + 0 * stride] = ((t[0] + t[1] + 1) >> 1) as u8;
    let pix = ((t[1] + t[2] + 1) >> 1) as u8;
    dst[1 + 0 * stride] = pix;
    dst[0 + 2 * stride] = pix;
    let pix = ((t[2] + t[3] + 1) >> 1) as u8;
    dst[2 + 0 * stride] = pix;
    dst[1 + 2 * stride] = pix;
    let pix = ((t[3] + t[4] + 1) >> 1) as u8;
    dst[3 + 0 * stride] = pix;
    dst[2 + 2 * stride] = pix;
    dst[3 + 2 * stride] = ((t[4] + t[5] + 1) >> 1) as u8;
    dst[0 + 1 * stride] = ((t[0] + 2*t[1] + t[2] + 2) >> 2) as u8;
    let pix = ((t[1] + 2*t[2] + t[3] + 2) >> 2) as u8;
    dst[1 + 1 * stride] = pix;
    dst[0 + 3 * stride] = pix;
    let pix = ((t[2] + 2*t[3] + t[4] + 2) >> 2) as u8;
    dst[2 + 1 * stride] = pix;
    dst[1 + 3 * stride] = pix;
    let pix = ((t[3] + 2*t[4] + t[5] + 2) >> 2) as u8;
    dst[3 + 1 * stride] = pix;
    dst[2 + 3 * stride] = pix;
    dst[3 + 3 * stride] = ((t[4] + 2*t[5] + t[6] + 2) >> 2) as u8;
}
/// Horizontal-down 4x4 prediction — the transpose-like counterpart of
/// ipred_4x4_ver_right. `zhd = 2*j - i` classifies each pixel: non-negative
/// values interpolate the left column (2-tap even, 3-tap odd), -1 uses the
/// smoothed top-left corner, anything below that the smoothed top row.
fn ipred_4x4_hor_down(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    let mut t: [u16; 5] = [0; 5];
    let mut l: [u16; 5] = [0; 5];
    // t[0] and l[0] both hold the top-left corner pixel
    load_top(&mut t, buf, idx - 1, stride, 5);
    load_left(&mut l, buf, idx - stride, stride, 5);
    let dst = &mut buf[idx..];

    for j in 0..4 {
        for i in 0..4 {
            let zhd = ((2 * j) as i8) - (i as i8);
            let pix;
            if zhd >= 0 {
                if (zhd & 1) == 0 {
                    // even: two-tap average of adjacent left pixels
                    pix = (l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 1) >> 1;
                } else {
                    // odd: three-tap smoothing of left pixels
                    pix = (l[j - (i >> 1) - 1] + 2 * l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 2) >> 2;
                }
            } else {
                if zhd == -1 {
                    // smoothed top-left corner
                    pix = (l[1] + 2 * l[0] + t[1] + 2) >> 2;
                } else {
                    // smoothed top-row neighbours
                    pix = (t[i - 2] + 2 * t[i - 1] + t[i] + 2) >> 2;
                }
            }
            dst[i + j * stride] = pix as u8;
        }
    }
}
/// Horizontal-up 4x4 prediction: interpolates down the left neighbour
/// column, with everything past the last interpolated sample repeating the
/// bottom-most neighbour.
///
/// Only the four left neighbours of the block are ever used, so exactly
/// four are loaded. (The previous code loaded eight left pixels, reading
/// four rows *below* the block and risking an out-of-range access when the
/// block sits at the bottom edge of the buffer; the extra values were
/// never used, so output is unchanged.)
fn ipred_4x4_hor_up(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    let mut l = [0u16; 4];
    for (i, el) in l.iter_mut().enumerate() {
        *el = u16::from(buf[idx - 1 + i * stride]);
    }
    let dst = &mut buf[idx..];

    dst[0 + 0 * stride] = ((l[0] + l[1] + 1) >> 1) as u8;
    dst[1 + 0 * stride] = ((l[0] + 2*l[1] + l[2] + 2) >> 2) as u8;
    let pix = ((l[1] + l[2] + 1) >> 1) as u8;
    dst[2 + 0 * stride] = pix;
    dst[0 + 1 * stride] = pix;
    let pix = ((l[1] + 2*l[2] + l[3] + 2) >> 2) as u8;
    dst[3 + 0 * stride] = pix;
    dst[1 + 1 * stride] = pix;
    let pix = ((l[2] + l[3] + 1) >> 1) as u8;
    dst[2 + 1 * stride] = pix;
    dst[0 + 2 * stride] = pix;
    let pix = ((l[2] + 3*l[3] + 2) >> 2) as u8;
    dst[3 + 1 * stride] = pix;
    dst[1 + 2 * stride] = pix;
    // the remaining pixels all repeat the bottom-most left neighbour
    let fill = l[3] as u8;
    dst[2 + 2 * stride] = fill;
    dst[3 + 2 * stride] = fill;
    for i in 0..4 {
        dst[i + 3 * stride] = fill;
    }
}
/// 4x4 DC prediction from 4 top + 4 left neighbours (sum of 8 >> 3).
fn ipred_4x4_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_dc(buf, idx, stride, 4, 3);
}
/// 4x4 DC from the left column only (sum of 4 >> 2).
fn ipred_4x4_left_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_left_dc(buf, idx, stride, 4, 2);
}
/// 4x4 DC from the top row only (sum of 4 >> 2).
fn ipred_4x4_top_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_top_dc(buf, idx, stride, 4, 2);
}
/// 4x4 fallback fill when no neighbours are available.
fn ipred_4x4_dc128(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_dc128(buf, idx, stride, 4);
}
371 | ||
/// 8x8 vertical prediction (top row replicated).
fn ipred_8x8_ver(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_ver(buf, idx, stride, 8);
}
/// 8x8 horizontal prediction (left column replicated).
fn ipred_8x8_hor(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_hor(buf, idx, stride, 8);
}
/// 8x8 DC prediction computed per 4x4 quadrant: the top-left quadrant
/// averages the first 4 top + 4 left neighbours, the top-right uses the
/// remaining top pixels, the bottom-left the remaining left pixels, and
/// the bottom-right averages both of those sums.
fn ipred_8x8_dc(buf: &mut [u8], idx: usize, stride: usize) {
    let mut t: [u16; 8] = [0; 8];
    load_top(&mut t, buf, idx, stride, 8);
    let mut l: [u16; 8] = [0; 8];
    load_left(&mut l, buf, idx, stride, 8);

    let dc0 = ((t[0] + t[1] + t[2] + t[3] + l[0] + l[1] + l[2] + l[3] + 4) >> 3) as u8;
    let sum1 = t[4] + t[5] + t[6] + t[7];
    let dc1 = ((sum1 + 2) >> 2) as u8;
    let sum2 = l[4] + l[5] + l[6] + l[7];
    let dc2 = ((sum2 + 2) >> 2) as u8;
    let dc3 = ((sum1 + sum2 + 4) >> 3) as u8;

    let dst = &mut buf[idx..];
    // top half: dc0 fills the left quadrant, dc1 the right
    for row in dst.chunks_mut(stride).take(4) {
        row[..4].copy_from_slice(&[dc0; 4]);
        row[4..8].copy_from_slice(&[dc1; 4]);
    }
    // bottom half: dc2 fills the left quadrant, dc3 the right
    for row in dst.chunks_mut(stride).skip(4).take(4) {
        row[..4].copy_from_slice(&[dc2; 4]);
        row[4..8].copy_from_slice(&[dc3; 4]);
    }
}
/// 8x8 DC from the left column only (sum of 8 >> 3).
fn ipred_8x8_left_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_left_dc(buf, idx, stride, 8, 3);
}
/// 8x8 DC from the top row only (sum of 8 >> 3).
fn ipred_8x8_top_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_top_dc(buf, idx, stride, 8, 3);
}
/// 8x8 fallback fill when no neighbours are available.
fn ipred_8x8_dc128(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_dc128(buf, idx, stride, 8);
}
/// Placeholder keeping IPRED_FUNCS8X8 aligned with the PredType8x8 variant
/// order; this entry is never expected to be invoked.
/// NOTE(review): presumably Plane is only selected for 16x16 blocks —
/// confirm with the callers before enabling the commented code below.
fn ipred_8x8_plane(_buf: &mut [u8], _idx: usize, _stride: usize) {
    unreachable!();
    /* let mut h: i16 = 0;
    let mut v: i16 = 0;
    let idx0 = idx + 3 - stride;
    let mut idx1 = idx + 4 * stride - 1;
    let mut idx2 = idx + 2 * stride - 1;
    for i in 0..4 {
        let i1 = (i + 1) as i16;
        h += i1 * (i16::from(buf[idx0 + i + 1]) - i16::from(buf[idx0 - i - 1]));
        v += i1 * (i16::from(buf[idx1]) - i16::from(buf[idx2]));
        idx1 += stride;
        idx2 -= stride;
    }
    let b = (17 * h + 16) >> 5;
    let c = (17 * v + 16) >> 5;
    let mut a = 16 * (i16::from(buf[idx - 1 + 7 * stride]) + i16::from(buf[idx + 7 - stride])) - 3 * (b + c) + 16;
    for line in buf[idx..].chunks_mut(stride).take(8) {
        let mut acc = a;
        for el in line.iter_mut().take(8) {
            *el = clip8(acc >> 5);
            acc += b;
        }
        a += c;
    }*/
}
436 | ||
/// 16x16 vertical prediction (top row replicated).
fn ipred_16x16_ver(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_ver(buf, idx, stride, 16);
}
/// 16x16 horizontal prediction (left column replicated).
fn ipred_16x16_hor(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_hor(buf, idx, stride, 16);
}
/// 16x16 DC prediction from 16 top + 16 left neighbours (sum of 32 >> 5).
fn ipred_16x16_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_dc(buf, idx, stride, 16, 5);
}
/// 16x16 DC from the left column only (sum of 16 >> 4).
fn ipred_16x16_left_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_left_dc(buf, idx, stride, 16, 4);
}
/// 16x16 DC from the top row only (sum of 16 >> 4).
fn ipred_16x16_top_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_top_dc(buf, idx, stride, 16, 4);
}
/// 16x16 fallback fill when no neighbours are available.
fn ipred_16x16_dc128(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_dc128(buf, idx, stride, 16);
}
/// 16x16 plane (gradient) prediction, SVQ3 flavour.
///
/// Gradients are accumulated from the top row and left column, then scaled
/// with `5 * (x / 4) / 16` and swapped.
/// NOTE(review): the scaling and the h/v swap look SVQ3-specific (they
/// differ from the usual H.264 plane formula) — keep as is for bit-exact
/// output; confirm against the reference decoder before touching.
fn ipred_16x16_plane(buf: &mut [u8], mut idx: usize, stride: usize) {
    let idx0 = idx + 7 - stride;          // centre of the top neighbour row
    let mut idx1 = idx + 8 * stride - 1;  // walks down the left column
    let mut idx2 = idx1 - 2 * stride;     // walks up the left column

    let mut h = i16::from(buf[idx0 + 1]) - i16::from(buf[idx0 - 1]);
    let mut v = i16::from(buf[idx1]) - i16::from(buf[idx2]);

    // weighted sums of symmetric neighbour differences
    for k in 2..9 {
        idx1 += stride;
        idx2 -= stride;
        h += (k as i16) * (i16::from(buf[idx0 + k]) - i16::from(buf[idx0 - k]));
        v += (k as i16) * (i16::from(buf[idx1]) - i16::from(buf[idx2]));
    }
    h = 5 * (h / 4) / 16;
    v = 5 * (v / 4) / 16;
    std::mem::swap(&mut h, &mut v);

    // starting value anchored on the bottom-left and top-right neighbours
    let mut a = 16 * (i16::from(buf[idx - 1 + 15 * stride]) + i16::from(buf[idx + 15 - stride]) + 1) - 7 * (v + h);

    for _ in 0..16 {
        let mut b = a;
        a += v;

        // emit one 16-pixel row, four pixels at a time
        for dst in buf[idx..].chunks_mut(4).take(4) {
            dst[0] = clip8((b      ) >> 5);
            dst[1] = clip8((b +   h) >> 5);
            dst[2] = clip8((b + 2*h) >> 5);
            dst[3] = clip8((b + 3*h) >> 5);
            b += h * 4;
        }
        idx += stride;
    }
}
489 | ||
/// 4x4 intra predictor; `tr` supplies the four top-right neighbour pixels
/// (only VerLeft reads it).
pub type IPred4x4Func = fn(buf: &mut [u8], off: usize, stride: usize, tr: &[u8]);
/// Square-block intra predictor, shared by the 8x8 and 16x16 tables.
pub type IPred8x8Func = fn(buf: &mut [u8], off: usize, stride: usize);

/// 4x4 predictors, indexed by the `PredType4x4` variant order.
pub const IPRED_FUNCS4X4: [IPred4x4Func; 12] = [
    ipred_4x4_ver, ipred_4x4_hor, ipred_4x4_dc,
    ipred_4x4_diag_down_left, ipred_4x4_diag_down_right,
    ipred_4x4_ver_right, ipred_4x4_hor_down, ipred_4x4_ver_left, ipred_4x4_hor_up,
    ipred_4x4_left_dc, ipred_4x4_top_dc, ipred_4x4_dc128
];

/// 8x8 predictors, indexed by the `PredType8x8` variant order
/// (the Plane entry is an `unreachable!()` stub).
pub const IPRED_FUNCS8X8: [IPred8x8Func; 7] = [
    ipred_8x8_dc, ipred_8x8_hor, ipred_8x8_ver, ipred_8x8_plane,
    ipred_8x8_left_dc, ipred_8x8_top_dc, ipred_8x8_dc128
];

/// 16x16 predictors, indexed by the `PredType8x8` variant order.
pub const IPRED_FUNCS16X16: [IPred8x8Func; 7] = [
    ipred_16x16_dc, ipred_16x16_hor, ipred_16x16_ver, ipred_16x16_plane,
    ipred_16x16_left_dc, ipred_16x16_top_dc, ipred_16x16_dc128
];
509 | ||
/// Thirdpel (0, 0) case — no fractional offset, so a plain block copy.
fn tpel_interp00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    for (drow, srow) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) {
        let dpart = &mut drow[..bw];
        dpart.copy_from_slice(&srow[..bw]);
    }
}
515 | ||
/// Rounded division by 3 for two-tap thirdpel sums (683 / 2048 ~= 1/3).
fn interp2(val: u32) -> u8 {
    (((val + 1) * 683) >> 11) as u8
}

/// Rounded division by 12 for four-tap thirdpel sums (2731 / 32768 ~= 1/12).
fn interp4(val: u32) -> u8 {
    (((val + 6) * 2731) >> 15) as u8
}

/// Horizontal interpolation at a 1/3-pel offset: out = (2*a + b) / 3.
fn tpel_interp01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    for (drow, srow) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) {
        for (d, pair) in drow.iter_mut().take(bw).zip(srow.windows(2)) {
            *d = interp2(u32::from(pair[0]) * 2 + u32::from(pair[1]));
        }
    }
}

/// Horizontal interpolation at a 2/3-pel offset: out = (a + 2*b) / 3.
fn tpel_interp02(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    for (drow, srow) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) {
        for (d, pair) in drow.iter_mut().take(bw).zip(srow.windows(2)) {
            *d = interp2(u32::from(pair[0]) + u32::from(pair[1]) * 2);
        }
    }
}
545 | ||
/// Vertical 1/3-pel offset: out = (2*cur + below) / 3.
fn tpel_interp10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    let src1 = &src[sstride..];
    for (dline, (sline0, sline1)) in dst.chunks_mut(dstride).zip(src.chunks(sstride).zip(src1.chunks(sstride))).take(bh) {
        for (dst, (s0, s1)) in dline.iter_mut().zip(sline0.iter().zip(sline1.iter())).take(bw) {
            *dst = interp2(u32::from(*s0) * 2 + u32::from(*s1));
        }
    }
}

/// Diagonal (1/3, 1/3) offset: bilinear 2x2 tap with weights 4,3 / 3,2 (sum 12).
fn tpel_interp11(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    let mut sidx0 = 0;
    let mut sidx1 = sstride;
    for dline in dst.chunks_mut(dstride).take(bh) {
        for (x, dst) in dline.iter_mut().take(bw).enumerate() {
            *dst = interp4(u32::from(src[sidx0 + x]) * 4 + u32::from(src[sidx0 + x + 1]) * 3 +
                           u32::from(src[sidx1 + x]) * 3 + u32::from(src[sidx1 + x + 1]) * 2);
        }
        sidx0 += sstride;
        sidx1 += sstride;
    }
}

/// Diagonal (2/3, 1/3) offset: 2x2 tap with weights 3,4 / 2,3 (sum 12).
fn tpel_interp12(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    let mut sidx0 = 0;
    let mut sidx1 = sstride;
    for dline in dst.chunks_mut(dstride).take(bh) {
        for (x, dst) in dline.iter_mut().take(bw).enumerate() {
            *dst = interp4(u32::from(src[sidx0 + x]) * 3 + u32::from(src[sidx0 + x + 1]) * 4 +
                           u32::from(src[sidx1 + x]) * 2 + u32::from(src[sidx1 + x + 1]) * 3);
        }
        sidx0 += sstride;
        sidx1 += sstride;
    }
}

/// Vertical 2/3-pel offset: out = (cur + 2*below) / 3.
fn tpel_interp20(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    let src1 = &src[sstride..];
    for (dline, (sline0, sline1)) in dst.chunks_mut(dstride).zip(src.chunks(sstride).zip(src1.chunks(sstride))).take(bh) {
        for (dst, (s0, s1)) in dline.iter_mut().zip(sline0.iter().zip(sline1.iter())).take(bw) {
            *dst = interp2(u32::from(*s0) + u32::from(*s1) * 2);
        }
    }
}

/// Diagonal (1/3, 2/3) offset: 2x2 tap with weights 3,2 / 4,3 (sum 12).
fn tpel_interp21(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    let mut sidx0 = 0;
    let mut sidx1 = sstride;
    for dline in dst.chunks_mut(dstride).take(bh) {
        for (x, dst) in dline.iter_mut().take(bw).enumerate() {
            *dst = interp4(u32::from(src[sidx0 + x]) * 3 + u32::from(src[sidx0 + x + 1]) * 2 +
                           u32::from(src[sidx1 + x]) * 4 + u32::from(src[sidx1 + x + 1]) * 3);
        }
        sidx0 += sstride;
        sidx1 += sstride;
    }
}

/// Diagonal (2/3, 2/3) offset: 2x2 tap with weights 2,3 / 3,4 (sum 12).
fn tpel_interp22(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    let mut sidx0 = 0;
    let mut sidx1 = sstride;
    for dline in dst.chunks_mut(dstride).take(bh) {
        for (x, dst) in dline.iter_mut().take(bw).enumerate() {
            *dst = interp4(u32::from(src[sidx0 + x]) * 2 + u32::from(src[sidx0 + x + 1]) * 3 +
                           u32::from(src[sidx1 + x]) * 3 + u32::from(src[sidx1 + x + 1]) * 4);
        }
        sidx0 += sstride;
        sidx1 += sstride;
    }
}
615 | ||
/// Thirdpel motion compensation kernels, laid out row-major as
/// `tpel_interpYX` for Y, X in 0..=2.
/// NOTE(review): index layout (x + y * 3) inferred from the function order —
/// confirm against the callers.
pub const THIRDPEL_INTERP_FUNCS: &[BlkInterpFunc] = &[
    tpel_interp00, tpel_interp01, tpel_interp02,
    tpel_interp10, tpel_interp11, tpel_interp12,
    tpel_interp20, tpel_interp21, tpel_interp22
];