]>
Commit | Line | Data |
---|---|---|
4c1582cf KS |
1 | use nihav_codec_support::codecs::blockdsp::*; |
2 | ||
4c1582cf KS |
/// Intra prediction modes for 4x4 blocks (H.264-style mode set).
/// The first nine variants are selectable from the bitstream (see
/// `INTRA_PRED4`); the last three are DC fallbacks for blocks that
/// are missing top and/or left neighbours.
#[derive(Debug,Clone,Copy)]
pub enum PredType4x4 {
    Ver,            // copy the row above the block
    Hor,            // copy the pixel left of each row
    DC,             // average of top and left neighbours
    DiagDownLeft,
    DiagDownRight,
    VerRight,
    HorDown,
    VerLeft,
    HorUp,
    LeftDC,         // DC from the left edge only (no top neighbours)
    TopDC,          // DC from the top edge only (no left neighbours)
    DC128,          // fixed mid-level fill (no neighbours at all)
}
18 | ||
4c1582cf KS |
/// Intra prediction modes for full-size (16x16 luma / 8x8 chroma)
/// blocks. The first four are selectable (see `INTRA_PRED16`), the
/// last three are DC fallbacks for missing neighbours.
#[derive(Debug,Clone,Copy)]
pub enum PredType8x8 {
    DC,
    Hor,
    Ver,
    Plane,
    LeftDC,
    TopDC,
    DC128
}
29 | ||
/// Maps the full-block intra mode number from the bitstream to a
/// prediction mode.
pub const INTRA_PRED16: [PredType8x8; 4] = [
    PredType8x8::DC, PredType8x8::Hor, PredType8x8::Ver, PredType8x8::Plane
];
/// Maps the 4x4 intra mode number from the bitstream to a prediction
/// mode (same order as the first nine entries of `IPRED_FUNCS4X4`).
pub const INTRA_PRED4: [PredType4x4; 9] = [
    PredType4x4::Ver, PredType4x4::Hor, PredType4x4::DC,
    PredType4x4::DiagDownLeft, PredType4x4::DiagDownRight,
    PredType4x4::VerRight, PredType4x4::HorDown,
    PredType4x4::VerLeft, PredType4x4::HorUp
];
39 | ||
40 | ||
/// Dequantiser scale factor for each quantiser index (SVQ3-specific).
const SVQ3_QUANTS: [i32; 32] = [
     3881,  4351,  4890,  5481,  6154,  6914,  7761,  8718,
     9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
    24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
    61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
];

/// In-place 2x2 Hadamard transform of the chroma DC coefficients.
pub fn chroma_transform(blk: &mut [i16; 4]) {
    let (a, b, c, d) = (blk[0], blk[1], blk[2], blk[3]);
    let sum02 = a + c;
    let dif02 = a - c;
    let sum13 = b + d;
    let dif13 = b - d;
    blk[0] = sum02 + sum13;
    blk[1] = sum02 - sum13;
    blk[2] = dif02 + dif13;
    blk[3] = dif02 - dif13;
}

/// Inverse transform for the 4x4 block of luma DC coefficients: the
/// 13/17/7 butterfly is applied to rows then columns and the result is
/// scaled by the quantiser with 20-bit rounding.
pub fn idct_dc_coeffs(blk: &mut [i16; 16], q: u8) {
    let quant = SVQ3_QUANTS[q as usize];
    let mut tmp = [0i32; 16];
    // horizontal pass
    for (row, out) in blk.chunks(4).zip(tmp.chunks_mut(4)) {
        let (a, b, c, d) = (i32::from(row[0]), i32::from(row[1]),
                            i32::from(row[2]), i32::from(row[3]));
        let e0 = 13 * (a + c);
        let e1 = 13 * (a - c);
        let o0 = 17 * b + 7 * d;
        let o1 =  7 * b - 17 * d;
        out[0] = e0 + o0;
        out[1] = e1 + o1;
        out[2] = e1 - o1;
        out[3] = e0 - o0;
    }
    // vertical pass fused with dequantisation and rounding
    for i in 0..4 {
        let (a, b, c, d) = (tmp[i], tmp[i + 4], tmp[i + 8], tmp[i + 12]);
        let e0 = 13 * (a + c);
        let e1 = 13 * (a - c);
        let o0 = 17 * b + 7 * d;
        let o1 =  7 * b - 17 * d;
        blk[i]      = (((e0 + o0).wrapping_mul(quant) + (1 << 19)) >> 20) as i16;
        blk[i + 4]  = (((e1 + o1).wrapping_mul(quant) + (1 << 19)) >> 20) as i16;
        blk[i + 8]  = (((e1 - o1).wrapping_mul(quant) + (1 << 19)) >> 20) as i16;
        blk[i + 12] = (((e0 - o0).wrapping_mul(quant) + (1 << 19)) >> 20) as i16;
    }
}

/// Inverse 4x4 transform with dequantisation. The DC coefficient is
/// handled separately: chroma DC is scaled by `quant / 2`, luma DC by
/// the fixed factor 1538; AC coefficients go through the 13/17/7
/// butterflies and are scaled by `quant`.
pub fn idct(blk: &mut [i16; 16], q: u8, chroma: bool) {
    let quant = SVQ3_QUANTS[q as usize];
    let dc_in = i32::from(blk[0]);
    // DC bypasses the two passes and is re-added during the final rounding
    let dc = 13 * 13 * if chroma { quant * dc_in / 2 } else { dc_in * 1538 };
    blk[0] = 0;
    let mut tmp = [0i32; 16];
    // horizontal pass
    for (row, out) in blk.chunks(4).zip(tmp.chunks_mut(4)) {
        let (a, b, c, d) = (i32::from(row[0]), i32::from(row[1]),
                            i32::from(row[2]), i32::from(row[3]));
        let e0 = 13 * (a + c);
        let e1 = 13 * (a - c);
        let o0 = 17 * b + 7 * d;
        let o1 =  7 * b - 17 * d;
        out[0] = e0 + o0;
        out[1] = e1 + o1;
        out[2] = e1 - o1;
        out[3] = e0 - o0;
    }
    // vertical pass fused with dequantisation, DC re-insertion and rounding
    for i in 0..4 {
        let (a, b, c, d) = (tmp[i], tmp[i + 4], tmp[i + 8], tmp[i + 12]);
        let e0 = 13 * (a + c);
        let e1 = 13 * (a - c);
        let o0 = 17 * b + 7 * d;
        let o1 =  7 * b - 17 * d;
        blk[i]      = (((e0 + o0).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16;
        blk[i + 4]  = (((e1 + o1).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16;
        blk[i + 8]  = (((e1 - o1).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16;
        blk[i + 12] = (((e0 - o0).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16;
    }
}
126 | ||
/// Adds a 4x4 block of residue coefficients to the plane at `offset`,
/// saturating each result to the 0..=255 pixel range.
pub fn add_coeffs(dst: &mut [u8], offset: usize, stride: usize, coeffs: &[i16]) {
    // three full strides plus the four pixels of the last row
    let area = &mut dst[offset..][..stride * 3 + 4];
    for (row, res) in area.chunks_mut(stride).take(4).zip(coeffs.chunks(4)) {
        for (pix, &delta) in row.iter_mut().take(4).zip(res.iter()) {
            let sum = i32::from(*pix) + i32::from(delta);
            *pix = sum.max(0).min(255) as u8;
        }
    }
}
135 | ||
/// Averages a `bw`x`bh` source block into the destination block with
/// round-to-nearest (used when combining two motion-compensated refs).
pub fn avg(dst: &mut [u8], dstride: usize,
           src: &[u8], sstride: usize, bw: usize, bh: usize) {
    for (drow, srow) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) {
        for (d, &s) in drow.iter_mut().zip(srow.iter()).take(bw) {
            let sum = u16::from(*d) + u16::from(s) + 1;
            *d = (sum >> 1) as u8;
        }
    }
}
144 | ||
145 | fn clip8(val: i16) -> u8 { val.max(0).min(255) as u8 } | |
146 | ||
/// Fills a `bsize`x`bsize` block with the mid-level value 128
/// (used when no neighbouring pixels are available).
fn ipred_dc128(buf: &mut [u8], idx: usize, stride: usize, bsize: usize) {
    for row in 0..bsize {
        let line = idx + row * stride;
        for col in 0..bsize { buf[line + col] = 128; }
    }
}
/// Vertical prediction: every row of the block repeats the row
/// directly above the block.
fn ipred_ver(buf: &mut [u8], idx: usize, stride: usize, bsize: usize) {
    let top = idx - stride;
    for row in 0..bsize {
        let line = idx + row * stride;
        for col in 0..bsize { buf[line + col] = buf[top + col]; }
    }
}
/// Horizontal prediction: every row of the block is flooded with the
/// pixel to its immediate left.
fn ipred_hor(buf: &mut [u8], idx: usize, stride: usize, bsize: usize) {
    for row in 0..bsize {
        let line = idx + row * stride;
        // the left neighbour is outside the written area, so it can be hoisted
        let left = buf[line - 1];
        for col in 0..bsize { buf[line + col] = left; }
    }
}
/// DC prediction from both edges: sums `bsize` pixels above and
/// `bsize` pixels to the left, rounds by `1 << (shift - 1)` and floods
/// the block with the average (`shift` = log2 of the sample count).
fn ipred_dc(buf: &mut [u8], idx: usize, stride: usize, bsize: usize, shift: u8) {
    let top_sum: u16 = (0..bsize).map(|i| u16::from(buf[idx - stride + i])).sum();
    let left_sum: u16 = (0..bsize).map(|i| u16::from(buf[idx - 1 + i * stride])).sum();
    let dc = ((top_sum + left_sum + (1 << (shift - 1))) >> shift) as u8;

    for row in 0..bsize {
        let line = idx + row * stride;
        for col in 0..bsize { buf[line + col] = dc; }
    }
}
/// DC prediction from the left edge only (top neighbours unavailable);
/// `shift` is log2 of the number of summed pixels.
fn ipred_left_dc(buf: &mut [u8], idx: usize, stride: usize, bsize: usize, shift: u8) {
    let left_sum: u16 = (0..bsize).map(|i| u16::from(buf[idx - 1 + i * stride])).sum();
    let dc = ((left_sum + (1 << (shift - 1))) >> shift) as u8;

    for row in 0..bsize {
        let line = idx + row * stride;
        for col in 0..bsize { buf[line + col] = dc; }
    }
}
/// DC prediction from the top edge only (left neighbours unavailable);
/// `shift` is log2 of the number of summed pixels.
fn ipred_top_dc(buf: &mut [u8], idx: usize, stride: usize, bsize: usize, shift: u8) {
    let top_sum: u16 = (0..bsize).map(|i| u16::from(buf[idx - stride + i])).sum();
    let dc = ((top_sum + (1 << (shift - 1))) >> shift) as u8;

    for row in 0..bsize {
        let line = idx + row * stride;
        for col in 0..bsize { buf[line + col] = dc; }
    }
}
197 | ||
/// Copies `len` pixels of the row above `idx` into `dst`, widened to
/// u16 for the prediction arithmetic. The frame buffer is only read,
/// so a shared borrow suffices (existing callers holding `&mut [u8]`
/// coerce automatically).
fn load_top(dst: &mut [u16], buf: &[u8], idx: usize, stride: usize, len: usize) {
    for i in 0..len { dst[i] = u16::from(buf[idx - stride + i]); }
}
/// Copies `len` pixels of the column left of `idx` into `dst`
/// (u16-widened); read-only, same borrow rationale as `load_top`.
fn load_left(dst: &mut [u16], buf: &[u8], idx: usize, stride: usize, len: usize) {
    for i in 0..len { dst[i] = u16::from(buf[idx - 1 + i * stride]); }
}
204 | ||
/// 4x4 vertical prediction (wrapper with the common 4x4 signature;
/// the top-right pixels `_tr` are not needed for this mode).
fn ipred_4x4_ver(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_ver(buf, idx, stride, 4);
}
/// 4x4 horizontal prediction (top-right pixels unused).
fn ipred_4x4_hor(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_hor(buf, idx, stride, 4);
}
211 | fn ipred_4x4_diag_down_left(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { | |
212 | let mut t: [u16; 8] = [0; 8]; | |
213 | let mut l: [u16; 8] = [0; 8]; | |
214 | load_top(&mut t, buf, idx, stride, 4); | |
215 | load_left(&mut l, buf, idx, stride, 4); | |
216 | let a = ((l[1] + t[1]) >> 1) as u8; | |
217 | let b = ((l[2] + t[2]) >> 1) as u8; | |
218 | let c = ((l[3] + t[3]) >> 1) as u8; | |
219 | ||
220 | let dst = &mut buf[idx..]; | |
221 | dst[0] = a; dst[1] = b; dst[2] = c; dst[3] = c; | |
222 | let dst = &mut buf[idx + stride..]; | |
223 | dst[0] = b; dst[1] = c; dst[2] = c; dst[3] = c; | |
224 | let dst = &mut buf[idx + stride * 2..]; | |
225 | dst[0] = c; dst[1] = c; dst[2] = c; dst[3] = c; | |
226 | let dst = &mut buf[idx + stride * 3..]; | |
227 | dst[0] = c; dst[1] = c; dst[2] = c; dst[3] = c; | |
228 | } | |
/// 4x4 diagonal down-right prediction (H.264 mode 4). Needs four top
/// and four left neighbours plus the top-left corner pixel; the loads
/// start one pixel early so `t[0]`/`l[0]` both hold that corner.
fn ipred_4x4_diag_down_right(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    let mut t: [u16; 5] = [0; 5];
    let mut l: [u16; 5] = [0; 5];
    load_top(&mut t, buf, idx - 1, stride, 5);
    load_left(&mut l, buf, idx - stride, stride, 5);
    let dst = &mut buf[idx..];

    for j in 0..4 {
        // below the main diagonal: three-tap filtered left neighbours
        for i in 0..j {
            dst[i + j * stride] = ((l[j - i - 1] + 2 * l[j - i] + l[j - i + 1] + 2) >> 2) as u8;
        }
        // the main diagonal: corner filtered with its two neighbours
        dst[j + j * stride] = ((l[1] + 2 * l[0] + t[1] + 2) >> 2) as u8;
        // above the main diagonal: three-tap filtered top neighbours
        for i in (j+1)..4 {
            dst[i + j * stride] = ((t[i - j - 1] + 2 * t[i - j] + t[i - j + 1] + 2) >> 2) as u8;
        }
    }
}
/// 4x4 vertical-right prediction (H.264 mode 5). For each pixel the
/// discriminant `zvr = 2*i - j` selects the source: non-negative even
/// values take a two-tap average of the top row, odd values a three-tap
/// filter of the top row, -1 the filtered top-left corner, and anything
/// below that a three-tap filter of the left column.
fn ipred_4x4_ver_right(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    let mut t: [u16; 5] = [0; 5];
    let mut l: [u16; 5] = [0; 5];
    // loads start one pixel early so t[0]/l[0] are the top-left corner
    load_top(&mut t, buf, idx - 1, stride, 5);
    load_left(&mut l, buf, idx - stride, stride, 5);
    let dst = &mut buf[idx..];

    for j in 0..4 {
        for i in 0..4 {
            let zvr = ((2 * i) as i8) - (j as i8);
            let pix;
            if zvr >= 0 {
                if (zvr & 1) == 0 {
                    // even: halfway between two top samples
                    pix = (t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 1) >> 1;
                } else {
                    // odd: three-tap smoothing of the top samples
                    pix = (t[i - (j >> 1) - 1] + 2 * t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 2) >> 2;
                }
            } else {
                if zvr == -1 {
                    // filtered top-left corner
                    pix = (l[1] + 2 * l[0] + t[1] + 2) >> 2;
                } else {
                    // three-tap smoothing down the left column
                    pix = (l[j] + 2 * l[j - 1] + l[j - 2] + 2) >> 2;
                }
            }
            dst[i + j * stride] = pix as u8;
        }
    }
}
/// 4x4 vertical-left prediction (H.264 mode 7). Built from the top row
/// extended with the four top-right pixels `tr`: even output rows use
/// two-tap averages, odd rows three-tap filtered values; rows 2 and 3
/// reuse the values of rows 0 and 1 shifted one position.
fn ipred_4x4_ver_left(buf: &mut [u8], idx: usize, stride: usize, tr: &[u8]) {
    let mut t: [u16; 8] = [0; 8];
    load_top(&mut t, buf, idx, stride, 4);
    // extend the top row with the top-right neighbours
    for i in 0..4 { t[i + 4] = u16::from(tr[i]); }
    let dst = &mut buf[idx..];

    // two-tap averages (rows 0 and 2)
    dst[0 + 0 * stride] = ((t[0] + t[1] + 1) >> 1) as u8;
    let pix = ((t[1] + t[2] + 1) >> 1) as u8;
    dst[1 + 0 * stride] = pix;
    dst[0 + 2 * stride] = pix;
    let pix = ((t[2] + t[3] + 1) >> 1) as u8;
    dst[2 + 0 * stride] = pix;
    dst[1 + 2 * stride] = pix;
    let pix = ((t[3] + t[4] + 1) >> 1) as u8;
    dst[3 + 0 * stride] = pix;
    dst[2 + 2 * stride] = pix;
    dst[3 + 2 * stride] = ((t[4] + t[5] + 1) >> 1) as u8;
    // three-tap filtered values (rows 1 and 3)
    dst[0 + 1 * stride] = ((t[0] + 2*t[1] + t[2] + 2) >> 2) as u8;
    let pix = ((t[1] + 2*t[2] + t[3] + 2) >> 2) as u8;
    dst[1 + 1 * stride] = pix;
    dst[0 + 3 * stride] = pix;
    let pix = ((t[2] + 2*t[3] + t[4] + 2) >> 2) as u8;
    dst[2 + 1 * stride] = pix;
    dst[1 + 3 * stride] = pix;
    let pix = ((t[3] + 2*t[4] + t[5] + 2) >> 2) as u8;
    dst[3 + 1 * stride] = pix;
    dst[2 + 3 * stride] = pix;
    dst[3 + 3 * stride] = ((t[4] + 2*t[5] + t[6] + 2) >> 2) as u8;
}
/// 4x4 horizontal-down prediction (H.264 mode 6), the transpose of the
/// vertical-right case: the discriminant `zhd = 2*j - i` picks two-tap
/// left-column averages (even, non-negative), three-tap left filters
/// (odd), the filtered top-left corner (-1) or three-tap top filters
/// (below -1).
fn ipred_4x4_hor_down(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    let mut t: [u16; 5] = [0; 5];
    let mut l: [u16; 5] = [0; 5];
    // loads start one pixel early so t[0]/l[0] are the top-left corner
    load_top(&mut t, buf, idx - 1, stride, 5);
    load_left(&mut l, buf, idx - stride, stride, 5);
    let dst = &mut buf[idx..];

    for j in 0..4 {
        for i in 0..4 {
            let zhd = ((2 * j) as i8) - (i as i8);
            let pix;
            if zhd >= 0 {
                if (zhd & 1) == 0 {
                    // even: halfway between two left samples
                    pix = (l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 1) >> 1;
                } else {
                    // odd: three-tap smoothing down the left column
                    pix = (l[j - (i >> 1) - 1] + 2 * l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 2) >> 2;
                }
            } else {
                if zhd == -1 {
                    // filtered top-left corner
                    pix = (l[1] + 2 * l[0] + t[1] + 2) >> 2;
                } else {
                    // three-tap smoothing along the top row
                    pix = (t[i - 2] + 2 * t[i - 1] + t[i] + 2) >> 2;
                }
            }
            dst[i + j * stride] = pix as u8;
        }
    }
}
331 | fn ipred_4x4_hor_up(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { | |
332 | let mut l: [u16; 8] = [0; 8]; | |
333 | load_left(&mut l, buf, idx, stride, 8); | |
334 | let dst = &mut buf[idx..]; | |
335 | ||
336 | dst[0 + 0 * stride] = ((l[0] + l[1] + 1) >> 1) as u8; | |
337 | dst[1 + 0 * stride] = ((l[0] + 2*l[1] + l[2] + 2) >> 2) as u8; | |
338 | let pix = ((l[1] + l[2] + 1) >> 1) as u8; | |
339 | dst[2 + 0 * stride] = pix; | |
340 | dst[0 + 1 * stride] = pix; | |
341 | let pix = ((l[1] + 2*l[2] + l[3] + 2) >> 2) as u8; | |
342 | dst[3 + 0 * stride] = pix; | |
343 | dst[1 + 1 * stride] = pix; | |
344 | let pix = ((l[2] + l[3] + 1) >> 1) as u8; | |
345 | dst[2 + 1 * stride] = pix; | |
346 | dst[0 + 2 * stride] = pix; | |
347 | let pix = ((l[2] + 3*l[3] + 2) >> 2) as u8; | |
348 | dst[3 + 1 * stride] = pix; | |
349 | dst[1 + 2 * stride] = pix; | |
350 | dst[3 + 2 * stride] = l[3] as u8; | |
351 | dst[1 + 3 * stride] = l[3] as u8; | |
352 | dst[0 + 3 * stride] = l[3] as u8; | |
353 | dst[2 + 2 * stride] = l[3] as u8; | |
354 | dst[2 + 3 * stride] = l[3] as u8; | |
355 | dst[3 + 3 * stride] = l[3] as u8; | |
356 | } | |
/// 4x4 DC prediction using both edges (8 samples, shift 3).
fn ipred_4x4_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_dc(buf, idx, stride, 4, 3);
}
/// 4x4 DC from the left edge only (4 samples, shift 2).
fn ipred_4x4_left_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_left_dc(buf, idx, stride, 4, 2);
}
/// 4x4 DC from the top edge only (4 samples, shift 2).
fn ipred_4x4_top_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_top_dc(buf, idx, stride, 4, 2);
}
/// 4x4 mid-level fill when no neighbours are available.
fn ipred_4x4_dc128(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_dc128(buf, idx, stride, 4);
}
369 | ||
/// 8x8 vertical prediction wrapper.
fn ipred_8x8_ver(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_ver(buf, idx, stride, 8);
}
/// 8x8 horizontal prediction wrapper.
fn ipred_8x8_hor(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_hor(buf, idx, stride, 8);
}
376 | fn ipred_8x8_dc(buf: &mut [u8], idx: usize, stride: usize) { | |
377 | let mut t: [u16; 8] = [0; 8]; | |
378 | load_top(&mut t, buf, idx, stride, 8); | |
379 | let mut l: [u16; 8] = [0; 8]; | |
380 | load_left(&mut l, buf, idx, stride, 8); | |
381 | ||
382 | let dc0 = ((t[0] + t[1] + t[2] + t[3] + l[0] + l[1] + l[2] + l[3] + 4) >> 3) as u8; | |
383 | let sum1 = t[4] + t[5] + t[6] + t[7]; | |
384 | let dc1 = ((sum1 + 2) >> 2) as u8; | |
385 | let sum2 = l[4] + l[5] + l[6] + l[7]; | |
386 | let dc2 = ((sum2 + 2) >> 2) as u8; | |
387 | let dc3 = ((sum1 + sum2 + 4) >> 3) as u8; | |
388 | ||
389 | let dst = &mut buf[idx..]; | |
390 | for row in dst.chunks_mut(stride).take(4) { | |
391 | row[..4].copy_from_slice(&[dc0; 4]); | |
392 | row[4..8].copy_from_slice(&[dc1; 4]); | |
393 | } | |
394 | for row in dst.chunks_mut(stride).skip(4).take(4) { | |
395 | row[..4].copy_from_slice(&[dc2; 4]); | |
396 | row[4..8].copy_from_slice(&[dc3; 4]); | |
397 | } | |
398 | } | |
/// 8x8 DC from the left edge only (8 samples, shift 3).
fn ipred_8x8_left_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_left_dc(buf, idx, stride, 8, 3);
}
/// 8x8 DC from the top edge only (8 samples, shift 3).
fn ipred_8x8_top_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_top_dc(buf, idx, stride, 8, 3);
}
/// 8x8 mid-level fill when no neighbours are available.
fn ipred_8x8_dc128(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_dc128(buf, idx, stride, 8);
}
/// 8x8 plane prediction slot. This decoder never expects it to be
/// invoked — the author guarded it with `unreachable!()` — so the
/// H.264-style implementation below is kept only as commented-out
/// reference material.
/// NOTE(review): if this panic ever fires, the commented body is the
/// starting point; confirm its gradient math against the 16x16 variant
/// before enabling it.
fn ipred_8x8_plane(_buf: &mut [u8], _idx: usize, _stride: usize) {
    unreachable!();
/* let mut h: i16 = 0;
    let mut v: i16 = 0;
    let idx0 = idx + 3 - stride;
    let mut idx1 = idx + 4 * stride - 1;
    let mut idx2 = idx + 2 * stride - 1;
    for i in 0..4 {
        let i1 = (i + 1) as i16;
        h += i1 * (i16::from(buf[idx0 + i + 1]) - i16::from(buf[idx0 - i - 1]));
        v += i1 * (i16::from(buf[idx1]) - i16::from(buf[idx2]));
        idx1 += stride;
        idx2 -= stride;
    }
    let b = (17 * h + 16) >> 5;
    let c = (17 * v + 16) >> 5;
    let mut a = 16 * (i16::from(buf[idx - 1 + 7 * stride]) + i16::from(buf[idx + 7 - stride])) - 3 * (b + c) + 16;
    for line in buf[idx..].chunks_mut(stride).take(8) {
        let mut acc = a;
        for el in line.iter_mut().take(8) {
            *el = clip8(acc >> 5);
            acc += b;
        }
        a += c;
    }*/
}
434 | ||
/// 16x16 vertical prediction wrapper.
fn ipred_16x16_ver(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_ver(buf, idx, stride, 16);
}
/// 16x16 horizontal prediction wrapper.
fn ipred_16x16_hor(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_hor(buf, idx, stride, 16);
}
/// 16x16 DC prediction from both edges (32 samples, shift 5).
fn ipred_16x16_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_dc(buf, idx, stride, 16, 5);
}
/// 16x16 DC from the left edge only (16 samples, shift 4).
fn ipred_16x16_left_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_left_dc(buf, idx, stride, 16, 4);
}
/// 16x16 DC from the top edge only (16 samples, shift 4).
fn ipred_16x16_top_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_top_dc(buf, idx, stride, 16, 4);
}
/// 16x16 mid-level fill when no neighbours are available.
fn ipred_16x16_dc128(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_dc128(buf, idx, stride, 16);
}
/// 16x16 plane prediction, SVQ3 flavour: the horizontal/vertical
/// gradients are accumulated H.264-style from the edge pixels, but are
/// then rescaled by 5/64 and swapped with each other (the SVQ3 plane
/// mode effectively transposes the gradient relative to H.264).
fn ipred_16x16_plane(buf: &mut [u8], mut idx: usize, stride: usize) {
    // idx0 points at the centre of the top edge; idx1/idx2 walk the
    // left column outward from its centre
    let idx0 = idx + 7 - stride;
    let mut idx1 = idx + 8 * stride - 1;
    let mut idx2 = idx1 - 2 * stride;

    let mut h = i16::from(buf[idx0 + 1]) - i16::from(buf[idx0 - 1]);
    let mut v = i16::from(buf[idx1]) - i16::from(buf[idx2]);

    // weighted sums of symmetric edge-pixel differences
    for k in 2..9 {
        idx1 += stride;
        idx2 -= stride;
        h += (k as i16) * (i16::from(buf[idx0 + k]) - i16::from(buf[idx0 - k]));
        v += (k as i16) * (i16::from(buf[idx1]) - i16::from(buf[idx2]));
    }
    // SVQ3 scaling (approximately 5/64) followed by the gradient swap
    h = 5 * (h / 4) / 16;
    v = 5 * (v / 4) / 16;
    std::mem::swap(&mut h, &mut v);

    // plane anchored at the bottom-left and top-right corner pixels
    let mut a = 16 * (i16::from(buf[idx - 1 + 15 * stride]) + i16::from(buf[idx + 15 - stride]) + 1) - 7 * (v + h);

    for _ in 0..16 {
        let mut b = a;
        a += v;

        // write the 16 pixels of the row, four at a time
        for dst in buf[idx..].chunks_mut(4).take(4) {
            dst[0] = clip8((b      ) >> 5);
            dst[1] = clip8((b +   h) >> 5);
            dst[2] = clip8((b + 2*h) >> 5);
            dst[3] = clip8((b + 3*h) >> 5);
            b += h * 4;
        }
        idx += stride;
    }
}
487 | ||
/// Signature of a 4x4 intra predictor; `tr` carries the four top-right
/// neighbour pixels some modes need.
pub type IPred4x4Func = fn(buf: &mut [u8], off: usize, stride: usize, tr: &[u8]);
/// Signature of a full-block (8x8/16x16) intra predictor.
pub type IPred8x8Func = fn(buf: &mut [u8], off: usize, stride: usize);

/// 4x4 mode dispatch table: the first nine entries line up with
/// `INTRA_PRED4`, the last three are the DC fallbacks for missing edges.
pub const IPRED_FUNCS4X4: [IPred4x4Func; 12] = [
    ipred_4x4_ver, ipred_4x4_hor, ipred_4x4_dc,
    ipred_4x4_diag_down_left, ipred_4x4_diag_down_right,
    ipred_4x4_ver_right, ipred_4x4_hor_down, ipred_4x4_ver_left, ipred_4x4_hor_up,
    ipred_4x4_left_dc, ipred_4x4_top_dc, ipred_4x4_dc128
];

/// 8x8 dispatch table, in `PredType8x8` declaration order.
pub const IPRED_FUNCS8X8: [IPred8x8Func; 7] = [
    ipred_8x8_dc, ipred_8x8_hor, ipred_8x8_ver, ipred_8x8_plane,
    ipred_8x8_left_dc, ipred_8x8_top_dc, ipred_8x8_dc128
];

/// 16x16 dispatch table, in `PredType8x8` declaration order.
pub const IPRED_FUNCS16X16: [IPred8x8Func; 7] = [
    ipred_16x16_dc, ipred_16x16_hor, ipred_16x16_ver, ipred_16x16_plane,
    ipred_16x16_left_dc, ipred_16x16_top_dc, ipred_16x16_dc128
];
507 | ||
/// Fullpel motion compensation: plain `bw`x`bh` block copy.
fn tpel_interp00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    let mut doff = 0;
    let mut soff = 0;
    for _ in 0..bh {
        dst[doff..doff + bw].copy_from_slice(&src[soff..soff + bw]);
        doff += dstride;
        soff += sstride;
    }
}
513 | ||
/// Rounds a two-sample weighted sum (weights totalling 3) using the
/// 683/2048 ≈ 1/3 fixed-point approximation.
fn interp2(val: u32) -> u8 {
    (((val + 1) * 683) >> 11) as u8
}

/// Rounds a four-sample weighted sum (weights totalling 12) using the
/// 2731/32768 ≈ 1/12 fixed-point approximation.
fn interp4(val: u32) -> u8 {
    (((val + 6) * 2731) >> 15) as u8
}

/// Shared horizontal thirdpel filter: each output pixel is
/// `interp2(left * wl + right * wr)` over adjacent source samples.
fn tpel_interp_hor(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize,
                   bw: usize, bh: usize, wl: u32, wr: u32) {
    for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) {
        let mut left = u32::from(sline[0]);
        for (d, &s) in dline.iter_mut().take(bw).zip(sline[1..].iter()) {
            let right = u32::from(s);
            *d = interp2(left * wl + right * wr);
            left = right;
        }
    }
}

/// Shared vertical thirdpel filter over two consecutive source rows.
fn tpel_interp_ver(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize,
                   bw: usize, bh: usize, wt: u32, wb: u32) {
    let below = &src[sstride..];
    for (dline, (top, bot)) in dst.chunks_mut(dstride)
            .zip(src.chunks(sstride).zip(below.chunks(sstride))).take(bh) {
        for (d, (&a, &b)) in dline.iter_mut().zip(top.iter().zip(bot.iter())).take(bw) {
            *d = interp2(u32::from(a) * wt + u32::from(b) * wb);
        }
    }
}

/// Shared diagonal thirdpel filter over a 2x2 neighbourhood; weights
/// are `[top-left, top-right, bottom-left, bottom-right]`.
fn tpel_interp_diag(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize,
                    bw: usize, bh: usize, w: [u32; 4]) {
    let mut row0 = 0;
    let mut row1 = sstride;
    for dline in dst.chunks_mut(dstride).take(bh) {
        for (x, d) in dline.iter_mut().take(bw).enumerate() {
            *d = interp4(u32::from(src[row0 + x])     * w[0] +
                         u32::from(src[row0 + x + 1]) * w[1] +
                         u32::from(src[row1 + x])     * w[2] +
                         u32::from(src[row1 + x + 1]) * w[3]);
        }
        row0 += sstride;
        row1 += sstride;
    }
}

/// Horizontal 1/3-pel offset.
fn tpel_interp01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    tpel_interp_hor(dst, dstride, src, sstride, bw, bh, 2, 1);
}

/// Horizontal 2/3-pel offset.
fn tpel_interp02(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    tpel_interp_hor(dst, dstride, src, sstride, bw, bh, 1, 2);
}

/// Vertical 1/3-pel offset.
fn tpel_interp10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    tpel_interp_ver(dst, dstride, src, sstride, bw, bh, 2, 1);
}

/// Diagonal (1/3, 1/3) offset.
fn tpel_interp11(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    tpel_interp_diag(dst, dstride, src, sstride, bw, bh, [4, 3, 3, 2]);
}

/// Diagonal (1/3 vertical, 2/3 horizontal) offset.
fn tpel_interp12(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    tpel_interp_diag(dst, dstride, src, sstride, bw, bh, [3, 4, 2, 3]);
}

/// Vertical 2/3-pel offset.
fn tpel_interp20(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    tpel_interp_ver(dst, dstride, src, sstride, bw, bh, 1, 2);
}

/// Diagonal (2/3 vertical, 1/3 horizontal) offset.
fn tpel_interp21(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    tpel_interp_diag(dst, dstride, src, sstride, bw, bh, [3, 2, 4, 3]);
}

/// Diagonal (2/3, 2/3) offset.
fn tpel_interp22(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    tpel_interp_diag(dst, dstride, src, sstride, bw, bh, [2, 3, 3, 4]);
}
613 | ||
/// Thirdpel motion interpolation dispatch, laid out row-major: the
/// first digit of each function name is the vertical thirdpel step,
/// the second the horizontal one (0..=2 each).
pub const THIRDPEL_INTERP_FUNCS: &[BlkInterpFunc] = &[
    tpel_interp00, tpel_interp01, tpel_interp02,
    tpel_interp10, tpel_interp11, tpel_interp12,
    tpel_interp20, tpel_interp21, tpel_interp22
];