]>
Commit | Line | Data |
---|---|---|
1 | use super::{BlockTokens, MAX_DIST, RateControl, calc_diff}; | |
2 | use super::super::binkviddata::*; | |
3 | ||
/// Position remapping table for 8x8 blocks.
///
/// For the `i`-th element of a block as it is stored by the callers,
/// `BINK_INV_SCAN[i]` is the slot that element occupies in the flattened
/// arrays walked by the coefficient coders; the same value is used to index
/// the quantiser matrices.
// NOTE(review): presumably the inverse of the decoder-side Bink scan table — confirm.
const BINK_INV_SCAN: [usize; 64] = [
     0,  1,  4,  5,  8,  9, 12, 13,
     2,  3,  6,  7, 10, 11, 14, 15,
    24, 25, 44, 45, 16, 17, 20, 21,
    26, 27, 46, 47, 18, 19, 22, 23,
    28, 29, 32, 33, 48, 49, 52, 53,
    30, 31, 34, 35, 50, 51, 54, 55,
    36, 37, 40, 41, 56, 57, 60, 61,
    38, 39, 42, 43, 58, 59, 62, 63,
];
14 | ||
/// Small bit-emission interface used by the coefficient coders in this file.
trait WriteBit {
    /// Emits `val` as a single-bit token.
    fn write_bit(&mut self, val: u16);

    /// Emits a coefficient with magnitude `aval`, sign flag `sign`
    /// (`true` for negative) and bit length `bits`.
    fn write_coef(&mut self, aval: u32, sign: bool, bits: u8);
}
19 | ||
20 | impl WriteBit for BlockTokens { | |
21 | fn write_bit(&mut self, val: u16) { | |
22 | self.other.push((val, 1)); | |
23 | } | |
24 | fn write_coef(&mut self, aval: u32, sign: bool, bits: u8) { | |
25 | if bits > 1 { | |
26 | self.other.push((aval as u16 & ((1 << (bits - 1)) - 1), bits - 1)); | |
27 | } | |
28 | self.write_bit(sign as u16); | |
29 | } | |
30 | } | |
31 | ||
32 | pub struct DSP { | |
33 | diff: [i16; 64], | |
34 | dct_i: [u8; 64], | |
35 | dct_p: [u8; 64], | |
36 | qmats: QuantMats, | |
37 | i_start: usize, | |
38 | i_len: usize, | |
39 | p_start: usize, | |
40 | p_len: usize, | |
41 | } | |
42 | ||
43 | impl DSP { | |
44 | pub fn new() -> Self { | |
45 | let mut qmats = QuantMats::default(); | |
46 | qmats.calc_binkb_quants(); | |
47 | Self { | |
48 | diff: [0; 64], | |
49 | dct_i: [0; 64], | |
50 | dct_p: [0; 64], | |
51 | qmats, | |
52 | i_start: 0, | |
53 | i_len: 16, | |
54 | p_start: 0, | |
55 | p_len: 16, | |
56 | } | |
57 | } | |
58 | pub fn get_diff(&mut self, mc_blk: &[u8; 64], cur_blk: &[u8; 64]) { | |
59 | for (dst, (&prev, &cur)) in self.diff.iter_mut() | |
60 | .zip(mc_blk.iter().zip(cur_blk.iter())) { | |
61 | *dst = i16::from(cur) - i16::from(prev); | |
62 | } | |
63 | } | |
64 | pub fn recon_residue(&self, dst: &mut [u8], dstride: usize, mc_blk: &[u8; 64]) { | |
65 | for (dline, (prow, drow)) in dst.chunks_mut(dstride) | |
66 | .zip(mc_blk.chunks_exact(8).zip(self.diff.chunks_exact(8))) { | |
67 | for (dst, (&prev, &diff)) in dline.iter_mut().zip(prow.iter().zip(drow.iter())) { | |
68 | *dst = (i16::from(prev) + diff) as u8; | |
69 | } | |
70 | } | |
71 | } | |
72 | pub fn recon_dct_i(&self, dst: &mut [u8], dstride: usize) { | |
73 | for (dline, srow) in dst.chunks_mut(dstride).zip(self.dct_i.chunks_exact(8)) { | |
74 | dline[..8].copy_from_slice(srow); | |
75 | } | |
76 | } | |
77 | pub fn recon_dct_p(&self, dst: &mut [u8], dstride: usize) { | |
78 | for (dline, srow) in dst.chunks_mut(dstride).zip(self.dct_p.chunks_exact(8)) { | |
79 | dline[..8].copy_from_slice(srow); | |
80 | } | |
81 | } | |
82 | ||
83 | pub fn try_residue(&self, tokens: &mut BlockTokens) -> u32 { | |
84 | let mut tree = Tree::new(true); | |
85 | let mut flat = [0; 64]; | |
86 | let mut blen = [0; 64]; | |
87 | ||
88 | for (&idx, &val) in BINK_INV_SCAN.iter().zip(self.diff.iter()) { | |
89 | flat[idx] = val; | |
90 | let aval = val.unsigned_abs(); | |
91 | let mut b = 0u8; | |
92 | while (1 << b) <= aval { | |
93 | b += 1; | |
94 | } | |
95 | blen[idx] = b.saturating_sub(1); | |
96 | } | |
97 | ||
98 | let mut max_val = 0; | |
99 | let mut max_bits = 0; | |
100 | let mut bits = 0; | |
101 | let mut avals = [0; 64]; | |
102 | let mut signs = [false; 64]; | |
103 | for ((aval, sign), (&val, &vlen)) in avals.iter_mut().zip(signs.iter_mut()) | |
104 | .zip(flat.iter().zip(blen.iter())) { | |
105 | *aval = val.unsigned_abs(); | |
106 | *sign = val < 0; | |
107 | max_val = max_val.max(*aval); | |
108 | max_bits = max_bits.max(vlen); | |
109 | bits += aval.count_ones(); | |
110 | } | |
111 | ||
112 | if max_bits > 7 || bits > 127 { | |
113 | return MAX_DIST; | |
114 | } | |
115 | ||
116 | tokens.nresidues.push(bits as u8); | |
117 | tokens.other.push((max_bits as u16, 3)); | |
118 | ||
119 | let mut nz_cand = Vec::with_capacity(64); | |
120 | let mut masks_left = bits + 1; | |
121 | 'tree_loop: for cur_bits in (0..=max_bits).rev() { | |
122 | let mask = 1 << cur_bits; | |
123 | for &idx in nz_cand.iter() { | |
124 | tokens.write_bit(((avals[idx] & mask) != 0) as u16); | |
125 | if (avals[idx] & mask) != 0 { | |
126 | masks_left -= 1; | |
127 | if masks_left == 0 { | |
128 | break 'tree_loop; | |
129 | } | |
130 | } | |
131 | } | |
132 | ||
133 | let mut pos = tree.start; | |
134 | while pos < tree.end { | |
135 | if tree.state[pos] == TreeState::None { | |
136 | pos += 1; | |
137 | continue; | |
138 | } | |
139 | if let TreeState::Candidate(idx) = tree.state[pos] { | |
140 | let idx = usize::from(idx); | |
141 | if blen[idx] == cur_bits && flat[idx] != 0 { | |
142 | tree.state[pos] = TreeState::None; | |
143 | tokens.write_bit(1); | |
144 | tokens.write_bit(signs[idx] as u16); | |
145 | nz_cand.push(idx); | |
146 | masks_left -= 1; | |
147 | if masks_left == 0 { | |
148 | break 'tree_loop; | |
149 | } | |
150 | } else { | |
151 | tokens.write_bit(0); | |
152 | pos += 1; | |
153 | } | |
154 | continue; | |
155 | } | |
156 | let range = tree.state[pos].get_range(); | |
157 | let cur_max_bits = blen[range].iter().fold(0u8, |acc, &a| acc.max(a)); | |
158 | if cur_max_bits == cur_bits { | |
159 | tokens.write_bit(1); | |
160 | match tree.state[pos] { | |
161 | TreeState::Twenty(val) => { | |
162 | tree.state[pos] = TreeState::Sixteen(val + 4); | |
163 | for i in 0..4 { | |
164 | let idx = usize::from(val) + i; | |
165 | if blen[idx] == cur_bits && flat[idx] != 0 { | |
166 | tokens.write_bit(0); | |
167 | tokens.write_bit(signs[idx] as u16); | |
168 | nz_cand.push(idx); | |
169 | masks_left -= 1; | |
170 | if masks_left == 0 { | |
171 | break 'tree_loop; | |
172 | } | |
173 | } else { | |
174 | tokens.write_bit(1); | |
175 | tree.add_to_head(TreeState::Candidate(idx as u8)); | |
176 | } | |
177 | } | |
178 | }, | |
179 | TreeState::Sixteen(val) => { | |
180 | tree.state[pos] = TreeState::Four(val); | |
181 | for i in 1u8..4 { | |
182 | tree.add_to_tail(TreeState::Four(val + i * 4)); | |
183 | } | |
184 | }, | |
185 | TreeState::Four(val) => { | |
186 | tree.state[pos] = TreeState::None; | |
187 | for i in 0..4 { | |
188 | let idx = usize::from(val) + i; | |
189 | if blen[idx] == cur_bits && flat[idx] != 0 { | |
190 | tokens.write_bit(0); | |
191 | tokens.write_bit(signs[idx] as u16); | |
192 | nz_cand.push(idx); | |
193 | masks_left -= 1; | |
194 | if masks_left == 0 { | |
195 | break 'tree_loop; | |
196 | } | |
197 | } else { | |
198 | tokens.write_bit(1); | |
199 | tree.add_to_head(TreeState::Candidate(idx as u8)); | |
200 | } | |
201 | } | |
202 | }, | |
203 | _ => unreachable!(), | |
204 | }; | |
205 | } else { | |
206 | tokens.write_bit(0); | |
207 | pos += 1; | |
208 | } | |
209 | } | |
210 | } | |
211 | ||
212 | 0 | |
213 | } | |
214 | ||
215 | pub fn set_quant_ranges(&mut self, ranges: [u8; 4]) { | |
216 | self.i_start = usize::from(ranges[0]); | |
217 | self.i_len = usize::from(ranges[1]); | |
218 | self.p_start = usize::from(ranges[2]); | |
219 | self.p_len = usize::from(ranges[3]); | |
220 | } | |
221 | pub fn try_dct_intra(&mut self, blk: &[u8; 64], tokens: &mut BlockTokens, tmp_tok: &mut BlockTokens, is_b: bool, rc: &RateControl, mut best_dist: u32) -> u32 { | |
222 | tokens.clear(); | |
223 | if self.i_len == 0 { | |
224 | return MAX_DIST; | |
225 | } | |
226 | ||
227 | let mut ref_coeffs = [0i32; 64]; | |
228 | for (dst, &src) in ref_coeffs.iter_mut().zip(blk.iter()) { | |
229 | *dst = i32::from(src); | |
230 | } | |
231 | dct(&mut ref_coeffs); | |
232 | ||
233 | let mut dct_out = [0u8; 64]; | |
234 | let qmats = if is_b { &self.qmats.intra_qmat } else { BINK_INTRA_QUANT }; | |
235 | for (qidx, qmat) in qmats.iter().enumerate().skip(self.i_start).take(self.i_len) { | |
236 | let mut coeffs = ref_coeffs; | |
237 | for (idx, el) in coeffs.iter_mut().enumerate() { | |
238 | *el /= qmat[BINK_INV_SCAN[idx]]; | |
239 | } | |
240 | ||
241 | if coeffs[0] >= 2048 { | |
242 | continue; | |
243 | } | |
244 | ||
245 | tmp_tok.clear(); | |
246 | tmp_tok.intradc.push(coeffs[0] as u16); | |
247 | Self::code_dct_coeffs(&coeffs, tmp_tok); | |
248 | if is_b { | |
249 | tmp_tok.intraq.push(qidx as u8); | |
250 | } else { | |
251 | tmp_tok.other.push((qidx as u16, 4)); | |
252 | } | |
253 | let bits = tmp_tok.bits(is_b); | |
254 | if rc.metric(0, bits) >= best_dist { | |
255 | continue; | |
256 | } | |
257 | ||
258 | for (idx, el) in coeffs.iter_mut().enumerate() { | |
259 | if *el != 0 { | |
260 | *el = (*el * qmat[BINK_INV_SCAN[idx]]) >> 11; | |
261 | } | |
262 | } | |
263 | idct(&mut coeffs); | |
264 | for (dst, &src) in dct_out.iter_mut().zip(coeffs.iter()) { | |
265 | *dst = src as u8; | |
266 | } | |
267 | let diff = calc_diff(&dct_out, 8, blk, 8); | |
268 | let dist = rc.metric(diff, bits); | |
269 | if dist < best_dist { | |
270 | best_dist = dist; | |
271 | std::mem::swap(tokens, tmp_tok); | |
272 | self.dct_i.copy_from_slice(&dct_out); | |
273 | } | |
274 | } | |
275 | ||
276 | best_dist | |
277 | } | |
278 | pub fn try_dct_inter(&mut self, ref_blk: &[u8; 64], cur_blk: &[u8; 64], tokens: &mut BlockTokens, tmp_tok: &mut BlockTokens, is_b: bool, rc: &RateControl, mut best_dist: u32) -> u32 { | |
279 | let mv_x = tokens.xoff[0]; | |
280 | let mv_y = tokens.yoff[0]; | |
281 | ||
282 | let mut ref_coeffs = [0i32; 64]; | |
283 | for (dst, &src) in ref_coeffs.iter_mut().zip(self.diff.iter()) { | |
284 | *dst = i32::from(src); | |
285 | } | |
286 | dct(&mut ref_coeffs); | |
287 | ||
288 | let mut dct_out = [0u8; 64]; | |
289 | let qmats = if is_b { &self.qmats.inter_qmat } else { BINK_INTER_QUANT }; | |
290 | for (qidx, qmat) in qmats.iter().enumerate().skip(self.p_start).take(self.p_len) { | |
291 | let mut coeffs = ref_coeffs; | |
292 | ||
293 | for (idx, el) in coeffs.iter_mut().enumerate() { | |
294 | *el /= qmat[BINK_INV_SCAN[idx]]; | |
295 | } | |
296 | ||
297 | if coeffs[0].unsigned_abs() >= 1024 { | |
298 | continue; | |
299 | } | |
300 | ||
301 | tmp_tok.clear(); | |
302 | tmp_tok.interdc.push(coeffs[0] as i16); | |
303 | tmp_tok.xoff.push(mv_x); | |
304 | tmp_tok.yoff.push(mv_y); | |
305 | Self::code_dct_coeffs(&coeffs, tmp_tok); | |
306 | if is_b { | |
307 | tmp_tok.interq.push(qidx as u8); | |
308 | } else { | |
309 | tmp_tok.other.push((qidx as u16, 4)); | |
310 | } | |
311 | let bits = tmp_tok.bits(is_b); | |
312 | if rc.metric(0, bits) >= best_dist { | |
313 | continue; | |
314 | } | |
315 | ||
316 | for (idx, el) in coeffs.iter_mut().enumerate() { | |
317 | if *el != 0 { | |
318 | *el = (*el * qmat[BINK_INV_SCAN[idx]]) >> 11; | |
319 | } | |
320 | } | |
321 | idct(&mut coeffs); | |
322 | for (dst, (&prev, &diff)) in dct_out.iter_mut().zip(ref_blk.iter().zip(coeffs.iter())) { | |
323 | *dst = (i32::from(prev) + diff) as u8; | |
324 | } | |
325 | let diff = calc_diff(&dct_out, 8, cur_blk, 8); | |
326 | let dist = rc.metric(diff, bits); | |
327 | if dist < best_dist { | |
328 | best_dist = dist; | |
329 | std::mem::swap(tokens, tmp_tok); | |
330 | self.dct_p.copy_from_slice(&dct_out); | |
331 | } | |
332 | } | |
333 | ||
334 | best_dist | |
335 | } | |
336 | ||
337 | fn code_dct_coeffs(coeffs: &[i32; 64], tokens: &mut BlockTokens) { | |
338 | let mut tree = Tree::new(false); | |
339 | let mut flat = [0; 64]; | |
340 | let mut blen = [0; 64]; | |
341 | ||
342 | for (&idx, &val) in BINK_INV_SCAN.iter().zip(coeffs.iter()).skip(1) { | |
343 | flat[idx] = val; | |
344 | let aval = val.unsigned_abs(); | |
345 | let mut b = 0u8; | |
346 | while (1 << b) <= aval { | |
347 | b += 1; | |
348 | } | |
349 | blen[idx] = b; | |
350 | } | |
351 | ||
352 | let mut max_val = 0; | |
353 | let mut max_bits = 0; | |
354 | let mut avals = [0; 64]; | |
355 | let mut signs = [false; 64]; | |
356 | for ((aval, sign), (&val, &vlen)) in avals.iter_mut().zip(signs.iter_mut()) | |
357 | .zip(flat.iter().zip(blen.iter())) { | |
358 | *aval = val.unsigned_abs(); | |
359 | *sign = val < 0; | |
360 | max_val = max_val.max(*aval); | |
361 | max_bits = max_bits.max(vlen); | |
362 | } | |
363 | ||
364 | tokens.other.push((u16::from(max_bits), 4)); | |
365 | for cur_bits in (1..=max_bits).rev() { | |
366 | let mut pos = tree.start; | |
367 | while pos < tree.end { | |
368 | if tree.state[pos] == TreeState::None { | |
369 | pos += 1; | |
370 | continue; | |
371 | } | |
372 | if let TreeState::Candidate(idx) = tree.state[pos] { | |
373 | let idx = usize::from(idx); | |
374 | if blen[idx] == cur_bits && flat[idx] != 0 { | |
375 | tree.state[pos] = TreeState::None; | |
376 | tokens.write_bit(1); | |
377 | tokens.write_coef(avals[idx], signs[idx], cur_bits); | |
378 | } else { | |
379 | tokens.write_bit(0); | |
380 | pos += 1; | |
381 | } | |
382 | continue; | |
383 | } | |
384 | let range = tree.state[pos].get_range(); | |
385 | let cur_max_bits = blen[range].iter().fold(0u8, |acc, &a| acc.max(a)); | |
386 | if cur_max_bits == cur_bits { | |
387 | tokens.write_bit(1); | |
388 | match tree.state[pos] { | |
389 | TreeState::Twenty(val) => { | |
390 | tree.state[pos] = TreeState::Sixteen(val + 4); | |
391 | for i in 0..4 { | |
392 | let idx = usize::from(val) + i; | |
393 | if blen[idx] == cur_bits && flat[idx] != 0 { | |
394 | tokens.write_bit(0); | |
395 | tokens.write_coef(avals[idx], signs[idx], cur_bits); | |
396 | } else { | |
397 | tokens.write_bit(1); | |
398 | tree.add_to_head(TreeState::Candidate(idx as u8)); | |
399 | } | |
400 | } | |
401 | }, | |
402 | TreeState::Sixteen(val) => { | |
403 | tree.state[pos] = TreeState::Four(val); | |
404 | for i in 1u8..4 { | |
405 | tree.add_to_tail(TreeState::Four(val + i * 4)); | |
406 | } | |
407 | }, | |
408 | TreeState::Four(val) => { | |
409 | tree.state[pos] = TreeState::None; | |
410 | for i in 0..4 { | |
411 | let idx = usize::from(val) + i; | |
412 | if blen[idx] == cur_bits && flat[idx] != 0 { | |
413 | tokens.write_bit(0); | |
414 | tokens.write_coef(avals[idx], signs[idx], cur_bits); | |
415 | } else { | |
416 | tokens.write_bit(1); | |
417 | tree.add_to_head(TreeState::Candidate(idx as u8)); | |
418 | } | |
419 | } | |
420 | }, | |
421 | _ => unreachable!(), | |
422 | }; | |
423 | } else { | |
424 | tokens.write_bit(0); | |
425 | pos += 1; | |
426 | } | |
427 | } | |
428 | } | |
429 | } | |
430 | } | |
431 | ||
/// One entry of the coefficient tree walked by the coders.
///
/// The payload of every non-empty variant is the index of the first
/// coefficient the entry covers.
#[derive(Clone,Copy,Debug,PartialEq)]
enum TreeState {
    /// Empty / already consumed slot.
    None,
    /// Group of twenty consecutive coefficients.
    Twenty(u8),
    /// Group of sixteen consecutive coefficients.
    Sixteen(u8),
    /// Group of four consecutive coefficients.
    Four(u8),
    /// A single coefficient waiting to become significant.
    Candidate(u8),
}

impl TreeState {
    /// Returns the half-open range of coefficient indices covered by this
    /// entry (`0..0` for `None`).
    ///
    /// The end is computed in `usize`, so a large base index cannot overflow
    /// the `u8` payload type.
    fn get_range(self) -> std::ops::Range<usize> {
        let (base, len) = match self {
            TreeState::None => (0u8, 0usize),
            TreeState::Twenty(val) => (val, 20),
            TreeState::Sixteen(val) => (val, 16),
            TreeState::Four(val) => (val, 4),
            TreeState::Candidate(val) => (val, 1),
        };
        let start = usize::from(base);
        start..start + len
    }
}
453 | ||
454 | struct Tree { | |
455 | state: [TreeState; 128], | |
456 | start: usize, | |
457 | end: usize, | |
458 | } | |
459 | ||
460 | impl Tree { | |
461 | fn new(is_res: bool) -> Self { | |
462 | let mut state = [TreeState::None; 128]; | |
463 | let start = 64; | |
464 | let mut end = start; | |
465 | ||
466 | state[end] = TreeState::Twenty(4); | |
467 | end += 1; | |
468 | state[end] = TreeState::Twenty(24); | |
469 | end += 1; | |
470 | state[end] = TreeState::Twenty(44); | |
471 | end += 1; | |
472 | if is_res { | |
473 | state[end] = TreeState::Four(0); | |
474 | end += 1; | |
475 | } else { | |
476 | for i in 1..4 { | |
477 | state[end] = TreeState::Candidate(i); | |
478 | end += 1; | |
479 | } | |
480 | } | |
481 | Self { state, start, end } | |
482 | } | |
483 | fn add_to_tail(&mut self, ts: TreeState) { | |
484 | self.state[self.end] = ts; | |
485 | self.end += 1; | |
486 | } | |
487 | fn add_to_head(&mut self, ts: TreeState) { | |
488 | self.start -= 1; | |
489 | self.state[self.start] = ts; | |
490 | } | |
491 | } | |
492 | ||
// Fixed-point multipliers for the inverse transform; every product is
// shifted right by 11 bits after the multiplication.
const A1: i32 = 2896;
const A2: i32 = 2217;
const A3: i32 = 3784;
const A4: i32 = -5352;

/// One 8-point inverse transform pass.
///
/// Every output has `bias` added and is then shifted right by `shift`.
fn idct_1d(src: [i32; 8], bias: i32, shift: u32) -> [i32; 8] {
    let [s0, s1, s2, s3, s4, s5, s6, s7] = src;

    let a0 = s0 + s4;
    let a1 = s0 - s4;
    let a2 = s2 + s6;
    let a3 = A1.wrapping_mul(s2 - s6) >> 11;
    let a4 = s5 + s3;
    let a5 = s5 - s3;
    let a6 = s1 + s7;
    let a7 = s1 - s7;

    let b0 = a4 + a6;
    let b1 = A3.wrapping_mul(a5 + a7) >> 11;
    let b2 = (A4.wrapping_mul(a5) >> 11) - b0 + b1;
    let b3 = (A1.wrapping_mul(a6 - a4) >> 11) - b2;
    let b4 = (A2.wrapping_mul(a7) >> 11) + b3 - b1;

    let c0 = a0 + a2;
    let c1 = a0 - a2;
    let c2 = a1 + (a3 - a2);
    let c3 = a1 - (a3 - a2);

    [
        (c0 + b0 + bias) >> shift,
        (c2 + b2 + bias) >> shift,
        (c3 + b3 + bias) >> shift,
        (c1 - b4 + bias) >> shift,
        (c1 + b4 + bias) >> shift,
        (c3 - b3 + bias) >> shift,
        (c2 - b2 + bias) >> shift,
        (c0 - b0 + bias) >> shift,
    ]
}

/// In-place 8x8 inverse transform.
///
/// The first pass runs over elements spaced 8 apart with no bias or shift;
/// the second pass runs over each group of 8 consecutive elements, adds
/// 0x7F and shifts right by 8.
fn idct(coeffs: &mut [i32; 64]) {
    let mut tmp = [0i32; 64];
    for col in 0..8 {
        let mut lane = [0i32; 8];
        for (k, v) in lane.iter_mut().enumerate() {
            *v = coeffs[col + k * 8];
        }
        let out = idct_1d(lane, 0, 0);
        for (k, &v) in out.iter().enumerate() {
            tmp[col + k * 8] = v;
        }
    }
    for (drow, srow) in coeffs.chunks_exact_mut(8).zip(tmp.chunks_exact(8)) {
        let mut lane = [0i32; 8];
        lane.copy_from_slice(srow);
        drow.copy_from_slice(&idct_1d(lane, 0x7F, 8));
    }
}
540 | ||
// Fixed-point multipliers for the forward transform; products built from
// them are shifted right by 5 bits.
const B1: i32 = 2896;
const B2: i32 = 3789;
const B3: i32 = 1569;
const B4: i32 = 4464;
const B5: i32 = 6679;
const B6: i32 = 1327;
const B7: i32 = 888;

/// One 8-point forward transform pass.
///
/// Every output has `bias` added and is then shifted right by `shift`.
fn dct_1d(src: [i32; 8], bias: i32, shift: u32) -> [i32; 8] {
    let [s0, s1, s2, s3, s4, s5, s6, s7] = src;

    // Sums and differences of mirrored inputs.
    let a0 = s0 + s7;
    let a1 = s0 - s7;
    let a2 = s1 + s6;
    let a3 = s1 - s6;
    let a4 = s2 + s5;
    let a5 = s2 - s5;
    let a6 = s3 + s4;
    let a7 = s3 - s4;

    // Even outputs.
    let b0 = (a0 + a4) << 7;
    let b1 = (a0 - a4) << 7;
    let b2 = (a2 + a6) << 7;
    let b3 = (a2 - a6) << 7;
    let c0 = (a0 - a6) << 7;
    let c1 = B1.wrapping_mul((b1 + b3) >> 7) >> 5;

    // Odd outputs.
    let d0 = B2.wrapping_mul(a1) + B3.wrapping_mul(a7);
    let d1 = ( d0 + B4.wrapping_mul(a5) + B5.wrapping_mul(a3) + (1 << 4)) >> 5;
    let d2 = (-d0 + B6.wrapping_mul(a5) - B7.wrapping_mul(a3) + (1 << 4)) >> 5;
    let e0 = B3.wrapping_mul(a1) - B2.wrapping_mul(a7);
    let e1 = ( e0 - B6.wrapping_mul(a3) - B5.wrapping_mul(a5) + (1 << 4)) >> 5;
    let e2 = (-e0 - B4.wrapping_mul(a3) + B7.wrapping_mul(a5) + (1 << 4)) >> 5;
    let odd_base = a1 << 7;

    [
        (b0 + b2 + bias) >> shift,
        (odd_base + d1 + bias) >> shift,
        (c0 + c1 + bias) >> shift,
        (odd_base + e1 + bias) >> shift,
        (b1 - b3 + bias) >> shift,
        (odd_base + e2 + bias) >> shift,
        (c0 - c1 + bias) >> shift,
        (odd_base + d2 + bias) >> shift,
    ]
}

/// In-place 8x8 forward transform.
///
/// The first pass runs over elements spaced 8 apart, adds 1 and shifts
/// right by 1; the second pass runs over each group of 8 consecutive
/// elements with no bias or shift.
fn dct(coeffs: &mut [i32; 64]) {
    let mut tmp = [0i32; 64];
    for col in 0..8 {
        let mut lane = [0i32; 8];
        for (k, v) in lane.iter_mut().enumerate() {
            *v = coeffs[col + k * 8];
        }
        let out = dct_1d(lane, 1, 1);
        for (k, &v) in out.iter().enumerate() {
            tmp[col + k * 8] = v;
        }
    }
    for (drow, srow) in coeffs.chunks_exact_mut(8).zip(tmp.chunks_exact(8)) {
        let mut lane = [0i32; 8];
        lane.copy_from_slice(srow);
        drow.copy_from_slice(&dct_1d(lane, 0, 0));
    }
}
598 |