]> git.nihav.org Git - nihav.git/blame - nihav-rad/src/codecs/binkvidenc/dsp.rs
avimux: do not record palette change chunks in OpenDML index
[nihav.git] / nihav-rad / src / codecs / binkvidenc / dsp.rs
CommitLineData
217de10b
KS
1use super::{BlockTokens, MAX_DIST, RateControl, calc_diff};
2use super::super::binkviddata::*;
3
/// Position remapping table used by both coefficient coders in this module.
///
/// Entry `i` gives the slot (in the order the tree coder walks coefficients)
/// for the value stored at index `i` of an 8x8 block; the coders scatter
/// their input through it before building the coding tree, and the
/// quantisers use it to pick the matching quantisation matrix entry.
const BINK_INV_SCAN: [usize; 64] = [
     0,  1,  4,  5,  8,  9, 12, 13,
     2,  3,  6,  7, 10, 11, 14, 15,
    24, 25, 44, 45, 16, 17, 20, 21,
    26, 27, 46, 47, 18, 19, 22, 23,
    28, 29, 32, 33, 48, 49, 52, 53,
    30, 31, 34, 35, 50, 51, 54, 55,
    36, 37, 40, 41, 56, 57, 60, 61,
    38, 39, 42, 43, 58, 59, 62, 63
];
14
/// Small helper interface for emitting raw bits into a token list.
trait WriteBit {
    /// Appends a single bit (`val` is expected to be 0 or 1).
    fn write_bit(&mut self, val: u16);
    /// Appends a coefficient whose magnitude `aval` occupies `bits` bits:
    /// the bits below the top one (if any) followed by the sign flag.
    fn write_coef(&mut self, aval: u32, sign: bool, bits: u8);
}
19
20impl WriteBit for BlockTokens {
21 fn write_bit(&mut self, val: u16) {
22 self.other.push((val, 1));
23 }
24 fn write_coef(&mut self, aval: u32, sign: bool, bits: u8) {
25 if bits > 1 {
26 self.other.push((aval as u16 & ((1 << (bits - 1)) - 1), bits - 1));
27 }
28 self.write_bit(sign as u16);
29 }
30}
31
/// Scratch state shared by the block coding trials of the Bink encoder.
pub struct DSP {
    /// Difference between the current block and its prediction
    /// (filled by `get_diff`, consumed by the residue and inter DCT trials).
    diff: [i16; 64],
    /// Reconstructed pixels of the best intra DCT candidate found so far.
    dct_i: [u8; 64],
    /// Reconstructed pixels of the best inter DCT candidate found so far.
    dct_p: [u8; 64],
    /// Quantisation matrices used when the `is_b` flag is set.
    qmats: QuantMats,
    /// First intra quantiser index to try.
    i_start: usize,
    /// Number of intra quantiser indices to try (zero disables intra DCT).
    i_len: usize,
    /// First inter quantiser index to try.
    p_start: usize,
    /// Number of inter quantiser indices to try.
    p_len: usize,
}
42
43impl DSP {
44 pub fn new() -> Self {
45 let mut qmats = QuantMats::default();
46 qmats.calc_binkb_quants();
47 Self {
48 diff: [0; 64],
49 dct_i: [0; 64],
50 dct_p: [0; 64],
51 qmats,
52 i_start: 0,
53 i_len: 16,
54 p_start: 0,
55 p_len: 16,
56 }
57 }
58 pub fn get_diff(&mut self, mc_blk: &[u8; 64], cur_blk: &[u8; 64]) {
59 for (dst, (&prev, &cur)) in self.diff.iter_mut()
60 .zip(mc_blk.iter().zip(cur_blk.iter())) {
61 *dst = i16::from(cur) - i16::from(prev);
62 }
63 }
64 pub fn recon_residue(&self, dst: &mut [u8], dstride: usize, mc_blk: &[u8; 64]) {
65 for (dline, (prow, drow)) in dst.chunks_mut(dstride)
66 .zip(mc_blk.chunks_exact(8).zip(self.diff.chunks_exact(8))) {
67 for (dst, (&prev, &diff)) in dline.iter_mut().zip(prow.iter().zip(drow.iter())) {
68 *dst = (i16::from(prev) + diff) as u8;
69 }
70 }
71 }
72 pub fn recon_dct_i(&self, dst: &mut [u8], dstride: usize) {
73 for (dline, srow) in dst.chunks_mut(dstride).zip(self.dct_i.chunks_exact(8)) {
74 dline[..8].copy_from_slice(srow);
75 }
76 }
77 pub fn recon_dct_p(&self, dst: &mut [u8], dstride: usize) {
78 for (dline, srow) in dst.chunks_mut(dstride).zip(self.dct_p.chunks_exact(8)) {
79 dline[..8].copy_from_slice(srow);
80 }
81 }
82
83 pub fn try_residue(&self, tokens: &mut BlockTokens) -> u32 {
84 let mut tree = Tree::new(true);
85 let mut flat = [0; 64];
86 let mut blen = [0; 64];
87
88 for (&idx, &val) in BINK_INV_SCAN.iter().zip(self.diff.iter()) {
89 flat[idx] = val;
90 let aval = val.unsigned_abs();
91 let mut b = 0u8;
92 while (1 << b) <= aval {
93 b += 1;
94 }
95 blen[idx] = b.saturating_sub(1);
96 }
97
98 let mut max_val = 0;
99 let mut max_bits = 0;
100 let mut bits = 0;
101 let mut avals = [0; 64];
102 let mut signs = [false; 64];
103 for ((aval, sign), (&val, &vlen)) in avals.iter_mut().zip(signs.iter_mut())
104 .zip(flat.iter().zip(blen.iter())) {
105 *aval = val.unsigned_abs();
106 *sign = val < 0;
107 max_val = max_val.max(*aval);
108 max_bits = max_bits.max(vlen);
109 bits += aval.count_ones();
110 }
111
112 if max_bits > 7 || bits > 127 {
113 return MAX_DIST;
114 }
115
116 tokens.nresidues.push(bits as u8);
117 tokens.other.push((max_bits as u16, 3));
118
119 let mut nz_cand = Vec::with_capacity(64);
120 let mut masks_left = bits + 1;
121 'tree_loop: for cur_bits in (0..=max_bits).rev() {
122 let mask = 1 << cur_bits;
123 for &idx in nz_cand.iter() {
124 tokens.write_bit(((avals[idx] & mask) != 0) as u16);
125 if (avals[idx] & mask) != 0 {
126 masks_left -= 1;
127 if masks_left == 0 {
128 break 'tree_loop;
129 }
130 }
131 }
132
133 let mut pos = tree.start;
134 while pos < tree.end {
135 if tree.state[pos] == TreeState::None {
136 pos += 1;
137 continue;
138 }
139 if let TreeState::Candidate(idx) = tree.state[pos] {
140 let idx = usize::from(idx);
141 if blen[idx] == cur_bits && flat[idx] != 0 {
142 tree.state[pos] = TreeState::None;
143 tokens.write_bit(1);
144 tokens.write_bit(signs[idx] as u16);
145 nz_cand.push(idx);
146 masks_left -= 1;
147 if masks_left == 0 {
148 break 'tree_loop;
149 }
150 } else {
151 tokens.write_bit(0);
152 pos += 1;
153 }
154 continue;
155 }
156 let range = tree.state[pos].get_range();
157 let cur_max_bits = blen[range].iter().fold(0u8, |acc, &a| acc.max(a));
158 if cur_max_bits == cur_bits {
159 tokens.write_bit(1);
160 match tree.state[pos] {
161 TreeState::Twenty(val) => {
162 tree.state[pos] = TreeState::Sixteen(val + 4);
163 for i in 0..4 {
164 let idx = usize::from(val) + i;
165 if blen[idx] == cur_bits && flat[idx] != 0 {
166 tokens.write_bit(0);
167 tokens.write_bit(signs[idx] as u16);
168 nz_cand.push(idx);
169 masks_left -= 1;
170 if masks_left == 0 {
171 break 'tree_loop;
172 }
173 } else {
174 tokens.write_bit(1);
175 tree.add_to_head(TreeState::Candidate(idx as u8));
176 }
177 }
178 },
179 TreeState::Sixteen(val) => {
180 tree.state[pos] = TreeState::Four(val);
181 for i in 1u8..4 {
182 tree.add_to_tail(TreeState::Four(val + i * 4));
183 }
184 },
185 TreeState::Four(val) => {
186 tree.state[pos] = TreeState::None;
187 for i in 0..4 {
188 let idx = usize::from(val) + i;
189 if blen[idx] == cur_bits && flat[idx] != 0 {
190 tokens.write_bit(0);
191 tokens.write_bit(signs[idx] as u16);
192 nz_cand.push(idx);
193 masks_left -= 1;
194 if masks_left == 0 {
195 break 'tree_loop;
196 }
197 } else {
198 tokens.write_bit(1);
199 tree.add_to_head(TreeState::Candidate(idx as u8));
200 }
201 }
202 },
203 _ => unreachable!(),
204 };
205 } else {
206 tokens.write_bit(0);
207 pos += 1;
208 }
209 }
210 }
211
212 0
213 }
214
215 pub fn set_quant_ranges(&mut self, ranges: [u8; 4]) {
216 self.i_start = usize::from(ranges[0]);
217 self.i_len = usize::from(ranges[1]);
218 self.p_start = usize::from(ranges[2]);
219 self.p_len = usize::from(ranges[3]);
220 }
221 pub fn try_dct_intra(&mut self, blk: &[u8; 64], tokens: &mut BlockTokens, tmp_tok: &mut BlockTokens, is_b: bool, rc: &RateControl, mut best_dist: u32) -> u32 {
222 tokens.clear();
223 if self.i_len == 0 {
224 return MAX_DIST;
225 }
226
227 let mut ref_coeffs = [0i32; 64];
228 for (dst, &src) in ref_coeffs.iter_mut().zip(blk.iter()) {
229 *dst = i32::from(src);
230 }
231 dct(&mut ref_coeffs);
232
233 let mut dct_out = [0u8; 64];
234 let qmats = if is_b { &self.qmats.intra_qmat } else { BINK_INTRA_QUANT };
235 for (qidx, qmat) in qmats.iter().enumerate().skip(self.i_start).take(self.i_len) {
236 let mut coeffs = ref_coeffs;
237 for (idx, el) in coeffs.iter_mut().enumerate() {
238 *el /= qmat[BINK_INV_SCAN[idx]];
239 }
240
241 if coeffs[0] >= 2048 {
242 continue;
243 }
244
245 tmp_tok.clear();
246 tmp_tok.intradc.push(coeffs[0] as u16);
247 Self::code_dct_coeffs(&coeffs, tmp_tok);
248 if is_b {
249 tmp_tok.intraq.push(qidx as u8);
250 } else {
251 tmp_tok.other.push((qidx as u16, 4));
252 }
253 let bits = tmp_tok.bits(is_b);
254 if rc.metric(0, bits) >= best_dist {
255 continue;
256 }
257
258 for (idx, el) in coeffs.iter_mut().enumerate() {
259 if *el != 0 {
260 *el = (*el * qmat[BINK_INV_SCAN[idx]]) >> 11;
261 }
262 }
263 idct(&mut coeffs);
264 for (dst, &src) in dct_out.iter_mut().zip(coeffs.iter()) {
265 *dst = src as u8;
266 }
267 let diff = calc_diff(&dct_out, 8, blk, 8);
268 let dist = rc.metric(diff, bits);
269 if dist < best_dist {
270 best_dist = dist;
271 std::mem::swap(tokens, tmp_tok);
272 self.dct_i.copy_from_slice(&dct_out);
273 }
274 }
275
276 best_dist
277 }
3687b8b3 278 #[allow(clippy::too_many_arguments)]
217de10b
KS
279 pub fn try_dct_inter(&mut self, ref_blk: &[u8; 64], cur_blk: &[u8; 64], tokens: &mut BlockTokens, tmp_tok: &mut BlockTokens, is_b: bool, rc: &RateControl, mut best_dist: u32) -> u32 {
280 let mv_x = tokens.xoff[0];
281 let mv_y = tokens.yoff[0];
282
283 let mut ref_coeffs = [0i32; 64];
284 for (dst, &src) in ref_coeffs.iter_mut().zip(self.diff.iter()) {
285 *dst = i32::from(src);
286 }
287 dct(&mut ref_coeffs);
288
289 let mut dct_out = [0u8; 64];
290 let qmats = if is_b { &self.qmats.inter_qmat } else { BINK_INTER_QUANT };
291 for (qidx, qmat) in qmats.iter().enumerate().skip(self.p_start).take(self.p_len) {
292 let mut coeffs = ref_coeffs;
293
294 for (idx, el) in coeffs.iter_mut().enumerate() {
295 *el /= qmat[BINK_INV_SCAN[idx]];
296 }
297
298 if coeffs[0].unsigned_abs() >= 1024 {
299 continue;
300 }
301
302 tmp_tok.clear();
303 tmp_tok.interdc.push(coeffs[0] as i16);
304 tmp_tok.xoff.push(mv_x);
305 tmp_tok.yoff.push(mv_y);
306 Self::code_dct_coeffs(&coeffs, tmp_tok);
307 if is_b {
308 tmp_tok.interq.push(qidx as u8);
309 } else {
310 tmp_tok.other.push((qidx as u16, 4));
311 }
312 let bits = tmp_tok.bits(is_b);
313 if rc.metric(0, bits) >= best_dist {
314 continue;
315 }
316
317 for (idx, el) in coeffs.iter_mut().enumerate() {
318 if *el != 0 {
319 *el = (*el * qmat[BINK_INV_SCAN[idx]]) >> 11;
320 }
321 }
322 idct(&mut coeffs);
323 for (dst, (&prev, &diff)) in dct_out.iter_mut().zip(ref_blk.iter().zip(coeffs.iter())) {
324 *dst = (i32::from(prev) + diff) as u8;
325 }
326 let diff = calc_diff(&dct_out, 8, cur_blk, 8);
327 let dist = rc.metric(diff, bits);
328 if dist < best_dist {
329 best_dist = dist;
330 std::mem::swap(tokens, tmp_tok);
331 self.dct_p.copy_from_slice(&dct_out);
332 }
333 }
334
335 best_dist
336 }
337
338 fn code_dct_coeffs(coeffs: &[i32; 64], tokens: &mut BlockTokens) {
339 let mut tree = Tree::new(false);
340 let mut flat = [0; 64];
341 let mut blen = [0; 64];
342
343 for (&idx, &val) in BINK_INV_SCAN.iter().zip(coeffs.iter()).skip(1) {
344 flat[idx] = val;
345 let aval = val.unsigned_abs();
346 let mut b = 0u8;
347 while (1 << b) <= aval {
348 b += 1;
349 }
350 blen[idx] = b;
351 }
352
353 let mut max_val = 0;
354 let mut max_bits = 0;
355 let mut avals = [0; 64];
356 let mut signs = [false; 64];
357 for ((aval, sign), (&val, &vlen)) in avals.iter_mut().zip(signs.iter_mut())
358 .zip(flat.iter().zip(blen.iter())) {
359 *aval = val.unsigned_abs();
360 *sign = val < 0;
361 max_val = max_val.max(*aval);
362 max_bits = max_bits.max(vlen);
363 }
364
365 tokens.other.push((u16::from(max_bits), 4));
366 for cur_bits in (1..=max_bits).rev() {
367 let mut pos = tree.start;
368 while pos < tree.end {
369 if tree.state[pos] == TreeState::None {
370 pos += 1;
371 continue;
372 }
373 if let TreeState::Candidate(idx) = tree.state[pos] {
374 let idx = usize::from(idx);
375 if blen[idx] == cur_bits && flat[idx] != 0 {
376 tree.state[pos] = TreeState::None;
377 tokens.write_bit(1);
378 tokens.write_coef(avals[idx], signs[idx], cur_bits);
379 } else {
380 tokens.write_bit(0);
381 pos += 1;
382 }
383 continue;
384 }
385 let range = tree.state[pos].get_range();
386 let cur_max_bits = blen[range].iter().fold(0u8, |acc, &a| acc.max(a));
387 if cur_max_bits == cur_bits {
388 tokens.write_bit(1);
389 match tree.state[pos] {
390 TreeState::Twenty(val) => {
391 tree.state[pos] = TreeState::Sixteen(val + 4);
392 for i in 0..4 {
393 let idx = usize::from(val) + i;
394 if blen[idx] == cur_bits && flat[idx] != 0 {
395 tokens.write_bit(0);
396 tokens.write_coef(avals[idx], signs[idx], cur_bits);
397 } else {
398 tokens.write_bit(1);
399 tree.add_to_head(TreeState::Candidate(idx as u8));
400 }
401 }
402 },
403 TreeState::Sixteen(val) => {
404 tree.state[pos] = TreeState::Four(val);
405 for i in 1u8..4 {
406 tree.add_to_tail(TreeState::Four(val + i * 4));
407 }
408 },
409 TreeState::Four(val) => {
410 tree.state[pos] = TreeState::None;
411 for i in 0..4 {
412 let idx = usize::from(val) + i;
413 if blen[idx] == cur_bits && flat[idx] != 0 {
414 tokens.write_bit(0);
415 tokens.write_coef(avals[idx], signs[idx], cur_bits);
416 } else {
417 tokens.write_bit(1);
418 tree.add_to_head(TreeState::Candidate(idx as u8));
419 }
420 }
421 },
422 _ => unreachable!(),
423 };
424 } else {
425 tokens.write_bit(0);
426 pos += 1;
427 }
428 }
429 }
430 }
431}
432
/// One entry of the coefficient coding list.
#[derive(Clone,Copy,Debug,PartialEq)]
enum TreeState {
    /// Unused or already consumed slot.
    None,
    /// Group of twenty consecutive coefficients starting at the given index.
    Twenty(u8),
    /// Group of sixteen consecutive coefficients starting at the given index.
    Sixteen(u8),
    /// Group of four consecutive coefficients starting at the given index.
    Four(u8),
    /// Single coefficient at the given index.
    Candidate(u8),
}

impl TreeState {
    /// Returns the range of coefficient indices covered by this entry
    /// (empty for `None`).
    fn get_range(self) -> std::ops::Range<usize> {
        let (first, count): (u8, u8) = match self {
            TreeState::None => (0, 0),
            TreeState::Candidate(idx) => (idx, 1),
            TreeState::Four(idx) => (idx, 4),
            TreeState::Sixteen(idx) => (idx, 16),
            TreeState::Twenty(idx) => (idx, 20),
        };
        let past_end = first + count;
        usize::from(first)..usize::from(past_end)
    }
}
454
455struct Tree {
456 state: [TreeState; 128],
457 start: usize,
458 end: usize,
459}
460
461impl Tree {
462 fn new(is_res: bool) -> Self {
463 let mut state = [TreeState::None; 128];
464 let start = 64;
465 let mut end = start;
466
467 state[end] = TreeState::Twenty(4);
468 end += 1;
469 state[end] = TreeState::Twenty(24);
470 end += 1;
471 state[end] = TreeState::Twenty(44);
472 end += 1;
473 if is_res {
474 state[end] = TreeState::Four(0);
475 end += 1;
476 } else {
477 for i in 1..4 {
478 state[end] = TreeState::Candidate(i);
479 end += 1;
480 }
481 }
482 Self { state, start, end }
483 }
484 fn add_to_tail(&mut self, ts: TreeState) {
485 self.state[self.end] = ts;
486 self.end += 1;
487 }
488 fn add_to_head(&mut self, ts: TreeState) {
489 self.start -= 1;
490 self.state[self.start] = ts;
491 }
492}
493
// Fixed-point multipliers of the inverse transform (products are shifted right by 11).
const A1: i32 =  2896;
const A2: i32 =  2217;
const A3: i32 =  3784;
const A4: i32 = -5352;

/// One-dimensional eight-point inverse transform.
///
/// Every output is computed as `(value + bias) >> shift`.
fn idct_1d(s: &[i32; 8], bias: i32, shift: u32) -> [i32; 8] {
    let a0 = s[0] + s[4];
    let a1 = s[0] - s[4];
    let a2 = s[2] + s[6];
    let a3 = A1.wrapping_mul(s[2] - s[6]) >> 11;
    let a4 = s[5] + s[3];
    let a5 = s[5] - s[3];
    let a6 = s[1] + s[7];
    let a7 = s[1] - s[7];

    let b0 = a4 + a6;
    let b1 = A3.wrapping_mul(a5 + a7) >> 11;
    let b2 = (A4.wrapping_mul(a5) >> 11) - b0 + b1;
    let b3 = (A1.wrapping_mul(a6 - a4) >> 11) - b2;
    let b4 = (A2.wrapping_mul(a7) >> 11) + b3 - b1;

    let c0 = a0 + a2;
    let c1 = a0 - a2;
    let c2 = a1 + (a3 - a2);
    let c3 = a1 - (a3 - a2);

    [
        (c0 + b0 + bias) >> shift,
        (c2 + b2 + bias) >> shift,
        (c3 + b3 + bias) >> shift,
        (c1 - b4 + bias) >> shift,
        (c1 + b4 + bias) >> shift,
        (c3 - b3 + bias) >> shift,
        (c2 - b2 + bias) >> shift,
        (c0 - b0 + bias) >> shift,
    ]
}

/// In-place 8x8 inverse transform: columns first (no rounding), then rows
/// with a bias of `0x7F` and a right shift by eight.
fn idct(coeffs: &mut [i32; 64]) {
    let mut tmp: [i32; 64] = [0; 64];
    let mut line: [i32; 8] = [0; 8];

    for col in 0..8 {
        for (k, el) in line.iter_mut().enumerate() {
            *el = coeffs[col + k * 8];
        }
        let out = idct_1d(&line, 0, 0);
        for (k, &val) in out.iter().enumerate() {
            tmp[col + k * 8] = val;
        }
    }
    for (drow, srow) in coeffs.chunks_exact_mut(8).zip(tmp.chunks_exact(8)) {
        line.copy_from_slice(srow);
        drow.copy_from_slice(&idct_1d(&line, 0x7F, 8));
    }
}
541
// Fixed-point multipliers of the forward transform (products are shifted right by 5).
const B1: i32 = 2896;
const B2: i32 = 3789;
const B3: i32 = 1569;
const B4: i32 = 4464;
const B5: i32 = 6679;
const B6: i32 = 1327;
const B7: i32 =  888;

/// One-dimensional eight-point forward transform.
///
/// Every output is computed as `(value + bias) >> shift`.
fn dct_1d(s: &[i32; 8], bias: i32, shift: u32) -> [i32; 8] {
    let a0 = s[0] + s[7];
    let a1 = s[0] - s[7];
    let a2 = s[1] + s[6];
    let a3 = s[1] - s[6];
    let a4 = s[2] + s[5];
    let a5 = s[2] - s[5];
    let a6 = s[3] + s[4];
    let a7 = s[3] - s[4];

    let b0 = (a0 + a4) << 7;
    let b1 = (a0 - a4) << 7;
    let b2 = (a2 + a6) << 7;
    let b3 = (a2 - a6) << 7;

    let out0 = (b0 + b2 + bias) >> shift;
    let out4 = (b1 - b3 + bias) >> shift;

    let c0 = (a0 - a6) << 7;
    let c1 = B1.wrapping_mul((b1 + b3) >> 7) >> 5;
    let out2 = (c0 + c1 + bias) >> shift;
    let out6 = (c0 - c1 + bias) >> shift;

    let d0 = B2.wrapping_mul(a1) + B3.wrapping_mul(a7);
    let d1 = ( d0 + B4.wrapping_mul(a5) + B5.wrapping_mul(a3) + (1 << 4)) >> 5;
    let d2 = (-d0 + B6.wrapping_mul(a5) - B7.wrapping_mul(a3) + (1 << 4)) >> 5;
    let out1 = ((a1 << 7) + d1 + bias) >> shift;
    let out7 = ((a1 << 7) + d2 + bias) >> shift;

    let e0 = B3.wrapping_mul(a1) - B2.wrapping_mul(a7);
    let e1 = ( e0 - B6.wrapping_mul(a3) - B5.wrapping_mul(a5) + (1 << 4)) >> 5;
    let e2 = (-e0 - B4.wrapping_mul(a3) + B7.wrapping_mul(a5) + (1 << 4)) >> 5;
    let out3 = ((a1 << 7) + e1 + bias) >> shift;
    let out5 = ((a1 << 7) + e2 + bias) >> shift;

    [out0, out1, out2, out3, out4, out5, out6, out7]
}

/// In-place 8x8 forward transform: columns first (bias one, shift by one),
/// then rows without any rounding or scaling.
fn dct(coeffs: &mut [i32; 64]) {
    let mut tmp: [i32; 64] = [0; 64];
    let mut line: [i32; 8] = [0; 8];

    for col in 0..8 {
        for (k, el) in line.iter_mut().enumerate() {
            *el = coeffs[col + k * 8];
        }
        let out = dct_1d(&line, 1, 1);
        for (k, &val) in out.iter().enumerate() {
            tmp[col + k * 8] = val;
        }
    }
    for (drow, srow) in coeffs.chunks_exact_mut(8).zip(tmp.chunks_exact(8)) {
        line.copy_from_slice(srow);
        drow.copy_from_slice(&dct_1d(&line, 0, 0));
    }
}
599