Bink Video encoder (only 'b' version for now)
[nihav.git] / nihav-rad / src / codecs / binkvidenc / dsp.rs
CommitLineData
217de10b
KS
1use super::{BlockTokens, MAX_DIST, RateControl, calc_diff};
2use super::super::binkviddata::*;
3
/// Index permutation applied when flattening an 8x8 block for coding.
///
/// Element `i` of a raster-ordered block is stored at position
/// `BINK_INV_SCAN[i]` of the flattened array; the same lookup selects the
/// quantiser entry used for raster element `i`.
const BINK_INV_SCAN: [usize; 64] = [
     0,  1,  4,  5,  8,  9, 12, 13,
     2,  3,  6,  7, 10, 11, 14, 15,
    24, 25, 44, 45, 16, 17, 20, 21,
    26, 27, 46, 47, 18, 19, 22, 23,
    28, 29, 32, 33, 48, 49, 52, 53,
    30, 31, 34, 35, 50, 51, 54, 55,
    36, 37, 40, 41, 56, 57, 60, 61,
    38, 39, 42, 43, 58, 59, 62, 63,
];
14
/// Bit-level output helpers used by the coefficient and residue coders.
trait WriteBit {
    /// Emits `val` as a single bit.
    fn write_bit(&mut self, val: u16);
    /// Emits a coefficient of `bits` significant bits: magnitude `aval` and
    /// its `sign` (`true` for negative values at the call sites in this file).
    fn write_coef(&mut self, aval: u32, sign: bool, bits: u8);
}
19
20impl WriteBit for BlockTokens {
21 fn write_bit(&mut self, val: u16) {
22 self.other.push((val, 1));
23 }
24 fn write_coef(&mut self, aval: u32, sign: bool, bits: u8) {
25 if bits > 1 {
26 self.other.push((aval as u16 & ((1 << (bits - 1)) - 1), bits - 1));
27 }
28 self.write_bit(sign as u16);
29 }
30}
31
32pub struct DSP {
33 diff: [i16; 64],
34 dct_i: [u8; 64],
35 dct_p: [u8; 64],
36 qmats: QuantMats,
37 i_start: usize,
38 i_len: usize,
39 p_start: usize,
40 p_len: usize,
41}
42
43impl DSP {
44 pub fn new() -> Self {
45 let mut qmats = QuantMats::default();
46 qmats.calc_binkb_quants();
47 Self {
48 diff: [0; 64],
49 dct_i: [0; 64],
50 dct_p: [0; 64],
51 qmats,
52 i_start: 0,
53 i_len: 16,
54 p_start: 0,
55 p_len: 16,
56 }
57 }
58 pub fn get_diff(&mut self, mc_blk: &[u8; 64], cur_blk: &[u8; 64]) {
59 for (dst, (&prev, &cur)) in self.diff.iter_mut()
60 .zip(mc_blk.iter().zip(cur_blk.iter())) {
61 *dst = i16::from(cur) - i16::from(prev);
62 }
63 }
64 pub fn recon_residue(&self, dst: &mut [u8], dstride: usize, mc_blk: &[u8; 64]) {
65 for (dline, (prow, drow)) in dst.chunks_mut(dstride)
66 .zip(mc_blk.chunks_exact(8).zip(self.diff.chunks_exact(8))) {
67 for (dst, (&prev, &diff)) in dline.iter_mut().zip(prow.iter().zip(drow.iter())) {
68 *dst = (i16::from(prev) + diff) as u8;
69 }
70 }
71 }
72 pub fn recon_dct_i(&self, dst: &mut [u8], dstride: usize) {
73 for (dline, srow) in dst.chunks_mut(dstride).zip(self.dct_i.chunks_exact(8)) {
74 dline[..8].copy_from_slice(srow);
75 }
76 }
77 pub fn recon_dct_p(&self, dst: &mut [u8], dstride: usize) {
78 for (dline, srow) in dst.chunks_mut(dstride).zip(self.dct_p.chunks_exact(8)) {
79 dline[..8].copy_from_slice(srow);
80 }
81 }
82
83 pub fn try_residue(&self, tokens: &mut BlockTokens) -> u32 {
84 let mut tree = Tree::new(true);
85 let mut flat = [0; 64];
86 let mut blen = [0; 64];
87
88 for (&idx, &val) in BINK_INV_SCAN.iter().zip(self.diff.iter()) {
89 flat[idx] = val;
90 let aval = val.unsigned_abs();
91 let mut b = 0u8;
92 while (1 << b) <= aval {
93 b += 1;
94 }
95 blen[idx] = b.saturating_sub(1);
96 }
97
98 let mut max_val = 0;
99 let mut max_bits = 0;
100 let mut bits = 0;
101 let mut avals = [0; 64];
102 let mut signs = [false; 64];
103 for ((aval, sign), (&val, &vlen)) in avals.iter_mut().zip(signs.iter_mut())
104 .zip(flat.iter().zip(blen.iter())) {
105 *aval = val.unsigned_abs();
106 *sign = val < 0;
107 max_val = max_val.max(*aval);
108 max_bits = max_bits.max(vlen);
109 bits += aval.count_ones();
110 }
111
112 if max_bits > 7 || bits > 127 {
113 return MAX_DIST;
114 }
115
116 tokens.nresidues.push(bits as u8);
117 tokens.other.push((max_bits as u16, 3));
118
119 let mut nz_cand = Vec::with_capacity(64);
120 let mut masks_left = bits + 1;
121 'tree_loop: for cur_bits in (0..=max_bits).rev() {
122 let mask = 1 << cur_bits;
123 for &idx in nz_cand.iter() {
124 tokens.write_bit(((avals[idx] & mask) != 0) as u16);
125 if (avals[idx] & mask) != 0 {
126 masks_left -= 1;
127 if masks_left == 0 {
128 break 'tree_loop;
129 }
130 }
131 }
132
133 let mut pos = tree.start;
134 while pos < tree.end {
135 if tree.state[pos] == TreeState::None {
136 pos += 1;
137 continue;
138 }
139 if let TreeState::Candidate(idx) = tree.state[pos] {
140 let idx = usize::from(idx);
141 if blen[idx] == cur_bits && flat[idx] != 0 {
142 tree.state[pos] = TreeState::None;
143 tokens.write_bit(1);
144 tokens.write_bit(signs[idx] as u16);
145 nz_cand.push(idx);
146 masks_left -= 1;
147 if masks_left == 0 {
148 break 'tree_loop;
149 }
150 } else {
151 tokens.write_bit(0);
152 pos += 1;
153 }
154 continue;
155 }
156 let range = tree.state[pos].get_range();
157 let cur_max_bits = blen[range].iter().fold(0u8, |acc, &a| acc.max(a));
158 if cur_max_bits == cur_bits {
159 tokens.write_bit(1);
160 match tree.state[pos] {
161 TreeState::Twenty(val) => {
162 tree.state[pos] = TreeState::Sixteen(val + 4);
163 for i in 0..4 {
164 let idx = usize::from(val) + i;
165 if blen[idx] == cur_bits && flat[idx] != 0 {
166 tokens.write_bit(0);
167 tokens.write_bit(signs[idx] as u16);
168 nz_cand.push(idx);
169 masks_left -= 1;
170 if masks_left == 0 {
171 break 'tree_loop;
172 }
173 } else {
174 tokens.write_bit(1);
175 tree.add_to_head(TreeState::Candidate(idx as u8));
176 }
177 }
178 },
179 TreeState::Sixteen(val) => {
180 tree.state[pos] = TreeState::Four(val);
181 for i in 1u8..4 {
182 tree.add_to_tail(TreeState::Four(val + i * 4));
183 }
184 },
185 TreeState::Four(val) => {
186 tree.state[pos] = TreeState::None;
187 for i in 0..4 {
188 let idx = usize::from(val) + i;
189 if blen[idx] == cur_bits && flat[idx] != 0 {
190 tokens.write_bit(0);
191 tokens.write_bit(signs[idx] as u16);
192 nz_cand.push(idx);
193 masks_left -= 1;
194 if masks_left == 0 {
195 break 'tree_loop;
196 }
197 } else {
198 tokens.write_bit(1);
199 tree.add_to_head(TreeState::Candidate(idx as u8));
200 }
201 }
202 },
203 _ => unreachable!(),
204 };
205 } else {
206 tokens.write_bit(0);
207 pos += 1;
208 }
209 }
210 }
211
212 0
213 }
214
215 pub fn set_quant_ranges(&mut self, ranges: [u8; 4]) {
216 self.i_start = usize::from(ranges[0]);
217 self.i_len = usize::from(ranges[1]);
218 self.p_start = usize::from(ranges[2]);
219 self.p_len = usize::from(ranges[3]);
220 }
221 pub fn try_dct_intra(&mut self, blk: &[u8; 64], tokens: &mut BlockTokens, tmp_tok: &mut BlockTokens, is_b: bool, rc: &RateControl, mut best_dist: u32) -> u32 {
222 tokens.clear();
223 if self.i_len == 0 {
224 return MAX_DIST;
225 }
226
227 let mut ref_coeffs = [0i32; 64];
228 for (dst, &src) in ref_coeffs.iter_mut().zip(blk.iter()) {
229 *dst = i32::from(src);
230 }
231 dct(&mut ref_coeffs);
232
233 let mut dct_out = [0u8; 64];
234 let qmats = if is_b { &self.qmats.intra_qmat } else { BINK_INTRA_QUANT };
235 for (qidx, qmat) in qmats.iter().enumerate().skip(self.i_start).take(self.i_len) {
236 let mut coeffs = ref_coeffs;
237 for (idx, el) in coeffs.iter_mut().enumerate() {
238 *el /= qmat[BINK_INV_SCAN[idx]];
239 }
240
241 if coeffs[0] >= 2048 {
242 continue;
243 }
244
245 tmp_tok.clear();
246 tmp_tok.intradc.push(coeffs[0] as u16);
247 Self::code_dct_coeffs(&coeffs, tmp_tok);
248 if is_b {
249 tmp_tok.intraq.push(qidx as u8);
250 } else {
251 tmp_tok.other.push((qidx as u16, 4));
252 }
253 let bits = tmp_tok.bits(is_b);
254 if rc.metric(0, bits) >= best_dist {
255 continue;
256 }
257
258 for (idx, el) in coeffs.iter_mut().enumerate() {
259 if *el != 0 {
260 *el = (*el * qmat[BINK_INV_SCAN[idx]]) >> 11;
261 }
262 }
263 idct(&mut coeffs);
264 for (dst, &src) in dct_out.iter_mut().zip(coeffs.iter()) {
265 *dst = src as u8;
266 }
267 let diff = calc_diff(&dct_out, 8, blk, 8);
268 let dist = rc.metric(diff, bits);
269 if dist < best_dist {
270 best_dist = dist;
271 std::mem::swap(tokens, tmp_tok);
272 self.dct_i.copy_from_slice(&dct_out);
273 }
274 }
275
276 best_dist
277 }
278 pub fn try_dct_inter(&mut self, ref_blk: &[u8; 64], cur_blk: &[u8; 64], tokens: &mut BlockTokens, tmp_tok: &mut BlockTokens, is_b: bool, rc: &RateControl, mut best_dist: u32) -> u32 {
279 let mv_x = tokens.xoff[0];
280 let mv_y = tokens.yoff[0];
281
282 let mut ref_coeffs = [0i32; 64];
283 for (dst, &src) in ref_coeffs.iter_mut().zip(self.diff.iter()) {
284 *dst = i32::from(src);
285 }
286 dct(&mut ref_coeffs);
287
288 let mut dct_out = [0u8; 64];
289 let qmats = if is_b { &self.qmats.inter_qmat } else { BINK_INTER_QUANT };
290 for (qidx, qmat) in qmats.iter().enumerate().skip(self.p_start).take(self.p_len) {
291 let mut coeffs = ref_coeffs;
292
293 for (idx, el) in coeffs.iter_mut().enumerate() {
294 *el /= qmat[BINK_INV_SCAN[idx]];
295 }
296
297 if coeffs[0].unsigned_abs() >= 1024 {
298 continue;
299 }
300
301 tmp_tok.clear();
302 tmp_tok.interdc.push(coeffs[0] as i16);
303 tmp_tok.xoff.push(mv_x);
304 tmp_tok.yoff.push(mv_y);
305 Self::code_dct_coeffs(&coeffs, tmp_tok);
306 if is_b {
307 tmp_tok.interq.push(qidx as u8);
308 } else {
309 tmp_tok.other.push((qidx as u16, 4));
310 }
311 let bits = tmp_tok.bits(is_b);
312 if rc.metric(0, bits) >= best_dist {
313 continue;
314 }
315
316 for (idx, el) in coeffs.iter_mut().enumerate() {
317 if *el != 0 {
318 *el = (*el * qmat[BINK_INV_SCAN[idx]]) >> 11;
319 }
320 }
321 idct(&mut coeffs);
322 for (dst, (&prev, &diff)) in dct_out.iter_mut().zip(ref_blk.iter().zip(coeffs.iter())) {
323 *dst = (i32::from(prev) + diff) as u8;
324 }
325 let diff = calc_diff(&dct_out, 8, cur_blk, 8);
326 let dist = rc.metric(diff, bits);
327 if dist < best_dist {
328 best_dist = dist;
329 std::mem::swap(tokens, tmp_tok);
330 self.dct_p.copy_from_slice(&dct_out);
331 }
332 }
333
334 best_dist
335 }
336
337 fn code_dct_coeffs(coeffs: &[i32; 64], tokens: &mut BlockTokens) {
338 let mut tree = Tree::new(false);
339 let mut flat = [0; 64];
340 let mut blen = [0; 64];
341
342 for (&idx, &val) in BINK_INV_SCAN.iter().zip(coeffs.iter()).skip(1) {
343 flat[idx] = val;
344 let aval = val.unsigned_abs();
345 let mut b = 0u8;
346 while (1 << b) <= aval {
347 b += 1;
348 }
349 blen[idx] = b;
350 }
351
352 let mut max_val = 0;
353 let mut max_bits = 0;
354 let mut avals = [0; 64];
355 let mut signs = [false; 64];
356 for ((aval, sign), (&val, &vlen)) in avals.iter_mut().zip(signs.iter_mut())
357 .zip(flat.iter().zip(blen.iter())) {
358 *aval = val.unsigned_abs();
359 *sign = val < 0;
360 max_val = max_val.max(*aval);
361 max_bits = max_bits.max(vlen);
362 }
363
364 tokens.other.push((u16::from(max_bits), 4));
365 for cur_bits in (1..=max_bits).rev() {
366 let mut pos = tree.start;
367 while pos < tree.end {
368 if tree.state[pos] == TreeState::None {
369 pos += 1;
370 continue;
371 }
372 if let TreeState::Candidate(idx) = tree.state[pos] {
373 let idx = usize::from(idx);
374 if blen[idx] == cur_bits && flat[idx] != 0 {
375 tree.state[pos] = TreeState::None;
376 tokens.write_bit(1);
377 tokens.write_coef(avals[idx], signs[idx], cur_bits);
378 } else {
379 tokens.write_bit(0);
380 pos += 1;
381 }
382 continue;
383 }
384 let range = tree.state[pos].get_range();
385 let cur_max_bits = blen[range].iter().fold(0u8, |acc, &a| acc.max(a));
386 if cur_max_bits == cur_bits {
387 tokens.write_bit(1);
388 match tree.state[pos] {
389 TreeState::Twenty(val) => {
390 tree.state[pos] = TreeState::Sixteen(val + 4);
391 for i in 0..4 {
392 let idx = usize::from(val) + i;
393 if blen[idx] == cur_bits && flat[idx] != 0 {
394 tokens.write_bit(0);
395 tokens.write_coef(avals[idx], signs[idx], cur_bits);
396 } else {
397 tokens.write_bit(1);
398 tree.add_to_head(TreeState::Candidate(idx as u8));
399 }
400 }
401 },
402 TreeState::Sixteen(val) => {
403 tree.state[pos] = TreeState::Four(val);
404 for i in 1u8..4 {
405 tree.add_to_tail(TreeState::Four(val + i * 4));
406 }
407 },
408 TreeState::Four(val) => {
409 tree.state[pos] = TreeState::None;
410 for i in 0..4 {
411 let idx = usize::from(val) + i;
412 if blen[idx] == cur_bits && flat[idx] != 0 {
413 tokens.write_bit(0);
414 tokens.write_coef(avals[idx], signs[idx], cur_bits);
415 } else {
416 tokens.write_bit(1);
417 tree.add_to_head(TreeState::Candidate(idx as u8));
418 }
419 }
420 },
421 _ => unreachable!(),
422 };
423 } else {
424 tokens.write_bit(0);
425 pos += 1;
426 }
427 }
428 }
429 }
430}
431
/// One entry of the coefficient tree walked by the coders.
///
/// Every non-empty variant carries the index of the first flattened
/// coefficient it covers.
#[derive(Clone,Copy,Debug,PartialEq)]
enum TreeState {
    /// Empty slot, skipped during the walk.
    None,
    /// Group covering 20 consecutive coefficients.
    Twenty(u8),
    /// Group covering 16 consecutive coefficients.
    Sixteen(u8),
    /// Group covering 4 consecutive coefficients.
    Four(u8),
    /// A single coefficient.
    Candidate(u8),
}

impl TreeState {
    /// Returns the half-open range of flattened coefficient indices covered
    /// by this entry (empty for `None`).
    fn get_range(self) -> std::ops::Range<usize> {
        let (first, count) = match self {
            Self::None => (0u8, 0u8),
            Self::Twenty(first) => (first, 20),
            Self::Sixteen(first) => (first, 16),
            Self::Four(first) => (first, 4),
            Self::Candidate(first) => (first, 1),
        };
        let past_end = first + count;
        usize::from(first)..usize::from(past_end)
    }
}
453
454struct Tree {
455 state: [TreeState; 128],
456 start: usize,
457 end: usize,
458}
459
460impl Tree {
461 fn new(is_res: bool) -> Self {
462 let mut state = [TreeState::None; 128];
463 let start = 64;
464 let mut end = start;
465
466 state[end] = TreeState::Twenty(4);
467 end += 1;
468 state[end] = TreeState::Twenty(24);
469 end += 1;
470 state[end] = TreeState::Twenty(44);
471 end += 1;
472 if is_res {
473 state[end] = TreeState::Four(0);
474 end += 1;
475 } else {
476 for i in 1..4 {
477 state[end] = TreeState::Candidate(i);
478 end += 1;
479 }
480 }
481 Self { state, start, end }
482 }
483 fn add_to_tail(&mut self, ts: TreeState) {
484 self.state[self.end] = ts;
485 self.end += 1;
486 }
487 fn add_to_head(&mut self, ts: TreeState) {
488 self.start -= 1;
489 self.state[self.start] = ts;
490 }
491}
492
// Fixed-point multipliers for the inverse transform (products are shifted right by 11).
const A1: i32 = 2896;
const A2: i32 = 2217;
const A3: i32 = 3784;
const A4: i32 = -5352;

/// One 8-point inverse transform pass.
///
/// Reads eight values from `$src` starting at `$off` with stride `$sstep`
/// and writes eight results to `$dst` starting at `$off` with stride
/// `$dstep`; every result is `(value + $bias) >> $shift`.
macro_rules! idct {
    ($src: expr, $sstep: expr, $dst: expr, $dstep: expr, $off: expr, $bias: expr, $shift: expr) => {
        let x0 = $src[$off];
        let x1 = $src[$off + $sstep];
        let x2 = $src[$off + 2 * $sstep];
        let x3 = $src[$off + 3 * $sstep];
        let x4 = $src[$off + 4 * $sstep];
        let x5 = $src[$off + 5 * $sstep];
        let x6 = $src[$off + 6 * $sstep];
        let x7 = $src[$off + 7 * $sstep];

        // Butterflies on the even-indexed inputs.
        let even_sum = x0 + x4;
        let even_diff = x0 - x4;
        let mid_sum = x2 + x6;
        let mid_rot = A1.wrapping_mul(x2 - x6) >> 11;
        // Butterflies on the odd-indexed inputs.
        let odd53_sum = x5 + x3;
        let odd53_diff = x5 - x3;
        let odd17_sum = x1 + x7;
        let odd17_diff = x1 - x7;

        let o0 = odd53_sum + odd17_sum;
        let rot = A3.wrapping_mul(odd53_diff + odd17_diff) >> 11;
        let o1 = (A4.wrapping_mul(odd53_diff) >> 11) - o0 + rot;
        let o2 = (A1.wrapping_mul(odd17_sum - odd53_sum) >> 11) - o1;
        let o3 = (A2.wrapping_mul(odd17_diff) >> 11) + o2 - rot;

        let e0 = even_sum + mid_sum;
        let e1 = even_sum - mid_sum;
        let e2 = even_diff + (mid_rot - mid_sum);
        let e3 = even_diff - (mid_rot - mid_sum);

        $dst[$off] = (e0 + o0 + $bias) >> $shift;
        $dst[$off + $dstep] = (e2 + o1 + $bias) >> $shift;
        $dst[$off + 2 * $dstep] = (e3 + o2 + $bias) >> $shift;
        $dst[$off + 3 * $dstep] = (e1 - o3 + $bias) >> $shift;
        $dst[$off + 4 * $dstep] = (e1 + o3 + $bias) >> $shift;
        $dst[$off + 5 * $dstep] = (e3 - o2 + $bias) >> $shift;
        $dst[$off + 6 * $dstep] = (e2 - o1 + $bias) >> $shift;
        $dst[$off + 7 * $dstep] = (e0 - o0 + $bias) >> $shift;
    };
}

/// In-place 8x8 inverse transform: a column pass without scaling followed by
/// a row pass that adds 0x7F and shifts right by 8.
fn idct(coeffs: &mut [i32; 64]) {
    let mut columns = [0i32; 64];
    for col in 0..8 {
        idct!(coeffs, 8, columns, 8, col, 0, 0);
    }

    let mut line = [0i32; 8];
    for (out_row, in_row) in coeffs.chunks_exact_mut(8).zip(columns.chunks_exact(8)) {
        idct!(in_row, 1, line, 1, 0, 0x7F, 8);
        out_row.copy_from_slice(&line);
    }
}
540
// Fixed-point multipliers for the forward transform (products are shifted right by 5).
const B1: i32 = 2896;
const B2: i32 = 3789;
const B3: i32 = 1569;
const B4: i32 = 4464;
const B5: i32 = 6679;
const B6: i32 = 1327;
const B7: i32 = 888;

/// One 8-point forward transform pass.
///
/// Reads eight values from `$src` starting at `$off` with stride `$sstep`
/// and writes eight results to `$dst` starting at `$off` with stride
/// `$dstep`; every result is `(value + $bias) >> $shift`.
macro_rules! dct {
    ($src: expr, $sstep: expr, $dst: expr, $dstep: expr, $off: expr, $bias: expr, $shift: expr) => {
        let x0 = $src[$off];
        let x1 = $src[$off + $sstep];
        let x2 = $src[$off + 2 * $sstep];
        let x3 = $src[$off + 3 * $sstep];
        let x4 = $src[$off + 4 * $sstep];
        let x5 = $src[$off + 5 * $sstep];
        let x6 = $src[$off + 6 * $sstep];
        let x7 = $src[$off + 7 * $sstep];

        // Sums and differences of mirrored input pairs.
        let sum07 = x0 + x7;
        let dif07 = x0 - x7;
        let sum16 = x1 + x6;
        let dif16 = x1 - x6;
        let sum25 = x2 + x5;
        let dif25 = x2 - x5;
        let sum34 = x3 + x4;
        let dif34 = x3 - x4;

        // Even outputs.
        let p0 = (sum07 + sum25) << 7;
        let p1 = (sum07 - sum25) << 7;
        let p2 = (sum16 + sum34) << 7;
        let p3 = (sum16 - sum34) << 7;

        $dst[$off] = (p0 + p2 + $bias) >> $shift;
        $dst[$off + 4 * $dstep] = (p1 - p3 + $bias) >> $shift;

        let q0 = (sum07 - sum34) << 7;
        let q1 = B1.wrapping_mul((p1 + p3) >> 7) >> 5;
        $dst[$off + 2 * $dstep] = (q0 + q1 + $bias) >> $shift;
        $dst[$off + 6 * $dstep] = (q0 - q1 + $bias) >> $shift;

        // Odd outputs; all four share the `dif07 << 7` term.
        let base = dif07 << 7;

        let r0 = B2.wrapping_mul(dif07) + B3.wrapping_mul(dif34);
        let r1 = ( r0 + B4.wrapping_mul(dif25) + B5.wrapping_mul(dif16) + (1 << 4)) >> 5;
        let r2 = (-r0 + B6.wrapping_mul(dif25) - B7.wrapping_mul(dif16) + (1 << 4)) >> 5;
        $dst[$off + $dstep] = (base + r1 + $bias) >> $shift;
        $dst[$off + 7 * $dstep] = (base + r2 + $bias) >> $shift;

        let t0 = B3.wrapping_mul(dif07) - B2.wrapping_mul(dif34);
        let t1 = ( t0 - B6.wrapping_mul(dif16) - B5.wrapping_mul(dif25) + (1 << 4)) >> 5;
        let t2 = (-t0 - B4.wrapping_mul(dif16) + B7.wrapping_mul(dif25) + (1 << 4)) >> 5;

        $dst[$off + 3 * $dstep] = (base + t1 + $bias) >> $shift;
        $dst[$off + 5 * $dstep] = (base + t2 + $bias) >> $shift;
    };
}

/// In-place 8x8 forward transform: a column pass that adds 1 and shifts
/// right by 1, followed by a row pass without bias or shift.
fn dct(coeffs: &mut [i32; 64]) {
    let mut columns = [0i32; 64];
    for col in 0..8 {
        dct!(coeffs, 8, columns, 8, col, 1, 1);
    }

    let mut line = [0i32; 8];
    for (out_row, in_row) in coeffs.chunks_exact_mut(8).zip(columns.chunks_exact(8)) {
        dct!(in_row, 1, line, 1, 0, 0, 0);
        out_row.copy_from_slice(&line);
    }
}
598