h264: miscellaneous micro-optimisations
nihav-itu/src/codecs/h264/types.rs
use nihav_core::frame::NASimpleVideoFrame;
use nihav_codec_support::codecs::{MV, ZERO_MV};
use nihav_codec_support::data::GenericCache;
use super::SliceRefs;
use super::pic_ref::FrameMBInfo;

#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq)]
pub enum BMode {
    L0,
    L1,
    Bi,
}

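/// Macroblock type as coded in the bitstream. `Intra16x16` carries three
/// bitstream-derived parameters (presumably the 16x16 intra prediction mode
/// and the chroma/luma coded block patterns, as in the I_16x16 mb_type
/// encoding); B partitions carry their per-partition prediction direction.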
#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum MBType {
    #[default]
    Intra4x4,
    Intra8x8,
    Intra16x16(u8, u8, u8),
    PCM,

    P16x16,
    P16x8,
    P8x16,
    P8x8,
    P8x8Ref0,
    PSkip,

    Direct,
    B16x16(BMode),
    B16x8(BMode, BMode),
    B8x16(BMode, BMode),
    B8x8,
    BSkip,
}

impl MBType {
    pub fn is_intra(self) -> bool {
        matches!(self, MBType::Intra4x4 | MBType::Intra8x8 | MBType::Intra16x16(_, _, _) | MBType::PCM)
    }
    pub fn is_intra16x16(self) -> bool {
        matches!(self, MBType::Intra16x16(_, _, _))
    }
    pub fn is_skip(self) -> bool {
        matches!(self, MBType::PSkip | MBType::BSkip)
    }
    pub fn is_4x4(self) -> bool { self.num_parts() == 4 }
    pub fn is_l0(self, part: usize) -> bool {
        match self {
            MBType::B16x16(mode) => mode == BMode::L0,
            MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
                if part == 0 {
                    mode0 == BMode::L0
                } else {
                    mode1 == BMode::L0
                }
            },
            MBType::Direct | MBType::BSkip => false,
            _ => true,
        }
    }
    pub fn is_l1(self, part: usize) -> bool {
        match self {
            MBType::B16x16(mode) => mode == BMode::L1,
            MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
                if part == 0 {
                    mode0 == BMode::L1
                } else {
                    mode1 == BMode::L1
                }
            },
            _ => false,
        }
    }
    pub fn num_parts(self) -> usize {
        match self {
            MBType::Intra4x4 | MBType::Intra8x8 | MBType::Intra16x16(_, _, _) | MBType::PCM |
            MBType::PSkip |
            MBType::Direct | MBType::BSkip
                => 1,
            MBType::P16x16 |
            MBType::B16x16(_)
                => 1,
            MBType::P16x8 | MBType::P8x16 |
            MBType::B16x8(_, _) | MBType::B8x16(_, _)
                => 2,
            _ => 4,
        }
    }
    pub fn size(self) -> (usize, usize) {
        match self {
            MBType::Intra4x4 |
            MBType::Intra8x8 |
            MBType::Intra16x16(_, _, _) |
            MBType::PCM |
            MBType::P16x16 |
            MBType::PSkip |
            MBType::Direct |
            MBType::B16x16(_) |
            MBType::BSkip
                => (16, 16),
            MBType::P16x8 | MBType::B16x8(_, _) => (16, 8),
            MBType::P8x16 | MBType::B8x16(_, _) => (8, 16),
            _ => (8, 8),
        }
    }
}

#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum SubMBType {
    P8x8,
    P8x4,
    P4x8,
    P4x4,
    #[default]
    Direct8x8,
    B8x8(BMode),
    B8x4(BMode),
    B4x8(BMode),
    B4x4(BMode),
}

impl SubMBType {
    pub fn num_parts(self) -> usize {
        match self {
            SubMBType::P8x8 | SubMBType::Direct8x8 | SubMBType::B8x8(_) => 1,
            SubMBType::P4x4 | SubMBType::B4x4(_) => 4,
            _ => 2,
        }
    }
    pub fn size(self) -> (usize, usize) {
        match self {
            SubMBType::P8x8 | SubMBType::Direct8x8 | SubMBType::B8x8(_) => (8, 8),
            SubMBType::P8x4 | SubMBType::B8x4(_) => (8, 4),
            SubMBType::P4x8 | SubMBType::B4x8(_) => (4, 8),
            SubMBType::P4x4 | SubMBType::B4x4(_) => (4, 4),
        }
    }
    pub fn is_l0(self) -> bool {
        match self {
            SubMBType::B8x8(mode) | SubMBType::B8x4(mode) |
            SubMBType::B4x8(mode) | SubMBType::B4x4(mode) => {
                mode == BMode::L0
            },
            _ => true,
        }
    }
    pub fn is_l1(self) -> bool {
        match self {
            SubMBType::B8x8(mode) | SubMBType::B8x4(mode) |
            SubMBType::B4x8(mode) | SubMBType::B4x4(mode) => {
                mode == BMode::L1
            },
            _ => false,
        }
    }
}

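/// Macroblock type stripped of per-partition details, compact enough to be
/// stored for every decoded macroblock and consulted later by prediction and
/// deblocking.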
#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum CompactMBType {
    Intra4x4,
    Intra8x8,
    Intra16x16,
    PCM,

    P16x16,
    P16x8,
    P8x16,
    P8x8,
    P8x8Ref0,
    PSkip,

    Direct,
    B16x16,
    B16x8,
    B8x16,
    B8x8,
    BSkip,

    #[default]
    None,
}

impl CompactMBType {
    pub fn is_intra(self) -> bool {
        matches!(self, CompactMBType::Intra4x4 | CompactMBType::Intra8x8 | CompactMBType::Intra16x16)
    }
    pub fn is_intra16orpcm(self) -> bool {
        matches!(self, CompactMBType::Intra16x16 | CompactMBType::PCM)
    }
    pub fn is_skip(self) -> bool {
        matches!(self, CompactMBType::PSkip | CompactMBType::BSkip)
    }
    pub fn is_direct(self) -> bool {
        matches!(self, CompactMBType::BSkip | CompactMBType::Direct | CompactMBType::None)
    }
    pub fn is_inter(self) -> bool {
        !self.is_intra() && !self.is_skip() && self != CompactMBType::PCM
    }
    pub fn is_16x16_ref(self) -> bool {
        matches!(self,
            CompactMBType::Intra4x4 |
            CompactMBType::Intra8x8 |
            CompactMBType::Intra16x16 |
            CompactMBType::PCM |
            CompactMBType::P16x16 |
            CompactMBType::B16x16)
    }
}

impl From<MBType> for CompactMBType {
    fn from(mbtype: MBType) -> Self {
        match mbtype {
            MBType::Intra4x4 => CompactMBType::Intra4x4,
            MBType::Intra8x8 => CompactMBType::Intra8x8,
            MBType::Intra16x16(_, _, _) => CompactMBType::Intra16x16,
            MBType::PCM => CompactMBType::PCM,
            MBType::P16x16 => CompactMBType::P16x16,
            MBType::P16x8 => CompactMBType::P16x8,
            MBType::P8x16 => CompactMBType::P8x16,
            MBType::P8x8 => CompactMBType::P8x8,
            MBType::P8x8Ref0 => CompactMBType::P8x8Ref0,
            MBType::PSkip => CompactMBType::PSkip,
            MBType::Direct => CompactMBType::Direct,
            MBType::B16x16(_) => CompactMBType::B16x16,
            MBType::B16x8(_, _) => CompactMBType::B16x8,
            MBType::B8x16(_, _) => CompactMBType::B8x16,
            MBType::B8x8 => CompactMBType::B8x8,
            MBType::BSkip => CompactMBType::BSkip,
        }
    }
}

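/// Intra prediction modes, listed in the order of the standard mode numbering
/// (0 = Vertical .. 8 = Horizontal Up); `None` marks an unavailable block.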
#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum IntraPredMode {
    Vertical,
    Horizontal,
    DC,
    DiagDownLeft,
    DiagDownRight,
    VerRight,
    HorDown,
    VerLeft,
    HorUp,
    #[default]
    None,
}

impl IntraPredMode {
    pub fn is_none(self) -> bool { self == IntraPredMode::None }
    pub fn into_pred_idx(self) -> i8 {
        if !self.is_none() {
            self as u8 as i8
        } else {
            -1
        }
    }
}

impl From<u8> for IntraPredMode {
    fn from(val: u8) -> Self {
        match val {
            0 => IntraPredMode::Vertical,
            1 => IntraPredMode::Horizontal,
            2 => IntraPredMode::DC,
            3 => IntraPredMode::DiagDownLeft,
            4 => IntraPredMode::DiagDownRight,
            5 => IntraPredMode::VerRight,
            6 => IntraPredMode::HorDown,
            7 => IntraPredMode::VerLeft,
            8 => IntraPredMode::HorUp,
            _ => IntraPredMode::None,
        }
    }
}

impl From<IntraPredMode> for u8 {
    fn from(val: IntraPredMode) -> Self {
        match val {
            IntraPredMode::Vertical => 0,
            IntraPredMode::Horizontal => 1,
            IntraPredMode::DC => 2,
            IntraPredMode::DiagDownLeft => 3,
            IntraPredMode::DiagDownRight => 4,
            IntraPredMode::VerRight => 5,
            IntraPredMode::HorDown => 6,
            IntraPredMode::VerLeft => 7,
            IntraPredMode::HorUp => 8,
            _ => 9,
        }
    }
}

pub const MISSING_POC: u16 = 0xFFFF;

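/// Reference picture index packed into one byte: bit 6 (`DIRECT_FLAG`) marks a
/// direct-predicted reference, 0xFF means "missing" and 0xFE "invalid".
/// Equality and `index()` ignore the direct flag.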
#[derive(Clone,Copy,Debug)]
pub struct PicRef {
    ref_idx: u8
}

pub const MISSING_REF: PicRef = PicRef { ref_idx: 0xFF };
pub const INVALID_REF: PicRef = PicRef { ref_idx: 0xFE };
pub const ZERO_REF: PicRef = PicRef { ref_idx: 0 };
const DIRECT_FLAG: u8 = 0x40;

impl PicRef {
    pub fn new(ref_idx: u8) -> Self {
        Self { ref_idx }
    }
    pub fn not_avail(self) -> bool {
        self == MISSING_REF || self == INVALID_REF
    }
    pub fn index(self) -> usize { (self.ref_idx & !DIRECT_FLAG) as usize }
    pub fn is_direct(self) -> bool { (self.ref_idx & DIRECT_FLAG) != 0 }
    pub fn set_direct(&mut self) { self.ref_idx |= DIRECT_FLAG; }
    fn min_pos(self, other: Self) -> Self {
        match (self.not_avail(), other.not_avail()) {
            (true, true) => self,
            (false, true) => self,
            (true, false) => other,
            (false, false) => PicRef::new((self.ref_idx & !DIRECT_FLAG).min(other.ref_idx & !DIRECT_FLAG)),
        }
    }
}

impl Default for PicRef {
    fn default() -> Self { MISSING_REF }
}

impl PartialEq for PicRef {
    fn eq(&self, other: &Self) -> bool {
        (self.ref_idx | DIRECT_FLAG) == (other.ref_idx | DIRECT_FLAG)
    }
}

impl std::fmt::Display for PicRef {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if *self == MISSING_REF {
            write!(f, "-1")
        } else if *self == INVALID_REF {
            write!(f, "-2")
        } else {
            write!(f, "{}", self.ref_idx & !DIRECT_FLAG)
        }
    }
}

#[derive(Clone,Copy,Default)]
pub struct MBData {
    pub mb_type: CompactMBType,
    pub cbp: u8,
    pub coded_flags: u32,
    pub cmode: u8,
    pub qp_y: u8,
    pub qp_u: u8,
    pub qp_v: u8,
    pub transform_8x8: bool,
}

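/// Maps a 4x4 block index (raster order within a macroblock) to the index of
/// the 8x8 block containing it.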
pub fn blk4_to_blk8(blk4: usize) -> usize {
    const MAP: [usize; 16] = [ 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 ];
    MAP[blk4 & 0xF]
}

#[derive(Clone,Copy)]
pub struct Blk8Data {
    pub ref_idx: [PicRef; 2],
    pub ncoded_c: [u8; 2],
}

impl Default for Blk8Data {
    fn default() -> Self {
        Self {
            ref_idx: [MISSING_REF; 2],
            ncoded_c: [0; 2],
        }
    }
}

#[derive(Clone,Copy,Default)]
pub struct Blk4Data {
    pub ncoded: u8,
    pub ipred: IntraPredMode,
    pub mv: [MV; 2],
    pub mvd: [MV; 2],
}

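/// Per-slice decoding state: the current macroblock position, sliding caches
/// of macroblock / 8x8 / 4x4 block data (with extra columns and rows for the
/// left and top neighbours), per-MB deblocking strengths, and the
/// reconstructed top/left pixel lines used for intra prediction.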
pub struct SliceState {
    pub mb_x: usize,
    pub mb_y: usize,
    pub mb_w: usize,
    pub mb_h: usize,
    pub mb_start: usize,

    pub mb: GenericCache<MBData>,
    pub blk8: GenericCache<Blk8Data>,
    pub blk4: GenericCache<Blk4Data>,

    pub deblock: [u8; 16],

    pub has_top: bool,
    pub has_left: bool,

    pub top_line_y: Vec<u8>,
    pub left_y: [u8; 17], // first element is top-left
    pub top_line_c: [Vec<u8>; 2],
    pub left_c: [[u8; 9]; 2],
}

const BLK4_TO_D8: [usize; 16] = [ 0, 0, 3, 3, 0, 0, 3, 3, 12, 12, 15, 15, 12, 12, 15, 15 ];

impl SliceState {
    pub fn new() -> Self {
        Self {
            mb_x: 0,
            mb_y: 0,
            mb_w: 0,
            mb_h: 0,
            mb_start: 0,
            mb: GenericCache::new(0, 0, MBData::default()),
            blk8: GenericCache::new(0, 0, Blk8Data::default()),
            blk4: GenericCache::new(0, 0, Blk4Data::default()),

            deblock: [0; 16],

            has_top: false,
            has_left: false,

            top_line_y: Vec::new(),
            left_y: [0; 17],
            top_line_c: [Vec::new(), Vec::new()],
            left_c: [[0; 9]; 2],
        }
    }
    pub fn reset(&mut self, mb_w: usize, mb_h: usize, mb_pos: usize) {
        self.mb_w = mb_w;
        self.mb_h = mb_h;
        self.mb_start = mb_pos;
        if mb_w > 0 {
            self.mb_x = mb_pos % mb_w;
            self.mb_y = mb_pos / mb_w;
        } else {
            self.mb_x = 0;
            self.mb_y = 0;
        }
        self.mb = GenericCache::new(1, mb_w + 2, MBData::default());
        self.blk8 = GenericCache::new(2, mb_w * 2 + 2, Blk8Data::default());
        self.blk4 = GenericCache::new(4, mb_w * 4 + 2, Blk4Data::default());

        self.has_top = false;
        self.has_left = false;

        self.top_line_y.resize(mb_w * 16 + 1, 0x80);
        self.top_line_c[0].resize(mb_w * 8 + 1, 0x80);
        self.top_line_c[1].resize(mb_w * 8 + 1, 0x80);
        self.left_y = [0x80; 17];
        self.left_c = [[0x80; 9]; 2];
    }
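    /// Saves the bottom row and rightmost column of the just-reconstructed
    /// macroblock into the top/left line buffers used for intra prediction of
    /// the following macroblocks.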
    pub fn save_ipred_context(&mut self, frm: &NASimpleVideoFrame<u8>) {
        let dstoff = self.mb_x * 16;
        let srcoff = frm.offset[0] + self.mb_x * 16 + self.mb_y * 16 * frm.stride[0];
        self.left_y[0] = self.top_line_y[dstoff + 15];
        self.top_line_y[dstoff..][..16].copy_from_slice(&frm.data[srcoff + frm.stride[0] * 15..][..16]);
        for (dst, src) in self.left_y[1..].iter_mut().zip(frm.data[srcoff..].chunks(frm.stride[0])) {
            *dst = src[15];
        }
        for chroma in 0..2 {
            let cstride = frm.stride[chroma + 1];
            let dstoff = self.mb_x * 8;
            let srcoff = frm.offset[chroma + 1] + self.mb_x * 8 + self.mb_y * 8 * cstride;
            self.left_c[chroma][0] = self.top_line_c[chroma][dstoff + 7];
            self.top_line_c[chroma][dstoff..][..8].copy_from_slice(&frm.data[srcoff + cstride * 7..][..8]);
            for (dst, src) in self.left_c[chroma][1..].iter_mut().zip(frm.data[srcoff..].chunks(cstride)) {
                *dst = src[7];
            }
        }
    }
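    /// Computes deblocking filter strengths for the current macroblock.
    /// Each entry of `deblock` describes one 4x4 block: the high nibble holds
    /// the strength for its top edge, the low nibble for its left edge,
    /// following the usual boundary-strength ordering (4 = intra at an MB
    /// edge, 3 = intra inside the MB, 2 = coded residual, 1 = MV/reference
    /// mismatch). `deblock_mode == 1` disables the filter for the slice.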
    pub fn fill_deblock(&mut self, frefs: &SliceRefs, deblock_mode: u8, is_s: bool) {
        if deblock_mode == 1 {
            return;
        }

        self.deblock = [0; 16];

        let tx8x8 = self.get_cur_mb().transform_8x8;

        let cur_intra = self.get_cur_mb().mb_type.is_intra();
        let left_intra = self.get_left_mb().mb_type.is_intra();
        let mut top_intra = self.get_top_mb().mb_type.is_intra();

        let mut coded_cache = [false; 25];
        let mut mv_cache = [[ZERO_MV; 2]; 25];
        let mut ref_cache = [[INVALID_REF; 2]; 25];

        if self.mb_y != 0 || self.has_top {
            for (x, (cc, mv)) in coded_cache[1..5].iter_mut().zip(mv_cache[1..5].iter_mut()).enumerate() {
                let blk4 = self.get_top_blk4(x);
                *cc = blk4.ncoded != 0;
                *mv = blk4.mv;
                if (x & 1) == 0 {
                    let blk8 = self.get_top_blk8(x / 2);
                    ref_cache[x + 1] = blk8.ref_idx;
                } else {
                    ref_cache[x + 1] = ref_cache[x];
                }
            }
        }
        for (y, (ccs, mvs)) in coded_cache[5..].chunks_exact_mut(5).zip(
                mv_cache[5..].chunks_exact_mut(5)).enumerate() {
            if self.has_left || self.mb_x != 0 {
                let blk4 = self.get_left_blk4(y * 4);
                ccs[0] = blk4.ncoded != 0;
                mvs[0] = blk4.mv;
                if (y & 1) == 0 {
                    let blk8 = self.get_left_blk8(y);
                    ref_cache[y * 5 + 5] = blk8.ref_idx;
                } else {
                    ref_cache[y * 5 + 5] = ref_cache[y * 5];
                }
            }
            for (x, (cc, mv)) in ccs[1..].iter_mut().zip(mvs[1..].iter_mut()).enumerate() {
                let blk4 = self.get_cur_blk4(x + y * 4);
                *cc = blk4.ncoded != 0;
                *mv = blk4.mv;
                ref_cache[x + 1 + (y + 1) * 5] = if ((x & 1) == 0) && ((y & 1) == 0) {
                        self.get_cur_blk8(x / 2 + y).ref_idx
                    } else {
                        ref_cache[(x & !1) + 1 + ((y & !1) + 1) * 5]
                    };
            }
        }

        for (y, (((top_ccs, cur_ccs), (top_mvs, cur_mvs)), (cur_refs, top_refs))) in
                coded_cache.chunks_exact(5).take(4).zip(coded_cache[5..].chunks_exact(5)).zip(
                    mv_cache.chunks_exact(5).zip(mv_cache[5..].chunks_exact(5))).zip(
                        ref_cache[5..].chunks_exact(5).zip(ref_cache.chunks_exact(5))).enumerate() {
            let can_do_top = y != 0 || (self.mb_y != 0 && (self.has_top || deblock_mode != 2));
            if can_do_top && (!tx8x8 || (y & 1) == 0) {
                if is_s || cur_intra || top_intra {
                    let val = if y == 0 { 0x40 } else { 0x30 };
                    for el in self.deblock[y * 4..][..4].iter_mut() { *el |= val; }
                } else {
                    for (x, (((&cur_cc, &top_cc), (cur_mv, top_mv)), (&cur_ref, &top_ref))) in
                            cur_ccs[1..].iter().zip(top_ccs[1..].iter()).zip(
                                cur_mvs[1..].iter().zip(top_mvs[1..].iter())).zip(
                                    cur_refs[1..].iter().zip(
                                        top_refs[1..].iter())).take(4).enumerate() {
                        if cur_cc || top_cc {
                            self.deblock[y * 4 + x] |= 0x20;
                        } else {
                            if mvdiff4(cur_mv, top_mv) || !frefs.cmp_refs(cur_ref, top_ref) {
                                self.deblock[y * 4 + x] |= 0x10;
                            }
                        }
                    }
                }
            }
            let mut lleft_intra = left_intra;
            for (x, (((&cur_cc, &left_cc), (cur_mv, left_mv)), (&cur_ref, &left_ref))) in
                    cur_ccs[1..].iter().zip(cur_ccs.iter()).zip(
                        cur_mvs[1..].iter().zip(cur_mvs.iter())).zip(
                            cur_refs[1..].iter().zip(cur_refs.iter())).enumerate() {
                let skip_8 = tx8x8 && (x & 1) != 0;
                let can_do_left = x > 0 || self.has_left || (self.mb_x != 0 && deblock_mode != 2);
                if !can_do_left {
                    continue;
                }
                if skip_8 {
                } else if is_s || cur_intra || lleft_intra {
                    self.deblock[y * 4 + x] |= if x == 0 { 4 } else { 3 };
                } else if cur_cc || left_cc {
                    self.deblock[y * 4 + x] |= 2;
                } else {
                    if mvdiff4(cur_mv, left_mv) || !frefs.cmp_refs(cur_ref, left_ref) {
                        self.deblock[y * 4 + x] |= 1;
                    }
                }
                lleft_intra = cur_intra;
            }
            top_intra = cur_intra;
        }
    }
    pub fn next_mb(&mut self) {
        self.mb_x += 1;
        self.has_left = true;
        if self.mb_x == self.mb_w {
            self.mb_x = 0;
            self.mb_y += 1;
            self.mb.update_row();
            self.blk8.update_row();
            self.blk4.update_row();

            self.has_left = false;
        }
        self.has_top = self.mb_x + self.mb_y * self.mb_w >= self.mb_start + self.mb_w;
    }
    pub fn get_cur_mb_idx(&self) -> usize { self.mb.xpos + self.mb_x }
    pub fn get_cur_blk8_idx(&self, blk_no: usize) -> usize {
        self.blk8.xpos + self.mb_x * 2 + (blk_no & 1) + (blk_no >> 1) * self.blk8.stride
    }
    pub fn get_cur_blk4_idx(&self, blk_no: usize) -> usize {
        self.blk4.xpos + self.mb_x * 4 + (blk_no & 3) + (blk_no >> 2) * self.blk4.stride
    }
    pub fn get_cur_mb(&mut self) -> &mut MBData {
        let idx = self.get_cur_mb_idx();
        &mut self.mb.data[idx]
    }
    pub fn get_left_mb(&self) -> &MBData {
        &self.mb.data[self.get_cur_mb_idx() - 1]
    }
    pub fn get_top_mb(&self) -> &MBData {
        &self.mb.data[self.get_cur_mb_idx() - self.mb.stride]
    }
    pub fn get_cur_blk8(&mut self, blk_no: usize) -> &mut Blk8Data {
        let idx = self.get_cur_blk8_idx(blk_no);
        &mut self.blk8.data[idx]
    }
    pub fn get_left_blk8(&self, blk_no: usize) -> &Blk8Data {
        &self.blk8.data[self.get_cur_blk8_idx(blk_no) - 1]
    }
    pub fn get_top_blk8(&self, blk_no: usize) -> &Blk8Data {
        &self.blk8.data[self.get_cur_blk8_idx(blk_no) - self.blk8.stride]
    }
    pub fn get_cur_blk4(&mut self, blk_no: usize) -> &mut Blk4Data {
        let idx = self.get_cur_blk4_idx(blk_no);
        &mut self.blk4.data[idx]
    }
    pub fn get_left_blk4(&self, blk_no: usize) -> &Blk4Data {
        &self.blk4.data[self.get_cur_blk4_idx(blk_no) - 1]
    }
    pub fn get_top_blk4(&self, blk_no: usize) -> &Blk4Data {
        &self.blk4.data[self.get_cur_blk4_idx(blk_no) - self.blk4.stride]
    }

    pub fn apply_to_blk8<F: (Fn(&mut Blk8Data))>(&mut self, f: F) {
        let start = self.get_cur_blk8_idx(0);
        for row in self.blk8.data[start..].chunks_mut(self.blk8.stride).take(2) {
            for el in row[..2].iter_mut() {
                f(el);
            }
        }
    }
    pub fn apply_to_blk4<F: (Fn(&mut Blk4Data))>(&mut self, f: F) {
        let start = self.get_cur_blk4_idx(0);
        for row in self.blk4.data[start..].chunks_mut(self.blk4.stride).take(4) {
            for el in row[..4].iter_mut() {
                f(el);
            }
        }
    }

    pub fn fill_ipred(&mut self, imode: IntraPredMode) {
        self.apply_to_blk4(|blk| blk.ipred = imode);
    }
    pub fn fill_ncoded(&mut self, nc: u8) {
        self.apply_to_blk4(|blk| blk.ncoded = nc);
        self.apply_to_blk8(|blk| blk.ncoded_c = [nc; 2]);
    }
    pub fn reset_mb_mv(&mut self) {
        self.apply_to_blk8(|blk| blk.ref_idx = [INVALID_REF; 2]);
    }

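    /// Derives the context increments for coding a motion vector difference
    /// (CABAC) from the clamped absolute MVDs of the left and top
    /// neighbouring 4x4 blocks.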
    pub fn get_mv_ctx(&self, xoff: usize, yoff: usize, ref_l: usize) -> (usize, usize) {
        let blk_no = xoff / 4 + yoff;
        let mv_a = self.get_left_blk4(blk_no).mvd[ref_l];
        let mv_b = self.get_top_blk4(blk_no).mvd[ref_l];
        let mv = mv_a + mv_b;
        let ctx0 = if mv.x < 3 { 0 } else if mv.x <= 32 { 1 } else { 2 };
        let ctx1 = if mv.y < 3 { 0 } else if mv.y <= 32 { 1 } else { 2 };
        (ctx0, ctx1)
    }
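    /// Derives the context for coding a reference index from the left and top
    /// neighbouring 8x8 blocks (non-zero, non-direct references contribute).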
    pub fn get_mv_ref_ctx(&self, xoff: usize, yoff: usize, ref_l: usize) -> usize {
        let blk_no = xoff / 8 + (yoff / 8) * 2;
        let mut ctx = 0;
        let left_ref = self.get_left_blk8(blk_no).ref_idx[ref_l];
        let top_ref = self.get_top_blk8(blk_no).ref_idx[ref_l];
        if !left_ref.not_avail() && !left_ref.is_direct() && left_ref.index() > 0 {
            ctx += 1;
        }
        if !top_ref.not_avail() && !top_ref.is_direct() && top_ref.index() > 0 {
            ctx += 2;
        }
        ctx
    }
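    /// Computes the motion vector predictor for one partition (median of the
    /// left, top and top-right/top-left neighbours, with the 16x8/8x16
    /// special cases), adds the decoded MV difference and fills the MV and
    /// reference index caches for the partition.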
    #[allow(clippy::if_same_then_else)]
    pub fn predict(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, diff_mv: MV, ref_idx: PicRef) {
        let midx = self.get_cur_blk4_idx(0) + xpos / 4 + ypos / 4 * self.blk4.stride;
        let ridx = self.get_cur_blk8_idx(0) + xpos / 8 + ypos / 8 * self.blk8.stride;
        let ridx_c = self.get_cur_blk8_idx(0) + (xpos + bw) / 8 + ypos / 8 * self.blk8.stride - if (ypos & 4) == 0 { self.blk8.stride } else { 0 };

        let mv_a = self.blk4.data[midx - 1].mv[ref_l];
        let mv_b = self.blk4.data[midx - self.blk4.stride].mv[ref_l];
        let mut mv_c = self.blk4.data[midx - self.blk4.stride + bw / 4].mv[ref_l];

        let rx = if (xpos & 4) != 0 { 0 } else { 1 };
        let ry = if (ypos & 4) != 0 { 0 } else { self.blk8.stride };
        let ref_a = self.blk8.data[ridx - rx].ref_idx[ref_l];
        let ref_b = self.blk8.data[ridx - ry].ref_idx[ref_l];
        let mut ref_c = self.blk8.data[ridx_c].ref_idx[ref_l];

        if ref_c == MISSING_REF || (((xpos + bw) & 4) == 0 && (ypos & 4) != 0) {
            mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv[ref_l];
            ref_c = self.blk8.data[ridx - rx - ry].ref_idx[ref_l];
        }

        let pred_mv = if bw == 16 && bh == 8 && ypos == 0 && ref_b == ref_idx {
                mv_b
            } else if bw == 16 && bh == 8 && ypos != 0 && ref_a == ref_idx {
                mv_a
            } else if bw == 8 && bh == 16 && xpos == 0 && ref_a == ref_idx {
                mv_a
            } else if bw == 8 && bh == 16 && xpos != 0 && ref_c == ref_idx {
                mv_c
            } else if ref_b == MISSING_REF && ref_c == MISSING_REF {
                mv_a
            } else {
                let count = ((ref_a == ref_idx) as u8) + ((ref_b == ref_idx) as u8) + ((ref_c == ref_idx) as u8);
                if count == 1 {
                    if ref_a == ref_idx {
                        mv_a
                    } else if ref_b == ref_idx {
                        mv_b
                    } else {
                        mv_c
                    }
                } else {
                    MV::pred(mv_a, mv_b, mv_c)
                }
            };

        let mv = pred_mv + diff_mv;
        self.fill_mv (xpos, ypos, bw, bh, ref_l, mv);
        self.fill_ref(xpos, ypos, bw, bh, ref_l, ref_idx);
    }
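    /// Derives the motion vector for a P_Skip macroblock: zero MV when the
    /// left or top neighbour is unavailable or is a zero-MV block referencing
    /// the first picture in list 0, median prediction otherwise.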
    pub fn predict_pskip(&mut self) {
        let midx = self.get_cur_blk4_idx(0);
        let ridx = self.get_cur_blk8_idx(0);

        let mv_a = self.blk4.data[midx - 1].mv[0];
        let mv_b = self.blk4.data[midx - self.blk4.stride].mv[0];
        let mut mv_c = self.blk4.data[midx - self.blk4.stride + 4].mv[0];

        let ref_a = self.blk8.data[ridx - 1].ref_idx[0];
        let ref_b = self.blk8.data[ridx - self.blk8.stride].ref_idx[0];
        let mut ref_c = self.blk8.data[ridx - self.blk8.stride + 2].ref_idx[0];

        if ref_c == MISSING_REF {
            mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv[0];
            ref_c = self.blk8.data[ridx - self.blk8.stride - 1].ref_idx[0];
        }

        let ref_idx = ZERO_REF;
        let mv = if ref_a == MISSING_REF || ref_b == MISSING_REF || (ref_a == ZERO_REF && mv_a == ZERO_MV) || (ref_b == ZERO_REF && mv_b == ZERO_MV) {
                ZERO_MV
            } else {
                let count = ((ref_a == ref_idx) as u8) + ((ref_b == ref_idx) as u8) + ((ref_c == ref_idx) as u8);
                if count == 1 {
                    if ref_a == ref_idx {
                        mv_a
                    } else if ref_b == ref_idx {
                        mv_b
                    } else {
                        mv_c
                    }
                } else {
                    MV::pred(mv_a, mv_b, mv_c)
                }
            };

        self.fill_mv (0, 0, 16, 16, 0, mv);
        self.fill_ref(0, 0, 16, 16, 0, ref_idx);
    }
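    /// Fills motion vectors and reference indices for a whole direct/skip
    /// macroblock in a B slice: per 8x8 block when `direct_8x8` is set, once
    /// for the whole MB when the co-located MB uses a single 16x16 reference
    /// or spatial prediction is selected, per 4x4 block otherwise.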
    pub fn predict_direct_mb(&mut self, frame_refs: &SliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) {
        let (col_mb, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
        if direct_8x8 {
            for blk4 in 0..16 {
                let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, BLK4_TO_D8[blk4]);
                self.get_cur_blk4(blk4).mv = [mv0, mv1];
                self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
            }
        } else if col_mb.mb_type.is_16x16_ref() || !temporal_mv {
            let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, 0);
            self.apply_to_blk4(|blk4| blk4.mv = [mv0, mv1]);
            self.apply_to_blk8(|blk8| blk8.ref_idx = [ref0, ref1]);
        } else {
            for blk4 in 0..16 {
                let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, blk4);
                self.get_cur_blk4(blk4).mv = [mv0, mv1];
                self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
            }
        }
    }
    pub fn predict_direct_sub(&mut self, frame_refs: &SliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) {
        let src_blk = if !direct8x8 { blk4 } else { BLK4_TO_D8[blk4] };
        let (mbi, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
        let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &mbi, r1_poc, r1_long, temporal_mv, cur_id, src_blk);
        self.get_cur_blk4(blk4).mv = [mv0, mv1];
        self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
    }
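    /// Derives the direct-mode motion vectors and references for one 4x4
    /// block, roughly following the temporal/spatial direct prediction rules:
    /// temporal direct scales the co-located list 0 MV by the POC distances,
    /// while spatial direct takes the minimum neighbouring references and the
    /// median MV, keeping a list MV at zero when its reference is 0 and the
    /// co-located block is effectively static (short-term ref 0, tiny MV).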
    #[allow(clippy::nonminimal_bool)]
    pub fn get_direct_mv(&self, frame_refs: &SliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) {
        let blk8 = blk4_to_blk8(blk4);
        let (col_mv, r0_poc, col_idx) = if mbi.ref_poc[blk8] == [MISSING_POC; 2] {
                (ZERO_MV, MISSING_POC, MISSING_REF)
            } else if mbi.ref_poc[blk8][0] != MISSING_POC {
                (mbi.mv[blk4][0], mbi.ref_poc[blk8][0], mbi.ref_idx[blk8][0])
            } else {
                (mbi.mv[blk4][1], mbi.ref_poc[blk8][1], mbi.ref_idx[blk8][1])
            };
        let (col_ref, r0_long) = frame_refs.map_ref0(r0_poc);
        if temporal_mv {
            let td = (i32::from(r1_poc) - i32::from(r0_poc)).max(-128).min(127);
            if r0_long || td == 0 {
                (col_mv, col_ref, ZERO_MV, ZERO_REF)
            } else {
                let tx = (16384 + (td / 2).abs()) / td;
                let tb = (i32::from(cur_id) - i32::from(r0_poc)).max(-128).min(127);
                let scale = ((tb * tx + 32) >> 6).max(-1024).min(1023);
                let mv0 = MV {
                        x: ((i32::from(col_mv.x) * scale + 128) >> 8) as i16,
                        y: ((i32::from(col_mv.y) * scale + 128) >> 8) as i16,
                    };
                let mv1 = mv0 - col_mv;
                (mv0, col_ref, mv1, ZERO_REF)
            }
        } else {
            let blk4 = 0; // we generate the same MV prediction for the whole MB
            let blk8 = blk4_to_blk8(blk4);
            let midx = self.get_cur_blk4_idx(blk4);
            let ridx = self.get_cur_blk8_idx(blk8);
            let ridx_c = self.get_cur_blk8_idx(blk8) + 16 / 8 - self.blk8.stride;

            let mv_a = self.blk4.data[midx - 1].mv;
            let mv_b = self.blk4.data[midx - self.blk4.stride].mv;
            let mut mv_c = self.blk4.data[midx - self.blk4.stride + 16 / 4].mv;

            let ref_a = self.blk8.data[ridx - 1].ref_idx;
            let ref_b = self.blk8.data[ridx - self.blk8.stride].ref_idx;
            let mut ref_c = self.blk8.data[ridx_c].ref_idx;

            if ref_c == [MISSING_REF; 2] {
                mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv;
                ref_c = self.blk8.data[ridx - self.blk8.stride - 1].ref_idx;
            }
            let mut refs = [INVALID_REF; 2];
            for cur_ref in [ref_a, ref_b, ref_c].iter() {
                refs[0] = refs[0].min_pos(cur_ref[0]);
                refs[1] = refs[1].min_pos(cur_ref[1]);
            }
            if refs == [INVALID_REF; 2] {
                return (ZERO_MV, ZERO_REF, ZERO_MV, ZERO_REF);
            }

            let mut col_zero = true;
            if r1_long || col_idx != ZERO_REF {
                col_zero = false;
            }
            if col_mv.x.abs() > 1 || col_mv.y.abs() > 1 {
                col_zero = false;
            }
            let mut mvs = [ZERO_MV; 2];
            for ref_l in 0..2 {
                if mbi.mb_type.is_intra() || (!refs[ref_l].not_avail() && !(refs[ref_l] == ZERO_REF && col_zero)) {
                    let ref_idx = refs[ref_l];
                    mvs[ref_l] = if ref_b[ref_l] == MISSING_REF && ref_c[ref_l] == MISSING_REF {
                            mv_a[ref_l]
                        } else {
                            let count = ((ref_a[ref_l] == ref_idx) as u8) + ((ref_b[ref_l] == ref_idx) as u8) + ((ref_c[ref_l] == ref_idx) as u8);
                            if count == 1 {
                                if ref_a[ref_l] == ref_idx {
                                    mv_a[ref_l]
                                } else if ref_b[ref_l] == ref_idx {
                                    mv_b[ref_l]
                                } else {
                                    mv_c[ref_l]
                                }
                            } else {
                                MV::pred(mv_a[ref_l], mv_b[ref_l], mv_c[ref_l])
                            }
                        };
                }
            }
            (mvs[0], refs[0], mvs[1], refs[1])
        }
    }
    pub fn fill_mv(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, mv: MV) {
        let start = self.get_cur_blk4_idx(0) + xpos / 4 + ypos / 4 * self.blk4.stride;
        for row in self.blk4.data[start..].chunks_mut(self.blk4.stride).take(bh / 4) {
            for blk in row[..bw / 4].iter_mut() {
                blk.mv[ref_l] = mv;
            }
        }
    }
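    /// Stores clamped absolute MV difference values for the partition; these
    /// are used for context derivation in `get_mv_ctx()`.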
    pub fn fill_mvd(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, mv: MV) {
        let mvd = MV{ x: mv.x.abs().min(128), y: mv.y.abs().min(128) };
        let start = self.get_cur_blk4_idx(0) + xpos / 4 + ypos / 4 * self.blk4.stride;
        for row in self.blk4.data[start..].chunks_mut(self.blk4.stride).take(bh / 4) {
            for blk in row[..bw / 4].iter_mut() {
                blk.mvd[ref_l] = mvd;
            }
        }
    }
    pub fn fill_ref(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, ref_idx: PicRef) {
        let start = self.get_cur_blk8_idx(0) + xpos / 8 + ypos / 8 * self.blk8.stride;
        if bw < 8 || bh < 8 {
            self.blk8.data[start].ref_idx[ref_l] = ref_idx;
        } else {
            for row in self.blk8.data[start..].chunks_mut(self.blk8.stride).take(bh / 8) {
                for blk in row[..bw / 8].iter_mut() {
                    blk.ref_idx[ref_l] = ref_idx;
                }
            }
        }
    }
}

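// Returns true if the motion vectors of two neighbouring blocks differ by four
// or more quarter-pel units in any component of either list, i.e. the
// difference that triggers deblocking strength 1.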
#[cfg(not(target_arch="x86_64"))]
fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
    let mvd0 = mv1[0] - mv2[0];
    let mvd1 = mv1[1] - mv2[1];
    (mvd0.x.abs() >= 4) || (mvd0.y.abs() >= 4) || (mvd1.x.abs() >= 4) || (mvd1.y.abs() >= 4)
}

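// Same check as above, but the two MV pairs (four i16 components each) are
// reinterpreted as u64 values and compared lane by lane: after each shift the
// low 16 bits of the wrapping difference equal the i16 component difference,
// and `tmp.wrapping_add(3) > 6` tests whether it lies outside [-3, 3].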
#[cfg(target_arch="x86_64")]
fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
    unsafe {
        let mut flag = false;
        let ptr = std::mem::transmute::<*const MV, *const u64>(mv1.as_ptr());
        let mut m0 = *ptr;
        let ptr = std::mem::transmute::<*const MV, *const u64>(mv2.as_ptr());
        let mut m1 = *ptr;
        for _ in 0..4 {
            let tmp = m0.wrapping_sub(m1) as u16;
            flag |= tmp.wrapping_add(3) > 6;
            m0 >>= 16;
            m1 >>= 16;
        }
        flag
    }
}