h264: more micro-optimisations
[nihav.git] nihav-itu/src/codecs/h264/types.rs
use nihav_core::frame::{NAVideoBuffer, NASimpleVideoFrame};
use nihav_codec_support::codecs::{MV, ZERO_MV};
use nihav_codec_support::data::GenericCache;
use super::SimplifiedSliceRefs;
use super::pic_ref::FrameMBInfo;

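/// Raw frame data plus per-plane offsets and strides extracted once from an `NAVideoBuffer`.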
#[derive(Clone,Copy)]
pub struct SimpleFrame<'a> {
    pub data: &'a [u8],
    pub offset: [usize; 3],
    pub stride: [usize; 3],
}

impl<'a> SimpleFrame<'a> {
    pub fn new(buf: &'a NAVideoBuffer<u8>) -> Self {
        let mut offset = [0; 3];
        let mut stride = [0; 3];
        for (plane, (offs, strd)) in offset.iter_mut().zip(stride.iter_mut()).enumerate() {
            *offs = buf.get_offset(plane);
            *strd = buf.get_stride(plane);
        }
        Self {
            data: buf.get_data(),
            offset, stride
        }
    }
}

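/// Prediction direction of a B-slice partition: list 0, list 1 or bi-directional.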
#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq)]
pub enum BMode {
    L0,
    L1,
    Bi,
}

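/// Macroblock type as coded in the bitstream, with per-partition B prediction
/// modes and Intra16x16 parameters kept alongside the variant.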
#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum MBType {
    #[default]
    Intra4x4,
    Intra8x8,
    Intra16x16(u8, u8, u8),
    PCM,

    P16x16,
    P16x8,
    P8x16,
    P8x8,
    P8x8Ref0,
    PSkip,

    Direct,
    B16x16(BMode),
    B16x8(BMode, BMode),
    B8x16(BMode, BMode),
    B8x8,
    BSkip,
}

impl MBType {
    pub fn is_intra(self) -> bool {
        matches!(self, MBType::Intra4x4 | MBType::Intra8x8 | MBType::Intra16x16(_, _, _) | MBType::PCM)
    }
    pub fn is_intra16x16(self) -> bool {
        matches!(self, MBType::Intra16x16(_, _, _))
    }
    pub fn is_skip(self) -> bool {
        matches!(self, MBType::PSkip | MBType::BSkip)
    }
    pub fn is_4x4(self) -> bool { self.num_parts() == 4 }
    pub fn is_l0(self, part: usize) -> bool {
        match self {
            MBType::B16x16(mode) => mode == BMode::L0,
            MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
                if part == 0 {
                    mode0 == BMode::L0
                } else {
                    mode1 == BMode::L0
                }
            },
            MBType::Direct | MBType::BSkip => false,
            _ => true,
        }
    }
    pub fn is_l1(self, part: usize) -> bool {
        match self {
            MBType::B16x16(mode) => mode == BMode::L1,
            MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
                if part == 0 {
                    mode0 == BMode::L1
                } else {
                    mode1 == BMode::L1
                }
            },
            _ => false,
        }
    }
    pub fn num_parts(self) -> usize {
        match self {
            MBType::Intra4x4 | MBType::Intra8x8 | MBType::Intra16x16(_, _, _) | MBType::PCM |
            MBType::PSkip |
            MBType::Direct | MBType::BSkip
                => 1,
            MBType::P16x16 |
            MBType::B16x16(_)
                => 1,
            MBType::P16x8 | MBType::P8x16 |
            MBType::B16x8(_, _) | MBType::B8x16(_, _)
                => 2,
            _ => 4,
        }
    }
    pub fn size(self) -> (usize, usize) {
        match self {
            MBType::Intra4x4 |
            MBType::Intra8x8 |
            MBType::Intra16x16(_, _, _) |
            MBType::PCM |
            MBType::P16x16 |
            MBType::PSkip |
            MBType::Direct |
            MBType::B16x16(_) |
            MBType::BSkip
                => (16, 16),
            MBType::P16x8 | MBType::B16x8(_, _) => (16, 8),
            MBType::P8x16 | MBType::B8x16(_, _) => (8, 16),
            _ => (8, 8),
        }
    }
}

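/// Sub-partition type for an 8x8 block of a P8x8/B8x8 macroblock.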
#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum SubMBType {
    P8x8,
    P8x4,
    P4x8,
    P4x4,
    #[default]
    Direct8x8,
    B8x8(BMode),
    B8x4(BMode),
    B4x8(BMode),
    B4x4(BMode),
}

impl SubMBType {
    pub fn num_parts(self) -> usize {
        match self {
            SubMBType::P8x8 | SubMBType::Direct8x8 | SubMBType::B8x8(_) => 1,
            SubMBType::P4x4 | SubMBType::B4x4(_) => 4,
            _ => 2,
        }
    }
    pub fn size(self) -> (usize, usize) {
        match self {
            SubMBType::P8x8 | SubMBType::Direct8x8 | SubMBType::B8x8(_) => (8, 8),
            SubMBType::P8x4 | SubMBType::B8x4(_) => (8, 4),
            SubMBType::P4x8 | SubMBType::B4x8(_) => (4, 8),
            SubMBType::P4x4 | SubMBType::B4x4(_) => (4, 4),
        }
    }
    pub fn is_l0(self) -> bool {
        match self {
            SubMBType::B8x8(mode) | SubMBType::B8x4(mode) |
            SubMBType::B4x8(mode) | SubMBType::B4x4(mode) => {
                mode == BMode::L0
            },
            _ => true,
        }
    }
    pub fn is_l1(self) -> bool {
        match self {
            SubMBType::B8x8(mode) | SubMBType::B8x4(mode) |
            SubMBType::B4x8(mode) | SubMBType::B4x4(mode) => {
                mode == BMode::L1
            },
            _ => false,
        }
    }
}

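/// Macroblock type with its parameters stripped, compact enough to be stored
/// in the per-macroblock context cache.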
#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum CompactMBType {
    Intra4x4,
    Intra8x8,
    Intra16x16,
    PCM,

    P16x16,
    P16x8,
    P8x16,
    P8x8,
    P8x8Ref0,
    PSkip,

    Direct,
    B16x16,
    B16x8,
    B8x16,
    B8x8,
    BSkip,

    #[default]
    None,
}

impl CompactMBType {
    pub fn is_intra(self) -> bool {
        matches!(self, CompactMBType::Intra4x4 | CompactMBType::Intra8x8 | CompactMBType::Intra16x16)
    }
    pub fn is_intra16orpcm(self) -> bool {
        matches!(self, CompactMBType::Intra16x16 | CompactMBType::PCM)
    }
    pub fn is_skip(self) -> bool {
        matches!(self, CompactMBType::PSkip | CompactMBType::BSkip)
    }
    pub fn is_direct(self) -> bool {
        matches!(self, CompactMBType::BSkip | CompactMBType::Direct | CompactMBType::None)
    }
    pub fn is_inter(self) -> bool {
        !self.is_intra() && !self.is_skip() && self != CompactMBType::PCM
    }
    pub fn is_16x16_ref(self) -> bool {
        matches!(self,
            CompactMBType::Intra4x4 |
            CompactMBType::Intra8x8 |
            CompactMBType::Intra16x16 |
            CompactMBType::PCM |
            CompactMBType::P16x16 |
            CompactMBType::B16x16)
    }
}

impl From<MBType> for CompactMBType {
    fn from(mbtype: MBType) -> Self {
        match mbtype {
            MBType::Intra4x4 => CompactMBType::Intra4x4,
            MBType::Intra8x8 => CompactMBType::Intra8x8,
            MBType::Intra16x16(_, _, _) => CompactMBType::Intra16x16,
            MBType::PCM => CompactMBType::PCM,
            MBType::P16x16 => CompactMBType::P16x16,
            MBType::P16x8 => CompactMBType::P16x8,
            MBType::P8x16 => CompactMBType::P8x16,
            MBType::P8x8 => CompactMBType::P8x8,
            MBType::P8x8Ref0 => CompactMBType::P8x8Ref0,
            MBType::PSkip => CompactMBType::PSkip,
            MBType::Direct => CompactMBType::Direct,
            MBType::B16x16(_) => CompactMBType::B16x16,
            MBType::B16x8(_, _) => CompactMBType::B16x8,
            MBType::B8x16(_, _) => CompactMBType::B8x16,
            MBType::B8x8 => CompactMBType::B8x8,
            MBType::BSkip => CompactMBType::BSkip,
        }
    }
}

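/// Intra prediction mode in bitstream order; `None` marks an unavailable or
/// not-yet-decoded block.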
#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum IntraPredMode {
    Vertical,
    Horizontal,
    DC,
    DiagDownLeft,
    DiagDownRight,
    VerRight,
    HorDown,
    VerLeft,
    HorUp,
    #[default]
    None,
}

impl IntraPredMode {
    pub fn is_none(self) -> bool { self == IntraPredMode::None }
    pub fn into_pred_idx(self) -> i8 {
        if !self.is_none() {
            self as u8 as i8
        } else {
            -1
        }
    }
}

impl From<u8> for IntraPredMode {
    fn from(val: u8) -> Self {
        match val {
            0 => IntraPredMode::Vertical,
            1 => IntraPredMode::Horizontal,
            2 => IntraPredMode::DC,
            3 => IntraPredMode::DiagDownLeft,
            4 => IntraPredMode::DiagDownRight,
            5 => IntraPredMode::VerRight,
            6 => IntraPredMode::HorDown,
            7 => IntraPredMode::VerLeft,
            8 => IntraPredMode::HorUp,
            _ => IntraPredMode::None,
        }
    }
}

impl From<IntraPredMode> for u8 {
    fn from(val: IntraPredMode) -> Self {
        match val {
            IntraPredMode::Vertical => 0,
            IntraPredMode::Horizontal => 1,
            IntraPredMode::DC => 2,
            IntraPredMode::DiagDownLeft => 3,
            IntraPredMode::DiagDownRight => 4,
            IntraPredMode::VerRight => 5,
            IntraPredMode::HorDown => 6,
            IntraPredMode::VerLeft => 7,
            IntraPredMode::HorUp => 8,
            _ => 9,
        }
    }
}

pub const MISSING_POC: u16 = 0xFFFF;

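/// Reference index into a picture list packed into one byte, with a flag bit
/// marking references obtained through direct prediction; special values
/// denote missing and invalid references.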
#[derive(Clone,Copy,Debug)]
pub struct PicRef {
    ref_idx: u8
}

pub const MISSING_REF: PicRef = PicRef { ref_idx: 0xFF };
pub const INVALID_REF: PicRef = PicRef { ref_idx: 0xFE };
pub const ZERO_REF: PicRef = PicRef { ref_idx: 0 };
const DIRECT_FLAG: u8 = 0x40;

impl PicRef {
    pub fn new(ref_idx: u8) -> Self {
        Self { ref_idx }
    }
    pub fn not_avail(self) -> bool {
        self == MISSING_REF || self == INVALID_REF
    }
    pub fn index(self) -> usize { (self.ref_idx & !DIRECT_FLAG) as usize }
    pub fn is_direct(self) -> bool { (self.ref_idx & DIRECT_FLAG) != 0 }
    pub fn set_direct(&mut self) { self.ref_idx |= DIRECT_FLAG; }
    fn min_pos(self, other: Self) -> Self {
        match (self.not_avail(), other.not_avail()) {
            (true, true) => self,
            (false, true) => self,
            (true, false) => other,
            (false, false) => PicRef::new((self.ref_idx & !DIRECT_FLAG).min(other.ref_idx & !DIRECT_FLAG)),
        }
    }
}

impl Default for PicRef {
    fn default() -> Self { MISSING_REF }
}

impl PartialEq for PicRef {
    fn eq(&self, other: &Self) -> bool {
        (self.ref_idx | DIRECT_FLAG) == (other.ref_idx | DIRECT_FLAG)
    }
}

impl std::fmt::Display for PicRef {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if *self == MISSING_REF {
            write!(f, "-1")
        } else if *self == INVALID_REF {
            write!(f, "-2")
        } else {
            write!(f, "{}", self.ref_idx & !DIRECT_FLAG)
        }
    }
}

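/// Per-macroblock data (type, coded block pattern, quantisers, transform size)
/// kept in the sliding macroblock cache.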
#[derive(Clone,Copy,Default)]
pub struct MBData {
    pub mb_type: CompactMBType,
    pub cbp: u8,
    pub coded_flags: u32,
    pub cmode: u8,
    pub qp_y: u8,
    pub qp_u: u8,
    pub qp_v: u8,
    pub transform_8x8: bool,
}

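/// Maps a 4x4 block index (raster order within the macroblock) to the index
/// of the 8x8 block containing it.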
pub fn blk4_to_blk8(blk4: usize) -> usize {
    /*const MAP: [usize; 16] = [ 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 ];
    MAP[blk4 & 0xF]*/
    ((blk4 & 2) >> 1) | ((blk4 & 8) >> 2)
}

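/// Per-8x8-block data: reference indices for both prediction lists and the
/// chroma coded-coefficient counts.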
#[derive(Clone,Copy)]
pub struct Blk8Data {
    pub ref_idx: [PicRef; 2],
    pub ncoded_c: [u8; 2],
}

impl Default for Blk8Data {
    fn default() -> Self {
        Self {
            ref_idx: [MISSING_REF; 2],
            ncoded_c: [0; 2],
        }
    }
}

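/// Per-4x4-block data: coded coefficient count, intra prediction mode, and
/// motion vectors plus motion vector differences for both reference lists.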
#[derive(Clone,Copy,Default)]
pub struct Blk4Data {
    pub ncoded: u8,
    pub ipred: IntraPredMode,
    pub mv: [MV; 2],
    pub mvd: [MV; 2],
}

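/// Mutable state for the slice being decoded: current macroblock position,
/// cached per-MB and per-block data, deblocking strengths, and the pixel rows
/// and columns saved for intra prediction of the following macroblocks.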
pub struct SliceState {
    pub mb_x: usize,
    pub mb_y: usize,
    pub mb_w: usize,
    pub mb_h: usize,
    pub mb_start: usize,

    pub mb: GenericCache<MBData>,
    pub blk8: GenericCache<Blk8Data>,
    pub blk4: GenericCache<Blk4Data>,

    pub deblock: [u8; 16],

    pub has_top: bool,
    pub has_left: bool,

    pub top_line_y: Vec<u8>,
    pub left_y: [u8; 17], // first element is top-left
    pub top_line_c: [Vec<u8>; 2],
    pub left_c: [[u8; 9]; 2],
}

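// Maps a 4x4 block index to the corner 4x4 block (0, 3, 12 or 15) of its 8x8
// partition, as used with direct 8x8 inference.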
const BLK4_TO_D8: [usize; 16] = [ 0, 0, 3, 3, 0, 0, 3, 3, 12, 12, 15, 15, 12, 12, 15, 15 ];

impl SliceState {
    pub fn new() -> Self {
        Self {
            mb_x: 0,
            mb_y: 0,
            mb_w: 0,
            mb_h: 0,
            mb_start: 0,
            mb: GenericCache::new(0, 0, MBData::default()),
            blk8: GenericCache::new(0, 0, Blk8Data::default()),
            blk4: GenericCache::new(0, 0, Blk4Data::default()),

            deblock: [0; 16],

            has_top: false,
            has_left: false,

            top_line_y: Vec::new(),
            left_y: [0; 17],
            top_line_c: [Vec::new(), Vec::new()],
            left_c: [[0; 9]; 2],
        }
    }
    pub fn reset(&mut self, mb_w: usize, mb_h: usize, mb_pos: usize) {
        self.mb_w = mb_w;
        self.mb_h = mb_h;
        self.mb_start = mb_pos;
        if mb_w > 0 {
            self.mb_x = mb_pos % mb_w;
            self.mb_y = mb_pos / mb_w;
        } else {
            self.mb_x = 0;
            self.mb_y = 0;
        }
        self.mb = GenericCache::new(1, mb_w + 2, MBData::default());
        self.blk8 = GenericCache::new(2, mb_w * 2 + 2, Blk8Data::default());
        self.blk4 = GenericCache::new(4, mb_w * 4 + 2, Blk4Data::default());

        self.has_top = false;
        self.has_left = false;

        self.top_line_y.resize(mb_w * 16 + 1, 0x80);
        self.top_line_c[0].resize(mb_w * 8 + 1, 0x80);
        self.top_line_c[1].resize(mb_w * 8 + 1, 0x80);
        self.left_y = [0x80; 17];
        self.left_c = [[0x80; 9]; 2];
    }
    pub fn save_ipred_context(&mut self, frm: &NASimpleVideoFrame<u8>) {
        let dstoff = self.mb_x * 16;
        let srcoff = frm.offset[0] + self.mb_x * 16 + self.mb_y * 16 * frm.stride[0];
        self.left_y[0] = self.top_line_y[dstoff + 15];
        self.top_line_y[dstoff..][..16].copy_from_slice(&frm.data[srcoff + frm.stride[0] * 15..][..16]);
        for (dst, src) in self.left_y[1..].iter_mut().zip(frm.data[srcoff..].chunks(frm.stride[0])) {
            *dst = src[15];
        }
        for chroma in 0..2 {
            let cstride = frm.stride[chroma + 1];
            let dstoff = self.mb_x * 8;
            let srcoff = frm.offset[chroma + 1] + self.mb_x * 8 + self.mb_y * 8 * cstride;
            self.left_c[chroma][0] = self.top_line_c[chroma][dstoff + 7];
            self.top_line_c[chroma][dstoff..][..8].copy_from_slice(&frm.data[srcoff + cstride * 7..][..8]);
            for (dst, src) in self.left_c[chroma][1..].iter_mut().zip(frm.data[srcoff..].chunks(cstride)) {
                *dst = src[7];
            }
        }
    }
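    /// Computes deblocking strengths for the current macroblock (unless
    /// deblocking is disabled). Each of the sixteen entries packs the top
    /// edge strength of a 4x4 block into the high nibble and its left edge
    /// strength into the low nibble, larger values meaning stronger filtering.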
    pub fn fill_deblock(&mut self, frefs: &SimplifiedSliceRefs, deblock_mode: u8, is_s: bool) {
        if deblock_mode == 1 {
            return;
        }

        self.deblock = [0; 16];

        let tx8x8 = self.get_cur_mb().transform_8x8;

        let cur_intra = self.get_cur_mb().mb_type.is_intra();
        let left_intra = self.get_left_mb().mb_type.is_intra();
        let mut top_intra = self.get_top_mb().mb_type.is_intra();

        let mut coded_cache = [false; 25];
        let mut mv_cache = [[ZERO_MV; 2]; 25];
        let mut ref_cache = [[INVALID_REF; 2]; 25];

        // gather coded flags, motion vectors and reference indices of the
        // current macroblock and its top/left neighbours into 5x5 caches
        if self.mb_y != 0 || self.has_top {
            for (x, (cc, mv)) in coded_cache[1..5].iter_mut().zip(mv_cache[1..5].iter_mut()).enumerate() {
                let blk4 = self.get_top_blk4(x);
                *cc = blk4.ncoded != 0;
                *mv = blk4.mv;
                if (x & 1) == 0 {
                    let blk8 = self.get_top_blk8(x / 2);
                    ref_cache[x + 1] = blk8.ref_idx;
                } else {
                    ref_cache[x + 1] = ref_cache[x];
                }
            }
        }
        for (y, (ccs, mvs)) in coded_cache[5..].chunks_exact_mut(5).zip(
                mv_cache[5..].chunks_exact_mut(5)).enumerate() {
            if self.has_left || self.mb_x != 0 {
                let blk4 = self.get_left_blk4(y * 4);
                ccs[0] = blk4.ncoded != 0;
                mvs[0] = blk4.mv;
                if (y & 1) == 0 {
                    let blk8 = self.get_left_blk8(y);
                    ref_cache[y * 5 + 5] = blk8.ref_idx;
                } else {
                    ref_cache[y * 5 + 5] = ref_cache[y * 5];
                }
            }
            for (x, (cc, mv)) in ccs[1..].iter_mut().zip(mvs[1..].iter_mut()).enumerate() {
                let blk4 = self.get_cur_blk4(x + y * 4);
                *cc = blk4.ncoded != 0;
                *mv = blk4.mv;
                ref_cache[x + 1 + (y + 1) * 5] = if ((x & 1) == 0) && ((y & 1) == 0) {
                        self.get_cur_blk8(x / 2 + y).ref_idx
                    } else {
                        ref_cache[(x & !1) + 1 + ((y & !1) + 1) * 5]
                    };
            }
        }

        for (y, (((top_ccs, cur_ccs), (top_mvs, cur_mvs)), (cur_refs, top_refs))) in
                coded_cache.chunks_exact(5).take(4).zip(coded_cache[5..].chunks_exact(5)).zip(
                    mv_cache.chunks_exact(5).zip(mv_cache[5..].chunks_exact(5))).zip(
                    ref_cache[5..].chunks_exact(5).zip(ref_cache.chunks_exact(5))).enumerate() {
            let can_do_top = y != 0 || (self.mb_y != 0 && (self.has_top || deblock_mode != 2));
            if can_do_top && (!tx8x8 || (y & 1) == 0) {
                if is_s || cur_intra || top_intra {
                    let val = if y == 0 { 0x40 } else { 0x30 };
                    for el in self.deblock[y * 4..][..4].iter_mut() { *el |= val; }
                } else {
                    for (x, (((&cur_cc, &top_cc), (cur_mv, top_mv)), (&cur_ref, &top_ref))) in
                            cur_ccs[1..].iter().zip(top_ccs[1..].iter()).zip(
                                cur_mvs[1..].iter().zip(top_mvs[1..].iter())).zip(
                                cur_refs[1..].iter().zip(
                                    top_refs[1..].iter())).take(4).enumerate() {
                        if cur_cc || top_cc {
                            self.deblock[y * 4 + x] |= 0x20;
                        } else {
                            if mvdiff4(cur_mv, top_mv) || !frefs.cmp_refs(cur_ref, top_ref) {
                                self.deblock[y * 4 + x] |= 0x10;
                            }
                        }
                    }
                }
            }
            let mut lleft_intra = left_intra;
            for (x, (((&cur_cc, &left_cc), (cur_mv, left_mv)), (&cur_ref, &left_ref))) in
                    cur_ccs[1..].iter().zip(cur_ccs.iter()).zip(
                        cur_mvs[1..].iter().zip(cur_mvs.iter())).zip(
                        cur_refs[1..].iter().zip(cur_refs.iter())).enumerate() {
                // with the 8x8 transform the internal vertical edges are not filtered
                let skip_8 = tx8x8 && (x & 1) != 0;
                let can_do_left = x > 0 || self.has_left || (self.mb_x != 0 && deblock_mode != 2);
                if !can_do_left {
                    continue;
                }
                if skip_8 {
                } else if is_s || cur_intra || lleft_intra {
                    self.deblock[y * 4 + x] |= if x == 0 { 4 } else { 3 };
                } else if cur_cc || left_cc {
                    self.deblock[y * 4 + x] |= 2;
                } else {
                    if mvdiff4(cur_mv, left_mv) || !frefs.cmp_refs(cur_ref, left_ref) {
                        self.deblock[y * 4 + x] |= 1;
                    }
                }
                lleft_intra = cur_intra;
            }
            top_intra = cur_intra;
        }
    }
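    /// Advances the position to the next macroblock in raster order, updating
    /// the row caches and the top/left availability flags.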
    pub fn next_mb(&mut self) {
        self.mb_x += 1;
        self.has_left = true;
        if self.mb_x == self.mb_w {
            self.mb_x = 0;
            self.mb_y += 1;
            self.mb.update_row();
            self.blk8.update_row();
            self.blk4.update_row();

            self.has_left = false;
        }
        self.has_top = self.mb_x + self.mb_y * self.mb_w >= self.mb_start + self.mb_w;
    }
    pub fn get_cur_mb_idx(&self) -> usize { self.mb.xpos + self.mb_x }
    pub fn get_cur_blk8_idx(&self, blk_no: usize) -> usize {
        self.blk8.xpos + self.mb_x * 2 + (blk_no & 1) + (blk_no >> 1) * self.blk8.stride
    }
    pub fn get_cur_blk4_idx(&self, blk_no: usize) -> usize {
        self.blk4.xpos + self.mb_x * 4 + (blk_no & 3) + (blk_no >> 2) * self.blk4.stride
    }
    pub fn get_cur_mb(&mut self) -> &mut MBData {
        let idx = self.get_cur_mb_idx();
        &mut self.mb.data[idx]
    }
    pub fn get_left_mb(&self) -> &MBData {
        &self.mb.data[self.get_cur_mb_idx() - 1]
    }
    pub fn get_top_mb(&self) -> &MBData {
        &self.mb.data[self.get_cur_mb_idx() - self.mb.stride]
    }
    pub fn get_cur_blk8(&mut self, blk_no: usize) -> &mut Blk8Data {
        let idx = self.get_cur_blk8_idx(blk_no);
        &mut self.blk8.data[idx]
    }
    pub fn get_left_blk8(&self, blk_no: usize) -> &Blk8Data {
        &self.blk8.data[self.get_cur_blk8_idx(blk_no) - 1]
    }
    pub fn get_top_blk8(&self, blk_no: usize) -> &Blk8Data {
        &self.blk8.data[self.get_cur_blk8_idx(blk_no) - self.blk8.stride]
    }
    pub fn get_cur_blk4(&mut self, blk_no: usize) -> &mut Blk4Data {
        let idx = self.get_cur_blk4_idx(blk_no);
        &mut self.blk4.data[idx]
    }
    pub fn get_left_blk4(&self, blk_no: usize) -> &Blk4Data {
        &self.blk4.data[self.get_cur_blk4_idx(blk_no) - 1]
    }
    pub fn get_top_blk4(&self, blk_no: usize) -> &Blk4Data {
        &self.blk4.data[self.get_cur_blk4_idx(blk_no) - self.blk4.stride]
    }

    pub fn apply_to_blk8<F: (Fn(&mut Blk8Data))>(&mut self, f: F) {
        let start = self.get_cur_blk8_idx(0);
        for row in self.blk8.data[start..].chunks_mut(self.blk8.stride).take(2) {
            for el in row[..2].iter_mut() {
                f(el);
            }
        }
    }
    pub fn apply_to_blk4<F: (Fn(&mut Blk4Data))>(&mut self, f: F) {
        let start = self.get_cur_blk4_idx(0);
        for row in self.blk4.data[start..].chunks_mut(self.blk4.stride).take(4) {
            for el in row[..4].iter_mut() {
                f(el);
            }
        }
    }

    pub fn fill_ipred(&mut self, imode: IntraPredMode) {
        self.apply_to_blk4(|blk| blk.ipred = imode);
    }
    pub fn fill_ncoded(&mut self, nc: u8) {
        self.apply_to_blk4(|blk| blk.ncoded = nc);
        self.apply_to_blk8(|blk| blk.ncoded_c = [nc; 2]);
    }
    pub fn reset_mb_mv(&mut self) {
        self.apply_to_blk8(|blk| blk.ref_idx = [INVALID_REF; 2]);
    }

    pub fn get_mv_ctx(&self, xoff: usize, yoff: usize, ref_l: usize) -> (usize, usize) {
        let blk_no = xoff / 4 + yoff;
        let mv_a = self.get_left_blk4(blk_no).mvd[ref_l];
        let mv_b = self.get_top_blk4(blk_no).mvd[ref_l];
        let mv = mv_a + mv_b;
        let ctx0 = if mv.x < 3 { 0 } else if mv.x <= 32 { 1 } else { 2 };
        let ctx1 = if mv.y < 3 { 0 } else if mv.y <= 32 { 1 } else { 2 };
        (ctx0, ctx1)
    }
    pub fn get_mv_ref_ctx(&self, xoff: usize, yoff: usize, ref_l: usize) -> usize {
        let blk_no = xoff / 8 + (yoff / 8) * 2;
        let mut ctx = 0;
        let left_ref = self.get_left_blk8(blk_no).ref_idx[ref_l];
        let top_ref = self.get_top_blk8(blk_no).ref_idx[ref_l];
        if !left_ref.not_avail() && !left_ref.is_direct() && left_ref.index() > 0 {
            ctx += 1;
        }
        if !top_ref.not_avail() && !top_ref.is_direct() && top_ref.index() > 0 {
            ctx += 2;
        }
        ctx
    }
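    /// Computes the motion vector predictor for a partition (median of the
    /// A/B/C neighbours with the special cases for 16x8/8x16 partitions and
    /// for a single matching reference), adds `diff_mv` and stores the result
    /// together with `ref_idx` in all covered blocks.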
    #[allow(clippy::if_same_then_else)]
    pub fn predict(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, diff_mv: MV, ref_idx: PicRef) {
        let midx = self.get_cur_blk4_idx(0) + xpos / 4 + ypos / 4 * self.blk4.stride;
        let ridx = self.get_cur_blk8_idx(0) + xpos / 8 + ypos / 8 * self.blk8.stride;
        let ridx_c = self.get_cur_blk8_idx(0) + (xpos + bw) / 8 + ypos / 8 * self.blk8.stride - if (ypos & 4) == 0 { self.blk8.stride } else { 0 };

        let mv_a = self.blk4.data[midx - 1].mv[ref_l];
        let mv_b = self.blk4.data[midx - self.blk4.stride].mv[ref_l];
        let mut mv_c = self.blk4.data[midx - self.blk4.stride + bw / 4].mv[ref_l];

        let rx = if (xpos & 4) != 0 { 0 } else { 1 };
        let ry = if (ypos & 4) != 0 { 0 } else { self.blk8.stride };
        let ref_a = self.blk8.data[ridx - rx].ref_idx[ref_l];
        let ref_b = self.blk8.data[ridx - ry].ref_idx[ref_l];
        let mut ref_c = self.blk8.data[ridx_c].ref_idx[ref_l];

        if ref_c == MISSING_REF || (((xpos + bw) & 4) == 0 && (ypos & 4) != 0) {
            mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv[ref_l];
            ref_c = self.blk8.data[ridx - rx - ry].ref_idx[ref_l];
        }

        let pred_mv = if bw == 16 && bh == 8 && ypos == 0 && ref_b == ref_idx {
                mv_b
            } else if bw == 16 && bh == 8 && ypos != 0 && ref_a == ref_idx {
                mv_a
            } else if bw == 8 && bh == 16 && xpos == 0 && ref_a == ref_idx {
                mv_a
            } else if bw == 8 && bh == 16 && xpos != 0 && ref_c == ref_idx {
                mv_c
            } else if ref_b == MISSING_REF && ref_c == MISSING_REF {
                mv_a
            } else {
                let count = ((ref_a == ref_idx) as u8) + ((ref_b == ref_idx) as u8) + ((ref_c == ref_idx) as u8);
                if count == 1 {
                    if ref_a == ref_idx {
                        mv_a
                    } else if ref_b == ref_idx {
                        mv_b
                    } else {
                        mv_c
                    }
                } else {
                    MV::pred(mv_a, mv_b, mv_c)
                }
            };

        let mv = pred_mv + diff_mv;
        self.fill_mv (xpos, ypos, bw, bh, ref_l, mv);
        self.fill_ref(xpos, ypos, bw, bh, ref_l, ref_idx);
    }
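    /// Reconstructs the motion vector for a P-skip macroblock: zero motion
    /// when a neighbour is unavailable or is a static block with reference 0,
    /// the usual median prediction otherwise.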
    pub fn predict_pskip(&mut self) {
        let midx = self.get_cur_blk4_idx(0);
        let ridx = self.get_cur_blk8_idx(0);

        let mv_a = self.blk4.data[midx - 1].mv[0];
        let mv_b = self.blk4.data[midx - self.blk4.stride].mv[0];
        let mut mv_c = self.blk4.data[midx - self.blk4.stride + 4].mv[0];

        let ref_a = self.blk8.data[ridx - 1].ref_idx[0];
        let ref_b = self.blk8.data[ridx - self.blk8.stride].ref_idx[0];
        let mut ref_c = self.blk8.data[ridx - self.blk8.stride + 2].ref_idx[0];

        if ref_c == MISSING_REF {
            mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv[0];
            ref_c = self.blk8.data[ridx - self.blk8.stride - 1].ref_idx[0];
        }

        let ref_idx = ZERO_REF;
        let mv = if ref_a == MISSING_REF || ref_b == MISSING_REF || (ref_a == ZERO_REF && mv_a == ZERO_MV) || (ref_b == ZERO_REF && mv_b == ZERO_MV) {
                ZERO_MV
            } else {
                let count = ((ref_a == ref_idx) as u8) + ((ref_b == ref_idx) as u8) + ((ref_c == ref_idx) as u8);
                if count == 1 {
                    if ref_a == ref_idx {
                        mv_a
                    } else if ref_b == ref_idx {
                        mv_b
                    } else {
                        mv_c
                    }
                } else {
                    MV::pred(mv_a, mv_b, mv_c)
                }
            };

        self.fill_mv (0, 0, 16, 16, 0, mv);
        self.fill_ref(0, 0, 16, 16, 0, ref_idx);
    }
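    /// Fills direct-mode motion data for the whole macroblock, deriving it per
    /// 4x4 block (honouring direct 8x8 inference) or once for the entire
    /// macroblock when the co-located macroblock allows it.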
    pub fn predict_direct_mb(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) {
        let (col_mb, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
        if direct_8x8 {
            for blk4 in 0..16 {
                let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, BLK4_TO_D8[blk4]);
                self.get_cur_blk4(blk4).mv = [mv0, mv1];
                self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
            }
        } else if col_mb.mb_type.is_16x16_ref() || !temporal_mv {
            let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, 0);
            self.apply_to_blk4(|blk4| blk4.mv = [mv0, mv1]);
            self.apply_to_blk8(|blk8| blk8.ref_idx = [ref0, ref1]);
        } else {
            for blk4 in 0..16 {
                let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, blk4);
                self.get_cur_blk4(blk4).mv = [mv0, mv1];
                self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
            }
        }
    }
    pub fn predict_direct_sub(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) {
        let src_blk = if !direct8x8 { blk4 } else { BLK4_TO_D8[blk4] };
        let (mbi, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
        let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &mbi, r1_poc, r1_long, temporal_mv, cur_id, src_blk);
        self.get_cur_blk4(blk4).mv = [mv0, mv1];
        self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
    }
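    /// Derives direct-mode motion vectors and references for one 4x4 block:
    /// temporal mode scales the co-located motion vector by the ratio of POC
    /// distances, spatial mode takes the smallest neighbouring reference per
    /// list and zeroes the motion where the co-located block is static.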
    #[allow(clippy::nonminimal_bool)]
    pub fn get_direct_mv(&self, frame_refs: &SimplifiedSliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) {
        let blk8 = blk4_to_blk8(blk4);
        let (col_mv, r0_poc, col_idx) = if mbi.ref_poc[blk8] == [MISSING_POC; 2] {
                (ZERO_MV, MISSING_POC, MISSING_REF)
            } else if mbi.ref_poc[blk8][0] != MISSING_POC {
                (mbi.mv[blk4][0], mbi.ref_poc[blk8][0], mbi.ref_idx[blk8][0])
            } else {
                (mbi.mv[blk4][1], mbi.ref_poc[blk8][1], mbi.ref_idx[blk8][1])
            };
        let (col_ref, r0_long) = frame_refs.map_ref0(r0_poc);
        if temporal_mv {
            let td = (i32::from(r1_poc) - i32::from(r0_poc)).max(-128).min(127);
            if r0_long || td == 0 {
                (col_mv, col_ref, ZERO_MV, ZERO_REF)
            } else {
                let tx = (16384 + (td / 2).abs()) / td;
                let tb = (i32::from(cur_id) - i32::from(r0_poc)).max(-128).min(127);
                let scale = ((tb * tx + 32) >> 6).max(-1024).min(1023);
                let mv0 = MV {
                        x: ((i32::from(col_mv.x) * scale + 128) >> 8) as i16,
                        y: ((i32::from(col_mv.y) * scale + 128) >> 8) as i16,
                    };
                let mv1 = mv0 - col_mv;
                (mv0, col_ref, mv1, ZERO_REF)
            }
        } else {
            let blk4 = 0; // we generate the same MV prediction for the whole MB
            let blk8 = blk4_to_blk8(blk4);
            let midx = self.get_cur_blk4_idx(blk4);
            let ridx = self.get_cur_blk8_idx(blk8);
            let ridx_c = self.get_cur_blk8_idx(blk8) + 16 / 8 - self.blk8.stride;

            let mv_a = self.blk4.data[midx - 1].mv;
            let mv_b = self.blk4.data[midx - self.blk4.stride].mv;
            let mut mv_c = self.blk4.data[midx - self.blk4.stride + 16 / 4].mv;

            let ref_a = self.blk8.data[ridx - 1].ref_idx;
            let ref_b = self.blk8.data[ridx - self.blk8.stride].ref_idx;
            let mut ref_c = self.blk8.data[ridx_c].ref_idx;

            if ref_c == [MISSING_REF; 2] {
                mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv;
                ref_c = self.blk8.data[ridx - self.blk8.stride - 1].ref_idx;
            }
            let mut refs = [INVALID_REF; 2];
            for cur_ref in [ref_a, ref_b, ref_c].iter() {
                refs[0] = refs[0].min_pos(cur_ref[0]);
                refs[1] = refs[1].min_pos(cur_ref[1]);
            }
            if refs == [INVALID_REF; 2] {
                return (ZERO_MV, ZERO_REF, ZERO_MV, ZERO_REF);
            }

            let mut col_zero = true;
            if r1_long || col_idx != ZERO_REF {
                col_zero = false;
            }
            if col_mv.x.abs() > 1 || col_mv.y.abs() > 1 {
                col_zero = false;
            }
            let mut mvs = [ZERO_MV; 2];
            for ref_l in 0..2 {
                if mbi.mb_type.is_intra() || (!refs[ref_l].not_avail() && !(refs[ref_l] == ZERO_REF && col_zero)) {
                    let ref_idx = refs[ref_l];
                    mvs[ref_l] = if ref_b[ref_l] == MISSING_REF && ref_c[ref_l] == MISSING_REF {
                            mv_a[ref_l]
                        } else {
                            let count = ((ref_a[ref_l] == ref_idx) as u8) + ((ref_b[ref_l] == ref_idx) as u8) + ((ref_c[ref_l] == ref_idx) as u8);
                            if count == 1 {
                                if ref_a[ref_l] == ref_idx {
                                    mv_a[ref_l]
                                } else if ref_b[ref_l] == ref_idx {
                                    mv_b[ref_l]
                                } else {
                                    mv_c[ref_l]
                                }
                            } else {
                                MV::pred(mv_a[ref_l], mv_b[ref_l], mv_c[ref_l])
                            }
                        };
                }
            }
            (mvs[0], refs[0], mvs[1], refs[1])
        }
    }
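    /// Writes `mv` into every 4x4 block covered by the partition for the given reference list.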
    pub fn fill_mv(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, mv: MV) {
        let start = self.get_cur_blk4_idx(0) + xpos / 4 + ypos / 4 * self.blk4.stride;
        for row in self.blk4.data[start..].chunks_mut(self.blk4.stride).take(bh / 4) {
            for blk in row[..bw / 4].iter_mut() {
                blk.mv[ref_l] = mv;
            }
        }
    }
    pub fn fill_mvd(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, mv: MV) {
        let mvd = MV{ x: mv.x.abs().min(128), y: mv.y.abs().min(128) };
        let start = self.get_cur_blk4_idx(0) + xpos / 4 + ypos / 4 * self.blk4.stride;
        for row in self.blk4.data[start..].chunks_mut(self.blk4.stride).take(bh / 4) {
            for blk in row[..bw / 4].iter_mut() {
                blk.mvd[ref_l] = mvd;
            }
        }
    }
    pub fn fill_ref(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, ref_idx: PicRef) {
        let start = self.get_cur_blk8_idx(0) + xpos / 8 + ypos / 8 * self.blk8.stride;
        if bw < 8 || bh < 8 {
            self.blk8.data[start].ref_idx[ref_l] = ref_idx;
        } else {
            for row in self.blk8.data[start..].chunks_mut(self.blk8.stride).take(bh / 8) {
                for blk in row[..bw / 8].iter_mut() {
                    blk.ref_idx[ref_l] = ref_idx;
                }
            }
        }
    }
}

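/// Reports whether the two motion vector pairs differ by four or more
/// quarter-pel units in any component, the motion test used when choosing
/// deblocking strength.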
#[cfg(not(target_arch="x86_64"))]
fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
    let mvd0 = mv1[0] - mv2[0];
    let mvd1 = mv1[1] - mv2[1];
    (mvd0.x.abs() >= 4) || (mvd0.y.abs() >= 4) || (mvd1.x.abs() >= 4) || (mvd1.y.abs() >= 4)
}

#[cfg(target_arch="x86_64")]
fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
    unsafe {
        let mut flag = false;
        let ptr = std::mem::transmute::<*const MV, *const u64>(mv1.as_ptr());
        let mut m0 = *ptr;
        let ptr = std::mem::transmute::<*const MV, *const u64>(mv2.as_ptr());
        let mut m1 = *ptr;
        // process all four 16-bit components packed into one 64-bit word;
        // wrapping_add(3) > 6 is true exactly when the signed difference of
        // a component lies outside the -3..=3 range
        for _ in 0..4 {
            let tmp = m0.wrapping_sub(m1) as u16;
            flag |= tmp.wrapping_add(3) > 6;
            m0 >>= 16;
            m1 >>= 16;
        }
        flag
    }
}