h264: miscellaneous micro-optimisations
[nihav.git] / nihav-itu / src / codecs / h264 / types.rs
CommitLineData
22de733b 1use nihav_core::frame::NASimpleVideoFrame;
696e4e20
KS
2use nihav_codec_support::codecs::{MV, ZERO_MV};
3use nihav_codec_support::data::GenericCache;
56a17e69 4use super::SliceRefs;
15845d1a 5use super::pic_ref::FrameMBInfo;
696e4e20
KS
6
/// Prediction direction of a B-macroblock partition:
/// reference list 0 only, list 1 only, or bi-directional.
#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq)]
pub enum BMode {
    L0,
    L1,
    Bi,
}
14
/// Macroblock type as coded in the bitstream.
///
/// `Intra16x16` carries three `u8` parameters of the 16x16 intra coding
/// (NOTE(review): presumably prediction mode and coded-block information —
/// confirm against the slice parser; they are dropped by `CompactMBType`).
#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum MBType {
    #[default]
    Intra4x4,
    Intra8x8,
    Intra16x16(u8, u8, u8),
    PCM,

    P16x16,
    P16x8,
    P8x16,
    P8x8,
    P8x8Ref0,
    PSkip,

    Direct,
    B16x16(BMode),
    B16x8(BMode, BMode),
    B8x16(BMode, BMode),
    B8x8,
    BSkip,
}
37
38impl MBType {
39 pub fn is_intra(self) -> bool {
42005e25 40 matches!(self, MBType::Intra4x4 | MBType::Intra8x8 | MBType::Intra16x16(_, _, _) | MBType::PCM)
696e4e20
KS
41 }
42 pub fn is_intra16x16(self) -> bool {
42005e25 43 matches!(self, MBType::Intra16x16(_, _, _))
696e4e20
KS
44 }
45 pub fn is_skip(self) -> bool {
42005e25 46 matches!(self, MBType::PSkip | MBType::BSkip)
696e4e20
KS
47 }
48 pub fn is_4x4(self) -> bool { self.num_parts() == 4 }
49 pub fn is_l0(self, part: usize) -> bool {
50 match self {
51 MBType::B16x16(mode) => mode == BMode::L0,
52 MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
53 if part == 0 {
54 mode0 == BMode::L0
55 } else {
56 mode1 == BMode::L0
57 }
58 },
59 MBType::Direct | MBType::BSkip => false,
60 _ => true,
61 }
62 }
63 pub fn is_l1(self, part: usize) -> bool {
64 match self {
65 MBType::B16x16(mode) => mode == BMode::L1,
66 MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
67 if part == 0 {
68 mode0 == BMode::L1
69 } else {
70 mode1 == BMode::L1
71 }
72 },
73 _ => false,
74 }
75 }
76 pub fn num_parts(self) -> usize {
77 match self {
78 MBType::Intra4x4 | MBType::Intra8x8 | MBType::Intra16x16(_, _, _) | MBType::PCM |
79 MBType::PSkip |
80 MBType::Direct | MBType::BSkip
81 => 1,
82 MBType::P16x16 |
83 MBType::B16x16(_)
84 => 1,
85 MBType::P16x8 | MBType::P8x16 |
86 MBType::B16x8(_, _) | MBType::B8x16(_, _)
87 => 2,
88 _ => 4,
89 }
90 }
91 pub fn size(self) -> (usize, usize) {
92 match self {
93 MBType::Intra4x4 |
94 MBType::Intra8x8 |
95 MBType::Intra16x16(_, _, _) |
96 MBType::PCM |
97 MBType::P16x16 |
98 MBType::PSkip |
99 MBType::Direct |
100 MBType::B16x16(_) |
101 MBType::BSkip
102 => (16, 16),
103 MBType::P16x8 | MBType::B16x8(_, _) => (16, 8),
104 MBType::P8x16 | MBType::B8x16(_, _) => (8, 16),
105 _ => (8, 8),
106 }
107 }
108}
109
/// Sub-partition type of one 8x8 block of a P or B macroblock.
#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum SubMBType {
    P8x8,
    P8x4,
    P4x8,
    P4x4,
    #[default]
    Direct8x8,
    B8x8(BMode),
    B8x4(BMode),
    B4x8(BMode),
    B4x4(BMode),
}
123
124impl SubMBType {
125 pub fn num_parts(self) -> usize {
126 match self {
127 SubMBType::P8x8 | SubMBType::Direct8x8 | SubMBType::B8x8(_) => 1,
128 SubMBType::P4x4 | SubMBType::B4x4(_) => 4,
129 _ => 2,
130 }
131 }
132 pub fn size(self) -> (usize, usize) {
133 match self {
134 SubMBType::P8x8 | SubMBType::Direct8x8 | SubMBType::B8x8(_) => (8, 8),
135 SubMBType::P8x4 | SubMBType::B8x4(_) => (8, 4),
136 SubMBType::P4x8 | SubMBType::B4x8(_) => (4, 8),
137 SubMBType::P4x4 | SubMBType::B4x4(_) => (4, 4),
138 }
139 }
140 pub fn is_l0(self) -> bool {
141 match self {
142 SubMBType::B8x8(mode) | SubMBType::B8x4(mode) |
143 SubMBType::B4x8(mode) | SubMBType::B4x4(mode) => {
144 mode == BMode::L0
145 },
146 _ => true,
147 }
148 }
149 pub fn is_l1(self) -> bool {
150 match self {
151 SubMBType::B8x8(mode) | SubMBType::B8x4(mode) |
152 SubMBType::B4x8(mode) | SubMBType::B4x4(mode) => {
153 mode == BMode::L1
154 },
155 _ => false,
156 }
157 }
158}
159
/// Macroblock type with the per-partition payload stripped, suitable for
/// compact storage in the slice caches; `None` marks unset/unavailable
/// entries (it is the default used for cache borders).
#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum CompactMBType {
    Intra4x4,
    Intra8x8,
    Intra16x16,
    PCM,

    P16x16,
    P16x8,
    P8x16,
    P8x8,
    P8x8Ref0,
    PSkip,

    Direct,
    B16x16,
    B16x8,
    B8x16,
    B8x8,
    BSkip,

    #[default]
    None,
}
185
186impl CompactMBType {
187 pub fn is_intra(self) -> bool {
42005e25 188 matches!(self, CompactMBType::Intra4x4 | CompactMBType::Intra8x8 | CompactMBType::Intra16x16)
696e4e20
KS
189 }
190 pub fn is_intra16orpcm(self) -> bool {
42005e25 191 matches!(self, CompactMBType::Intra16x16 | CompactMBType::PCM)
696e4e20
KS
192 }
193 pub fn is_skip(self) -> bool {
42005e25 194 matches!(self, CompactMBType::PSkip | CompactMBType::BSkip)
696e4e20
KS
195 }
196 pub fn is_direct(self) -> bool {
42005e25 197 matches!(self, CompactMBType::BSkip | CompactMBType::Direct | CompactMBType::None)
696e4e20
KS
198 }
199 pub fn is_inter(self) -> bool {
200 !self.is_intra() && !self.is_skip() && self != CompactMBType::PCM
201 }
495b7ec0 202 pub fn is_16x16_ref(self) -> bool {
42005e25 203 matches!(self,
495b7ec0
KS
204 CompactMBType::Intra4x4 |
205 CompactMBType::Intra8x8 |
206 CompactMBType::Intra16x16 |
207 CompactMBType::PCM |
208 CompactMBType::P16x16 |
42005e25 209 CompactMBType::B16x16)
696e4e20
KS
210 }
211}
212
696e4e20
KS
impl From<MBType> for CompactMBType {
    /// Drops the per-partition payload (intra parameters, B prediction
    /// modes) and keeps only the type tag.
    fn from(mbtype: MBType) -> Self {
        match mbtype {
            MBType::Intra4x4 => CompactMBType::Intra4x4,
            MBType::Intra8x8 => CompactMBType::Intra8x8,
            MBType::Intra16x16(_, _, _) => CompactMBType::Intra16x16,
            MBType::PCM => CompactMBType::PCM,
            MBType::P16x16 => CompactMBType::P16x16,
            MBType::P16x8 => CompactMBType::P16x8,
            MBType::P8x16 => CompactMBType::P8x16,
            MBType::P8x8 => CompactMBType::P8x8,
            MBType::P8x8Ref0 => CompactMBType::P8x8Ref0,
            MBType::PSkip => CompactMBType::PSkip,
            MBType::Direct => CompactMBType::Direct,
            MBType::B16x16(_) => CompactMBType::B16x16,
            MBType::B16x8(_, _) => CompactMBType::B16x8,
            MBType::B8x16(_, _) => CompactMBType::B8x16,
            MBType::B8x8 => CompactMBType::B8x8,
            MBType::BSkip => CompactMBType::BSkip,
        }
    }
}
235
/// Intra prediction mode; `None` means "not set".
///
/// Declared in the standard H.264 mode order so the discriminant (0..8)
/// is the mode number used by the `From<u8>`/`Into<u8>` conversions.
#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum IntraPredMode {
    Vertical,
    Horizontal,
    DC,
    DiagDownLeft,
    DiagDownRight,
    VerRight,
    HorDown,
    VerLeft,
    HorUp,
    #[default]
    None,
}
251
252impl IntraPredMode {
253 pub fn is_none(self) -> bool { self == IntraPredMode::None }
254 pub fn into_pred_idx(self) -> i8 {
255 if !self.is_none() {
256 self as u8 as i8
257 } else {
258 -1
259 }
260 }
261}
262
696e4e20
KS
impl From<u8> for IntraPredMode {
    /// Maps mode numbers 0..=8 to their modes; any other value becomes `None`.
    fn from(val: u8) -> Self {
        match val {
            0 => IntraPredMode::Vertical,
            1 => IntraPredMode::Horizontal,
            2 => IntraPredMode::DC,
            3 => IntraPredMode::DiagDownLeft,
            4 => IntraPredMode::DiagDownRight,
            5 => IntraPredMode::VerRight,
            6 => IntraPredMode::HorDown,
            7 => IntraPredMode::VerLeft,
            8 => IntraPredMode::HorUp,
            _ => IntraPredMode::None,
        }
    }
}
279
42005e25
KS
280impl From<IntraPredMode> for u8 {
281 fn from(val: IntraPredMode) -> Self {
282 match val {
696e4e20
KS
283 IntraPredMode::Vertical => 0,
284 IntraPredMode::Horizontal => 1,
285 IntraPredMode::DC => 2,
286 IntraPredMode::DiagDownLeft => 3,
287 IntraPredMode::DiagDownRight => 4,
288 IntraPredMode::VerRight => 5,
289 IntraPredMode::HorDown => 6,
290 IntraPredMode::VerLeft => 7,
291 IntraPredMode::HorUp => 8,
292 _ => 9,
293 }
294 }
295}
296
pub const MISSING_POC: u16 = 0xFFFF;

/// Compact reference picture index.
///
/// The low bits hold the reference index, bit 6 (`DIRECT_FLAG`) marks a
/// direct-predicted reference, and two reserved byte values encode the
/// "missing" and "invalid" sentinels.
#[derive(Clone,Copy,Debug)]
pub struct PicRef {
    ref_idx: u8
}

pub const MISSING_REF: PicRef = PicRef { ref_idx: 0xFF };
pub const INVALID_REF: PicRef = PicRef { ref_idx: 0xFE };
pub const ZERO_REF: PicRef = PicRef { ref_idx: 0 };
const DIRECT_FLAG: u8 = 0x40;

impl PicRef {
    pub fn new(ref_idx: u8) -> Self {
        Self { ref_idx }
    }
    /// True for the two sentinel values (missing or invalid reference).
    pub fn not_avail(self) -> bool {
        self == MISSING_REF || self == INVALID_REF
    }
    /// Reference index with the direct flag stripped.
    pub fn index(self) -> usize { usize::from(self.ref_idx & !DIRECT_FLAG) }
    pub fn is_direct(self) -> bool { (self.ref_idx & DIRECT_FLAG) != 0 }
    pub fn set_direct(&mut self) { self.ref_idx |= DIRECT_FLAG; }
    /// Picks whichever reference has the lower index (ignoring the direct
    /// flag), preferring `self` when the other side is unavailable.
    fn min_pos(self, other: Self) -> Self {
        if other.not_avail() {
            self
        } else if self.not_avail() {
            other
        } else {
            PicRef::new((self.ref_idx & !DIRECT_FLAG).min(other.ref_idx & !DIRECT_FLAG))
        }
    }
}

impl Default for PicRef {
    fn default() -> Self { MISSING_REF }
}

impl PartialEq for PicRef {
    /// Equality ignores the direct flag.
    fn eq(&self, other: &Self) -> bool {
        (self.ref_idx | DIRECT_FLAG) == (other.ref_idx | DIRECT_FLAG)
    }
}

impl std::fmt::Display for PicRef {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if *self == MISSING_REF {
            f.write_str("-1")
        } else if *self == INVALID_REF {
            f.write_str("-2")
        } else {
            write!(f, "{}", self.ref_idx & !DIRECT_FLAG)
        }
    }
}
350
/// Per-macroblock data stored in the slice-wide cache.
#[derive(Clone,Copy,Default)]
pub struct MBData {
    pub mb_type: CompactMBType,
    pub cbp: u8,            // coded block pattern
    pub coded_flags: u32,
    pub cmode: u8,          // NOTE(review): presumably chroma intra prediction mode — confirm against caller
    pub qp_y: u8,           // luma quantiser
    pub qp_u: u8,           // chroma quantisers
    pub qp_v: u8,
    pub transform_8x8: bool,
}
362
/// Maps a 4x4 block index (raster order inside a macroblock) to the index
/// of the 8x8 block that contains it.
pub fn blk4_to_blk8(blk4: usize) -> usize {
    // A 4x4 block at (x, y) = (blk4 & 3, blk4 >> 2) lies inside 8x8 block
    // (x >> 1) + (y >> 1) * 2; the input is masked to 0..15 exactly like
    // the original lookup table did.
    let blk4 = blk4 & 0xF;
    ((blk4 >> 1) & 1) | ((blk4 >> 3) << 1)
}
367
/// Per-8x8-block data kept in the slice cache.
#[derive(Clone,Copy)]
pub struct Blk8Data {
    pub ref_idx: [PicRef; 2],   // reference index for prediction lists 0 and 1
    pub ncoded_c: [u8; 2],      // coded-coefficient counts for the two chroma planes
}

impl Default for Blk8Data {
    fn default() -> Self {
        Self {
            ref_idx: [MISSING_REF; 2],
            ncoded_c: [0; 2],
        }
    }
}
382
/// Per-4x4-block data kept in the slice cache.
#[derive(Clone,Copy,Default)]
pub struct Blk4Data {
    pub ncoded: u8,             // number of coded coefficients
    pub ipred: IntraPredMode,   // intra prediction mode for this block
    pub mv: [MV; 2],            // motion vector per prediction list
    pub mvd: [MV; 2],           // clamped absolute MV differences (see fill_mvd)
}
390
/// Decoder state for the current slice: macroblock position, sliding
/// caches of neighbour data, deblocking flags and reconstructed edge
/// pixels used as intra prediction context.
pub struct SliceState {
    pub mb_x: usize,        // current macroblock column
    pub mb_y: usize,        // current macroblock row
    pub mb_w: usize,        // picture width in macroblocks
    pub mb_h: usize,        // picture height in macroblocks
    pub mb_start: usize,    // raster index of the first macroblock of the slice

    // sliding-window caches with an extra border column (see reset())
    pub mb: GenericCache<MBData>,
    pub blk8: GenericCache<Blk8Data>,
    pub blk4: GenericCache<Blk4Data>,

    // per-4x4-block deblocking flags for the current macroblock:
    // high nibble = top (horizontal) edge, low nibble = left (vertical)
    // edge; see fill_deblock() for the exact values
    pub deblock: [u8; 16],

    pub has_top: bool,      // top neighbour belongs to this slice
    pub has_left: bool,     // left neighbour belongs to this slice

    // reconstructed pixels saved for intra prediction
    pub top_line_y: Vec<u8>,
    pub left_y: [u8; 17], // first element is top-left
    pub top_line_c: [Vec<u8>; 2],
    pub left_c: [[u8; 9]; 2],
}
412
d85f94f7
KS
// For each 4x4 block, the index of the corner 4x4 block of its enclosing
// 8x8 block; used when direct 8x8 inference takes MVs from corner blocks.
const BLK4_TO_D8: [usize; 16] = [ 0, 0, 3, 3, 0, 0, 3, 3, 12, 12, 15, 15, 12, 12, 15, 15 ];
414
696e4e20
KS
415impl SliceState {
    /// Creates an empty state; call `reset()` with real dimensions before use.
    pub fn new() -> Self {
        Self {
            mb_x: 0,
            mb_y: 0,
            mb_w: 0,
            mb_h: 0,
            mb_start: 0,
            mb: GenericCache::new(0, 0, MBData::default()),
            blk8: GenericCache::new(0, 0, Blk8Data::default()),
            blk4: GenericCache::new(0, 0, Blk4Data::default()),

            deblock: [0; 16],

            has_top: false,
            has_left: false,

            top_line_y: Vec::new(),
            left_y: [0; 17],
            top_line_c: [Vec::new(), Vec::new()],
            left_c: [[0; 9]; 2],
        }
    }
    /// Reinitialises the state for a slice of `mb_w` x `mb_h` macroblocks
    /// starting at raster position `mb_pos`.
    pub fn reset(&mut self, mb_w: usize, mb_h: usize, mb_pos: usize) {
        self.mb_w = mb_w;
        self.mb_h = mb_h;
        self.mb_start = mb_pos;
        if mb_w > 0 {
            self.mb_x = mb_pos % mb_w;
            self.mb_y = mb_pos / mb_w;
        } else {
            self.mb_x = 0;
            self.mb_y = 0;
        }
        // caches keep extra border entries for out-of-slice neighbours
        self.mb = GenericCache::new(1, mb_w + 2, MBData::default());
        self.blk8 = GenericCache::new(2, mb_w * 2 + 2, Blk8Data::default());
        self.blk4 = GenericCache::new(4, mb_w * 4 + 2, Blk4Data::default());

        self.has_top = false;
        self.has_left = false;

        // intra prediction context starts at the "no pixels" value 0x80
        self.top_line_y.resize(mb_w * 16 + 1, 0x80);
        self.top_line_c[0].resize(mb_w * 8 + 1, 0x80);
        self.top_line_c[1].resize(mb_w * 8 + 1, 0x80);
        self.left_y = [0x80; 17];
        self.left_c = [[0x80; 9]; 2];
    }
22de733b
KS
    /// Saves the bottom row and right column of the just-reconstructed
    /// macroblock so they can serve as intra prediction context for the
    /// macroblocks below and to the right.
    pub fn save_ipred_context(&mut self, frm: &NASimpleVideoFrame<u8>) {
        let dstoff = self.mb_x * 16;
        let srcoff = frm.offset[0] + self.mb_x * 16 + self.mb_y * 16 * frm.stride[0];
        // the new top-left sample is the last sample of the previous top line
        self.left_y[0] = self.top_line_y[dstoff + 15];
        self.top_line_y[dstoff..][..16].copy_from_slice(&frm.data[srcoff + frm.stride[0] * 15..][..16]);
        // rightmost column of this MB becomes the left context
        for (dst, src) in self.left_y[1..].iter_mut().zip(frm.data[srcoff..].chunks(frm.stride[0])) {
            *dst = src[15];
        }
        for chroma in 0..2 {
            let cstride = frm.stride[chroma + 1];
            let dstoff = self.mb_x * 8;
            let srcoff = frm.offset[chroma + 1] + self.mb_x * 8 + self.mb_y * 8 * cstride;
            self.left_c[chroma][0] = self.top_line_c[chroma][dstoff + 7];
            self.top_line_c[chroma][dstoff..][..8].copy_from_slice(&frm.data[srcoff + cstride * 7..][..8]);
            for (dst, src) in self.left_c[chroma][1..].iter_mut().zip(frm.data[srcoff..].chunks(cstride)) {
                *dst = src[7];
            }
        }
    }
56a17e69 481 pub fn fill_deblock(&mut self, frefs: &SliceRefs, deblock_mode: u8, is_s: bool) {
696e4e20
KS
482 if deblock_mode == 1 {
483 return;
484 }
485
22de733b
KS
486 self.deblock = [0; 16];
487
696e4e20
KS
488 let tx8x8 = self.get_cur_mb().transform_8x8;
489
15845d1a
KS
490 let cur_intra = self.get_cur_mb().mb_type.is_intra();
491 let left_intra = self.get_left_mb().mb_type.is_intra();
492 let mut top_intra = self.get_top_mb().mb_type.is_intra();
ef19a935
KS
493
494 let mut coded_cache = [false; 25];
495 let mut mv_cache = [[ZERO_MV; 2]; 25];
496 let mut ref_cache = [[INVALID_REF; 2]; 25];
497
498 if self.mb_y != 0 || self.has_top {
499 for (x, (cc, mv)) in coded_cache[1..5].iter_mut().zip(mv_cache[1..5].iter_mut()).enumerate() {
500 let blk4 = self.get_top_blk4(x);
501 *cc = blk4.ncoded != 0;
502 *mv = blk4.mv;
503 if (x & 1) == 0 {
504 let blk8 = self.get_top_blk8(x / 2);
505 ref_cache[x + 1] = blk8.ref_idx;
506 } else {
507 ref_cache[x + 1] = ref_cache[x];
508 }
509 }
510 }
511 for (y, (ccs, mvs)) in coded_cache[5..].chunks_exact_mut(5).zip(
512 mv_cache[5..].chunks_exact_mut(5)).enumerate() {
513 if self.has_left || self.mb_x != 0 {
514 let blk4 = self.get_left_blk4(y * 4);
515 ccs[0] = blk4.ncoded != 0;
516 mvs[0] = blk4.mv;
517 if (y & 1) == 0 {
518 let blk8 = self.get_left_blk8(y);
519 ref_cache[y * 5 + 5] = blk8.ref_idx;
520 } else {
521 ref_cache[y * 5 + 5] = ref_cache[y * 5];
522 }
523 }
524 for (x, (cc, mv)) in ccs[1..].iter_mut().zip(mvs[1..].iter_mut()).enumerate() {
525 let blk4 = self.get_cur_blk4(x + y * 4);
526 *cc = blk4.ncoded != 0;
527 *mv = blk4.mv;
528 ref_cache[x + 1 + (y + 1) * 5] = if ((x & 1) == 0) && ((y & 1) == 0) {
529 self.get_cur_blk8(x / 2 + y).ref_idx
530 } else {
531 ref_cache[(x & !1) + 1 + ((y & !1) + 1) * 5]
532 };
533 }
534 }
535
536 for (y, (((top_ccs, cur_ccs), (top_mvs, cur_mvs)), (cur_refs, top_refs))) in
537 coded_cache.chunks_exact(5).take(4).zip(coded_cache[5..].chunks_exact(5)).zip(
538 mv_cache.chunks_exact(5).zip(mv_cache[5..].chunks_exact(5))).zip(
539 ref_cache[5..].chunks_exact(5).zip(ref_cache.chunks_exact(5))).enumerate() {
696e4e20 540 let can_do_top = y != 0 || (self.mb_y != 0 && (self.has_top || deblock_mode != 2));
22de733b 541 if can_do_top && (!tx8x8 || (y & 1) == 0) {
15845d1a 542 if is_s || cur_intra || top_intra {
696e4e20 543 let val = if y == 0 { 0x40 } else { 0x30 };
22de733b 544 for el in self.deblock[y * 4..][..4].iter_mut() { *el |= val; }
696e4e20 545 } else {
ef19a935
KS
546 for (x, (((&cur_cc, &top_cc), (cur_mv, top_mv)), (&cur_ref, &top_ref))) in
547 cur_ccs[1..].iter().zip(top_ccs[1..].iter()).zip(
548 cur_mvs[1..].iter().zip(top_mvs[1..].iter())).zip(
549 cur_refs[1..].iter().zip(
550 top_refs[1..].iter())).take(4).enumerate() {
551 if cur_cc || top_cc {
22de733b 552 self.deblock[y * 4 + x] |= 0x20;
696e4e20 553 } else {
754ab49a 554 if mvdiff4(cur_mv, top_mv) || !frefs.cmp_refs(cur_ref, top_ref) {
22de733b 555 self.deblock[y * 4 + x] |= 0x10;
696e4e20
KS
556 }
557 }
558 }
559 }
560 }
15845d1a 561 let mut lleft_intra = left_intra;
ef19a935
KS
562 for (x, (((&cur_cc, &left_cc), (cur_mv, left_mv)), (&cur_ref, &left_ref))) in
563 cur_ccs[1..].iter().zip(cur_ccs.iter()).zip(
564 cur_mvs[1..].iter().zip(cur_mvs.iter())).zip(
565 cur_refs[1..].iter().zip(cur_refs.iter())).enumerate() {
22de733b 566 let skip_8 = tx8x8 && (x & 1) != 0;
696e4e20
KS
567 let can_do_left = x > 0 || self.has_left || (self.mb_x != 0 && deblock_mode != 2);
568 if !can_do_left {
569 continue;
570 }
22de733b 571 if skip_8 {
15845d1a 572 } else if is_s || cur_intra || lleft_intra {
22de733b 573 self.deblock[y * 4 + x] |= if x == 0 { 4 } else { 3 };
ef19a935 574 } else if cur_cc || left_cc {
22de733b 575 self.deblock[y * 4 + x] |= 2;
696e4e20 576 } else {
754ab49a 577 if mvdiff4(cur_mv, left_mv) || !frefs.cmp_refs(cur_ref, left_ref) {
22de733b 578 self.deblock[y * 4 + x] |= 1;
696e4e20
KS
579 }
580 }
15845d1a 581 lleft_intra = cur_intra;
696e4e20 582 }
15845d1a 583 top_intra = cur_intra;
696e4e20
KS
584 }
585 }
    /// Advances to the next macroblock in raster order, rotating the cache
    /// rows at the end of each macroblock line and updating neighbour
    /// availability.
    pub fn next_mb(&mut self) {
        self.mb_x += 1;
        self.has_left = true;
        if self.mb_x == self.mb_w {
            self.mb_x = 0;
            self.mb_y += 1;
            self.mb.update_row();
            self.blk8.update_row();
            self.blk4.update_row();

            self.has_left = false;
        }
        // the top neighbour only counts if it belongs to the same slice
        self.has_top = self.mb_x + self.mb_y * self.mb_w >= self.mb_start + self.mb_w;
    }
    /// Cache index of the current macroblock.
    pub fn get_cur_mb_idx(&self) -> usize { self.mb.xpos + self.mb_x }
    /// Cache index of 8x8 block `blk_no` (0..4, raster order) of the current MB.
    pub fn get_cur_blk8_idx(&self, blk_no: usize) -> usize {
        self.blk8.xpos + self.mb_x * 2 + (blk_no & 1) + (blk_no >> 1) * self.blk8.stride
    }
    /// Cache index of 4x4 block `blk_no` (0..16, raster order) of the current MB.
    pub fn get_cur_blk4_idx(&self, blk_no: usize) -> usize {
        self.blk4.xpos + self.mb_x * 4 + (blk_no & 3) + (blk_no >> 2) * self.blk4.stride
    }
    /// Mutable access to the current macroblock data.
    pub fn get_cur_mb(&mut self) -> &mut MBData {
        let idx = self.get_cur_mb_idx();
        &mut self.mb.data[idx]
    }
    /// Data of the macroblock to the left.
    pub fn get_left_mb(&self) -> &MBData {
        &self.mb.data[self.get_cur_mb_idx() - 1]
    }
    /// Data of the macroblock above.
    pub fn get_top_mb(&self) -> &MBData {
        &self.mb.data[self.get_cur_mb_idx() - self.mb.stride]
    }
    /// Mutable access to 8x8 block `blk_no` of the current MB.
    pub fn get_cur_blk8(&mut self, blk_no: usize) -> &mut Blk8Data {
        let idx = self.get_cur_blk8_idx(blk_no);
        &mut self.blk8.data[idx]
    }
    /// 8x8 block to the left of block `blk_no`.
    pub fn get_left_blk8(&self, blk_no: usize) -> &Blk8Data {
        &self.blk8.data[self.get_cur_blk8_idx(blk_no) - 1]
    }
    /// 8x8 block above block `blk_no`.
    pub fn get_top_blk8(&self, blk_no: usize) -> &Blk8Data {
        &self.blk8.data[self.get_cur_blk8_idx(blk_no) - self.blk8.stride]
    }
    /// Mutable access to 4x4 block `blk_no` of the current MB.
    pub fn get_cur_blk4(&mut self, blk_no: usize) -> &mut Blk4Data {
        let idx = self.get_cur_blk4_idx(blk_no);
        &mut self.blk4.data[idx]
    }
    /// 4x4 block to the left of block `blk_no`.
    pub fn get_left_blk4(&self, blk_no: usize) -> &Blk4Data {
        &self.blk4.data[self.get_cur_blk4_idx(blk_no) - 1]
    }
    /// 4x4 block above block `blk_no`.
    pub fn get_top_blk4(&self, blk_no: usize) -> &Blk4Data {
        &self.blk4.data[self.get_cur_blk4_idx(blk_no) - self.blk4.stride]
    }
637
    /// Applies `f` to all four 8x8 blocks of the current macroblock.
    pub fn apply_to_blk8<F: (Fn(&mut Blk8Data))>(&mut self, f: F) {
        let start = self.get_cur_blk8_idx(0);
        for row in self.blk8.data[start..].chunks_mut(self.blk8.stride).take(2) {
            for el in row[..2].iter_mut() {
                f(el);
            }
        }
    }
    /// Applies `f` to all sixteen 4x4 blocks of the current macroblock.
    pub fn apply_to_blk4<F: (Fn(&mut Blk4Data))>(&mut self, f: F) {
        let start = self.get_cur_blk4_idx(0);
        for row in self.blk4.data[start..].chunks_mut(self.blk4.stride).take(4) {
            for el in row[..4].iter_mut() {
                f(el);
            }
        }
    }
654
    /// Sets the same intra prediction mode on all 4x4 blocks of the MB.
    pub fn fill_ipred(&mut self, imode: IntraPredMode) {
        self.apply_to_blk4(|blk| blk.ipred = imode);
    }
    /// Sets the same coded-coefficient count on all blocks (luma and chroma).
    pub fn fill_ncoded(&mut self, nc: u8) {
        self.apply_to_blk4(|blk| blk.ncoded = nc);
        self.apply_to_blk8(|blk| blk.ncoded_c = [nc; 2]);
    }
    /// Invalidates all reference indices of the current macroblock.
    pub fn reset_mb_mv(&mut self) {
        self.apply_to_blk8(|blk| blk.ref_idx = [INVALID_REF; 2]);
    }
665
    /// Derives the (x, y) context indices for MV difference coding from the
    /// summed MVD magnitudes of the left and top 4x4 neighbours
    /// (thresholds 3 and 32; presumably CABAC context selection — the
    /// magnitudes are the clamped values stored by `fill_mvd`).
    pub fn get_mv_ctx(&self, xoff: usize, yoff: usize, ref_l: usize) -> (usize, usize) {
        let blk_no = xoff / 4 + yoff;
        let mv_a = self.get_left_blk4(blk_no).mvd[ref_l];
        let mv_b = self.get_top_blk4(blk_no).mvd[ref_l];
        let mv = mv_a + mv_b;
        let ctx0 = if mv.x < 3 { 0 } else if mv.x <= 32 { 1 } else { 2 };
        let ctx1 = if mv.y < 3 { 0 } else if mv.y <= 32 { 1 } else { 2 };
        (ctx0, ctx1)
    }
    /// Derives the context index for reference index coding: +1 if the left
    /// 8x8 neighbour uses a non-zero (non-direct) reference, +2 for the top.
    pub fn get_mv_ref_ctx(&self, xoff: usize, yoff: usize, ref_l: usize) -> usize {
        let blk_no = xoff / 8 + (yoff / 8) * 2;
        let mut ctx = 0;
        let left_ref = self.get_left_blk8(blk_no).ref_idx[ref_l];
        let top_ref = self.get_top_blk8(blk_no).ref_idx[ref_l];
        if !left_ref.not_avail() && !left_ref.is_direct() && left_ref.index() > 0 {
            ctx += 1;
        }
        if !top_ref.not_avail() && !top_ref.is_direct() && top_ref.index() > 0 {
            ctx += 2;
        }
        ctx
    }
    /// Motion vector prediction for one partition: median of neighbours
    /// A/B/C with the standard directional special cases for 16x8 and 8x16
    /// partitions, then adds `diff_mv` and stores the resulting MV and
    /// `ref_idx` for the partition.
    ///
    /// (`xpos`, `ypos`) are pixel offsets inside the macroblock, `bw`/`bh`
    /// the partition size and `ref_l` the prediction list (0 or 1).
    #[allow(clippy::if_same_then_else)]
    pub fn predict(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, diff_mv: MV, ref_idx: PicRef) {
        let midx = self.get_cur_blk4_idx(0) + xpos / 4 + ypos / 4 * self.blk4.stride;
        let ridx = self.get_cur_blk8_idx(0) + xpos / 8 + ypos / 8 * self.blk8.stride;
        // C is the block above and to the right of the partition
        let ridx_c = self.get_cur_blk8_idx(0) + (xpos + bw) / 8 + ypos / 8 * self.blk8.stride - if (ypos & 4) == 0 { self.blk8.stride } else { 0 };

        // A = left, B = top, C = top-right neighbour
        let mv_a = self.blk4.data[midx - 1].mv[ref_l];
        let mv_b = self.blk4.data[midx - self.blk4.stride].mv[ref_l];
        let mut mv_c = self.blk4.data[midx - self.blk4.stride + bw / 4].mv[ref_l];

        let rx = if (xpos & 4) != 0 { 0 } else { 1 };
        let ry = if (ypos & 4) != 0 { 0 } else { self.blk8.stride };
        let ref_a = self.blk8.data[ridx - rx].ref_idx[ref_l];
        let ref_b = self.blk8.data[ridx - ry].ref_idx[ref_l];
        let mut ref_c = self.blk8.data[ridx_c].ref_idx[ref_l];

        // fall back to the top-left neighbour when C is unavailable
        if ref_c == MISSING_REF || (((xpos + bw) & 4) == 0 && (ypos & 4) != 0) {
            mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv[ref_l];
            ref_c = self.blk8.data[ridx - rx - ry].ref_idx[ref_l];
        }

        let pred_mv = if bw == 16 && bh == 8 && ypos == 0 && ref_b == ref_idx {
                mv_b
            } else if bw == 16 && bh == 8 && ypos != 0 && ref_a == ref_idx {
                mv_a
            } else if bw == 8 && bh == 16 && xpos == 0 && ref_a == ref_idx {
                mv_a
            } else if bw == 8 && bh == 16 && xpos != 0 && ref_c == ref_idx {
                mv_c
            } else if ref_b == MISSING_REF && ref_c == MISSING_REF {
                mv_a
            } else {
                // use the single matching neighbour if exactly one matches,
                // otherwise the component-wise median
                let count = ((ref_a == ref_idx) as u8) + ((ref_b == ref_idx) as u8) + ((ref_c == ref_idx) as u8);
                if count == 1 {
                    if ref_a == ref_idx {
                        mv_a
                    } else if ref_b == ref_idx {
                        mv_b
                    } else {
                        mv_c
                    }
                } else {
                    MV::pred(mv_a, mv_b, mv_c)
                }
            };

        let mv = pred_mv + diff_mv;
        self.fill_mv (xpos, ypos, bw, bh, ref_l, mv);
        self.fill_ref(xpos, ypos, bw, bh, ref_l, ref_idx);
    }
    /// Motion vector prediction for a P-skip macroblock: zero MV when a
    /// neighbour is missing or a zero-reference neighbour has a zero MV,
    /// otherwise the usual median prediction with reference 0.
    pub fn predict_pskip(&mut self) {
        let midx = self.get_cur_blk4_idx(0);
        let ridx = self.get_cur_blk8_idx(0);

        // A = left, B = top, C = top-right (top-left fallback below)
        let mv_a = self.blk4.data[midx - 1].mv[0];
        let mv_b = self.blk4.data[midx - self.blk4.stride].mv[0];
        let mut mv_c = self.blk4.data[midx - self.blk4.stride + 4].mv[0];

        let ref_a = self.blk8.data[ridx - 1].ref_idx[0];
        let ref_b = self.blk8.data[ridx - self.blk8.stride].ref_idx[0];
        let mut ref_c = self.blk8.data[ridx - self.blk8.stride + 2].ref_idx[0];

        if ref_c == MISSING_REF {
            mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv[0];
            ref_c = self.blk8.data[ridx - self.blk8.stride - 1].ref_idx[0];
        }

        let ref_idx = ZERO_REF;
        let mv = if ref_a == MISSING_REF || ref_b == MISSING_REF || (ref_a == ZERO_REF && mv_a == ZERO_MV) || (ref_b == ZERO_REF && mv_b == ZERO_MV) {
                ZERO_MV
            } else {
                let count = ((ref_a == ref_idx) as u8) + ((ref_b == ref_idx) as u8) + ((ref_c == ref_idx) as u8);
                if count == 1 {
                    if ref_a == ref_idx {
                        mv_a
                    } else if ref_b == ref_idx {
                        mv_b
                    } else {
                        mv_c
                    }
                } else {
                    MV::pred(mv_a, mv_b, mv_c)
                }
            };

        self.fill_mv (0, 0, 16, 16, 0, mv);
        self.fill_ref(0, 0, 16, 16, 0, ref_idx);
    }
    /// Fills the whole macroblock with direct-mode motion vectors and
    /// references derived from the colocated macroblock.
    ///
    /// With `direct_8x8` every 4x4 block takes its MV from the corner block
    /// of its 8x8 quarter (see `BLK4_TO_D8`); otherwise a single prediction
    /// is reused for the whole MB when possible, or derived per 4x4 block.
    pub fn predict_direct_mb(&mut self, frame_refs: &SliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) {
        let (col_mb, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
        if direct_8x8 {
            for blk4 in 0..16 {
                let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, BLK4_TO_D8[blk4]);
                self.get_cur_blk4(blk4).mv = [mv0, mv1];
                self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
            }
        } else if col_mb.mb_type.is_16x16_ref() || !temporal_mv {
            // one prediction covers the entire macroblock
            let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, 0);
            self.apply_to_blk4(|blk4| blk4.mv = [mv0, mv1]);
            self.apply_to_blk8(|blk8| blk8.ref_idx = [ref0, ref1]);
        } else {
            for blk4 in 0..16 {
                let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, blk4);
                self.get_cur_blk4(blk4).mv = [mv0, mv1];
                self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
            }
        }
    }
    /// Direct-mode prediction for a single 4x4 block (used for direct 8x8
    /// sub-macroblocks); `direct8x8` redirects to the corner block of the
    /// enclosing 8x8 block, like `predict_direct_mb` does.
    pub fn predict_direct_sub(&mut self, frame_refs: &SliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) {
        let src_blk = if !direct8x8 { blk4 } else { BLK4_TO_D8[blk4] };
        let (mbi, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
        let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &mbi, r1_poc, r1_long, temporal_mv, cur_id, src_blk);
        self.get_cur_blk4(blk4).mv = [mv0, mv1];
        self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
    }
    /// Derives the direct-mode (list 0, list 1) MV/reference pair for 4x4
    /// block `blk4` from the colocated macroblock info `mbi`.
    ///
    /// Temporal direct scales the colocated MV by the ratio of POC
    /// distances; spatial direct takes the minimum available neighbour
    /// reference per list and a median MV shared by the whole macroblock.
    #[allow(clippy::nonminimal_bool)]
    pub fn get_direct_mv(&self, frame_refs: &SliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) {
        let blk8 = blk4_to_blk8(blk4);
        // pick the colocated MV: prefer list 0, fall back to list 1
        let (col_mv, r0_poc, col_idx) = if mbi.ref_poc[blk8] == [MISSING_POC; 2] {
                (ZERO_MV, MISSING_POC, MISSING_REF)
            } else if mbi.ref_poc[blk8][0] != MISSING_POC {
                (mbi.mv[blk4][0], mbi.ref_poc[blk8][0], mbi.ref_idx[blk8][0])
            } else {
                (mbi.mv[blk4][1], mbi.ref_poc[blk8][1], mbi.ref_idx[blk8][1])
            };
        let (col_ref, r0_long) = frame_refs.map_ref0(r0_poc);
        if temporal_mv {
            // scale the colocated MV by tb/td (POC distances, clamped)
            let td = (i32::from(r1_poc) - i32::from(r0_poc)).max(-128).min(127);
            if r0_long || td == 0 {
                (col_mv, col_ref, ZERO_MV, ZERO_REF)
            } else {
                let tx = (16384 + (td / 2).abs()) / td;
                let tb = (i32::from(cur_id) - i32::from(r0_poc)).max(-128).min(127);
                let scale = ((tb * tx + 32) >> 6).max(-1024).min(1023);
                let mv0 = MV {
                        x: ((i32::from(col_mv.x) * scale + 128) >> 8) as i16,
                        y: ((i32::from(col_mv.y) * scale + 128) >> 8) as i16,
                    };
                let mv1 = mv0 - col_mv;
                (mv0, col_ref, mv1, ZERO_REF)
            }
        } else {
            let blk4 = 0; // we generate the same MV prediction for the whole MB
            let blk8 = blk4_to_blk8(blk4);
            let midx = self.get_cur_blk4_idx(blk4);
            let ridx = self.get_cur_blk8_idx(blk8);
            let ridx_c = self.get_cur_blk8_idx(blk8) + 16 / 8 - self.blk8.stride;

            // A = left, B = top, C = top-right (top-left fallback below)
            let mv_a = self.blk4.data[midx - 1].mv;
            let mv_b = self.blk4.data[midx - self.blk4.stride].mv;
            let mut mv_c = self.blk4.data[midx - self.blk4.stride + 16 / 4].mv;

            let ref_a = self.blk8.data[ridx - 1].ref_idx;
            let ref_b = self.blk8.data[ridx - self.blk8.stride].ref_idx;
            let mut ref_c = self.blk8.data[ridx_c].ref_idx;

            if ref_c == [MISSING_REF; 2] {
                mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv;
                ref_c = self.blk8.data[ridx - self.blk8.stride - 1].ref_idx;
            }
            // lowest available neighbour reference per list
            let mut refs = [INVALID_REF; 2];
            for cur_ref in [ref_a, ref_b, ref_c].iter() {
                refs[0] = refs[0].min_pos(cur_ref[0]);
                refs[1] = refs[1].min_pos(cur_ref[1]);
            }
            if refs == [INVALID_REF; 2] {
                return (ZERO_MV, ZERO_REF, ZERO_MV, ZERO_REF);
            }

            // "colocated is (near-)zero" condition for the spatial mode
            let mut col_zero = true;
            if r1_long || col_idx != ZERO_REF {
                col_zero = false;
            }
            if col_mv.x.abs() > 1 || col_mv.y.abs() > 1 {
                col_zero = false;
            }
            let mut mvs = [ZERO_MV; 2];
            for ref_l in 0..2 {
                if mbi.mb_type.is_intra() || (!refs[ref_l].not_avail() && !(refs[ref_l] == ZERO_REF && col_zero)) {
                    let ref_idx = refs[ref_l];
                    mvs[ref_l] = if ref_b[ref_l] == MISSING_REF && ref_c[ref_l] == MISSING_REF {
                            mv_a[ref_l]
                        } else {
                            let count = ((ref_a[ref_l] == ref_idx) as u8) + ((ref_b[ref_l] == ref_idx) as u8) + ((ref_c[ref_l] == ref_idx) as u8);
                            if count == 1 {
                                if ref_a[ref_l] == ref_idx {
                                    mv_a[ref_l]
                                } else if ref_b[ref_l] == ref_idx {
                                    mv_b[ref_l]
                                } else {
                                    mv_c[ref_l]
                                }
                            } else {
                                MV::pred(mv_a[ref_l], mv_b[ref_l], mv_c[ref_l])
                            }
                        };
                }
            }
            (mvs[0], refs[0], mvs[1], refs[1])
        }
    }
    /// Stores `mv` in every 4x4 block of the partition at (`xpos`, `ypos`)
    /// with size `bw`x`bh` for prediction list `ref_l`.
    pub fn fill_mv(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, mv: MV) {
        let start = self.get_cur_blk4_idx(0) + xpos / 4 + ypos / 4 * self.blk4.stride;
        for row in self.blk4.data[start..].chunks_mut(self.blk4.stride).take(bh / 4) {
            for blk in row[..bw / 4].iter_mut() {
                blk.mv[ref_l] = mv;
            }
        }
    }
    /// Stores the per-component absolute value of `mv` (clamped to 128) in
    /// the partition's blocks; these magnitudes feed `get_mv_ctx()`.
    pub fn fill_mvd(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, mv: MV) {
        let mvd = MV{ x: mv.x.abs().min(128), y: mv.y.abs().min(128) };
        let start = self.get_cur_blk4_idx(0) + xpos / 4 + ypos / 4 * self.blk4.stride;
        for row in self.blk4.data[start..].chunks_mut(self.blk4.stride).take(bh / 4) {
            for blk in row[..bw / 4].iter_mut() {
                blk.mvd[ref_l] = mvd;
            }
        }
    }
    /// Stores `ref_idx` in the 8x8 reference entries covered by the
    /// partition; partitions smaller than 8x8 touch only their single entry.
    pub fn fill_ref(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, ref_idx: PicRef) {
        let start = self.get_cur_blk8_idx(0) + xpos / 8 + ypos / 8 * self.blk8.stride;
        if bw < 8 || bh < 8 {
            self.blk8.data[start].ref_idx[ref_l] = ref_idx;
        } else {
            for row in self.blk8.data[start..].chunks_mut(self.blk8.stride).take(bh / 8) {
                for blk in row[..bw / 8].iter_mut() {
                    blk.ref_idx[ref_l] = ref_idx;
                }
            }
        }
    }
918}
919
754ab49a
KS
/// Returns true when the two MV pairs differ by 4 or more in any component
/// (4 units correspond to one full pixel in quarter-pel units — used as
/// the deblocking-strength threshold).
#[cfg(not(target_arch="x86_64"))]
fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
    let mvd0 = mv1[0] - mv2[0];
    let mvd1 = mv1[1] - mv2[1];
    (mvd0.x.abs() >= 4) || (mvd0.y.abs() >= 4) || (mvd1.x.abs() >= 4) || (mvd1.y.abs() >= 4)
}
926
927#[cfg(target_arch="x86_64")]
928fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
929 unsafe {
930 let mut flag = false;
931 let ptr = std::mem::transmute::<*const MV, *const u64>(mv1.as_ptr());
932 let mut m0 = *ptr;
933 let ptr = std::mem::transmute::<*const MV, *const u64>(mv2.as_ptr());
934 let mut m1 = *ptr;
935 for _ in 0..4 {
936 let tmp = m0.wrapping_sub(m1) as u16;
937 flag |= tmp.wrapping_add(3) > 6;
938 m0 >>= 16;
939 m1 >>= 16;
940 }
941 flag
942 }
696e4e20 943}