h264: more micro-optimisations
nihav-itu/src/codecs/h264/types.rs
use nihav_core::frame::{NAVideoBuffer, NASimpleVideoFrame};
use nihav_codec_support::codecs::{MV, ZERO_MV};
use nihav_codec_support::data::GenericCache;
use super::SimplifiedSliceRefs;
use super::pic_ref::FrameMBInfo;

#[derive(Clone,Copy)]
pub struct SimpleFrame<'a> {
    pub data:   &'a [u8],
    pub offset: [usize; 3],
    pub stride: [usize; 3],
}

impl<'a> SimpleFrame<'a> {
    pub fn new(buf: &'a NAVideoBuffer<u8>) -> Self {
        let mut offset = [0; 3];
        let mut stride = [0; 3];
        for (plane, (offs, strd)) in offset.iter_mut().zip(stride.iter_mut()).enumerate() {
            *offs = buf.get_offset(plane);
            *strd = buf.get_stride(plane);
        }
        Self {
            data: buf.get_data(),
            offset, stride
        }
    }
}
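
// Usage sketch (caller names are illustrative, not from this crate): the point
// of SimpleFrame is to look the plane offsets and strides up once, so inner
// loops can index `data` directly instead of going through buffer accessors:
//
//     let frm = SimpleFrame::new(&video_buffer);
//     let luma_row = &frm.data[frm.offset[0] + y * frm.stride[0]..][..16];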

#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq)]
pub enum BMode {
    L0,
    L1,
    Bi,
}

#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum MBType {
    #[default]
    Intra4x4,
    Intra8x8,
    Intra16x16(u8, u8, u8),
    PCM,

    P16x16,
    P16x8,
    P8x16,
    P8x8,
    P8x8Ref0,
    PSkip,

    Direct,
    B16x16(BMode),
    B16x8(BMode, BMode),
    B8x16(BMode, BMode),
    B8x8,
    BSkip,
}

impl MBType {
    pub fn is_intra(self) -> bool {
        matches!(self, MBType::Intra4x4 | MBType::Intra8x8 | MBType::Intra16x16(_, _, _) | MBType::PCM)
    }
    pub fn is_intra16x16(self) -> bool {
        matches!(self, MBType::Intra16x16(_, _, _))
    }
    pub fn is_skip(self) -> bool {
        matches!(self, MBType::PSkip | MBType::BSkip)
    }
    pub fn is_4x4(self) -> bool { self.num_parts() == 4 }
    pub fn is_l0(self, part: usize) -> bool {
        match self {
            MBType::B16x16(mode) => mode == BMode::L0,
            MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
                if part == 0 {
                    mode0 == BMode::L0
                } else {
                    mode1 == BMode::L0
                }
            },
            MBType::Direct | MBType::BSkip => false,
            _ => true,
        }
    }
    pub fn is_l1(self, part: usize) -> bool {
        match self {
            MBType::B16x16(mode) => mode == BMode::L1,
            MBType::B16x8(mode0, mode1) | MBType::B8x16(mode0, mode1) => {
                if part == 0 {
                    mode0 == BMode::L1
                } else {
                    mode1 == BMode::L1
                }
            },
            _ => false,
        }
    }
    pub fn num_parts(self) -> usize {
        match self {
            MBType::Intra4x4 | MBType::Intra8x8 | MBType::Intra16x16(_, _, _) | MBType::PCM |
            MBType::PSkip |
            MBType::Direct | MBType::BSkip
                => 1,
            MBType::P16x16 |
            MBType::B16x16(_)
                => 1,
            MBType::P16x8 | MBType::P8x16 |
            MBType::B16x8(_, _) | MBType::B8x16(_, _)
                => 2,
            _ => 4,
        }
    }
    pub fn size(self) -> (usize, usize) {
        match self {
            MBType::Intra4x4 |
            MBType::Intra8x8 |
            MBType::Intra16x16(_, _, _) |
            MBType::PCM |
            MBType::P16x16 |
            MBType::PSkip |
            MBType::Direct |
            MBType::B16x16(_) |
            MBType::BSkip
                => (16, 16),
            MBType::P16x8 | MBType::B16x8(_, _) => (16, 8),
            MBType::P8x16 | MBType::B8x16(_, _) => (8, 16),
            _ => (8, 8),
        }
    }
}

#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum SubMBType {
    P8x8,
    P8x4,
    P4x8,
    P4x4,
    #[default]
    Direct8x8,
    B8x8(BMode),
    B8x4(BMode),
    B4x8(BMode),
    B4x4(BMode),
}

impl SubMBType {
    pub fn num_parts(self) -> usize {
        match self {
            SubMBType::P8x8 | SubMBType::Direct8x8 | SubMBType::B8x8(_) => 1,
            SubMBType::P4x4 | SubMBType::B4x4(_) => 4,
            _ => 2,
        }
    }
    pub fn size(self) -> (usize, usize) {
        match self {
            SubMBType::P8x8 | SubMBType::Direct8x8 | SubMBType::B8x8(_) => (8, 8),
            SubMBType::P8x4 | SubMBType::B8x4(_) => (8, 4),
            SubMBType::P4x8 | SubMBType::B4x8(_) => (4, 8),
            SubMBType::P4x4 | SubMBType::B4x4(_) => (4, 4),
        }
    }
    pub fn is_l0(self) -> bool {
        match self {
            SubMBType::B8x8(mode) | SubMBType::B8x4(mode) |
            SubMBType::B4x8(mode) | SubMBType::B4x4(mode) => {
                mode == BMode::L0
            },
            _ => true,
        }
    }
    pub fn is_l1(self) -> bool {
        match self {
            SubMBType::B8x8(mode) | SubMBType::B8x4(mode) |
            SubMBType::B4x8(mode) | SubMBType::B4x4(mode) => {
                mode == BMode::L1
            },
            _ => false,
        }
    }
}

#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum CompactMBType {
    Intra4x4,
    Intra8x8,
    Intra16x16,
    PCM,

    P16x16,
    P16x8,
    P8x16,
    P8x8,
    P8x8Ref0,
    PSkip,

    Direct,
    B16x16,
    B16x8,
    B8x16,
    B8x8,
    BSkip,

    #[default]
    None,
}

impl CompactMBType {
    pub fn is_intra(self) -> bool {
        matches!(self, CompactMBType::Intra4x4 | CompactMBType::Intra8x8 | CompactMBType::Intra16x16)
    }
    pub fn is_intra16orpcm(self) -> bool {
        matches!(self, CompactMBType::Intra16x16 | CompactMBType::PCM)
    }
    pub fn is_skip(self) -> bool {
        matches!(self, CompactMBType::PSkip | CompactMBType::BSkip)
    }
    pub fn is_direct(self) -> bool {
        matches!(self, CompactMBType::BSkip | CompactMBType::Direct | CompactMBType::None)
    }
    pub fn is_inter(self) -> bool {
        !self.is_intra() && !self.is_skip() && self != CompactMBType::PCM
    }
    pub fn is_16x16_ref(self) -> bool {
        matches!(self,
            CompactMBType::Intra4x4 |
            CompactMBType::Intra8x8 |
            CompactMBType::Intra16x16 |
            CompactMBType::PCM |
            CompactMBType::P16x16 |
            CompactMBType::B16x16)
    }
}

impl From<MBType> for CompactMBType {
    fn from(mbtype: MBType) -> Self {
        match mbtype {
            MBType::Intra4x4 => CompactMBType::Intra4x4,
            MBType::Intra8x8 => CompactMBType::Intra8x8,
            MBType::Intra16x16(_, _, _) => CompactMBType::Intra16x16,
            MBType::PCM => CompactMBType::PCM,
            MBType::P16x16 => CompactMBType::P16x16,
            MBType::P16x8 => CompactMBType::P16x8,
            MBType::P8x16 => CompactMBType::P8x16,
            MBType::P8x8 => CompactMBType::P8x8,
            MBType::P8x8Ref0 => CompactMBType::P8x8Ref0,
            MBType::PSkip => CompactMBType::PSkip,
            MBType::Direct => CompactMBType::Direct,
            MBType::B16x16(_) => CompactMBType::B16x16,
            MBType::B16x8(_, _) => CompactMBType::B16x8,
            MBType::B8x16(_, _) => CompactMBType::B8x16,
            MBType::B8x8 => CompactMBType::B8x8,
            MBType::BSkip => CompactMBType::BSkip,
        }
    }
}

#[repr(u8)]
#[derive(Clone,Copy,Debug,PartialEq,Default)]
pub enum IntraPredMode {
    Vertical,
    Horizontal,
    DC,
    DiagDownLeft,
    DiagDownRight,
    VerRight,
    HorDown,
    VerLeft,
    HorUp,
    #[default]
    None,
}

impl IntraPredMode {
    pub fn is_none(self) -> bool { self == IntraPredMode::None }
    pub fn into_pred_idx(self) -> i8 {
        if !self.is_none() {
            self as u8 as i8
        } else {
            -1
        }
    }
}

impl From<u8> for IntraPredMode {
    fn from(val: u8) -> Self {
        match val {
            0 => IntraPredMode::Vertical,
            1 => IntraPredMode::Horizontal,
            2 => IntraPredMode::DC,
            3 => IntraPredMode::DiagDownLeft,
            4 => IntraPredMode::DiagDownRight,
            5 => IntraPredMode::VerRight,
            6 => IntraPredMode::HorDown,
            7 => IntraPredMode::VerLeft,
            8 => IntraPredMode::HorUp,
            _ => IntraPredMode::None,
        }
    }
}

impl From<IntraPredMode> for u8 {
    fn from(val: IntraPredMode) -> Self {
        match val {
            IntraPredMode::Vertical => 0,
            IntraPredMode::Horizontal => 1,
            IntraPredMode::DC => 2,
            IntraPredMode::DiagDownLeft => 3,
            IntraPredMode::DiagDownRight => 4,
            IntraPredMode::VerRight => 5,
            IntraPredMode::HorDown => 6,
            IntraPredMode::VerLeft => 7,
            IntraPredMode::HorUp => 8,
            _ => 9,
        }
    }
}

pub const MISSING_POC: u16 = 0xFFFF;

#[derive(Clone,Copy,Debug)]
pub struct PicRef {
    ref_idx: u8
}

pub const MISSING_REF: PicRef = PicRef { ref_idx: 0xFF };
pub const INVALID_REF: PicRef = PicRef { ref_idx: 0xFE };
pub const ZERO_REF: PicRef = PicRef { ref_idx: 0 };
const DIRECT_FLAG: u8 = 0x40;
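
// The `ref_idx` byte packs everything about a reference: the low bits hold the
// reference list index, bit 6 (DIRECT_FLAG) marks references inferred by
// direct prediction, and 0xFF/0xFE are the "missing"/"invalid" sentinels.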

impl PicRef {
    pub fn new(ref_idx: u8) -> Self {
        Self { ref_idx }
    }
    pub fn not_avail(self) -> bool {
        self == MISSING_REF || self == INVALID_REF
    }
    pub fn index(self) -> usize { (self.ref_idx & !DIRECT_FLAG) as usize }
    pub fn is_direct(self) -> bool { (self.ref_idx & DIRECT_FLAG) != 0 }
    pub fn set_direct(&mut self) { self.ref_idx |= DIRECT_FLAG; }
    fn min_pos(self, other: Self) -> Self {
        match (self.not_avail(), other.not_avail()) {
            (true, true) => self,
            (false, true) => self,
            (true, false) => other,
            (false, false) => PicRef::new((self.ref_idx & !DIRECT_FLAG).min(other.ref_idx & !DIRECT_FLAG)),
        }
    }
}

impl Default for PicRef {
    fn default() -> Self { MISSING_REF }
}
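
// Equality deliberately ignores DIRECT_FLAG (both sides get the bit set before
// comparing), so a direct-predicted reference matches the same reference when
// it is signalled explicitly.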
impl PartialEq for PicRef {
    fn eq(&self, other: &Self) -> bool {
        (self.ref_idx | DIRECT_FLAG) == (other.ref_idx | DIRECT_FLAG)
    }
}

impl std::fmt::Display for PicRef {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if *self == MISSING_REF {
            write!(f, "-1")
        } else if *self == INVALID_REF {
            write!(f, "-2")
        } else {
            write!(f, "{}", self.ref_idx & !DIRECT_FLAG)
        }
    }
}

#[derive(Clone,Copy,Default)]
pub struct MBData {
    pub mb_type: CompactMBType,
    pub cbp: u8,
    pub coded_flags: u32,
    pub cmode: u8,
    pub qp_y: u8,
    pub qp_u: u8,
    pub qp_v: u8,
    pub transform_8x8: bool,
}

pub fn blk4_to_blk8(blk4: usize) -> usize {
    // bit-twiddling replacement for the former lookup table:
    //   const MAP: [usize; 16] = [ 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 ];
    //   MAP[blk4 & 0xF]
    ((blk4 & 2) >> 1) | ((blk4 & 8) >> 2)
}

#[derive(Clone,Copy)]
pub struct Blk8Data {
    pub ref_idx: [PicRef; 2],
    pub ncoded_c: [u8; 2],
}

impl Default for Blk8Data {
    fn default() -> Self {
        Self {
            ref_idx: [MISSING_REF; 2],
            ncoded_c: [0; 2],
        }
    }
}

#[derive(Clone,Copy,Default)]
pub struct Blk4Data {
    pub ncoded: u8,
    pub ipred: IntraPredMode,
    pub mv: [MV; 2],
    pub mvd: [MV; 2],
}

pub struct SliceState {
    pub mb_x: usize,
    pub mb_y: usize,
    pub mb_w: usize,
    pub mb_h: usize,
    pub mb_start: usize,

    pub mb: GenericCache<MBData>,
    pub blk8: GenericCache<Blk8Data>,
    pub blk4: GenericCache<Blk4Data>,

    pub deblock: [u8; 16],

    pub has_top: bool,
    pub has_left: bool,

    pub top_line_y: Vec<u8>,
    pub left_y: [u8; 17], // first element is top-left
    pub top_line_c: [Vec<u8>; 2],
    pub left_c: [[u8; 9]; 2],
}
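
// Maps a 4x4 block index to the index of the top-left 4x4 block of the
// containing 8x8 block, for the direct_8x8_inference case where all four
// blocks of an 8x8 partition share one motion vector.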
const BLK4_TO_D8: [usize; 16] = [ 0, 0, 3, 3, 0, 0, 3, 3, 12, 12, 15, 15, 12, 12, 15, 15 ];

impl SliceState {
    pub fn new() -> Self {
        Self {
            mb_x: 0,
            mb_y: 0,
            mb_w: 0,
            mb_h: 0,
            mb_start: 0,
            mb: GenericCache::new(0, 0, MBData::default()),
            blk8: GenericCache::new(0, 0, Blk8Data::default()),
            blk4: GenericCache::new(0, 0, Blk4Data::default()),

            deblock: [0; 16],

            has_top: false,
            has_left: false,

            top_line_y: Vec::new(),
            left_y: [0; 17],
            top_line_c: [Vec::new(), Vec::new()],
            left_c: [[0; 9]; 2],
        }
    }
    pub fn reset(&mut self, mb_w: usize, mb_h: usize, mb_pos: usize) {
        self.mb_w = mb_w;
        self.mb_h = mb_h;
        self.mb_start = mb_pos;
        if mb_w > 0 {
            self.mb_x = mb_pos % mb_w;
            self.mb_y = mb_pos / mb_w;
        } else {
            self.mb_x = 0;
            self.mb_y = 0;
        }
        self.mb = GenericCache::new(1, mb_w + 2, MBData::default());
        self.blk8 = GenericCache::new(2, mb_w * 2 + 2, Blk8Data::default());
        self.blk4 = GenericCache::new(4, mb_w * 4 + 2, Blk4Data::default());

        self.has_top = false;
        self.has_left = false;

        self.top_line_y.resize(mb_w * 16 + 1, 0x80);
        self.top_line_c[0].resize(mb_w * 8 + 1, 0x80);
        self.top_line_c[1].resize(mb_w * 8 + 1, 0x80);
        self.left_y = [0x80; 17];
        self.left_c = [[0x80; 9]; 2];
    }
    pub fn save_ipred_context(&mut self, frm: &NASimpleVideoFrame<u8>) {
        let dstoff = self.mb_x * 16;
        let srcoff = frm.offset[0] + self.mb_x * 16 + self.mb_y * 16 * frm.stride[0];
        self.left_y[0] = self.top_line_y[dstoff + 15];
        self.top_line_y[dstoff..][..16].copy_from_slice(&frm.data[srcoff + frm.stride[0] * 15..][..16]);
        for (dst, src) in self.left_y[1..].iter_mut().zip(frm.data[srcoff..].chunks(frm.stride[0])) {
            *dst = src[15];
        }
        for chroma in 0..2 {
            let cstride = frm.stride[chroma + 1];
            let dstoff = self.mb_x * 8;
            let srcoff = frm.offset[chroma + 1] + self.mb_x * 8 + self.mb_y * 8 * cstride;
            self.left_c[chroma][0] = self.top_line_c[chroma][dstoff + 7];
            self.top_line_c[chroma][dstoff..][..8].copy_from_slice(&frm.data[srcoff + cstride * 7..][..8]);
            for (dst, src) in self.left_c[chroma][1..].iter_mut().zip(frm.data[srcoff..].chunks(cstride)) {
                *dst = src[7];
            }
        }
    }
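    // Deblocking-strength cache layout (one byte per 4x4 block): the top-edge
    // strength goes in the high nibble (0x40 for strength 4 on macroblock
    // edges, 0x30/0x20/0x10 for 3/2/1) and the left-edge strength in the low
    // nibble (4/3/2/1), so the filter reads both edges from a single byte.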
    pub fn fill_deblock(&mut self, frefs: &SimplifiedSliceRefs, deblock_mode: u8, is_s: bool) {
        if deblock_mode == 1 {
            return;
        }

        self.deblock = [0; 16];

        let tx8x8 = self.get_cur_mb().transform_8x8;

        let cur_intra = self.get_cur_mb().mb_type.is_intra();
        let left_intra = self.get_left_mb().mb_type.is_intra();
        let mut top_intra = self.get_top_mb().mb_type.is_intra();

        let mut coded_cache = [false; 25];
        let mut mv_cache = [[ZERO_MV; 2]; 25];
        let mut ref_cache = [[INVALID_REF; 2]; 25];

        if self.mb_y != 0 || self.has_top {
            for (x, (cc, mv)) in coded_cache[1..5].iter_mut().zip(mv_cache[1..5].iter_mut()).enumerate() {
                let blk4 = self.get_top_blk4(x);
                *cc = blk4.ncoded != 0;
                *mv = blk4.mv;
                if (x & 1) == 0 {
                    let blk8 = self.get_top_blk8(x / 2);
                    ref_cache[x + 1] = blk8.ref_idx;
                } else {
                    ref_cache[x + 1] = ref_cache[x];
                }
            }
        }
        for (y, (ccs, mvs)) in coded_cache[5..].chunks_exact_mut(5).zip(
                mv_cache[5..].chunks_exact_mut(5)).enumerate() {
            if self.has_left || self.mb_x != 0 {
                let blk4 = self.get_left_blk4(y * 4);
                ccs[0] = blk4.ncoded != 0;
                mvs[0] = blk4.mv;
                if (y & 1) == 0 {
                    let blk8 = self.get_left_blk8(y);
                    ref_cache[y * 5 + 5] = blk8.ref_idx;
                } else {
                    ref_cache[y * 5 + 5] = ref_cache[y * 5];
                }
            }
            for (x, (cc, mv)) in ccs[1..].iter_mut().zip(mvs[1..].iter_mut()).enumerate() {
                let blk4 = self.get_cur_blk4(x + y * 4);
                *cc = blk4.ncoded != 0;
                *mv = blk4.mv;
                ref_cache[x + 1 + (y + 1) * 5] = if ((x & 1) == 0) && ((y & 1) == 0) {
                        self.get_cur_blk8(x / 2 + y).ref_idx
                    } else {
                        ref_cache[(x & !1) + 1 + ((y & !1) + 1) * 5]
                    };
            }
        }

        for (y, (((top_ccs, cur_ccs), (top_mvs, cur_mvs)), (cur_refs, top_refs))) in
                coded_cache.chunks_exact(5).take(4).zip(coded_cache[5..].chunks_exact(5)).zip(
                    mv_cache.chunks_exact(5).zip(mv_cache[5..].chunks_exact(5))).zip(
                    ref_cache[5..].chunks_exact(5).zip(ref_cache.chunks_exact(5))).enumerate() {
            let can_do_top = y != 0 || (self.mb_y != 0 && (self.has_top || deblock_mode != 2));
            if can_do_top && (!tx8x8 || (y & 1) == 0) {
                if is_s || cur_intra || top_intra {
                    let val = if y == 0 { 0x40 } else { 0x30 };
                    for el in self.deblock[y * 4..][..4].iter_mut() { *el |= val; }
                } else {
                    for (x, (((&cur_cc, &top_cc), (cur_mv, top_mv)), (&cur_ref, &top_ref))) in
                            cur_ccs[1..].iter().zip(top_ccs[1..].iter()).zip(
                                cur_mvs[1..].iter().zip(top_mvs[1..].iter())).zip(
                                cur_refs[1..].iter().zip(top_refs[1..].iter())).take(4).enumerate() {
                        if cur_cc || top_cc {
                            self.deblock[y * 4 + x] |= 0x20;
                        } else if mvdiff4(cur_mv, top_mv) || !frefs.cmp_refs(cur_ref, top_ref) {
                            self.deblock[y * 4 + x] |= 0x10;
                        }
                    }
                }
            }
            let mut lleft_intra = left_intra;
            for (x, (((&cur_cc, &left_cc), (cur_mv, left_mv)), (&cur_ref, &left_ref))) in
                    cur_ccs[1..].iter().zip(cur_ccs.iter()).zip(
                        cur_mvs[1..].iter().zip(cur_mvs.iter())).zip(
                        cur_refs[1..].iter().zip(cur_refs.iter())).enumerate() {
                let skip_8 = tx8x8 && (x & 1) != 0;
                let can_do_left = x > 0 || self.has_left || (self.mb_x != 0 && deblock_mode != 2);
                if !can_do_left {
                    continue;
                }
                if skip_8 {
                } else if is_s || cur_intra || lleft_intra {
                    self.deblock[y * 4 + x] |= if x == 0 { 4 } else { 3 };
                } else if cur_cc || left_cc {
                    self.deblock[y * 4 + x] |= 2;
                } else if mvdiff4(cur_mv, left_mv) || !frefs.cmp_refs(cur_ref, left_ref) {
                    self.deblock[y * 4 + x] |= 1;
                }
                lleft_intra = cur_intra;
            }
            top_intra = cur_intra;
        }
    }
    pub fn next_mb(&mut self) {
        self.mb_x += 1;
        self.has_left = true;
        if self.mb_x == self.mb_w {
            self.mb_x = 0;
            self.mb_y += 1;
            self.mb.update_row();
            self.blk8.update_row();
            self.blk4.update_row();

            self.has_left = false;
        }
        self.has_top = self.mb_x + self.mb_y * self.mb_w >= self.mb_start + self.mb_w;
    }
    pub fn get_cur_mb_idx(&self) -> usize { self.mb.xpos + self.mb_x }
    pub fn get_cur_blk8_idx(&self, blk_no: usize) -> usize {
        self.blk8.xpos + self.mb_x * 2 + (blk_no & 1) + (blk_no >> 1) * self.blk8.stride
    }
    pub fn get_cur_blk4_idx(&self, blk_no: usize) -> usize {
        self.blk4.xpos + self.mb_x * 4 + (blk_no & 3) + (blk_no >> 2) * self.blk4.stride
    }
    pub fn get_cur_mb(&mut self) -> &mut MBData {
        let idx = self.get_cur_mb_idx();
        &mut self.mb.data[idx]
    }
    pub fn get_left_mb(&self) -> &MBData {
        &self.mb.data[self.get_cur_mb_idx() - 1]
    }
    pub fn get_top_mb(&self) -> &MBData {
        &self.mb.data[self.get_cur_mb_idx() - self.mb.stride]
    }
    pub fn get_cur_blk8(&mut self, blk_no: usize) -> &mut Blk8Data {
        let idx = self.get_cur_blk8_idx(blk_no);
        &mut self.blk8.data[idx]
    }
    pub fn get_left_blk8(&self, blk_no: usize) -> &Blk8Data {
        &self.blk8.data[self.get_cur_blk8_idx(blk_no) - 1]
    }
    pub fn get_top_blk8(&self, blk_no: usize) -> &Blk8Data {
        &self.blk8.data[self.get_cur_blk8_idx(blk_no) - self.blk8.stride]
    }
    pub fn get_cur_blk4(&mut self, blk_no: usize) -> &mut Blk4Data {
        let idx = self.get_cur_blk4_idx(blk_no);
        &mut self.blk4.data[idx]
    }
    pub fn get_left_blk4(&self, blk_no: usize) -> &Blk4Data {
        &self.blk4.data[self.get_cur_blk4_idx(blk_no) - 1]
    }
    pub fn get_top_blk4(&self, blk_no: usize) -> &Blk4Data {
        &self.blk4.data[self.get_cur_blk4_idx(blk_no) - self.blk4.stride]
    }

    pub fn apply_to_blk8<F: (Fn(&mut Blk8Data))>(&mut self, f: F) {
        let start = self.get_cur_blk8_idx(0);
        for row in self.blk8.data[start..].chunks_mut(self.blk8.stride).take(2) {
            for el in row[..2].iter_mut() {
                f(el);
            }
        }
    }
    pub fn apply_to_blk4<F: (Fn(&mut Blk4Data))>(&mut self, f: F) {
        let start = self.get_cur_blk4_idx(0);
        for row in self.blk4.data[start..].chunks_mut(self.blk4.stride).take(4) {
            for el in row[..4].iter_mut() {
                f(el);
            }
        }
    }

    pub fn fill_ipred(&mut self, imode: IntraPredMode) {
        self.apply_to_blk4(|blk| blk.ipred = imode);
    }
    pub fn fill_ncoded(&mut self, nc: u8) {
        self.apply_to_blk4(|blk| blk.ncoded = nc);
        self.apply_to_blk8(|blk| blk.ncoded_c = [nc; 2]);
    }
    pub fn reset_mb_mv(&mut self) {
        self.apply_to_blk8(|blk| blk.ref_idx = [INVALID_REF; 2]);
    }

    pub fn get_mv_ctx(&self, xoff: usize, yoff: usize, ref_l: usize) -> (usize, usize) {
        let blk_no = xoff / 4 + yoff;
        let mv_a = self.get_left_blk4(blk_no).mvd[ref_l];
        let mv_b = self.get_top_blk4(blk_no).mvd[ref_l];
        let mv = mv_a + mv_b;
        let ctx0 = if mv.x < 3 { 0 } else if mv.x <= 32 { 1 } else { 2 };
        let ctx1 = if mv.y < 3 { 0 } else if mv.y <= 32 { 1 } else { 2 };
        (ctx0, ctx1)
    }
    pub fn get_mv_ref_ctx(&self, xoff: usize, yoff: usize, ref_l: usize) -> usize {
        let blk_no = xoff / 8 + (yoff / 8) * 2;
        let mut ctx = 0;
        let left_ref = self.get_left_blk8(blk_no).ref_idx[ref_l];
        let top_ref = self.get_top_blk8(blk_no).ref_idx[ref_l];
        if !left_ref.not_avail() && !left_ref.is_direct() && left_ref.index() > 0 {
            ctx += 1;
        }
        if !top_ref.not_avail() && !top_ref.is_direct() && top_ref.index() > 0 {
            ctx += 2;
        }
        ctx
    }
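    // Standard H.264 median MV prediction: A is the block to the left, B the
    // block above and C the block above-right (falling back to above-left when
    // C is unavailable), plus the one-directional special cases the spec
    // prescribes for 16x8 and 8x16 partitions.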
    #[allow(clippy::if_same_then_else)]
    pub fn predict(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, diff_mv: MV, ref_idx: PicRef) {
        let midx = self.get_cur_blk4_idx(0) + xpos / 4 + ypos / 4 * self.blk4.stride;
        let ridx = self.get_cur_blk8_idx(0) + xpos / 8 + ypos / 8 * self.blk8.stride;
        let ridx_c = self.get_cur_blk8_idx(0) + (xpos + bw) / 8 + ypos / 8 * self.blk8.stride - if (ypos & 4) == 0 { self.blk8.stride } else { 0 };

        let mv_a = self.blk4.data[midx - 1].mv[ref_l];
        let mv_b = self.blk4.data[midx - self.blk4.stride].mv[ref_l];
        let mut mv_c = self.blk4.data[midx - self.blk4.stride + bw / 4].mv[ref_l];

        let rx = if (xpos & 4) != 0 { 0 } else { 1 };
        let ry = if (ypos & 4) != 0 { 0 } else { self.blk8.stride };
        let ref_a = self.blk8.data[ridx - rx].ref_idx[ref_l];
        let ref_b = self.blk8.data[ridx - ry].ref_idx[ref_l];
        let mut ref_c = self.blk8.data[ridx_c].ref_idx[ref_l];

        if ref_c == MISSING_REF || (((xpos + bw) & 4) == 0 && (ypos & 4) != 0) {
            mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv[ref_l];
            ref_c = self.blk8.data[ridx - rx - ry].ref_idx[ref_l];
        }

        let pred_mv = if bw == 16 && bh == 8 && ypos == 0 && ref_b == ref_idx {
                mv_b
            } else if bw == 16 && bh == 8 && ypos != 0 && ref_a == ref_idx {
                mv_a
            } else if bw == 8 && bh == 16 && xpos == 0 && ref_a == ref_idx {
                mv_a
            } else if bw == 8 && bh == 16 && xpos != 0 && ref_c == ref_idx {
                mv_c
            } else if ref_b == MISSING_REF && ref_c == MISSING_REF {
                mv_a
            } else {
                let count = ((ref_a == ref_idx) as u8) + ((ref_b == ref_idx) as u8) + ((ref_c == ref_idx) as u8);
                if count == 1 {
                    if ref_a == ref_idx {
                        mv_a
                    } else if ref_b == ref_idx {
                        mv_b
                    } else {
                        mv_c
                    }
                } else {
                    MV::pred(mv_a, mv_b, mv_c)
                }
            };

        let mv = pred_mv + diff_mv;
        self.fill_mv (xpos, ypos, bw, bh, ref_l, mv);
        self.fill_ref(xpos, ypos, bw, bh, ref_l, ref_idx);
    }
    pub fn predict_pskip(&mut self) {
        let midx = self.get_cur_blk4_idx(0);
        let ridx = self.get_cur_blk8_idx(0);

        let mv_a = self.blk4.data[midx - 1].mv[0];
        let mv_b = self.blk4.data[midx - self.blk4.stride].mv[0];
        let mut mv_c = self.blk4.data[midx - self.blk4.stride + 4].mv[0];

        let ref_a = self.blk8.data[ridx - 1].ref_idx[0];
        let ref_b = self.blk8.data[ridx - self.blk8.stride].ref_idx[0];
        let mut ref_c = self.blk8.data[ridx - self.blk8.stride + 2].ref_idx[0];

        if ref_c == MISSING_REF {
            mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv[0];
            ref_c = self.blk8.data[ridx - self.blk8.stride - 1].ref_idx[0];
        }

        let ref_idx = ZERO_REF;
        let mv = if ref_a == MISSING_REF || ref_b == MISSING_REF || (ref_a == ZERO_REF && mv_a == ZERO_MV) || (ref_b == ZERO_REF && mv_b == ZERO_MV) {
                ZERO_MV
            } else {
                let count = ((ref_a == ref_idx) as u8) + ((ref_b == ref_idx) as u8) + ((ref_c == ref_idx) as u8);
                if count == 1 {
                    if ref_a == ref_idx {
                        mv_a
                    } else if ref_b == ref_idx {
                        mv_b
                    } else {
                        mv_c
                    }
                } else {
                    MV::pred(mv_a, mv_b, mv_c)
                }
            };

        self.fill_mv (0, 0, 16, 16, 0, mv);
        self.fill_ref(0, 0, 16, 16, 0, ref_idx);
    }
    pub fn predict_direct_mb(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct_8x8: bool, cur_id: u16) {
        let (col_mb, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
        if direct_8x8 {
            for blk4 in 0..16 {
                let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, BLK4_TO_D8[blk4]);
                self.get_cur_blk4(blk4).mv = [mv0, mv1];
                self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
            }
        } else if col_mb.mb_type.is_16x16_ref() || !temporal_mv {
            let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, 0);
            self.apply_to_blk4(|blk4| blk4.mv = [mv0, mv1]);
            self.apply_to_blk8(|blk8| blk8.ref_idx = [ref0, ref1]);
        } else {
            for blk4 in 0..16 {
                let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &col_mb, r1_poc, r1_long, temporal_mv, cur_id, blk4);
                self.get_cur_blk4(blk4).mv = [mv0, mv1];
                self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
            }
        }
    }
    pub fn predict_direct_sub(&mut self, frame_refs: &SimplifiedSliceRefs, temporal_mv: bool, direct8x8: bool, cur_id: u16, blk4: usize) {
        let src_blk = if !direct8x8 { blk4 } else { BLK4_TO_D8[blk4] };
        let (mbi, r1_poc, r1_long) = frame_refs.get_colocated_info(self.mb_x, self.mb_y);
        let (mv0, ref0, mv1, ref1) = self.get_direct_mv(frame_refs, &mbi, r1_poc, r1_long, temporal_mv, cur_id, src_blk);
        self.get_cur_blk4(blk4).mv = [mv0, mv1];
        self.get_cur_blk8(blk4_to_blk8(blk4)).ref_idx = [ref0, ref1];
    }
    #[allow(clippy::nonminimal_bool)]
    pub fn get_direct_mv(&self, frame_refs: &SimplifiedSliceRefs, mbi: &FrameMBInfo, r1_poc: u16, r1_long: bool, temporal_mv: bool, cur_id: u16, blk4: usize) -> (MV, PicRef, MV, PicRef) {
        let blk8 = blk4_to_blk8(blk4);
        let (col_mv, r0_poc, col_idx) = if mbi.ref_poc[blk8] == [MISSING_POC; 2] {
                (ZERO_MV, MISSING_POC, MISSING_REF)
            } else if mbi.ref_poc[blk8][0] != MISSING_POC {
                (mbi.mv[blk4][0], mbi.ref_poc[blk8][0], mbi.ref_idx[blk8][0])
            } else {
                (mbi.mv[blk4][1], mbi.ref_poc[blk8][1], mbi.ref_idx[blk8][1])
            };
        let (col_ref, r0_long) = frame_refs.map_ref0(r0_poc);
        if temporal_mv {
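            // Temporal direct mode: scale the co-located MV by the ratio of
            // the current-to-reference and reference-to-reference POC
            // distances (tb/td), in the fixed-point form the H.264 spec uses
            // (tx = (16384 + |td/2|) / td, scale = (tb * tx + 32) >> 6).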
            let td = (i32::from(r1_poc) - i32::from(r0_poc)).max(-128).min(127);
            if r0_long || td == 0 {
                (col_mv, col_ref, ZERO_MV, ZERO_REF)
            } else {
                let tx = (16384 + (td / 2).abs()) / td;
                let tb = (i32::from(cur_id) - i32::from(r0_poc)).max(-128).min(127);
                let scale = ((tb * tx + 32) >> 6).max(-1024).min(1023);
                let mv0 = MV {
                        x: ((i32::from(col_mv.x) * scale + 128) >> 8) as i16,
                        y: ((i32::from(col_mv.y) * scale + 128) >> 8) as i16,
                    };
                let mv1 = mv0 - col_mv;
                (mv0, col_ref, mv1, ZERO_REF)
            }
        } else {
            let blk4 = 0; // we generate the same MV prediction for the whole MB
            let blk8 = blk4_to_blk8(blk4);
            let midx = self.get_cur_blk4_idx(blk4);
            let ridx = self.get_cur_blk8_idx(blk8);
            let ridx_c = self.get_cur_blk8_idx(blk8) + 16 / 8 - self.blk8.stride;

            let mv_a = self.blk4.data[midx - 1].mv;
            let mv_b = self.blk4.data[midx - self.blk4.stride].mv;
            let mut mv_c = self.blk4.data[midx - self.blk4.stride + 16 / 4].mv;

            let ref_a = self.blk8.data[ridx - 1].ref_idx;
            let ref_b = self.blk8.data[ridx - self.blk8.stride].ref_idx;
            let mut ref_c = self.blk8.data[ridx_c].ref_idx;

            if ref_c == [MISSING_REF; 2] {
                mv_c = self.blk4.data[midx - self.blk4.stride - 1].mv;
                ref_c = self.blk8.data[ridx - self.blk8.stride - 1].ref_idx;
            }
            let mut refs = [INVALID_REF; 2];
            for cur_ref in [ref_a, ref_b, ref_c].iter() {
                refs[0] = refs[0].min_pos(cur_ref[0]);
                refs[1] = refs[1].min_pos(cur_ref[1]);
            }
            if refs == [INVALID_REF; 2] {
                return (ZERO_MV, ZERO_REF, ZERO_MV, ZERO_REF);
            }

            let mut col_zero = true;
            if r1_long || col_idx != ZERO_REF {
                col_zero = false;
            }
            if col_mv.x.abs() > 1 || col_mv.y.abs() > 1 {
                col_zero = false;
            }
            let mut mvs = [ZERO_MV; 2];
            for ref_l in 0..2 {
                if mbi.mb_type.is_intra() || (!refs[ref_l].not_avail() && !(refs[ref_l] == ZERO_REF && col_zero)) {
                    let ref_idx = refs[ref_l];
                    mvs[ref_l] = if ref_b[ref_l] == MISSING_REF && ref_c[ref_l] == MISSING_REF {
                            mv_a[ref_l]
                        } else {
                            let count = ((ref_a[ref_l] == ref_idx) as u8) + ((ref_b[ref_l] == ref_idx) as u8) + ((ref_c[ref_l] == ref_idx) as u8);
                            if count == 1 {
                                if ref_a[ref_l] == ref_idx {
                                    mv_a[ref_l]
                                } else if ref_b[ref_l] == ref_idx {
                                    mv_b[ref_l]
                                } else {
                                    mv_c[ref_l]
                                }
                            } else {
                                MV::pred(mv_a[ref_l], mv_b[ref_l], mv_c[ref_l])
                            }
                        };
                }
            }
            (mvs[0], refs[0], mvs[1], refs[1])
        }
    }
    pub fn fill_mv(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, mv: MV) {
        let start = self.get_cur_blk4_idx(0) + xpos / 4 + ypos / 4 * self.blk4.stride;
        for row in self.blk4.data[start..].chunks_mut(self.blk4.stride).take(bh / 4) {
            for blk in row[..bw / 4].iter_mut() {
                blk.mv[ref_l] = mv;
            }
        }
    }
    pub fn fill_mvd(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, mv: MV) {
        let mvd = MV{ x: mv.x.abs().min(128), y: mv.y.abs().min(128) };
        let start = self.get_cur_blk4_idx(0) + xpos / 4 + ypos / 4 * self.blk4.stride;
        for row in self.blk4.data[start..].chunks_mut(self.blk4.stride).take(bh / 4) {
            for blk in row[..bw / 4].iter_mut() {
                blk.mvd[ref_l] = mvd;
            }
        }
    }
    pub fn fill_ref(&mut self, xpos: usize, ypos: usize, bw: usize, bh: usize, ref_l: usize, ref_idx: PicRef) {
        let start = self.get_cur_blk8_idx(0) + xpos / 8 + ypos / 8 * self.blk8.stride;
        if bw < 8 || bh < 8 {
            self.blk8.data[start].ref_idx[ref_l] = ref_idx;
        } else {
            for row in self.blk8.data[start..].chunks_mut(self.blk8.stride).take(bh / 8) {
                for blk in row[..bw / 8].iter_mut() {
                    blk.ref_idx[ref_l] = ref_idx;
                }
            }
        }
    }
}

#[cfg(not(target_arch="x86_64"))]
fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
    let mvd0 = mv1[0] - mv2[0];
    let mvd1 = mv1[1] - mv2[1];
    (mvd0.x.abs() >= 4) || (mvd0.y.abs() >= 4) || (mvd1.x.abs() >= 4) || (mvd1.y.abs() >= 4)
}

#[cfg(target_arch="x86_64")]
fn mvdiff4(mv1: &[MV; 2], mv2: &[MV; 2]) -> bool {
    // Treat the four packed i16 MV components as one u64 and check each
    // 16-bit lane for |difference| >= 4; the reads must be unaligned since
    // MV is only 2-byte aligned.
    let mut m0 = unsafe { (mv1.as_ptr() as *const u64).read_unaligned() };
    let mut m1 = unsafe { (mv2.as_ptr() as *const u64).read_unaligned() };
    let mut flag = false;
    for _ in 0..4 {
        // the low 16 bits of the full-width subtraction equal the lane difference
        let tmp = m0.wrapping_sub(m1) as u16;
        // tmp + 3 > 6 exactly when the signed difference lies outside [-3, 3]
        flag |= tmp.wrapping_add(3) > 6;
        m0 >>= 16;
        m1 >>= 16;
    }
    flag
}
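
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal sanity checks for the helpers above; a sketch that assumes the
    // public `x`/`y` fields of MV from nihav_codec_support, as already used
    // throughout this file.
    #[test]
    fn blk4_to_blk8_matches_table() {
        // the lookup table the bit expression in blk4_to_blk8() replaced
        const MAP: [usize; 16] = [ 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 ];
        for blk4 in 0..16 {
            assert_eq!(blk4_to_blk8(blk4), MAP[blk4]);
        }
    }

    #[test]
    fn picref_ignores_direct_flag() {
        let mut pic_ref = PicRef::new(1);
        pic_ref.set_direct();
        assert!(pic_ref.is_direct());
        assert_eq!(pic_ref.index(), 1);       // index masks the flag out
        assert_eq!(pic_ref, PicRef::new(1));  // equality does too
    }

    #[test]
    fn mvdiff4_threshold() {
        let base  = [ZERO_MV; 2];
        let close = [MV { x: 3, y: -3 }, ZERO_MV];
        let far   = [ZERO_MV, MV { x: 0, y: 4 }];
        assert!(!mvdiff4(&base, &close)); // all component differences below 4
        assert!(mvdiff4(&base, &far));    // one component differs by exactly 4
    }
}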