1 use nihav_core::frame::NASimpleVideoFrame;
2 use super::super::types::DeblockInfo;
// Bit masks applied to the luma coded/deblock patterns in `loop_filter_frame`.
// The names describe rows/columns of a 4x4 grid with one bit per entry and
// four bits per row (NOTE(review): the luma bit-index computation is not
// visible in this listing; the layout is inferred from the mask values).
/// Bits 0..=3 of a luma pattern.
5 const Y_TOP_ROW_MASK: u32 = 0x000F;
/// Bits 12..=15 of a luma pattern.
6 const Y_BOT_ROW_MASK: u32 = 0xF000;
/// Bits 0, 4, 8 and 12 of a luma pattern.
7 const Y_LEFT_COL_MASK: u32 = 0x1111;
/// Bits 3, 7, 11 and 15 of a luma pattern.
8 const Y_RIGHT_COL_MASK: u32 = 0x8888;
// Bit masks applied to a 4-bit chroma pattern; chroma blocks are indexed as
// `x + y * 2` in `loop_filter_frame`, i.e. a 2x2 grid with two bits per row.
/// Bits 0 and 1 of a chroma pattern.
9 const C_TOP_ROW_MASK: u8 = 0x3;
/// Bits 2 and 3 of a chroma pattern.
10 const C_BOT_ROW_MASK: u8 = 0xC;
/// Bits 0 and 2 of a chroma pattern.
11 const C_LEFT_COL_MASK: u8 = 0x5;
/// Bits 1 and 3 of a chroma pattern.
12 const C_RIGHT_COL_MASK: u8 = 0xA;
/// Evaluates to `true` when bit number `$x` of `$pat` is set
/// (bit 0 is the least significant bit).
14 macro_rules! test_bit {
15 ($pat: expr, $x: expr) => ( (($pat >> $x) & 1) != 0 )
/// Runs the RV40 deblocking (loop) filter over the picture stored in `dst`.
///
/// NOTE(review): this listing omits a number of original lines (loop headers,
/// several branch bodies and closing braces). The comments below describe only
/// what the visible statements establish; anything else is marked as an assumption.
///
/// * `dst`  - frame to filter in place; `data`, `offset`, `stride`, `width` and `height` are used.
/// * `dblk` - per-macroblock deblocking information indexed by `mb_pos`; the entry for the
///   macroblock above is read at `mb_pos - mb_w`, the left one at `mb_pos - 1` and the one
///   below at `mb_pos + mb_w`.
/// * `mb_w` - number of macroblocks per row (also the row step inside `dblk`).
/// * `mb_h` - number of macroblock rows (used to detect the last row).
18 pub fn loop_filter_frame(dst: &mut NASimpleVideoFrame<u8>, dblk: &[DeblockInfo], mb_w: usize, mb_h: usize) {
// Pictures whose luma area is at most 176x144 use a larger luma `beta_y` (see below).
19 let small_frame = dst.width[0] * dst.height[0] <= 176 * 144;
23 let is_last_row = mb_y == mb_h - 1;
// State of the macroblock to the left of the current one; zero for the first
// macroblock processed with these values (presumably reset per row — the loop
// headers are not visible here).
24 let mut left_q: usize = 0;
25 let mut left_cbp_y = 0;
26 let mut left_cbp_c = 0;
27 let mut left_dbk_y = 0;
// The macroblock quantiser selects the filter thresholds from the tables.
30 let q = usize::from(dblk[mb_pos].q);
31 let alpha = RV40_ALPHA_TAB[q];
32 let beta = RV40_BETA_TAB[q];
33 let beta_y = if small_frame { beta * 4 } else { beta * 3 };
34 let beta_c = beta * 3;
// "Strong" flags of this macroblock and its neighbours; a neighbour outside
// the picture counts as not strong.
36 let is_strong = dblk[mb_pos].is_strong;
37 let top_is_strong = mb_y > 0 && dblk[mb_pos - mb_w].is_strong;
38 let left_is_strong = mb_x > 0 && dblk[mb_pos - 1].is_strong;
39 let bot_is_strong = !is_last_row && dblk[mb_pos + mb_w].is_strong;
41 let cur_dbk_y = dblk[mb_pos].deblock_y;
// A strong macroblock is treated as if all 16 luma pattern bits were set.
42 let cur_cbp_y = if is_strong { 0xFFFF } else { u32::from(dblk[mb_pos].cbp_y) };
// Pattern and deblock bits of the macroblock above (the branch for the top
// picture row is not visible in this listing).
44 let (top_cbp_y, top_dbk_y) = if mb_y > 0 {
45 (if top_is_strong { 0xFFFF } else { u32::from(dblk[mb_pos - mb_w].cbp_y) }, dblk[mb_pos - mb_w].deblock_y)
49 let bot_dbk_y = if !is_last_row {
50 dblk[mb_pos + mb_w].deblock_y
// Low 16 bits: deblock bits of this macroblock; bits 16 and up: those of the one below.
55 let y_to_deblock = (cur_dbk_y as u32) | ((bot_dbk_y as u32) << 16);
// Horizontal-edge mask: additionally mark the position one row below every set
// pattern bit, and copy the bottom row of the top macroblock into bits 0..=3.
56 let mut y_h_deblock = y_to_deblock | ((cur_cbp_y << 4) & !Y_TOP_ROW_MASK) | ((top_cbp_y & Y_BOT_ROW_MASK) >> 12);
// Vertical-edge mask: additionally mark the position one column to the right of
// every set pattern bit, and copy the right column of the left macroblock into
// the left column.
57 let mut y_v_deblock = y_to_deblock | ((cur_cbp_y << 1) & !Y_LEFT_COL_MASK) | ((left_cbp_y & Y_RIGHT_COL_MASK) >> 3);
// Clears the left-column bits (guard not visible; presumably for the first macroblock column).
60 y_v_deblock &= !Y_LEFT_COL_MASK;
// Clears the top-row bits (guard not visible; presumably for the first macroblock row).
63 y_h_deblock &= !Y_TOP_ROW_MASK;
// Drop bits 16..=19 (the edge shared with the macroblock below) on the last row
// or when either side of that edge is strong.
65 if is_last_row || is_strong || bot_is_strong {
66 y_h_deblock &= !(Y_TOP_ROW_MASK << 16);
// Luma offset of the first sample of block row `y` inside this 16x16 macroblock.
70 let yoff = dst.offset[0] + mb_x * 16 + (mb_y * 16 + y * 4) * dst.stride[0];
// The left macroblock edge gets the edge-mode filter when either side is strong.
73 let ver_strong = (x == 0) && (mb_x > 0) && (is_strong || left_is_strong);
// Filter strengths of the current block and its four neighbours; they are used
// below as the row index into RV40_FILTER_CLIP_TBL. The assigned values are not
// visible in this listing, only the conditions that select them.
75 let cur_strength: usize;
78 } else if test_bit!(cur_dbk_y, bpos) {
84 let left_strength: usize;
// Left neighbour inside this macroblock ...
88 } else if test_bit!(cur_dbk_y, bpos - 1) {
// ... or the block three positions further in the left macroblock's pattern.
96 } else if test_bit!(left_dbk_y, bpos + 3) {
105 let bot_strength: usize;
// Block directly below inside this macroblock ...
109 } else if test_bit!(cur_dbk_y, bpos + 4) {
// ... or block `x` of the macroblock below when there is one.
114 } else if !is_last_row {
115 if dblk[mb_pos + mb_w].is_strong {
117 } else if test_bit!(bot_dbk_y, x) {
126 let top_strength: usize;
// Block directly above inside this macroblock ...
130 } else if test_bit!(cur_dbk_y, bpos - 4) {
// ... or the block twelve positions further in the top macroblock's pattern.
138 } else if test_bit!(top_dbk_y, bpos + 12) {
// Quantiser for the left neighbour: own `q` inside the macroblock, `left_q` on its left edge.
147 let l_q = if x > 0 { q } else { left_q };
148 let top_q = if mb_y > 0 { usize::from(dblk[mb_pos - mb_w].q) } else { 0 };
// Clipping limits for each side, looked up by strength and quantiser.
150 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
151 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
152 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
153 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
// Base index into the dither tables, forwarded to the filter functions.
155 let dmode = if y > 0 { x + y * 4 } else { x * 4 };
// Horizontal edge four rows below the top of this block (normal mode).
157 if test_bit!(y_h_deblock, bpos + 4) {
158 rv40_loop_filter4_h(dst.data, yoff + 4 * dst.stride[0] + x * 4, dst.stride[0],
159 dmode, lim_cur, lim_bottom, alpha, beta, beta_y, false, false);
// Left vertical edge of this block (normal mode).
161 if test_bit!(y_v_deblock, bpos) && !ver_strong {
162 rv40_loop_filter4_v(dst.data, yoff + x * 4, dst.stride[0],
163 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, false);
// Top edge of the macroblock in edge mode, only when this or the top macroblock is strong.
165 if (y == 0) && test_bit!(y_h_deblock, bpos) && (is_strong || top_is_strong) {
166 rv40_loop_filter4_h(dst.data, yoff + x * 4, dst.stride[0],
167 dmode, lim_top, lim_cur, alpha, beta, beta_y, false, true);
// Left edge of the macroblock in edge mode.
169 if test_bit!(y_v_deblock, bpos) && ver_strong {
170 rv40_loop_filter4_v(dst.data, yoff + x * 4, dst.stride[0],
171 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, true);
// Chroma: same procedure on 4-bit patterns taken from `cbp_c`.
176 let cur_cbp_c = dblk[mb_pos].cbp_c;
177 let top_cbp_c = if mb_y > 0 {
178 if top_is_strong { 0xFF } else { dblk[mb_pos - mb_w].cbp_c }
182 let bot_cbp_c = if !is_last_row {
183 dblk[mb_pos + mb_w].cbp_c
// Plane `comp` uses the nibble at bit offset `(comp - 1) * 4` of each `cbp_c` value
// (the loop over `comp` is not visible; presumably planes 1 and 2).
188 let cshift = (comp - 1) * 4;
189 let c_cur_cbp = (cur_cbp_c >> cshift) & 0xF;
190 let c_top_cbp = (top_cbp_c >> cshift) & 0xF;
191 let c_left_cbp = (left_cbp_c >> cshift) & 0xF;
192 let c_bot_cbp = (bot_cbp_c >> cshift) & 0xF;
// Low nibble: this macroblock; high nibble: the macroblock below.
194 let c_deblock = c_cur_cbp | (c_bot_cbp << 4);
195 let mut c_v_deblock = c_deblock | ((c_cur_cbp << 1) & !C_LEFT_COL_MASK) | ((c_left_cbp & C_RIGHT_COL_MASK) >> 1);
196 let mut c_h_deblock = c_deblock | ((c_cur_cbp & C_TOP_ROW_MASK) << 2) | ((c_top_cbp & C_BOT_ROW_MASK) >> 2);
// Clears the left-column bits (guard not visible; presumably for the first macroblock column).
198 c_v_deblock &= !C_LEFT_COL_MASK;
// Clears the top-row bits (guard not visible; presumably for the first macroblock row).
201 c_h_deblock &= !C_TOP_ROW_MASK;
203 if is_last_row || is_strong || bot_is_strong {
204 c_h_deblock &= !(C_TOP_ROW_MASK << 4);
// Chroma offset of block row `y` inside this 8x8 chroma macroblock area.
208 let coff = dst.offset[comp] + mb_x * 8 + (mb_y * 8 + y * 4) * dst.stride[comp];
// Bit index of the current chroma block in a 2x2 pattern.
210 let bpos = x + y * 2;
// Unlike luma there is no `mb_x > 0` term here; the vertical mask bit tested
// together with this flag has its left column cleared above.
212 let ver_strong = (x == 0) && (is_strong || left_is_strong);
// Strength selection mirrors the luma code with 2x2 indexing; the assigned
// values are again not visible in this listing.
214 let cur_strength: usize;
217 } else if test_bit!(c_cur_cbp, bpos) {
223 let left_strength: usize;
227 } else if test_bit!(c_cur_cbp, bpos - 1) {
235 } else if test_bit!(c_left_cbp, bpos + 1) {
244 let bot_strength: usize;
248 } else if test_bit!(c_cur_cbp, bpos + 2) {
253 } else if !is_last_row {
254 if dblk[mb_pos + mb_w].is_strong {
256 } else if test_bit!(c_bot_cbp, x) {
265 let top_strength: usize;
269 } else if test_bit!(c_cur_cbp, bpos - 2) {
277 } else if test_bit!(c_top_cbp, bpos + 2) {
286 let l_q = if x > 0 { q } else { left_q };
287 let top_q = if mb_y > 0 { usize::from(dblk[mb_pos - mb_w].q) } else { 0 };
289 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
290 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
291 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
292 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
// Same four filter calls as for luma, with `chroma = true`, `beta_c` as the
// second threshold and `x * 8` / `y * 8` as the dither base index.
294 if test_bit!(c_h_deblock, bpos + 2) {
295 rv40_loop_filter4_h(dst.data, coff + 4 * dst.stride[comp] + x * 4, dst.stride[comp],
296 x * 8, lim_cur, lim_bottom, alpha, beta, beta_c, true, false);
298 if test_bit!(c_v_deblock, bpos) && !ver_strong {
299 rv40_loop_filter4_v(dst.data, coff + x * 4, dst.stride[comp],
300 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, false);
302 if (y == 0) && test_bit!(c_h_deblock, bpos) && (is_strong || top_is_strong) {
303 rv40_loop_filter4_h(dst.data, coff + x * 4, dst.stride[comp],
304 x * 8, lim_top, lim_cur, alpha, beta, beta_c, true, true);
306 if test_bit!(c_v_deblock, bpos) && ver_strong {
307 rv40_loop_filter4_v(dst.data, coff + x * 4, dst.stride[comp],
308 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, true);
// Remember this macroblock's patterns as the "left" state for the next one
// (the matching update of `left_q` is not visible in this listing).
315 left_dbk_y = cur_dbk_y;
316 left_cbp_y = cur_cbp_y;
317 left_cbp_c = cur_cbp_c;
// Macro arm: reads element `$o` of `$src` and converts it to `i16`.
// (The enclosing `macro_rules!` header is not visible in this listing; the
// filter functions below invoke it as `el!`.)
325 ($src: ident, $o: expr) => ($src[$o] as i16);
/// Takes a value `a` and a limit `lim` and returns an `i16`.
///
/// NOTE(review): the body is not visible in this listing. Callers pass a filter
/// correction and a clipping limit, so this presumably clamps `a` to
/// `[-lim, lim]` — TODO confirm against the full file.
328 fn clip_symm(a: i16, lim: i16) -> i16 {
/// Weak (normal) RV40 edge filter working on arbitrary sample steps.
///
/// * `pix`, `off` - sample buffer and the index of the first sample on the `q` side of the edge.
/// * `step`   - distance between neighbouring samples across the edge
///   (`p0` is at `off - step`, `q1` at `off + step`).
/// * `stride` - NOTE(review): its use is not visible in this listing; presumably the
///   distance between successive positions along the edge, added to `off` per iteration.
/// * `filter_p1`, `filter_q1` - whether the second sample on the `p` / `q` side may be modified.
/// * `alpha`, `beta` - activity thresholds.
/// * `lim_p0q0`, `lim_p1`, `lim_q1` - clipping limits for the corrections of `p0`/`q0`, `p1` and `q1`.
338 fn rv40_weak_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
339 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
340 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
// The two samples adjacent to the edge.
342 let p0 = el!(pix, off - step);
343 let q0 = el!(pix, off);
// `t` is defined on a line not visible here (presumably the p0/q0 difference).
// The scaled magnitude is compared with 2 when both sides are filtered and 3
// otherwise; the body of that branch is not visible.
351 let u = (alpha * t.wrapping_abs()) >> 7;
352 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
// Remaining samples: two more on each side of the edge.
357 let p2 = el!(pix, off - 3*step);
358 let p1 = el!(pix, off - 2*step);
359 let q1 = el!(pix, off + step);
360 let q2 = el!(pix, off + 2*step);
// With both sides enabled the p1/q1 difference contributes to the correction
// (the alternative branch is not visible in this listing).
363 if filter_p1 && filter_q1 {
364 strength = (t << 2) + (p1 - q1);
// Rounded, limited correction applied with opposite signs to p0 and q0.
369 let diff = clip_symm((strength + 4) >> 3, lim_p0q0);
370 pix[off - step] = clip8(p0 + diff);
371 pix[off ] = clip8(q0 - diff);
// p1 is adjusted only when enabled and the p side is smooth (|p1 - p2| <= beta).
373 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
374 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
375 pix[off - 2*step] = clip8(p1 - clip_symm(p1_diff, lim_p1));
// Same for q1 on the other side of the edge.
378 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
379 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
380 pix[off + step] = clip8(q1 - clip_symm(q1_diff, lim_q1));
/// Weak filter for a horizontal edge: forwards to `rv40_weak_loop_filter4` with
/// `step = stride` (samples across the edge are one row apart) and `stride = 1`.
/// All other arguments are passed through unchanged.
387 fn rv40_weak_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
388 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
389 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
390 rv40_weak_loop_filter4(pix, off, stride, 1, filter_p1, filter_q1, alpha, beta, lim_p0q0, lim_p1, lim_q1);
/// Weak filter for a vertical edge, written on row slices instead of computed offsets.
///
/// Each processed row is a chunk that starts three samples left of `off`, so the
/// samples appear at fixed indices: `p2, p1, p0` at 0..=2 and `q0, q1, q2` at 3..=5.
/// Parameters have the same meaning as in `rv40_weak_loop_filter4`.
// The allow presumably covers index expressions such as `3 - 3` below, which are
// kept in that form to show the distance from the edge.
392 #[allow(clippy::eq_op)]
393 fn rv40_weak_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
394 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
395 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
// Window covering three full rows plus the six needed samples of a fourth row.
396 let src = &mut pix[off - 3..][..stride * 3 + 3 + 3];
// One chunk per picture row, four rows in total.
397 for ch in src.chunks_mut(stride).take(4) {
398 assert!(ch.len() >= 3 + 3);
399 let p0 = el!(ch, 3 - 1);
// `q0` and `t` are defined on lines not visible in this listing; the body of
// the threshold branch below is not visible either.
407 let u = (alpha * t.wrapping_abs()) >> 7;
408 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
412 let p2 = el!(ch, 3 - 3);
413 let p1 = el!(ch, 3 - 2);
414 let q1 = el!(ch, 3 + 1);
415 let q2 = el!(ch, 3 + 2);
418 if filter_p1 && filter_q1 {
419 strength = (t << 2) + (p1 - q1);
// Rounded, limited correction applied with opposite signs to p0 and q0.
424 let diff = clip_symm((strength + 4) >> 3, lim_p0q0);
425 ch[3 - 1] = clip8(p0 + diff);
426 ch[3 ] = clip8(q0 - diff);
// Optional adjustment of p1 / q1 when the respective side is smooth.
428 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
429 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
430 ch[3 - 2] = clip8(p1 - clip_symm(p1_diff, lim_p1));
433 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
434 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
435 ch[3 + 1] = clip8(q1 - clip_symm(q1_diff, lim_q1));
/// Five-tap smoothing kernel used by the strong filter.
///
/// Computes `(25 * (a + e) + 26 * (b + c + d) + dither) >> 7`; the weights sum to 128,
/// so the shift by 7 normalises the result. `c` is the centre sample.
///
/// NOTE(review): most of the body is not visible in this listing. The visible
/// comparison `val > c + lims` suggests that with `clip` set the result is kept
/// within `lims` of `c` — TODO confirm.
440 #[allow(clippy::many_single_char_names)]
441 fn sfilter(a: i16, b: i16, c: i16, d: i16, e: i16, dither: i16, clip: bool, lims: i16) -> i16 {
442 let val = (25 * (a + e) + 26 * (b + c + d) + dither) >> 7;
446 } else if val > c + lims {
/// Strong RV40 edge filter: replaces up to three samples on each side of the edge
/// with smoothed values produced by `sfilter`.
///
/// * `pix`, `off` - sample buffer and index of the first sample on the `q` side.
/// * `step`   - distance between neighbouring samples across the edge.
/// * `stride` - NOTE(review): use not visible here; presumably the advance along the edge.
/// * `alpha`  - threshold scale used to derive `fmode`.
/// * `lims`   - clipping limit forwarded to `sfilter`.
/// * `dmode`  - base index into the dither tables; `i` (defined on a line not visible
///   here, presumably the loop counter along the edge) is added to it.
/// * `chroma` - NOTE(review): use not visible in this listing.
456 fn rv40_strong_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
457 alpha: i16, lims: i16, dmode: usize, chroma: bool) {
459 let p0 = el!(pix, off - step);
460 let q0 = el!(pix, off);
// `t` is defined on a line not visible here. A non-zero `fmode` enables
// clipping inside `sfilter` for the four inner samples.
468 let fmode = (alpha * t.wrapping_abs()) >> 7;
// Four samples on each side of the edge.
474 let p3 = el!(pix, off - 4*step);
475 let p2 = el!(pix, off - 3*step);
476 let p1 = el!(pix, off - 2*step);
477 let q1 = el!(pix, off + step);
478 let q2 = el!(pix, off + 2*step);
479 let q3 = el!(pix, off + 3*step);
// New p0/q0 from the original neighbourhood, using the left/right dither tables.
481 let np0 = sfilter(p2, p1, p0, q0, q1, RV40_DITHER_L[dmode + i], fmode != 0, lims);
482 let nq0 = sfilter( p1, p0, q0, q1, q2, RV40_DITHER_R[dmode + i], fmode != 0, lims);
// New p1/q1 already use the freshly computed np0/nq0.
484 let np1 = sfilter(p3, p2, p1, np0, q0, RV40_DITHER_L[dmode + i], fmode != 0, lims);
485 let nq1 = sfilter( p0, nq0, q1, q2, q3, RV40_DITHER_R[dmode + i], fmode != 0, lims);
487 pix[off - 2*step] = np1 as u8;
488 pix[off - step] = np0 as u8;
489 pix[off] = nq0 as u8;
490 pix[off + step] = nq1 as u8;
// Third sample on each side: fixed rounding term 64, no clipping. The guard
// around these lines is not visible (presumably luma only — TODO confirm).
493 let np2 = sfilter(np0, np1, p2, p3, p2, 64, false, 0);
494 let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0);
495 pix[off - 3*step] = np2 as u8;
496 pix[off + 2*step] = nq2 as u8;
/// Measures sample activity around an edge and decides which filter to apply.
///
/// Returns `(strong, filter_p1, filter_q1)`:
/// * `filter_p1` / `filter_q1` - the accumulated `p1 - p0` / `q1 - q0` difference is below `beta * 4`
///   in magnitude;
/// * `strong` - only when both flags are set and `edge` is true, and additionally the accumulated
///   `p1 - p2` and `q1 - q2` differences are both below `beta2` in magnitude.
///
/// `step` is the distance between samples across the edge. The loops that
/// accumulate the sums and define `off1` are not visible in this listing.
503 fn rv40_loop_strength(pix: &[u8], off: usize, step: usize, stride: usize,
504 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
505 let mut sum_p1p0 = 0;
506 let mut sum_q1q0 = 0;
// Signed differences between the two samples nearest the edge on each side.
510 sum_p1p0 += el!(pix, off1 - 2 * step) - el!(pix, off1 - step);
511 sum_q1q0 += el!(pix, off1 + step) - el!(pix, off1);
515 let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
516 let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;
// The strong filter is considered only for edge-mode calls with both sides smooth.
518 if (!filter_p1 || !filter_q1) || !edge {
519 return (false, filter_p1, filter_q1);
522 let mut sum_p1p2 = 0;
523 let mut sum_q1q2 = 0;
// Signed differences between the second and third sample on each side.
527 sum_p1p2 += el!(pix, off1 - 2 * step) - el!(pix, off1 - 3 * step);
528 sum_q1q2 += el!(pix, off1 + step) - el!(pix, off1 + 2 * step);
532 let strong = (sum_p1p2.wrapping_abs() < beta2) && (sum_q1q2.wrapping_abs() < beta2);
534 (strong, filter_p1, filter_q1)
/// Strength decision for a horizontal edge: forwards to `rv40_loop_strength` with
/// `step = stride` and `stride = 1`, returning its `(strong, filter_p1, filter_q1)` result.
537 fn rv40_loop_strength_h(pix: &[u8], off: usize, stride: usize,
538 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
539 rv40_loop_strength(pix, off, stride, 1, beta, beta2, edge)
/// Strength decision for a vertical edge, written on row slices.
///
/// Each of the four rows is a chunk that starts three samples left of `off`, so
/// `p2, p1, p0` are at indices 0..=2 and `q0, q1, q2` at 3..=5.
/// Returns `(strong, filter_p1, filter_q1)` with the same meaning as `rv40_loop_strength`.
// The allow presumably covers index expressions such as `3 - 3` below.
542 #[allow(clippy::eq_op)]
543 fn rv40_loop_strength_v(pix: &[u8], off: usize, stride: usize,
544 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
// Window covering three full rows plus the six needed samples of a fourth row.
545 let src = &pix[off - 3..][..stride * 3 + 3 + 3];
546 let mut sum_p1p0 = 0;
547 let mut sum_q1q0 = 0;
// Accumulate p1 - p0 and q1 - q0 over the four rows.
549 for ch in src.chunks(stride).take(4) {
550 assert!(ch.len() >= 3 + 3);
551 sum_p1p0 += el!(ch, 3 - 2) - el!(ch, 3 - 1);
552 sum_q1q0 += el!(ch, 3 + 1) - el!(ch, 3);
555 let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
556 let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;
// The strong filter is considered only for edge-mode calls with both sides smooth.
558 if (!filter_p1 || !filter_q1) || !edge {
559 return (false, filter_p1, filter_q1);
562 let mut sum_p1p2 = 0;
563 let mut sum_q1q2 = 0;
// Accumulate p1 - p2 and q1 - q2 over the same four rows.
565 for ch in src.chunks(stride).take(4) {
566 assert!(ch.len() >= 3 + 3);
567 sum_p1p2 += el!(ch, 3 - 2) - el!(ch, 3 - 3);
568 sum_q1q2 += el!(ch, 3 + 1) - el!(ch, 3 + 2);
571 let strong = (sum_p1p2.wrapping_abs() < beta2) && (sum_q1q2.wrapping_abs() < beta2);
573 (strong, filter_p1, filter_q1)
/// Filters one horizontal edge segment starting at `off`.
///
/// * `dmode`  - dither base index, used by the strong filter only.
/// * `lim_p1`, `lim_q1` - clipping limits for the blocks on the `p` and `q` side.
/// * `alpha`, `beta`, `beta2` - thresholds; `beta`/`beta2` feed the strength decision.
/// * `chroma` - forwarded to the strong filter.
/// * `edge`   - allows the strong filter to be selected.
576 fn rv40_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
577 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
578 chroma: bool, edge: bool) {
579 let (strong, filter_p1, filter_q1) = rv40_loop_strength_h(pix, off, stride, beta, beta2, edge);
// Combined limit: average of both side limits plus one, plus one for each enabled side.
580 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
// First branch (its condition is not visible in this listing; presumably `strong`).
583 rv40_strong_loop_filter4(pix, off, stride, 1, alpha, lims, dmode, chroma);
// Both sides smooth: weak filter with full limits.
584 } else if filter_p1 && filter_q1 {
585 rv40_weak_loop_filter4_h(pix, off, stride, true, true, alpha, beta,
586 lims, lim_p1, lim_q1);
// Only one side smooth: weak filter with all limits halved.
587 } else if filter_p1 || filter_q1 {
588 rv40_weak_loop_filter4_h(pix, off, stride, filter_p1, filter_q1, alpha, beta,
589 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
/// Filters one vertical edge segment starting at `off`.
///
/// Same parameters and decision logic as `rv40_loop_filter4_h`; the strong filter is
/// called with `step = 1` and `stride` as given, and the `_v` variants of the
/// strength and weak-filter helpers are used.
593 fn rv40_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
594 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
595 chroma: bool, edge: bool) {
596 let (strong, filter_p1, filter_q1) = rv40_loop_strength_v(pix, off, stride, beta, beta2, edge);
// Combined limit: average of both side limits plus one, plus one for each enabled side.
597 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
// First branch (its condition is not visible in this listing; presumably `strong`).
600 rv40_strong_loop_filter4(pix, off, 1, stride, alpha, lims, dmode, chroma);
// Both sides smooth: weak filter with full limits.
601 } else if filter_p1 && filter_q1 {
602 rv40_weak_loop_filter4_v(pix, off, stride, true, true, alpha, beta,
603 lims, lim_p1, lim_q1);
// Only one side smooth: weak filter with all limits halved.
604 } else if filter_p1 || filter_q1 {
605 rv40_weak_loop_filter4_v(pix, off, stride, filter_p1, filter_q1, alpha, beta,
606 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
/// Rounding (dither) terms passed to `sfilter` for samples on the `p` side of an
/// edge in the strong filter; indexed by `dmode + i`.
610 const RV40_DITHER_L: [i16; 16] = [
611 0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
612 0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
/// Rounding (dither) terms passed to `sfilter` for samples on the `q` side of an
/// edge in the strong filter; indexed by `dmode + i`.
614 const RV40_DITHER_R: [i16; 16] = [
615 0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
616 0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
/// `alpha` threshold scale for each macroblock quantiser value (index 0..=31);
/// the values stay at 128 up to index 9 and then fall to 1.
619 const RV40_ALPHA_TAB: [i16; 32] = [
620 128, 128, 128, 128, 128, 128, 128, 128,
621 128, 128, 122, 96, 75, 59, 47, 37,
622 29, 23, 18, 15, 13, 11, 10, 9,
623 8, 7, 6, 5, 4, 3, 2, 1
/// `beta` smoothness threshold for each macroblock quantiser value (index 0..=31);
/// zero for the first eight entries, then rising to 17.
626 const RV40_BETA_TAB: [i16; 32] = [
627 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, 4, 4, 6, 6,
628 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 13, 14, 15, 16, 17
/// Clipping limits for filter corrections, indexed as `[strength][quantiser]`
/// with three strength rows of 32 entries each.
// The row delimiters are not visible in this listing; the three value groups
// below are the rows in order, the first being all zeros.
631 const RV40_FILTER_CLIP_TBL: [[i16; 32]; 3] = [
633 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
634 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
636 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
637 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5
639 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
640 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 7, 8, 9