1 use nihav_core::frame::NASimpleVideoFrame;
2 use super::super::types::DeblockInfo;
// Bit masks over the 16-bit luma block patterns. The bit layout is consistent with a
// 4x4 block grid where bit `x + 4*y` stands for block (x, y): bits 0..3 form the top row,
// bits 12..15 the bottom row, bits 0/4/8/12 the left column and bits 3/7/11/15 the right
// column. NOTE(review): the luma bit-index computation itself is not visible in this
// excerpt - confirm against the full source.
5 const Y_TOP_ROW_MASK: u32 = 0x000F;
6 const Y_BOT_ROW_MASK: u32 = 0xF000;
7 const Y_LEFT_COL_MASK: u32 = 0x1111;
8 const Y_RIGHT_COL_MASK: u32 = 0x8888;
// Bit masks over the 4-bit per-plane chroma patterns (2x2 block grid, bit `x + 2*y`):
// bits 0..1 are the top row, bits 2..3 the bottom row, bits 0/2 the left column and
// bits 1/3 the right column.
9 const C_TOP_ROW_MASK: u8 = 0x3;
10 const C_BOT_ROW_MASK: u8 = 0xC;
11 const C_LEFT_COL_MASK: u8 = 0x5;
12 const C_RIGHT_COL_MASK: u8 = 0xA;
// `test_bit!(pat, x)` evaluates to `true` when bit number `x` of `pat` is set.
14 macro_rules! test_bit {
15 ($pat: expr, $x: expr) => ( (($pat >> $x) & 1) != 0 )
/// Runs the RV40 loop filter over `dst`, driven by the per-macroblock entries of `dblk`.
///
/// `dblk` is indexed by `mb_pos`; the left, top and bottom neighbours are read at
/// `mb_pos - 1`, `mb_pos - mb_w` and `mb_pos + mb_w`. Plane 0 is processed as 16x16
/// macroblocks split into 4x4 blocks, the other planes as 8x8 macroblocks split into
/// 4x4 blocks.
///
/// NOTE(review): this excerpt omits a number of lines of the function (loop headers,
/// several branch bodies and closing braces); the comments below describe only the
/// statements that are visible.
18 pub fn loop_filter_frame(dst: &mut NASimpleVideoFrame<u8>, dblk: &[DeblockInfo], mb_w: usize, mb_h: usize) {
// A plane 0 area of at most 176x144 selects the larger luma beta multiplier below.
19 let small_frame = dst.width[0] * dst.height[0] <= 176 * 144;
23 let is_last_row = mb_y == mb_h - 1;
// State inherited from the macroblock to the left; the pattern values are refreshed
// by the assignments at the end of the macroblock body.
24 let mut left_q: usize = 0;
25 let mut left_cbp_y = 0;
26 let mut left_cbp_c = 0;
27 let mut left_dbk_y = 0;
// The macroblock's `q` selects the alpha/beta thresholds from the tables.
30 let q = usize::from(dblk[mb_pos].q);
31 let alpha = RV40_ALPHA_TAB[q];
32 let beta = RV40_BETA_TAB[q];
33 let beta_y = if small_frame { beta * 4 } else { beta * 3 };
34 let beta_c = beta * 3;
// "Strong" flags of this macroblock and its neighbours; a neighbour that lies outside
// the frame counts as not strong.
36 let is_strong = dblk[mb_pos].is_strong;
37 let top_is_strong = mb_y > 0 && dblk[mb_pos - mb_w].is_strong;
38 let left_is_strong = mb_x > 0 && dblk[mb_pos - 1].is_strong;
39 let bot_is_strong = !is_last_row && dblk[mb_pos + mb_w].is_strong;
41 let cur_dbk_y = dblk[mb_pos].deblock_y;
// A strong macroblock is treated as if every bit of its luma pattern were set.
42 let cur_cbp_y = if is_strong { 0xFFFF } else { u32::from(dblk[mb_pos].cbp_y) };
44 let (top_cbp_y, top_dbk_y) = if mb_y > 0 {
45 (if top_is_strong { 0xFFFF } else { u32::from(dblk[mb_pos - mb_w].cbp_y) }, dblk[mb_pos - mb_w].deblock_y)
49 let bot_dbk_y = if !is_last_row {
50 dblk[mb_pos + mb_w].deblock_y
// Low 16 bits: deblock pattern of this macroblock; bits 16 and up: pattern of the
// macroblock below.
55 let y_to_deblock = (cur_dbk_y as u32) | ((bot_dbk_y as u32) << 16);
// Horizontal-edge flags: additionally set for a block when the block above it has its
// cbp bit set (`<< 4` moves a bit one block row down); the top row takes the bottom row
// of the macroblock above.
56 let mut y_h_deblock = y_to_deblock | ((cur_cbp_y << 4) & !Y_TOP_ROW_MASK) | ((top_cbp_y & Y_BOT_ROW_MASK) >> 12);
// Vertical-edge flags: additionally set for a block when the block to its left has its
// cbp bit set (`<< 1` moves a bit one block to the right); the left column takes the
// right column of the macroblock to the left.
57 let mut y_v_deblock = y_to_deblock | ((cur_cbp_y << 1) & !Y_LEFT_COL_MASK) | ((left_cbp_y & Y_RIGHT_COL_MASK) >> 3);
// NOTE(review): the conditions guarding the next two statements are not visible in
// this excerpt.
60 y_v_deblock &= !Y_LEFT_COL_MASK;
63 y_h_deblock &= !Y_TOP_ROW_MASK;
// Clears bits 16..19, which are the ones tested (as `bpos + 4`) for the bottom edge of
// the last block row.
65 if is_last_row || is_strong || bot_is_strong {
66 y_h_deblock &= !(Y_TOP_ROW_MASK << 16);
// Offset of the first sample of block row `y` of this macroblock in plane 0.
70 let yoff = dst.offset[0] + mb_x * 16 + (mb_y * 16 + y * 4) * dst.stride[0];
// Left macroblock edge (not the frame edge) with a strong macroblock on either side.
73 let ver_strong = (x == 0) && (mb_x > 0) && (is_strong || left_is_strong);
// Strengths (first index into RV40_FILTER_CLIP_TBL) of the current block and of its
// left, bottom and top neighbours; the assignments are only partly visible here.
75 let cur_strength: usize;
78 } else if test_bit!(cur_dbk_y, bpos) {
84 let left_strength: usize;
88 } else if test_bit!(cur_dbk_y, bpos - 1) {
96 } else if test_bit!(left_dbk_y, bpos + 3) {
105 let bot_strength: usize;
109 } else if test_bit!(cur_dbk_y, bpos + 4) {
114 } else if !is_last_row {
115 if dblk[mb_pos + mb_w].is_strong {
117 } else if test_bit!(bot_dbk_y, x) {
126 let top_strength: usize;
130 } else if test_bit!(cur_dbk_y, bpos - 4) {
138 } else if test_bit!(top_dbk_y, bpos + 12) {
// `q` used for the left and top limits: the leftmost block column uses `left_q`, and
// the top value falls back to 0 in the first macroblock row.
147 let l_q = if x > 0 { q } else { left_q };
148 let top_q = if mb_y > 0 { usize::from(dblk[mb_pos - mb_w].q) } else { 0 };
150 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
151 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
152 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
153 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
// Handed to the filters as their `dmode` argument.
155 let dmode = if y > 0 { x + y * 4 } else { x * 4 };
// The last two arguments of each filter call are `chroma` and `edge`.
// Bottom edge of the current block (four rows further down), non-edge mode.
157 if test_bit!(y_h_deblock, bpos + 4) {
158 rv40_loop_filter4_h(dst.data, yoff + 4 * dst.stride[0] + x * 4, dst.stride[0],
159 dmode, lim_cur, lim_bottom, alpha, beta, beta_y, false, false);
// Left edge of the current block, unless the edge-mode call further down handles it.
161 if test_bit!(y_v_deblock, bpos) && !ver_strong {
162 rv40_loop_filter4_v(dst.data, yoff + x * 4, dst.stride[0],
163 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, false);
// Top macroblock edge with a strong macroblock on either side: edge mode.
165 if (y == 0) && test_bit!(y_h_deblock, bpos) && (is_strong || top_is_strong) {
166 rv40_loop_filter4_h(dst.data, yoff + x * 4, dst.stride[0],
167 dmode, lim_top, lim_cur, alpha, beta, beta_y, false, true);
// Left macroblock edge with a strong macroblock on either side: edge mode.
169 if test_bit!(y_v_deblock, bpos) && ver_strong {
170 rv40_loop_filter4_v(dst.data, yoff + x * 4, dst.stride[0],
171 dmode, lim_left, lim_cur, alpha, beta, beta_y, false, true);
// Chroma planes: the same scheme applied to 2x2 block patterns.
176 let cur_cbp_c = dblk[mb_pos].cbp_c;
177 let top_cbp_c = if mb_y > 0 {
178 if top_is_strong { 0xFF } else { dblk[mb_pos - mb_w].cbp_c }
182 let bot_cbp_c = if !is_last_row {
183 dblk[mb_pos + mb_w].cbp_c
// Each plane `comp` owns four bits of `cbp_c` (the loop over `comp` is not visible in
// this excerpt).
188 let cshift = (comp - 1) * 4;
189 let c_cur_cbp = (cur_cbp_c >> cshift) & 0xF;
190 let c_top_cbp = (top_cbp_c >> cshift) & 0xF;
191 let c_left_cbp = (left_cbp_c >> cshift) & 0xF;
192 let c_bot_cbp = (bot_cbp_c >> cshift) & 0xF;
// Low 4 bits: pattern of this macroblock; bits 4..7: pattern of the macroblock below.
194 let c_deblock = c_cur_cbp | (c_bot_cbp << 4);
195 let mut c_v_deblock = c_deblock | ((c_cur_cbp << 1) & !C_LEFT_COL_MASK) | ((c_left_cbp & C_RIGHT_COL_MASK) >> 1);
196 let mut c_h_deblock = c_deblock | ((c_cur_cbp & C_TOP_ROW_MASK) << 2) | ((c_top_cbp & C_BOT_ROW_MASK) >> 2);
// NOTE(review): the conditions guarding the next two statements are not visible in
// this excerpt.
198 c_v_deblock &= !C_LEFT_COL_MASK;
201 c_h_deblock &= !C_TOP_ROW_MASK;
203 if is_last_row || is_strong || bot_is_strong {
204 c_h_deblock &= !(C_TOP_ROW_MASK << 4);
// Offset of the first sample of block row `y` of this macroblock in plane `comp`.
208 let coff = dst.offset[comp] + mb_x * 8 + (mb_y * 8 + y * 4) * dst.stride[comp];
210 let bpos = x + y * 2;
212 let ver_strong = (x == 0) && (is_strong || left_is_strong);
214 let cur_strength: usize;
217 } else if test_bit!(c_cur_cbp, bpos) {
223 let left_strength: usize;
227 } else if test_bit!(c_cur_cbp, bpos - 1) {
235 } else if test_bit!(c_left_cbp, bpos + 1) {
244 let bot_strength: usize;
248 } else if test_bit!(c_cur_cbp, bpos + 2) {
253 } else if !is_last_row {
254 if dblk[mb_pos + mb_w].is_strong {
256 } else if test_bit!(c_bot_cbp, x) {
265 let top_strength: usize;
269 } else if test_bit!(c_cur_cbp, bpos - 2) {
277 } else if test_bit!(c_top_cbp, bpos + 2) {
286 let l_q = if x > 0 { q } else { left_q };
287 let top_q = if mb_y > 0 { usize::from(dblk[mb_pos - mb_w].q) } else { 0 };
289 let lim_cur = RV40_FILTER_CLIP_TBL [cur_strength][q];
290 let lim_top = RV40_FILTER_CLIP_TBL [top_strength][top_q];
291 let lim_left = RV40_FILTER_CLIP_TBL[left_strength][l_q];
292 let lim_bottom = RV40_FILTER_CLIP_TBL [bot_strength][q];
// Same four calls as for plane 0, with `chroma = true`, `beta_c` as the second beta and
// `x * 8` / `y * 8` as `dmode`.
294 if test_bit!(c_h_deblock, bpos + 2) {
295 rv40_loop_filter4_h(dst.data, coff + 4 * dst.stride[comp] + x * 4, dst.stride[comp],
296 x * 8, lim_cur, lim_bottom, alpha, beta, beta_c, true, false);
298 if test_bit!(c_v_deblock, bpos) && !ver_strong {
299 rv40_loop_filter4_v(dst.data, coff + x * 4, dst.stride[comp],
300 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, false);
302 if (y == 0) && test_bit!(c_h_deblock, bpos) && (is_strong || top_is_strong) {
303 rv40_loop_filter4_h(dst.data, coff + x * 4, dst.stride[comp],
304 x * 8, lim_top, lim_cur, alpha, beta, beta_c, true, true);
306 if test_bit!(c_v_deblock, bpos) && ver_strong {
307 rv40_loop_filter4_v(dst.data, coff + x * 4, dst.stride[comp],
308 y * 8, lim_left, lim_cur, alpha, beta, beta_c, true, true);
// The current patterns become the "left" state for the next macroblock.
315 left_dbk_y = cur_dbk_y;
316 left_cbp_y = cur_cbp_y;
317 left_cbp_c = cur_cbp_c;
// Macro arm: reads element `$o` of `$src` and casts it to `i16`.
// NOTE(review): the `macro_rules!` header is not visible in this excerpt; the filters
// below invoke this as `el!(...)`.
325 ($src: ident, $o: expr) => ($src[$o] as i16);
// NOTE(review): the body of this function is not visible in this excerpt. Callers pass
// a correction value as `a` and a limit as `lim`; presumably the result is `a` clamped
// to [-lim, lim] - confirm against the full source.
328 fn clip_symm(a: i16, lim: i16) -> i16 {
/// Weak edge filter with explicit sample spacing.
///
/// `off` addresses the first sample on the q side of the edge (`q0`); the p-side samples
/// are at `off - step`, `off - 2*step`, `off - 3*step` and further q-side samples at
/// `off + step`, `off + 2*step`. `filter_p1` / `filter_q1` enable the correction of the
/// second sample on the respective side; `lim_p0q0`, `lim_p1` and `lim_q1` bound the
/// corrections through `clip_symm`.
///
/// NOTE(review): the loop that advances `off` (presumably by `stride`), the definition
/// of `t` and the `strength` expressions are not visible in this excerpt.
338 fn rv40_weak_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
339 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
340 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
342 let p0 = el!(pix, off - step);
343 let q0 = el!(pix, off);
// `|t|` scaled by alpha; the threshold is 2 when both sides are filtered, otherwise 3
// (the body of the branch taken above the threshold is not visible here).
351 let u = (alpha * t.wrapping_abs()) >> 7;
352 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
357 let p2 = el!(pix, off - 3*step);
358 let p1 = el!(pix, off - 2*step);
359 let q1 = el!(pix, off + step);
360 let q2 = el!(pix, off + 2*step);
362 let strength = if filter_p1 && filter_q1 {
// The clipped correction is added to p0 and subtracted from q0.
368 let diff = clip_symm((strength + 4) >> 3, lim_p0q0);
369 pix[off - step] = clip8(p0 + diff);
370 pix[off ] = clip8(q0 - diff);
// p1 is only adjusted when enabled and |p1 - p2| does not exceed beta.
372 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
373 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
374 pix[off - 2*step] = clip8(p1 - clip_symm(p1_diff, lim_p1));
// Same for q1 on the other side of the edge.
377 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
378 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
379 pix[off + step] = clip8(q1 - clip_symm(q1_diff, lim_q1));
/// Weak filter wrapper that forwards to `rv40_weak_loop_filter4` with `stride` passed as
/// the sample step across the edge and 1 passed as the second spacing argument; all
/// other arguments are forwarded unchanged.
386 fn rv40_weak_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
387 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
388 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
389 rv40_weak_loop_filter4(pix, off, stride, 1, filter_p1, filter_q1, alpha, beta, lim_p0q0, lim_p1, lim_q1);
/// Weak filter for an edge between neighbouring samples of the same row: `p0` is the
/// sample at `off - 1` and `q0` the sample at `off`.
///
/// Works on a window that starts three samples before `off` and is walked in chunks of
/// `stride` samples, at most four of them; inside every chunk index 3 is `q0`, indices
/// 2, 1, 0 are `p0`, `p1`, `p2` and indices 4, 5 are `q1`, `q2`. The arithmetic mirrors
/// `rv40_weak_loop_filter4`.
///
/// NOTE(review): the definitions of `q0` and `t` and the `strength` expressions are not
/// visible in this excerpt.
391 fn rv40_weak_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
392 filter_p1: bool, filter_q1: bool, alpha: i16, beta: i16,
393 lim_p0q0: i16, lim_p1: i16, lim_q1: i16) {
// Three full chunks of `stride` samples plus the six samples needed from the fourth.
394 let src = &mut pix[off - 3..][..stride * 3 + 3 + 3];
395 for ch in src.chunks_mut(stride).take(4) {
// Every chunk must hold the three samples on each side of the edge.
396 assert!(ch.len() >= 3 + 3);
397 let p0 = el!(ch, 3 - 1);
// `|t|` scaled by alpha; the threshold is 2 when both sides are filtered, otherwise 3
// (the body of the branch taken above the threshold is not visible here).
405 let u = (alpha * t.wrapping_abs()) >> 7;
406 if u > (if filter_p1 && filter_q1 { 2 } else { 3 }) {
410 let p2 = el!(ch, 3 - 3);
411 let p1 = el!(ch, 3 - 2);
412 let q1 = el!(ch, 3 + 1);
413 let q2 = el!(ch, 3 + 2);
415 let strength = if filter_p1 && filter_q1 {
// The clipped correction is added to p0 and subtracted from q0.
421 let diff = clip_symm((strength + 4) >> 3, lim_p0q0);
422 ch[3 - 1] = clip8(p0 + diff);
423 ch[3 ] = clip8(q0 - diff);
// p1 is only adjusted when enabled and |p1 - p2| does not exceed beta.
425 if filter_p1 && ((p1 - p2).wrapping_abs() <= beta) {
426 let p1_diff = ((p1 - p0) + (p1 - p2) - diff) >> 1;
427 ch[3 - 2] = clip8(p1 - clip_symm(p1_diff, lim_p1));
// Same for q1 on the other side of the edge.
430 if filter_q1 && ((q1 - q2).wrapping_abs() <= beta) {
431 let q1_diff = ((q1 - q0) + (q1 - q2) + diff) >> 1;
432 ch[3 + 1] = clip8(q1 - clip_symm(q1_diff, lim_q1));
/// Five-tap weighted sum used by the strong filter:
/// `(25 * (a + e) + 26 * (b + c + d) + dither) >> 7`.
///
/// The weights add up to 128, which matches the 7-bit shift; `c` is the centre tap.
///
/// NOTE(review): the handling of `clip` and the return expression are only partly
/// visible in this excerpt; the one visible branch compares `val` with `c + lims`.
437 #[allow(clippy::many_single_char_names)]
438 fn sfilter(a: i16, b: i16, c: i16, d: i16, e: i16, dither: i16, clip: bool, lims: i16) -> i16 {
439 let val = (25 * (a + e) + 26 * (b + c + d) + dither) >> 7;
443 } else if val > c + lims {
/// Strong edge filter with explicit sample spacing.
///
/// `off` addresses `q0`; p-side samples are at `off - k*step` (k = 1..4) and q-side
/// samples at `off + k*step` (k = 0..3). `dmode + i` indexes the dither tables, and
/// `lims` is passed to `sfilter` as its limit.
///
/// NOTE(review): the loop over `i` that advances `off` (presumably by `stride`), the
/// definition of `t`, and the condition guarding the `np2` / `nq2` update are not
/// visible in this excerpt; `chroma` is not used in any visible line.
453 fn rv40_strong_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize,
454 alpha: i16, lims: i16, dmode: usize, chroma: bool) {
456 let p0 = el!(pix, off - step);
457 let q0 = el!(pix, off);
// `|t|` scaled by alpha; a non-zero value turns on clipping in `sfilter`.
465 let fmode = (alpha * t.wrapping_abs()) >> 7;
471 let p3 = el!(pix, off - 4*step);
472 let p2 = el!(pix, off - 3*step);
473 let p1 = el!(pix, off - 2*step);
474 let q1 = el!(pix, off + step);
475 let q2 = el!(pix, off + 2*step);
476 let q3 = el!(pix, off + 3*step);
// New p0 / q0, each centred on the old sample.
478 let np0 = sfilter(p2, p1, p0, q0, q1, RV40_DITHER_L[dmode + i], fmode != 0, lims);
479 let nq0 = sfilter( p1, p0, q0, q1, q2, RV40_DITHER_R[dmode + i], fmode != 0, lims);
// New p1 / q1 already use the freshly filtered p0 / q0 as one of their taps.
481 let np1 = sfilter(p3, p2, p1, np0, q0, RV40_DITHER_L[dmode + i], fmode != 0, lims);
482 let nq1 = sfilter( p0, nq0, q1, q2, q3, RV40_DITHER_R[dmode + i], fmode != 0, lims);
484 pix[off - 2*step] = np1 as u8;
485 pix[off - step] = np0 as u8;
486 pix[off] = nq0 as u8;
487 pix[off + step] = nq1 as u8;
// Third sample on each side: fixed dither of 64 and no clipping.
490 let np2 = sfilter(np0, np1, p2, p3, p2, 64, false, 0);
491 let nq2 = sfilter(nq0, nq1, q2, q3, q2, 64, false, 0);
492 pix[off - 3*step] = np2 as u8;
493 pix[off + 2*step] = nq2 as u8;
/// Decides how an edge is to be filtered; returns `(strong, filter_p1, filter_q1)`.
///
/// `filter_p1` / `filter_q1` are set when the absolute accumulated `p1 - p0` /
/// `q1 - q0` difference is below `beta * 4`. `strong` can only be set when both flags
/// hold and `edge` is true, and then requires the absolute accumulated `p1 - p2` and
/// `q1 - q2` differences to be below `beta2`.
///
/// NOTE(review): the loops that accumulate the sums and define `off1` are not visible in
/// this excerpt.
500 fn rv40_loop_strength(pix: &[u8], off: usize, step: usize, stride: usize,
501 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
502 let mut sum_p1p0 = 0;
503 let mut sum_q1q0 = 0;
507 sum_p1p0 += el!(pix, off1 - 2 * step) - el!(pix, off1 - step);
508 sum_q1q0 += el!(pix, off1 + step) - el!(pix, off1);
512 let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
513 let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;
// Without both flags, or outside edge mode, the strong filter is never selected.
515 if (!filter_p1 || !filter_q1) || !edge {
516 return (false, filter_p1, filter_q1);
519 let mut sum_p1p2 = 0;
520 let mut sum_q1q2 = 0;
524 sum_p1p2 += el!(pix, off1 - 2 * step) - el!(pix, off1 - 3 * step);
525 sum_q1q2 += el!(pix, off1 + step) - el!(pix, off1 + 2 * step);
529 let strong = (sum_p1p2.wrapping_abs() < beta2) && (sum_q1q2.wrapping_abs() < beta2);
531 (strong, filter_p1, filter_q1)
/// Strength decision wrapper that forwards to `rv40_loop_strength` with `stride` passed
/// as the sample step across the edge and 1 passed as the second spacing argument.
534 fn rv40_loop_strength_h(pix: &[u8], off: usize, stride: usize,
535 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
536 rv40_loop_strength(pix, off, stride, 1, beta, beta2, edge)
/// Strength decision for an edge between neighbouring samples of the same row; returns
/// `(strong, filter_p1, filter_q1)` with the same rules as `rv40_loop_strength`.
///
/// Reads a window that starts three samples before `off` and is walked in chunks of
/// `stride` samples, at most four of them; inside every chunk index 3 is `q0`, indices
/// 2, 1, 0 are `p0`, `p1`, `p2` and indices 4, 5 are `q1`, `q2`.
539 fn rv40_loop_strength_v(pix: &[u8], off: usize, stride: usize,
540 beta: i16, beta2: i16, edge: bool) -> (bool, bool, bool) {
// Three full chunks of `stride` samples plus the six samples needed from the fourth.
541 let src = &pix[off - 3..][..stride * 3 + 3 + 3];
542 let mut sum_p1p0 = 0;
543 let mut sum_q1q0 = 0;
545 for ch in src.chunks(stride).take(4) {
546 assert!(ch.len() >= 3 + 3);
547 sum_p1p0 += el!(ch, 3 - 2) - el!(ch, 3 - 1);
548 sum_q1q0 += el!(ch, 3 + 1) - el!(ch, 3);
551 let filter_p1 = sum_p1p0.wrapping_abs() < beta * 4;
552 let filter_q1 = sum_q1q0.wrapping_abs() < beta * 4;
// Without both flags, or outside edge mode, the strong filter is never selected.
554 if (!filter_p1 || !filter_q1) || !edge {
555 return (false, filter_p1, filter_q1);
558 let mut sum_p1p2 = 0;
559 let mut sum_q1q2 = 0;
561 for ch in src.chunks(stride).take(4) {
562 assert!(ch.len() >= 3 + 3);
563 sum_p1p2 += el!(ch, 3 - 2) - el!(ch, 3 - 3);
564 sum_q1q2 += el!(ch, 3 + 1) - el!(ch, 3 + 2);
567 let strong = (sum_p1p2.wrapping_abs() < beta2) && (sum_q1q2.wrapping_abs() < beta2);
569 (strong, filter_p1, filter_q1)
/// Filters one four-sample edge segment whose p and q samples are `stride` apart.
///
/// Obtains `(strong, filter_p1, filter_q1)` from `rv40_loop_strength_h` and then picks
/// the strong filter, the weak filter with full limits (both flags set) or the weak
/// filter with halved limits (exactly one flag set). When neither flag is set no visible
/// branch runs.
///
/// NOTE(review): the condition of the first branch is not visible in this excerpt -
/// presumably `strong`.
572 fn rv40_loop_filter4_h(pix: &mut [u8], off: usize, stride: usize,
573 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
574 chroma: bool, edge: bool) {
575 let (strong, filter_p1, filter_q1) = rv40_loop_strength_h(pix, off, stride, beta, beta2, edge);
// Combined limit: one per set flag, plus the average of the two side limits, plus one.
576 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
579 rv40_strong_loop_filter4(pix, off, stride, 1, alpha, lims, dmode, chroma);
580 } else if filter_p1 && filter_q1 {
581 rv40_weak_loop_filter4_h(pix, off, stride, true, true, alpha, beta,
582 lims, lim_p1, lim_q1);
583 } else if filter_p1 || filter_q1 {
584 rv40_weak_loop_filter4_h(pix, off, stride, filter_p1, filter_q1, alpha, beta,
585 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
/// Filters one four-sample edge segment whose p and q samples are adjacent in memory.
///
/// Obtains `(strong, filter_p1, filter_q1)` from `rv40_loop_strength_v` and then picks
/// the strong filter (called with a step of 1 across the edge), the weak filter with
/// full limits (both flags set) or the weak filter with halved limits (exactly one flag
/// set). When neither flag is set no visible branch runs.
///
/// NOTE(review): the condition of the first branch is not visible in this excerpt -
/// presumably `strong`.
589 fn rv40_loop_filter4_v(pix: &mut [u8], off: usize, stride: usize,
590 dmode: usize, lim_p1: i16, lim_q1: i16, alpha: i16, beta: i16, beta2: i16,
591 chroma: bool, edge: bool) {
592 let (strong, filter_p1, filter_q1) = rv40_loop_strength_v(pix, off, stride, beta, beta2, edge);
// Combined limit: one per set flag, plus the average of the two side limits, plus one.
593 let lims = (filter_p1 as i16) + (filter_q1 as i16) + ((lim_p1 + lim_q1) >> 1) + 1;
596 rv40_strong_loop_filter4(pix, off, 1, stride, alpha, lims, dmode, chroma);
597 } else if filter_p1 && filter_q1 {
598 rv40_weak_loop_filter4_v(pix, off, stride, true, true, alpha, beta,
599 lims, lim_p1, lim_q1);
600 } else if filter_p1 || filter_q1 {
601 rv40_weak_loop_filter4_v(pix, off, stride, filter_p1, filter_q1, alpha, beta,
602 lims >> 1, lim_p1 >> 1, lim_q1 >> 1);
// Dither values the strong filter feeds to `sfilter` for the p-side samples, indexed
// by `dmode + i`.
606 const RV40_DITHER_L: [i16; 16] = [
607 0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
608 0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
// Dither values the strong filter feeds to `sfilter` for the q-side samples, indexed
// by `dmode + i`.
610 const RV40_DITHER_R: [i16; 16] = [
611 0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
612 0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
// Alpha threshold per macroblock `q` value (32 entries); read in `loop_filter_frame`
// and used by the filters as the multiplier in `(alpha * |t|) >> 7`.
615 const RV40_ALPHA_TAB: [i16; 32] = [
616 128, 128, 128, 128, 128, 128, 128, 128,
617 128, 128, 122, 96, 75, 59, 47, 37,
618 29, 23, 18, 15, 13, 11, 10, 9,
619 8, 7, 6, 5, 4, 3, 2, 1
// Beta threshold per macroblock `q` value (32 entries); `loop_filter_frame` also
// derives the `beta * 3` / `beta * 4` variants from it.
622 const RV40_BETA_TAB: [i16; 32] = [
623 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, 4, 4, 6, 6,
624 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 13, 14, 15, 16, 17
// Clip limits looked up as `[strength][q]`: three rows (strength 0, 1, 2) of 32 entries.
// The values shown for the first row are all zero.
// NOTE(review): the bracket lines separating the rows and closing the table are not
// visible in this excerpt.
627 const RV40_FILTER_CLIP_TBL: [[i16; 32]; 3] = [
629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
632 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
633 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5
635 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
636 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 7, 8, 9