From: Kostya Shishkov Date: Wed, 27 Jul 2022 08:14:49 +0000 (+0200) Subject: h264: rework MB reconstruction and fix loop filtering X-Git-Url: https://git.nihav.org/?a=commitdiff_plain;h=22de733b14e3ef23eabaaa9f8b2e98a59188c5cf;p=nihav.git h264: rework MB reconstruction and fix loop filtering --- diff --git a/nihav-itu/src/codecs/h264/dsp/mod.rs b/nihav-itu/src/codecs/h264/dsp/mod.rs index b19225d..19d260a 100644 --- a/nihav-itu/src/codecs/h264/dsp/mod.rs +++ b/nihav-itu/src/codecs/h264/dsp/mod.rs @@ -277,101 +277,101 @@ pub fn avg(dst: &mut [u8], dstride: usize, fn clip8(val: i16) -> u8 { val.max(0).min(255) as u8 } -fn ipred_dc128(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize) { - for _ in 0..bsize { - for x in 0..bsize { buf[idx + x] = 128; } - idx += stride; +fn ipred_dc128(buf: &mut [u8], stride: usize, bsize: usize) { + for row in buf.chunks_mut(stride).take(bsize) { + for el in row[..bsize].iter_mut() { + *el = 128; + } } } -fn ipred_ver(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize) { - let oidx = idx - stride; - for _ in 0..bsize { - for x in 0..bsize { buf[idx + x] = buf[oidx + x]; } - idx += stride; +fn ipred_ver(buf: &mut [u8], stride: usize, top: &[u8], bsize: usize) { + for row in buf.chunks_mut(stride).take(bsize) { + row[..bsize].copy_from_slice(&top[..bsize]); } } -fn ipred_hor(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize) { - for _ in 0..bsize { - for x in 0..bsize { buf[idx + x] = buf[idx - 1]; } - idx += stride; +fn ipred_hor(buf: &mut [u8], stride: usize, left: &[u8], bsize: usize) { + for (row, &left) in buf.chunks_mut(stride).zip(left[1..].iter()).take(bsize) { + for el in row[..bsize].iter_mut() { + *el = left; + } } } -fn ipred_dc(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize, shift: u8) { +fn ipred_dc(buf: &mut [u8], stride: usize, top: &[u8], left: &[u8], bsize: usize, shift: u8) { let mut adc: u16 = 0; - for i in 0..bsize { adc += u16::from(buf[idx - stride + i]); } - for i in 0..bsize { adc += u16::from(buf[idx - 1 + i * stride]); } + for i in 0..bsize { adc += u16::from(top[i]); } + for i in 0..bsize { adc += u16::from(left[i + 1]); } let dc = ((adc + (1 << (shift - 1))) >> shift) as u8; - for _ in 0..bsize { - for x in 0..bsize { buf[idx + x] = dc; } - idx += stride; + for row in buf.chunks_mut(stride).take(bsize) { + for el in row[..bsize].iter_mut() { + *el = dc; + } } } -fn ipred_left_dc(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize, shift: u8) { +fn ipred_left_dc(buf: &mut [u8], stride: usize, left: &[u8], bsize: usize, shift: u8) { let mut adc: u16 = 0; - for i in 0..bsize { adc += u16::from(buf[idx - 1 + i * stride]); } + for i in 0..bsize { adc += u16::from(left[i + 1]); } let dc = ((adc + (1 << (shift - 1))) >> shift) as u8; - for _ in 0..bsize { - for x in 0..bsize { buf[idx + x] = dc; } - idx += stride; + for row in buf.chunks_mut(stride).take(bsize) { + for el in row[..bsize].iter_mut() { + *el = dc; + } } } -fn ipred_top_dc(buf: &mut [u8], mut idx: usize, stride: usize, bsize: usize, shift: u8) { +fn ipred_top_dc(buf: &mut [u8], stride: usize, top: &[u8], bsize: usize, shift: u8) { let mut adc: u16 = 0; - for i in 0..bsize { adc += u16::from(buf[idx - stride + i]); } + for i in 0..bsize { adc += u16::from(top[i]); } let dc = ((adc + (1 << (shift - 1))) >> shift) as u8; - for _ in 0..bsize { - for x in 0..bsize { buf[idx + x] = dc; } - idx += stride; + for row in buf.chunks_mut(stride).take(bsize) { + for el in row[..bsize].iter_mut() { + *el = dc; + } } } -fn load_top(dst: &mut [u16], buf: &mut [u8], idx: usize, stride: usize, len: usize) { - for i in 0..len { dst[i] = u16::from(buf[idx - stride + i]); } -} -fn load_left(dst: &mut [u16], buf: &mut [u8], idx: usize, stride: usize, len: usize) { - for i in 0..len { dst[i] = u16::from(buf[idx - 1 + i * stride]); } +fn load(dst: &mut [u16], src: &[u8]) { + for (dst, &src) in dst.iter_mut().zip(src.iter()) { + *dst = u16::from(src); + } } -fn ipred_4x4_ver(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { - ipred_ver(buf, idx, stride, 4); +fn ipred_4x4_ver(buf: &mut [u8], stride: usize, top: &[u8], _left: &[u8], _tr: &[u8]) { + ipred_ver(buf, stride, top, 4); } -fn ipred_4x4_hor(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { - ipred_hor(buf, idx, stride, 4); +fn ipred_4x4_hor(buf: &mut [u8], stride: usize, _top: &[u8], left: &[u8], _tr: &[u8]) { + ipred_hor(buf, stride, left, 4); } -fn ipred_4x4_diag_down_left(buf: &mut [u8], idx: usize, stride: usize, tr: &[u8]) { +fn ipred_4x4_diag_down_left(buf: &mut [u8], stride: usize, top: &[u8], _left: &[u8], tr: &[u8]) { let mut t: [u16; 9] = [0; 9]; - load_top(&mut t, buf, idx, stride, 4); - for i in 0..4 { - t[i + 4] = u16::from(tr[i]); - } + load(&mut t[..4], top); + load(&mut t[4..8], tr); t[8] = t[7]; - let dst = &mut buf[idx..]; for i in 0..4 { - dst[i] = ((t[i] + 2 * t[i + 1] + t[i + 2] + 2) >> 2) as u8; + buf[i] = ((t[i] + 2 * t[i + 1] + t[i + 2] + 2) >> 2) as u8; } - let dst = &mut buf[idx + stride..]; + let dst = &mut buf[stride..]; for i in 0..4 { dst[i] = ((t[i + 1] + 2 * t[i + 2] + t[i + 3] + 2) >> 2) as u8; } - let dst = &mut buf[idx + stride * 2..]; + let dst = &mut buf[stride * 2..]; for i in 0..4 { dst[i] = ((t[i + 2] + 2 * t[i + 3] + t[i + 4] + 2) >> 2) as u8; } - let dst = &mut buf[idx + stride * 3..]; + let dst = &mut buf[stride * 3..]; for i in 0..4 { dst[i] = ((t[i + 3] + 2 * t[i + 4] + t[i + 5] + 2) >> 2) as u8; } } -fn ipred_4x4_diag_down_right(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { +fn ipred_4x4_diag_down_right(buf: &mut [u8], stride: usize, top: &[u8], left: &[u8], _tr: &[u8]) { let mut t: [u16; 5] = [0; 5]; + t[0] = u16::from(left[0]); + load(&mut t[1..], &top); let mut l: [u16; 5] = [0; 5]; - load_top(&mut t, buf, idx - 1, stride, 5); - load_left(&mut l, buf, idx - stride, stride, 5); - let dst = &mut buf[idx..]; + load(&mut l, left); + let dst = buf; for j in 0..4 { for i in 0..j { @@ -383,12 +383,13 @@ fn ipred_4x4_diag_down_right(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u } } } -fn ipred_4x4_ver_right(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { +fn ipred_4x4_ver_right(buf: &mut [u8], stride: usize, top: &[u8], left: &[u8], _tr: &[u8]) { let mut t: [u16; 5] = [0; 5]; + t[0] = u16::from(left[0]); + load(&mut t[1..], &top); let mut l: [u16; 5] = [0; 5]; - load_top(&mut t, buf, idx - 1, stride, 5); - load_left(&mut l, buf, idx - stride, stride, 5); - let dst = &mut buf[idx..]; + load(&mut l, left); + let dst = buf; for j in 0..4 { for i in 0..4 { @@ -411,11 +412,11 @@ fn ipred_4x4_ver_right(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { } } } -fn ipred_4x4_ver_left(buf: &mut [u8], idx: usize, stride: usize, tr: &[u8]) { +fn ipred_4x4_ver_left(buf: &mut [u8], stride: usize, top: &[u8], _left: &[u8], tr: &[u8]) { let mut t: [u16; 8] = [0; 8]; - load_top(&mut t, buf, idx, stride, 4); - for i in 0..4 { t[i + 4] = u16::from(tr[i]); } - let dst = &mut buf[idx..]; + load(&mut t[..4], &top); + load(&mut t[4..], tr); + let dst = buf; dst[0 + 0 * stride] = ((t[0] + t[1] + 1) >> 1) as u8; let pix = ((t[1] + t[2] + 1) >> 1) as u8; @@ -440,12 +441,13 @@ fn ipred_4x4_ver_left(buf: &mut [u8], idx: usize, stride: usize, tr: &[u8]) { dst[2 + 3 * stride] = pix; dst[3 + 3 * stride] = ((t[4] + 2*t[5] + t[6] + 2) >> 2) as u8; } -fn ipred_4x4_hor_down(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { +fn ipred_4x4_hor_down(buf: &mut [u8], stride: usize, top: &[u8], left: &[u8], _tr: &[u8]) { let mut t: [u16; 5] = [0; 5]; + t[0] = u16::from(left[0]); + load(&mut t[1..], &top); let mut l: [u16; 5] = [0; 5]; - load_top(&mut t, buf, idx - 1, stride, 5); - load_left(&mut l, buf, idx - stride, stride, 5); - let dst = &mut buf[idx..]; + load(&mut l, left); + let dst = buf; for j in 0..4 { for i in 0..4 { @@ -468,10 +470,10 @@ fn ipred_4x4_hor_down(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { } } } -fn ipred_4x4_hor_up(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { +fn ipred_4x4_hor_up(buf: &mut [u8], stride: usize, _top: &[u8], left: &[u8], _tr: &[u8]) { let mut l: [u16; 8] = [0; 8]; - load_left(&mut l, buf, idx, stride, 8); - let dst = &mut buf[idx..]; + load(&mut l, &left[1..]); + let dst = buf; dst[0 + 0 * stride] = ((l[0] + l[1] + 1) >> 1) as u8; dst[1 + 0 * stride] = ((l[0] + 2*l[1] + l[2] + 2) >> 2) as u8; @@ -494,17 +496,17 @@ fn ipred_4x4_hor_up(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { dst[2 + 3 * stride] = l[3] as u8; dst[3 + 3 * stride] = l[3] as u8; } -fn ipred_4x4_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { - ipred_dc(buf, idx, stride, 4, 3); +fn ipred_4x4_dc(buf: &mut [u8], stride: usize, top: &[u8], left: &[u8], _tr: &[u8]) { + ipred_dc(buf, stride, top, left, 4, 3); } -fn ipred_4x4_left_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { - ipred_left_dc(buf, idx, stride, 4, 2); +fn ipred_4x4_left_dc(buf: &mut [u8], stride: usize, _top: &[u8], left: &[u8], _tr: &[u8]) { + ipred_left_dc(buf, stride, left, 4, 2); } -fn ipred_4x4_top_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { - ipred_top_dc(buf, idx, stride, 4, 2); +fn ipred_4x4_top_dc(buf: &mut [u8], stride: usize, top: &[u8], _left: &[u8], _tr: &[u8]) { + ipred_top_dc(buf, stride, top, 4, 2); } -fn ipred_4x4_dc128(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { - ipred_dc128(buf, idx, stride, 4); +fn ipred_4x4_dc128(buf: &mut [u8], stride: usize, _top: &[u8], _left: &[u8], _tr: &[u8]) { + ipred_dc128(buf, stride, 4); } pub struct IPred8Context { @@ -521,14 +523,14 @@ impl IPred8Context { tl: 128, } } - pub fn fill(&mut self, buf: &[u8], idx: usize, stride: usize, has_t: bool, has_tr: bool, has_l: bool, has_tl: bool) { + pub fn fill(&mut self, top: &[u8], left: &[u8], has_t: bool, has_tr: bool, has_l: bool, has_tl: bool) { let mut t = [0x80u8; 19]; let mut l = [0x80u8; 11]; if has_t { - t[1..8 + 1].copy_from_slice(&buf[idx - stride..][..8]); + t[1..8 + 1].copy_from_slice(&top[..8]); } if has_tr { - t[8 + 1..16 + 1].copy_from_slice(&buf[idx - stride + 8..][..8]); + t[8 + 1..16 + 1].copy_from_slice(&top[8..][..8]); t[16 + 1] = t[15 + 1]; t[17 + 1] = t[15 + 1]; } else { @@ -538,15 +540,13 @@ impl IPred8Context { } } if has_l { - for i in 0..8 { - l[i + 1] = buf[idx - 1 + stride * i]; - } + l[1..9].copy_from_slice(&left[1..9]); l[8 + 1] = l[7 + 1]; l[9 + 1] = l[7 + 1]; } if has_tl { - t[0] = buf[idx - 1 - stride]; - l[0] = buf[idx - 1 - stride]; + t[0] = left[0]; + l[0] = left[0]; } else { t[0] = t[1]; l[0] = l[1]; @@ -582,9 +582,7 @@ fn ipred_y_8x8_hor(buf: &mut [u8], stride: usize, ctx: &IPred8Context) { } fn ipred_y_8x8_diag_down_left(buf: &mut [u8], stride: usize, ctx: &IPred8Context) { let mut t = [0u16; 16]; - for (dt, &st) in t.iter_mut().zip(ctx.t.iter()) { - *dt = u16::from(st); - } + load(&mut t, &ctx.t); for (y, row) in buf.chunks_mut(stride).take(8).enumerate() { for (x, pix) in row.iter_mut().take(8).enumerate() { @@ -599,14 +597,10 @@ fn ipred_y_8x8_diag_down_left(buf: &mut [u8], stride: usize, ctx: &IPred8Context fn ipred_y_8x8_diag_down_right(buf: &mut [u8], stride: usize, ctx: &IPred8Context) { let mut t = [0u16; 9]; t[0] = u16::from(ctx.tl); - for (dt, &st) in t[1..].iter_mut().zip(ctx.t.iter()) { - *dt = u16::from(st); - } + load(&mut t[1..], &ctx.t); let mut l = [0u16; 9]; l[0] = u16::from(ctx.tl); - for (dl, &sl) in l[1..].iter_mut().zip(ctx.l.iter()) { - *dl = u16::from(sl); - } + load(&mut l[1..], &ctx.l); let diag = t[1] + 2 * t[0] + l[1]; for (y, row) in buf.chunks_mut(stride).take(8).enumerate() { @@ -624,14 +618,10 @@ fn ipred_y_8x8_diag_down_right(buf: &mut [u8], stride: usize, ctx: &IPred8Contex fn ipred_y_8x8_ver_right(buf: &mut [u8], stride: usize, ctx: &IPred8Context) { let mut t = [0u16; 9]; t[0] = u16::from(ctx.tl); - for (dt, &st) in t[1..].iter_mut().zip(ctx.t.iter()) { - *dt = u16::from(st); - } + load(&mut t[1..], &ctx.t); let mut l = [0u16; 9]; l[0] = u16::from(ctx.tl); - for (dl, &sl) in l[1..].iter_mut().zip(ctx.l.iter()) { - *dl = u16::from(sl); - } + load(&mut l[1..], &ctx.l); for (y, row) in buf.chunks_mut(stride).take(8).enumerate() { for (x, pix) in row.iter_mut().take(8).enumerate() { @@ -654,9 +644,7 @@ fn ipred_y_8x8_ver_right(buf: &mut [u8], stride: usize, ctx: &IPred8Context) { } fn ipred_y_8x8_ver_left(buf: &mut [u8], stride: usize, ctx: &IPred8Context) { let mut t = [0u16; 16]; - for (dt, &st) in t.iter_mut().zip(ctx.t.iter()) { - *dt = u16::from(st); - } + load(&mut t, &ctx.t); for (y, row) in buf.chunks_mut(stride).take(8).enumerate() { for (x, pix) in row.iter_mut().take(8).enumerate() { @@ -673,14 +661,10 @@ fn ipred_y_8x8_ver_left(buf: &mut [u8], stride: usize, ctx: &IPred8Context) { fn ipred_y_8x8_hor_down(buf: &mut [u8], stride: usize, ctx: &IPred8Context) { let mut t = [0u16; 9]; t[0] = u16::from(ctx.tl); - for (dt, &st) in t[1..].iter_mut().zip(ctx.t.iter()) { - *dt = u16::from(st); - } + load(&mut t[1..], &ctx.t); let mut l = [0u16; 9]; l[0] = u16::from(ctx.tl); - for (dl, &sl) in l[1..].iter_mut().zip(ctx.l.iter()) { - *dl = u16::from(sl); - } + load(&mut l[1..], &ctx.l); for (y, row) in buf.chunks_mut(stride).take(8).enumerate() { for (x, pix) in row.iter_mut().take(8).enumerate() { @@ -703,9 +687,7 @@ fn ipred_y_8x8_hor_down(buf: &mut [u8], stride: usize, ctx: &IPred8Context) { } fn ipred_y_8x8_hor_up(buf: &mut [u8], stride: usize, ctx: &IPred8Context) { let mut l = [0u16; 8]; - for (dl, &sl) in l.iter_mut().zip(ctx.l.iter()) { - *dl = u16::from(sl); - } + load(&mut l, &ctx.l); for (y, row) in buf.chunks_mut(stride).take(8).enumerate() { for (x, pix) in row.iter_mut().take(8).enumerate() { @@ -763,20 +745,20 @@ fn ipred_y_8x8_top_dc(buf: &mut [u8], stride: usize, ctx: &IPred8Context) { } } fn ipred_y_8x8_dc128(buf: &mut [u8], stride: usize, _ctx: &IPred8Context) { - ipred_dc128(buf, 0, stride, 8); + ipred_dc128(buf, stride, 8); } -fn ipred_8x8_ver(buf: &mut [u8], idx: usize, stride: usize) { - ipred_ver(buf, idx, stride, 8); +fn ipred_8x8_ver(buf: &mut [u8], stride: usize, top: &[u8], _left: &[u8]) { + ipred_ver(buf, stride, top, 8); } -fn ipred_8x8_hor(buf: &mut [u8], idx: usize, stride: usize) { - ipred_hor(buf, idx, stride, 8); +fn ipred_8x8_hor(buf: &mut [u8], stride: usize, _top: &[u8], left: &[u8]) { + ipred_hor(buf, stride, left, 8); } -fn ipred_8x8_dc(buf: &mut [u8], idx: usize, stride: usize) { - let mut t: [u16; 8] = [0; 8]; - load_top(&mut t, buf, idx, stride, 8); - let mut l: [u16; 8] = [0; 8]; - load_left(&mut l, buf, idx, stride, 8); +fn ipred_8x8_dc(buf: &mut [u8], stride: usize, top: &[u8], left: &[u8]) { + let mut l = [0; 8]; + load(&mut l, &left[1..]); + let mut t = [0; 8]; + load(&mut t, &top); let dc0 = ((t[0] + t[1] + t[2] + t[3] + l[0] + l[1] + l[2] + l[3] + 4) >> 3) as u8; let sum1 = t[4] + t[5] + t[6] + t[7]; @@ -785,60 +767,56 @@ fn ipred_8x8_dc(buf: &mut [u8], idx: usize, stride: usize) { let dc2 = ((sum2 + 2) >> 2) as u8; let dc3 = ((sum1 + sum2 + 4) >> 3) as u8; - let dst = &mut buf[idx..]; - for row in dst.chunks_mut(stride).take(4) { + for row in buf.chunks_mut(stride).take(4) { row[..4].copy_from_slice(&[dc0; 4]); row[4..8].copy_from_slice(&[dc1; 4]); } - for row in dst.chunks_mut(stride).skip(4).take(4) { + for row in buf.chunks_mut(stride).skip(4).take(4) { row[..4].copy_from_slice(&[dc2; 4]); row[4..8].copy_from_slice(&[dc3; 4]); } } -fn ipred_8x8_left_dc(buf: &mut [u8], idx: usize, stride: usize) { +fn ipred_8x8_left_dc(buf: &mut [u8], stride: usize, _top: &[u8], left: &[u8]) { let mut left_dc0 = 0; let mut left_dc1 = 0; - for row in buf[idx - 1..].chunks(stride).take(4) { - left_dc0 += u16::from(row[0]); + for &el in left[1..].iter().take(4) { + left_dc0 += u16::from(el); } - for row in buf[idx - 1..].chunks(stride).skip(4).take(4) { - left_dc1 += u16::from(row[0]); + for &el in left[1..].iter().skip(4).take(4) { + left_dc1 += u16::from(el); } let dc0 = ((left_dc0 + 2) >> 2) as u8; let dc2 = ((left_dc1 + 2) >> 2) as u8; - for row in buf[idx..].chunks_mut(stride).take(4) { + for row in buf.chunks_mut(stride).take(4) { row[..8].copy_from_slice(&[dc0; 8]); } - for row in buf[idx..].chunks_mut(stride).skip(4).take(4) { + for row in buf.chunks_mut(stride).skip(4).take(4) { row[..8].copy_from_slice(&[dc2; 8]); } } -fn ipred_8x8_top_dc(buf: &mut [u8], idx: usize, stride: usize) { - ipred_top_dc(buf, idx, stride, 4, 2); - ipred_top_dc(buf, idx + 4, stride, 4, 2); - ipred_top_dc(buf, idx + 4 * stride, stride, 4, 2); - ipred_top_dc(buf, idx + 4 + 4 * stride, stride, 4, 2); +fn ipred_8x8_top_dc(buf: &mut [u8], stride: usize, top: &[u8], _left: &[u8]) { + ipred_top_dc(buf, stride, top, 4, 2); + ipred_top_dc(&mut buf[4..], stride, &top[4..], 4, 2); + let mut top = [0; 8]; + top.copy_from_slice(&buf[stride * 3..][..8]); + ipred_top_dc(&mut buf[4 * stride..], stride, &top, 4, 2); + ipred_top_dc(&mut buf[4 + 4 * stride..], stride, &top[4..], 4, 2); } -fn ipred_8x8_dc128(buf: &mut [u8], idx: usize, stride: usize) { - ipred_dc128(buf, idx, stride, 8); +fn ipred_8x8_dc128(buf: &mut [u8], stride: usize, _top: &[u8], _left: &[u8]) { + ipred_dc128(buf, stride, 8); } -fn ipred_8x8_plane(buf: &mut [u8], idx: usize, stride: usize) { - let mut h: i32 = 0; - let mut v: i32 = 0; - let idx0 = idx + 3 - stride; - let mut idx1 = idx + 4 * stride - 1; - let mut idx2 = idx + 2 * stride - 1; - for i in 0..4 { +fn ipred_8x8_plane(buf: &mut [u8], stride: usize, top: &[u8], left: &[u8]) { + let mut h: i32 = 4 * (i32::from(top[7]) - i32::from(left[0])); + let mut v: i32 = 4 * (i32::from(left[8]) - i32::from(left[0])); + for i in 0..3 { let i1 = (i + 1) as i32; - h += i1 * (i32::from(buf[idx0 + i + 1]) - i32::from(buf[idx0 - i - 1])); - v += i1 * (i32::from(buf[idx1]) - i32::from(buf[idx2])); - idx1 += stride; - idx2 -= stride; + h += i1 * (i32::from(top[4 + i]) - i32::from(top[2 - i])); + v += i1 * (i32::from(left[5 + i]) - i32::from(left[3 - i])); } let b = (17 * h + 16) >> 5; let c = (17 * v + 16) >> 5; - let mut a = 16 * (i32::from(buf[idx - 1 + 7 * stride]) + i32::from(buf[idx + 7 - stride])) - 3 * (b + c) + 16; - for line in buf[idx..].chunks_mut(stride).take(8) { + let mut a = 16 * (i32::from(left[8]) + i32::from(top[7])) - 3 * (b + c) + 16; + for line in buf.chunks_mut(stride).take(8) { let mut acc = a; for el in line.iter_mut().take(8) { *el = clip8((acc >> 5) as i16); @@ -848,44 +826,38 @@ fn ipred_8x8_plane(buf: &mut [u8], idx: usize, stride: usize) { } } -fn ipred_16x16_ver(buf: &mut [u8], idx: usize, stride: usize) { - ipred_ver(buf, idx, stride, 16); +fn ipred_16x16_ver(buf: &mut [u8], stride: usize, top: &[u8], _left: &[u8]) { + ipred_ver(buf, stride, top, 16); } -fn ipred_16x16_hor(buf: &mut [u8], idx: usize, stride: usize) { - ipred_hor(buf, idx, stride, 16); +fn ipred_16x16_hor(buf: &mut [u8], stride: usize, _top: &[u8], left: &[u8]) { + ipred_hor(buf, stride, left, 16); } -fn ipred_16x16_dc(buf: &mut [u8], idx: usize, stride: usize) { - ipred_dc(buf, idx, stride, 16, 5); +fn ipred_16x16_dc(buf: &mut [u8], stride: usize, top: &[u8], left: &[u8]) { + ipred_dc(buf, stride, top, left, 16, 5); } -fn ipred_16x16_left_dc(buf: &mut [u8], idx: usize, stride: usize) { - ipred_left_dc(buf, idx, stride, 16, 4); +fn ipred_16x16_left_dc(buf: &mut [u8], stride: usize, _top: &[u8], left: &[u8]) { + ipred_left_dc(buf, stride, left, 16, 4); } -fn ipred_16x16_top_dc(buf: &mut [u8], idx: usize, stride: usize) { - ipred_top_dc(buf, idx, stride, 16, 4); +fn ipred_16x16_top_dc(buf: &mut [u8], stride: usize, top: &[u8], _left: &[u8]) { + ipred_top_dc(buf, stride, top, 16, 4); } -fn ipred_16x16_dc128(buf: &mut [u8], idx: usize, stride: usize) { - ipred_dc128(buf, idx, stride, 16); +fn ipred_16x16_dc128(buf: &mut [u8], stride: usize, _top: &[u8], _left: &[u8]) { + ipred_dc128(buf, stride, 16); } -fn ipred_16x16_plane(buf: &mut [u8], idx: usize, stride: usize) { - let idx0 = idx + 7 - stride; - let mut idx1 = idx + 8 * stride - 1; - let mut idx2 = idx1 - 2 * stride; - - let mut h = i32::from(buf[idx0 + 1]) - i32::from(buf[idx0 - 1]); - let mut v = i32::from(buf[idx1]) - i32::from(buf[idx2]); - - for k in 2..9 { - idx1 += stride; - idx2 -= stride; - h += (k as i32) * (i32::from(buf[idx0 + k]) - i32::from(buf[idx0 - k])); - v += (k as i32) * (i32::from(buf[idx1]) - i32::from(buf[idx2])); +fn ipred_16x16_plane(buf: &mut [u8], stride: usize, top: &[u8], left: &[u8]) { + let mut h = 8 * (i32::from(top[15]) - i32::from(left[0])); + let mut v = 8 * (i32::from(left[16]) - i32::from(left[0])); + for k in 0..7 { + h += ((k as i32) + 1) * (i32::from(top[8 + k]) - i32::from(top[6 - k])); + v += ((k as i32) + 1) * (i32::from(left[9 + k]) - i32::from(left[7 - k])); } + h = (5 * h + 32) >> 6; v = (5 * v + 32) >> 6; - let mut a = 16 * (i32::from(buf[idx - 1 + 15 * stride]) + i32::from(buf[idx + 15 - stride]) + 1) - 7 * (v + h); + let mut a = 16 * (i32::from(left[16]) + i32::from(top[15]) + 1) - 7 * (v + h); - for row in buf[idx..].chunks_mut(stride).take(16) { + for row in buf.chunks_mut(stride).take(16) { let mut b = a; a += v; @@ -899,8 +871,8 @@ fn ipred_16x16_plane(buf: &mut [u8], idx: usize, stride: usize) { } } -pub type IPred4x4Func = fn(buf: &mut [u8], off: usize, stride: usize, tr: &[u8]); -pub type IPred8x8Func = fn(buf: &mut [u8], off: usize, stride: usize); +pub type IPred4x4Func = fn(buf: &mut [u8], stride: usize, top: &[u8], left: &[u8], tr: &[u8]); +pub type IPred8x8Func = fn(buf: &mut [u8], stride: usize, top: &[u8], left: &[u8]); pub type IPred8x8LumaFunc = fn(buf: &mut [u8], stride: usize, ctx: &IPred8Context); pub const IPRED4_DC128: usize = 11; @@ -976,14 +948,14 @@ pub fn gray_block(frm: &mut NASimpleVideoFrame, x: usize, y: usize, w: usize let coff = [frm.offset[1] + x / 2 + y / 2 * frm.stride[1], frm.offset[2] + x / 2 + y / 2 * frm.stride[2]]; if w == 16 && h == 16 { - IPRED_FUNCS16X16[IPRED8_DC128](frm.data, yoff, frm.stride[0]); + IPRED_FUNCS16X16[IPRED8_DC128](&mut frm.data[yoff..], frm.stride[0], &[], &[]); for chroma in 1..2 { - IPRED_FUNCS8X8_CHROMA[IPRED8_DC128](frm.data, coff[chroma - 1], frm.stride[chroma]); + IPRED_FUNCS8X8_CHROMA[IPRED8_DC128](&mut frm.data[coff[chroma - 1]..], frm.stride[chroma], &[], &[]); } } else if w == 8 && h == 8 { - IPRED_FUNCS8X8_CHROMA[IPRED8_DC128](frm.data, yoff, frm.stride[0]); + IPRED_FUNCS8X8_CHROMA[IPRED8_DC128](&mut frm.data[yoff..], frm.stride[0], &[], &[]); for chroma in 1..2 { - IPRED_FUNCS4X4[IPRED4_DC128](frm.data, coff[chroma - 1], frm.stride[chroma], &[128; 4]); + IPRED_FUNCS4X4[IPRED4_DC128](&mut frm.data[coff[chroma - 1]..], frm.stride[chroma], &[128; 4], &[128; 4], &[128; 4]); } } else { for row in frm.data[yoff..].chunks_mut(frm.stride[0]).take(h) { @@ -1117,7 +1089,7 @@ pub fn loop_filter_lumanormal_h(dst: &mut [u8], off: usize, stride: usize, alpha } } pub fn loop_filter_chromaedge_v(dst: &mut [u8], mut off: usize, stride: usize, alpha: i16, beta: i16) { - for _ in 0..4 { + for _ in 0..2 { if check_filter(dst, off, 1, alpha, beta) { loop_filter!(chromaedge; dst, off, 1); } @@ -1125,14 +1097,14 @@ pub fn loop_filter_chromaedge_v(dst: &mut [u8], mut off: usize, stride: usize, a } } pub fn loop_filter_chromaedge_h(dst: &mut [u8], off: usize, stride: usize, alpha: i16, beta: i16) { - for x in 0..4 { + for x in 0..2 { if check_filter(dst, off + x, stride, alpha, beta) { loop_filter!(chromaedge; dst, off + x, stride); } } } pub fn loop_filter_chromanormal_v(dst: &mut [u8], mut off: usize, stride: usize, alpha: i16, beta: i16, tc0: i16) { - for _ in 0..4 { + for _ in 0..2 { if check_filter(dst, off, 1, alpha, beta) { loop_filter!(chromanormal; dst, off, 1, tc0); } @@ -1140,7 +1112,7 @@ pub fn loop_filter_chromanormal_v(dst: &mut [u8], mut off: usize, stride: usize, } } pub fn loop_filter_chromanormal_h(dst: &mut [u8], off: usize, stride: usize, alpha: i16, beta: i16, tc0: i16) { - for x in 0..4 { + for x in 0..2 { if check_filter(dst, off + x, stride, alpha, beta) { loop_filter!(chromanormal; dst, off + x, stride, tc0); } diff --git a/nihav-itu/src/codecs/h264/loopfilter.rs b/nihav-itu/src/codecs/h264/loopfilter.rs dissimilarity index 80% index a5ae123..0890f30 100644 --- a/nihav-itu/src/codecs/h264/loopfilter.rs +++ b/nihav-itu/src/codecs/h264/loopfilter.rs @@ -1,210 +1,111 @@ -use nihav_core::frame::NASimpleVideoFrame; -use super::types::SliceState; -use super::dsp::*; - -const ALPHA: [i16; 52] = [ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, 25, 28, - 32, 36, 40, 45, 50, 56, 63, 71, 80, 90, 100, 113, 127, 144, 162, 182, - 203, 226, 255, 255 -]; -const BETA: [i16; 52] = [ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, 8, 8, - 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, - 17, 17, 18, 18 -]; - -const TC0: [[u8; 3]; 52] = [ - [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], - [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], - [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], - [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], - [ 0, 0, 0], [ 0, 0, 1], [ 0, 0, 1], [ 0, 0, 1], - [ 0, 0, 1], [ 0, 1, 1], [ 0, 1, 1], [ 1, 1, 1], - [ 1, 1, 1], [ 1, 1, 1], [ 1, 1, 1], [ 1, 1, 2], - [ 1, 1, 2], [ 1, 1, 2], [ 1, 1, 2], [ 1, 2, 3], - [ 1, 2, 3], [ 2, 2, 3], [ 2, 2, 4], [ 2, 3, 4], - [ 2, 3, 4], [ 3, 3, 5], [ 3, 4, 6], [ 3, 4, 6], - [ 4, 5, 7], [ 4, 5, 8], [ 4, 6, 9], [ 5, 7, 10], - [ 6, 8, 11], [ 6, 8, 13], [ 7, 10, 14], [ 8, 11, 16], - [ 9, 12, 18], [10, 13, 20], [11, 15, 23], [13, 17, 25] -]; - -fn get_lf_idx(qp0: u8, qp1: u8, off: i8) -> usize { - (i16::from((qp0 + qp1 + 1) >> 1) + i16::from(off)).max(0).min(51) as usize -} - -fn filter_mb_row4_y(dst: &mut [u8], off: usize, stride: usize, dmodes: [u8; 4], quants: [u8; 3], alpha_off: i8, beta_off: i8) { - let q = quants[0]; - let qleft = quants[1]; - let dmode = dmodes[0] & 0xF; - if dmode != 0 { - let index_a_y = get_lf_idx(q, qleft, alpha_off); - let alpha_y = ALPHA[index_a_y]; - let beta_y = BETA[get_lf_idx(q, qleft, beta_off)]; - if dmode == 4 { - loop_filter_lumaedge_v(dst, off, stride, alpha_y, beta_y); - } else { - let tc0 = i16::from(TC0[index_a_y][(dmode - 1) as usize]); - loop_filter_lumanormal_v(dst, off, stride, alpha_y, beta_y, tc0); - } - } - let index_a_y = get_lf_idx(q, q, alpha_off); - let alpha_y = ALPHA[index_a_y]; - let beta_y = BETA[get_lf_idx(q, q, beta_off)]; - - for i in 1..4 { - let dmode = dmodes[i] & 0xF; - if dmode != 0 { - let tc0 = i16::from(TC0[index_a_y][(dmode - 1) as usize]); - loop_filter_lumanormal_v(dst, off + i * 4, stride, alpha_y, beta_y, tc0); - } - } - - let qtop = quants[2]; - let index_a_y = get_lf_idx(q, qtop, alpha_off); - let alpha_y = ALPHA[index_a_y]; - let beta_y = BETA[get_lf_idx(q, qtop, beta_off)]; - for i in 0..4 { - let dmode = dmodes[i] >> 4; - if dmode == 4 { - loop_filter_lumaedge_h(dst, off + i * 4, stride, alpha_y, beta_y); - } else if dmode != 0 { - let tc0 = i16::from(TC0[index_a_y][(dmode - 1) as usize]); - loop_filter_lumanormal_h(dst, off + i * 4, stride, alpha_y, beta_y, tc0); - } - } -} - -fn filter_mb_row4_c(dst: &mut [u8], off: usize, stride: usize, dmodes: [u8; 4], quants: [u8; 3], alpha_off: i8, beta_off: i8) { - let q = quants[0]; - let qleft = quants[1]; - - let dmode = dmodes[0] & 0xF; - if dmode != 0 { - let index_a_c = get_lf_idx(q, qleft, alpha_off); - let alpha_c = ALPHA[index_a_c]; - let beta_c = BETA[get_lf_idx(q, qleft, beta_off)]; - if dmode == 4 { - loop_filter_chromaedge_v(dst, off, stride, alpha_c, beta_c); - } else { - let tc0 = i16::from(TC0[index_a_c][(dmode - 1) as usize]); - loop_filter_chromanormal_v(dst, off, stride, alpha_c, beta_c, tc0); - } - } - let dmode = dmodes[2] & 0xF; - if dmode != 0 { - let index_a_c = get_lf_idx(q, q, alpha_off); - let alpha_c = ALPHA[index_a_c]; - let beta_c = BETA[get_lf_idx(q, q, beta_off)]; - let tc0 = i16::from(TC0[index_a_c][(dmode - 1) as usize]); - loop_filter_chromanormal_v(dst, off + 4, stride, alpha_c, beta_c, tc0); - } - - let qtop = quants[2]; - let index_a_c = get_lf_idx(q, qtop, alpha_off); - let alpha_c = ALPHA[index_a_c]; - let beta_c = BETA[get_lf_idx(q, qtop, beta_off)]; - for i in 0..2 { - let dmode = dmodes[i * 2] >> 4; - if dmode == 4 { - loop_filter_chromaedge_h(dst, off + i * 4, stride, alpha_c, beta_c); - } else if dmode != 0 { - let tc0 = i16::from(TC0[index_a_c][(dmode - 1) as usize]); - loop_filter_chromanormal_h(dst, off + i * 4, stride, alpha_c, beta_c, tc0); - } - } -} - -pub fn loop_filter_row(frm: &mut NASimpleVideoFrame, sstate: &SliceState, alpha_off: i8, beta_off: i8) { - let mut db_idx = sstate.deblock.xpos - sstate.deblock.stride; - let mut yoff = frm.offset[0] + sstate.mb_y * 16 * frm.stride[0]; - let mut uoff = frm.offset[1] + sstate.mb_y * 8 * frm.stride[1]; - let mut voff = frm.offset[2] + sstate.mb_y * 8 * frm.stride[2]; - let mut tlq = [0; 3]; - let mut lq = [0; 3]; - let mut mb_idx = sstate.mb.xpos; - for _mb_x in 0..sstate.mb_w { - let mut tqy = sstate.mb.data[mb_idx - sstate.mb.stride].qp_y; - let tqu = sstate.mb.data[mb_idx - sstate.mb.stride].qp_u; - let tqv = sstate.mb.data[mb_idx - sstate.mb.stride].qp_v; - if sstate.mb_y > 0 { - let dmodes = [sstate.deblock.data[db_idx], - sstate.deblock.data[db_idx + 1], - sstate.deblock.data[db_idx + 2], - sstate.deblock.data[db_idx + 3]]; - - filter_mb_row4_y(frm.data, yoff - frm.stride[0] * 4, frm.stride[0], dmodes, [tqy, tlq[0], tqy], alpha_off, beta_off); - filter_mb_row4_c(frm.data, uoff - frm.stride[1] * 4, frm.stride[1], dmodes, [tqu, tlq[1], tqu], alpha_off, beta_off); - filter_mb_row4_c(frm.data, voff - frm.stride[2] * 4, frm.stride[2], dmodes, [tqv, tlq[2], tqv], alpha_off, beta_off); - - tlq = [tqy, tqu, tqv]; - } - - let qy = sstate.mb.data[mb_idx].qp_y; - let qu = sstate.mb.data[mb_idx].qp_u; - let qv = sstate.mb.data[mb_idx].qp_v; - - for y in 0..3 { - db_idx += sstate.deblock.stride; - let dmodes = [sstate.deblock.data[db_idx], - sstate.deblock.data[db_idx + 1], - sstate.deblock.data[db_idx + 2], - sstate.deblock.data[db_idx + 3]]; - - filter_mb_row4_y(frm.data, yoff + frm.stride[0] * 4 * y, frm.stride[0], dmodes, [qy, lq[0], tqy], alpha_off, beta_off); - if y == 0 { - filter_mb_row4_c(frm.data, uoff + frm.stride[1] * 2 * y, frm.stride[1], dmodes, [qu, lq[1], tqu], alpha_off, beta_off); - filter_mb_row4_c(frm.data, voff + frm.stride[2] * 2 * y, frm.stride[2], dmodes, [qv, lq[2], tqv], alpha_off, beta_off); - } - tqy = qy; - } - db_idx -= sstate.deblock.stride * 3; - lq = [qy, qu, qv]; - - mb_idx += 1; - db_idx += 4; - yoff += 16; - uoff += 8; - voff += 8; - } -} -pub fn loop_filter_last(frm: &mut NASimpleVideoFrame, sstate: &SliceState, alpha_off: i8, beta_off: i8) { - let mut db_idx = sstate.deblock.xpos + 3 * sstate.deblock.stride; - let mut yoff = frm.offset[0] + (sstate.mb_y * 16 + 12) * frm.stride[0]; - let mut uoff = frm.offset[1] + (sstate.mb_y * 8 + 4) * frm.stride[1]; - let mut voff = frm.offset[2] + (sstate.mb_y * 8 + 4) * frm.stride[2]; - - let mut lq = [0; 3]; - let mut mb_idx = sstate.mb.xpos; - if sstate.mb_y != 0 && sstate.mb_x == 0 { - db_idx -= 4 * sstate.deblock.stride; - mb_idx -= sstate.mb.stride; - yoff -= 16 * frm.stride[0]; - uoff -= 8 * frm.stride[1]; - voff -= 8 * frm.stride[2]; - } - for _mb_x in 0..sstate.mb_w { - let qy = sstate.mb.data[mb_idx].qp_y; - let qu = sstate.mb.data[mb_idx].qp_u; - let qv = sstate.mb.data[mb_idx].qp_v; - - let dmodes = [sstate.deblock.data[db_idx], - sstate.deblock.data[db_idx + 1], - sstate.deblock.data[db_idx + 2], - sstate.deblock.data[db_idx + 3]]; - - filter_mb_row4_y(frm.data, yoff, frm.stride[0], dmodes, [qy, lq[0], qy], alpha_off, beta_off); - filter_mb_row4_c(frm.data, uoff, frm.stride[1], dmodes, [qu, lq[1], qu], alpha_off, beta_off); - filter_mb_row4_c(frm.data, voff, frm.stride[2], dmodes, [qv, lq[2], qv], alpha_off, beta_off); - - lq = [qy, qu, qv]; - mb_idx += 1; - db_idx += 4; - yoff += 16; - uoff += 8; - voff += 8; - } -} - +use nihav_core::frame::NASimpleVideoFrame; +use super::types::SliceState; +use super::dsp::*; + +const ALPHA: [i16; 52] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, 25, 28, + 32, 36, 40, 45, 50, 56, 63, 71, 80, 90, 100, 113, 127, 144, 162, 182, + 203, 226, 255, 255 +]; +const BETA: [i16; 52] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, + 17, 17, 18, 18 +]; + +const TC0: [[u8; 3]; 52] = [ + [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], + [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], + [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], + [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], [ 0, 0, 0], + [ 0, 0, 0], [ 0, 0, 1], [ 0, 0, 1], [ 0, 0, 1], + [ 0, 0, 1], [ 0, 1, 1], [ 0, 1, 1], [ 1, 1, 1], + [ 1, 1, 1], [ 1, 1, 1], [ 1, 1, 1], [ 1, 1, 2], + [ 1, 1, 2], [ 1, 1, 2], [ 1, 1, 2], [ 1, 2, 3], + [ 1, 2, 3], [ 2, 2, 3], [ 2, 2, 4], [ 2, 3, 4], + [ 2, 3, 4], [ 3, 3, 5], [ 3, 4, 6], [ 3, 4, 6], + [ 4, 5, 7], [ 4, 5, 8], [ 4, 6, 9], [ 5, 7, 10], + [ 6, 8, 11], [ 6, 8, 13], [ 7, 10, 14], [ 8, 11, 16], + [ 9, 12, 18], [10, 13, 20], [11, 15, 23], [13, 17, 25] +]; + +fn get_lf_idx(qp0: u8, qp1: u8, off: i8) -> usize { + (i16::from((qp0 + qp1 + 1) >> 1) + i16::from(off)).max(0).min(51) as usize +} + +macro_rules! filter_edge_func { + ($funcname: ident, $edgefilter: ident, $normfilter: ident) => { + fn $funcname(dst: &mut [u8], off: usize, stride: usize, dmode: u8, quants: [u8; 2], alpha_off: i8, beta_off: i8) { + let q = quants[0]; + let qleft = quants[1]; + if dmode != 0 { + let index_a = get_lf_idx(q, qleft, alpha_off); + let alpha = ALPHA[index_a]; + let beta = BETA[get_lf_idx(q, qleft, beta_off)]; + if dmode == 4 { + $edgefilter(dst, off, stride, alpha, beta); + } else { + let tc0 = i16::from(TC0[index_a][(dmode - 1) as usize]); + $normfilter(dst, off, stride, alpha, beta, tc0); + } + } + } + } +} + +filter_edge_func!(filter_edge_y_v, loop_filter_lumaedge_v, loop_filter_lumanormal_v); +filter_edge_func!(filter_edge_y_h, loop_filter_lumaedge_h, loop_filter_lumanormal_h); +filter_edge_func!(filter_edge_c_v, loop_filter_chromaedge_v, loop_filter_chromanormal_v); +filter_edge_func!(filter_edge_c_h, loop_filter_chromaedge_h, loop_filter_chromanormal_h); + +pub fn loop_filter_mb(frm: &mut NASimpleVideoFrame, sstate: &SliceState, alpha_off: i8, beta_off: i8) { + let yoff = frm.offset[0] + sstate.mb_x * 16 + sstate.mb_y * 16 * frm.stride[0]; + let uoff = frm.offset[1] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[1]; + let voff = frm.offset[2] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[2]; + let mb_idx = sstate.mb.xpos + sstate.mb_x; + + let lqy = sstate.mb.data[mb_idx - 1].qp_y; + let lqu = sstate.mb.data[mb_idx - 1].qp_u; + let lqv = sstate.mb.data[mb_idx - 1].qp_v; + let qy = sstate.mb.data[mb_idx].qp_y; + let qu = sstate.mb.data[mb_idx].qp_u; + let qv = sstate.mb.data[mb_idx].qp_v; + + for (y, dmodes) in sstate.deblock.chunks(4).enumerate() { + filter_edge_y_v(frm.data, yoff + y * 4 * frm.stride[0], frm.stride[0], dmodes[0] & 0xF, [qy, lqy], alpha_off, beta_off); + for x in 1..4 { + filter_edge_y_v(frm.data, yoff + x * 4 + y * 4 * frm.stride[0], frm.stride[0], dmodes[x] & 0xF, [qy, qy], alpha_off, beta_off); + } + filter_edge_c_v(frm.data, uoff + y * 2 * frm.stride[1], frm.stride[1], dmodes[0] & 0xF, [qu, lqu], alpha_off, beta_off); + filter_edge_c_v(frm.data, uoff + y * 2 * frm.stride[1] + 4, frm.stride[1], dmodes[2] & 0xF, [qu, qu], alpha_off, beta_off); + filter_edge_c_v(frm.data, voff + y * 2 * frm.stride[2], frm.stride[2], dmodes[0] & 0xF, [qv, lqv], alpha_off, beta_off); + filter_edge_c_v(frm.data, voff + y * 2 * frm.stride[2] + 4, frm.stride[2], dmodes[2] & 0xF, [qv, qv], alpha_off, beta_off); + } + + let tqy = sstate.mb.data[mb_idx - sstate.mb.stride].qp_y; + let tqu = sstate.mb.data[mb_idx - sstate.mb.stride].qp_u; + let tqv = sstate.mb.data[mb_idx - sstate.mb.stride].qp_v; + + let dmodes = &sstate.deblock; + for x in 0..4 { + filter_edge_y_h(frm.data, yoff + x * 4, frm.stride[0], dmodes[x] >> 4, [qy, tqy], alpha_off, beta_off); + } + for x in 0..4 { + filter_edge_c_h(frm.data, uoff + x * 2, frm.stride[1], dmodes[x] >> 4, [qu, tqu], alpha_off, beta_off); + filter_edge_c_h(frm.data, voff + x * 2, frm.stride[2], dmodes[x] >> 4, [qv, tqv], alpha_off, beta_off); + } + + for (y, dmodes) in sstate.deblock.chunks(4).enumerate().skip(1) { + for x in 0..4 { + filter_edge_y_h(frm.data, yoff + x * 4 + y * 4 * frm.stride[0], frm.stride[0], dmodes[x] >> 4, [qy, qy], alpha_off, beta_off); + } + } + + let dmodes = &sstate.deblock[4 * 2..]; + for x in 0..4 { + filter_edge_c_h(frm.data, uoff + x * 2 + frm.stride[1] * 4, frm.stride[1], dmodes[x] >> 4, [qu, qu], alpha_off, beta_off); + filter_edge_c_h(frm.data, voff + x * 2 + frm.stride[2] * 4, frm.stride[2], dmodes[x] >> 4, [qv, qv], alpha_off, beta_off); + } +} diff --git a/nihav-itu/src/codecs/h264/mod.rs b/nihav-itu/src/codecs/h264/mod.rs index 3034e07..b6f6dd5 100644 --- a/nihav-itu/src/codecs/h264/mod.rs +++ b/nihav-itu/src/codecs/h264/mod.rs @@ -398,15 +398,6 @@ println!("PAFF?"); let mut cabac = CABAC::new(csrc, slice_hdr.slice_type, slice_hdr.slice_qp, slice_hdr.cabac_init_idc as usize)?; self.has_pic = self.decode_slice_cabac(&mut cabac, &slice_hdr)?; } - if !self.deblock_skip && self.deblock_mode != 1 { - if let Some(ref mut pic) = self.cur_pic { - let mut frm = NASimpleVideoFrame::from_video_buf(&mut pic.buf).unwrap(); - if self.sstate.mb_x != 0 { - loop_filter_row(&mut frm, &self.sstate, self.lf_alpha, self.lf_beta); - } - loop_filter_last(&mut frm, &self.sstate, self.lf_alpha, self.lf_beta); - } - } }, 2 => { // slice data partition A //slice header @@ -469,7 +460,7 @@ println!("PAFF?"); } else { IPRED8_DC_LEFT }; - IPRED_FUNCS16X16[id](frm.data, yoff, frm.stride[0]); + IPRED_FUNCS16X16[id](&mut frm.data[yoff..], frm.stride[0], &sstate.top_line_y[sstate.mb_x * 16..], &sstate.left_y); }, MBType::Intra8x8 => { let mut ictx = IPred8Context::new(); @@ -496,7 +487,30 @@ println!("PAFF?"); let noright = (y == 2 || sstate.mb_x == sstate.mb_w - 1 || mb_idx < sstate.mb_start + sstate.mb_w) && (x == 2); let has_tl = (has_top && x > 0) || (has_left && y > 0) || (x == 0 && y == 0 && sstate.mb_x > 0 && mb_idx > sstate.mb_start + sstate.mb_w); if id != IPRED4_DC128 { - ictx.fill(frm.data, cur_yoff, frm.stride[0], has_top, has_top && !noright, has_left, has_tl); + let top = if y == 0 { + &sstate.top_line_y[sstate.mb_x * 16 + x * 4..] + } else { + &frm.data[cur_yoff - frm.stride[0]..] + }; + let mut left_buf = [0; 9]; + let left = if x == 0 { + &sstate.left_y[y * 4..] + } else { + if has_tl { + if y == 0 { + left_buf[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1]; + } else { + left_buf[0] = frm.data[cur_yoff - 1 - frm.stride[0]]; + } + } + if has_left { + for (dst, src) in left_buf[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) { + *dst = src[0]; + } + } + &left_buf + }; + ictx.fill(top, left, has_top, has_top && !noright, has_left, has_tl); } IPRED_FUNCS8X8_LUMA[id](&mut frm.data[cur_yoff..], frm.stride[0], &ictx); if mb_info.coded[blk4] { @@ -524,12 +538,11 @@ println!("PAFF?"); }; let noright = (sstate.mb_x == sstate.mb_w - 1 || sstate.mb_x + sstate.mb_y * sstate.mb_w < sstate.mb_start + sstate.mb_w) && (x == 3); let tr: [u8; 4] = if y == 0 { + let tsrc = &sstate.top_line_y[sstate.mb_x * 16 + x * 4..]; if has_top && !noright { - let i = cur_yoff - frm.stride[0]; - [frm.data[i + 4], frm.data[i + 5], frm.data[i + 6], frm.data[i + 7]] + [tsrc[4], tsrc[5], tsrc[6], tsrc[7]] } else if has_top { - let i = cur_yoff - frm.stride[0]; - [frm.data[i + 3], frm.data[i + 3], frm.data[i + 3], frm.data[i + 3]] + [tsrc[3]; 4] } else { [0; 4] } @@ -540,7 +553,36 @@ println!("PAFF?"); let i = cur_yoff - frm.stride[0]; [frm.data[i + 3], frm.data[i + 3], frm.data[i + 3], frm.data[i + 3]] }; - IPRED_FUNCS4X4[id](frm.data, cur_yoff, frm.stride[0], &tr); + let mut top = [128; 4]; + let mut left = [128; 9]; + if y == 0 { + if has_top { + top.copy_from_slice(&sstate.top_line_y[sstate.mb_x * 16 + x * 4..][..4]); + } + } else { + top.copy_from_slice(&frm.data[cur_yoff - frm.stride[0]..][..4]); + } + if x == 0 { + if has_left { + for (dst, &src) in left.iter_mut().zip(sstate.left_y[y * 4..].iter()) { + *dst = src; + } + } + } else { + if y == 0 { + if x == 0 { + left[0] = sstate.left_y[y * 4]; + } else if has_top { + left[0] = sstate.top_line_y[sstate.mb_x * 16 + x * 4 - 1]; + } + } else { + left[0] = frm.data[cur_yoff - frm.stride[0] - 1]; + } + for (dst, row) in left[1..].iter_mut().zip(frm.data[cur_yoff - 1..].chunks(frm.stride[0])) { + *dst = row[0]; + } + } + IPRED_FUNCS4X4[id](&mut frm.data[cur_yoff..], frm.stride[0], &top, &left, &tr); if mb_info.coded[x + y * 4] { add_coeffs(frm.data, cur_yoff, frm.stride[0], &mb_info.coeffs[x + y * 4]); } @@ -559,7 +601,8 @@ println!("PAFF?"); }; for chroma in 1..3 { let off = frm.offset[chroma] + sstate.mb_x * 8 + sstate.mb_y * 8 * frm.stride[chroma]; - IPRED_FUNCS8X8_CHROMA[id](frm.data, off, frm.stride[chroma]); + let top = &sstate.top_line_c[chroma - 1][sstate.mb_x * 8..]; + IPRED_FUNCS8X8_CHROMA[id](&mut frm.data[off..], frm.stride[chroma], top, &sstate.left_c[chroma - 1]); } } fn add_luma(frm: &mut NASimpleVideoFrame, sstate: &SliceState, mb_info: &CurrentMBInfo) { @@ -927,6 +970,7 @@ MBType::BSkip | MBType::Direct | MBType::B16x16(_) | MBType::B16x8(_, _) | MBTyp }, _ => {}, };*/ + self.sstate.save_ipred_context(&frm); } if let Some(ref mut pic) = self.cur_pic { let mv_info = &mut pic.mv_info; @@ -942,11 +986,11 @@ _ => {}, } mv_info.mbs[mb_pos] = mb; } - self.sstate.fill_deblock(self.deblock_mode, self.is_s); - if !self.deblock_skip && self.sstate.mb_x + 1 == self.sstate.mb_w && self.deblock_mode != 1 { + if !self.deblock_skip && self.deblock_mode != 1 { + self.sstate.fill_deblock(&self.frame_refs, self.deblock_mode, self.is_s); if let Some(ref mut pic) = self.cur_pic { let mut frm = NASimpleVideoFrame::from_video_buf(&mut pic.buf).unwrap(); - loop_filter_row(&mut frm, &self.sstate, self.lf_alpha, self.lf_beta); + loop_filter_mb(&mut frm, &self.sstate, self.lf_alpha, self.lf_beta); } } self.sstate.next_mb(); diff --git a/nihav-itu/src/codecs/h264/pic_ref.rs b/nihav-itu/src/codecs/h264/pic_ref.rs index 17a1b3b..c366b25 100644 --- a/nihav-itu/src/codecs/h264/pic_ref.rs +++ b/nihav-itu/src/codecs/h264/pic_ref.rs @@ -407,6 +407,35 @@ impl FrameRefs { }; [ref0, ref1] } + pub fn cmp_refs(&self, ref1: [PicRef; 2], ref2: [PicRef; 2]) -> bool { + if ref1 != ref2 { + self.cmp_ref(ref1[0], ref2[0], 0) && self.cmp_ref(ref1[1], ref2[1], 1) + } else { + true + } + } + fn cmp_ref(&self, ref1: PicRef, ref2: PicRef, list: u8) -> bool { + if ref1 == ref2 { + true + } else { + let idx0 = ref1.index(); + let idx1 = ref2.index(); + if idx0 == idx1 { + return true; + } + let src = if list == 0 { &self.ref_list0 } else { &self.ref_list1 }; + if idx0 >= src.len() || idx1 >= src.len() { +//panic!("wrong refs"); + return false; + } + if let (Some(ref pic0), Some(ref pic1)) = (&src[idx0], &src[idx1]) { + pic0.full_id == pic1.full_id + } else { +//panic!("missing pics"); + false + } + } + } } fn form_ref_list(ref_list: &mut Vec>, ref_pics: &[PictureInfo], long_term: &[Option], reord_info: &ReorderingInfo, cur_id: u16, pic_num_mask: u16) { diff --git a/nihav-itu/src/codecs/h264/types.rs b/nihav-itu/src/codecs/h264/types.rs index 6b5d010..0d97e02 100644 --- a/nihav-itu/src/codecs/h264/types.rs +++ b/nihav-itu/src/codecs/h264/types.rs @@ -1,3 +1,4 @@ +use nihav_core::frame::NASimpleVideoFrame; use nihav_codec_support::codecs::{MV, ZERO_MV}; use nihav_codec_support::data::GenericCache; use super::FrameRefs; @@ -431,10 +432,15 @@ pub struct SliceState { pub blk8: GenericCache, pub blk4: GenericCache, - pub deblock: GenericCache, + pub deblock: [u8; 16], pub has_top: bool, pub has_left: bool, + + pub top_line_y: Vec, + pub left_y: [u8; 17], // first element is top-left + pub top_line_c: [Vec; 2], + pub left_c: [[u8; 9]; 2], } impl SliceState { @@ -449,10 +455,15 @@ impl SliceState { blk8: GenericCache::new(0, 0, Blk8Data::default()), blk4: GenericCache::new(0, 0, Blk4Data::default()), - deblock: GenericCache::new(0, 0, 0), + deblock: [0; 16], has_top: false, has_left: false, + + top_line_y: Vec::new(), + left_y: [0; 17], + top_line_c: [Vec::new(), Vec::new()], + left_c: [[0; 9]; 2], } } pub fn reset(&mut self, mb_w: usize, mb_h: usize, mb_pos: usize) { @@ -470,42 +481,65 @@ impl SliceState { self.blk8 = GenericCache::new(2, mb_w * 2 + 2, Blk8Data::default()); self.blk4 = GenericCache::new(4, mb_w * 4 + 2, Blk4Data::default()); - self.deblock = GenericCache::new(4, mb_w * 4 + 1, 0); - self.has_top = false; self.has_left = false; + + self.top_line_y.resize(mb_w * 16 + 1, 0x80); + self.top_line_c[0].resize(mb_w * 8 + 1, 0x80); + self.top_line_c[1].resize(mb_w * 8 + 1, 0x80); + self.left_y = [0x80; 17]; + self.left_c = [[0x80; 9]; 2]; + } + pub fn save_ipred_context(&mut self, frm: &NASimpleVideoFrame) { + let dstoff = self.mb_x * 16; + let srcoff = frm.offset[0] + self.mb_x * 16 + self.mb_y * 16 * frm.stride[0]; + self.left_y[0] = self.top_line_y[dstoff + 15]; + self.top_line_y[dstoff..][..16].copy_from_slice(&frm.data[srcoff + frm.stride[0] * 15..][..16]); + for (dst, src) in self.left_y[1..].iter_mut().zip(frm.data[srcoff..].chunks(frm.stride[0])) { + *dst = src[15]; + } + for chroma in 0..2 { + let cstride = frm.stride[chroma + 1]; + let dstoff = self.mb_x * 8; + let srcoff = frm.offset[chroma + 1] + self.mb_x * 8 + self.mb_y * 8 * cstride; + self.left_c[chroma][0] = self.top_line_c[chroma][dstoff + 7]; + self.top_line_c[chroma][dstoff..][..8].copy_from_slice(&frm.data[srcoff + cstride * 7..][..8]); + for (dst, src) in self.left_c[chroma][1..].iter_mut().zip(frm.data[srcoff..].chunks(cstride)) { + *dst = src[7]; + } + } } - pub fn fill_deblock(&mut self, deblock_mode: u8, is_s: bool) { + pub fn fill_deblock(&mut self, frefs: &FrameRefs, deblock_mode: u8, is_s: bool) { if deblock_mode == 1 { return; } + self.deblock = [0; 16]; + let tx8x8 = self.get_cur_mb().transform_8x8; - let mut idx = self.deblock.xpos + self.mb_x * 4; let cur_mbt = self.get_cur_mb().mb_type; let left_mbt = self.get_left_mb().mb_type; let mut top_mbt = self.get_top_mb().mb_type; for y in 0..4 { - if tx8x8 && (y & 1) != 0 { - continue; - } let can_do_top = y != 0 || (self.mb_y != 0 && (self.has_top || deblock_mode != 2)); - if can_do_top { + if can_do_top && (!tx8x8 || (y & 1) == 0) { if is_s || cur_mbt.is_intra() || top_mbt.is_intra() { let val = if y == 0 { 0x40 } else { 0x30 }; - for el in self.deblock.data[idx..][..4].iter_mut() { *el |= val; } + for el in self.deblock[y * 4..][..4].iter_mut() { *el |= val; } } else { for x in 0..4 { - if self.get_cur_blk4(x).ncoded != 0 || self.get_top_blk4(x).ncoded != 0 { - self.deblock.data[idx + x] |= 0x20; + let blk4 = x + y * 4; + let blk8 = x / 2 + (y / 2) * 2; + if self.get_cur_blk4(blk4).ncoded != 0 || self.get_top_blk4(blk4).ncoded != 0 { + self.deblock[y * 4 + x] |= 0x20; } else { - let cur_mv = self.get_cur_blk4(x).mv; - let top_mv = self.get_top_blk4(x).mv; - let cur_ref = self.get_cur_blk8(x / 2).ref_idx; - let top_ref = self.get_top_blk8(x / 2).ref_idx; - if mvdiff4(cur_mv[0], top_mv[0]) || mvdiff4(cur_mv[1], top_mv[1]) || cur_ref != top_ref { - self.deblock.data[idx + x] |= 0x10; + let cur_mv = self.get_cur_blk4(blk4).mv; + let top_mv = self.get_top_blk4(blk4).mv; + let cur_ref = self.get_cur_blk8(blk8).ref_idx; + let top_ref = if (y & 1) == 0 { self.get_top_blk8(blk8).ref_idx } else { cur_ref }; + if mvdiff4(cur_mv[0], top_mv[0]) || mvdiff4(cur_mv[1], top_mv[1]) || !frefs.cmp_refs(cur_ref, top_ref) { + self.deblock[y * 4 + x] |= 0x10; } } } @@ -513,32 +547,30 @@ impl SliceState { } let mut lleft_mbt = left_mbt; for x in 0..4 { - if tx8x8 && (x & 1) != 0 { - continue; - } + let skip_8 = tx8x8 && (x & 1) != 0; let can_do_left = x > 0 || self.has_left || (self.mb_x != 0 && deblock_mode != 2); if !can_do_left { continue; } let blk4 = x + y * 4; let blk8 = x / 2 + (y / 2) * 2; - if is_s || cur_mbt.is_intra() || lleft_mbt.is_intra() { - self.deblock.data[idx + x] |= if x == 0 { 4 } else { 3 }; - } else if self.get_cur_blk4(blk4).ncoded != 0 || self.get_top_blk4(blk4).ncoded != 0 { - self.deblock.data[idx + x] |= 2; + if skip_8 { + } else if is_s || cur_mbt.is_intra() || lleft_mbt.is_intra() { + self.deblock[y * 4 + x] |= if x == 0 { 4 } else { 3 }; + } else if self.get_cur_blk4(blk4).ncoded != 0 || self.get_left_blk4(blk4).ncoded != 0 { + self.deblock[y * 4 + x] |= 2; } else { let cur_mv = self.get_cur_blk4(blk4).mv; let left_mv = self.get_left_blk4(blk4).mv; let cur_ref = self.get_cur_blk8(blk8).ref_idx; - let left_ref = self.get_left_blk8(blk8).ref_idx; - if mvdiff4(cur_mv[0], left_mv[0]) || mvdiff4(cur_mv[1], left_mv[1]) || cur_ref != left_ref { - self.deblock.data[idx + x] |= 1; + let left_ref = if (x & 1) == 0 { self.get_left_blk8(blk8).ref_idx } else { cur_ref }; + if mvdiff4(cur_mv[0], left_mv[0]) || mvdiff4(cur_mv[1], left_mv[1]) || !frefs.cmp_refs(cur_ref, left_ref) { + self.deblock[y * 4 + x] |= 1; } } lleft_mbt = cur_mbt; } top_mbt = cur_mbt; - idx += self.deblock.stride; } } pub fn next_mb(&mut self) { @@ -551,8 +583,6 @@ impl SliceState { self.blk8.update_row(); self.blk4.update_row(); - self.deblock.update_row(); - self.has_left = false; } self.has_top = self.mb_x + self.mb_y * self.mb_w >= self.mb_start + self.mb_w;