X-Git-Url: https://git.nihav.org/?p=nihav.git;a=blobdiff_plain;f=nihav-realmedia%2Fsrc%2Fcodecs%2Frv40enc%2Fdsp%2Fmc.rs;fp=nihav-realmedia%2Fsrc%2Fcodecs%2Frv40enc%2Fdsp%2Fmc.rs;h=b7668de8fd59e024c3cee337124f32568f0000e0;hp=0000000000000000000000000000000000000000;hb=4965a5e560c5e194c5b5163c591fcade5f56c3f0;hpb=6bd5b458d9889f092abe9b582bd531ed08a8dc51 diff --git a/nihav-realmedia/src/codecs/rv40enc/dsp/mc.rs b/nihav-realmedia/src/codecs/rv40enc/dsp/mc.rs new file mode 100644 index 0000000..b7668de --- /dev/null +++ b/nihav-realmedia/src/codecs/rv40enc/dsp/mc.rs @@ -0,0 +1,294 @@ +use nihav_core::frame::NAVideoBuffer; +use nihav_codec_support::codecs::MV; +use nihav_codec_support::codecs::blockdsp::edge_emu; +use super::clip8; + +pub fn luma_mc(dst: &mut [u8], dstride: usize, pic: &NAVideoBuffer, xpos: usize, ypos: usize, mv: MV, is16: bool) { + const RV40_EDGE1: [isize; 4] = [ 0, 2, 2, 2 ]; + const RV40_EDGE2: [isize; 4] = [ 0, 3, 3, 3 ]; + let dx = mv.x >> 2; + let cx = (mv.x & 3) as usize; + let dy = mv.y >> 2; + let cy = (mv.y & 3) as usize; + let mode = cx + cy * 4; + + let (w_, h_) = pic.get_dimensions(0); + let w = (w_ + 15) & !15; + let h = (h_ + 15) & !15; + let (bsize, mc_func) = if is16 { (16, LUMA_MC_16[mode]) } else { (8, LUMA_MC_8[mode]) }; + + if check_pos(xpos, ypos, bsize, w, h, dx, dy, RV40_EDGE1[cx], RV40_EDGE2[cx], RV40_EDGE1[cy], RV40_EDGE2[cy]) { + let sstride = pic.get_stride(0); + let mut soffset = pic.get_offset(0) + xpos + ypos * sstride; + let data = pic.get_data(); + let src: &[u8] = data.as_slice(); + soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize; + (mc_func)(dst, dstride, src, soffset, sstride); + } else { + let mut ebuf = [0u8; 32 * 22]; + edge_emu(pic, (xpos as isize) + (dx as isize) - 2, (ypos as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0, 4); + (mc_func)(dst, dstride, &ebuf, 32 * 2 + 2, 32); + } +} + +pub fn chroma_mc(dst: &mut [u8], dstride: usize, pic: &NAVideoBuffer, xpos: usize, ypos: usize, comp: usize, mv: MV, is16: bool) { + let mvx = mv.x / 2; + let mvy = mv.y / 2; + let dx = mvx >> 2; + let mut cx = (mvx & 3) as usize; + let dy = mvy >> 2; + let mut cy = (mvy & 3) as usize; + + if (cx == 3) && (cy == 3) { + cx = 2; + cy = 2; + } + + let (w_, h_) = pic.get_dimensions(0); + let w = ((w_ + 15) & !15) >> 1; + let h = ((h_ + 15) & !15) >> 1; + let bsize = if is16 { 8 } else { 4 }; + + if check_pos(xpos, ypos, bsize, w, h, dx, dy, 0, 1, 0, 1) { + let sstride = pic.get_stride(comp); + let mut soffset = pic.get_offset(comp) + xpos + ypos * sstride; + let data = pic.get_data(); + let src: &[u8] = data.as_slice(); + soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize; + rv40_chroma_mc(dst, dstride, src, soffset, sstride, bsize, cx, cy); + } else { + let mut ebuf = [0u8; 16 * 10]; + edge_emu(pic, (xpos as isize) + (dx as isize), (ypos as isize) + (dy as isize), bsize + 1, bsize + 1, &mut ebuf, 16, comp, 4); + rv40_chroma_mc(dst, dstride, &ebuf, 0, 16, bsize, cx, cy); + } +} + +fn check_pos(x: usize, y: usize, size: usize, width: usize, height: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool { + let xn = (x as isize) + (dx as isize); + let yn = (y as isize) + (dy as isize); + + (xn - e0 >= 0) && (xn + (size as isize) + e1 <= (width as isize)) && (yn - e2 >= 0) && (yn + (size as isize) + e3 <= (height as isize)) +} + +type MCFunc = fn (&mut [u8], usize, &[u8], usize, usize); + +macro_rules! el { + ($s: ident, $o: expr) => ( $s[$o] as i16 ) +} + +macro_rules! filter { + (01; $s: ident, $o: expr, $step: expr) => ( + clip8((( el!($s, $o - 2 * $step) + -5 * el!($s, $o - 1 * $step) + +52 * el!($s, $o - 0 * $step) + +20 * el!($s, $o + 1 * $step) + -5 * el!($s, $o + 2 * $step) + + el!($s, $o + 3 * $step) + 32) >> 6) as i16) + ); + (02; $s: ident, $o: expr, $step: expr) => ( + clip8((( el!($s, $o - 2 * $step) + -5 * el!($s, $o - 1 * $step) + +20 * el!($s, $o - 0 * $step) + +20 * el!($s, $o + 1 * $step) + -5 * el!($s, $o + 2 * $step) + + el!($s, $o + 3 * $step) + 16) >> 5) as i16) + ); + (03; $s: ident, $o: expr, $step: expr) => ( + clip8((( el!($s, $o - 2 * $step) + -5 * el!($s, $o - 1 * $step) + +20 * el!($s, $o - 0 * $step) + +52 * el!($s, $o + 1 * $step) + -5 * el!($s, $o + 2 * $step) + + el!($s, $o + 3 * $step) + 32) >> 6) as i16) + ); + (33; $s: ident, $o: expr, $stride: expr) => ( + clip8((( el!($s, $o) + + el!($s, $o + 1) + + el!($s, $o + $stride) + + el!($s, $o + 1 + $stride) + 2) >> 2) as i16) + ); +} + +macro_rules! mc_func { + (copy; $name: ident, $size: expr) => ( + fn $name (dst: &mut [u8], dstride: usize, src: &[u8], sidx: usize, sstride: usize) { + for (dline, sline) in dst.chunks_mut(dstride).zip(src[sidx..].chunks(sstride)).take($size) { + dline[..$size].copy_from_slice(&sline[..$size]); + } + } + ); + (mc01; $name: ident, $size: expr, $ver: expr) => ( + fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) { + let step = if $ver { sstride } else { 1 }; + for dline in dst.chunks_mut(dstride).take($size) { + for (x, el) in dline[..$size].iter_mut().enumerate() { + *el = filter!(01; src, sidx + x, step); + } + sidx += sstride; + } + } + ); + (mc02; $name: ident, $size: expr, $ver: expr) => ( + fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) { + let step = if $ver { sstride } else { 1 }; + for dline in dst.chunks_mut(dstride).take($size) { + for (x, el) in dline[..$size].iter_mut().enumerate() { + *el = filter!(02; src, sidx + x, step); + } + sidx += sstride; + } + } + ); + (mc03; $name: ident, $size: expr, $ver: expr) => ( + fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) { + let step = if $ver { sstride } else { 1 }; + for dline in dst.chunks_mut(dstride).take($size) { + for (x, el) in dline[..$size].iter_mut().enumerate() { + *el = filter!(03; src, sidx + x, step); + } + sidx += sstride; + } + } + ); + (cm01; $name: ident, $size: expr, $ofilt: ident) => ( + fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) { + let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size]; + let mut bidx = 0; + let bstride = $size; + sidx -= sstride * 2; + for _ in 0..$size+5 { + for x in 0..$size { buf[bidx + x] = filter!(01; src, sidx + x, 1); } + bidx += bstride; + sidx += sstride; + } + $ofilt(dst, dstride, &buf, 2*bstride, $size); + } + ); + (cm02; $name: ident, $size: expr, $ofilt: ident) => ( + fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) { + let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size]; + let mut bidx = 0; + let bstride = $size; + sidx -= sstride * 2; + for _ in 0..$size+5 { + for x in 0..$size { buf[bidx + x] = filter!(02; src, sidx + x, 1); } + bidx += bstride; + sidx += sstride; + } + $ofilt(dst, dstride, &buf, 2*bstride, $size); + } + ); + (cm03; $name: ident, $size: expr, $ofilt: ident) => ( + fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) { + let mut buf: [u8; ($size + 5) * $size] = [0; ($size + 5) * $size]; + let mut bidx = 0; + let bstride = $size; + sidx -= sstride * 2; + for _ in 0..$size+5 { + for x in 0..$size { buf[bidx + x] = filter!(03; src, sidx + x, 1); } + bidx += bstride; + sidx += sstride; + } + $ofilt(dst, dstride, &buf, 2*bstride, $size); + } + ); + (mc33; $name: ident, $size: expr) => ( + fn $name (dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) { + for dline in dst.chunks_mut(dstride).take($size) { + for (x, el) in dline[..$size].iter_mut().enumerate() { + *el = filter!(33; src, sidx + x, sstride); + } + sidx += sstride; + } + } + ); +} +mc_func!(copy; copy_16, 16); +mc_func!(copy; copy_8, 8); +mc_func!(mc01; luma_mc_10_16, 16, false); +mc_func!(mc01; luma_mc_10_8, 8, false); +mc_func!(mc02; luma_mc_20_16, 16, false); +mc_func!(mc02; luma_mc_20_8, 8, false); +mc_func!(mc03; luma_mc_30_16, 16, false); +mc_func!(mc03; luma_mc_30_8, 8, false); +mc_func!(mc01; luma_mc_01_16, 16, true); +mc_func!(mc01; luma_mc_01_8, 8, true); +mc_func!(mc02; luma_mc_02_16, 16, true); +mc_func!(mc02; luma_mc_02_8, 8, true); +mc_func!(mc03; luma_mc_03_16, 16, true); +mc_func!(mc03; luma_mc_03_8, 8, true); +mc_func!(cm01; luma_mc_11_16, 16, luma_mc_01_16); +mc_func!(cm01; luma_mc_11_8, 8, luma_mc_01_8); +mc_func!(cm01; luma_mc_12_16, 16, luma_mc_02_16); +mc_func!(cm01; luma_mc_12_8, 8, luma_mc_02_8); +mc_func!(cm01; luma_mc_13_16, 16, luma_mc_03_16); +mc_func!(cm01; luma_mc_13_8, 8, luma_mc_03_8); +mc_func!(cm02; luma_mc_21_16, 16, luma_mc_01_16); +mc_func!(cm02; luma_mc_21_8, 8, luma_mc_01_8); +mc_func!(cm02; luma_mc_22_16, 16, luma_mc_02_16); +mc_func!(cm02; luma_mc_22_8, 8, luma_mc_02_8); +mc_func!(cm02; luma_mc_23_16, 16, luma_mc_03_16); +mc_func!(cm02; luma_mc_23_8, 8, luma_mc_03_8); +mc_func!(cm03; luma_mc_31_16, 16, luma_mc_01_16); +mc_func!(cm03; luma_mc_31_8, 8, luma_mc_01_8); +mc_func!(cm03; luma_mc_32_16, 16, luma_mc_02_16); +mc_func!(cm03; luma_mc_32_8, 8, luma_mc_02_8); +mc_func!(mc33; luma_mc_33_16, 16); +mc_func!(mc33; luma_mc_33_8, 8); + +const LUMA_MC_16: [MCFunc; 16] = [ + copy_16, luma_mc_10_16, luma_mc_20_16, luma_mc_30_16, + luma_mc_01_16, luma_mc_11_16, luma_mc_21_16, luma_mc_31_16, + luma_mc_02_16, luma_mc_12_16, luma_mc_22_16, luma_mc_32_16, + luma_mc_03_16, luma_mc_13_16, luma_mc_23_16, luma_mc_33_16 +]; +const LUMA_MC_8: [MCFunc; 16] = [ + copy_8, luma_mc_10_8, luma_mc_20_8, luma_mc_30_8, + luma_mc_01_8, luma_mc_11_8, luma_mc_21_8, luma_mc_31_8, + luma_mc_02_8, luma_mc_12_8, luma_mc_22_8, luma_mc_32_8, + luma_mc_03_8, luma_mc_13_8, luma_mc_23_8, luma_mc_33_8 +]; + +#[allow(clippy::many_single_char_names)] +fn rv40_chroma_mc(dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) { + const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [ + [ 0, 4, 8, 4 ], + [ 8, 7, 8, 7 ], + [ 0, 8, 4, 8 ], + [ 8, 7, 8, 7 ] + ]; + + if (x == 0) && (y == 0) { + for (dline, sline) in dst.chunks_mut(dstride).zip(src[sidx..].chunks(sstride)).take(size) { + dline[..size].copy_from_slice(&sline[..size]); + } + return; + } + let bias = RV40_CHROMA_BIAS[y >> 1][x >> 1]; + if (x > 0) && (y > 0) { + let a = ((4 - x) * (4 - y)) as u16; + let b = (( x) * (4 - y)) as u16; + let c = ((4 - x) * ( y)) as u16; + let d = (( x) * ( y)) as u16; + for dline in dst.chunks_mut(dstride).take(size) { + for (x, el) in dline[..size].iter_mut().enumerate() { + *el = ((a * (src[sidx + x] as u16) + + b * (src[sidx + x + 1] as u16) + + c * (src[sidx + x + sstride] as u16) + + d * (src[sidx + x + 1 + sstride] as u16) + bias) >> 4) as u8; + } + sidx += sstride; + } + } else { + let a = ((4 - x) * (4 - y)) as u16; + let e = (( x) * (4 - y) + (4 - x) * ( y)) as u16; + let step = if y > 0 { sstride } else { 1 }; + for dline in dst.chunks_mut(dstride).take(size) { + for (x, el) in dline[..size].iter_mut().enumerate() { + *el = ((a * (src[sidx + x] as u16) + + e * (src[sidx + x + step] as u16) + bias) >> 4) as u8; + } + sidx += sstride; + } + } +}