| 1 | use nihav_core::frame::NAVideoBuffer; |
| 2 | use nihav_codec_support::codecs::MV; |
| 3 | use nihav_codec_support::codecs::blockdsp::edge_emu; |
| 4 | use super::clip8; |
| 5 | |
| 6 | pub fn luma_mc(dst: &mut [u8], dstride: usize, pic: &NAVideoBuffer<u8>, xpos: usize, ypos: usize, mv: MV, is16: bool) { |
| 7 | const RV40_EDGE1: [isize; 4] = [ 0, 2, 2, 2 ]; |
| 8 | const RV40_EDGE2: [isize; 4] = [ 0, 3, 3, 3 ]; |
| 9 | let dx = mv.x >> 2; |
| 10 | let cx = (mv.x & 3) as usize; |
| 11 | let dy = mv.y >> 2; |
| 12 | let cy = (mv.y & 3) as usize; |
| 13 | let mode = cx + cy * 4; |
| 14 | |
| 15 | let (w_, h_) = pic.get_dimensions(0); |
| 16 | let w = (w_ + 15) & !15; |
| 17 | let h = (h_ + 15) & !15; |
| 18 | let (bsize, mc_func) = if is16 { (16, LUMA_MC_16[mode]) } else { (8, LUMA_MC_8[mode]) }; |
| 19 | |
| 20 | if check_pos(xpos, ypos, bsize, w, h, dx, dy, RV40_EDGE1[cx], RV40_EDGE2[cx], RV40_EDGE1[cy], RV40_EDGE2[cy]) { |
| 21 | let sstride = pic.get_stride(0); |
| 22 | let mut soffset = pic.get_offset(0) + xpos + ypos * sstride; |
| 23 | let data = pic.get_data(); |
| 24 | let src: &[u8] = data.as_slice(); |
| 25 | soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize; |
| 26 | (mc_func)(dst, dstride, src, soffset, sstride); |
| 27 | } else { |
| 28 | let mut ebuf = [0u8; 32 * 22]; |
| 29 | edge_emu(pic, (xpos as isize) + (dx as isize) - 2, (ypos as isize) + (dy as isize) - 2, 16+5, 16+5, &mut ebuf, 32, 0, 4); |
| 30 | (mc_func)(dst, dstride, &ebuf, 32 * 2 + 2, 32); |
| 31 | } |
| 32 | } |
| 33 | |
| 34 | pub fn chroma_mc(dst: &mut [u8], dstride: usize, pic: &NAVideoBuffer<u8>, xpos: usize, ypos: usize, comp: usize, mv: MV, is16: bool) { |
| 35 | let mvx = mv.x / 2; |
| 36 | let mvy = mv.y / 2; |
| 37 | let dx = mvx >> 2; |
| 38 | let mut cx = (mvx & 3) as usize; |
| 39 | let dy = mvy >> 2; |
| 40 | let mut cy = (mvy & 3) as usize; |
| 41 | |
| 42 | if (cx == 3) && (cy == 3) { |
| 43 | cx = 2; |
| 44 | cy = 2; |
| 45 | } |
| 46 | |
| 47 | let (w_, h_) = pic.get_dimensions(0); |
| 48 | let w = ((w_ + 15) & !15) >> 1; |
| 49 | let h = ((h_ + 15) & !15) >> 1; |
| 50 | let bsize = if is16 { 8 } else { 4 }; |
| 51 | |
| 52 | if check_pos(xpos, ypos, bsize, w, h, dx, dy, 0, 1, 0, 1) { |
| 53 | let sstride = pic.get_stride(comp); |
| 54 | let mut soffset = pic.get_offset(comp) + xpos + ypos * sstride; |
| 55 | let data = pic.get_data(); |
| 56 | let src: &[u8] = data.as_slice(); |
| 57 | soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize; |
| 58 | rv40_chroma_mc(dst, dstride, src, soffset, sstride, bsize, cx, cy); |
| 59 | } else { |
| 60 | let mut ebuf = [0u8; 16 * 10]; |
| 61 | edge_emu(pic, (xpos as isize) + (dx as isize), (ypos as isize) + (dy as isize), bsize + 1, bsize + 1, &mut ebuf, 16, comp, 4); |
| 62 | rv40_chroma_mc(dst, dstride, &ebuf, 0, 16, bsize, cx, cy); |
| 63 | } |
| 64 | } |
| 65 | |
/// Tells whether a displaced block, together with its filter margins, lies
/// completely inside a `width` x `height` area.
///
/// The block of `size` x `size` samples at (`x`, `y`) is moved by (`dx`, `dy`).
/// `e0` / `e1` are the margins required to the left / right of it and
/// `e2` / `e3` those required above / below.
fn check_pos(x: usize, y: usize, size: usize, width: usize, height: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let span = size as isize;
    let left = (x as isize) + isize::from(dx);
    let top  = (y as isize) + isize::from(dy);

    let fits_horizontally = left >= e0 && left + span + e1 <= (width as isize);
    let fits_vertically   = top >= e2 && top + span + e3 <= (height as isize);

    fits_horizontally && fits_vertically
}
| 72 | |
/// Signature shared by every luma interpolation routine generated below:
/// `(dst, dst_stride, src, src_offset, src_stride)`.
type MCFunc = fn (&mut [u8], usize, &[u8], usize, usize);
| 74 | |
/// Reads element `$pos` of the indexable `$buf` and converts it to `i16`
/// so that filter sums can go negative.
macro_rules! el {
    ($buf: ident, $pos: expr) => {
        $buf[$pos] as i16
    };
}
| 78 | |
/// Computes one interpolated sample from `$s` around index `$o`.
///
/// * `01`, `02`, `03` - six-tap filters over `$o - 2*$step ..= $o + 3*$step`
///   with weights `(1, -5, c0, c1, -5, 1)`; the centre weights are
///   `(52, 20)`, `(20, 20)` and `(20, 52)` respectively, and the sum is
///   rounded and scaled back by the total weight (64, 32, 64).
/// * `33` - rounded average of the 2x2 neighbourhood starting at `$o`
///   (the third argument is the line stride here).
///
/// The result is passed through `clip8`.
macro_rules! filter {
    // Internal rule: the six-tap kernel shared by `01`, `02` and `03`.
    (@sixtap; $s: ident, $o: expr, $step: expr, $c0: expr, $c1: expr, $round: expr, $shift: expr) => (
        clip8(((       el!($s, $o - 2 * $step)
                 - 5 * el!($s, $o - $step)
                 + $c0 * el!($s, $o)
                 + $c1 * el!($s, $o + $step)
                 - 5 * el!($s, $o + 2 * $step)
                 +     el!($s, $o + 3 * $step) + $round) >> $shift) as i16)
    );
    (01; $s: ident, $o: expr, $step: expr) => (
        filter!(@sixtap; $s, $o, $step, 52, 20, 32, 6)
    );
    (02; $s: ident, $o: expr, $step: expr) => (
        filter!(@sixtap; $s, $o, $step, 20, 20, 16, 5)
    );
    (03; $s: ident, $o: expr, $step: expr) => (
        filter!(@sixtap; $s, $o, $step, 20, 52, 32, 6)
    );
    (33; $s: ident, $o: expr, $stride: expr) => (
        clip8(((  el!($s, $o)
                + el!($s, $o + 1)
                + el!($s, $o + $stride)
                + el!($s, $o + $stride + 1) + 2) >> 2) as i16)
    );
}
| 111 | |
/// Generates one luma interpolation function with the `MCFunc` signature
/// `(dst, dstride, src, sidx, sstride)` for a `$size` x `$size` block.
///
/// * `copy` - plain block copy
/// * `mc01` / `mc02` / `mc03` - single pass of the matching `filter!` kernel,
///   along a line when `$ver` is `false` and across lines when it is `true`
/// * `cm01` / `cm02` / `cm03` - the matching kernel applied along lines into a
///   temporary buffer, followed by `$ofilt` (an across-lines function
///   generated by this macro) over that buffer
/// * `mc33` - 2x2 averaging via `filter!(33; ...)`
macro_rules! mc_func {
    // Internal rule: one-directional six-tap pass, `$filt` selects the kernel.
    (@sixtap; $filt: tt, $name: ident, $size: expr, $ver: expr) => (
        fn $name (dst: &mut [u8], dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            let step = if $ver { sstride } else { 1 };
            for (row, dline) in dst.chunks_mut(dstride).take($size).enumerate() {
                let base = sidx + row * sstride;
                for (x, px) in dline[..$size].iter_mut().enumerate() {
                    *px = filter!($filt; src, base + x, step);
                }
            }
        }
    );
    // Internal rule: filter along lines into a scratch buffer, then let
    // `$ofilt` filter across lines.
    (@twopass; $filt: tt, $name: ident, $size: expr, $ofilt: ident) => (
        fn $name (dst: &mut [u8], dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            // The second pass reaches 2 lines above and 3 below the block.
            const ROWS: usize = $size + 5;
            let mut tmp = [0u8; ROWS * $size];
            let top = sidx - 2 * sstride;
            for (row, tline) in tmp.chunks_mut($size).enumerate() {
                let base = top + row * sstride;
                for (x, px) in tline.iter_mut().enumerate() {
                    *px = filter!($filt; src, base + x, 1);
                }
            }
            // Skip the two leading context lines when handing over.
            $ofilt(dst, dstride, &tmp, 2 * $size, $size);
        }
    );
    (copy; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            let src_lines = src[sidx..].chunks(sstride);
            for (dline, sline) in dst.chunks_mut(dstride).zip(src_lines).take($size) {
                dline[..$size].copy_from_slice(&sline[..$size]);
            }
        }
    );
    (mc01; $name: ident, $size: expr, $ver: expr) => (
        mc_func!(@sixtap; 01, $name, $size, $ver);
    );
    (mc02; $name: ident, $size: expr, $ver: expr) => (
        mc_func!(@sixtap; 02, $name, $size, $ver);
    );
    (mc03; $name: ident, $size: expr, $ver: expr) => (
        mc_func!(@sixtap; 03, $name, $size, $ver);
    );
    (cm01; $name: ident, $size: expr, $ofilt: ident) => (
        mc_func!(@twopass; 01, $name, $size, $ofilt);
    );
    (cm02; $name: ident, $size: expr, $ofilt: ident) => (
        mc_func!(@twopass; 02, $name, $size, $ofilt);
    );
    (cm03; $name: ident, $size: expr, $ofilt: ident) => (
        mc_func!(@twopass; 03, $name, $size, $ofilt);
    );
    (mc33; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            for (row, dline) in dst.chunks_mut(dstride).take($size).enumerate() {
                let base = sidx + row * sstride;
                for (x, px) in dline[..$size].iter_mut().enumerate() {
                    *px = filter!(33; src, base + x, sstride);
                }
            }
        }
    );
}
// Instantiate every luma interpolator for 16x16 and 8x8 blocks.
//
// Naming: `luma_mc_<h><v>_<size>`, where <h> is the kernel applied along a
// line and <v> the kernel applied across lines (0 = none).

// Whole-sample position: plain copy.
mc_func!(copy; copy_16, 16);
mc_func!(copy; copy_8, 8);
// Along-line filtering only.
mc_func!(mc01; luma_mc_10_16, 16, false);
mc_func!(mc01; luma_mc_10_8, 8, false);
mc_func!(mc02; luma_mc_20_16, 16, false);
mc_func!(mc02; luma_mc_20_8, 8, false);
mc_func!(mc03; luma_mc_30_16, 16, false);
mc_func!(mc03; luma_mc_30_8, 8, false);
// Across-line filtering only.
mc_func!(mc01; luma_mc_01_16, 16, true);
mc_func!(mc01; luma_mc_01_8, 8, true);
mc_func!(mc02; luma_mc_02_16, 16, true);
mc_func!(mc02; luma_mc_02_8, 8, true);
mc_func!(mc03; luma_mc_03_16, 16, true);
mc_func!(mc03; luma_mc_03_8, 8, true);
// Two-pass combinations: along-line kernel first, then the named across-line function.
mc_func!(cm01; luma_mc_11_16, 16, luma_mc_01_16);
mc_func!(cm01; luma_mc_11_8, 8, luma_mc_01_8);
mc_func!(cm01; luma_mc_12_16, 16, luma_mc_02_16);
mc_func!(cm01; luma_mc_12_8, 8, luma_mc_02_8);
mc_func!(cm01; luma_mc_13_16, 16, luma_mc_03_16);
mc_func!(cm01; luma_mc_13_8, 8, luma_mc_03_8);
mc_func!(cm02; luma_mc_21_16, 16, luma_mc_01_16);
mc_func!(cm02; luma_mc_21_8, 8, luma_mc_01_8);
mc_func!(cm02; luma_mc_22_16, 16, luma_mc_02_16);
mc_func!(cm02; luma_mc_22_8, 8, luma_mc_02_8);
mc_func!(cm02; luma_mc_23_16, 16, luma_mc_03_16);
mc_func!(cm02; luma_mc_23_8, 8, luma_mc_03_8);
mc_func!(cm03; luma_mc_31_16, 16, luma_mc_01_16);
mc_func!(cm03; luma_mc_31_8, 8, luma_mc_01_8);
mc_func!(cm03; luma_mc_32_16, 16, luma_mc_02_16);
mc_func!(cm03; luma_mc_32_8, 8, luma_mc_02_8);
// Position (3,3) is special: a 2x2 average instead of a two-pass six-tap filter.
mc_func!(mc33; luma_mc_33_16, 16);
mc_func!(mc33; luma_mc_33_8, 8);
| 238 | |
/// Interpolators for 16x16 luma blocks, indexed by `cx + cy * 4` where
/// `cx` / `cy` are the two-bit fractional parts of the motion vector
/// (each row of the table shares one `cy`).
const LUMA_MC_16: [MCFunc; 16] = [
    copy_16, luma_mc_10_16, luma_mc_20_16, luma_mc_30_16,
    luma_mc_01_16, luma_mc_11_16, luma_mc_21_16, luma_mc_31_16,
    luma_mc_02_16, luma_mc_12_16, luma_mc_22_16, luma_mc_32_16,
    luma_mc_03_16, luma_mc_13_16, luma_mc_23_16, luma_mc_33_16
];
/// Interpolators for 8x8 luma blocks, indexed by `cx + cy * 4` where
/// `cx` / `cy` are the two-bit fractional parts of the motion vector
/// (each row of the table shares one `cy`).
const LUMA_MC_8: [MCFunc; 16] = [
    copy_8, luma_mc_10_8, luma_mc_20_8, luma_mc_30_8,
    luma_mc_01_8, luma_mc_11_8, luma_mc_21_8, luma_mc_31_8,
    luma_mc_02_8, luma_mc_12_8, luma_mc_22_8, luma_mc_32_8,
    luma_mc_03_8, luma_mc_13_8, luma_mc_23_8, luma_mc_33_8
];
| 251 | |
/// Bilinear chroma interpolation of a `size` x `size` block.
///
/// * `dst`, `dstride` - output block and its line stride
/// * `src`, `sidx`, `sstride` - source samples, offset of the top-left
///   reference sample and the source line stride
/// * `x`, `y` - fractional phase in quarter samples; both must be in `0..=3`
///
/// The four neighbouring samples are weighted with
/// `(4-x)(4-y)`, `x(4-y)`, `(4-x)y` and `xy` (sum 16), a phase-dependent
/// rounding term is added and the result is shifted right by 4.
///
/// # Panics
///
/// Panics if `x` or `y` is greater than 3 or if `dst` / `src` are too short.
// The single-letter weight names follow the usual bilinear notation.
#[allow(clippy::many_single_char_names)]
fn rv40_chroma_mc(dst: &mut [u8], dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
    // Rounding term for every (y, x) quarter-sample phase.
    const RV40_CHROMA_BIAS: [[u16; 4]; 4] = [
        [ 0, 4, 8, 4 ],
        [ 8, 7, 8, 7 ],
        [ 0, 8, 4, 8 ],
        [ 8, 7, 8, 7 ]
    ];

    if (x == 0) && (y == 0) {
        // Whole-sample position: nothing to interpolate.
        for (dline, sline) in dst.chunks_mut(dstride).zip(src[sidx..].chunks(sstride)).take(size) {
            dline[..size].copy_from_slice(&sline[..size]);
        }
        return;
    }

    // The phases are already in 0..=3, so they index the 4x4 table directly.
    // (Halving them first, as is done when phases are expressed in eighths
    // with weights `8 - x`, would only ever reach the top-left 2x2 corner and
    // pick the wrong rounding for most phases.)
    let bias = RV40_CHROMA_BIAS[y][x];

    if (x > 0) && (y > 0) {
        // General case: all four neighbours contribute.
        let a = ((4 - x) * (4 - y)) as u16;
        let b = ((    x) * (4 - y)) as u16;
        let c = ((4 - x) * (    y)) as u16;
        let d = ((    x) * (    y)) as u16;
        for dline in dst.chunks_mut(dstride).take(size) {
            for (col, el) in dline[..size].iter_mut().enumerate() {
                let pos = sidx + col;
                *el = ((a * u16::from(src[pos])
                      + b * u16::from(src[pos + 1])
                      + c * u16::from(src[pos + sstride])
                      + d * u16::from(src[pos + 1 + sstride]) + bias) >> 4) as u8;
            }
            sidx += sstride;
        }
    } else {
        // Only one of the phases is non-zero: two-tap filter along that axis.
        let a = ((4 - x) * (4 - y)) as u16;
        let e = ((    x) * (4 - y) + (4 - x) * (    y)) as u16;
        let step = if y > 0 { sstride } else { 1 };
        for dline in dst.chunks_mut(dstride).take(size) {
            for (col, el) in dline[..size].iter_mut().enumerate() {
                let pos = sidx + col;
                *el = ((a * u16::from(src[pos])
                      + e * u16::from(src[pos + step]) + bias) >> 4) as u8;
            }
            sidx += sstride;
        }
    }
}