| 1 | use nihav_core::frame::{NAVideoBuffer, NASimpleVideoFrame}; |
| 2 | use nihav_codec_support::codecs::MV; |
| 3 | use nihav_codec_support::codecs::blockdsp::edge_emu; |
| 4 | |
/// Saturates a 16-bit intermediate value to the valid 8-bit pixel range.
fn clip8(val: i16) -> u8 { val.clamp(0, 255) as u8 }
| 6 | |
/// Fetches one sample from slice `$s` at offset `$o` and widens it to `i16`
/// so the filter arithmetic below cannot overflow u8.
macro_rules! el {
    ($s: ident, $o: expr) => ( $s[$o] as i16 )
}
| 10 | |
/// Six-tap interpolation filters for RV60 luma motion compensation.
/// `$step` selects the filter direction (1 = horizontal, stride = vertical).
/// The three variants correspond to the fractional phases used by `luma_mc`:
///  * 01 - taps (1, -5, 52, 20, -5, 1), rounded with +32, >> 6
///  * 02 - taps (1, -5, 20, 20, -5, 1), rounded with +16, >> 5 (half-pel)
///  * 03 - taps (1, -5, 20, 52, -5, 1), rounded with +32, >> 6 (mirror of 01)
/// Each reads two samples before and three after the centre position.
macro_rules! filter {
    (01; $s: ident, $o: expr, $step: expr) => (
            clip8((( el!($s, $o - 2 * $step)
                -5 * el!($s, $o - 1 * $step)
                +52 * el!($s, $o - 0 * $step)
                +20 * el!($s, $o + 1 * $step)
                -5 * el!($s, $o + 2 * $step)
                   + el!($s, $o + 3 * $step) + 32) >> 6) as i16)
        );
    (02; $s: ident, $o: expr, $step: expr) => (
            clip8((( el!($s, $o - 2 * $step)
                -5 * el!($s, $o - 1 * $step)
                +20 * el!($s, $o - 0 * $step)
                +20 * el!($s, $o + 1 * $step)
                -5 * el!($s, $o + 2 * $step)
                   + el!($s, $o + 3 * $step) + 16) >> 5) as i16)
        );
    (03; $s: ident, $o: expr, $step: expr) => (
            clip8((( el!($s, $o - 2 * $step)
                -5 * el!($s, $o - 1 * $step)
                +20 * el!($s, $o - 0 * $step)
                +52 * el!($s, $o + 1 * $step)
                -5 * el!($s, $o + 2 * $step)
                   + el!($s, $o + 3 * $step) + 32) >> 6) as i16)
        );
}
| 37 | |
/// Applies one `filter!` variant across a row of `$size` pixels from
/// `$s[$so..]` into `$d[$do..]`.  `$mode` picks the fractional phase (1..3);
/// mode 0 (or any other value) performs no filtering and leaves the
/// destination untouched.
macro_rules! filter_row {
    ($d: ident, $do: expr, $s: ident, $so: expr, $step: expr, $size: expr, $mode: expr) => ({
        match $mode {
            1 => {
                for x in 0..$size {
                    $d[$do + x] = filter!(01; $s, $so + x, $step);
                }
            },
            2 => {
                for x in 0..$size {
                    $d[$do + x] = filter!(02; $s, $so + x, $step);
                }
            },
            3 => {
                for x in 0..$size {
                    $d[$do + x] = filter!(03; $s, $so + x, $step);
                }
            },
            _ => {},
        };
    });
}
| 60 | |
#[allow(clippy::cyclomatic_complexity)]
/// Luma motion compensation for a `w` x `h` block with quarter-pel accuracy.
///
/// `(cx, cy)` are the fractional MV phases (0..=3): a straight copy for
/// (0, 0), a single 6-tap pass for purely horizontal or vertical phases, a
/// horizontal-then-vertical two-pass filter through a temporary buffer for
/// mixed phases — except (3, 3), which uses a plain 2x2 diagonal average.
/// The caller must guarantee the filter footprint (2 before / 3 after in
/// each filtered direction) stays inside `src`.
fn luma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, w: usize, h: usize, cx: usize, cy: usize) {
    if (cx == 0) && (cy == 0) {
        // integer-pel position: plain block copy
        for _ in 0..h {
            dst[didx..][..w].copy_from_slice(&src[sidx..][..w]);
            didx += dstride;
            sidx += sstride;
        }
    } else if cy == 0 {
        // horizontal-only filtering (step 1 walks along the row)
        for _ in 0..h {
            filter_row!(dst, didx, src, sidx, 1, w, cx);
            didx += dstride;
            sidx += sstride;
        }
    } else if cx == 0 {
        // vertical-only filtering (step = source stride walks down a column)
        for _ in 0..h {
            filter_row!(dst, didx, src, sidx, sstride, w, cy);
            didx += dstride;
            sidx += sstride;
        }
    } else if (cx != 3) || (cy != 3) {
        // generic 2-D case: horizontal pass into `tmp` (row stride 64),
        // producing h + 5 rows starting two rows above the block so the
        // vertical pass has its 2-above/3-below footprint available
        let mut tmp: [u8; 70 * 64] = [0; 70 * 64];
        for y in 0..h+5 {
            filter_row!(tmp, y * 64, src, sidx - sstride * 2, 1, w, cx);
            sidx += sstride;
        }
        // vertical pass from row y + 2 of `tmp` (the block's own row y)
        for y in 0..h {
            filter_row!(dst, didx, tmp, (y + 2) * 64, 64, w, cy);
            didx += dstride;
        }
    } else {
        // special case (3, 3): average of the four diagonal neighbours
        for _ in 0..h {
            for x in 0..w {
                dst[didx + x] = ((el!(src, sidx + x) + el!(src, sidx + x + 1) +
                                  el!(src, sidx + x + sstride) + el!(src, sidx + x + 1 + sstride) + 2) >> 2) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    }
}
| 102 | |
/// Chroma motion compensation: quarter-pel bilinear interpolation of a
/// `w` x `h` block, with `(x, y)` as the fractional offsets (0..=3).
fn chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, w: usize, h: usize, x: usize, y: usize) {
    if (x, y) == (0, 0) {
        // integer position: plain block copy
        for _ in 0..h {
            dst[didx..][..w].copy_from_slice(&src[sidx..][..w]);
            didx += dstride;
            sidx += sstride;
        }
    } else if x != 0 && y != 0 {
        // full 2-D bilinear blend of the four neighbouring samples;
        // the weights always sum to 16, hence the +8 rounding and >> 4
        let w_tl = ((4 - x) * (4 - y)) as u16;
        let w_tr = ((    x) * (4 - y)) as u16;
        let w_bl = ((4 - x) * (    y)) as u16;
        let w_br = ((    x) * (    y)) as u16;
        for _ in 0..h {
            for i in 0..w {
                let tl = u16::from(src[sidx + i]);
                let tr = u16::from(src[sidx + i + 1]);
                let bl = u16::from(src[sidx + i + sstride]);
                let br = u16::from(src[sidx + i + 1 + sstride]);
                dst[didx + i] = ((w_tl * tl + w_tr * tr + w_bl * bl + w_br * br + 8) >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    } else {
        // single-axis interpolation: horizontal when y == 0, vertical when x == 0
        let w0 = ((4 - x) * (4 - y)) as u16;
        let w1 = ((x) * (4 - y) + (4 - x) * (y)) as u16;
        let next = if y != 0 { sstride } else { 1 };
        for _ in 0..h {
            for i in 0..w {
                let p0 = u16::from(src[sidx + i]);
                let p1 = u16::from(src[sidx + i + next]);
                dst[didx + i] = ((w0 * p0 + w1 * p1 + 8) >> 4) as u8;
            }
            didx += dstride;
            sidx += sstride;
        }
    }
}
| 141 | |
/// Returns true when the `cw` x `ch` block at `(x, y)` displaced by `(dx, dy)`
/// plus the per-side interpolation margins `e0..e3` stays fully inside the
/// `w` x `h` reference area.
fn check_pos(x: usize, y: usize, cw: usize, ch: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let left   = (x as isize) + (dx as isize);
    let top    = (y as isize) + (dy as isize);
    let right  = left + (cw as isize);
    let bottom = top  + (ch as isize);

    left >= e0 && right + e1 <= (w as isize) && top >= e2 && bottom + e3 <= (h as isize)
}
| 148 | |
/// Signed difference between two samples of `$src`, widened to `i16`.
macro_rules! diff{
    ($src: ident, $e1: expr, $e2: expr) => (
        ($src[$e1] as i16) - ($src[$e2] as i16)
    )
}
/// Deblocking strength for one side of an edge: 3 (flat side, filter hard)
/// when the gradient is below `$lim`, 1 (textured side) otherwise.
macro_rules! strength{
    ($el: expr, $lim: expr) => (if $el < $lim { 3 } else { 1 })
}
/// Clips `val` to the symmetric range [-lim, lim] (lower bound applied first,
/// matching `val.max(-lim).min(lim)`).
fn clip_symm(val: i16, lim: i16) -> i16 {
    let bounded_below = if val > -lim { val } else { -lim };
    if bounded_below < lim { bounded_below } else { lim }
}
| 158 | |
/// Deblocks one 4-pixel-long luma edge.
///
/// `step` is the distance between pixels ACROSS the edge and `stride` the
/// distance ALONG it (vertical edges: step = 1, stride = row; horizontal
/// edges: the opposite).  `mode1`/`mode2` are the per-side filter modes from
/// RV60_DEB_LIMITS, `lim1`/`lim2` the matching thresholds.  Pixels at
/// offset - step / offset are the two samples straddling the edge.
fn filter_luma_edge(dst: &mut [u8], mut offset: usize, step: usize, stride: usize, mode1: u8, mode2: u8, lim1: i16, lim2: i16) {
    // second-neighbour gradients on each side, one entry per pixel along the edge
    let mut diff_q1q0: [i16; 4] = [0; 4];
    let mut diff_p1p0: [i16; 4] = [0; 4];
    for i in 0..4 {
        let off = offset + i * stride;
        diff_q1q0[i] = diff!(dst, off - 2 * step, off - step);
        diff_p1p0[i] = diff!(dst, off + step, off);
    }
    // side flatness (sum over all four pixels) picks strong (3) or weak (1) filtering
    let str_p = strength!(diff_q1q0[0] + diff_q1q0[1] + diff_q1q0[2] + diff_q1q0[3], lim2);
    let str_q = strength!(diff_p1p0[0] + diff_p1p0[1] + diff_p1p0[2] + diff_p1p0[3], lim2);
    if str_p + str_q > 2 {
        let msum = ((mode1 + mode2 + str_q + str_p) >> 1) as i16;
        // with at least one weak side, allow a larger edge step but halve the correction
        let (maxprod, weak) = if (str_q == 1) || (str_p == 1) { (512, true) } else { (384, false) };
        for y in 0..4 {
            let diff_p0q0 = diff!(dst, offset, offset - step);
            // only filter genuine blocking steps, not real image edges
            if (diff_p0q0 != 0) && (lim1 * diff_p0q0.abs() < maxprod) {
                let diff_q1q2 = diff!(dst, offset - 2 * step, offset - 3 * step);
                let diff_p1p2 = diff!(dst, offset + step, offset + 2 * step);
                let delta = if weak {
                    clip_symm((diff_p0q0 + 1) >> 1, msum >> 1)
                } else {
                    let diff_strg = (diff!(dst, offset - 2 * step, offset + step) + 4 * diff_p0q0 + 4) >> 3;
                    clip_symm(diff_strg, msum)
                };
                // adjust the two pixels straddling the edge in opposite directions
                dst[offset - step] = clip8((dst[offset - step] as i16) + delta);
                dst[offset] = clip8((dst[offset] as i16) - delta);
                // optionally touch the second pixel on each sufficiently flat side
                if (str_q != 1) && (diff_q1q2.abs() <= (lim1 >> 2)) {
                    let diff = (diff_q1q0[y] + diff_q1q2 - delta) >> 1;
                    let delta_q1 = if weak {
                        clip_symm(diff, (mode1 >> 1) as i16)
                    } else {
                        clip_symm(diff, mode1 as i16)
                    };
                    dst[offset - 2 * step] = clip8((dst[offset - 2 * step] as i16) - delta_q1);
                }
                if (str_p != 1) && (diff_p1p2.abs() <= (lim1 >> 2)) {
                    let diff = (diff_p1p0[y] + diff_p1p2 + delta) >> 1;
                    let delta_p1 = if weak {
                        clip_symm(diff, (mode2 >> 1) as i16)
                    } else {
                        clip_symm(diff, mode2 as i16)
                    };
                    dst[offset + step] = clip8((dst[offset + step] as i16) - delta_p1);
                }
            }
            offset += stride;
        }
    }
}
/// Deblocks one 2-pixel-long chroma edge; same parameter conventions as
/// `filter_luma_edge`.  Only the two pixels straddling the edge are modified.
fn filter_chroma_edge(dst: &mut [u8], mut offset: usize, step: usize, stride: usize, mode1: u8, mode2: u8, lim1: i16, lim2: i16) {
    // single-sample gradients, scaled by 4 to match the luma 4-pixel sums
    let diff_q = 4 * diff!(dst, offset - 2 * step, offset - step).abs();
    let diff_p = 4 * diff!(dst, offset + step, offset        ).abs();
    let str_q = strength!(diff_q, lim2);
    let str_p = strength!(diff_p, lim2);
    if str_p + str_q > 2 {
        let msum = ((mode1 + mode2 + str_q + str_p) >> 1) as i16;
        let (maxprod, weak) = if (str_q == 1) || (str_p == 1) { (512, true) } else { (384, false) };
        for _ in 0..2 {
            let diff_pq = diff!(dst, offset, offset - step);
            // only filter genuine blocking steps, not real image edges
            if (diff_pq != 0) && (lim1 * diff_pq.abs() < maxprod) {
                let delta = if weak {
                    clip_symm((diff_pq + 1) >> 1, msum >> 1)
                } else {
                    let diff_strg = (diff!(dst, offset - 2 * step, offset + step) + 4 * diff_pq + 4) >> 3;
                    clip_symm(diff_strg, msum)
                };
                dst[offset - step] = clip8((dst[offset - step] as i16) + delta);
                dst[offset] = clip8((dst[offset] as i16) - delta);
            }
            offset += stride;
        }
    }
}
| 232 | |
/// Per-frame parameters for the RV60 deblocking pass.
pub struct RV60DeblockParams {
    pub deblock_chroma: bool,  // also filter chroma edges (for strong strengths)
    pub width: usize,          // picture width in pixels
    pub height: usize,         // picture height in pixels
    pub dblkstride: usize,     // row stride of the strength arrays (entries per row)
}
| 239 | |
/// Stateless collection of RV60 DSP routines (inverse transforms, motion
/// compensation and deblocking).
pub struct RV60DSP {}
| 241 | /*pub fn rv6_transform4x4_dc(coeffs: &mut [i16]) { |
| 242 | let dc = (((coeffs[0] * 13 + 0x10) >> 5) * 13 + 0x10) >> 5; |
| 243 | for el in coeffs.iter_mut().take(16) { |
| 244 | *el = dc; |
| 245 | } |
| 246 | }*/ |
| 247 | |
| 248 | impl RV60DSP { |
    /// Creates the (stateless) DSP helper.
    pub fn new() -> Self { Self{} }
    /// In-place inverse 4x4 transform, applied separably: a column pass into
    /// `tmp`, then a row pass back into `blk`.  Each pass rounds with +0x10
    /// and shifts by 5.
    pub fn transform4x4(&self, blk: &mut [i16]) {
        let mut tmp: [i32; 4 * 4] = [0; 4 * 4];

        // vertical (column) pass
        for i in 0..4 {
            let a = blk[i + 0 * 4] as i32;
            let b = blk[i + 1 * 4] as i32;
            let c = blk[i + 2 * 4] as i32;
            let d = blk[i + 3 * 4] as i32;

            // even/odd butterfly with integer basis weights 13, 17 and 7
            let t0 = 13 * (a + c);
            let t1 = 13 * (a - c);
            let t2 = 7 * b - 17 * d;
            let t3 = 7 * d + 17 * b;
            tmp[i + 0 * 4] = (t0 + t3 + 0x10) >> 5;
            tmp[i + 1 * 4] = (t1 + t2 + 0x10) >> 5;
            tmp[i + 2 * 4] = (t1 - t2 + 0x10) >> 5;
            tmp[i + 3 * 4] = (t0 - t3 + 0x10) >> 5;
        }
        // horizontal (row) pass, identical butterfly
        for (dst, src) in blk.chunks_mut(4).zip(tmp.chunks(4)) {
            let a = src[0];
            let b = src[1];
            let c = src[2];
            let d = src[3];

            let t0 = 13 * (a + c);
            let t1 = 13 * (a - c);
            let t2 = 7 * b - 17 * d;
            let t3 = 7 * d + 17 * b;
            dst[0] = ((t0 + t3 + 0x10) >> 5) as i16;
            dst[1] = ((t1 + t2 + 0x10) >> 5) as i16;
            dst[2] = ((t1 - t2 + 0x10) >> 5) as i16;
            dst[3] = ((t0 - t3 + 0x10) >> 5) as i16;
        }
    }
| 284 | /*pub fn transform8x8_dc(&self, blk: &mut [i16]) { |
| 285 | assert!(blk.len() >= 8 * 8); |
| 286 | let dc = (((coeffs[0] * 37 + 0x40) >> 7) * 37 + 0x40) >> 7; |
| 287 | for el in coeffs.iter_mut().take(8 * 8) { |
| 288 | *el = dc; |
| 289 | } |
| 290 | }*/ |
    /// In-place inverse 8x8 transform, applied separably (columns into `tmp`,
    /// then rows back into `blk`).  Each pass rounds with +0x40 >> 7.
    pub fn transform8x8(&self, blk: &mut [i16]) {
        assert!(blk.len() >= 8 * 8);
        let mut tmp: [i32; 8 * 8] = [0; 8 * 8];
        // vertical (column) pass
        for i in 0..8 {
            let s0 = blk[i + 0 * 8] as i32;
            let s1 = blk[i + 1 * 8] as i32;
            let s2 = blk[i + 2 * 8] as i32;
            let s3 = blk[i + 3 * 8] as i32;
            let s4 = blk[i + 4 * 8] as i32;
            let s5 = blk[i + 5 * 8] as i32;
            let s6 = blk[i + 6 * 8] as i32;
            let s7 = blk[i + 7 * 8] as i32;

            // even part (inputs 0/2/4/6)
            let t0 = 37 * (s0 + s4);
            let t1 = 37 * (s0 - s4);
            let t2 = 48 * s2 + 20 * s6;
            let t3 = 20 * s2 - 48 * s6;
            let t4 = t0 + t2;
            let t5 = t0 - t2;
            let t6 = t1 + t3;
            let t7 = t1 - t3;
            // odd part (inputs 1/3/5/7)
            let t8 = 51 * s1 + 43 * s3 + 29 * s5 + 10 * s7;
            let t9 = 43 * s1 - 10 * s3 - 51 * s5 - 29 * s7;
            let ta = 29 * s1 - 51 * s3 + 10 * s5 + 43 * s7;
            let tb = 10 * s1 - 29 * s3 + 43 * s5 - 51 * s7;
            // outputs form symmetric +/- pairs around the centre
            tmp[i + 0 * 8] = (t4 + t8 + 0x40) >> 7;
            tmp[i + 1 * 8] = (t6 + t9 + 0x40) >> 7;
            tmp[i + 2 * 8] = (t7 + ta + 0x40) >> 7;
            tmp[i + 3 * 8] = (t5 + tb + 0x40) >> 7;
            tmp[i + 4 * 8] = (t5 - tb + 0x40) >> 7;
            tmp[i + 5 * 8] = (t7 - ta + 0x40) >> 7;
            tmp[i + 6 * 8] = (t6 - t9 + 0x40) >> 7;
            tmp[i + 7 * 8] = (t4 - t8 + 0x40) >> 7;
        }
        // horizontal (row) pass, identical butterfly
        for (dst, src) in blk.chunks_mut(8).zip(tmp.chunks(8)) {
            let s0 = src[0];
            let s1 = src[1];
            let s2 = src[2];
            let s3 = src[3];
            let s4 = src[4];
            let s5 = src[5];
            let s6 = src[6];
            let s7 = src[7];

            let t0 = 37 * (s0 + s4);
            let t1 = 37 * (s0 - s4);
            let t2 = 48 * s2 + 20 * s6;
            let t3 = 20 * s2 - 48 * s6;
            let t4 = t0 + t2;
            let t5 = t0 - t2;
            let t6 = t1 + t3;
            let t7 = t1 - t3;
            let t8 = 51 * s1 + 43 * s3 + 29 * s5 + 10 * s7;
            let t9 = 43 * s1 - 10 * s3 - 51 * s5 - 29 * s7;
            let ta = 29 * s1 - 51 * s3 + 10 * s5 + 43 * s7;
            let tb = 10 * s1 - 29 * s3 + 43 * s5 - 51 * s7;
            dst[0] = ((t4 + t8 + 0x40) >> 7) as i16;
            dst[1] = ((t6 + t9 + 0x40) >> 7) as i16;
            dst[2] = ((t7 + ta + 0x40) >> 7) as i16;
            dst[3] = ((t5 + tb + 0x40) >> 7) as i16;
            dst[4] = ((t5 - tb + 0x40) >> 7) as i16;
            dst[5] = ((t7 - ta + 0x40) >> 7) as i16;
            dst[6] = ((t6 - t9 + 0x40) >> 7) as i16;
            dst[7] = ((t4 - t8 + 0x40) >> 7) as i16;
        }
    }
| 357 | /*pub fn transform16x16_dc(&self, blk: &mut [i16; 16 * 16]) { |
| 358 | let dc = (((coeffs[0] * 26 + 0x40) >> 7) * 26 + 0x40) >> 7; |
| 359 | for el in coeffs.iter_mut() { |
| 360 | *el = dc; |
| 361 | } |
| 362 | }*/ |
| 363 | #[allow(non_snake_case)] |
| 364 | fn transform16(blk: &mut [i16; 16 * 16], off: usize, step: usize) { |
| 365 | let src0 = blk[off + 0 * step] as i32; |
| 366 | let src1 = blk[off + 1 * step] as i32; |
| 367 | let src2 = blk[off + 2 * step] as i32; |
| 368 | let src3 = blk[off + 3 * step] as i32; |
| 369 | let src4 = blk[off + 4 * step] as i32; |
| 370 | let src5 = blk[off + 5 * step] as i32; |
| 371 | let src6 = blk[off + 6 * step] as i32; |
| 372 | let src7 = blk[off + 7 * step] as i32; |
| 373 | let src8 = blk[off + 8 * step] as i32; |
| 374 | let src9 = blk[off + 9 * step] as i32; |
| 375 | let srcA = blk[off + 10 * step] as i32; |
| 376 | let srcB = blk[off + 11 * step] as i32; |
| 377 | let srcC = blk[off + 12 * step] as i32; |
| 378 | let srcD = blk[off + 13 * step] as i32; |
| 379 | let srcE = blk[off + 14 * step] as i32; |
| 380 | let srcF = blk[off + 15 * step] as i32; |
| 381 | let t0 = 26 * (src0 + src8); |
| 382 | let t1 = 26 * (src0 - src8); |
| 383 | let t2 = 14 * src4 - 34 * srcC; |
| 384 | let t3 = 34 * src4 + 14 * srcC; |
| 385 | let t4 = t0 + t3; |
| 386 | let t5 = t0 - t3; |
| 387 | let t6 = t1 + t2; |
| 388 | let t7 = t1 - t2; |
| 389 | let tmp00 = 31 * src2 + -7 * src6 + -36 * srcA + -20 * srcE; |
| 390 | let tmp01 = 36 * src2 + 31 * src6 + 20 * srcA + 7 * srcE; |
| 391 | let tmp02 = 20 * src2 + -36 * src6 + 7 * srcA + 31 * srcE; |
| 392 | let tmp03 = 7 * src2 + -20 * src6 + 31 * srcA + -36 * srcE; |
| 393 | let tm0 = t4 + tmp01; |
| 394 | let tm1 = t4 - tmp01; |
| 395 | let tm2 = t5 + tmp03; |
| 396 | let tm3 = t5 - tmp03; |
| 397 | let tm4 = t6 + tmp00; |
| 398 | let tm5 = t6 - tmp00; |
| 399 | let tm6 = t7 + tmp02; |
| 400 | let tm7 = t7 - tmp02; |
| 401 | let tt0 = 37 * src1 + 35 * src3 + 32 * src5 + 28 * src7 + 23 * src9 + 17 * srcB + 11 * srcD + 4 * srcF; |
| 402 | let tt1 = 35 * src1 + 23 * src3 + 4 * src5 + -17 * src7 + -32 * src9 + -37 * srcB + -28 * srcD + -11 * srcF; |
| 403 | let tt2 = 32 * src1 + 4 * src3 + -28 * src5 + -35 * src7 + -11 * src9 + 23 * srcB + 37 * srcD + 17 * srcF; |
| 404 | let tt3 = 28 * src1 + -17 * src3 + -35 * src5 + 4 * src7 + 37 * src9 + 11 * srcB + -32 * srcD + -23 * srcF; |
| 405 | let tt4 = 23 * src1 + -32 * src3 + -11 * src5 + 37 * src7 + -4 * src9 + -35 * srcB + 17 * srcD + 28 * srcF; |
| 406 | let tt5 = 17 * src1 + -37 * src3 + 23 * src5 + 11 * src7 + -35 * src9 + 28 * srcB + 4 * srcD + -32 * srcF; |
| 407 | let tt6 = 11 * src1 + -28 * src3 + 37 * src5 + -32 * src7 + 17 * src9 + 4 * srcB + -23 * srcD + 35 * srcF; |
| 408 | let tt7 = 4 * src1 + -11 * src3 + 17 * src5 + -23 * src7 + 28 * src9 + -32 * srcB + 35 * srcD + -37 * srcF; |
| 409 | blk[off + 0 * step] = ((tm0 + tt0 + 64) >> 7) as i16; |
| 410 | blk[off + 1 * step] = ((tm4 + tt1 + 64) >> 7) as i16; |
| 411 | blk[off + 2 * step] = ((tm6 + tt2 + 64) >> 7) as i16; |
| 412 | blk[off + 3 * step] = ((tm4 + tt3 + 64) >> 7) as i16; |
| 413 | blk[off + 4 * step] = ((tm3 + tt4 + 64) >> 7) as i16; |
| 414 | blk[off + 5 * step] = ((tm7 + tt5 + 64) >> 7) as i16; |
| 415 | blk[off + 6 * step] = ((tm5 + tt6 + 64) >> 7) as i16; |
| 416 | blk[off + 7 * step] = ((tm1 + tt7 + 64) >> 7) as i16; |
| 417 | blk[off + 8 * step] = ((tm1 - tt7 + 64) >> 7) as i16; |
| 418 | blk[off + 9 * step] = ((tm5 - tt6 + 64) >> 7) as i16; |
| 419 | blk[off + 10 * step] = ((tm7 - tt5 + 64) >> 7) as i16; |
| 420 | blk[off + 11 * step] = ((tm3 - tt4 + 64) >> 7) as i16; |
| 421 | blk[off + 12 * step] = ((tm2 - tt3 + 64) >> 7) as i16; |
| 422 | blk[off + 13 * step] = ((tm6 - tt2 + 64) >> 7) as i16; |
| 423 | blk[off + 14 * step] = ((tm4 - tt1 + 64) >> 7) as i16; |
| 424 | blk[off + 15 * step] = ((tm0 - tt0 + 64) >> 7) as i16; |
| 425 | } |
| 426 | pub fn transform16x16(&self, blk: &mut [i16; 16 * 16]) { |
| 427 | for i in 0..16 { |
| 428 | Self::transform16(blk, i, 16); |
| 429 | } |
| 430 | for i in 0..16 { |
| 431 | Self::transform16(blk, i * 16, 1); |
| 432 | } |
| 433 | } |
| 434 | |
| 435 | pub fn add_block(&self, dst: &mut [u8], mut doff: usize, dstride: usize, blk: &[i16], size: usize) { |
| 436 | for y in 0..size { |
| 437 | for x in 0..size { |
| 438 | dst[doff + x] = clip8((dst[doff + x] as i16) + blk[x + y * size]); |
| 439 | } |
| 440 | doff += dstride; |
| 441 | } |
| 442 | } |
| 443 | fn avg(&self, dst: &mut [u8], mut didx: usize, dstride: usize, |
| 444 | src: &[u8], mut sidx: usize, sstride: usize, |
| 445 | w: usize, h: usize) { |
| 446 | for _ in 0..h { |
| 447 | for x in 0..w { |
| 448 | dst[didx + x] = (((dst[didx + x] as u16) + (src[sidx + x] as u16)) >> 1) as u8; |
| 449 | } |
| 450 | didx += dstride; |
| 451 | sidx += sstride; |
| 452 | } |
| 453 | } |
    /// Averages the block already written to `frame` at (x, y) with the block
    /// stored in `prev_frame` (the scratch buffer filled by `do_mc` with
    /// `avg = true`), for all three planes.  Chroma uses halved coordinates
    /// and dimensions (4:2:0).
    pub fn do_avg(&self, frame: &mut NASimpleVideoFrame<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, w: usize, h: usize) {
        for comp in 0..3 {
            let dstride = frame.stride[comp];
            let sstride = prev_frame.get_stride(comp);
            // NOTE(review): the luma branch does not add frame.offset[0] -
            // this assumes the luma plane starts at offset 0; confirm against
            // the frame layout used by the caller.
            let doff = if comp == 0 { x + y * dstride } else { frame.offset[comp] + (x >> 1) + (y >> 1) * dstride };
            let soff = prev_frame.get_offset(comp);
            let dst = &mut frame.data;
            let sdata = prev_frame.get_data();
            let src: &[u8] = sdata.as_slice();

            if comp == 0 {
                self.avg(dst, doff, dstride, src, soff, sstride, w, h);
            } else {
                self.avg(dst, doff, dstride, src, soff, sstride, w >> 1, h >> 1);
            }
        }
    }
    /// Motion compensation for one inter block: quarter-pel 6-tap filtering
    /// for luma plus quarter-pel bilinear filtering for both chroma planes
    /// (with the MV halved for 4:2:0).
    ///
    /// With `avg` set the result goes to the start of the plane (offset 0 of
    /// the destination frame) so `do_avg` can blend it in later.  When the
    /// filter footprint would read outside the padded reference frame, the
    /// border is replicated into a stack buffer via `edge_emu` first.
    pub fn do_mc(&self, frame: &mut NASimpleVideoFrame<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, w: usize, h: usize, mv: MV, avg: bool) {
        { // luma
            let dstride = frame.stride[0];
            let doffset = frame.offset[0] + (if !avg { x + y * dstride } else { 0 });
            let dst = &mut frame.data;

            // reference dimensions rounded up to the 16-pixel alignment
            let (w_, h_) = prev_frame.get_dimensions(0);
            let fw = (w_ + 15) & !15;
            let fh = (h_ + 15) & !15;

            // integer part and fractional (quarter-pel) phase of the MV
            let dx = mv.x >> 2;
            let cx = (mv.x & 3) as usize;
            let dy = mv.y >> 2;
            let cy = (mv.y & 3) as usize;

            // RV60_EDGE1/RV60_EDGE2 (defined elsewhere in this file) give the
            // extra margin the interpolation filter needs for each phase
            if check_pos(x, y, w, h, fw, fh, dx, dy, RV60_EDGE1[cx], RV60_EDGE2[cx], RV60_EDGE1[cy], RV60_EDGE2[cy]) {
                let sstride = prev_frame.get_stride(0);
                let mut soffset = prev_frame.get_offset(0) + x + y * sstride;
                let data = prev_frame.get_data();
                let src: &[u8] = data.as_slice();
                soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
                luma_mc(dst, doffset, dstride, src, soffset, sstride, w, h, cx, cy);
            } else {
                // block (plus a 2-pixel top/left margin and the trailing taps)
                // crosses the frame border - interpolate from an emulated edge
                let mut ebuf: [u8; 70*70] = [0; 70*70];
                edge_emu(prev_frame, (x as isize) + (dx as isize) - 2, (y as isize) + (dy as isize) - 2, w+5, h+5, &mut ebuf, 70, 0, 0);
                luma_mc(dst, doffset, dstride, &ebuf, 70*2 + 2, 70, w, h, cx, cy);
            }
        }
        // chroma setup: halved MV, 8-aligned plane dimensions, halved block
        let (w_, h_) = prev_frame.get_dimensions(1);
        let fw = (w_ + 7) & !7;
        let fh = (h_ + 7) & !7;
        let mvx = mv.x / 2;
        let mvy = mv.y / 2;
        let dx = mvx >> 2;
        let cx = (mvx & 3) as usize;
        let dy = mvy >> 2;
        let cy = (mvy & 3) as usize;
        let cw = w >> 1;
        let ch = h >> 1;

        for comp in 1..3 { // chroma
            let dstride = frame.stride[comp];
            let doffset = frame.offset[comp] + (if !avg { (x >> 1) + (y >> 1) * dstride } else { 0 });
            // bilinear filter only needs one extra pixel right/below
            if check_pos(x >> 1, y >> 1, cw, ch, fw, fh, dx, dy, 0, 1, 0, 1) {
                let sstride = prev_frame.get_stride(comp);
                let mut soffset = prev_frame.get_offset(comp) + (x >> 1) + (y >> 1) * sstride;
                let data = prev_frame.get_data();
                let src: &[u8] = data.as_slice();
                soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize;
                chroma_mc(frame.data, doffset, dstride, src, soffset, sstride, cw, ch, cx, cy);
            } else {
                let mut ebuf: [u8; 40*40] = [0; 40*40];
                edge_emu(prev_frame, ((x >> 1) as isize) + (dx as isize), ((y >> 1) as isize) + (dy as isize), cw+1, ch+1, &mut ebuf, 40, comp, 0);
                chroma_mc(frame.data, doffset, dstride, &ebuf, 0, 40, cw, ch, cx, cy);
            }
        }
    }
    /// Deblocks one 4-pixel vertical luma edge at (xpos, ypos) and, for
    /// strong strengths, the co-located 2-pixel chroma edge.
    ///
    /// `dblk_l`/`dblk_r` pack the per-side filter state: quantiser in the
    /// upper bits, strength (0..3) in the lower two bits.
    fn deblock_edge4_ver(&self, frame: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize,
                         dblk_l: u8, dblk_r: u8, deblock_chroma: bool) {
        let qp_l = dblk_l >> 2;
        let str_l = dblk_l & 3;
        let qp_r = dblk_r >> 2;
        let str_r = dblk_r & 3;
        let dl_l = &RV60_DEB_LIMITS[qp_l as usize];
        let dl_r = &RV60_DEB_LIMITS[qp_r as usize];
        // per-strength filter mode; 0 means the side is not filtered
        let mode_l = if str_l != 0 { dl_l[(str_l - 1) as usize] } else { 0 };
        let mode_r = if str_r != 0 { dl_r[(str_r - 1) as usize] } else { 0 };
        // both limits come from the right-hand side's QP entry
        let lim1 = dl_r[2] as i16;
        let lim2 = (dl_r[3] * 4) as i16;
        {
            let stride = frame.stride[0];
            let offset = frame.offset[0] + xpos + ypos * stride;
            filter_luma_edge(frame.data, offset, 1, stride, mode_l, mode_r, lim1, lim2);
        }
        if ((str_l | str_r) >= 2) && deblock_chroma {
            // NOTE(review): `1..2` touches only the first chroma plane;
            // confirm whether the second plane is deliberately skipped.
            for comp in 1..2 {
                let stride = frame.stride[comp];
                let offset = frame.offset[comp] + (xpos >> 1) + (ypos >> 1) * stride;
                filter_chroma_edge(frame.data, offset, 1, stride, mode_l, mode_r, lim1, lim2);
            }
        }
    }
    /// Deblocks one 4-pixel horizontal luma edge at (xpos, ypos) and, for
    /// strong strengths, the co-located 2-pixel chroma edge.  Same packed
    /// QP+strength convention as `deblock_edge4_ver`; the luma filter is
    /// called with step/stride swapped to walk across rows.
    fn deblock_edge4_hor(&self, frame: &mut NASimpleVideoFrame<u8>, xpos: usize, ypos: usize,
                         dblk_t: u8, dblk_d: u8, deblock_chroma: bool) {
        let qp_t = dblk_t >> 2;
        let str_t = dblk_t & 3;
        let qp_d = dblk_d >> 2;
        let str_d = dblk_d & 3;
        let dl_t = &RV60_DEB_LIMITS[qp_t as usize];
        let dl_d = &RV60_DEB_LIMITS[qp_d as usize];
        // per-strength filter mode; 0 means the side is not filtered
        let mode_t = if str_t != 0 { dl_t[(str_t - 1) as usize] } else { 0 };
        let mode_d = if str_d != 0 { dl_d[(str_d - 1) as usize] } else { 0 };
        // both limits come from the lower side's QP entry
        let lim1 = dl_d[2] as i16;
        let lim2 = (dl_d[3] * 4) as i16;
        {
            let stride = frame.stride[0];
            let offset = frame.offset[0] + xpos + ypos * stride;
            filter_luma_edge(frame.data, offset, stride, 1, mode_t, mode_d, lim1, lim2);
        }
        if ((str_t | str_d) >= 2) && deblock_chroma {
            // NOTE(review): `1..2` touches only the first chroma plane;
            // confirm whether the second plane is deliberately skipped.
            for comp in 1..2 {
                let stride = frame.stride[comp];
                let offset = frame.offset[comp] + (xpos >> 1) + (ypos >> 1) * stride;
                filter_chroma_edge(frame.data, offset, stride, 1, mode_t, mode_d, lim1, lim2);
            }
        }
    }
| 578 | fn deblock8x8(&self, dparams: &RV60DeblockParams, frame: &mut NASimpleVideoFrame<u8>, |
| 579 | xpos: usize, ypos: usize, top_str: &[u8], left_str: &[u8], dblkpos: usize) { |
| 580 | if xpos > 0 { |
| 581 | if ypos > 0 { |
| 582 | let str_l = left_str[dblkpos - dparams.dblkstride]; |
| 583 | let str_r = left_str[dblkpos]; |
| 584 | if (str_l | str_r) != 0 { |
| 585 | self.deblock_edge4_ver(frame, xpos, ypos - 4, str_l, str_r, dparams.deblock_chroma); |
| 586 | } |
| 587 | } |
| 588 | { |
| 589 | let str_l = left_str[dblkpos]; |
| 590 | let str_r = left_str[dblkpos + dparams.dblkstride]; |
| 591 | if (str_l | str_r) != 0 { |
| 592 | self.deblock_edge4_ver(frame, xpos, ypos + 0, str_l, str_r, dparams.deblock_chroma); |
| 593 | } |
| 594 | } |
| 595 | if ypos + 4 >= dparams.height { |
| 596 | let str_l = left_str[dblkpos + dparams.dblkstride]; |
| 597 | let str_r = left_str[dblkpos + dparams.dblkstride * 2]; |
| 598 | if (str_l | str_r) != 0 { |
| 599 | self.deblock_edge4_ver(frame, xpos, ypos + 4, str_l, str_r, dparams.deblock_chroma); |
| 600 | } |
| 601 | } |
| 602 | } |
| 603 | if ypos > 0 { |
| 604 | if xpos > 0 { |
| 605 | let str_t = top_str[dblkpos - 1]; |
| 606 | let str_d = top_str[dblkpos]; |
| 607 | if (str_t | str_d) != 0 { |
| 608 | self.deblock_edge4_hor(frame, xpos - 4, ypos, str_t, str_d, dparams.deblock_chroma); |
| 609 | } |
| 610 | } |
| 611 | { |
| 612 | let str_t = top_str[dblkpos]; |
| 613 | let str_d = top_str[dblkpos + 1]; |
| 614 | if (str_t | str_d) != 0 { |
| 615 | self.deblock_edge4_hor(frame, xpos + 0, ypos, str_t, str_d, dparams.deblock_chroma); |
| 616 | } |
| 617 | } |
| 618 | if xpos + 4 >= dparams.width { |
| 619 | let str_t = top_str[dblkpos + 1]; |
| 620 | let str_d = top_str[dblkpos + 2]; |
| 621 | if (str_t | str_d) != 0 { |
| 622 | self.deblock_edge4_hor(frame, xpos + 4, ypos, str_t, str_d, dparams.deblock_chroma); |
| 623 | } |
| 624 | } |
| 625 | } |
| 626 | } |
| 627 | pub fn do_deblock(&self, dparams: &RV60DeblockParams, frame: &mut NASimpleVideoFrame<u8>, |
| 628 | xpos: usize, ypos: usize, size: usize, top_str: &[u8], left_str: &[u8], dpos: usize) { |
| 629 | for x in 0..(size >> 3) { |
| 630 | self.deblock8x8(dparams, frame, xpos + x * 8, ypos, |
| 631 | top_str, left_str, dpos + x * 2); |
| 632 | } |
| 633 | for y in 1..(size >> 3) { |
| 634 | self.deblock8x8(dparams, frame, xpos, ypos + y * 8, |
| 635 | top_str, left_str, dpos + y * 2 * dparams.dblkstride); |
| 636 | } |
| 637 | } |
| 638 | } |
| 639 | |
/// Deblocking parameter table indexed by quantiser (0..32).
/// The first two columns are the filter modes selected by edge strength
/// (via `dl[strength - 1]`); column 2 is `lim1` and column 3 times 4 is
/// `lim2`, as consumed by `deblock_edge4_ver`/`deblock_edge4_hor`.
const RV60_DEB_LIMITS: [[u8; 4]; 32] = [
    [ 0, 0, 128,  0 ], [ 0, 0, 128,  0 ], [ 0, 0, 128,  0 ], [ 0, 0, 128,  0 ],
    [ 0, 0, 128,  0 ], [ 0, 0, 128,  0 ], [ 0, 0, 128,  0 ], [ 0, 0, 128,  0 ],
    [ 0, 0, 128,  3 ], [ 0, 1, 128,  3 ], [ 0, 1, 122,  3 ], [ 1, 1,  96,  4 ],
    [ 1, 1,  75,  4 ], [ 1, 1,  59,  4 ], [ 1, 1,  47,  6 ], [ 1, 1,  37,  6 ],
    [ 1, 1,  29,  6 ], [ 1, 2,  23,  7 ], [ 1, 2,  18,  8 ], [ 1, 2,  15,  8 ],
    [ 1, 2,  13,  9 ], [ 2, 3,  11,  9 ], [ 2, 3,  10, 10 ], [ 2, 3,   9, 10 ],
    [ 2, 4,   8, 11 ], [ 3, 4,   7, 11 ], [ 3, 5,   6, 12 ], [ 3, 5,   5, 13 ],
    [ 3, 5,   4, 14 ], [ 4, 7,   3, 15 ], [ 5, 8,   2, 16 ], [ 5, 9,   1, 17 ]
];
| 650 | |
/// Neighbouring-sample context for intra prediction.
#[derive(Clone)]
pub struct IntraPredContext {
    pub t: [u8; 129], // 0 - TL or 0x80, two block sizes or replicated last val from block0
    pub l: [u8; 129], // left neighbour samples, same layout as `t`
    pub has_t: bool,  // top neighbours are available
    pub has_tr: bool, // top-right neighbours are available
    pub has_l: bool,  // left neighbours are available
    pub has_ld: bool, // left-down neighbours are available
}
| 660 | |
| 661 | impl IntraPredContext { |
    /// Creates a context with every neighbour sample set to the neutral
    /// 0x80 value and all availability flags cleared.
    pub fn new() -> Self {
        Self {
            t: [0x80; 129], l: [0x80; 129], has_t: false, has_tr: false, has_l: false, has_ld: false,
        }
    }
    /// DC intra prediction: fills the block with the rounded average of the
    /// available top/left neighbour samples (0x80 if none are available).
    /// With `filter` set and both neighbours present, the first row and
    /// column are additionally smoothed towards the neighbour samples.
    pub fn pred_dc(&self, dst: &mut [u8], mut doff: usize, dstride: usize, size: usize, filter: bool) {
        let dc;
        if !self.has_t && !self.has_l {
            dc = 0x80; // no neighbours - flat mid-grey
        } else {
            let mut sum = 0;
            if self.has_t {
                for x in 0..size { sum += self.t[x + 1] as u16; }
            }
            if self.has_l {
                for y in 0..size { sum += self.l[y + 1] as u16; }
            }
            // rounded average over however many samples were accumulated
            if self.has_t && self.has_l {
                dc = ((sum + (size as u16)) / ((size as u16) * 2)) as u8;
            } else {
                dc = ((sum + ((size >> 1) as u16)) / (size as u16)) as u8;
            }
        }
        for _ in 0..size {
            for x in 0..size { dst[doff + x] = dc; }
            doff += dstride;
        }
        if filter && self.has_t && self.has_l {
            doff -= dstride * size; // rewind to the first row of the block
            // top-left corner blends both neighbours; edges blend one each (1:3)
            dst[doff] = (((self.t[1] as u16) + (self.l[1] as u16) + 2 * (dst[doff] as u16) + 2) >> 2) as u8;
            for x in 1..size {
                dst[doff + x] = (((self.t[x + 1] as u16) + 3 * (dst[doff + x] as u16) + 2) >> 2) as u8;
            }
            for y in 1..size {
                doff += dstride;
                dst[doff] = (((self.l[y + 1] as u16) + 3 * (dst[doff] as u16) + 2) >> 2) as u8;
            }
        }
    }
    /// Plane (gradient) intra prediction: interpolates a smooth surface
    /// between the top/left neighbour samples and the far corner samples.
    pub fn pred_plane(&self, dst: &mut [u8], mut doff: usize, dstride: usize, size: usize) {
        // far-corner anchors just past the block on each side
        let lastl = self.l[size + 1] as i32;
        let lastt = self.t[size + 1] as i32;
        // per-column / per-row gradient increments
        let mut tmp1: [i32; 64] = [0; 64];
        let mut tmp2: [i32; 64] = [0; 64];
        for i in 0..size {
            tmp1[i] = lastl - (self.t[i + 1] as i32);
            tmp2[i] = lastt - (self.l[i + 1] as i32);
        }
        // normalisation shift (log2(size) + 1)
        let shift = match size {
                4 => 3,
                8 => 4,
                16 => 5,
                32 => 6,
                _ => 7,
            };
        // neighbour samples pre-scaled so the accumulators share one shift
        let mut top_ref: [i32; 64] = [0; 64];
        let mut left_ref:[i32; 64] = [0; 64];
        for i in 0..size {
            top_ref [i] = (self.t[i + 1] as i32) << (shift - 1);
            left_ref[i] = (self.l[i + 1] as i32) << (shift - 1);
        }
        for y in 0..size {
            let add = tmp2[y];
            let mut sum = left_ref[y] + (size as i32); // + size acts as rounding
            for x in 0..size {
                // running vertical gradient per column, folded into top_ref
                let v = tmp1[x] + top_ref[x];
                sum += add;
                top_ref[x] = v;
                dst[doff + x] = ((sum + v) >> shift) as u8;
            }
            doff += dstride;
        }
    }
| 735 | fn pred_hor_angle(dst: &mut [u8], doff: usize, dstride: usize, size: usize, weight: i16, src: &[u8]) { |
| 736 | let mut sum = 0; |
| 737 | for x in 0..size { |
| 738 | sum += weight; |
| 739 | let off = ((sum >> 5) + 32) as usize; |
| 740 | let frac = (sum & 0x1F) as u16; |
| 741 | if frac == 0 { |
| 742 | for y in 0..size { |
| 743 | dst[doff + x + y * dstride] = src[off + y]; |
| 744 | } |
| 745 | } else { |
| 746 | for y in 0..size { |
| 747 | let a = src[off + y + 0] as u16; |
| 748 | let b = src[off + y + 1] as u16; |
| 749 | dst[doff + x + y * dstride] = (((32 - frac) * a + frac * b + 0x10) >> 5) as u8; |
| 750 | } |
| 751 | } |
| 752 | } |
| 753 | } |
| 754 | fn pred_ver_angle(dst: &mut [u8], mut doff: usize, dstride: usize, size: usize, weight: i16, src: &[u8]) { |
| 755 | let mut sum = 0; |
| 756 | for _ in 0..size { |
| 757 | sum += weight; |
| 758 | let off = ((sum >> 5) + 32) as usize; |
| 759 | let frac = (sum & 0x1F) as u16; |
| 760 | if frac == 0 { |
| 761 | dst[doff..][..size].copy_from_slice(&src[off..][..size]); |
| 762 | } else { |
| 763 | for x in 0..size { |
| 764 | let a = src[off + x + 0] as u16; |
| 765 | let b = src[off + x + 1] as u16; |
| 766 | dst[doff + x] = (((32 - frac) * a + frac * b + 0x10) >> 5) as u8; |
| 767 | } |
| 768 | } |
| 769 | doff += dstride; |
| 770 | } |
| 771 | } |
| 772 | fn filter_weak(dst: &mut [u8], src: &[u8], size: usize) { |
| 773 | dst[0] = src[0]; |
| 774 | for i in 1..size-1 { |
| 775 | dst[i] = (((src[i - 1] as u16) + 2 * (src[i] as u16) + (src[i + 1] as u16) + 2) >> 2) as u8; |
| 776 | } |
| 777 | dst[size - 1] = src[size - 1]; |
| 778 | } |
| 779 | fn filter_bilin32(dst: &mut [u8], v0: u8, v1: u8, size: usize) { |
| 780 | let diff = (v1 as i16) - (v0 as i16); |
| 781 | let mut sum = ((v0 as i16) << 5) + (1 << (5 - 1)); |
| 782 | for i in 0..size { |
| 783 | dst[i] = (sum >> 5) as u8; |
| 784 | sum += diff; |
| 785 | } |
| 786 | } |
    /// Directional intra prediction for a `size`×`size` block.
    ///
    /// `angle` selects the prediction mode:
    /// * 0        — plane prediction (`pred_plane`)
    /// * 1        — DC prediction (`pred_dc`)
    /// * 2..=9    — angular prediction from the left reference samples
    /// * 10       — pure horizontal prediction
    /// * 11..=17  — angular from the left, reference extended backwards
    ///              with top samples projected via the inverse angle
    /// * 18..=25  — angular from the top, reference extended backwards
    ///              with left samples projected via the inverse angle
    /// * 26       — pure vertical prediction
    /// * 27..=34  — angular prediction from the top reference samples
    ///
    /// This layout mirrors HEVC-style angular intra prediction
    /// (NOTE(review): presumably by design — confirm against the format spec).
    ///
    /// `filter` enables the gradient-based first-row/first-column smoothing
    /// in the pure horizontal/vertical modes and is forwarded to `pred_dc`.
    ///
    /// The `filtered1`/`filtered2` scratch buffers hold (optionally
    /// smoothed) reference samples with a 32-entry prefix so that the
    /// negative offsets produced by the angle stepping in
    /// `pred_hor_angle`/`pred_ver_angle` stay in bounds.
    #[allow(clippy::cyclomatic_complexity)]
    pub fn pred_angle(&self, dst: &mut [u8], mut doff: usize, dstride: usize, size: usize, angle: usize, filter: bool) {
        let mut filtered1: [u8; 96] = [0; 96];
        let mut filtered2: [u8; 96] = [0; 96];
        if angle == 0 {
            self.pred_plane(dst, doff, dstride, size);
        } else if angle == 1 {
            self.pred_dc(dst, doff, dstride, size, filter);
        } else if angle <= 9 {
            // Horizontal family, positive direction: only left samples used.
            let ang_weight = RV60_IPRED_ANGLE[10 - angle];
            // How many reference samples past `size` the steepest step reaches.
            let add_size = (size * (ang_weight as usize) + 31) >> 5;
            if size <= 16 {
                Self::filter_weak(&mut filtered1[32..], &self.l[1..], size + add_size);
            } else {
                // 32-wide blocks smooth the reference with a bilinear ramp instead.
                Self::filter_bilin32(&mut filtered1[32..], self.l[1], self.l[33], 32);
                Self::filter_bilin32(&mut filtered1[64..], self.l[32], self.l[64], add_size);
            }
            Self::pred_hor_angle(dst, doff, dstride, size, ang_weight as i16, &filtered1);
        } else if angle == 10 {
            // Pure horizontal: every row is a single (smoothed) left sample.
            if size <= 16 {
                Self::filter_weak(&mut filtered1[32..], &self.l[1..], size);
            } else {
                Self::filter_bilin32(&mut filtered1[32..], self.l[1], self.l[33], 32);
            }
            for y in 0..size {
                for x in 0..size {
                    dst[doff + x] = filtered1[32 + y];
                }
                doff += dstride;
            }
            if filter {
                // Rewind to the first row and bias it with half the
                // horizontal gradient of the top reference.
                doff -= dstride * size;
                let tl = self.t[0] as i16;
                for x in 0..size {
                    dst[doff + x] = clip8((dst[doff + x] as i16) + (((self.t[x + 1] as i16) - tl) >> 1));
                }
            }
        } else if angle <= 17 {
            // Horizontal family, negative direction: left reference extended
            // backwards with projected top samples.
            let ang_weight = RV60_IPRED_ANGLE [angle - 10];
            let inv_angle = RV60_IPRED_INV_ANGLE[angle - 10];
            let add_size = (size * (ang_weight as usize) + 31) >> 5;
            if size <= 16 {
                // Copy raw references (including the top-left corner at
                // index 31); filtered2 holds the other side for projection.
                for i in 0..=size {
                    filtered1[32-1 + i] = self.l[i];
                }
                for i in 0..=size {
                    filtered2[32-1 + i] = self.t[i];
                }
            } else {
                filtered1[32-1] = self.l[0];
                Self::filter_bilin32(&mut filtered1[32..], self.l[0], self.l[32], 32);
                filtered2[32-1] = self.t[0];
                Self::filter_bilin32(&mut filtered2[32..], self.t[0], self.t[32], 32);
            }
            if add_size > 1 {
                // Project top samples onto the negative part of the left
                // reference; `inv_angle` is 8.8 fixed point, 0x80 rounds.
                let mut sum = 0x80;
                for i in 1..add_size {
                    sum += inv_angle;
                    let pos = ((sum >> 8) + 32 - 1) as usize;
                    filtered1[32 - 1 - i] = filtered2[pos];
                }
            }
            Self::pred_hor_angle(dst, doff, dstride, size, -(ang_weight as i16), &filtered1);
        } else if angle <= 25 {
            // Vertical family, negative direction: top reference extended
            // backwards with projected left samples (mirror of the above).
            let ang_weight = RV60_IPRED_ANGLE[26 - angle];
            let inv_angle = RV60_IPRED_INV_ANGLE[26 - angle];
            let add_size = (size * (ang_weight as usize) + 31) >> 5;
            if size <= 16 {
                for i in 0..=size {
                    filtered1[32-1 + i] = self.t[i];
                }
                for i in 0..=size {
                    filtered2[32-1 + i] = self.l[i];
                }
            } else {
                filtered1[32-1] = self.t[0];
                Self::filter_bilin32(&mut filtered1[32..], self.t[0], self.t[32], 32);
                filtered2[32-1] = self.l[0];
                Self::filter_bilin32(&mut filtered2[32..], self.l[0], self.l[32], 32);
            }
            if add_size > 1 {
                // Project left samples onto the negative part of the top
                // reference; `inv_angle` is 8.8 fixed point, 0x80 rounds.
                let mut sum = 0x80;
                for i in 1..add_size {
                    sum += inv_angle;
                    let pos = ((sum >> 8) + 32 - 1) as usize;
                    filtered1[32 - 1 - i] = filtered2[pos];
                }
            }
            Self::pred_ver_angle(dst, doff, dstride, size, -(ang_weight as i16), &filtered1);
        } else if angle == 26 {
            // Pure vertical: every row is a copy of the (smoothed) top row.
            if size <= 16 {
                Self::filter_weak(&mut filtered1[32..], &self.t[1..], size);
            } else {
                Self::filter_bilin32(&mut filtered1[32..], self.t[1], self.t[33], 32);
            }
            for _ in 0..size {
                dst[doff..][..size].copy_from_slice(&filtered1[32..][..size]);
                doff += dstride;
            }
            if filter {
                // Rewind to the first row and bias the first column with
                // half the vertical gradient of the left reference.
                doff -= dstride * size;
                let tl = self.l[0] as i16;
                for y in 0..size {
                    dst[doff] = clip8((dst[doff] as i16) + (((self.l[y + 1] as i16) - tl) >> 1));
                    doff += dstride;
                }
            }
        } else if angle <= 34 {
            // Vertical family, positive direction: only top samples used.
            let ang_weight = RV60_IPRED_ANGLE[angle - 26];
            let add_size = (size * (ang_weight as usize) + 31) >> 5;
            if size <= 16 {
                Self::filter_weak(&mut filtered1[32..], &self.t[1..], size + add_size);
            } else {
                Self::filter_bilin32(&mut filtered1[32..], self.t[1], self.t[33], 32);
                Self::filter_bilin32(&mut filtered1[64..], self.t[32], self.t[64], add_size);
            }
            Self::pred_ver_angle(dst, doff, dstride, size, ang_weight as i16, &filtered1);
        } else {
            // Callers only produce angles 0..=34.
            unreachable!();
        }
    }
| 908 | } |
| 909 | |
/// Per-direction step of the angular intra predictors, in 1/32nd-sample
/// units per line/column (index 0 is unused).
const RV60_IPRED_ANGLE: [u8; 9] = [ 0, 2, 5, 9, 13, 17, 21, 26, 32 ];
/// 8.8 fixed-point inverse of the matching `RV60_IPRED_ANGLE` entry,
/// i.e. round(256 * 32 / angle); used to project one reference side onto
/// the other for the negative prediction directions (index 0 unused).
const RV60_IPRED_INV_ANGLE: [i16; 9] = [ 0, 4096, 1638, 910, 630, 482, 390, 315, 256 ];
// NOTE(review): the edge tables below are consumed elsewhere in the decoder
// (not visible in this chunk) — presumably deblocking edge offsets; verify
// against their call sites before relying on this description.
const RV60_EDGE1: [isize; 4] = [ 0, 2, 2, 2 ];
const RV60_EDGE2: [isize; 4] = [ 0, 3, 3, 3 ];
| 914 | |