| 1 | use nihav_core::frame::*; |
| 2 | use nihav_codec_support::codecs::blockdsp::edge_emu; |
| 3 | |
/// Saturates a signed 16-bit value to `0..=255` and narrows it to a byte.
fn clip_u8(val: i16) -> u8 {
    if val < 0 {
        0
    } else if val > 255 {
        255
    } else {
        val as u8
    }
}
| 7 | |
/// Border pixels surrounding a block, gathered for intra prediction.
///
/// `left` and `top` each hold up to 16 pixels; every position that was not
/// read from the picture carries the neutral value `0x80`.
pub struct IPredContext {
    /// Column of pixels directly left of the block, top to bottom.
    pub left: [u8; 16],
    /// When `false`, `fill` does not read the left column.
    pub has_left: bool,
    /// Row of pixels directly above the block, left to right.
    pub top: [u8; 16],
    /// When `false`, `fill` does not read the top row.
    pub has_top: bool,
    /// Pixel diagonally above-left of the block.
    pub tl: u8,
}

impl IPredContext {
    /// Loads the border of the block whose first pixel is `src[off]`.
    ///
    /// * `stride` - distance in bytes between vertically adjacent pixels.
    /// * `tsize` - number of pixels taken from the row above (at most 16).
    /// * `lsize` - number of pixels taken from the column to the left (at most 16).
    ///
    /// A side is only read when the matching `has_*` flag is set, and the
    /// corner pixel only when both are set; everything else becomes `0x80`.
    ///
    /// # Panics
    ///
    /// Panics if a requested pixel lies outside `src` or a size exceeds 16.
    pub fn fill(&mut self, src: &[u8], off: usize, stride: usize, tsize: usize, lsize: usize) {
        const NEUTRAL: u8 = 0x80;

        if !self.has_top {
            self.top = [NEUTRAL; 16];
        } else {
            let (known, rest) = self.top.split_at_mut(tsize);
            for (col, px) in known.iter_mut().enumerate() {
                *px = src[off - stride + col];
            }
            for px in rest.iter_mut() {
                *px = NEUTRAL;
            }
        }

        if !self.has_left {
            self.left = [NEUTRAL; 16];
        } else {
            let (known, rest) = self.left.split_at_mut(lsize);
            for (row, px) in known.iter_mut().enumerate() {
                *px = src[off - 1 + row * stride];
            }
            for px in rest.iter_mut() {
                *px = NEUTRAL;
            }
        }

        self.tl = if self.has_top && self.has_left {
            src[off - stride - 1]
        } else {
            NEUTRAL
        };
    }
}

impl Default for IPredContext {
    /// Creates a context with no available neighbours: all pixels are `0x80`.
    fn default() -> Self {
        let neutral_line = [0x80u8; 16];
        Self {
            left: neutral_line,
            has_left: false,
            top: neutral_line,
            has_top: false,
            tl: 0x80,
        }
    }
}
| 57 | |
/// Fixed-point 4x4 DCT basis, stored row by row.
///
/// The three magnitudes are approximately `32768 * cos(pi/4)`,
/// `32768 * cos(pi/8)` and `32768 * sin(pi/8)`.
const DCT_COEFFS: [i32; 16] = {
    const C4: i32 = 23170;
    const C8: i32 = 30274;
    const S8: i32 = 12540;
    [
         C4,  C4,  C4,  C4,
         C8,  S8, -S8, -C8,
         C4, -C4, -C4,  C4,
         S8, -C8,  C8, -S8,
    ]
};
| 64 | |
/// Performs the 4x4 inverse DCT in place.
///
/// The transform runs as two passes of the same 4-point butterfly: first over
/// every row (results scaled down by 14 bits), then over every column
/// (rounded and scaled down by 18 bits).
///
/// All intermediate arithmetic is wrapping. For ordinary coefficient values
/// nothing wraps; for extreme values (for example a block of `i16::MAX`) the
/// butterfly sums exceed `i32`, and wrapping keeps overflow-checked builds
/// from panicking while producing the same bits as an unchecked build.
pub fn idct4x4(coeffs: &mut [i16; 16]) {
    const C4: i32 = 23170;
    const C8: i32 = 30274;
    const S8: i32 = 12540;
    /// Rounding term added before the final 18-bit down-shift.
    const COL_BIAS: i32 = 0x20000;

    /// One 4-point inverse butterfly; `bias` is added to the even half.
    fn butterfly(s0: i32, s1: i32, s2: i32, s3: i32, bias: i32) -> [i32; 4] {
        let t0 = s0.wrapping_add(s2).wrapping_mul(C4).wrapping_add(bias);
        let t1 = s0.wrapping_sub(s2).wrapping_mul(C4).wrapping_add(bias);
        let t2 = s1.wrapping_mul(C8).wrapping_add(s3.wrapping_mul(S8));
        let t3 = s1.wrapping_mul(S8).wrapping_sub(s3.wrapping_mul(C8));
        [
            t0.wrapping_add(t2),
            t1.wrapping_add(t3),
            t1.wrapping_sub(t3),
            t0.wrapping_sub(t2),
        ]
    }

    // Row pass: coeffs -> tmp.
    let mut tmp = [0i16; 16];
    for (src, dst) in coeffs.chunks(4).zip(tmp.chunks_mut(4)) {
        let out = butterfly(
            i32::from(src[0]),
            i32::from(src[1]),
            i32::from(src[2]),
            i32::from(src[3]),
            0,
        );
        for (d, v) in dst.iter_mut().zip(out.iter()) {
            *d = (*v >> 14) as i16;
        }
    }

    // Column pass: tmp -> coeffs, with rounding.
    for i in 0..4 {
        let out = butterfly(
            i32::from(tmp[i]),
            i32::from(tmp[i + 4]),
            i32::from(tmp[i + 8]),
            i32::from(tmp[i + 12]),
            COL_BIAS,
        );
        for (k, v) in out.iter().enumerate() {
            coeffs[i + 4 * k] = (*v >> 18) as i16;
        }
    }
}
| 100 | |
| 101 | pub fn idct4x4_dc(coeffs: &mut [i16; 16]) { |
| 102 | let dc = ((((i32::from(coeffs[0]) * DCT_COEFFS[0]) >> 14) * DCT_COEFFS[0] + 0x20000) >> 18) as i16; |
| 103 | for el in coeffs.iter_mut() { |
| 104 | *el = dc; |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | pub fn add_coeffs4x4(dst: &mut [u8], off: usize, stride: usize, coeffs: &[i16; 16]) { |
| 109 | let dst = &mut dst[off..]; |
| 110 | for (out, src) in dst.chunks_mut(stride).zip(coeffs.chunks(4)) { |
| 111 | for (oel, iel) in out.iter_mut().take(4).zip(src.iter()) { |
| 112 | *oel = clip_u8(i16::from(*oel) + *iel); |
| 113 | } |
| 114 | } |
| 115 | } |
| 116 | pub fn add_coeffs16x1(dst: &mut [u8], off: usize, coeffs: &[i16; 16]) { |
| 117 | let dst = &mut dst[off..]; |
| 118 | for (oel, iel) in dst.iter_mut().take(16).zip(coeffs.iter()) { |
| 119 | *oel = clip_u8(i16::from(*oel) + *iel); |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | pub trait IntraPred { |
| 124 | const SIZE: usize; |
| 125 | fn ipred_dc(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 126 | let dc; |
| 127 | if !ipred.has_left && !ipred.has_top { |
| 128 | dc = 0x80; |
| 129 | } else { |
| 130 | let mut dcsum = 0; |
| 131 | let mut dcshift = match Self::SIZE { |
| 132 | 16 => 3, |
| 133 | _ => 2, |
| 134 | }; |
| 135 | if ipred.has_left { |
| 136 | for el in ipred.left.iter().take(Self::SIZE) { |
| 137 | dcsum += u16::from(*el); |
| 138 | } |
| 139 | dcshift += 1; |
| 140 | } |
| 141 | if ipred.has_top { |
| 142 | for el in ipred.top.iter().take(Self::SIZE) { |
| 143 | dcsum += u16::from(*el); |
| 144 | } |
| 145 | dcshift += 1; |
| 146 | } |
| 147 | dc = ((dcsum + (1 << (dcshift - 1))) >> dcshift) as u8; |
| 148 | } |
| 149 | for _ in 0..Self::SIZE { |
| 150 | let out = &mut dst[off..][..Self::SIZE]; |
| 151 | for el in out.iter_mut() { |
| 152 | *el = dc; |
| 153 | } |
| 154 | off += stride; |
| 155 | } |
| 156 | } |
| 157 | fn ipred_v(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 158 | for _ in 0..Self::SIZE { |
| 159 | let out = &mut dst[off..][..Self::SIZE]; |
| 160 | out.copy_from_slice(&ipred.top[0..Self::SIZE]); |
| 161 | off += stride; |
| 162 | } |
| 163 | } |
| 164 | fn ipred_h(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 165 | for leftel in ipred.left.iter().take(Self::SIZE) { |
| 166 | let out = &mut dst[off..][..Self::SIZE]; |
| 167 | for el in out.iter_mut() { |
| 168 | *el = *leftel; |
| 169 | } |
| 170 | off += stride; |
| 171 | } |
| 172 | } |
| 173 | fn ipred_tm(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 174 | let tl = i16::from(ipred.tl); |
| 175 | for m in 0..Self::SIZE { |
| 176 | for n in 0..Self::SIZE { |
| 177 | dst[off + n] = clip_u8(i16::from(ipred.left[m]) + i16::from(ipred.top[n]) - tl); |
| 178 | } |
| 179 | off += stride; |
| 180 | } |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | pub struct IPred16x16 {} |
| 185 | impl IntraPred for IPred16x16 { const SIZE: usize = 16; } |
| 186 | |
| 187 | pub struct IPred8x8 {} |
| 188 | impl IntraPred for IPred8x8 { const SIZE: usize = 8; } |
| 189 | |
// Computes smoothed border samples for the 4x4 directional predictors.
//
// The first token selects what is produced from the context expression
// (anything exposing `tl`, `top[..]` and `left[..]` byte fields):
//   topleft - one value, the 1-2-1 filter over left[0], tl, top[0]
//   top     - four values, the 1-2-1 filter centred on top[0..=3]
//   top8    - four values, the 1-2-1 filter centred on top[4..=7]
//             (top[7] is repeated past the end)
//   topavg  - four values, rounded averages of tl/top[0] .. top[2]/top[3]
//   left, left8, leftavg - the same three shapes along the left column
macro_rules! load_pred4 {
    (topleft; $ipred: expr) => {{
        let corner = u16::from($ipred.tl);
        let above = u16::from($ipred.top[0]);
        let beside = u16::from($ipred.left[0]);
        ((beside + corner * 2 + above + 2) >> 2) as u8
    }};
    (top; $ipred: expr) => {{
        let smooth = |a: u8, b: u8, c: u8| {
            ((u16::from(a) + u16::from(b) * 2 + u16::from(c) + 2) >> 2) as u8
        };
        let tl = $ipred.tl;
        let t = &$ipred.top;
        (
            smooth(tl, t[0], t[1]),
            smooth(t[0], t[1], t[2]),
            smooth(t[1], t[2], t[3]),
            smooth(t[2], t[3], t[4]),
        )
    }};
    (top8; $ipred: expr) => {{
        let smooth = |a: u8, b: u8, c: u8| {
            ((u16::from(a) + u16::from(b) * 2 + u16::from(c) + 2) >> 2) as u8
        };
        let t = &$ipred.top;
        (
            smooth(t[3], t[4], t[5]),
            smooth(t[4], t[5], t[6]),
            smooth(t[5], t[6], t[7]),
            smooth(t[6], t[7], t[7]),
        )
    }};
    (topavg; $ipred: expr) => {{
        let mean = |a: u8, b: u8| ((u16::from(a) + u16::from(b) + 1) >> 1) as u8;
        let tl = $ipred.tl;
        let t = &$ipred.top;
        (mean(tl, t[0]), mean(t[0], t[1]), mean(t[1], t[2]), mean(t[2], t[3]))
    }};
    (left; $ipred: expr) => {{
        let smooth = |a: u8, b: u8, c: u8| {
            ((u16::from(a) + u16::from(b) * 2 + u16::from(c) + 2) >> 2) as u8
        };
        let tl = $ipred.tl;
        let l = &$ipred.left;
        (
            smooth(tl, l[0], l[1]),
            smooth(l[0], l[1], l[2]),
            smooth(l[1], l[2], l[3]),
            smooth(l[2], l[3], l[4]),
        )
    }};
    (left8; $ipred: expr) => {{
        let smooth = |a: u8, b: u8, c: u8| {
            ((u16::from(a) + u16::from(b) * 2 + u16::from(c) + 2) >> 2) as u8
        };
        let l = &$ipred.left;
        (
            smooth(l[3], l[4], l[5]),
            smooth(l[4], l[5], l[6]),
            smooth(l[5], l[6], l[7]),
            smooth(l[6], l[7], l[7]),
        )
    }};
    (leftavg; $ipred: expr) => {{
        let mean = |a: u8, b: u8| ((u16::from(a) + u16::from(b) + 1) >> 1) as u8;
        let tl = $ipred.tl;
        let l = &$ipred.left;
        (mean(tl, l[0]), mean(l[0], l[1]), mean(l[1], l[2]), mean(l[2], l[3]))
    }};
}
| 272 | |
| 273 | pub struct IPred4x4 {} |
| 274 | impl IPred4x4 { |
| 275 | pub fn ipred_dc(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 276 | let dc; |
| 277 | let mut dcsum = 0; |
| 278 | for el in ipred.left.iter().take(4) { |
| 279 | dcsum += u16::from(*el); |
| 280 | } |
| 281 | for el in ipred.top.iter().take(4) { |
| 282 | dcsum += u16::from(*el); |
| 283 | } |
| 284 | dc = ((dcsum + (1 << 2)) >> 3) as u8; |
| 285 | for _ in 0..4 { |
| 286 | let out = &mut dst[off..][..4]; |
| 287 | for el in out.iter_mut() { |
| 288 | *el = dc; |
| 289 | } |
| 290 | off += stride; |
| 291 | } |
| 292 | } |
| 293 | pub fn ipred_tm(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 294 | let tl = i16::from(ipred.tl); |
| 295 | for m in 0..4 { |
| 296 | for n in 0..4 { |
| 297 | dst[off + n] = clip_u8(i16::from(ipred.left[m]) + i16::from(ipred.top[n]) - tl); |
| 298 | } |
| 299 | off += stride; |
| 300 | } |
| 301 | } |
| 302 | pub fn ipred_ve(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 303 | let (v0, v1, v2, v3) = load_pred4!(top; ipred); |
| 304 | let vert_pred = [v0, v1, v2, v3]; |
| 305 | for _ in 0..4 { |
| 306 | let out = &mut dst[off..][..4]; |
| 307 | out.copy_from_slice(&vert_pred); |
| 308 | off += stride; |
| 309 | } |
| 310 | } |
| 311 | pub fn ipred_he(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 312 | let (p0, p1, p2, _) = load_pred4!(left; ipred); |
| 313 | let p3 = ((u16::from(ipred.left[2]) + u16::from(ipred.left[3]) * 3 + 2) >> 2) as u8; |
| 314 | let hor_pred = [p0, p1, p2, p3]; |
| 315 | for m in 0..4 { |
| 316 | for n in 0..4 { |
| 317 | dst[off + n] = hor_pred[m]; |
| 318 | } |
| 319 | off += stride; |
| 320 | } |
| 321 | } |
| 322 | pub fn ipred_ld(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 323 | let (_, p0, p1, p2) = load_pred4!(top; ipred); |
| 324 | let (p3, p4, p5, p6) = load_pred4!(top8; ipred); |
| 325 | |
| 326 | dst[off + 0] = p0; dst[off + 1] = p1; dst[off + 2] = p2; dst[off + 3] = p3; |
| 327 | off += stride; |
| 328 | dst[off + 0] = p1; dst[off + 1] = p2; dst[off + 2] = p3; dst[off + 3] = p4; |
| 329 | off += stride; |
| 330 | dst[off + 0] = p2; dst[off + 1] = p3; dst[off + 2] = p4; dst[off + 3] = p5; |
| 331 | off += stride; |
| 332 | dst[off + 0] = p3; dst[off + 1] = p4; dst[off + 2] = p5; dst[off + 3] = p6; |
| 333 | } |
| 334 | pub fn ipred_rd(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 335 | let tl = load_pred4!(topleft; ipred); |
| 336 | let (l0, l1, l2, _) = load_pred4!(left; ipred); |
| 337 | let (t0, t1, t2, _) = load_pred4!(top; ipred); |
| 338 | |
| 339 | dst[off + 0] = tl; dst[off + 1] = t0; dst[off + 2] = t1; dst[off + 3] = t2; |
| 340 | off += stride; |
| 341 | dst[off + 0] = l0; dst[off + 1] = tl; dst[off + 2] = t0; dst[off + 3] = t1; |
| 342 | off += stride; |
| 343 | dst[off + 0] = l1; dst[off + 1] = l0; dst[off + 2] = tl; dst[off + 3] = t0; |
| 344 | off += stride; |
| 345 | dst[off + 0] = l2; dst[off + 1] = l1; dst[off + 2] = l0; dst[off + 3] = tl; |
| 346 | } |
| 347 | pub fn ipred_vr(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 348 | let tl = load_pred4!(topleft; ipred); |
| 349 | let (l0, l1, _, _) = load_pred4!(left; ipred); |
| 350 | let (t0, t1, t2, _) = load_pred4!(top; ipred); |
| 351 | let (m0, m1, m2, m3) = load_pred4!(topavg; ipred); |
| 352 | |
| 353 | dst[off + 0] = m0; dst[off + 1] = m1; dst[off + 2] = m2; dst[off + 3] = m3; |
| 354 | off += stride; |
| 355 | dst[off + 0] = tl; dst[off + 1] = t0; dst[off + 2] = t1; dst[off + 3] = t2; |
| 356 | off += stride; |
| 357 | dst[off + 0] = l0; dst[off + 1] = m0; dst[off + 2] = m1; dst[off + 3] = m2; |
| 358 | off += stride; |
| 359 | dst[off + 0] = l1; dst[off + 1] = tl; dst[off + 2] = t0; dst[off + 3] = t1; |
| 360 | } |
| 361 | pub fn ipred_vl(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 362 | let (_, t1, t2, t3) = load_pred4!(top; ipred); |
| 363 | let (t4, t5, t6, _) = load_pred4!(top8; ipred); |
| 364 | let (_, m1, m2, m3) = load_pred4!(topavg; ipred); |
| 365 | let m4 = ((u16::from(ipred.top[3]) + u16::from(ipred.top[4]) + 1) >> 1) as u8; |
| 366 | |
| 367 | dst[off + 0] = m1; dst[off + 1] = m2; dst[off + 2] = m3; dst[off + 3] = m4; |
| 368 | off += stride; |
| 369 | dst[off + 0] = t1; dst[off + 1] = t2; dst[off + 2] = t3; dst[off + 3] = t4; |
| 370 | off += stride; |
| 371 | dst[off + 0] = m2; dst[off + 1] = m3; dst[off + 2] = m4; dst[off + 3] = t5; |
| 372 | off += stride; |
| 373 | dst[off + 0] = t2; dst[off + 1] = t3; dst[off + 2] = t4; dst[off + 3] = t6; |
| 374 | } |
| 375 | pub fn ipred_hd(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 376 | let tl = load_pred4!(topleft; ipred); |
| 377 | let (l0, l1, l2, _) = load_pred4!(left; ipred); |
| 378 | let (m0, m1, m2, m3) = load_pred4!(leftavg; ipred); |
| 379 | let (t0, t1, _, _) = load_pred4!(top; ipred); |
| 380 | |
| 381 | dst[off + 0] = m0; dst[off + 1] = tl; dst[off + 2] = t0; dst[off + 3] = t1; |
| 382 | off += stride; |
| 383 | dst[off + 0] = m1; dst[off + 1] = l0; dst[off + 2] = m0; dst[off + 3] = tl; |
| 384 | off += stride; |
| 385 | dst[off + 0] = m2; dst[off + 1] = l1; dst[off + 2] = m1; dst[off + 3] = l0; |
| 386 | off += stride; |
| 387 | dst[off + 0] = m3; dst[off + 1] = l2; dst[off + 2] = m2; dst[off + 3] = l1; |
| 388 | } |
| 389 | pub fn ipred_hu(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) { |
| 390 | let (_, m1, m2, m3) = load_pred4!(leftavg; ipred); |
| 391 | let (_, l1, l2, _) = load_pred4!(left; ipred); |
| 392 | let l3 = ((u16::from(ipred.left[2]) + u16::from(ipred.left[3]) * 3 + 2) >> 2) as u8; |
| 393 | let p3 = ipred.left[3]; |
| 394 | |
| 395 | dst[off + 0] = m1; dst[off + 1] = l1; dst[off + 2] = m2; dst[off + 3] = l2; |
| 396 | off += stride; |
| 397 | dst[off + 0] = m2; dst[off + 1] = l2; dst[off + 2] = m3; dst[off + 3] = l3; |
| 398 | off += stride; |
| 399 | dst[off + 0] = m3; dst[off + 1] = l3; dst[off + 2] = p3; dst[off + 3] = p3; |
| 400 | off += stride; |
| 401 | dst[off + 0] = p3; dst[off + 1] = p3; dst[off + 2] = p3; dst[off + 3] = p3; |
| 402 | } |
| 403 | } |
| 404 | |
/// Loop-filter difference term: `(p1 - q1) + 3 * (q0 - p0)`.
fn delta(p1: i16, p0: i16, q0: i16, q1: i16) -> i16 {
    let outer = p1 - q1;
    let inner = q0 - p0;
    outer + 3 * inner
}
| 408 | |
/// Common signature of the loop filters in this file.
///
/// `off` addresses the first pixel after the filtered boundary, `step` is the
/// distance between pixels across the boundary, `stride` the distance between
/// consecutive filtered positions along it, and `len` the number of positions.
pub type LoopFilterFunc = fn(
    buf: &mut [u8],
    off: usize,
    step: usize,
    stride: usize,
    len: usize,
    thr: i16,
    thr_inner: i16,
    thr_hev: i16,
);
| 410 | |
| 411 | pub fn simple_loop_filter(buf: &mut [u8], mut off: usize, step: usize, stride: usize, len: usize, thr: i16, _thr_inner: i16, _thr_hev: i16) { |
| 412 | for _ in 0..len { |
| 413 | let p1 = i16::from(buf[off - step * 2]); |
| 414 | let p0 = i16::from(buf[off - step * 1]); |
| 415 | let q0 = i16::from(buf[off + step * 0]); |
| 416 | let q1 = i16::from(buf[off + step * 1]); |
| 417 | let dpq = p0 - q0; |
| 418 | if dpq.abs() < thr { |
| 419 | let diff = delta(p1, p0, q0, q1); |
| 420 | let diffq0 = (diff.min(127) + 4) >> 3; |
| 421 | let diffp0 = diffq0 - if (diff & 7) == 4 { 1 } else { 0 }; |
| 422 | buf[off - step * 1] = clip_u8(p0 + diffp0); |
| 423 | buf[off + step * 0] = clip_u8(q0 - diffq0); |
| 424 | } |
| 425 | off += stride; |
| 426 | } |
| 427 | } |
| 428 | |
| 429 | fn normal_loop_filter(buf: &mut [u8], mut off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16, edge: bool) { |
| 430 | for _ in 0..len { |
| 431 | let p0 = i16::from(buf[off - step * 1]); |
| 432 | let q0 = i16::from(buf[off + step * 0]); |
| 433 | let dpq = p0 - q0; |
| 434 | if dpq.abs() <= thr { |
| 435 | let p3 = i16::from(buf[off - step * 4]); |
| 436 | let p2 = i16::from(buf[off - step * 3]); |
| 437 | let p1 = i16::from(buf[off - step * 2]); |
| 438 | let q1 = i16::from(buf[off + step * 1]); |
| 439 | let q2 = i16::from(buf[off + step * 2]); |
| 440 | let q3 = i16::from(buf[off + step * 3]); |
| 441 | let dp2 = p3 - p2; |
| 442 | let dp1 = p2 - p1; |
| 443 | let dp0 = p1 - p0; |
| 444 | let dq0 = q1 - q0; |
| 445 | let dq1 = q2 - q1; |
| 446 | let dq2 = q3 - q2; |
| 447 | if (dp0.abs() <= thr_inner) && (dp1.abs() <= thr_inner) && |
| 448 | (dp2.abs() <= thr_inner) && (dq0.abs() <= thr_inner) && |
| 449 | (dq1.abs() <= thr_inner) && (dq2.abs() <= thr_inner) { |
| 450 | let high_edge_variation = (dp0.abs() > thr_hev) || (dq0.abs() > thr_hev); |
| 451 | if high_edge_variation { |
| 452 | let diff = delta(p1, p0, q0, q1); |
| 453 | let diffq0 = (diff.min(127) + 4) >> 3; |
| 454 | let diffp0 = diffq0 - if (diff & 7) == 4 { 1 } else { 0 }; |
| 455 | buf[off - step * 1] = clip_u8(p0 + diffp0); |
| 456 | buf[off + step * 0] = clip_u8(q0 - diffq0); |
| 457 | } else if edge { |
| 458 | let d = delta(p1, p0, q0, q1); |
| 459 | let diff0 = (d * 27 + 63) >> 7; |
| 460 | buf[off - step * 1] = clip_u8(p0 + diff0); |
| 461 | buf[off + step * 0] = clip_u8(q0 - diff0); |
| 462 | let diff1 = (d * 18 + 63) >> 7; |
| 463 | buf[off - step * 2] = clip_u8(p1 + diff1); |
| 464 | buf[off + step * 1] = clip_u8(q1 - diff1); |
| 465 | let diff2 = (d * 9 + 63) >> 7; |
| 466 | buf[off - step * 3] = clip_u8(p2 + diff2); |
| 467 | buf[off + step * 2] = clip_u8(q2 - diff2); |
| 468 | } else { |
| 469 | let diff = 3 * (q0 - p0); |
| 470 | let diffq0 = (diff.min(127) + 4) >> 3; |
| 471 | let diffp0 = diffq0 - if (diff & 7) == 4 { 1 } else { 0 }; |
| 472 | buf[off - step * 1] = clip_u8(p0 + diffp0); |
| 473 | buf[off + step * 0] = clip_u8(q0 - diffq0); |
| 474 | let diff2 = (diffq0 + 1) >> 1; |
| 475 | buf[off - step * 2] = clip_u8(p1 + diff2); |
| 476 | buf[off + step * 1] = clip_u8(q1 - diff2); |
| 477 | } |
| 478 | } |
| 479 | } |
| 480 | off += stride; |
| 481 | } |
| 482 | } |
| 483 | |
| 484 | pub fn normal_loop_filter_inner(buf: &mut [u8], off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16) { |
| 485 | normal_loop_filter(buf, off, step, stride, len, thr, thr_inner, thr_hev, false); |
| 486 | } |
| 487 | |
| 488 | pub fn normal_loop_filter_edge(buf: &mut [u8], off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16) { |
| 489 | normal_loop_filter(buf, off, step, stride, len, thr, thr_inner, thr_hev, true); |
| 490 | } |
| 491 | |
/// Six-tap interpolation kernels, one per filter mode `0..=7`.
///
/// Every kernel sums to 128, matching the rounded 7-bit down-shift applied
/// by `interpolate!`. Mode 0 passes the third sample through unchanged, and
/// modes 5-7 are the mirror images of modes 3-1.
const VP7_BICUBIC_FILTERS: [[i16; 6]; 8] = [
    [0,   0, 128,   0,   0, 0],
    [0,  -6, 123,  12,  -1, 0],
    [2, -11, 108,  36,  -8, 1],
    [0,  -9,  93,  50,  -6, 0],
    [3, -16,  77,  77, -16, 3],
    [0,  -6,  50,  93,  -9, 0],
    [1,  -8,  36, 108, -11, 2],
    [0,  -1,  12, 123,  -6, 0],
];
| 502 | |
// Applies the six-tap kernel `VP7_BICUBIC_FILTERS[$mode]` to the samples
// `$src[$off]`, `$src[$off + $step]`, ..., `$src[$off + 5 * $step]` and
// returns the rounded result (bias 64, down-shift 7) saturated to a byte.
macro_rules! interpolate {
    ($src: expr, $off: expr, $step: expr, $mode: expr) => {{
        let kernel = &VP7_BICUBIC_FILTERS[$mode];
        let mut acc: i32 = 64;
        for (tap, coef) in kernel.iter().enumerate() {
            acc += i32::from($src[$off + tap * $step]) * i32::from(*coef);
        }
        clip_u8((acc >> 7) as i16)
    }}
}
| 520 | |
/// Extra pixels needed before a block (left/above) by the six-tap filter.
const EDGE_PRE: usize = 2;
/// Extra pixels reserved after a block (right/below) for the six-tap filter.
const EDGE_POST: usize = 4;
/// Row length of the intermediate buffer used for two-dimensional filtering.
const TMP_STRIDE: usize = 16;
| 524 | |
| 525 | fn mc_block_common(dst: &mut [u8], mut doff: usize, dstride: usize, src: &[u8], sstride: usize, size: usize, mx: usize, my: usize) { |
| 526 | if (mx == 0) && (my == 0) { |
| 527 | let dst = &mut dst[doff..]; |
| 528 | let src = &src[EDGE_PRE + EDGE_PRE * sstride..]; |
| 529 | for (out, src) in dst.chunks_mut(dstride).take(size).zip(src.chunks(sstride)) { |
| 530 | (&mut out[0..size]).copy_from_slice(&src[0..size]); |
| 531 | } |
| 532 | } else if my == 0 { |
| 533 | let src = &src[EDGE_PRE * sstride..]; |
| 534 | for src in src.chunks(sstride).take(size) { |
| 535 | for x in 0..size { |
| 536 | dst[doff + x] = interpolate!(src, x, 1, mx); |
| 537 | } |
| 538 | doff += dstride; |
| 539 | } |
| 540 | } else if mx == 0 { |
| 541 | let src = &src[EDGE_PRE..]; |
| 542 | for y in 0..size { |
| 543 | for x in 0..size { |
| 544 | dst[doff + x] = interpolate!(src, x + y * sstride, sstride, my); |
| 545 | } |
| 546 | doff += dstride; |
| 547 | } |
| 548 | } else { |
| 549 | let mut tmp = [0u8; TMP_STRIDE * (16 + EDGE_PRE + EDGE_POST)]; |
| 550 | for (y, dst) in tmp.chunks_mut(TMP_STRIDE).take(size + EDGE_PRE + EDGE_POST).enumerate() { |
| 551 | for x in 0..size { |
| 552 | dst[x] = interpolate!(src, x + y * sstride, 1, mx); |
| 553 | } |
| 554 | } |
| 555 | for y in 0..size { |
| 556 | for x in 0..size { |
| 557 | dst[doff + x] = interpolate!(tmp, x + y * TMP_STRIDE, TMP_STRIDE, my); |
| 558 | } |
| 559 | doff += dstride; |
| 560 | } |
| 561 | } |
| 562 | } |
| 563 | fn mc_block(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, |
| 564 | mvx: i16, mvy: i16, reffrm: NAVideoBufferRef<u8>, plane: usize, |
| 565 | mc_buf: &mut [u8], size: usize) { |
| 566 | if (mvx == 0) && (mvy == 0) { |
| 567 | let dst = &mut dst[doff..]; |
| 568 | let sstride = reffrm.get_stride(plane); |
| 569 | let srcoff = reffrm.get_offset(plane) + xpos + ypos * sstride; |
| 570 | let src = &reffrm.get_data(); |
| 571 | let src = &src[srcoff..]; |
| 572 | for (out, src) in dst.chunks_mut(dstride).take(size).zip(src.chunks(sstride)) { |
| 573 | (&mut out[0..size]).copy_from_slice(&src[0..size]); |
| 574 | } |
| 575 | return; |
| 576 | } |
| 577 | let (w, h) = reffrm.get_dimensions(plane); |
| 578 | let wa = if plane == 0 { (w + 15) & !15 } else { (w + 7) & !7 } as isize; |
| 579 | let ha = if plane == 0 { (h + 15) & !15 } else { (h + 7) & !7 } as isize; |
| 580 | let bsize = (size as isize) + (EDGE_PRE as isize) + (EDGE_POST as isize); |
| 581 | let ref_x = (xpos as isize) + ((mvx >> 3) as isize) - (EDGE_PRE as isize); |
| 582 | let ref_y = (ypos as isize) + ((mvy >> 3) as isize) - (EDGE_PRE as isize); |
| 583 | |
| 584 | let (src, sstride) = if (ref_x < 0) || (ref_x + bsize > wa) || (ref_y < 0) || (ref_y + bsize > ha) { |
| 585 | edge_emu(&reffrm, ref_x, ref_y, bsize as usize, bsize as usize, mc_buf, 32, plane, 0); |
| 586 | (mc_buf as &[u8], 32) |
| 587 | } else { |
| 588 | let off = reffrm.get_offset(plane); |
| 589 | let stride = reffrm.get_stride(plane); |
| 590 | let data = reffrm.get_data(); |
| 591 | (&data[off + (ref_x as usize) + (ref_y as usize) * stride..], stride) |
| 592 | }; |
| 593 | let mx = (mvx & 7) as usize; |
| 594 | let my = (mvy & 7) as usize; |
| 595 | mc_block_common(dst, doff, dstride, src, sstride, size, mx, my); |
| 596 | } |
| 597 | pub fn mc_block16x16(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, |
| 598 | mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) { |
| 599 | mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 16); |
| 600 | } |
| 601 | pub fn mc_block8x8(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, |
| 602 | mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) { |
| 603 | mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 8); |
| 604 | } |
| 605 | pub fn mc_block4x4(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, |
| 606 | mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) { |
| 607 | mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 4); |
| 608 | } |
| 609 | pub fn mc_block_special(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize, |
| 610 | mvx: i16, mvy: i16, reffrm: NAVideoBufferRef<u8>, plane: usize, |
| 611 | mc_buf: &mut [u8], size: usize, pitch_mode: u8) { |
| 612 | const Y_MUL: [isize; 8] = [ 1, 0, 2, 4, 1, 1, 2, 2 ]; |
| 613 | const Y_OFF: [isize; 8] = [ 0, 4, 0, 0, 1, -1, 1, -1 ]; |
| 614 | const ILACE_CHROMA: [bool; 8] = [ false, false, true, true, false, false, true, true ]; // mode&2 != 0 |
| 615 | |
| 616 | let pitch_mode = (pitch_mode & 7) as usize; |
| 617 | let (xstep, ymul) = if plane == 0 { |
| 618 | (Y_OFF[pitch_mode], Y_MUL[pitch_mode]) |
| 619 | } else { |
| 620 | (0, if ILACE_CHROMA[pitch_mode] { 2 } else { 1 }) |
| 621 | }; |
| 622 | |
| 623 | let (w, h) = reffrm.get_dimensions(plane); |
| 624 | let wa = if plane == 0 { (w + 15) & !15 } else { (w + 7) & !7 } as isize; |
| 625 | let ha = if plane == 0 { (h + 15) & !15 } else { (h + 7) & !7 } as isize; |
| 626 | let mut start_x = (xpos as isize) + ((mvx >> 3) as isize) - (EDGE_PRE as isize); |
| 627 | let mut end_x = (xpos as isize) + ((mvx >> 3) as isize) + ((size + EDGE_POST) as isize); |
| 628 | if xstep < 0 { |
| 629 | start_x -= (size + EDGE_POST) as isize; |
| 630 | } else if xstep > 0 { |
| 631 | end_x += (size as isize) * xstep; |
| 632 | } |
| 633 | let mut start_y = (ypos as isize) + ((mvy >> 3) as isize) - (EDGE_PRE as isize) * ymul; |
| 634 | let mut end_y = (ypos as isize) + ((mvy >> 3) as isize) + ((size + EDGE_POST) as isize) * ymul; |
| 635 | if ymul == 0 { |
| 636 | start_y -= EDGE_PRE as isize; |
| 637 | end_y += (EDGE_POST + 1) as isize; |
| 638 | } |
| 639 | let off = reffrm.get_offset(plane); |
| 640 | let stride = reffrm.get_stride(plane); |
| 641 | let (src, sstride) = if (start_x >= 0) && (end_x <= wa) && (start_y >= 0) && (end_y <= ha) { |
| 642 | let data = reffrm.get_data(); |
| 643 | (&data[off + (start_x as usize) + (start_y as usize) * stride..], |
| 644 | ((stride as isize) + xstep) as usize) |
| 645 | } else { |
| 646 | let add = (size + EDGE_PRE + EDGE_POST) * (xstep.abs() as usize); |
| 647 | let bw = size + EDGE_PRE + EDGE_POST + add; |
| 648 | let bh = (end_y - start_y) as usize; |
| 649 | let bo = if xstep >= 0 { 0 } else { add }; |
| 650 | edge_emu(&reffrm, start_x + (bo as isize), start_y, bw, bh, mc_buf, 128, plane, 0); |
| 651 | (&mc_buf[bo..], (128 + xstep) as usize) |
| 652 | }; |
| 653 | let mx = (mvx & 7) as usize; |
| 654 | let my = (mvy & 7) as usize; |
| 655 | match ymul { |
| 656 | 0 => unimplemented!(), |
| 657 | 1 => mc_block_common(dst, doff, dstride, src, sstride, size, mx, my), |
| 658 | 2 => { |
| 659 | let hsize = size / 2; |
| 660 | for y in 0..2 { |
| 661 | for x in 0..2 { |
| 662 | mc_block_common(dst, doff + x * hsize + y * hsize * dstride, dstride, |
| 663 | &src[x * hsize + y * sstride..], sstride * 2, hsize, mx, my); |
| 664 | } |
| 665 | } |
| 666 | }, |
| 667 | 4 => { |
| 668 | let qsize = size / 4; |
| 669 | for y in 0..4 { |
| 670 | for x in 0..4 { |
| 671 | mc_block_common(dst, doff + x * qsize + y * qsize * dstride, dstride, |
| 672 | &src[x * qsize + y * sstride..], sstride * 4, qsize, mx, my); |
| 673 | } |
| 674 | } |
| 675 | }, |
| 676 | _ => unreachable!(), |
| 677 | }; |
| 678 | } |
| 679 | |
| 680 | pub fn fade_frame(srcfrm: NAVideoBufferRef<u8>, dstfrm: &mut NASimpleVideoFrame<u8>, alpha: u16, beta: u16) { |
| 681 | let mut fade_lut = [0u8; 256]; |
| 682 | for (i, el) in fade_lut.iter_mut().enumerate() { |
| 683 | let y = i as u16; |
| 684 | *el = (y + ((y * beta) >> 8) + alpha).max(0).min(255) as u8; |
| 685 | } |
| 686 | |
| 687 | let (w, h) = srcfrm.get_dimensions(0); |
| 688 | let (wa, ha) = ((w + 15) & !15, (h + 15) & !15); |
| 689 | let soff = srcfrm.get_offset(0); |
| 690 | let sstride = srcfrm.get_stride(0); |
| 691 | let sdata = srcfrm.get_data(); |
| 692 | let src = &sdata[soff..]; |
| 693 | let dstride = dstfrm.stride[0]; |
| 694 | let dst = &mut dstfrm.data[dstfrm.offset[0]..]; |
| 695 | for (src, dst) in src.chunks(sstride).zip(dst.chunks_mut(dstride)).take(ha) { |
| 696 | for (s, d) in src.iter().zip(dst.iter_mut()).take(wa) { |
| 697 | *d = fade_lut[*s as usize]; |
| 698 | } |
| 699 | } |
| 700 | |
| 701 | for plane in 1..3 { |
| 702 | let (w, h) = srcfrm.get_dimensions(plane); |
| 703 | let (wa, ha) = ((w + 7) & !7, (h + 7) & !7); |
| 704 | let soff = srcfrm.get_offset(plane); |
| 705 | let sstride = srcfrm.get_stride(plane); |
| 706 | let sdata = srcfrm.get_data(); |
| 707 | let src = &sdata[soff..]; |
| 708 | let dstride = dstfrm.stride[plane]; |
| 709 | let dst = &mut dstfrm.data[dstfrm.offset[plane]..]; |
| 710 | for (src, dst) in src.chunks(sstride).zip(dst.chunks_mut(dstride)).take(ha) { |
| 711 | (&mut dst[0..wa]).copy_from_slice(&src[0..wa]); |
| 712 | } |
| 713 | } |
| 714 | } |