| 1 | use nihav_codec_support::codecs::blockdsp::*; |
| 2 | |
/// Intra prediction modes for 4x4 blocks (H.264-style set used by SVQ3).
///
/// The first nine variants are the selectable directional modes (see
/// `INTRA_PRED4`); the last three are DC-only fallbacks computed from a
/// single edge (or from no edge at all for `DC128`).
#[allow(dead_code)]
#[derive(Debug,Clone,Copy)]
pub enum PredType4x4 {
    Ver,            // copy the row above into every row
    Hor,            // replicate the left neighbour across each row
    DC,             // average of the top and left neighbours
    DiagDownLeft,
    DiagDownRight,
    VerRight,
    HorDown,
    VerLeft,
    HorUp,
    LeftDC,         // DC from the left edge only
    TopDC,          // DC from the top edge only
    DC128,          // flat 128 fill
}
| 19 | |
/// Intra prediction modes for full blocks, used by both the 8x8 and the
/// 16x16 predictor tables (`IPRED_FUNCS8X8` / `IPRED_FUNCS16X16`).
///
/// The first four variants are the selectable modes (see `INTRA_PRED16`);
/// `LeftDC`/`TopDC`/`DC128` are DC variants computed from one edge only
/// (or a constant fill for `DC128`).
#[allow(dead_code)]
#[derive(Debug,Clone,Copy)]
pub enum PredType8x8 {
    DC,
    Hor,
    Ver,
    Plane,
    LeftDC,
    TopDC,
    DC128
}
| 31 | |
/// Selectable whole-block intra modes — presumably indexed by the coded
/// mode number (TODO confirm against the decoder). The order mirrors
/// `IPRED_FUNCS16X16` / `IPRED_FUNCS8X8`.
pub const INTRA_PRED16: [PredType8x8; 4] = [
    PredType8x8::DC, PredType8x8::Hor, PredType8x8::Ver, PredType8x8::Plane
];
/// The nine selectable 4x4 intra modes — presumably indexed by the coded
/// mode number (TODO confirm against the decoder). Same order as the first
/// nine entries of `IPRED_FUNCS4X4`.
pub const INTRA_PRED4: [PredType4x4; 9] = [
    PredType4x4::Ver, PredType4x4::Hor, PredType4x4::DC,
    PredType4x4::DiagDownLeft, PredType4x4::DiagDownRight,
    PredType4x4::VerRight, PredType4x4::HorDown,
    PredType4x4::VerLeft, PredType4x4::HorUp
];
| 41 | |
| 42 | |
/// Dequantizer scale factors indexed by quantizer 0..31, in 20-bit fixed
/// point: `idct()` and `idct_dc_coeffs()` multiply coefficients by these
/// and shift the result right by 20 (with +1<<19 rounding).
const SVQ3_QUANTS: [i32; 32] = [
      3881,   4351,   4890,   5481,   6154,   6914,   7761,   8718,
      9781,  10987,  12339,  13828,  15523,  17435,  19561,  21873,
     24552,  27656,  30847,  34870,  38807,  43747,  49103,  54683,
     61694,  68745,  77615,  89113, 100253, 109366, 126635, 141533
];
| 49 | |
/// In-place 2x2 Walsh–Hadamard butterfly over the four chroma DC
/// coefficients (no normalization is applied here).
pub fn chroma_transform(blk: &mut [i16; 4]) {
    let (a, b, c, d) = (blk[0], blk[1], blk[2], blk[3]);
    let sum02  = a + c;
    let diff02 = a - c;
    let sum13  = b + d;
    let diff13 = b - d;
    *blk = [sum02 + sum13, sum02 - sum13, diff02 + diff13, diff02 - diff13];
}
| 60 | |
| 61 | pub fn idct_dc_coeffs(blk: &mut [i16; 16], q: u8) { |
| 62 | let quant = SVQ3_QUANTS[q as usize]; |
| 63 | let mut tmp = [0i32; 16]; |
| 64 | for (src, dst) in blk.chunks(4).zip(tmp.chunks_mut(4)) { |
| 65 | let s0 = i32::from(src[0]); |
| 66 | let s1 = i32::from(src[1]); |
| 67 | let s2 = i32::from(src[2]); |
| 68 | let s3 = i32::from(src[3]); |
| 69 | let t0 = 13 * (s0 + s2); |
| 70 | let t1 = 13 * (s0 - s2); |
| 71 | let t2 = 17 * s1 + 7 * s3; |
| 72 | let t3 = 7 * s1 - 17 * s3; |
| 73 | dst[0] = t0 + t2; |
| 74 | dst[1] = t1 + t3; |
| 75 | dst[2] = t1 - t3; |
| 76 | dst[3] = t0 - t2; |
| 77 | } |
| 78 | for i in 0..4 { |
| 79 | let s0 = tmp[i]; |
| 80 | let s1 = tmp[i + 4]; |
| 81 | let s2 = tmp[i + 4 * 2]; |
| 82 | let s3 = tmp[i + 4 * 3]; |
| 83 | let t0 = 13 * (s0 + s2); |
| 84 | let t1 = 13 * (s0 - s2); |
| 85 | let t2 = 17 * s1 + 7 * s3; |
| 86 | let t3 = 7 * s1 - 17 * s3; |
| 87 | blk[i] = (((t0 + t2).wrapping_mul(quant) + (1 << 19)) >> 20) as i16; |
| 88 | blk[i + 4] = (((t1 + t3).wrapping_mul(quant) + (1 << 19)) >> 20) as i16; |
| 89 | blk[i + 4 * 2] = (((t1 - t3).wrapping_mul(quant) + (1 << 19)) >> 20) as i16; |
| 90 | blk[i + 4 * 3] = (((t0 - t2).wrapping_mul(quant) + (1 << 19)) >> 20) as i16; |
| 91 | } |
| 92 | } |
| 93 | |
| 94 | pub fn idct(blk: &mut [i16; 16], q: u8, chroma: bool) { |
| 95 | let quant = SVQ3_QUANTS[q as usize]; |
| 96 | let mut tmp = [0i32; 16]; |
| 97 | let dc = 13 * 13 * if chroma { quant * i32::from(blk[0]) / 2 } else { i32::from(blk[0]) * 1538 }; |
| 98 | blk[0] = 0; |
| 99 | for (src, dst) in blk.chunks(4).zip(tmp.chunks_mut(4)) { |
| 100 | let s0 = i32::from(src[0]); |
| 101 | let s1 = i32::from(src[1]); |
| 102 | let s2 = i32::from(src[2]); |
| 103 | let s3 = i32::from(src[3]); |
| 104 | let t0 = 13 * (s0 + s2); |
| 105 | let t1 = 13 * (s0 - s2); |
| 106 | let t2 = 17 * s1 + 7 * s3; |
| 107 | let t3 = 7 * s1 - 17 * s3; |
| 108 | dst[0] = t0 + t2; |
| 109 | dst[1] = t1 + t3; |
| 110 | dst[2] = t1 - t3; |
| 111 | dst[3] = t0 - t2; |
| 112 | } |
| 113 | for i in 0..4 { |
| 114 | let s0 = tmp[i]; |
| 115 | let s1 = tmp[i + 4]; |
| 116 | let s2 = tmp[i + 4 * 2]; |
| 117 | let s3 = tmp[i + 4 * 3]; |
| 118 | let t0 = 13 * (s0 + s2); |
| 119 | let t1 = 13 * (s0 - s2); |
| 120 | let t2 = 17 * s1 + 7 * s3; |
| 121 | let t3 = 7 * s1 - 17 * s3; |
| 122 | blk[i] = (((t0 + t2).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16; |
| 123 | blk[i + 4] = (((t1 + t3).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16; |
| 124 | blk[i + 4 * 2] = (((t1 - t3).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16; |
| 125 | blk[i + 4 * 3] = (((t0 - t2).wrapping_mul(quant) + dc + (1 << 19)) >> 20) as i16; |
| 126 | } |
| 127 | } |
| 128 | |
/// Adds a 4x4 block of residuals to the picture area at `offset`, clamping
/// each result to the 0..=255 pixel range.
pub fn add_coeffs(dst: &mut [u8], offset: usize, stride: usize, coeffs: &[i16]) {
    // Bounds-check the whole 4x4 footprint up front (3 full rows + 4 pels).
    let area = &mut dst[offset..][..stride * 3 + 4];
    for (row, residual) in area.chunks_mut(stride).take(4).zip(coeffs.chunks(4)) {
        for (pel, &delta) in row.iter_mut().take(4).zip(residual.iter()) {
            let sum = i32::from(*pel) + i32::from(delta);
            *pel = sum.clamp(0, 255) as u8;
        }
    }
}
| 137 | |
/// Averages a `bw` x `bh` source block into the destination block with
/// upward rounding (`(d + s + 1) >> 1`), as used for bidirectional MC.
pub fn avg(dst: &mut [u8], dstride: usize,
           src: &[u8], sstride: usize, bw: usize, bh: usize) {
    let rows = dst.chunks_mut(dstride).zip(src.chunks(sstride));
    for (drow, srow) in rows.take(bh) {
        for (d, &s) in drow.iter_mut().zip(srow.iter()).take(bw) {
            let sum = u16::from(*d) + u16::from(s);
            *d = ((sum + 1) >> 1) as u8;
        }
    }
}
| 146 | |
| 147 | fn clip8(val: i16) -> u8 { val.max(0).min(255) as u8 } |
| 148 | |
/// Fills a `bsize` x `bsize` block with the neutral value 128 (used when no
/// neighbours are available for DC prediction).
fn ipred_dc128(buf: &mut [u8], idx: usize, stride: usize, bsize: usize) {
    for y in 0..bsize {
        let row = idx + y * stride;
        for x in 0..bsize {
            buf[row + x] = 128;
        }
    }
}
/// Vertical prediction: copies the row directly above the block into each
/// of its `bsize` rows.
fn ipred_ver(buf: &mut [u8], idx: usize, stride: usize, bsize: usize) {
    let top = idx - stride;
    for y in 0..bsize {
        let row = idx + y * stride;
        buf.copy_within(top..top + bsize, row);
    }
}
/// Horizontal prediction: replicates each row's left neighbour across the
/// whole row of the block.
fn ipred_hor(buf: &mut [u8], idx: usize, stride: usize, bsize: usize) {
    for y in 0..bsize {
        let row = idx + y * stride;
        let left = buf[row - 1];
        for x in 0..bsize {
            buf[row + x] = left;
        }
    }
}
/// DC prediction from both edges: averages the `bsize` top and `bsize`
/// left neighbours (rounded via `1 << (shift - 1)`) and fills the block.
fn ipred_dc(buf: &mut [u8], idx: usize, stride: usize, bsize: usize, shift: u8) {
    let top_sum: u16 = (0..bsize).map(|i| u16::from(buf[idx - stride + i])).sum();
    let left_sum: u16 = (0..bsize).map(|i| u16::from(buf[idx - 1 + i * stride])).sum();
    let dc = ((top_sum + left_sum + (1 << (shift - 1))) >> shift) as u8;

    for y in 0..bsize {
        let row = idx + y * stride;
        for x in 0..bsize {
            buf[row + x] = dc;
        }
    }
}
/// DC prediction when only the left edge is available: averages the
/// `bsize` left neighbours and fills the block.
fn ipred_left_dc(buf: &mut [u8], idx: usize, stride: usize, bsize: usize, shift: u8) {
    let left_sum: u16 = (0..bsize).map(|i| u16::from(buf[idx - 1 + i * stride])).sum();
    let dc = ((left_sum + (1 << (shift - 1))) >> shift) as u8;

    for y in 0..bsize {
        let row = idx + y * stride;
        for x in 0..bsize {
            buf[row + x] = dc;
        }
    }
}
/// DC prediction when only the top edge is available: averages the
/// `bsize` pixels of the row above and fills the block.
fn ipred_top_dc(buf: &mut [u8], idx: usize, stride: usize, bsize: usize, shift: u8) {
    let top_sum: u16 = (0..bsize).map(|i| u16::from(buf[idx - stride + i])).sum();
    let dc = ((top_sum + (1 << (shift - 1))) >> shift) as u8;

    for y in 0..bsize {
        let row = idx + y * stride;
        for x in 0..bsize {
            buf[row + x] = dc;
        }
    }
}
| 199 | |
/// Loads `len` pixels of the row directly above `idx` into `dst`, widened
/// to 16 bits for the prediction arithmetic.
///
/// Fix: the buffer is only read here, so take it as a shared `&[u8]`
/// instead of `&mut [u8]`; callers holding an `&mut [u8]` reborrow it
/// implicitly, so all existing call sites keep compiling.
fn load_top(dst: &mut [u16], buf: &[u8], idx: usize, stride: usize, len: usize) {
    for i in 0..len { dst[i] = u16::from(buf[idx - stride + i]); }
}
/// Loads `len` pixels of the column to the left of `idx` (one per row) into
/// `dst`, widened to 16 bits for the prediction arithmetic.
///
/// Fix: the buffer is only read here, so take it as a shared `&[u8]`
/// instead of `&mut [u8]`; callers holding an `&mut [u8]` reborrow it
/// implicitly, so all existing call sites keep compiling.
fn load_left(dst: &mut [u16], buf: &[u8], idx: usize, stride: usize, len: usize) {
    for i in 0..len { dst[i] = u16::from(buf[idx - 1 + i * stride]); }
}
| 206 | |
/// 4x4 vertical prediction: copies the row above into all four rows.
fn ipred_4x4_ver(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_ver(buf, idx, stride, 4);
}
/// 4x4 horizontal prediction: replicates each row's left neighbour.
fn ipred_4x4_hor(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_hor(buf, idx, stride, 4);
}
| 213 | fn ipred_4x4_diag_down_left(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { |
| 214 | let mut t: [u16; 8] = [0; 8]; |
| 215 | let mut l: [u16; 8] = [0; 8]; |
| 216 | load_top(&mut t, buf, idx, stride, 4); |
| 217 | load_left(&mut l, buf, idx, stride, 4); |
| 218 | let a = ((l[1] + t[1]) >> 1) as u8; |
| 219 | let b = ((l[2] + t[2]) >> 1) as u8; |
| 220 | let c = ((l[3] + t[3]) >> 1) as u8; |
| 221 | |
| 222 | let dst = &mut buf[idx..]; |
| 223 | dst[0] = a; dst[1] = b; dst[2] = c; dst[3] = c; |
| 224 | let dst = &mut buf[idx + stride..]; |
| 225 | dst[0] = b; dst[1] = c; dst[2] = c; dst[3] = c; |
| 226 | let dst = &mut buf[idx + stride * 2..]; |
| 227 | dst[0] = c; dst[1] = c; dst[2] = c; dst[3] = c; |
| 228 | let dst = &mut buf[idx + stride * 3..]; |
| 229 | dst[0] = c; dst[1] = c; dst[2] = c; dst[3] = c; |
| 230 | } |
/// 4x4 diagonal-down-right prediction.
///
/// `t` is loaded starting at `idx - 1`, so `t[0]` is the top-left corner
/// pixel and `t[1..]` the row above; likewise `l[0]` is the corner and
/// `l[1..]` the column to the left.
fn ipred_4x4_diag_down_right(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    let mut t: [u16; 5] = [0; 5];
    let mut l: [u16; 5] = [0; 5];
    load_top(&mut t, buf, idx - 1, stride, 5);
    load_left(&mut l, buf, idx - stride, stride, 5);
    let dst = &mut buf[idx..];

    for j in 0..4 {
        // Below the main diagonal: 3-tap filter down the left column.
        for i in 0..j {
            dst[i + j * stride] = ((l[j - i - 1] + 2 * l[j - i] + l[j - i + 1] + 2) >> 2) as u8;
        }
        // On the diagonal: 3-tap filter across the top-left corner.
        dst[j + j * stride] = ((l[1] + 2 * l[0] + t[1] + 2) >> 2) as u8;
        // Above the main diagonal: 3-tap filter along the top row.
        for i in (j+1)..4 {
            dst[i + j * stride] = ((t[i - j - 1] + 2 * t[i - j] + t[i - j + 1] + 2) >> 2) as u8;
        }
    }
}
/// 4x4 vertical-right prediction.
///
/// `t[0]` and `l[0]` both hold the top-left corner pixel (both loads start
/// one sample before the block edge); `t[1..]` is the row above and
/// `l[1..]` the column to the left.
fn ipred_4x4_ver_right(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    let mut t: [u16; 5] = [0; 5];
    let mut l: [u16; 5] = [0; 5];
    load_top(&mut t, buf, idx - 1, stride, 5);
    load_left(&mut l, buf, idx - stride, stride, 5);
    let dst = &mut buf[idx..];

    for j in 0..4 {
        for i in 0..4 {
            // zvr >= 0: predict from the top row (even -> 2-tap average,
            // odd -> 3-tap filter); zvr == -1: corner filter;
            // zvr < -1: 3-tap filter down the left column.
            let zvr = ((2 * i) as i8) - (j as i8);
            let pix;
            if zvr >= 0 {
                if (zvr & 1) == 0 {
                    pix = (t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 1) >> 1;
                } else {
                    pix = (t[i - (j >> 1) - 1] + 2 * t[i - (j >> 1)] + t[i - (j >> 1) + 1] + 2) >> 2;
                }
            } else {
                if zvr == -1 {
                    pix = (l[1] + 2 * l[0] + t[1] + 2) >> 2;
                } else {
                    pix = (l[j] + 2 * l[j - 1] + l[j - 2] + 2) >> 2;
                }
            }
            dst[i + j * stride] = pix as u8;
        }
    }
}
| 276 | fn ipred_4x4_ver_left(buf: &mut [u8], idx: usize, stride: usize, tr: &[u8]) { |
| 277 | let mut t: [u16; 8] = [0; 8]; |
| 278 | load_top(&mut t, buf, idx, stride, 4); |
| 279 | for i in 0..4 { t[i + 4] = u16::from(tr[i]); } |
| 280 | let dst = &mut buf[idx..]; |
| 281 | |
| 282 | dst[0 + 0 * stride] = ((t[0] + t[1] + 1) >> 1) as u8; |
| 283 | let pix = ((t[1] + t[2] + 1) >> 1) as u8; |
| 284 | dst[1 + 0 * stride] = pix; |
| 285 | dst[0 + 2 * stride] = pix; |
| 286 | let pix = ((t[2] + t[3] + 1) >> 1) as u8; |
| 287 | dst[2 + 0 * stride] = pix; |
| 288 | dst[1 + 2 * stride] = pix; |
| 289 | let pix = ((t[3] + t[4] + 1) >> 1) as u8; |
| 290 | dst[3 + 0 * stride] = pix; |
| 291 | dst[2 + 2 * stride] = pix; |
| 292 | dst[3 + 2 * stride] = ((t[4] + t[5] + 1) >> 1) as u8; |
| 293 | dst[0 + 1 * stride] = ((t[0] + 2*t[1] + t[2] + 2) >> 2) as u8; |
| 294 | let pix = ((t[1] + 2*t[2] + t[3] + 2) >> 2) as u8; |
| 295 | dst[1 + 1 * stride] = pix; |
| 296 | dst[0 + 3 * stride] = pix; |
| 297 | let pix = ((t[2] + 2*t[3] + t[4] + 2) >> 2) as u8; |
| 298 | dst[2 + 1 * stride] = pix; |
| 299 | dst[1 + 3 * stride] = pix; |
| 300 | let pix = ((t[3] + 2*t[4] + t[5] + 2) >> 2) as u8; |
| 301 | dst[3 + 1 * stride] = pix; |
| 302 | dst[2 + 3 * stride] = pix; |
| 303 | dst[3 + 3 * stride] = ((t[4] + 2*t[5] + t[6] + 2) >> 2) as u8; |
| 304 | } |
/// 4x4 horizontal-down prediction.
///
/// `t[0]` and `l[0]` both hold the top-left corner pixel (both loads start
/// one sample before the block edge); `t[1..]` is the row above and
/// `l[1..]` the column to the left. Mirror image of `ipred_4x4_ver_right`
/// with the roles of rows and columns swapped.
fn ipred_4x4_hor_down(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    let mut t: [u16; 5] = [0; 5];
    let mut l: [u16; 5] = [0; 5];
    load_top(&mut t, buf, idx - 1, stride, 5);
    load_left(&mut l, buf, idx - stride, stride, 5);
    let dst = &mut buf[idx..];

    for j in 0..4 {
        for i in 0..4 {
            // zhd >= 0: predict from the left column (even -> 2-tap average,
            // odd -> 3-tap filter); zhd == -1: corner filter;
            // zhd < -1: 3-tap filter along the top row.
            let zhd = ((2 * j) as i8) - (i as i8);
            let pix;
            if zhd >= 0 {
                if (zhd & 1) == 0 {
                    pix = (l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 1) >> 1;
                } else {
                    pix = (l[j - (i >> 1) - 1] + 2 * l[j - (i >> 1)] + l[j - (i >> 1) + 1] + 2) >> 2;
                }
            } else {
                if zhd == -1 {
                    pix = (l[1] + 2 * l[0] + t[1] + 2) >> 2;
                } else {
                    pix = (t[i - 2] + 2 * t[i - 1] + t[i] + 2) >> 2;
                }
            }
            dst[i + j * stride] = pix as u8;
        }
    }
}
| 333 | fn ipred_4x4_hor_up(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) { |
| 334 | let mut l: [u16; 8] = [0; 8]; |
| 335 | load_left(&mut l, buf, idx, stride, 8); |
| 336 | let dst = &mut buf[idx..]; |
| 337 | |
| 338 | dst[0 + 0 * stride] = ((l[0] + l[1] + 1) >> 1) as u8; |
| 339 | dst[1 + 0 * stride] = ((l[0] + 2*l[1] + l[2] + 2) >> 2) as u8; |
| 340 | let pix = ((l[1] + l[2] + 1) >> 1) as u8; |
| 341 | dst[2 + 0 * stride] = pix; |
| 342 | dst[0 + 1 * stride] = pix; |
| 343 | let pix = ((l[1] + 2*l[2] + l[3] + 2) >> 2) as u8; |
| 344 | dst[3 + 0 * stride] = pix; |
| 345 | dst[1 + 1 * stride] = pix; |
| 346 | let pix = ((l[2] + l[3] + 1) >> 1) as u8; |
| 347 | dst[2 + 1 * stride] = pix; |
| 348 | dst[0 + 2 * stride] = pix; |
| 349 | let pix = ((l[2] + 3*l[3] + 2) >> 2) as u8; |
| 350 | dst[3 + 1 * stride] = pix; |
| 351 | dst[1 + 2 * stride] = pix; |
| 352 | dst[3 + 2 * stride] = l[3] as u8; |
| 353 | dst[1 + 3 * stride] = l[3] as u8; |
| 354 | dst[0 + 3 * stride] = l[3] as u8; |
| 355 | dst[2 + 2 * stride] = l[3] as u8; |
| 356 | dst[2 + 3 * stride] = l[3] as u8; |
| 357 | dst[3 + 3 * stride] = l[3] as u8; |
| 358 | } |
/// 4x4 DC prediction from both edges (8 samples, hence shift 3).
fn ipred_4x4_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_dc(buf, idx, stride, 4, 3);
}
/// 4x4 DC from the left edge only (4 samples, shift 2).
fn ipred_4x4_left_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_left_dc(buf, idx, stride, 4, 2);
}
/// 4x4 DC from the top edge only (4 samples, shift 2).
fn ipred_4x4_top_dc(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_top_dc(buf, idx, stride, 4, 2);
}
/// 4x4 flat-128 fill for blocks with no available neighbours.
fn ipred_4x4_dc128(buf: &mut [u8], idx: usize, stride: usize, _tr: &[u8]) {
    ipred_dc128(buf, idx, stride, 4);
}
| 371 | |
/// 8x8 vertical prediction: copies the row above into all eight rows.
fn ipred_8x8_ver(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_ver(buf, idx, stride, 8);
}
/// 8x8 horizontal prediction: replicates each row's left neighbour.
fn ipred_8x8_hor(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_hor(buf, idx, stride, 8);
}
| 378 | fn ipred_8x8_dc(buf: &mut [u8], idx: usize, stride: usize) { |
| 379 | let mut t: [u16; 8] = [0; 8]; |
| 380 | load_top(&mut t, buf, idx, stride, 8); |
| 381 | let mut l: [u16; 8] = [0; 8]; |
| 382 | load_left(&mut l, buf, idx, stride, 8); |
| 383 | |
| 384 | let dc0 = ((t[0] + t[1] + t[2] + t[3] + l[0] + l[1] + l[2] + l[3] + 4) >> 3) as u8; |
| 385 | let sum1 = t[4] + t[5] + t[6] + t[7]; |
| 386 | let dc1 = ((sum1 + 2) >> 2) as u8; |
| 387 | let sum2 = l[4] + l[5] + l[6] + l[7]; |
| 388 | let dc2 = ((sum2 + 2) >> 2) as u8; |
| 389 | let dc3 = ((sum1 + sum2 + 4) >> 3) as u8; |
| 390 | |
| 391 | let dst = &mut buf[idx..]; |
| 392 | for row in dst.chunks_mut(stride).take(4) { |
| 393 | row[..4].copy_from_slice(&[dc0; 4]); |
| 394 | row[4..8].copy_from_slice(&[dc1; 4]); |
| 395 | } |
| 396 | for row in dst.chunks_mut(stride).skip(4).take(4) { |
| 397 | row[..4].copy_from_slice(&[dc2; 4]); |
| 398 | row[4..8].copy_from_slice(&[dc3; 4]); |
| 399 | } |
| 400 | } |
/// 8x8 DC from the left edge only (8 samples, shift 3).
fn ipred_8x8_left_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_left_dc(buf, idx, stride, 8, 3);
}
/// 8x8 DC from the top edge only (8 samples, shift 3).
fn ipred_8x8_top_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_top_dc(buf, idx, stride, 8, 3);
}
/// 8x8 flat-128 fill for blocks with no available neighbours.
fn ipred_8x8_dc128(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_dc128(buf, idx, stride, 8);
}
/// 8x8 plane prediction slot — never expected to run.
///
/// NOTE(review): this entry only keeps `IPRED_FUNCS8X8` aligned with the
/// `PredType8x8` variant order; presumably SVQ3 never selects Plane for the
/// blocks this table serves — confirm in the decoder. The `unreachable!()`
/// asserts that assumption; the commented-out body below is a would-be
/// implementation kept for reference.
fn ipred_8x8_plane(_buf: &mut [u8], _idx: usize, _stride: usize) {
    unreachable!();
/*    let mut h: i16 = 0;
    let mut v: i16 = 0;
    let     idx0 = idx + 3 - stride;
    let mut idx1 = idx + 4 * stride - 1;
    let mut idx2 = idx + 2 * stride - 1;
    for i in 0..4 {
        let i1 = (i + 1) as i16;
        h += i1 * (i16::from(buf[idx0 + i + 1]) - i16::from(buf[idx0 - i - 1]));
        v += i1 * (i16::from(buf[idx1]) - i16::from(buf[idx2]));
        idx1 += stride;
        idx2 -= stride;
    }
    let b = (17 * h + 16) >> 5;
    let c = (17 * v + 16) >> 5;
    let mut a = 16 * (i16::from(buf[idx - 1 + 7 * stride]) + i16::from(buf[idx + 7 - stride])) - 3 * (b + c) + 16;
    for line in buf[idx..].chunks_mut(stride).take(8) {
        let mut acc = a;
        for el in line.iter_mut().take(8) {
            *el = clip8(acc >> 5);
            acc += b;
        }
        a += c;
    }*/
}
| 436 | |
/// 16x16 vertical prediction: copies the row above into all sixteen rows.
fn ipred_16x16_ver(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_ver(buf, idx, stride, 16);
}
/// 16x16 horizontal prediction: replicates each row's left neighbour.
fn ipred_16x16_hor(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_hor(buf, idx, stride, 16);
}
/// 16x16 DC from both edges (32 samples, hence shift 5).
fn ipred_16x16_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_dc(buf, idx, stride, 16, 5);
}
/// 16x16 DC from the left edge only (16 samples, shift 4).
fn ipred_16x16_left_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_left_dc(buf, idx, stride, 16, 4);
}
/// 16x16 DC from the top edge only (16 samples, shift 4).
fn ipred_16x16_top_dc(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_top_dc(buf, idx, stride, 16, 4);
}
/// 16x16 flat-128 fill for blocks with no available neighbours.
fn ipred_16x16_dc128(buf: &mut [u8], idx: usize, stride: usize) {
    ipred_dc128(buf, idx, stride, 16);
}
/// 16x16 plane (gradient) prediction.
///
/// Builds horizontal and vertical gradients from the edge pixels and fills
/// the block with a clipped linear ramp. Two quirks are visible below and
/// look deliberate (presumably SVQ3-specific — compare against a standard
/// H.264 plane predictor when verifying): the gradients are scaled as
/// `5 * (g / 4) / 16` (truncating twice), and `h`/`v` are swapped before use.
fn ipred_16x16_plane(buf: &mut [u8], mut idx: usize, stride: usize) {
    // idx0 points into the middle of the row above; idx1/idx2 walk the left
    // column downwards/upwards from the block centre.
    let     idx0 = idx + 7 - stride;
    let mut idx1 = idx + 8 * stride - 1;
    let mut idx2 = idx1 - 2 * stride;

    let mut h = i16::from(buf[idx0 + 1]) - i16::from(buf[idx0 - 1]);
    let mut v = i16::from(buf[idx1])     - i16::from(buf[idx2]);

    // Weighted sums of symmetric edge differences, weights 2..=8.
    for k in 2..9 {
        idx1 += stride;
        idx2 -= stride;
        h += (k as i16) * (i16::from(buf[idx0 + k]) - i16::from(buf[idx0 - k]));
        v += (k as i16) * (i16::from(buf[idx1]) - i16::from(buf[idx2]));
    }
    h = 5 * (h / 4) / 16;
    v = 5 * (v / 4) / 16;
    std::mem::swap(&mut h, &mut v);

    // Base value from the two far corner neighbours, with rounding bias;
    // each output pixel is (a + x*h + y*v) >> 5, clipped to 0..=255.
    let mut a = 16 * (i16::from(buf[idx - 1 + 15 * stride]) + i16::from(buf[idx + 15 - stride]) + 1) - 7 * (v + h);

    for _ in 0..16 {
        let mut b = a;
        a += v;

        // Emit one 16-pixel row as four 4-pixel chunks (assumes stride >= 16
        // so all four chunks stay inside the current row — TODO confirm).
        for dst in buf[idx..].chunks_mut(4).take(4) {
            dst[0] = clip8((b      ) >> 5);
            dst[1] = clip8((b +   h) >> 5);
            dst[2] = clip8((b + 2*h) >> 5);
            dst[3] = clip8((b + 3*h) >> 5);
            b += h * 4;
        }
        idx += stride;
    }
}
| 489 | |
/// Predictor for one 4x4 block; `tr` holds the four top-right neighbour
/// pixels (only `ipred_4x4_ver_left` actually reads them).
pub type IPred4x4Func = fn(buf: &mut [u8], off: usize, stride: usize, tr: &[u8]);
/// Predictor for one square block located at `off` inside `buf`.
pub type IPred8x8Func = fn(buf: &mut [u8], off: usize, stride: usize);

/// 4x4 predictors indexed by `PredType4x4` (same variant order).
pub const IPRED_FUNCS4X4: [IPred4x4Func; 12] = [
    ipred_4x4_ver, ipred_4x4_hor, ipred_4x4_dc,
    ipred_4x4_diag_down_left, ipred_4x4_diag_down_right,
    ipred_4x4_ver_right, ipred_4x4_hor_down, ipred_4x4_ver_left, ipred_4x4_hor_up,
    ipred_4x4_left_dc, ipred_4x4_top_dc, ipred_4x4_dc128
];

/// 8x8 predictors indexed by `PredType8x8` (same variant order; the Plane
/// slot is an `unreachable!()` placeholder).
pub const IPRED_FUNCS8X8: [IPred8x8Func; 7] = [
    ipred_8x8_dc, ipred_8x8_hor, ipred_8x8_ver, ipred_8x8_plane,
    ipred_8x8_left_dc, ipred_8x8_top_dc, ipred_8x8_dc128
];

/// 16x16 predictors indexed by `PredType8x8` (same variant order).
pub const IPRED_FUNCS16X16: [IPred8x8Func; 7] = [
    ipred_16x16_dc, ipred_16x16_hor, ipred_16x16_ver, ipred_16x16_plane,
    ipred_16x16_left_dc, ipred_16x16_top_dc, ipred_16x16_dc128
];
| 509 | |
/// Third-pel interpolation at offset (0, 0): a plain block copy.
fn tpel_interp00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) {
    let rows = dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh);
    for (drow, srow) in rows {
        drow[..bw].copy_from_slice(&srow[..bw]);
    }
}
| 515 | |
/// Rounds `val / 3` for a two-pixel weighted sum (weights 2:1 or 1:2):
/// 683 / 2048 is just over 1/3 (683 * 3 = 2049), and the `+ 1` biases the
/// result towards rounding rather than truncation.
fn interp2(val: u32) -> u8 {
    (((val + 1) * 683) >> 11) as u8
}

/// Rounds `val / 12` for a four-pixel weighted sum (weights summing to 12):
/// 2731 / 32768 is just over 1/12 (2731 * 12 = 32772), with `+ 6` as the
/// rounding bias.
fn interp4(val: u32) -> u8 {
    (((val + 6) * 2731) >> 15) as u8
}
| 523 | |
| 524 | fn tpel_interp01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { |
| 525 | for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) { |
| 526 | let mut last = u32::from(sline[0]); |
| 527 | for (dst, src) in dline.iter_mut().take(bw).zip(sline[1..].iter()) { |
| 528 | let new = u32::from(*src); |
| 529 | *dst = interp2(last * 2 + new); |
| 530 | last = new; |
| 531 | } |
| 532 | } |
| 533 | } |
| 534 | |
| 535 | fn tpel_interp02(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { |
| 536 | for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(bh) { |
| 537 | let mut last = u32::from(sline[0]); |
| 538 | for (dst, src) in dline.iter_mut().take(bw).zip(sline[1..].iter()) { |
| 539 | let new = u32::from(*src); |
| 540 | *dst = interp2(last + new * 2); |
| 541 | last = new; |
| 542 | } |
| 543 | } |
| 544 | } |
| 545 | |
| 546 | fn tpel_interp10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { |
| 547 | let src1 = &src[sstride..]; |
| 548 | for (dline, (sline0, sline1)) in dst.chunks_mut(dstride).zip(src.chunks(sstride).zip(src1.chunks(sstride))).take(bh) { |
| 549 | for (dst, (s0, s1)) in dline.iter_mut().zip(sline0.iter().zip(sline1.iter())).take(bw) { |
| 550 | *dst = interp2(u32::from(*s0) * 2 + u32::from(*s1)); |
| 551 | } |
| 552 | } |
| 553 | } |
| 554 | |
| 555 | fn tpel_interp11(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { |
| 556 | let mut sidx0 = 0; |
| 557 | let mut sidx1 = sstride; |
| 558 | for dline in dst.chunks_mut(dstride).take(bh) { |
| 559 | for (x, dst) in dline.iter_mut().take(bw).enumerate() { |
| 560 | *dst = interp4(u32::from(src[sidx0 + x]) * 4 + u32::from(src[sidx0 + x + 1]) * 3 + |
| 561 | u32::from(src[sidx1 + x]) * 3 + u32::from(src[sidx1 + x + 1]) * 2); |
| 562 | } |
| 563 | sidx0 += sstride; |
| 564 | sidx1 += sstride; |
| 565 | } |
| 566 | } |
| 567 | |
| 568 | fn tpel_interp12(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { |
| 569 | let mut sidx0 = 0; |
| 570 | let mut sidx1 = sstride; |
| 571 | for dline in dst.chunks_mut(dstride).take(bh) { |
| 572 | for (x, dst) in dline.iter_mut().take(bw).enumerate() { |
| 573 | *dst = interp4(u32::from(src[sidx0 + x]) * 3 + u32::from(src[sidx0 + x + 1]) * 4 + |
| 574 | u32::from(src[sidx1 + x]) * 2 + u32::from(src[sidx1 + x + 1]) * 3); |
| 575 | } |
| 576 | sidx0 += sstride; |
| 577 | sidx1 += sstride; |
| 578 | } |
| 579 | } |
| 580 | |
| 581 | fn tpel_interp20(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { |
| 582 | let src1 = &src[sstride..]; |
| 583 | for (dline, (sline0, sline1)) in dst.chunks_mut(dstride).zip(src.chunks(sstride).zip(src1.chunks(sstride))).take(bh) { |
| 584 | for (dst, (s0, s1)) in dline.iter_mut().zip(sline0.iter().zip(sline1.iter())).take(bw) { |
| 585 | *dst = interp2(u32::from(*s0) + u32::from(*s1) * 2); |
| 586 | } |
| 587 | } |
| 588 | } |
| 589 | |
| 590 | fn tpel_interp21(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { |
| 591 | let mut sidx0 = 0; |
| 592 | let mut sidx1 = sstride; |
| 593 | for dline in dst.chunks_mut(dstride).take(bh) { |
| 594 | for (x, dst) in dline.iter_mut().take(bw).enumerate() { |
| 595 | *dst = interp4(u32::from(src[sidx0 + x]) * 3 + u32::from(src[sidx0 + x + 1]) * 2 + |
| 596 | u32::from(src[sidx1 + x]) * 4 + u32::from(src[sidx1 + x + 1]) * 3); |
| 597 | } |
| 598 | sidx0 += sstride; |
| 599 | sidx1 += sstride; |
| 600 | } |
| 601 | } |
| 602 | |
| 603 | fn tpel_interp22(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize) { |
| 604 | let mut sidx0 = 0; |
| 605 | let mut sidx1 = sstride; |
| 606 | for dline in dst.chunks_mut(dstride).take(bh) { |
| 607 | for (x, dst) in dline.iter_mut().take(bw).enumerate() { |
| 608 | *dst = interp4(u32::from(src[sidx0 + x]) * 2 + u32::from(src[sidx0 + x + 1]) * 3 + |
| 609 | u32::from(src[sidx1 + x]) * 3 + u32::from(src[sidx1 + x + 1]) * 4); |
| 610 | } |
| 611 | sidx0 += sstride; |
| 612 | sidx1 += sstride; |
| 613 | } |
| 614 | } |
| 615 | |
/// Third-pel motion interpolation kernels (`BlkInterpFunc` comes from
/// `nihav_codec_support::codecs::blockdsp`). Indexed as `dy * 3 + dx`:
/// the first digit of each `tpel_interpYX` name is the vertical third-pel
/// offset, the second the horizontal one.
pub const THIRDPEL_INTERP_FUNCS: &[BlkInterpFunc] = &[
    tpel_interp00, tpel_interp01, tpel_interp02,
    tpel_interp10, tpel_interp11, tpel_interp12,
    tpel_interp20, tpel_interp21, tpel_interp22
];