1 use std::cmp::Ordering;
2 use nihav_core::frame::*;
3 use nihav_codec_support::codecs::blockdsp::edge_emu;
/// Clamps a signed 16-bit intermediate value to `0..=255` and narrows it to `u8`.
///
/// Negative inputs yield `0`, inputs above 255 yield `255`, and everything in between is
/// returned unchanged.
fn clip_u8(val: i16) -> u8 {
    // After clamping the value always fits in a byte, so the cast cannot truncate.
    val.clamp(0, 255) as u8
}
/// Neighbouring-pixel context read by the intra prediction routines in this file.
///
/// The code in this file accesses `top` and `left` sample arrays (16 entries each), the
/// `tl` corner sample, and the `has_top` / `has_left` flags.
9 pub struct IPredContext {
/// Loads the neighbour samples of the block whose first pixel is `src[off]`.
///
/// * `src` - pixel buffer containing the block and its neighbours.
/// * `off` - index of the block's top-left pixel inside `src`.
/// * `stride` - distance in elements between vertically adjacent pixels.
/// * `tsize`, `lsize` - presumably the number of top / left neighbours to read
///   (TODO confirm against the loop bounds).
18 pub fn fill(&mut self, src: &[u8], off: usize, stride: usize, tsize: usize, lsize: usize) {
// Top neighbours are taken from the row directly above the block.
21 self.top[i] = src[off - stride + i];
// Alternative path: the whole top row is set to 0x80 (presumably when no top row exists).
27 self.top = [0x80; 16];
// Left neighbours are taken from the column directly left of the block, one per row.
31 self.left[i] = src[off - 1 + i * stride];
// Alternative path: the whole left column is set to 0x80 (presumably when no left column exists).
37 self.left = [0x80; 16];
// The corner sample is only read when both neighbours are flagged as available.
39 if self.has_top && self.has_left {
40 self.tl = src[off - stride - 1];
/// `Default` construction for `IPredContext`.
47 impl Default for IPredContext {
// Builds the initial context value returned by `IPredContext::default()`.
48 fn default() -> Self {
59 pub fn add_coeffs4x4(dst: &mut [u8], off: usize, stride: usize, coeffs: &[i16; 16]) {
60 let dst = &mut dst[off..];
61 for (out, src) in dst.chunks_mut(stride).zip(coeffs.chunks(4)) {
62 for (oel, iel) in out.iter_mut().take(4).zip(src.iter()) {
63 *oel = clip_u8(i16::from(*oel) + *iel);
67 pub fn add_coeffs16x1(dst: &mut [u8], off: usize, coeffs: &[i16; 16]) {
68 let dst = &mut dst[off..];
69 for (oel, iel) in dst.iter_mut().take(16).zip(coeffs.iter()) {
70 *oel = clip_u8(i16::from(*oel) + *iel);
/// DC intra prediction for a `SIZE`x`SIZE` block starting at `dst[off]`.
///
/// Sums the first `SIZE` left and/or top neighbours from `ipred`, converts the sum to a
/// rounded average and uses that single value for the block.
76 #[allow(clippy::needless_late_init)]
77 fn ipred_dc(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
// With neither neighbour available there is nothing to average; this case is handled separately.
79 if !ipred.has_left && !ipred.has_top {
// The divisor (as a shift amount) depends on the block size.
83 let mut dcshift = match Self::SIZE {
// Accumulate the left column ...
88 for el in ipred.left.iter().take(Self::SIZE) {
89 dcsum += u16::from(*el);
// ... and the top row.
94 for el in ipred.top.iter().take(Self::SIZE) {
95 dcsum += u16::from(*el);
// Rounded division: add half of the divisor before shifting right by `dcshift`.
99 dc = ((dcsum + (1 << (dcshift - 1))) >> dcshift) as u8;
// Walk the block one `SIZE`-wide row at a time.
101 for _ in 0..Self::SIZE {
102 let out = &mut dst[off..][..Self::SIZE];
103 for el in out.iter_mut() {
/// Vertical intra prediction: rows of the block are filled with the top neighbours.
///
/// `off` is the index of the block's first pixel in `dst`; it is presumably advanced by
/// `stride` after each row (TODO confirm).
109 fn ipred_v(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
110 for _ in 0..Self::SIZE {
// `SIZE`-wide window of the current row; panics if `dst` is too short.
111 let out = &mut dst[off..][..Self::SIZE];
// Each row is an exact copy of the first `SIZE` top samples.
112 out.copy_from_slice(&ipred.top[0..Self::SIZE]);
/// Horizontal intra prediction: one left neighbour is visited per block row.
///
/// Each row is presumably filled with its left neighbour's value and `off` advanced by
/// `stride` afterwards (TODO confirm).
116 fn ipred_h(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
// Iterating over the first `SIZE` left samples also bounds the number of rows.
117 for leftel in ipred.left.iter().take(Self::SIZE) {
// `SIZE`-wide window of the current row; panics if `dst` is too short.
118 let out = &mut dst[off..][..Self::SIZE];
119 for el in out.iter_mut() {
/// Gradient-style intra prediction for a `SIZE`x`SIZE` block: each pixel becomes
/// `left[row] + top[column] - top_left`, clipped to `0..=255`.
125 fn ipred_tm(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
// Widened to i16 so the subtraction below cannot underflow.
126 let tl = i16::from(ipred.tl);
// `m` selects the left neighbour (row), `n` the top neighbour (column).
127 for m in 0..Self::SIZE {
128 for n in 0..Self::SIZE {
129 dst[off + n] = clip_u8(i16::from(ipred.left[m]) + i16::from(ipred.top[n]) - tl);
/// Visits every pixel of the `SIZE`x`SIZE` block starting at `dst[off]`, presumably storing
/// the constant `dc` into each one (TODO confirm).
///
/// Panics if `stride` is zero or a visited row is shorter than `SIZE`.
134 fn ipred_const(dst: &mut [u8], off: usize, stride: usize, dc: u8) {
// One chunk per image row; only the first `SIZE` rows belong to the block.
135 for row in dst[off..].chunks_mut(stride).take(Self::SIZE) {
// Only the first `SIZE` pixels of each row belong to the block.
136 for el in row[..Self::SIZE].iter_mut() {
/// Marker type selecting the `IntraPred` routines with a block size of 16.
143 pub struct IPred16x16 {}
// All prediction code comes from the trait; only the size is specified here.
144 impl IntraPred for IPred16x16 { const SIZE: usize = 16; }
/// Marker type selecting the `IntraPred` routines with a block size of 8.
146 pub struct IPred8x8 {}
// All prediction code comes from the trait; only the size is specified here.
147 impl IntraPred for IPred8x8 { const SIZE: usize = 8; }
/// Computes smoothed neighbour samples for the 4x4 directional predictors.
///
/// The first token selects which neighbours are filtered; `$ipred` is an expression with
/// `top`, `left` and `tl` fields. `topleft` yields a single `u8`; the other arms are
/// destructured by their callers as four-element tuples.
149 macro_rules! load_pred4 {
// topleft: 3-tap (1, 2, 1) filter centred on the corner sample.
150 (topleft; $ipred: expr) => {{
151 let tl = u16::from($ipred.tl);
152 let a0 = u16::from($ipred.top[0]);
153 let l0 = u16::from($ipred.left[0]);
154 ((l0 + tl * 2 + a0 + 2) >> 2) as u8
// top: (1, 2, 1) filter centred on top[0..=3]; reads the corner and top[4] as outer taps.
156 (top; $ipred: expr) => {{
157 let tl = u16::from($ipred.tl);
158 let a0 = u16::from($ipred.top[0]);
159 let a1 = u16::from($ipred.top[1]);
160 let a2 = u16::from($ipred.top[2]);
161 let a3 = u16::from($ipred.top[3]);
162 let a4 = u16::from($ipred.top[4]);
163 let p0 = ((tl + a0 * 2 + a1 + 2) >> 2) as u8;
164 let p1 = ((a0 + a1 * 2 + a2 + 2) >> 2) as u8;
165 let p2 = ((a1 + a2 * 2 + a3 + 2) >> 2) as u8;
166 let p3 = ((a2 + a3 * 2 + a4 + 2) >> 2) as u8;
// top8: (1, 2, 1) filter centred on top[4..=7]; top[7] is repeated as the last outer tap.
169 (top8; $ipred: expr) => {{
170 let t3 = u16::from($ipred.top[3]);
171 let t4 = u16::from($ipred.top[4]);
172 let t5 = u16::from($ipred.top[5]);
173 let t6 = u16::from($ipred.top[6]);
174 let t7 = u16::from($ipred.top[7]);
175 let p4 = ((t3 + t4 * 2 + t5 + 2) >> 2) as u8;
176 let p5 = ((t4 + t5 * 2 + t6 + 2) >> 2) as u8;
177 let p6 = ((t5 + t6 * 2 + t7 + 2) >> 2) as u8;
178 let p7 = ((t6 + t7 * 2 + t7 + 2) >> 2) as u8;
// topavg: rounded average of each adjacent pair along corner, top[0], ..., top[3].
181 (topavg; $ipred: expr) => {{
182 let tl = u16::from($ipred.tl);
183 let a0 = u16::from($ipred.top[0]);
184 let a1 = u16::from($ipred.top[1]);
185 let a2 = u16::from($ipred.top[2]);
186 let a3 = u16::from($ipred.top[3]);
187 let p0 = ((tl + a0 + 1) >> 1) as u8;
188 let p1 = ((a0 + a1 + 1) >> 1) as u8;
189 let p2 = ((a1 + a2 + 1) >> 1) as u8;
190 let p3 = ((a2 + a3 + 1) >> 1) as u8;
// left: (1, 2, 1) filter centred on left[0..=3]; reads the corner and left[4] as outer taps.
193 (left; $ipred: expr) => {{
194 let tl = u16::from($ipred.tl);
195 let l0 = u16::from($ipred.left[0]);
196 let l1 = u16::from($ipred.left[1]);
197 let l2 = u16::from($ipred.left[2]);
198 let l3 = u16::from($ipred.left[3]);
199 let l4 = u16::from($ipred.left[4]);
200 let p0 = ((tl + l0 * 2 + l1 + 2) >> 2) as u8;
201 let p1 = ((l0 + l1 * 2 + l2 + 2) >> 2) as u8;
202 let p2 = ((l1 + l2 * 2 + l3 + 2) >> 2) as u8;
203 let p3 = ((l2 + l3 * 2 + l4 + 2) >> 2) as u8;
// left8: (1, 2, 1) filter centred on left[4..=7]; left[7] is repeated as the last outer tap.
206 (left8; $ipred: expr) => {{
207 let l3 = u16::from($ipred.left[3]);
208 let l4 = u16::from($ipred.left[4]);
209 let l5 = u16::from($ipred.left[5]);
210 let l6 = u16::from($ipred.left[6]);
211 let l7 = u16::from($ipred.left[7]);
212 let p4 = ((l3 + l4 * 2 + l5 + 2) >> 2) as u8;
213 let p5 = ((l4 + l5 * 2 + l6 + 2) >> 2) as u8;
214 let p6 = ((l5 + l6 * 2 + l7 + 2) >> 2) as u8;
215 let p7 = ((l6 + l7 * 2 + l7 + 2) >> 2) as u8;
// leftavg: rounded average of each adjacent pair along corner, left[0], ..., left[3].
218 (leftavg; $ipred: expr) => {{
219 let tl = u16::from($ipred.tl);
220 let l0 = u16::from($ipred.left[0]);
221 let l1 = u16::from($ipred.left[1]);
222 let l2 = u16::from($ipred.left[2]);
223 let l3 = u16::from($ipred.left[3]);
224 let p0 = ((tl + l0 + 1) >> 1) as u8;
225 let p1 = ((l0 + l1 + 1) >> 1) as u8;
226 let p2 = ((l1 + l2 + 1) >> 1) as u8;
227 let p3 = ((l2 + l3 + 1) >> 1) as u8;
/// Marker type for the 4x4 intra predictors (presumably the owner of the 4-pixel-wide
/// `ipred_*` functions; TODO confirm).
232 pub struct IPred4x4 {}
/// 4x4 DC prediction: averages four left and four top neighbours into one value used for
/// the block at `dst[off]`.
234 pub fn ipred_dc(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
// Accumulate the four left samples ...
236 for el in ipred.left.iter().take(4) {
237 dcsum += u16::from(*el);
// ... and the four top samples.
239 for el in ipred.top.iter().take(4) {
240 dcsum += u16::from(*el);
// Eight samples in total: add 4 for rounding, then divide by 8.
242 let dc = ((dcsum + (1 << 2)) >> 3) as u8;
// Four-pixel window of the current row; panics if `dst` is too short.
244 let out = &mut dst[off..][..4];
245 for el in out.iter_mut() {
/// 4x4 gradient-style prediction: each pixel becomes `left[m] + top[n] - top_left`,
/// clipped to `0..=255` (`m` / `n` presumably iterate over rows / columns; TODO confirm).
251 pub fn ipred_tm(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
// Widened to i16 so the subtraction below cannot underflow.
252 let tl = i16::from(ipred.tl);
255 dst[off + n] = clip_u8(i16::from(ipred.left[m]) + i16::from(ipred.top[n]) - tl);
/// 4x4 vertical prediction using the smoothed top neighbours from `load_pred4!(top; ..)`.
260 pub fn ipred_ve(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
261 let (v0, v1, v2, v3) = load_pred4!(top; ipred);
// Packed into an array so a whole row can be written with one slice copy.
262 let vert_pred = [v0, v1, v2, v3];
// Four-pixel window of the current row; panics if `dst` is too short.
264 let out = &mut dst[off..][..4];
265 out.copy_from_slice(&vert_pred);
/// 4x4 horizontal prediction using smoothed left neighbours, one value per row.
269 pub fn ipred_he(dst: &mut [u8], off: usize, stride: usize, ipred: &IPredContext) {
// The macro's fourth value is discarded in favour of the dedicated last-row formula below.
270 let (p0, p1, p2, _) = load_pred4!(left; ipred);
// Last row: weights (1, 3) on left[2] and left[3] with rounding; no sample below left[3] is used.
271 let p3 = ((u16::from(ipred.left[2]) + u16::from(ipred.left[3]) * 3 + 2) >> 2) as u8;
272 let hor_pred = [p0, p1, p2, p3];
// Zipping with the four predictors limits the walk to four rows.
273 for (dline, &pred) in dst[off..].chunks_mut(stride).zip(hor_pred.iter()) {
274 for el in dline[..4].iter_mut() {
/// 4x4 diagonal prediction built from smoothed top neighbours; every successive row shows
/// the predictor sequence `p0..p6` shifted by one sample.
///
/// `off` is presumably advanced by `stride` between the four row writes (TODO confirm).
279 pub fn ipred_ld(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
// Seven consecutive smoothed top samples: three from the `top` arm, four from `top8`.
280 let (_, p0, p1, p2) = load_pred4!(top; ipred);
281 let (p3, p4, p5, p6) = load_pred4!(top8; ipred);
283 dst[off + 0] = p0; dst[off + 1] = p1; dst[off + 2] = p2; dst[off + 3] = p3;
285 dst[off + 0] = p1; dst[off + 1] = p2; dst[off + 2] = p3; dst[off + 3] = p4;
287 dst[off + 0] = p2; dst[off + 1] = p3; dst[off + 2] = p4; dst[off + 3] = p5;
289 dst[off + 0] = p3; dst[off + 1] = p4; dst[off + 2] = p5; dst[off + 3] = p6;
/// 4x4 diagonal prediction built from the smoothed left column, corner and top row; every
/// successive row shows the sequence `l2, l1, l0, tl, t0, t1, t2` shifted by one sample.
///
/// `off` is presumably advanced by `stride` between the four row writes (TODO confirm).
291 pub fn ipred_rd(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
292 let tl = load_pred4!(topleft; ipred);
293 let (l0, l1, l2, _) = load_pred4!(left; ipred);
294 let (t0, t1, t2, _) = load_pred4!(top; ipred);
296 dst[off + 0] = tl; dst[off + 1] = t0; dst[off + 2] = t1; dst[off + 3] = t2;
298 dst[off + 0] = l0; dst[off + 1] = tl; dst[off + 2] = t0; dst[off + 3] = t1;
300 dst[off + 0] = l1; dst[off + 1] = l0; dst[off + 2] = tl; dst[off + 3] = t0;
302 dst[off + 0] = l2; dst[off + 1] = l1; dst[off + 2] = l0; dst[off + 3] = tl;
/// 4x4 directional prediction mixing pair averages (`m*`) and 3-tap smoothed samples
/// (`tl`, `t*`, `l*`) of the top row, corner and left column.
///
/// `off` is presumably advanced by `stride` between the four row writes (TODO confirm).
304 pub fn ipred_vr(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
305 let tl = load_pred4!(topleft; ipred);
306 let (l0, l1, _, _) = load_pred4!(left; ipred);
307 let (t0, t1, t2, _) = load_pred4!(top; ipred);
308 let (m0, m1, m2, m3) = load_pred4!(topavg; ipred);
// The first two writes use the averaged and the smoothed top samples respectively;
// the last two repeat them shifted right by one, with a left sample entering at column 0.
310 dst[off + 0] = m0; dst[off + 1] = m1; dst[off + 2] = m2; dst[off + 3] = m3;
312 dst[off + 0] = tl; dst[off + 1] = t0; dst[off + 2] = t1; dst[off + 3] = t2;
314 dst[off + 0] = l0; dst[off + 1] = m0; dst[off + 2] = m1; dst[off + 3] = m2;
316 dst[off + 0] = l1; dst[off + 1] = tl; dst[off + 2] = t0; dst[off + 3] = t1;
/// 4x4 directional prediction built only from top neighbours, mixing pair averages (`m*`)
/// and 3-tap smoothed samples (`t*`); reads `ipred.top` up to index 7.
///
/// `off` is presumably advanced by `stride` between the four row writes (TODO confirm).
318 pub fn ipred_vl(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
319 let (_, t1, t2, t3) = load_pred4!(top; ipred);
320 let (t4, t5, t6, _) = load_pred4!(top8; ipred);
321 let (_, m1, m2, m3) = load_pred4!(topavg; ipred);
// The `topavg` arm stops at top[3]; the top[3]/top[4] average is computed by hand.
322 let m4 = ((u16::from(ipred.top[3]) + u16::from(ipred.top[4]) + 1) >> 1) as u8;
324 dst[off + 0] = m1; dst[off + 1] = m2; dst[off + 2] = m3; dst[off + 3] = m4;
326 dst[off + 0] = t1; dst[off + 1] = t2; dst[off + 2] = t3; dst[off + 3] = t4;
328 dst[off + 0] = m2; dst[off + 1] = m3; dst[off + 2] = m4; dst[off + 3] = t5;
330 dst[off + 0] = t2; dst[off + 1] = t3; dst[off + 2] = t4; dst[off + 3] = t6;
/// 4x4 directional prediction mixing pair averages of the left column (`m*`) with 3-tap
/// smoothed left, corner and top samples.
///
/// `off` is presumably advanced by `stride` between the four row writes (TODO confirm).
332 pub fn ipred_hd(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
333 let tl = load_pred4!(topleft; ipred);
334 let (l0, l1, l2, _) = load_pred4!(left; ipred);
335 let (m0, m1, m2, m3) = load_pred4!(leftavg; ipred);
336 let (t0, t1, _, _) = load_pred4!(top; ipred);
// Each write repeats the previous one shifted right by two columns, with a new
// (average, smoothed) pair entering at columns 0 and 1.
338 dst[off + 0] = m0; dst[off + 1] = tl; dst[off + 2] = t0; dst[off + 3] = t1;
340 dst[off + 0] = m1; dst[off + 1] = l0; dst[off + 2] = m0; dst[off + 3] = tl;
342 dst[off + 0] = m2; dst[off + 1] = l1; dst[off + 2] = m1; dst[off + 3] = l0;
344 dst[off + 0] = m3; dst[off + 1] = l2; dst[off + 2] = m2; dst[off + 3] = l1;
/// 4x4 directional prediction built only from left neighbours; positions past the end of
/// the left column are filled with the raw `left[3]` sample.
///
/// `off` is presumably advanced by `stride` between the four row writes (TODO confirm).
346 pub fn ipred_hu(dst: &mut [u8], mut off: usize, stride: usize, ipred: &IPredContext) {
347 let (_, m1, m2, m3) = load_pred4!(leftavg; ipred);
348 let (_, l1, l2, _) = load_pred4!(left; ipred);
// Replaces the macro's fourth value: weights (1, 3) on left[2] and left[3], no sample below left[3].
349 let l3 = ((u16::from(ipred.left[2]) + u16::from(ipred.left[3]) * 3 + 2) >> 2) as u8;
// Unfiltered last left sample, used as padding.
350 let p3 = ipred.left[3];
352 dst[off + 0] = m1; dst[off + 1] = l1; dst[off + 2] = m2; dst[off + 3] = l2;
354 dst[off + 0] = m2; dst[off + 1] = l2; dst[off + 2] = m3; dst[off + 3] = l3;
356 dst[off + 0] = m3; dst[off + 1] = l3; dst[off + 2] = p3; dst[off + 3] = p3;
358 dst[off + 0] = p3; dst[off + 1] = p3; dst[off + 2] = p3; dst[off + 3] = p3;
/// Eight 6-tap interpolation filters, selected by the `$mode` index in `interpolate!`.
///
/// Every row sums to 128, matching the `>> 7` normalisation applied by `interpolate!`.
/// Row 0 passes the third tap through unchanged; rows 1..=7 are the reverse of rows 7..=1.
362 const VP7_BICUBIC_FILTERS: [[i16; 6]; 8] = [
363 [ 0, 0, 128, 0, 0, 0 ],
364 [ 0, -6, 123, 12, -1, 0 ],
365 [ 2, -11, 108, 36, -8, 1 ],
366 [ 0, -9, 93, 50, -6, 0 ],
367 [ 3, -16, 77, 77, -16, 3 ],
368 [ 0, -6, 50, 93, -9, 0 ],
369 [ 1, -8, 36, 108, -11, 2 ],
370 [ 0, -1, 12, 123, -6, 0 ]
/// Applies one 6-tap filter from `VP7_BICUBIC_FILTERS` and yields the clipped `u8` result.
///
/// * `$src` - indexable sample buffer.
/// * `$off` - index of the first of the six taps.
/// * `$step` - distance between consecutive taps (the callers pass 1 or a row stride).
/// * `$mode` - filter index, `0..8`.
373 macro_rules! interpolate {
374 ($src: expr, $off: expr, $step: expr, $mode: expr) => {{
// Six consecutive taps, widened to i32 for the multiply-accumulate.
375 let s0 = i32::from($src[$off + 0 * $step]);
376 let s1 = i32::from($src[$off + 1 * $step]);
377 let s2 = i32::from($src[$off + 2 * $step]);
378 let s3 = i32::from($src[$off + 3 * $step]);
379 let s4 = i32::from($src[$off + 4 * $step]);
380 let s5 = i32::from($src[$off + 5 * $step]);
381 let filt = &VP7_BICUBIC_FILTERS[$mode];
382 let src = [s0, s1, s2, s3, s4, s5];
// Multiply-accumulate of samples and coefficients into `val`.
384 for (s, c) in src.iter().zip(filt.iter()) {
385 val += s * i32::from(*c);
// The coefficients sum to 128, so shifting by 7 restores pixel scale before clipping.
387 clip_u8((val >> 7) as i16)
// Extra samples included before the block position in motion-compensation source regions.
391 const EDGE_PRE: usize = 2;
// Extra samples included after the block in motion-compensation source regions.
392 const EDGE_POST: usize = 4;
// Row pitch of the temporary buffer used between the two interpolation passes.
393 const TMP_STRIDE: usize = 16;
/// Produces a `size`x`size` motion-compensated block in `dst` from a padded source region.
///
/// * `dst`, `doff`, `dstride` - destination buffer, index of the block's first pixel, row pitch.
/// * `src`, `sstride` - source region and its row pitch; the region starts `EDGE_PRE` rows
///   and columns before the block position.
/// * `size` - block width and height.
/// * `mx`, `my` - horizontal / vertical filter indices into `VP7_BICUBIC_FILTERS`.
395 fn mc_block_common(dst: &mut [u8], mut doff: usize, dstride: usize, src: &[u8], sstride: usize, size: usize, mx: usize, my: usize) {
// No filtering at all: plain copy, skipping the leading edge rows and columns.
396 if (mx == 0) && (my == 0) {
397 let dst = &mut dst[doff..];
398 let src = &src[EDGE_PRE + EDGE_PRE * sstride..];
399 for (out, src) in dst.chunks_mut(dstride).take(size).zip(src.chunks(sstride)) {
400 out[..size].copy_from_slice(&src[..size]);
// Horizontal filtering only (presumably the `my == 0` case): skip the leading edge rows;
// the filter taps start at column `x` of the padded row.
403 let src = &src[EDGE_PRE * sstride..];
404 for src in src.chunks(sstride).take(size) {
406 dst[doff + x] = interpolate!(src, x, 1, mx);
// Vertical filtering only (presumably the `mx == 0` case): skip the leading edge columns;
// the taps are one source row apart.
411 let src = &src[EDGE_PRE..];
414 dst[doff + x] = interpolate!(src, x + y * sstride, sstride, my);
// Both directions: first filter horizontally into `tmp`, including the extra edge rows
// the vertical pass needs ...
419 let mut tmp = [0u8; TMP_STRIDE * (16 + EDGE_PRE + EDGE_POST)];
420 for (y, dst) in tmp.chunks_mut(TMP_STRIDE).take(size + EDGE_PRE + EDGE_POST).enumerate() {
422 dst[x] = interpolate!(src, x + y * sstride, 1, mx);
// ... then filter `tmp` vertically into the destination.
427 dst[doff + x] = interpolate!(tmp, x + y * TMP_STRIDE, TMP_STRIDE, my);
/// Motion-compensates one `size`x`size` block of `plane` from the reference frame into `dst`.
///
/// * `dst`, `doff`, `dstride` - destination buffer, index of the block's first pixel, row pitch.
/// * `xpos`, `ypos` - block position inside the plane.
/// * `mvx`, `mvy` - motion vector; the low three bits select the interpolation filter,
///   the remaining bits are the whole-pixel displacement.
/// * `reffrm`, `plane` - reference frame and the plane to read from.
/// * `mc_buf` - scratch buffer used when the source region crosses the padded plane bounds.
/// * `size` - block width and height.
433 fn mc_block(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
434 mvx: i16, mvy: i16, reffrm: NAVideoBufferRef<u8>, plane: usize,
435 mc_buf: &mut [u8], size: usize) {
// Zero motion vector: copy the co-located block straight from the reference plane.
436 if (mvx == 0) && (mvy == 0) {
437 let dst = &mut dst[doff..];
438 let sstride = reffrm.get_stride(plane);
439 let srcoff = reffrm.get_offset(plane) + xpos + ypos * sstride;
440 let src = &reffrm.get_data();
441 let src = &src[srcoff..];
442 for (out, src) in dst.chunks_mut(dstride).take(size).zip(src.chunks(sstride)) {
443 out[..size].copy_from_slice(&src[..size]);
// Plane dimensions rounded up to a multiple of 16 (plane 0) or 8 (other planes).
447 let (w, h) = reffrm.get_dimensions(plane);
448 let wa = if plane == 0 { (w + 15) & !15 } else { (w + 7) & !7 } as isize;
449 let ha = if plane == 0 { (h + 15) & !15 } else { (h + 7) & !7 } as isize;
// Side length of the source region: the block plus the leading and trailing edge samples.
450 let bsize = (size as isize) + (EDGE_PRE as isize) + (EDGE_POST as isize);
// Top-left corner of the source region: block position plus whole-pixel motion, minus the leading edge.
451 let ref_x = (xpos as isize) + ((mvx >> 3) as isize) - (EDGE_PRE as isize);
452 let ref_y = (ypos as isize) + ((mvy >> 3) as isize) - (EDGE_PRE as isize);
// If the region leaves the rounded-up plane area, build it in `mc_buf` (row pitch 32) via `edge_emu`.
454 let (src, sstride) = if (ref_x < 0) || (ref_x + bsize > wa) || (ref_y < 0) || (ref_y + bsize > ha) {
455 edge_emu(&reffrm, ref_x, ref_y, bsize as usize, bsize as usize, mc_buf, 32, plane, 4);
456 (mc_buf as &[u8], 32)
// Otherwise read the reference plane in place.
458 let off = reffrm.get_offset(plane);
459 let stride = reffrm.get_stride(plane);
460 let data = reffrm.get_data();
461 (&data[off + (ref_x as usize) + (ref_y as usize) * stride..], stride)
// Low three bits of each motion vector component pick the interpolation filter.
463 let mx = (mvx & 7) as usize;
464 let my = (mvy & 7) as usize;
465 mc_block_common(dst, doff, dstride, src, sstride, size, mx, my);
/// Motion-compensates a 16x16 block; forwards to `mc_block` with `size = 16`.
///
/// `dst` / `doff` / `dstride` describe the destination, `xpos` / `ypos` the block position
/// in `plane` of the reference frame `src`, `mvx` / `mvy` the motion vector, and `mc_buf`
/// is the scratch buffer handed to `mc_block`.
467 pub fn mc_block16x16(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
468 mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) {
469 mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 16);
/// Motion-compensates an 8x8 block; forwards to `mc_block` with `size = 8`.
///
/// `dst` / `doff` / `dstride` describe the destination, `xpos` / `ypos` the block position
/// in `plane` of the reference frame `src`, `mvx` / `mvy` the motion vector, and `mc_buf`
/// is the scratch buffer handed to `mc_block`.
471 pub fn mc_block8x8(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
472 mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) {
473 mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 8);
/// Motion-compensates a 4x4 block; forwards to `mc_block` with `size = 4`.
///
/// `dst` / `doff` / `dstride` describe the destination, `xpos` / `ypos` the block position
/// in `plane` of the reference frame `src`, `mvx` / `mvy` the motion vector, and `mc_buf`
/// is the scratch buffer handed to `mc_block`.
475 pub fn mc_block4x4(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
476 mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) {
477 mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 4);
/// Motion compensation variant whose source addressing depends on `pitch_mode`.
///
/// Takes the same arguments as `mc_block` plus `pitch_mode`, whose low three bits select a
/// per-row horizontal step and a vertical multiplier for plane 0, or only a vertical
/// multiplier for the other planes. The block is then produced by one or more
/// `mc_block_common` calls on a source whose row pitch is adjusted accordingly.
479 pub fn mc_block_special(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
480 mvx: i16, mvy: i16, reffrm: NAVideoBufferRef<u8>, plane: usize,
481 mc_buf: &mut [u8], size: usize, pitch_mode: u8) {
// Per-mode tables for plane 0: vertical multiplier and per-row horizontal step.
482 const Y_MUL: [isize; 8] = [ 1, 0, 2, 4, 1, 1, 2, 2 ];
483 const Y_OFF: [isize; 8] = [ 0, 4, 0, 0, 1, -1, 1, -1 ];
484 const ILACE_CHROMA: [bool; 8] = [ false, false, true, true, false, false, true, true ]; // mode&2 != 0
// Only the low three bits are meaningful, which also keeps the table lookups in range.
486 let pitch_mode = (pitch_mode & 7) as usize;
487 let (xstep, ymul) = if plane == 0 {
488 (Y_OFF[pitch_mode], Y_MUL[pitch_mode])
// Other planes never step horizontally; they only double the row distance for the flagged modes.
490 (0, if ILACE_CHROMA[pitch_mode] { 2 } else { 1 })
// Plane dimensions rounded up to a multiple of 16 (plane 0) or 8 (other planes).
493 let (w, h) = reffrm.get_dimensions(plane);
494 let wa = if plane == 0 { (w + 15) & !15 } else { (w + 7) & !7 } as isize;
495 let ha = if plane == 0 { (h + 15) & !15 } else { (h + 7) & !7 } as isize;
// Horizontal extent of the source region, widened towards the side the per-row step moves to.
496 let mut start_x = (xpos as isize) + ((mvx >> 3) as isize) - (EDGE_PRE as isize);
497 let mut end_x = (xpos as isize) + ((mvx >> 3) as isize) + ((size + EDGE_POST) as isize);
498 match xstep.cmp(&0) {
499 Ordering::Less => start_x -= (size + EDGE_POST) as isize,
500 Ordering::Greater => end_x += (size as isize) * xstep,
501 Ordering::Equal => {},
// Vertical extent of the source region; the edge margins scale with the row multiplier.
503 let mut start_y = (ypos as isize) + ((mvy >> 3) as isize) - (EDGE_PRE as isize) * ymul;
504 let mut end_y = (ypos as isize) + ((mvy >> 3) as isize) + ((size + EDGE_POST) as isize) * ymul;
// Additional vertical margin (presumably applied only for a non-zero horizontal step; TODO confirm).
506 start_y -= EDGE_PRE as isize;
507 end_y += (EDGE_POST + 1) as isize;
509 let off = reffrm.get_offset(plane);
510 let stride = reffrm.get_stride(plane);
// Region fully inside the rounded-up plane: read it in place, folding the per-row step into the pitch.
511 let (src, sstride) = if (start_x >= 0) && (end_x <= wa) && (start_y >= 0) && (end_y <= ha) {
512 let data = reffrm.get_data();
513 (&data[off + (start_x as usize) + (start_y as usize) * stride..],
514 ((stride as isize) + xstep) as usize)
// Otherwise build the region in `mc_buf` (row pitch 128) via `edge_emu`; for a negative
// step the start offset `bo` is moved right by the extra width.
516 let add = (size + EDGE_PRE + EDGE_POST) * xstep.unsigned_abs();
517 let bw = size + EDGE_PRE + EDGE_POST + add;
518 let bh = (end_y - start_y) as usize;
519 let bo = if xstep >= 0 { 0 } else { add };
520 edge_emu(&reffrm, start_x + (bo as isize), start_y, bw, bh, mc_buf, 128, plane, 0);
521 (&mc_buf[bo..], (128 + xstep) as usize)
// Low three bits of each motion vector component pick the interpolation filter.
523 let mx = (mvx & 7) as usize;
524 let my = (mvy & 7) as usize;
// Dispatch (presumably on `ymul`; TODO confirm): the zero case is not supported.
526 0 => unimplemented!(),
// Case 1: a single full-size call.
527 1 => mc_block_common(dst, doff, dstride, src, sstride, size, mx, my),
// Half-size sub-blocks, each reading every second source row.
529 let hsize = size / 2;
532 mc_block_common(dst, doff + x * hsize + y * hsize * dstride, dstride,
533 &src[x * hsize + y * sstride..], sstride * 2, hsize, mx, my);
// Quarter-size sub-blocks, each reading every fourth source row.
538 let qsize = size / 4;
541 mc_block_common(dst, doff + x * qsize + y * qsize * dstride, dstride,
542 &src[x * qsize + y * sstride..], sstride * 4, qsize, mx, my);