/// Row stride, in elements, of the scratch buffers used by the multi-pass
/// luma interpolators in this file (each buffer is sized `TMP_BUF_STRIDE * 16`).
3 const TMP_BUF_STRIDE: usize = 32;
/// Six-tap interpolation along one axis, averaged with a neighbouring source sample.
///
/// The taps 1, -5, 20, 20, -5, 1 are applied to six source samples spaced
/// `step` apart, where `step` is 1 when `hor` is set and `sstride` otherwise.
/// The result of `clip_u8` is then averaged, rounding half up, with the source
/// sample under tap 2 when `avg0` is set, or under tap 3 otherwise.
///
/// NOTE(review): the loop headers and the tail of the tap sum (any rounding
/// term or shift applied before `clip_u8`) are not visible in this listing.
/// NOTE(review): every access goes through raw pointers; nothing visible here
/// checks that `src` holds the `5 * step` samples read past each position.
5 fn interp_block1(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize, hor: bool, avg0: bool) {
// Distance between consecutive filter taps: one sample when `hor` is set,
// one source row otherwise.
7 let step = if hor { 1 } else { sstride };
// Offset of the source sample that the filtered value is averaged with.
8 let avgidx = if avg0 { step * 2 } else { step * 3 };
9 let mut src = src.as_ptr();
10 let mut dst = dst.as_mut_ptr();
// Weighted sum over the six samples starting at `src`.
13 let t = clip_u8(( i16::from(*src)
14 - 5 * i16::from(*src.add(step))
15 + 20 * i16::from(*src.add(step * 2))
16 + 20 * i16::from(*src.add(step * 3))
17 - 5 * i16::from(*src.add(step * 4))
18 + i16::from(*src.add(step * 5))
// Rounded mean of the filtered sample and the selected source sample.
20 *dst = ((u16::from(t) + u16::from(*src.add(avgidx)) + 1) >> 1) as u8;
// Rewind by `w` and move to the next row; presumably both pointers were
// advanced `w` times by the inner loop that is not shown -- TODO confirm.
24 dst = dst.sub(w).add(dstride);
25 src = src.sub(w).add(sstride);
/// Six-tap interpolation along one axis, without the extra averaging step.
///
/// Applies the taps 1, -5, 20, 20, -5, 1 to six source samples spaced `step`
/// apart (`step` is 1 when `hor` is set, `sstride` otherwise) and stores the
/// output of `clip_u8` at column `x` of the current destination row.
///
/// NOTE(review): the loop headers, the tail of the tap sum (rounding/shift
/// before `clip_u8`) and the per-sample source advance are not visible in this
/// listing.
/// NOTE(review): raw-pointer access; no bounds validation is visible here.
30 fn interp_block2(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize, hor: bool) {
// Distance between consecutive filter taps.
32 let step = if hor { 1 } else { sstride };
// `pix` always points at the start of the current destination row.
33 let mut pix = dst.as_mut_ptr();
34 let mut src = src.as_ptr();
// Weighted sum over the six samples starting at `src`.
37 *pix.add(x) = clip_u8(( i16::from(*src)
38 - 5 * i16::from(*src.add(step))
39 + 20 * i16::from(*src.add(step * 2))
40 + 20 * i16::from(*src.add(step * 3))
41 - 5 * i16::from(*src.add(step * 4))
42 + i16::from(*src.add(step * 5))
// Step both pointers to the next row.
46 pix = pix.add(dstride);
48 src = src.add(sstride);
/// Writes the rounded average of two scratch buffers into `dst`.
///
/// `tmp` and `tmp2` are walked with a row stride of `TMP_BUF_STRIDE`, `dst`
/// with a row stride of `dstride`.
///
/// NOTE(review): the loop headers and the definitions of `a` and `b` are not
/// visible in this listing; presumably they are the samples at column `x` of
/// `tmp` and `tmp2` -- TODO confirm.
53 fn mc_avg_tmp(dst: &mut [u8], dstride: usize, w: usize, h: usize, tmp: &[u8], tmp2: &[u8]) {
55 let mut src1 = tmp.as_ptr();
56 let mut src2 = tmp2.as_ptr();
57 let mut dst = dst.as_mut_ptr();
// Mean of the two samples, rounding half up.
62 *dst.add(x) = ((u16::from(a) + u16::from(b) + 1) >> 1) as u8;
// Advance all three pointers by one row.
64 dst = dst.add(dstride);
65 src1 = src1.add(TMP_BUF_STRIDE);
66 src2 = src2.add(TMP_BUF_STRIDE);
/// Luma position "00": copies `w` bytes per row from `src` to `dst` with no
/// filtering.
///
/// NOTE(review): the row loop is not visible in this listing; presumably the
/// copy is repeated `h` times -- TODO confirm.
/// NOTE(review): `copy_nonoverlapping` requires that the two rows do not
/// overlap and that both hold at least `w` bytes; no check is visible here.
71 fn h264_mc00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
73 let mut src = src.as_ptr();
74 let mut dst = dst.as_mut_ptr();
// Copy one row of `w` bytes.
76 std::ptr::copy_nonoverlapping(src, dst, w);
// Step to the next source and destination rows.
77 src = src.add(sstride);
78 dst = dst.add(dstride);
/// Luma position "01": runs `interp_block1` with `hor = true` and
/// `avg0 = true`, so the filtered value is averaged with the sample under tap 2.
///
/// The source is offset by two rows before filtering; presumably `src` points
/// two rows and two columns before the block so the six-tap window has its
/// context -- TODO confirm against the caller.
83 fn h264_mc01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
84 interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, true);
/// Luma position "02": runs `interp_block2` with `hor = true`, i.e. the
/// six-tap filter with a tap spacing of one sample and no extra averaging.
///
/// The source is offset by two rows before filtering; presumably `src` points
/// two rows and two columns before the block -- TODO confirm against the caller.
87 fn h264_mc02(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
88 interp_block2(dst, dstride, &src[sstride * 2..], sstride, w, h, true);
/// Luma position "03": runs `interp_block1` with `hor = true` and
/// `avg0 = false`, so the filtered value is averaged with the sample under tap 3.
///
/// The source is offset by two rows before filtering; presumably `src` points
/// two rows and two columns before the block -- TODO confirm against the caller.
91 fn h264_mc03(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
92 interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, false);
/// Luma position "10": runs `interp_block1` with `hor = false` (tap spacing of
/// one source row) and `avg0 = true`, so the filtered value is averaged with
/// the sample under tap 2.
///
/// The source is offset by two samples before filtering; presumably `src`
/// points two rows and two columns before the block -- TODO confirm against
/// the caller.
95 fn h264_mc10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
96 interp_block1(dst, dstride, &src[2..], sstride, w, h, false, true);
/// Luma position "11": rounded average of the `h264_mc02` and `h264_mc20`
/// results, both computed from the same `src`.
///
/// NOTE(review): the std documentation for `MaybeUninit` lists
/// `assume_init()` on uninitialised integer storage as undefined behaviour;
/// `[0u8; TMP_BUF_STRIDE * 16]` would be the sound form.
99 fn h264_mc11(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
// Scratch buffers for the two intermediate blocks, written with a row
// stride of `TMP_BUF_STRIDE`.
100 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
101 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
102 h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
103 h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h);
// Blend the two intermediates into the destination.
104 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
/// Luma position "12": rounded average of the `h264_mc02` and `h264_mc22`
/// results, both computed from the same `src`.
///
/// NOTE(review): the std documentation for `MaybeUninit` lists
/// `assume_init()` on uninitialised integer storage as undefined behaviour;
/// `[0u8; TMP_BUF_STRIDE * 16]` would be the sound form.
107 fn h264_mc12(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
// Scratch buffers for the two intermediate blocks, written with a row
// stride of `TMP_BUF_STRIDE`.
108 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
109 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
110 h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
111 h264_mc22(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h);
// Blend the two intermediates into the destination.
112 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
/// Luma position "13": rounded average of `h264_mc02` on `src` and
/// `h264_mc20` on `src` shifted forward by one sample (`&src[1..]`).
///
/// NOTE(review): the std documentation for `MaybeUninit` lists
/// `assume_init()` on uninitialised integer storage as undefined behaviour;
/// `[0u8; TMP_BUF_STRIDE * 16]` would be the sound form.
115 fn h264_mc13(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
// Scratch buffers for the two intermediate blocks, written with a row
// stride of `TMP_BUF_STRIDE`.
116 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
117 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
118 h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
// Second intermediate is taken one sample further along the row.
119 h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h);
// Blend the two intermediates into the destination.
120 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
/// Luma position "20": runs `interp_block2` with `hor = false`, i.e. the
/// six-tap filter with a tap spacing of one source row and no extra averaging.
///
/// The source is offset by two samples before filtering; presumably `src`
/// points two rows and two columns before the block -- TODO confirm against
/// the caller.
123 fn h264_mc20(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
124 interp_block2(dst, dstride, &src[2..], sstride, w, h, false);
/// Luma position "21": rounded average of the `h264_mc22` and `h264_mc20`
/// results, both computed from the same `src`.
///
/// NOTE(review): the std documentation for `MaybeUninit` lists
/// `assume_init()` on uninitialised integer storage as undefined behaviour;
/// `[0u8; TMP_BUF_STRIDE * 16]` would be the sound form.
127 fn h264_mc21(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
// Scratch buffers for the two intermediate blocks, written with a row
// stride of `TMP_BUF_STRIDE`.
128 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
129 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
130 h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
131 h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h);
// Blend the two intermediates into the destination.
132 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
/// Luma position "22": two-pass six-tap interpolation through an `i32`
/// scratch buffer.
///
/// Pass 1 applies the taps 1, -5, 20, 20, -5, 1 to six source samples spaced
/// `sstride` apart and stores the raw, un-normalised sum in `tmp`. Pass 2
/// applies the same taps to six consecutive `tmp` entries, adds 512, shifts
/// right by 10 and passes the value through `clip_u8`.
///
/// NOTE(review): the loop headers and per-sample pointer advances are not
/// visible in this listing. The `sub(w+5)` rewinds imply that pass 1 produces
/// `w + 5` columns per row -- TODO confirm.
/// NOTE(review): the std documentation for `MaybeUninit` lists
/// `assume_init()` on uninitialised integer storage as undefined behaviour;
/// `[0i32; TMP_BUF_STRIDE * 16]` would be the sound form.
135 fn h264_mc22(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
// Intermediate sums, stored with a row stride of `TMP_BUF_STRIDE`.
136 let mut tmp: [i32; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
// Pass 1: `dst` here shadows the parameter and points into `tmp`.
138 let mut src = src.as_ptr();
139 let mut dst = tmp.as_mut_ptr();
// Six-tap sum across source rows, kept at full precision (no shift).
142 *dst = i32::from(*src)
143 - 5 * i32::from(*src.add(sstride))
144 + 20 * i32::from(*src.add(sstride * 2))
145 + 20 * i32::from(*src.add(sstride * 3))
146 - 5 * i32::from(*src.add(sstride * 4))
147 + i32::from(*src.add(sstride * 5));
// Rewind by `w + 5` and step to the next source row / scratch row.
151 src = src.sub(w+5).add(sstride);
152 dst = dst.sub(w+5).add(TMP_BUF_STRIDE);
// Pass 2: `dst` is the real destination again, `src` reads from `tmp`.
156 let mut dst = dst.as_mut_ptr();
157 let mut src = tmp.as_ptr();
// Six-tap sum across six consecutive scratch entries, normalised with
// `+ 512 >> 10`.
160 *dst = clip_u8(((*src - 5 * *src.add(1) + 20 * *src.add(2) + 20 * *src.add(3) - 5 * *src.add(4) + *src.add(5) + 512) >> 10) as i16);
// Rewind by `w` and step to the next destination row / scratch row.
164 dst = dst.sub(w).add(dstride);
165 src = src.sub(w).add(TMP_BUF_STRIDE);
/// Luma position "23": rounded average of `h264_mc22` on `src` and
/// `h264_mc20` on `src` shifted forward by one sample (`&src[1..]`).
///
/// NOTE(review): the std documentation for `MaybeUninit` lists
/// `assume_init()` on uninitialised integer storage as undefined behaviour;
/// `[0u8; TMP_BUF_STRIDE * 16]` would be the sound form.
170 fn h264_mc23(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
// Scratch buffers for the two intermediate blocks, written with a row
// stride of `TMP_BUF_STRIDE`.
171 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
172 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
173 h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
// Second intermediate is taken one sample further along the row.
174 h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h);
// Blend the two intermediates into the destination.
175 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
/// Luma position "30": runs `interp_block1` with `hor = false` (tap spacing of
/// one source row) and `avg0 = false`, so the filtered value is averaged with
/// the sample under tap 3.
///
/// The source is offset by two samples before filtering; presumably `src`
/// points two rows and two columns before the block -- TODO confirm against
/// the caller.
178 fn h264_mc30(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
179 interp_block1(dst, dstride, &src[2..], sstride, w, h, false, false);
/// Luma position "31": rounded average of `h264_mc20` on `src` and
/// `h264_mc02` on `src` shifted down by one row (`&src[sstride..]`).
///
/// NOTE(review): the std documentation for `MaybeUninit` lists
/// `assume_init()` on uninitialised integer storage as undefined behaviour;
/// `[0u8; TMP_BUF_STRIDE * 16]` would be the sound form.
182 fn h264_mc31(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
// Scratch buffers for the two intermediate blocks, written with a row
// stride of `TMP_BUF_STRIDE`.
183 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
184 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
185 h264_mc20(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
// Second intermediate is taken one row further down.
186 h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h);
// Blend the two intermediates into the destination.
187 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
/// Luma position "32": rounded average of `h264_mc22` on `src` and
/// `h264_mc02` on `src` shifted down by one row (`&src[sstride..]`).
///
/// NOTE(review): the std documentation for `MaybeUninit` lists
/// `assume_init()` on uninitialised integer storage as undefined behaviour;
/// `[0u8; TMP_BUF_STRIDE * 16]` would be the sound form.
190 fn h264_mc32(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
// Scratch buffers for the two intermediate blocks, written with a row
// stride of `TMP_BUF_STRIDE`.
191 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
192 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
193 h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
// Second intermediate is taken one row further down.
194 h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h);
// Blend the two intermediates into the destination.
195 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
/// Luma position "33": rounded average of `h264_mc20` on `src` shifted forward
/// by one sample (`&src[1..]`) and `h264_mc02` on `src` shifted down by one
/// row (`&src[sstride..]`).
///
/// NOTE(review): the std documentation for `MaybeUninit` lists
/// `assume_init()` on uninitialised integer storage as undefined behaviour;
/// `[0u8; TMP_BUF_STRIDE * 16]` would be the sound form.
198 fn h264_mc33(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
// Scratch buffers for the two intermediate blocks, written with a row
// stride of `TMP_BUF_STRIDE`.
199 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
200 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
// First intermediate is taken one sample further along the row.
201 h264_mc20(&mut tmp, TMP_BUF_STRIDE, &src[1..], sstride, w, h);
// Second intermediate is taken one row further down.
202 h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h);
// Blend the two intermediates into the destination.
203 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
/// Chroma interpolation: copies or linearly blends neighbouring source samples
/// into `dst`, choosing one of four code paths.
///
/// The paths use the weights `a0`/`a1` (between a sample and its right-hand
/// neighbour) and `b0`/`b1` (between a sample and the one a row below).
///
/// NOTE(review): the definitions of the four weights from `dx`/`dy`, the loop
/// headers, and the conditions selecting the second to fourth paths are not
/// visible in this listing. The `>> 3` and `>> 6` normalisations and the
/// `== 8` test suggest each weight pair sums to 8 -- TODO confirm.
/// NOTE(review): raw-pointer access; the blending paths read one sample to the
/// right and/or one row below the block, and no bounds check is visible here.
207 pub fn chroma_interp(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) {
// Path 1: both leading weights equal 8, so rows are copied unchanged.
213 if a0 == 8 && b0 == 8 {
215 let mut src = src.as_ptr();
216 let mut dst = dst.as_mut_ptr();
218 std::ptr::copy_nonoverlapping(src, dst, w);
219 src = src.add(sstride);
220 dst = dst.add(dstride);
// Path 2: blend each sample with the one directly below it, using `b0`/`b1`.
225 let mut src0 = src.as_ptr();
226 let mut src1 = src0.add(sstride);
227 let mut dst = dst.as_mut_ptr();
230 let a = *src0.add(x);
231 let b = *src1.add(x);
// Weighted sum with rounding: `+ 4` then `>> 3`.
232 *dst.add(x) = ((u16::from(a) * b0 + u16::from(b) * b1 + 4) >> 3) as u8;
234 src0 = src0.add(sstride);
235 src1 = src1.add(sstride);
236 dst = dst.add(dstride);
// Path 3: blend each sample with its right-hand neighbour, using `a0`/`a1`.
// `a` is defined on a line not shown; presumably `*src.add(x)` -- TODO confirm.
241 let mut src = src.as_ptr();
242 let mut dst = dst.as_mut_ptr();
246 let b = *src.add(x + 1);
247 *dst.add(x) = ((u16::from(a) * a0 + u16::from(b) * a1 + 4) >> 3) as u8;
250 src = src.add(sstride);
251 dst = dst.add(dstride);
// Path 4: blend the 2x2 neighbourhood with the four weight products.
// `a` and `c` are defined on lines not shown; presumably the samples at
// column `x` of `src0` and `src1` -- TODO confirm.
256 let mut src0 = src.as_ptr();
257 let mut src1 = src0.add(sstride);
258 let mut dst = dst.as_mut_ptr();
263 let b = *src0.add(x + 1);
264 let d = *src1.add(x + 1);
// Weighted sum with rounding: `+ 0x20` then `>> 6`.
265 *dst.add(x) = ((u16::from(a) * a0 * b0 + u16::from(b) * a1 * b0 + u16::from(c) * a0 * b1 + u16::from(d) * a1 * b1 + 0x20) >> 6) as u8;
269 src0 = src0.add(sstride);
270 src1 = src1.add(sstride);
271 dst = dst.add(dstride);
/// Generates three fixed-width wrappers around a luma interpolation function.
///
/// `$orig` takes the block width as its fifth argument. `$func4`, `$func8` and
/// `$func16` forward to it with widths 4, 8 and 16 respectively and omit `w`
/// from their own signatures.
277 macro_rules! luma_mc {
278 ($orig:ident, $func4:ident, $func8:ident, $func16:ident) => {
// 4-wide wrapper.
279 fn $func4(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) {
280 $orig(dst, dstride, src, sstride, 4, h);
// 8-wide wrapper.
282 fn $func8(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) {
283 $orig(dst, dstride, src, sstride, 8, h);
// 16-wide wrapper.
285 fn $func16(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) {
286 $orig(dst, dstride, src, sstride, 16, h);
// Instantiate the 4-, 8- and 16-wide wrappers for each of the sixteen
// `h264_mcXY` functions (X and Y each running 0..=3).
291 luma_mc!(h264_mc00, h264_mc00_4, h264_mc00_8, h264_mc00_16);
292 luma_mc!(h264_mc01, h264_mc01_4, h264_mc01_8, h264_mc01_16);
293 luma_mc!(h264_mc02, h264_mc02_4, h264_mc02_8, h264_mc02_16);
294 luma_mc!(h264_mc03, h264_mc03_4, h264_mc03_8, h264_mc03_16);
295 luma_mc!(h264_mc10, h264_mc10_4, h264_mc10_8, h264_mc10_16);
296 luma_mc!(h264_mc11, h264_mc11_4, h264_mc11_8, h264_mc11_16);
297 luma_mc!(h264_mc12, h264_mc12_4, h264_mc12_8, h264_mc12_16);
298 luma_mc!(h264_mc13, h264_mc13_4, h264_mc13_8, h264_mc13_16);
299 luma_mc!(h264_mc20, h264_mc20_4, h264_mc20_8, h264_mc20_16);
300 luma_mc!(h264_mc21, h264_mc21_4, h264_mc21_8, h264_mc21_16);
301 luma_mc!(h264_mc22, h264_mc22_4, h264_mc22_8, h264_mc22_16);
302 luma_mc!(h264_mc23, h264_mc23_4, h264_mc23_8, h264_mc23_16);
303 luma_mc!(h264_mc30, h264_mc30_4, h264_mc30_8, h264_mc30_16);
304 luma_mc!(h264_mc31, h264_mc31_4, h264_mc31_8, h264_mc31_16);
305 luma_mc!(h264_mc32, h264_mc32_4, h264_mc32_8, h264_mc32_16);
306 luma_mc!(h264_mc33, h264_mc33_4, h264_mc33_8, h264_mc33_16);
/// Luma interpolation function table: three rows of sixteen entries.
///
/// The rows hold the 4-, 8- and 16-wide wrappers in that order. Within a row
/// the entries run `mc00, mc01, mc02, mc03, mc10, ..., mc33`.
///
/// NOTE(review): the row delimiters are not visible in this listing, and how
/// callers derive the row and column indices cannot be determined from here.
308 pub const H264_LUMA_INTERP: &[[super::MCFunc; 16]; 3] = &[
// Row 0: 4-wide wrappers.
310 h264_mc00_4, h264_mc01_4, h264_mc02_4, h264_mc03_4,
311 h264_mc10_4, h264_mc11_4, h264_mc12_4, h264_mc13_4,
312 h264_mc20_4, h264_mc21_4, h264_mc22_4, h264_mc23_4,
313 h264_mc30_4, h264_mc31_4, h264_mc32_4, h264_mc33_4
// Row 1: 8-wide wrappers.
315 h264_mc00_8, h264_mc01_8, h264_mc02_8, h264_mc03_8,
316 h264_mc10_8, h264_mc11_8, h264_mc12_8, h264_mc13_8,
317 h264_mc20_8, h264_mc21_8, h264_mc22_8, h264_mc23_8,
318 h264_mc30_8, h264_mc31_8, h264_mc32_8, h264_mc33_8
// Row 2: 16-wide wrappers.
320 h264_mc00_16, h264_mc01_16, h264_mc02_16, h264_mc03_16,
321 h264_mc10_16, h264_mc11_16, h264_mc12_16, h264_mc13_16,
322 h264_mc20_16, h264_mc21_16, h264_mc22_16, h264_mc23_16,
323 h264_mc30_16, h264_mc31_16, h264_mc32_16, h264_mc33_16
/// `RegisterSIMD` implementation for `H264MC`; in this file it is a no-op.
327 impl super::RegisterSIMD for super::H264MC {
// Empty body: no functions are registered or replaced here.
328 fn register_simd(&mut self) {}