]>
Commit | Line | Data |
---|---|---|
1 | use super::clip_u8; | |
2 | ||
// Row stride (in elements) of the on-stack scratch buffers used by the
// subpel helpers: 32 leaves room for a 16-wide block plus the 5 extra
// columns needed by the 6-tap filter.
const TMP_BUF_STRIDE: usize = 32;
4 | ||
/// One-direction quarter-pel luma interpolation.
///
/// Applies the H.264 6-tap half-pel filter (1, -5, 20, 20, -5, 1) either
/// horizontally (`hor == true`, tap step 1) or vertically (tap step
/// `sstride`) over a `w`x`h` block, then averages each filtered value with
/// the full-pel sample under one of the two centre taps: offset `step * 2`
/// when `avg0` is true, otherwise `step * 3`.
fn interp_block1(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize, hor: bool, avg0: bool) {
    unsafe {
        // SAFETY(review): no bounds checks here -- assumes the caller passes a
        // source slice with at least 5 extra samples of padding along the
        // filter direction for every one of the h rows, and a dst covering
        // h rows of w pixels at dstride spacing; TODO confirm against the
        // callers' edge-extension code.
        let step = if hor { 1 } else { sstride };
        // Full-pel neighbour used for the quarter-pel average.
        let avgidx = if avg0 { step * 2 } else { step * 3 };
        let mut src = src.as_ptr();
        let mut dst = dst.as_mut_ptr();
        for _ in 0..h {
            for _ in 0..w {
                // 6-tap filter with 1/2 rounding: (sum + 16) >> 5, clipped to u8.
                let t = clip_u8(( i16::from(*src)
                                  - 5 * i16::from(*src.add(step))
                                  + 20 * i16::from(*src.add(step * 2))
                                  + 20 * i16::from(*src.add(step * 3))
                                  - 5 * i16::from(*src.add(step * 4))
                                  + i16::from(*src.add(step * 5))
                                  + 16) >> 5);
                // Quarter-pel value: round-up average of the half-pel result
                // and the full-pel neighbour.
                *dst = ((u16::from(t) + u16::from(*src.add(avgidx)) + 1) >> 1) as u8;
                src = src.add(1);
                dst = dst.add(1);
            }
            // Rewind to column 0 and advance both pointers one row.
            dst = dst.sub(w).add(dstride);
            src = src.sub(w).add(sstride);
        }
    }
}
29 | ||
/// One-direction half-pel luma interpolation.
///
/// Applies the H.264 6-tap filter (1, -5, 20, 20, -5, 1) horizontally
/// (`hor == true`, tap step 1) or vertically (tap step `sstride`) over a
/// `w`x`h` block, with 1/2 rounding ((sum + 16) >> 5) and clipping to u8.
fn interp_block2(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize, hor: bool) {
    unsafe {
        // SAFETY(review): assumes the caller provides 5 extra samples of
        // padding along the filter direction per row and a dst of h rows of
        // w pixels -- TODO confirm against the callers' edge handling.
        let step = if hor { 1 } else { sstride };
        let mut pix = dst.as_mut_ptr();
        let mut src = src.as_ptr();
        for _ in 0..h {
            for x in 0..w {
                // 6-tap filter with rounding and clipping.
                *pix.add(x) = clip_u8(( i16::from(*src)
                                        - 5 * i16::from(*src.add(step))
                                        + 20 * i16::from(*src.add(step * 2))
                                        + 20 * i16::from(*src.add(step * 3))
                                        - 5 * i16::from(*src.add(step * 4))
                                        + i16::from(*src.add(step * 5))
                                        + 16) >> 5);
                src = src.add(1);
            }
            // dst is indexed by x, so only its row pointer advances; src is
            // stepped per pixel and must be rewound before the row advance.
            pix = pix.add(dstride);
            src = src.sub(w);
            src = src.add(sstride);
        }
    }
}
52 | ||
53 | fn mc_avg_tmp(dst: &mut [u8], dstride: usize, w: usize, h: usize, tmp: &[u8], tmp2: &[u8]) { | |
54 | unsafe { | |
55 | let mut src1 = tmp.as_ptr(); | |
56 | let mut src2 = tmp2.as_ptr(); | |
57 | let mut dst = dst.as_mut_ptr(); | |
58 | for _ in 0..h { | |
59 | for x in 0..w { | |
60 | let a = *src1.add(x); | |
61 | let b = *src2.add(x); | |
62 | *dst.add(x) = ((u16::from(a) + u16::from(b) + 1) >> 1) as u8; | |
63 | } | |
64 | dst = dst.add(dstride); | |
65 | src1 = src1.add(TMP_BUF_STRIDE); | |
66 | src2 = src2.add(TMP_BUF_STRIDE); | |
67 | } | |
68 | } | |
69 | } | |
70 | ||
/// Full-pel "interpolation" (subpel position 0,0): plain copy of a `w`x`h`
/// block from `src` (row stride `sstride`) to `dst` (row stride `dstride`).
///
/// Replaces the previous raw-pointer `copy_nonoverlapping` loop with safe
/// per-row slice copies; `copy_from_slice` compiles to the same memcpy while
/// keeping the accesses bounds-checked.
fn h264_mc00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    for (drow, srow) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(h) {
        drow[..w].copy_from_slice(&srow[..w]);
    }
}
82 | ||
/// Horizontal quarter-pel biased towards the first centre tap (avg0 = true).
/// The `sstride * 2` offset presumably skips the two padded rows above the
/// block, leaving only the horizontal padding -- TODO confirm with callers.
fn h264_mc01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, true);
}
86 | ||
/// Horizontal half-pel: plain 6-tap filter along the row (hor = true),
/// skipping the two padded rows above the block.
fn h264_mc02(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block2(dst, dstride, &src[sstride * 2..], sstride, w, h, true);
}
90 | ||
/// Horizontal quarter-pel biased towards the second centre tap (avg0 = false).
fn h264_mc03(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, false);
}
94 | ||
/// Vertical quarter-pel biased towards the first centre tap (avg0 = true).
/// The `2` offset presumably skips the two padded columns left of the block.
fn h264_mc10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[2..], sstride, w, h, false, true);
}
98 | ||
99 | fn h264_mc11(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
100 | let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
101 | let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
102 | h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
103 | h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h); | |
104 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
105 | } | |
106 | ||
107 | fn h264_mc12(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
108 | let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
109 | let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
110 | h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
111 | h264_mc22(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h); | |
112 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
113 | } | |
114 | ||
115 | fn h264_mc13(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
116 | let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
117 | let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
118 | h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
119 | h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h); | |
120 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
121 | } | |
122 | ||
/// Vertical half-pel: plain 6-tap filter along the column (hor = false),
/// skipping the two padded columns left of the block.
fn h264_mc20(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block2(dst, dstride, &src[2..], sstride, w, h, false);
}
126 | ||
127 | fn h264_mc21(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
128 | let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
129 | let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
130 | h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
131 | h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h); | |
132 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
133 | } | |
134 | ||
135 | fn h264_mc22(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
136 | let mut tmp: [i32; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
137 | unsafe { | |
138 | let mut src = src.as_ptr(); | |
139 | let mut dst = tmp.as_mut_ptr(); | |
140 | for _ in 0..h { | |
141 | for _ in 0..w+5 { | |
142 | *dst = i32::from(*src) | |
143 | - 5 * i32::from(*src.add(sstride)) | |
144 | + 20 * i32::from(*src.add(sstride * 2)) | |
145 | + 20 * i32::from(*src.add(sstride * 3)) | |
146 | - 5 * i32::from(*src.add(sstride * 4)) | |
147 | + i32::from(*src.add(sstride * 5)); | |
148 | dst = dst.add(1); | |
149 | src = src.add(1); | |
150 | } | |
151 | src = src.sub(w+5).add(sstride); | |
152 | dst = dst.sub(w+5).add(TMP_BUF_STRIDE); | |
153 | } | |
154 | } | |
155 | unsafe { | |
156 | let mut dst = dst.as_mut_ptr(); | |
157 | let mut src = tmp.as_ptr(); | |
158 | for _ in 0..h { | |
159 | for _ in 0..w { | |
160 | *dst = clip_u8(((*src - 5 * *src.add(1) + 20 * *src.add(2) + 20 * *src.add(3) - 5 * *src.add(4) + *src.add(5) + 512) >> 10) as i16); | |
161 | dst = dst.add(1); | |
162 | src = src.add(1); | |
163 | } | |
164 | dst = dst.sub(w).add(dstride); | |
165 | src = src.sub(w).add(TMP_BUF_STRIDE); | |
166 | } | |
167 | } | |
168 | } | |
169 | ||
170 | fn h264_mc23(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
171 | let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
172 | let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
173 | h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
174 | h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h); | |
175 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
176 | } | |
177 | ||
/// Vertical quarter-pel biased towards the second centre tap (avg0 = false).
fn h264_mc30(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[2..], sstride, w, h, false, false);
}
181 | ||
182 | fn h264_mc31(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
183 | let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
184 | let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
185 | h264_mc20(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
186 | h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h); | |
187 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
188 | } | |
189 | ||
190 | fn h264_mc32(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
191 | let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
192 | let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
193 | h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
194 | h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h); | |
195 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
196 | } | |
197 | ||
198 | fn h264_mc33(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
199 | let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
200 | let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() }; | |
201 | h264_mc20(&mut tmp, TMP_BUF_STRIDE, &src[1..], sstride, w, h); | |
202 | h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h); | |
203 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
204 | } | |
205 | ||
206 | ||
/// Bilinear chroma motion compensation with eighth-pel weights.
///
/// `dx`/`dy` are the fractional offsets (0..=7); each output pixel is the
/// weighted average of its 2x2 source neighbourhood with weights
/// (8-dx, dx) x (8-dy, dy) and half rounding. The four branches handle the
/// degenerate cases: plain copy, vertical-only, horizontal-only, and the
/// full 2-D blend.
///
/// Rewritten with safe indexed loops: the arithmetic is identical to the
/// previous raw-pointer version, but out-of-range strides/sizes now panic
/// instead of reading out of bounds.
pub fn chroma_interp(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) {
    let a0 = 8 - dx;
    let a1 = dx;
    let b0 = 8 - dy;
    let b1 = dy;

    if a0 == 8 && b0 == 8 {
        // No fractional offset: straight block copy.
        for y in 0..h {
            dst[y * dstride..y * dstride + w]
                .copy_from_slice(&src[y * sstride..y * sstride + w]);
        }
    } else if a0 == 8 {
        // Vertical-only blend between each pixel and the one below it.
        for y in 0..h {
            for x in 0..w {
                let a = u16::from(src[y * sstride + x]);
                let b = u16::from(src[(y + 1) * sstride + x]);
                dst[y * dstride + x] = ((a * b0 + b * b1 + 4) >> 3) as u8;
            }
        }
    } else if b0 == 8 {
        // Horizontal-only blend between each pixel and its right neighbour.
        for y in 0..h {
            for x in 0..w {
                let a = u16::from(src[y * sstride + x]);
                let b = u16::from(src[y * sstride + x + 1]);
                dst[y * dstride + x] = ((a * a0 + b * a1 + 4) >> 3) as u8;
            }
        }
    } else {
        // Full bilinear blend; the four weights sum to 64, hence the +0x20
        // rounding and >> 6. Worst case 255 * 64 + 32 fits in u16.
        for y in 0..h {
            for x in 0..w {
                let a = u16::from(src[y * sstride + x]);
                let b = u16::from(src[y * sstride + x + 1]);
                let c = u16::from(src[(y + 1) * sstride + x]);
                let d = u16::from(src[(y + 1) * sstride + x + 1]);
                dst[y * dstride + x] =
                    ((a * a0 * b0 + b * a1 * b0 + c * a0 * b1 + d * a1 * b1 + 0x20) >> 6) as u8;
            }
        }
    }
}
276 | ||
/// Generates fixed-width wrappers (w = 4, 8 and 16) around a generic
/// width/height MC function so the wrappers fit the narrower signature used
/// by the `H264_LUMA_INTERP` dispatch tables below.
macro_rules! luma_mc {
    ($orig:ident, $func4:ident, $func8:ident, $func16:ident) => {
        fn $func4(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) {
            $orig(dst, dstride, src, sstride, 4, h);
        }
        fn $func8(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) {
            $orig(dst, dstride, src, sstride, 8, h);
        }
        fn $func16(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) {
            $orig(dst, dstride, src, sstride, 16, h);
        }
    }
}
290 | ||
// Instantiate the 4-, 8- and 16-pixel-wide wrappers for all 16 subpel modes.
luma_mc!(h264_mc00, h264_mc00_4, h264_mc00_8, h264_mc00_16);
luma_mc!(h264_mc01, h264_mc01_4, h264_mc01_8, h264_mc01_16);
luma_mc!(h264_mc02, h264_mc02_4, h264_mc02_8, h264_mc02_16);
luma_mc!(h264_mc03, h264_mc03_4, h264_mc03_8, h264_mc03_16);
luma_mc!(h264_mc10, h264_mc10_4, h264_mc10_8, h264_mc10_16);
luma_mc!(h264_mc11, h264_mc11_4, h264_mc11_8, h264_mc11_16);
luma_mc!(h264_mc12, h264_mc12_4, h264_mc12_8, h264_mc12_16);
luma_mc!(h264_mc13, h264_mc13_4, h264_mc13_8, h264_mc13_16);
luma_mc!(h264_mc20, h264_mc20_4, h264_mc20_8, h264_mc20_16);
luma_mc!(h264_mc21, h264_mc21_4, h264_mc21_8, h264_mc21_16);
luma_mc!(h264_mc22, h264_mc22_4, h264_mc22_8, h264_mc22_16);
luma_mc!(h264_mc23, h264_mc23_4, h264_mc23_8, h264_mc23_16);
luma_mc!(h264_mc30, h264_mc30_4, h264_mc30_8, h264_mc30_16);
luma_mc!(h264_mc31, h264_mc31_4, h264_mc31_8, h264_mc31_16);
luma_mc!(h264_mc32, h264_mc32_4, h264_mc32_8, h264_mc32_16);
luma_mc!(h264_mc33, h264_mc33_4, h264_mc33_8, h264_mc33_16);
307 | ||
/// Luma interpolation dispatch: one 16-entry table per block width
/// (4, 8, 16 pixels), each listing the `h264_mcYX` wrappers in raster
/// order of the subpel position (rows appear to vary the second digit
/// fastest -- confirm index formula against the caller).
pub const H264_LUMA_INTERP: &[[super::MCFunc; 16]; 3] = &[
    [
        h264_mc00_4, h264_mc01_4, h264_mc02_4, h264_mc03_4,
        h264_mc10_4, h264_mc11_4, h264_mc12_4, h264_mc13_4,
        h264_mc20_4, h264_mc21_4, h264_mc22_4, h264_mc23_4,
        h264_mc30_4, h264_mc31_4, h264_mc32_4, h264_mc33_4
    ], [
        h264_mc00_8, h264_mc01_8, h264_mc02_8, h264_mc03_8,
        h264_mc10_8, h264_mc11_8, h264_mc12_8, h264_mc13_8,
        h264_mc20_8, h264_mc21_8, h264_mc22_8, h264_mc23_8,
        h264_mc30_8, h264_mc31_8, h264_mc32_8, h264_mc33_8
    ], [
        h264_mc00_16, h264_mc01_16, h264_mc02_16, h264_mc03_16,
        h264_mc10_16, h264_mc11_16, h264_mc12_16, h264_mc13_16,
        h264_mc20_16, h264_mc21_16, h264_mc22_16, h264_mc23_16,
        h264_mc30_16, h264_mc31_16, h264_mc32_16, h264_mc33_16
    ]
];