]>
Commit | Line | Data |
---|---|---|
1 | use std::arch::asm; | |
2 | ||
/// Scalar bilinear chroma interpolation for a `w`-wide, `h`-tall block.
///
/// Compiled only on 32-bit x86, where it serves as the fallback for the
/// full bilinear case of `chroma_interp_8`/`chroma_interp_4` (their SIMD
/// bilinear arm is gated to x86_64 — see the `match` fallthroughs below).
///
/// `dx`/`dy` are 1/8-pel fractional offsets; the per-axis weights sum to 8,
/// so the 2-tap blend rounds with `+4 >> 3` and the 4-tap with `+0x20 >> 6`.
///
/// NOTE(review): no bounds checks — `src` must cover `h` rows of `sstride`
/// bytes plus one extra column (when dx != 0) and/or one extra row (when
/// dy != 0), and `dst` must cover `h` rows of `dstride` bytes; confirm
/// with callers.
#[cfg(target_arch = "x86")]
fn chroma_interp(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) {
    // a* weight the horizontal axis, b* the vertical axis.
    let a0 = 8 - dx;
    let a1 = dx;
    let b0 = 8 - dy;
    let b1 = dy;

    if a0 == 8 && b0 == 8 {
        // Integer position: plain row-by-row copy.
        unsafe {
            let mut src = src.as_ptr();
            let mut dst = dst.as_mut_ptr();
            for _ in 0..h {
                std::ptr::copy_nonoverlapping(src, dst, w);
                src = src.add(sstride);
                dst = dst.add(dstride);
            }
        }
    } else if a0 == 8 {
        // Vertical-only: blend each pixel with the one a row below it.
        unsafe {
            let mut src0 = src.as_ptr();
            let mut src1 = src0.add(sstride);
            let mut dst = dst.as_mut_ptr();
            for _ in 0..h {
                for x in 0..w {
                    let a = *src0.add(x);
                    let b = *src1.add(x);
                    *dst.add(x) = ((u16::from(a) * b0 + u16::from(b) * b1 + 4) >> 3) as u8;
                }
                src0 = src0.add(sstride);
                src1 = src1.add(sstride);
                dst = dst.add(dstride);
            }
        }
    } else if b0 == 8 {
        // Horizontal-only: blend each pixel with its right neighbour,
        // carrying the previous sample so each byte is read once.
        unsafe {
            let mut src = src.as_ptr();
            let mut dst = dst.as_mut_ptr();
            for _ in 0..h {
                let mut a = *src;
                for x in 0..w {
                    let b = *src.add(x + 1);
                    *dst.add(x) = ((u16::from(a) * a0 + u16::from(b) * a1 + 4) >> 3) as u8;
                    a = b;
                }
                src = src.add(sstride);
                dst = dst.add(dstride);
            }
        }
    } else {
        // Full bilinear over the 2x2 neighbourhood; a/c carry the left
        // column of the window across iterations.
        unsafe {
            let mut src0 = src.as_ptr();
            let mut src1 = src0.add(sstride);
            let mut dst = dst.as_mut_ptr();
            for _ in 0..h {
                let mut a = *src0;
                let mut c = *src1;
                for x in 0..w {
                    let b = *src0.add(x + 1);
                    let d = *src1.add(x + 1);
                    *dst.add(x) = ((u16::from(a) * a0 * b0 + u16::from(b) * a1 * b0 + u16::from(c) * a0 * b1 + u16::from(d) * a1 * b1 + 0x20) >> 6) as u8;
                    a = b;
                    c = d;
                }
                src0 = src0.add(sstride);
                src1 = src1.add(sstride);
                dst = dst.add(dstride);
            }
        }
    }
}
73 | ||
/// SSE2 bilinear chroma interpolation for an 8-pixel-wide block.
///
/// `dx`/`dy` are the 1/8-pel fractional offsets, `h` the block height in
/// rows, `sstride`/`dstride` the row strides of source and destination.
/// Dispatches to a copy, vertical-only, horizontal-only or full bilinear
/// kernel depending on which fractional offsets are zero.
///
/// NOTE(review): no bounds checks — `src` must cover `h` rows plus an
/// extra column (dx != 0) and/or an extra row (dy != 0). The `(0, 0)`
/// kernel retires four rows per iteration, so it assumes `h` is a
/// positive multiple of 4; the other kernels assume `h > 0`. Confirm
/// with callers.
pub fn chroma_interp_8(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, h: usize) {
    unsafe {
        match (dx, dy) {
            // Integer position: straight copy, four 8-byte rows per pass.
            (0, 0) => {
                asm!(
                    // stmp/dtmp point two rows below src/dst so four rows
                    // move per loop iteration.
                    "lea {stmp}, [{src} + {sstride} * 2]",
                    "lea {dtmp}, [{dst} + {dstride} * 2]",
                    "2:",
                    "movq xmm0, [{src}]",
                    "movq xmm1, [{src} + {sstride}]",
                    "movq xmm2, [{stmp}]",
                    "movq xmm3, [{stmp} + {sstride}]",
                    "movq [{dst}], xmm0",
                    "lea {src}, [{src} + {sstride} * 4]",
                    "movq [{dst} + {dstride}], xmm1",
                    "lea {stmp}, [{stmp} + {sstride} * 4]",
                    "movq [{dtmp}], xmm2",
                    "lea {dst}, [{dst} + {dstride} * 4]",
                    "movq [{dtmp} + {dstride}], xmm3",
                    "lea {dtmp}, [{dtmp} + {dstride} * 4]",
                    "sub {h}, 4",
                    "jnz 2b",
                    src = inout(reg) src.as_ptr() => _,
                    sstride = in(reg) sstride,
                    dst = inout(reg) dst.as_mut_ptr() => _,
                    dstride = in(reg) dstride,
                    h = inout(reg) h => _,
                    stmp = out(reg) _,
                    dtmp = out(reg) _,
                    out("xmm0") _,
                    out("xmm1") _,
                    out("xmm2") _,
                    out("xmm3") _,
                );
            },
            // Vertical-only: out = (row * (8 - dy) + next_row * dy + 4) >> 3.
            (0, _) => {
                asm!(
                    "pxor xmm0, xmm0",
                    // Broadcast the two weights and the rounding constant 4
                    // across eight 16-bit lanes; a1's register is reused to
                    // materialize the constant after its weight is loaded.
                    "movd xmm3, {a0:e}",
                    "movd xmm4, {a1:e}",
                    "mov {a1:e}, 0x0004",
                    "movd xmm5, {a1:e}",
                    "pshuflw xmm3, xmm3, 0",
                    "pshuflw xmm4, xmm4, 0",
                    "pshuflw xmm5, xmm5, 0",
                    "movlhps xmm3, xmm3",
                    "movlhps xmm4, xmm4",
                    "movlhps xmm5, xmm5",
                    // Prime xmm6 with the first row widened to 16 bits so
                    // each loop pass loads only one new row.
                    "movq xmm6, [{src}]",
                    "add {src}, {sstride}",
                    "punpcklbw xmm6, xmm0",
                    "2:",
                    "movaps xmm1, xmm6",
                    "movq xmm2, [{src}]",
                    "punpcklbw xmm2, xmm0",
                    "movaps xmm6, xmm2",
                    "pmullw xmm1, xmm3",
                    "pmullw xmm2, xmm4",
                    "add {src}, {sstride}",
                    "paddw xmm1, xmm2",
                    "paddw xmm1, xmm5",
                    "psraw xmm1, 3",
                    "packuswb xmm1, xmm1",
                    "movq [{dst}], xmm1",
                    "add {dst}, {dstride}",
                    "dec {h}",
                    "jnz 2b",
                    src = inout(reg) src.as_ptr() => _,
                    sstride = in(reg) sstride,
                    dst = inout(reg) dst.as_mut_ptr() => _,
                    dstride = in(reg) dstride,
                    h = inout(reg) h => _,
                    a0 = in(reg) i32::from(8 - dy),
                    a1 = inout(reg) i32::from(dy) => _,
                    out("xmm0") _,
                    out("xmm1") _,
                    out("xmm2") _,
                    out("xmm3") _,
                    out("xmm4") _,
                    out("xmm5") _,
                    out("xmm6") _,
                );
            },
            // Horizontal-only: out = (p[x] * (8 - dx) + p[x+1] * dx + 4) >> 3.
            (_, 0) => {
                asm!(
                    "pxor xmm0, xmm0",
                    // Same weight/rounding broadcast as the vertical arm.
                    "movd xmm3, {a0:e}",
                    "movd xmm4, {a1:e}",
                    "mov {a1:e}, 0x0004",
                    "movd xmm5, {a1:e}",
                    "pshuflw xmm3, xmm3, 0",
                    "pshuflw xmm4, xmm4, 0",
                    "pshuflw xmm5, xmm5, 0",
                    "movlhps xmm3, xmm3",
                    "movlhps xmm4, xmm4",
                    "movlhps xmm5, xmm5",
                    "2:",
                    // Load the row at x and at x + 1 and blend.
                    "movq xmm1, [{src}]",
                    "movq xmm2, [{src} + 1]",
                    "punpcklbw xmm1, xmm0",
                    "punpcklbw xmm2, xmm0",
                    "pmullw xmm1, xmm3",
                    "pmullw xmm2, xmm4",
                    "add {src}, {sstride}",
                    "paddw xmm1, xmm2",
                    "paddw xmm1, xmm5",
                    "psraw xmm1, 3",
                    "packuswb xmm1, xmm1",
                    "movq [{dst}], xmm1",
                    "add {dst}, {dstride}",
                    "dec {h}",
                    "jnz 2b",
                    src = inout(reg) src.as_ptr() => _,
                    sstride = inout(reg) sstride => _,
                    dst = inout(reg) dst.as_mut_ptr() => _,
                    dstride = inout(reg) dstride => _,
                    h = inout(reg) h => _,
                    a0 = inout(reg) i32::from(8 - dx) => _,
                    a1 = inout(reg) i32::from(dx) => _,
                    out("xmm0") _,
                    out("xmm1") _,
                    out("xmm2") _,
                    out("xmm3") _,
                    out("xmm4") _,
                    out("xmm5") _,
                );
            },
            // Full bilinear on 32-bit x86: the SIMD version below needs
            // xmm8, so fall back to the scalar helper.
            #[cfg(target_arch = "x86")]
            _ => chroma_interp(dst, dstride, src, sstride, dx, dy, 8, h),
            // Full bilinear (x86_64): horizontally filter each row, keep
            // the previous filtered row in xmm8, then blend the two
            // vertically with rounding 0x20 and shift 6.
            #[cfg(target_arch = "x86_64")]
            _ => {
                asm!(
                    "pxor xmm0, xmm0",
                    // Broadcast dx/dy weights (xmm3..xmm6) and the rounding
                    // constant 0x20 (xmm7) across eight 16-bit lanes.
                    "movd xmm3, {a0:e}",
                    "movd xmm4, {a1:e}",
                    "movd xmm5, {b0:e}",
                    "movd xmm6, {b1:e}",
                    "mov {a1:e}, 0x0020",
                    "movd xmm7, {a1:e}",
                    "pshuflw xmm3, xmm3, 0",
                    "pshuflw xmm4, xmm4, 0",
                    "pshuflw xmm5, xmm5, 0",
                    "pshuflw xmm6, xmm6, 0",
                    "pshuflw xmm7, xmm7, 0",
                    "movlhps xmm3, xmm3",
                    "movlhps xmm4, xmm4",
                    "movlhps xmm5, xmm5",
                    "movlhps xmm6, xmm6",
                    "movlhps xmm7, xmm7",

                    // Prologue: horizontally filter row 0 into xmm8.
                    "movq xmm8, [{src}]",
                    "movq xmm2, [{src} + 1]",
                    "punpcklbw xmm8, xmm0",
                    "punpcklbw xmm2, xmm0",
                    "pmullw xmm8, xmm3",
                    "pmullw xmm2, xmm4",
                    "add {src}, {sstride}",
                    "paddw xmm8, xmm2",

                    "2:",
                    // Horizontally filter the next row, then rotate it into
                    // xmm8 while the previous row moves to xmm2.
                    "movq xmm1, [{src}]",
                    "movq xmm2, [{src} + 1]",
                    "punpcklbw xmm1, xmm0",
                    "punpcklbw xmm2, xmm0",
                    "pmullw xmm1, xmm3",
                    "pmullw xmm2, xmm4",
                    "add {src}, {sstride}",
                    "paddw xmm1, xmm2",
                    "movaps xmm2, xmm8",
                    "movaps xmm8, xmm1",

                    // Vertical blend of the two filtered rows.
                    "pmullw xmm1, xmm6",
                    "pmullw xmm2, xmm5",
                    "paddw xmm1, xmm2",
                    "paddw xmm1, xmm7",
                    "psraw xmm1, 6",
                    "packuswb xmm1, xmm1",
                    "movq [{dst}], xmm1",
                    "add {dst}, {dstride}",
                    "dec {h}",
                    "jnz 2b",
                    src = inout(reg) src.as_ptr() => _,
                    sstride = inout(reg) sstride => _,
                    dst = inout(reg) dst.as_mut_ptr() => _,
                    dstride = inout(reg) dstride => _,
                    h = inout(reg) h => _,
                    a0 = inout(reg) i32::from(8 - dx) => _,
                    a1 = inout(reg) i32::from(dx) => _,
                    b0 = inout(reg) i32::from(8 - dy) => _,
                    b1 = inout(reg) i32::from(dy) => _,
                    out("xmm0") _,
                    out("xmm1") _,
                    out("xmm2") _,
                    out("xmm3") _,
                    out("xmm4") _,
                    out("xmm5") _,
                    out("xmm6") _,
                    out("xmm7") _,
                    out("xmm8") _,
                );
            },
        };
    }
}
278 | ||
/// SSE2 bilinear chroma interpolation for a 4-pixel-wide block.
///
/// Same dispatch structure as [`chroma_interp_8`] but moves 4 bytes per
/// row (`movd` instead of `movq`), so only the low four 16-bit lanes
/// matter and the weight broadcast needs no `movlhps`.
///
/// NOTE(review): no bounds checks — `src` must cover `h` rows plus an
/// extra column (dx != 0) and/or an extra row (dy != 0). The `(0, 0)`
/// kernel retires two rows per iteration, so it assumes `h` is a
/// positive multiple of 2; the other kernels assume `h > 0`. Confirm
/// with callers.
pub fn chroma_interp_4(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, h: usize) {
    unsafe {
        match (dx, dy) {
            // Integer position: straight copy, two 4-byte rows per pass.
            (0, 0) => {
                asm!(
                    "2:",
                    "movd xmm0, [{src}]",
                    "movd xmm1, [{src} + {sstride}]",
                    "movd [{dst}], xmm0",
                    "lea {src}, [{src} + {sstride} * 2]",
                    "movd [{dst} + {dstride}], xmm1",
                    "lea {dst}, [{dst} + {dstride} * 2]",
                    "sub {h}, 2",
                    "jnz 2b",
                    src = inout(reg) src.as_ptr() => _,
                    sstride = in(reg) sstride,
                    dst = inout(reg) dst.as_mut_ptr() => _,
                    dstride = in(reg) dstride,
                    h = inout(reg) h => _,
                    out("xmm0") _,
                    out("xmm1") _,
                );
            },
            // Vertical-only: out = (row * (8 - dy) + next_row * dy + 4) >> 3.
            (0, _) => {
                asm!(
                    "pxor xmm0, xmm0",
                    // Broadcast weights and the rounding constant 4 into the
                    // low four 16-bit lanes; a1's register is reused for the
                    // constant after its weight is loaded.
                    "movd xmm3, {a0:e}",
                    "movd xmm4, {a1:e}",
                    "mov {a1:e}, 0x0004",
                    "movd xmm5, {a1:e}",
                    "pshuflw xmm3, xmm3, 0",
                    "pshuflw xmm4, xmm4, 0",
                    "pshuflw xmm5, xmm5, 0",
                    // Prime xmm6 with the first row so each loop pass loads
                    // only one new row.
                    "movd xmm6, [{src}]",
                    "add {src}, {sstride}",
                    "punpcklbw xmm6, xmm0",
                    "2:",
                    "movaps xmm1, xmm6",
                    "movd xmm2, [{src}]",
                    "punpcklbw xmm2, xmm0",
                    "movaps xmm6, xmm2",
                    "pmullw xmm1, xmm3",
                    "pmullw xmm2, xmm4",
                    "add {src}, {sstride}",
                    "paddw xmm1, xmm2",
                    "paddw xmm1, xmm5",
                    "psraw xmm1, 3",
                    "packuswb xmm1, xmm1",
                    "movd [{dst}], xmm1",
                    "add {dst}, {dstride}",
                    "dec {h}",
                    "jnz 2b",
                    src = inout(reg) src.as_ptr() => _,
                    sstride = inout(reg) sstride => _,
                    dst = inout(reg) dst.as_mut_ptr() => _,
                    dstride = inout(reg) dstride => _,
                    h = inout(reg) h => _,
                    a0 = inout(reg) i32::from(8 - dy) => _,
                    a1 = inout(reg) i32::from(dy) => _,
                    out("xmm0") _,
                    out("xmm1") _,
                    out("xmm2") _,
                    out("xmm3") _,
                    out("xmm4") _,
                    out("xmm5") _,
                    out("xmm6") _,
                );
            },
            // Horizontal-only: out = (p[x] * (8 - dx) + p[x+1] * dx + 4) >> 3.
            (_, 0) => {
                asm!(
                    "pxor xmm0, xmm0",
                    "movd xmm3, {a0:e}",
                    "movd xmm4, {a1:e}",
                    "mov {a1:e}, 0x0004",
                    "movd xmm5, {a1:e}",
                    "pshuflw xmm3, xmm3, 0",
                    "pshuflw xmm4, xmm4, 0",
                    "pshuflw xmm5, xmm5, 0",
                    "2:",
                    // Load the row at x and at x + 1 and blend.
                    "movd xmm1, [{src}]",
                    "movd xmm2, [{src} + 1]",
                    "punpcklbw xmm1, xmm0",
                    "punpcklbw xmm2, xmm0",
                    "pmullw xmm1, xmm3",
                    "pmullw xmm2, xmm4",
                    "add {src}, {sstride}",
                    "paddw xmm1, xmm2",
                    "paddw xmm1, xmm5",
                    "psraw xmm1, 3",
                    "packuswb xmm1, xmm1",
                    "movd [{dst}], xmm1",
                    "add {dst}, {dstride}",
                    "dec {h}",
                    "jnz 2b",
                    src = inout(reg) src.as_ptr() => _,
                    sstride = inout(reg) sstride => _,
                    dst = inout(reg) dst.as_mut_ptr() => _,
                    dstride = inout(reg) dstride => _,
                    h = inout(reg) h => _,
                    a0 = inout(reg) i32::from(8 - dx) => _,
                    a1 = inout(reg) i32::from(dx) => _,
                    out("xmm0") _,
                    out("xmm1") _,
                    out("xmm2") _,
                    out("xmm3") _,
                    out("xmm4") _,
                    out("xmm5") _,
                );
            },
            // Full bilinear on 32-bit x86: the SIMD version below needs
            // xmm8, so fall back to the scalar helper.
            #[cfg(target_arch = "x86")]
            _ => chroma_interp(dst, dstride, src, sstride, dx, dy, 4, h),
            // Full bilinear (x86_64): horizontally filter each row, keep
            // the previous filtered row in xmm8, then blend the two
            // vertically with rounding 0x20 and shift 6.
            #[cfg(target_arch = "x86_64")]
            _ => {
                asm!(
                    "pxor xmm0, xmm0",
                    // Broadcast dx/dy weights (xmm3..xmm6) and the rounding
                    // constant 0x20 (xmm7) into the low 16-bit lanes.
                    "movd xmm3, {a0:e}",
                    "movd xmm4, {a1:e}",
                    "movd xmm5, {b0:e}",
                    "movd xmm6, {b1:e}",
                    "mov {a1:e}, 0x0020",
                    "movd xmm7, {a1:e}",
                    "pshuflw xmm3, xmm3, 0",
                    "pshuflw xmm4, xmm4, 0",
                    "pshuflw xmm5, xmm5, 0",
                    "pshuflw xmm6, xmm6, 0",
                    "pshuflw xmm7, xmm7, 0",

                    // Prologue: horizontally filter row 0 into xmm8.
                    "movd xmm8, [{src}]",
                    "movd xmm2, [{src} + 1]",
                    "punpcklbw xmm8, xmm0",
                    "punpcklbw xmm2, xmm0",
                    "pmullw xmm8, xmm3",
                    "pmullw xmm2, xmm4",
                    "add {src}, {sstride}",
                    "paddw xmm8, xmm2",

                    "2:",
                    // Horizontally filter the next row, then rotate it into
                    // xmm8 while the previous row moves to xmm2.
                    "movd xmm1, [{src}]",
                    "movd xmm2, [{src} + 1]",
                    "punpcklbw xmm1, xmm0",
                    "punpcklbw xmm2, xmm0",
                    "pmullw xmm1, xmm3",
                    "pmullw xmm2, xmm4",
                    "add {src}, {sstride}",
                    "paddw xmm1, xmm2",
                    "movaps xmm2, xmm8",
                    "movaps xmm8, xmm1",

                    // Vertical blend of the two filtered rows.
                    "pmullw xmm1, xmm6",
                    "pmullw xmm2, xmm5",
                    "paddw xmm1, xmm2",
                    "paddw xmm1, xmm7",
                    "psraw xmm1, 6",
                    "packuswb xmm1, xmm1",
                    "movd [{dst}], xmm1",
                    "add {dst}, {dstride}",
                    "dec {h}",
                    "jnz 2b",
                    src = inout(reg) src.as_ptr() => _,
                    sstride = inout(reg) sstride => _,
                    dst = inout(reg) dst.as_mut_ptr() => _,
                    dstride = inout(reg) dstride => _,
                    h = inout(reg) h => _,
                    a0 = inout(reg) i32::from(8 - dx) => _,
                    a1 = inout(reg) i32::from(dx) => _,
                    b0 = inout(reg) i32::from(8 - dy) => _,
                    b1 = inout(reg) i32::from(dy) => _,
                    out("xmm0") _,
                    out("xmm1") _,
                    out("xmm2") _,
                    out("xmm3") _,
                    out("xmm4") _,
                    out("xmm5") _,
                    out("xmm6") _,
                    out("xmm7") _,
                    out("xmm8") _,
                );
            },
        };
    }
}
460 | ||
/// Blends two samples with 3-bit fractional weights (`b0 + b1 == 8`),
/// rounding to nearest before the shift.
#[inline]
fn chr_interp2(a: u8, b: u8, b0: u16, b1: u16) -> u8 {
    let weighted = u16::from(a) * b0 + u16::from(b) * b1;
    ((weighted + 4) >> 3) as u8
}
/// Bilinear blend of a 2x2 neighbourhood with 3-bit weights per axis
/// (`a0 + a1 == 8`, `b0 + b1 == 8`), rounding to nearest before the shift.
/// Worst case is 255 * 8 * 8 + 0x20 = 16352, which fits in `u16`.
#[inline]
fn chr_interp4(a: u8, b: u8, c: u8, d: u8, a0: u16, a1: u16, b0: u16, b1: u16) -> u8 {
    let top = u16::from(a) * a0 + u16::from(b) * a1;
    let bot = u16::from(c) * a0 + u16::from(d) * a1;
    ((top * b0 + bot * b1 + 0x20) >> 6) as u8
}
469 | ||
470 | pub fn chroma_interp_2(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, h: usize) { | |
471 | let a0 = 8 - dx; | |
472 | let a1 = dx; | |
473 | let b0 = 8 - dy; | |
474 | let b1 = dy; | |
475 | ||
476 | if a0 == 8 && b0 == 8 { | |
477 | unsafe { | |
478 | let mut src = src.as_ptr(); | |
479 | let mut dst = dst.as_mut_ptr(); | |
480 | std::ptr::copy_nonoverlapping(src, dst, 2); | |
481 | src = src.add(sstride); | |
482 | dst = dst.add(dstride); | |
483 | std::ptr::copy_nonoverlapping(src, dst, 2); | |
484 | if h == 4 { | |
485 | src = src.add(sstride); | |
486 | dst = dst.add(dstride); | |
487 | std::ptr::copy_nonoverlapping(src, dst, 2); | |
488 | src = src.add(sstride); | |
489 | dst = dst.add(dstride); | |
490 | std::ptr::copy_nonoverlapping(src, dst, 2); | |
491 | } | |
492 | } | |
493 | } else if a0 == 8 { | |
494 | unsafe { | |
495 | let mut src0 = src.as_ptr(); | |
496 | let mut src1 = src0.add(sstride); | |
497 | let mut dst = dst.as_mut_ptr(); | |
498 | *dst = chr_interp2(*src0, *src1, b0, b1); | |
499 | *dst.add(1) = chr_interp2(*src0.add(1), *src1.add(1), b0, b1); | |
500 | *dst.add(dstride) = chr_interp2(*src0.add(sstride), *src1.add(sstride), b0, b1); | |
501 | *dst.add(dstride + 1) = chr_interp2(*src0.add(sstride + 1), *src1.add(sstride + 1), b0, b1); | |
502 | if h == 4 { | |
503 | src0 = src0.add(sstride * 2); | |
504 | src1 = src1.add(sstride * 2); | |
505 | dst = dst.add(dstride * 2); | |
506 | *dst = chr_interp2(*src0, *src1, b0, b1); | |
507 | *dst.add(1) = chr_interp2(*src0.add(1), *src1.add(1), b0, b1); | |
508 | *dst.add(dstride) = chr_interp2(*src0.add(sstride), *src1.add(sstride), b0, b1); | |
509 | *dst.add(dstride + 1) = chr_interp2(*src0.add(sstride + 1), *src1.add(sstride + 1), b0, b1); | |
510 | } | |
511 | } | |
512 | } else if b0 == 8 { | |
513 | unsafe { | |
514 | let mut src = src.as_ptr(); | |
515 | let mut dst = dst.as_mut_ptr(); | |
516 | let (a, b, c) = (*src, *src.add(1), *src.add(2)); | |
517 | *dst = chr_interp2(a, b, a0, a1); | |
518 | *dst.add(1) = chr_interp2(b, c, a0, a1); | |
519 | let (a, b, c) = (*src.add(sstride), *src.add(sstride + 1), *src.add(sstride + 2)); | |
520 | *dst.add(dstride) = chr_interp2(a, b, a0, a1); | |
521 | *dst.add(dstride + 1) = chr_interp2(b, c, a0, a1); | |
522 | if h == 4 { | |
523 | src = src.add(sstride * 2); | |
524 | dst = dst.add(dstride * 2); | |
525 | let (a, b, c) = (*src, *src.add(1), *src.add(2)); | |
526 | *dst = chr_interp2(a, b, a0, a1); | |
527 | *dst.add(1) = chr_interp2(b, c, a0, a1); | |
528 | let (a, b, c) = (*src.add(sstride), *src.add(sstride + 1), *src.add(sstride + 2)); | |
529 | *dst.add(dstride) = chr_interp2(a, b, a0, a1); | |
530 | *dst.add(dstride + 1) = chr_interp2(b, c, a0, a1); | |
531 | } | |
532 | } | |
533 | } else { | |
534 | unsafe { | |
535 | let height = h; | |
536 | let mut src0 = src.as_ptr(); | |
537 | let mut src1 = src0.add(sstride); | |
538 | let mut dst = dst.as_mut_ptr(); | |
539 | ||
540 | let (a, b, c) = (*src0, *src0.add(1), *src0.add(2)); | |
541 | let (d, e, f) = (*src1, *src1.add(1), *src1.add(2)); | |
542 | let (g, h, i) = (*src1.add(sstride), *src1.add(sstride + 1), *src1.add(sstride + 2)); | |
543 | *dst = chr_interp4(a, b, d, e, a0, a1, b0, b1); | |
544 | *dst.add(1) = chr_interp4(b, c, e, f, a0, a1, b0, b1); | |
545 | *dst.add(dstride) = chr_interp4(d, e, g, h, a0, a1, b0, b1); | |
546 | *dst.add(dstride + 1) = chr_interp4(e, f, h, i, a0, a1, b0, b1); | |
547 | if height == 4 { | |
548 | src0 = src0.add(sstride * 3); | |
549 | src1 = src1.add(sstride * 3); | |
550 | dst = dst.add(dstride * 2); | |
551 | let (a, b, c) = (*src0, *src0.add(1), *src0.add(2)); | |
552 | let (d, e, f) = (*src1, *src1.add(1), *src1.add(2)); | |
553 | *dst = chr_interp4(g, h, a, b, a0, a1, b0, b1); | |
554 | *dst.add(1) = chr_interp4(h, i, b, c, a0, a1, b0, b1); | |
555 | *dst.add(dstride) = chr_interp4(a, b, d, e, a0, a1, b0, b1); | |
556 | *dst.add(dstride + 1) = chr_interp4(b, c, e, f, a0, a1, b0, b1); | |
557 | } | |
558 | } | |
559 | } | |
560 | } | |
561 |