use super::clip_u8;

const TMP_BUF_STRIDE: usize = 32;

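// Six-tap (1, -5, 20, 20, -5, 1) half-pel filter fused with a rounding average
// against one of the two bracketing full-pel samples; this produces the H.264
// quarter-pel positions that lie between a full-pel and a half-pel sample.
// `hor` selects the filter direction, `avg0` picks which full-pel neighbour
// (at tap offset 2 or 3) is averaged in.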
fn interp_block1(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize, hor: bool, avg0: bool) {
    let step = if hor { 1 } else { sstride };
    let mut idx = 0;
    let avgidx = if avg0 { step * 2 } else { step * 3 };

    for dline in dst.chunks_mut(dstride).take(h) {
        for (x, pix) in dline.iter_mut().take(w).enumerate() {
            let t = clip_u8((       i16::from(src[idx + x])
                              - 5 * i16::from(src[idx + x + step])
                             + 20 * i16::from(src[idx + x + step * 2])
                             + 20 * i16::from(src[idx + x + step * 3])
                              - 5 * i16::from(src[idx + x + step * 4])
                                  + i16::from(src[idx + x + step * 5])
                                  + 16) >> 5);
            *pix = ((u16::from(t) + u16::from(src[idx + x + avgidx]) + 1) >> 1) as u8;
        }
        idx += sstride;
    }
}

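// Plain six-tap half-pel interpolation in one direction, without the extra
// full-pel averaging step.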
fn interp_block2(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize, hor: bool) {
    let step = if hor { 1 } else { sstride };
    let mut idx = 0;
    for dline in dst.chunks_mut(dstride).take(h) {
        for (x, pix) in dline.iter_mut().take(w).enumerate() {
            *pix = clip_u8((       i16::from(src[idx + x])
                             - 5 * i16::from(src[idx + x + step])
                            + 20 * i16::from(src[idx + x + step * 2])
                            + 20 * i16::from(src[idx + x + step * 3])
                             - 5 * i16::from(src[idx + x + step * 4])
                                 + i16::from(src[idx + x + step * 5])
                                 + 16) >> 5);
        }
        idx += sstride;
    }
}

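// Rounding average of two temporary interpolation buffers, used to build the
// diagonal quarter-pel positions from two simpler interpolations.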
fn mc_avg_tmp(dst: &mut [u8], dstride: usize, w: usize, h: usize, tmp: &[u8], tmp2: &[u8]) {
    for (dline, (sline0, sline1)) in dst.chunks_mut(dstride).zip(tmp.chunks(TMP_BUF_STRIDE).zip(tmp2.chunks(TMP_BUF_STRIDE))).take(h) {
        for (pix, (&a, &b)) in dline.iter_mut().zip(sline0.iter().zip(sline1.iter())).take(w) {
            *pix = ((u16::from(a) + u16::from(b) + 1) >> 1) as u8;
        }
    }
}

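// Full-pel position: a straight block copy.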
fn h264_mc00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(h) {
        dline[..w].copy_from_slice(&sline[..w]);
    }
}

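// The h264_mcXY functions below cover the fractional quarter-pel positions.
// Judging by the offsets they apply, they expect `src` to point two rows and
// two columns before the block so the six-tap filter has its margins; the
// positions that are not purely horizontal or vertical are built by averaging
// two simpler interpolations, following the H.264 quarter-sample derivation.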
fn h264_mc01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, true);
}

fn h264_mc02(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block2(dst, dstride, &src[sstride * 2..], sstride, w, h, true);
}

fn h264_mc03(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, false);
}

fn h264_mc10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[2..], sstride, w, h, false, true);
}

fn h264_mc11(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc02(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

fn h264_mc12(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc02(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc22(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

fn h264_mc13(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc02(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

fn h264_mc20(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block2(dst, dstride, &src[2..], sstride, w, h, false);
}

fn h264_mc21(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc22(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

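// Centre half-pel position: a vertical six-tap pass into 32-bit intermediates,
// then a horizontal six-tap pass with combined rounding ((... + 512) >> 10),
// matching the two-stage derivation of position "j" in the H.264 spec.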
fn h264_mc22(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp = [0i32; TMP_BUF_STRIDE * 16];
    let mut idx = 0;
    // first pass: vertical filter over w + 5 columns, kept at full precision
    for dline in tmp.chunks_mut(TMP_BUF_STRIDE).take(h) {
        for (x, pix) in dline.iter_mut().take(w + 5).enumerate() {
            *pix =        i32::from(src[idx + x])
                    - 5 * i32::from(src[idx + x + sstride])
                   + 20 * i32::from(src[idx + x + sstride * 2])
                   + 20 * i32::from(src[idx + x + sstride * 3])
                    - 5 * i32::from(src[idx + x + sstride * 4])
                        + i32::from(src[idx + x + sstride * 5]);
        }
        idx += sstride;
    }
    // second pass: horizontal filter over the intermediates with the combined
    // rounding and downshift
    for (dline, sline) in dst.chunks_mut(dstride).zip(tmp.chunks(TMP_BUF_STRIDE)).take(h) {
        for (x, pix) in dline.iter_mut().take(w).enumerate() {
            *pix = clip_u8(((sline[x] - 5 * sline[x + 1] + 20 * sline[x + 2] + 20 * sline[x + 3] - 5 * sline[x + 4] + sline[x + 5] + 512) >> 10) as i16);
        }
    }
}

fn h264_mc23(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc22(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

fn h264_mc30(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[2..], sstride, w, h, false, false);
}

fn h264_mc31(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc20(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

fn h264_mc32(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc22(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

fn h264_mc33(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc20(&mut tmp,  TMP_BUF_STRIDE, &src[1..], sstride, w, h);
    h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

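// Bilinear chroma interpolation with 1/8-pel weights, as specified for H.264
// chroma motion compensation; the first three branches are fast paths for a
// zero fractional offset on one or both axes.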
pub fn chroma_interp(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) {
    let a0 = 8 - dx;
    let a1 = dx;
    let b0 = 8 - dy;
    let b1 = dy;

    let src1 = &src[sstride..];
    if a0 == 8 && b0 == 8 { // no fractional offset: plain copy
        for (drow, line) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(h) {
            drow[..w].copy_from_slice(&line[..w]);
        }
    } else if a0 == 8 { // vertical-only interpolation
        for (drow, (line0, line1)) in dst.chunks_mut(dstride).zip(src.chunks(sstride).zip(src1.chunks(sstride))).take(h) {
            for (pix, (&a, &b)) in drow.iter_mut().take(w).zip(line0.iter().zip(line1.iter())) {
                *pix = ((u16::from(a) * b0 + u16::from(b) * b1 + 4) >> 3) as u8;
            }
        }
    } else if b0 == 8 { // horizontal-only interpolation
        for (drow, line) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(h) {
            let mut a = line[0];
            for (pix, &b) in drow.iter_mut().take(w).zip(line.iter().skip(1)) {
                *pix = ((u16::from(a) * a0 + u16::from(b) * a1 + 4) >> 3) as u8;
                a = b;
            }
        }
    } else { // full bilinear interpolation
        for (drow, (line0, line1)) in dst.chunks_mut(dstride).zip(src.chunks(sstride).zip(src1.chunks(sstride))).take(h) {
            let mut a = line0[0];
            let mut c = line1[0];
            for (pix, (&b, &d)) in drow.iter_mut().take(w).zip(line0[1..].iter().zip(line1[1..].iter())) {
                *pix = ((u16::from(a) * a0 * b0 + u16::from(b) * a1 * b0 + u16::from(c) * a0 * b1 + u16::from(d) * a1 * b1 + 0x20) >> 6) as u8;
                a = b;
                c = d;
            }
        }
    }
}

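// Generates fixed-width wrappers (4, 8 and 16 pixels wide) around a generic
// MC function so the entries fit the `super::MCFunc` signature.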
macro_rules! luma_mc {
    ($orig:ident, $func4:ident, $func8:ident, $func16:ident) => {
        fn $func4(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) {
            $orig(dst, dstride, src, sstride, 4, h);
        }
        fn $func8(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) {
            $orig(dst, dstride, src, sstride, 8, h);
        }
        fn $func16(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) {
            $orig(dst, dstride, src, sstride, 16, h);
        }
    }
}

luma_mc!(h264_mc00, h264_mc00_4, h264_mc00_8, h264_mc00_16);
luma_mc!(h264_mc01, h264_mc01_4, h264_mc01_8, h264_mc01_16);
luma_mc!(h264_mc02, h264_mc02_4, h264_mc02_8, h264_mc02_16);
luma_mc!(h264_mc03, h264_mc03_4, h264_mc03_8, h264_mc03_16);
luma_mc!(h264_mc10, h264_mc10_4, h264_mc10_8, h264_mc10_16);
luma_mc!(h264_mc11, h264_mc11_4, h264_mc11_8, h264_mc11_16);
luma_mc!(h264_mc12, h264_mc12_4, h264_mc12_8, h264_mc12_16);
luma_mc!(h264_mc13, h264_mc13_4, h264_mc13_8, h264_mc13_16);
luma_mc!(h264_mc20, h264_mc20_4, h264_mc20_8, h264_mc20_16);
luma_mc!(h264_mc21, h264_mc21_4, h264_mc21_8, h264_mc21_16);
luma_mc!(h264_mc22, h264_mc22_4, h264_mc22_8, h264_mc22_16);
luma_mc!(h264_mc23, h264_mc23_4, h264_mc23_8, h264_mc23_16);
luma_mc!(h264_mc30, h264_mc30_4, h264_mc30_8, h264_mc30_16);
luma_mc!(h264_mc31, h264_mc31_4, h264_mc31_8, h264_mc31_16);
luma_mc!(h264_mc32, h264_mc32_4, h264_mc32_8, h264_mc32_16);
luma_mc!(h264_mc33, h264_mc33_4, h264_mc33_8, h264_mc33_16);

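// Luma interpolation function table: the outer index selects the block width
// class (4, 8 or 16 pixels), the inner index the quarter-pel position.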
pub const H264_LUMA_INTERP: &[[super::MCFunc; 16]; 3] = &[
    [
        h264_mc00_4,  h264_mc01_4,  h264_mc02_4,  h264_mc03_4,
        h264_mc10_4,  h264_mc11_4,  h264_mc12_4,  h264_mc13_4,
        h264_mc20_4,  h264_mc21_4,  h264_mc22_4,  h264_mc23_4,
        h264_mc30_4,  h264_mc31_4,  h264_mc32_4,  h264_mc33_4
    ], [
        h264_mc00_8,  h264_mc01_8,  h264_mc02_8,  h264_mc03_8,
        h264_mc10_8,  h264_mc11_8,  h264_mc12_8,  h264_mc13_8,
        h264_mc20_8,  h264_mc21_8,  h264_mc22_8,  h264_mc23_8,
        h264_mc30_8,  h264_mc31_8,  h264_mc32_8,  h264_mc33_8
    ], [
        h264_mc00_16, h264_mc01_16, h264_mc02_16, h264_mc03_16,
        h264_mc10_16, h264_mc11_16, h264_mc12_16, h264_mc13_16,
        h264_mc20_16, h264_mc21_16, h264_mc22_16, h264_mc23_16,
        h264_mc30_16, h264_mc31_16, h264_mc32_16, h264_mc33_16
    ]
];

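// No SIMD routines are registered in this (scalar) implementation, so the
// hook is empty.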
impl super::RegisterSIMD for super::H264MC {
    fn register_simd(&mut self) {}
}
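
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal smoke-test sketches added for illustration (not part of the
    // original module); they rely only on items defined in this file and on
    // two easy invariants of the interpolators.

    // Full-pel luma MC must be an exact block copy.
    #[test]
    fn mc00_is_a_plain_copy() {
        let src: Vec<u8> = (0..256).map(|i| i as u8).collect();
        let mut dst = [0u8; 8 * 8];
        h264_mc00(&mut dst, 8, &src, 16, 4, 4);
        for y in 0..4 {
            assert_eq!(&dst[y * 8..y * 8 + 4], &src[y * 16..y * 16 + 4]);
        }
    }

    // The six-tap weights sum to 32, so with the (+16) >> 5 rounding a
    // constant plane must come out unchanged after half-pel filtering.
    #[test]
    fn half_pel_on_flat_source_is_flat() {
        let src = [100u8; 16 * 16];
        let mut dst = [0u8; 8 * 8];
        h264_mc02(&mut dst, 8, &src, 16, 4, 4);
        for y in 0..4 {
            assert!(dst[y * 8..y * 8 + 4].iter().all(|&p| p == 100));
        }
    }

    // With zero fractional offsets chroma interpolation takes the copy path.
    #[test]
    fn chroma_zero_fraction_is_a_copy() {
        let src: Vec<u8> = (0..256).map(|i| (255 - i) as u8).collect();
        let mut dst = [0u8; 8 * 8];
        chroma_interp(&mut dst, 8, &src, 16, 0, 0, 4, 4);
        for y in 0..4 {
            assert_eq!(&dst[y * 8..y * 8 + 4], &src[y * 16..y * 16 + 4]);
        }
    }
}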