]> git.nihav.org Git - nihav.git/blob - nihav-itu/src/codecs/h264/dsp/mc/release.rs
h264/dsp: split chroma_interp() by width
[nihav.git] / nihav-itu / src / codecs / h264 / dsp / mc / release.rs
1 use super::clip_u8;
2
/// Row stride, in elements, of the on-stack scratch buffers used in this file
/// (each such buffer is declared with `TMP_BUF_STRIDE * 16` elements).
const TMP_BUF_STRIDE: usize = 32;
4
5 fn interp_block1(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize, hor: bool, avg0: bool) {
6 unsafe {
7 let step = if hor { 1 } else { sstride };
8 let avgidx = if avg0 { step * 2 } else { step * 3 };
9 let mut src = src.as_ptr();
10 let mut dst = dst.as_mut_ptr();
11 for _ in 0..h {
12 for _ in 0..w {
13 let t = clip_u8(( i16::from(*src)
14 - 5 * i16::from(*src.add(step))
15 + 20 * i16::from(*src.add(step * 2))
16 + 20 * i16::from(*src.add(step * 3))
17 - 5 * i16::from(*src.add(step * 4))
18 + i16::from(*src.add(step * 5))
19 + 16) >> 5);
20 *dst = ((u16::from(t) + u16::from(*src.add(avgidx)) + 1) >> 1) as u8;
21 src = src.add(1);
22 dst = dst.add(1);
23 }
24 dst = dst.sub(w).add(dstride);
25 src = src.sub(w).add(sstride);
26 }
27 }
28 }
29
30 fn interp_block2(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize, hor: bool) {
31 unsafe {
32 let step = if hor { 1 } else { sstride };
33 let mut pix = dst.as_mut_ptr();
34 let mut src = src.as_ptr();
35 for _ in 0..h {
36 for x in 0..w {
37 *pix.add(x) = clip_u8(( i16::from(*src)
38 - 5 * i16::from(*src.add(step))
39 + 20 * i16::from(*src.add(step * 2))
40 + 20 * i16::from(*src.add(step * 3))
41 - 5 * i16::from(*src.add(step * 4))
42 + i16::from(*src.add(step * 5))
43 + 16) >> 5);
44 src = src.add(1);
45 }
46 pix = pix.add(dstride);
47 src = src.sub(w);
48 src = src.add(sstride);
49 }
50 }
51 }
52
53 fn mc_avg_tmp(dst: &mut [u8], dstride: usize, w: usize, h: usize, tmp: &[u8], tmp2: &[u8]) {
54 unsafe {
55 let mut src1 = tmp.as_ptr();
56 let mut src2 = tmp2.as_ptr();
57 let mut dst = dst.as_mut_ptr();
58 for _ in 0..h {
59 for x in 0..w {
60 let a = *src1.add(x);
61 let b = *src2.add(x);
62 *dst.add(x) = ((u16::from(a) + u16::from(b) + 1) >> 1) as u8;
63 }
64 dst = dst.add(dstride);
65 src1 = src1.add(TMP_BUF_STRIDE);
66 src2 = src2.add(TMP_BUF_STRIDE);
67 }
68 }
69 }
70
/// Copies `h` rows of `w` bytes from `src` (rows `sstride` apart) to `dst` (rows `dstride` apart).
///
/// The copy is not bounds-checked; both buffers must be large enough for the requested block.
fn h264_mc00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let from = src.as_ptr();
    let to = dst.as_mut_ptr();
    for row in 0..h {
        // SAFETY: nothing is verified here; the caller must pass buffers that contain `h` rows
        // of at least `w` bytes at the given strides. `src` and `dst` cannot overlap because
        // one is a shared and the other an exclusive borrow.
        unsafe {
            std::ptr::copy_nonoverlapping(from.add(row * sstride), to.add(row * dstride), w);
        }
    }
}
82
83 fn h264_mc01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
84 interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, true);
85 }
86
87 fn h264_mc02(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
88 interp_block2(dst, dstride, &src[sstride * 2..], sstride, w, h, true);
89 }
90
91 fn h264_mc03(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
92 interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, false);
93 }
94
95 fn h264_mc10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
96 interp_block1(dst, dstride, &src[2..], sstride, w, h, false, true);
97 }
98
99 fn h264_mc11(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
100 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
101 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
102 h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
103 h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h);
104 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
105 }
106
107 fn h264_mc12(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
108 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
109 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
110 h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
111 h264_mc22(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h);
112 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
113 }
114
115 fn h264_mc13(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
116 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
117 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
118 h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
119 h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h);
120 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
121 }
122
123 fn h264_mc20(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
124 interp_block2(dst, dstride, &src[2..], sstride, w, h, false);
125 }
126
127 fn h264_mc21(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
128 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
129 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
130 h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
131 h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h);
132 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
133 }
134
135 fn h264_mc22(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
136 let mut tmp: [i32; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
137 unsafe {
138 let mut src = src.as_ptr();
139 let mut dst = tmp.as_mut_ptr();
140 for _ in 0..h {
141 for _ in 0..w+5 {
142 *dst = i32::from(*src)
143 - 5 * i32::from(*src.add(sstride))
144 + 20 * i32::from(*src.add(sstride * 2))
145 + 20 * i32::from(*src.add(sstride * 3))
146 - 5 * i32::from(*src.add(sstride * 4))
147 + i32::from(*src.add(sstride * 5));
148 dst = dst.add(1);
149 src = src.add(1);
150 }
151 src = src.sub(w+5).add(sstride);
152 dst = dst.sub(w+5).add(TMP_BUF_STRIDE);
153 }
154 }
155 unsafe {
156 let mut dst = dst.as_mut_ptr();
157 let mut src = tmp.as_ptr();
158 for _ in 0..h {
159 for _ in 0..w {
160 *dst = clip_u8(((*src - 5 * *src.add(1) + 20 * *src.add(2) + 20 * *src.add(3) - 5 * *src.add(4) + *src.add(5) + 512) >> 10) as i16);
161 dst = dst.add(1);
162 src = src.add(1);
163 }
164 dst = dst.sub(w).add(dstride);
165 src = src.sub(w).add(TMP_BUF_STRIDE);
166 }
167 }
168 }
169
170 fn h264_mc23(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
171 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
172 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
173 h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
174 h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h);
175 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
176 }
177
178 fn h264_mc30(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
179 interp_block1(dst, dstride, &src[2..], sstride, w, h, false, false);
180 }
181
182 fn h264_mc31(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
183 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
184 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
185 h264_mc20(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
186 h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h);
187 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
188 }
189
190 fn h264_mc32(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
191 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
192 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
193 h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h);
194 h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h);
195 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
196 }
197
198 fn h264_mc33(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
199 let mut tmp : [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
200 let mut tmp2: [u8; TMP_BUF_STRIDE * 16] = unsafe { std::mem::MaybeUninit::uninit().assume_init() };
201 h264_mc20(&mut tmp, TMP_BUF_STRIDE, &src[1..], sstride, w, h);
202 h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h);
203 mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
204 }
205
206
/// Bilinearly interpolates a `w`x`h` block with eighth-sample weights `dx` and `dy`.
///
/// Horizontal weights are `8 - dx` (left) and `dx` (right); vertical weights are `8 - dy`
/// (top) and `dy` (bottom). Four cases are handled separately:
/// * `dx == 0 && dy == 0`: plain row copy;
/// * `dx == 0`: vertical blend of two rows, rounded with `(+ 4) >> 3`;
/// * `dy == 0`: horizontal blend of two columns, rounded with `(+ 4) >> 3`;
/// * otherwise: blend of four neighbours, rounded with `(+ 0x20) >> 6`.
///
/// Accesses are not bounds-checked. Depending on the case, one extra column and/or one extra
/// row of `src` beyond the `w`x`h` block is read.
fn chroma_interp(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) {
    let w_left = 8 - dx;
    let w_right = dx;
    let w_top = 8 - dy;
    let w_bottom = dy;

    let src_base = src.as_ptr();
    let dst_base = dst.as_mut_ptr();

    // SAFETY (all arms): nothing is verified here; the caller must pass a `src` that covers
    // the block plus the extra row/column read by the selected case, and a `dst` that holds
    // `h` rows of at least `w` bytes at stride `dstride`.
    match (dx, dy) {
        (0, 0) => {
            for y in 0..h {
                unsafe {
                    std::ptr::copy_nonoverlapping(src_base.add(y * sstride), dst_base.add(y * dstride), w);
                }
            }
        }
        (0, _) => {
            for y in 0..h {
                unsafe {
                    let upper = src_base.add(y * sstride);
                    let lower = upper.add(sstride);
                    let out = dst_base.add(y * dstride);
                    for x in 0..w {
                        let t = u16::from(*upper.add(x));
                        let b = u16::from(*lower.add(x));
                        *out.add(x) = ((t * w_top + b * w_bottom + 4) >> 3) as u8;
                    }
                }
            }
        }
        (_, 0) => {
            for y in 0..h {
                unsafe {
                    let line = src_base.add(y * sstride);
                    let out = dst_base.add(y * dstride);
                    // The right-hand sample of one pixel is the left-hand sample of the next.
                    let mut left = *line;
                    for x in 0..w {
                        let right = *line.add(x + 1);
                        *out.add(x) = ((u16::from(left) * w_left + u16::from(right) * w_right + 4) >> 3) as u8;
                        left = right;
                    }
                }
            }
        }
        _ => {
            for y in 0..h {
                unsafe {
                    let upper = src_base.add(y * sstride);
                    let lower = upper.add(sstride);
                    let out = dst_base.add(y * dstride);
                    // Right-hand samples are carried over as the next pixel's left-hand ones.
                    let mut tl = *upper;
                    let mut bl = *lower;
                    for x in 0..w {
                        let tr = *upper.add(x + 1);
                        let br = *lower.add(x + 1);
                        *out.add(x) = ((u16::from(tl) * w_left * w_top + u16::from(tr) * w_right * w_top + u16::from(bl) * w_left * w_bottom + u16::from(br) * w_right * w_bottom + 0x20) >> 6) as u8;
                        tl = tr;
                        bl = br;
                    }
                }
            }
        }
    }
}
276
277 pub fn chroma_interp_8(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, h: usize) {
278 chroma_interp(dst, dstride, src, sstride, dx, dy, 8, h);
279 }
280
281 pub fn chroma_interp_4(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, h: usize) {
282 chroma_interp(dst, dstride, src, sstride, dx, dy, 4, h);
283 }
284
285 pub fn chroma_interp_2(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, h: usize) {
286 chroma_interp(dst, dstride, src, sstride, dx, dy, 2, h);
287 }
288
289 macro_rules! luma_mc {
290 ($orig:ident, $func4:ident, $func8:ident, $func16:ident) => {
291 fn $func4(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) {
292 $orig(dst, dstride, src, sstride, 4, h);
293 }
294 fn $func8(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) {
295 $orig(dst, dstride, src, sstride, 8, h);
296 }
297 fn $func16(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, h: usize) {
298 $orig(dst, dstride, src, sstride, 16, h);
299 }
300 }
301 }
302
303 luma_mc!(h264_mc00, h264_mc00_4, h264_mc00_8, h264_mc00_16);
304 luma_mc!(h264_mc01, h264_mc01_4, h264_mc01_8, h264_mc01_16);
305 luma_mc!(h264_mc02, h264_mc02_4, h264_mc02_8, h264_mc02_16);
306 luma_mc!(h264_mc03, h264_mc03_4, h264_mc03_8, h264_mc03_16);
307 luma_mc!(h264_mc10, h264_mc10_4, h264_mc10_8, h264_mc10_16);
308 luma_mc!(h264_mc11, h264_mc11_4, h264_mc11_8, h264_mc11_16);
309 luma_mc!(h264_mc12, h264_mc12_4, h264_mc12_8, h264_mc12_16);
310 luma_mc!(h264_mc13, h264_mc13_4, h264_mc13_8, h264_mc13_16);
311 luma_mc!(h264_mc20, h264_mc20_4, h264_mc20_8, h264_mc20_16);
312 luma_mc!(h264_mc21, h264_mc21_4, h264_mc21_8, h264_mc21_16);
313 luma_mc!(h264_mc22, h264_mc22_4, h264_mc22_8, h264_mc22_16);
314 luma_mc!(h264_mc23, h264_mc23_4, h264_mc23_8, h264_mc23_16);
315 luma_mc!(h264_mc30, h264_mc30_4, h264_mc30_8, h264_mc30_16);
316 luma_mc!(h264_mc31, h264_mc31_4, h264_mc31_8, h264_mc31_16);
317 luma_mc!(h264_mc32, h264_mc32_4, h264_mc32_8, h264_mc32_16);
318 luma_mc!(h264_mc33, h264_mc33_4, h264_mc33_8, h264_mc33_16);
319
320 pub const H264_LUMA_INTERP: &[[super::MCFunc; 16]; 3] = &[
321 [
322 h264_mc00_4, h264_mc01_4, h264_mc02_4, h264_mc03_4,
323 h264_mc10_4, h264_mc11_4, h264_mc12_4, h264_mc13_4,
324 h264_mc20_4, h264_mc21_4, h264_mc22_4, h264_mc23_4,
325 h264_mc30_4, h264_mc31_4, h264_mc32_4, h264_mc33_4
326 ], [
327 h264_mc00_8, h264_mc01_8, h264_mc02_8, h264_mc03_8,
328 h264_mc10_8, h264_mc11_8, h264_mc12_8, h264_mc13_8,
329 h264_mc20_8, h264_mc21_8, h264_mc22_8, h264_mc23_8,
330 h264_mc30_8, h264_mc31_8, h264_mc32_8, h264_mc33_8
331 ], [
332 h264_mc00_16, h264_mc01_16, h264_mc02_16, h264_mc03_16,
333 h264_mc10_16, h264_mc11_16, h264_mc12_16, h264_mc13_16,
334 h264_mc20_16, h264_mc21_16, h264_mc22_16, h264_mc23_16,
335 h264_mc30_16, h264_mc31_16, h264_mc32_16, h264_mc33_16
336 ]
337 ];
338
339 impl super::RegisterSIMD for super::H264MC {
340 fn register_simd(&mut self) {}
341 }