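//! Motion-compensation routines for the H.264 decoder: quarter-pel luma
//! interpolation built on the six-tap (1, -5, 20, 20, -5, 1) filter and
//! eighth-pel bilinear chroma interpolation.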
use nihav_codec_support::codecs::blockdsp::*;

use super::clip_u8;

// Row stride of the on-stack scratch buffers that hold intermediate planes.
const TMP_BUF_STRIDE: usize = 32;

// Axis-aligned quarter-pel interpolation: run the six-tap (1, -5, 20, 20, -5, 1)
// filter along one direction (horizontal if `hor` is set, vertical otherwise)
// and average the result with the nearest full-pel sample; `avg0` selects
// which of the two neighbouring full-pel samples is used.
fn interp_block1(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize, hor: bool, avg0: bool) {
    unsafe {
        let step = if hor { 1 } else { sstride };
        let avgidx = if avg0 { step * 2 } else { step * 3 };
        let mut src = src.as_ptr();
        let mut dst = dst.as_mut_ptr();
        for _ in 0..h {
            for _ in 0..w {
                // half-pel value, normalised and clipped with (x + 16) >> 5
                let t = clip_u8(( i16::from(*src)
                                 - 5 * i16::from(*src.add(step))
                                 + 20 * i16::from(*src.add(step * 2))
                                 + 20 * i16::from(*src.add(step * 3))
                                 - 5 * i16::from(*src.add(step * 4))
                                 + i16::from(*src.add(step * 5))
                                 + 16) >> 5);
                // average with the selected full-pel neighbour
                *dst = ((u16::from(t) + u16::from(*src.add(avgidx)) + 1) >> 1) as u8;
                src = src.add(1);
                dst = dst.add(1);
            }
            dst = dst.sub(w).add(dstride);
            src = src.sub(w).add(sstride);
        }
    }
}

// Half-pel interpolation: the plain six-tap filter along one direction,
// without the extra full-pel averaging.
fn interp_block2(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize, hor: bool) {
    unsafe {
        let step = if hor { 1 } else { sstride };
        let mut pix = dst.as_mut_ptr();
        let mut src = src.as_ptr();
        for _ in 0..h {
            for x in 0..w {
                *pix.add(x) = clip_u8(( i16::from(*src)
                                       - 5 * i16::from(*src.add(step))
                                       + 20 * i16::from(*src.add(step * 2))
                                       + 20 * i16::from(*src.add(step * 3))
                                       - 5 * i16::from(*src.add(step * 4))
                                       + i16::from(*src.add(step * 5))
                                       + 16) >> 5);
                src = src.add(1);
            }
            pix = pix.add(dstride);
            src = src.sub(w);
            src = src.add(sstride);
        }
    }
}

// Averages two intermediate predictions (stored with TMP_BUF_STRIDE) into the
// destination; used by the quarter-pel positions that lie between two
// half-pel samples.
fn mc_avg_tmp(dst: &mut [u8], dstride: usize, w: usize, h: usize, tmp: &[u8], tmp2: &[u8]) {
    unsafe {
        let mut src1 = tmp.as_ptr();
        let mut src2 = tmp2.as_ptr();
        let mut dst = dst.as_mut_ptr();
        for _ in 0..h {
            for x in 0..w {
                let a = *src1.add(x);
                let b = *src2.add(x);
                *dst.add(x) = ((u16::from(a) + u16::from(b) + 1) >> 1) as u8;
            }
            dst = dst.add(dstride);
            src1 = src1.add(TMP_BUF_STRIDE);
            src2 = src2.add(TMP_BUF_STRIDE);
        }
    }
}

// Full-pel position: plain block copy.
fn h264_mc00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    unsafe {
        let mut src = src.as_ptr();
        let mut dst = dst.as_mut_ptr();
        for _ in 0..h {
            std::ptr::copy_nonoverlapping(src, dst, w);
            src = src.add(sstride);
            dst = dst.add(dstride);
        }
    }
}

// For the fractional-position functions below the source slice starts two rows
// above and two columns to the left of the block, so purely horizontal filters
// skip the top border (`sstride * 2`) and purely vertical filters skip the
// left border (`2`).
fn h264_mc01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, true);
}

fn h264_mc02(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block2(dst, dstride, &src[sstride * 2..], sstride, w, h, true);
}

fn h264_mc03(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, false);
}

fn h264_mc10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[2..], sstride, w, h, false, true);
}

// The remaining quarter-pel positions average two half-pel predictions
// computed into on-stack scratch buffers.
fn h264_mc11(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc02(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

fn h264_mc12(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc02(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc22(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

fn h264_mc13(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc02(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

fn h264_mc20(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block2(dst, dstride, &src[2..], sstride, w, h, false);
}

fn h264_mc21(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc22(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

// Centre half-pel position: a vertical six-tap pass into a 32-bit scratch
// buffer (over w + 5 columns, so the second pass has full filter support)
// followed by a horizontal six-tap pass with combined rounding (x + 512) >> 10.
fn h264_mc22(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp = [0i32; TMP_BUF_STRIDE * 16];
    unsafe {
        let mut src = src.as_ptr();
        let mut dst = tmp.as_mut_ptr();
        for _ in 0..h {
            for _ in 0..w + 5 {
                *dst = i32::from(*src)
                       - 5 * i32::from(*src.add(sstride))
                       + 20 * i32::from(*src.add(sstride * 2))
                       + 20 * i32::from(*src.add(sstride * 3))
                       - 5 * i32::from(*src.add(sstride * 4))
                       + i32::from(*src.add(sstride * 5));
                dst = dst.add(1);
                src = src.add(1);
            }
            src = src.sub(w + 5).add(sstride);
            dst = dst.sub(w + 5).add(TMP_BUF_STRIDE);
        }
    }
    unsafe {
        let mut dst = dst.as_mut_ptr();
        let mut src = tmp.as_ptr();
        for _ in 0..h {
            for _ in 0..w {
                *dst = clip_u8(((*src
                                 - 5 * *src.add(1)
                                 + 20 * *src.add(2)
                                 + 20 * *src.add(3)
                                 - 5 * *src.add(4)
                                 + *src.add(5) + 512) >> 10) as i16);
                dst = dst.add(1);
                src = src.add(1);
            }
            dst = dst.sub(w).add(dstride);
            src = src.sub(w).add(TMP_BUF_STRIDE);
        }
    }
}

fn h264_mc23(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc22(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

fn h264_mc30(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[2..], sstride, w, h, false, false);
}

fn h264_mc31(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc20(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

fn h264_mc32(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc22(&mut tmp,  TMP_BUF_STRIDE, src, sstride, w, h);
    h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

fn h264_mc33(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut tmp  = [0u8; TMP_BUF_STRIDE * 16];
    let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16];
    h264_mc20(&mut tmp,  TMP_BUF_STRIDE, &src[1..], sstride, w, h);
    h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h);
    mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2);
}

// Bilinear chroma interpolation with 1/8-pel precision: each output sample is
// a weighted average of the four neighbouring chroma samples with weights
// (8 - dx) * (8 - dy), dx * (8 - dy), (8 - dx) * dy and dx * dy, rounded by
// (x + 32) >> 6. The full-pel and axis-aligned cases are specialised.
pub fn chroma_interp(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) {
    let a0 = 8 - dx;
    let a1 = dx;
    let b0 = 8 - dy;
    let b1 = dy;

    if a0 == 8 && b0 == 8 { // full-pel position: plain copy
        unsafe {
            let mut src = src.as_ptr();
            let mut dst = dst.as_mut_ptr();
            for _ in 0..h {
                std::ptr::copy_nonoverlapping(src, dst, w);
                src = src.add(sstride);
                dst = dst.add(dstride);
            }
        }
    } else if a0 == 8 { // vertical-only interpolation (dx == 0)
        unsafe {
            let mut src0 = src.as_ptr();
            let mut src1 = src0.add(sstride);
            let mut dst = dst.as_mut_ptr();
            for _ in 0..h {
                for x in 0..w {
                    let a = *src0.add(x);
                    let b = *src1.add(x);
                    *dst.add(x) = ((u16::from(a) * b0 + u16::from(b) * b1 + 4) >> 3) as u8;
                }
                src0 = src0.add(sstride);
                src1 = src1.add(sstride);
                dst = dst.add(dstride);
            }
        }
    } else if b0 == 8 { // horizontal-only interpolation (dy == 0)
        unsafe {
            let mut src = src.as_ptr();
            let mut dst = dst.as_mut_ptr();
            for _ in 0..h {
                let mut a = *src;
                for x in 0..w {
                    let b = *src.add(x + 1);
                    *dst.add(x) = ((u16::from(a) * a0 + u16::from(b) * a1 + 4) >> 3) as u8;
                    a = b;
                }
                src = src.add(sstride);
                dst = dst.add(dstride);
            }
        }
    } else { // general two-dimensional bilinear case
        unsafe {
            let mut src0 = src.as_ptr();
            let mut src1 = src0.add(sstride);
            let mut dst = dst.as_mut_ptr();
            for _ in 0..h {
                let mut a = *src0;
                let mut c = *src1;
                for x in 0..w {
                    let b = *src0.add(x + 1);
                    let d = *src1.add(x + 1);
                    *dst.add(x) = ((u16::from(a) * a0 * b0 + u16::from(b) * a1 * b0
                                    + u16::from(c) * a0 * b1 + u16::from(d) * a1 * b1 + 0x20) >> 6) as u8;
                    a = b;
                    c = d;
                }
                src0 = src0.add(sstride);
                src1 = src1.add(sstride);
                dst = dst.add(dstride);
            }
        }
    }
}
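
// Minimal sanity-check sketch for `chroma_interp`: the four bilinear weights
// always sum to 64 (8 in the axis-aligned cases) and the rounding shift
// divides that factor back out, so a constant plane must be reproduced exactly
// for every fractional offset. The test below is illustrative only.
#[cfg(test)]
mod chroma_interp_tests {
    use super::chroma_interp;

    #[test]
    fn constant_plane_is_preserved() {
        // 8x8 destination block; the source carries one extra row and column
        // for the bilinear taps at x + 1 and y + 1.
        let src = [100u8; 9 * 9];
        let mut dst = [0u8; 8 * 8];
        for dy in 0..8 {
            for dx in 0..8 {
                chroma_interp(&mut dst, 8, &src, 9, dx, dy, 8, 8);
                assert!(dst.iter().all(|&pix| pix == 100));
            }
        }
    }
}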

pub const H264_LUMA_INTERP: &[BlkInterpFunc] = &[
    h264_mc00, h264_mc01, h264_mc02, h264_mc03,
    h264_mc10, h264_mc11, h264_mc12, h264_mc13,
    h264_mc20, h264_mc21, h264_mc22, h264_mc23,
    h264_mc30, h264_mc31, h264_mc32, h264_mc33
];
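
// Dispatch sketch (illustrative): assuming the table above is indexed as
// `dx + dy * 4`, which matches the fractional positions the functions compute,
// and that for any non-zero mode the caller hands in a source slice that
// already starts two rows above and two columns to the left of the block,
// selecting a luma interpolator would look like the hypothetical helper below.
#[allow(dead_code)]
fn luma_mc_example(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize,
                   w: usize, h: usize, dx: usize, dy: usize) {
    let mode = (dx & 3) + (dy & 3) * 4;
    (H264_LUMA_INTERP[mode])(dst, dstride, src, sstride, w, h);
}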