]>
Commit | Line | Data |
---|---|---|
999fbb83 KS |
1 | use nihav_codec_support::codecs::blockdsp::*; |
2 | ||
3 | use super::clip_u8; | |
4 | ||
// Row stride of the on-stack scratch planes used by the sub-pel helpers.
// 32 columns by 16 rows covers a 16x16 block plus the w+5 intermediate
// samples h264_mc22 writes per row.
const TMP_BUF_STRIDE: usize = 32;
6 | ||
7 | fn interp_block1(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize, hor: bool, avg0: bool) { | |
8 | ||
9 | let step = if hor { 1 } else { sstride }; | |
10 | let mut idx = 0; | |
11 | let avgidx = if avg0 { step * 2 } else { step * 3 }; | |
12 | ||
13 | for dline in dst.chunks_mut(dstride).take(h) { | |
14 | for (x, pix) in dline.iter_mut().take(w).enumerate() { | |
15 | let t = clip_u8(( i16::from(src[idx + x]) | |
16 | - 5 * i16::from(src[idx + x + step]) | |
17 | + 20 * i16::from(src[idx + x + step * 2]) | |
18 | + 20 * i16::from(src[idx + x + step * 3]) | |
19 | - 5 * i16::from(src[idx + x + step * 4]) | |
20 | + i16::from(src[idx + x + step * 5]) | |
21 | + 16) >> 5); | |
22 | *pix = ((u16::from(t) + u16::from(src[idx + x + avgidx]) + 1) >> 1) as u8; | |
23 | } | |
24 | idx += sstride; | |
25 | } | |
26 | } | |
27 | ||
28 | fn interp_block2(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize, hor: bool) { | |
29 | let step = if hor { 1 } else { sstride }; | |
30 | let mut idx = 0; | |
31 | for dline in dst.chunks_mut(dstride).take(h) { | |
32 | for (x, pix) in dline.iter_mut().take(w).enumerate() { | |
33 | *pix = clip_u8(( i16::from(src[idx + x]) | |
34 | - 5 * i16::from(src[idx + x + step]) | |
35 | + 20 * i16::from(src[idx + x + step * 2]) | |
36 | + 20 * i16::from(src[idx + x + step * 3]) | |
37 | - 5 * i16::from(src[idx + x + step * 4]) | |
38 | + i16::from(src[idx + x + step * 5]) | |
39 | + 16) >> 5); | |
40 | } | |
41 | idx += sstride; | |
42 | } | |
43 | } | |
44 | ||
45 | fn mc_avg_tmp(dst: &mut [u8], dstride: usize, w: usize, h: usize, tmp: &[u8], tmp2: &[u8]) { | |
46 | for (dline, (sline0, sline1)) in dst.chunks_mut(dstride).zip(tmp.chunks(TMP_BUF_STRIDE).zip(tmp2.chunks(TMP_BUF_STRIDE))).take(h) { | |
47 | for (pix, (&a, &b)) in dline.iter_mut().zip(sline0.iter().zip(sline1.iter())).take(w) { | |
48 | *pix = ((u16::from(a) + u16::from(b) + 1) >> 1) as u8; | |
49 | } | |
50 | } | |
51 | } | |
52 | ||
// Full-pel position: straight row-by-row copy of a w x h block.
fn h264_mc00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let mut doff = 0;
    let mut soff = 0;
    for _ in 0..h {
        dst[doff..][..w].copy_from_slice(&src[soff..][..w]);
        doff += dstride;
        soff += sstride;
    }
}
58 | ||
// Quarter-pel along the row: six-tap half-pel filter (hor = true) averaged
// with the full-pel sample under tap 2 (avg0 = true).
// NOTE(review): the `sstride * 2` skip suggests `src` points two rows and two
// columns before the block (the row pre-offset is cancelled here while the
// column offset feeds the filter taps) — confirm against the caller.
fn h264_mc01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, true);
}
62 | ||
// Half-pel along the row: plain six-tap filter (hor = true), no averaging.
// NOTE(review): `sstride * 2` presumably cancels a two-row pre-offset in
// `src` — verify against the caller.
fn h264_mc02(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block2(dst, dstride, &src[sstride * 2..], sstride, w, h, true);
}
66 | ||
// Quarter-pel along the row, other side: half-pel filter output averaged with
// the full-pel sample under tap 3 (avg0 = false).
fn h264_mc03(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[sstride * 2..], sstride, w, h, true, false);
}
70 | ||
// Quarter-pel down the column: six-tap filter along the column (hor = false)
// averaged with the full-pel sample under tap 2 (avg0 = true).
// NOTE(review): `&src[2..]` presumably cancels a two-column pre-offset in
// `src` — verify against the caller.
fn h264_mc10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[2..], sstride, w, h, false, true);
}
74 | ||
75 | fn h264_mc11(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
76 | let mut tmp = [0u8; TMP_BUF_STRIDE * 16]; | |
77 | let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16]; | |
78 | h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
79 | h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h); | |
80 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
81 | } | |
82 | ||
83 | fn h264_mc12(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
84 | let mut tmp = [0u8; TMP_BUF_STRIDE * 16]; | |
85 | let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16]; | |
86 | h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
87 | h264_mc22(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h); | |
88 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
89 | } | |
90 | ||
91 | fn h264_mc13(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
92 | let mut tmp = [0u8; TMP_BUF_STRIDE * 16]; | |
93 | let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16]; | |
94 | h264_mc02(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
95 | h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h); | |
96 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
97 | } | |
98 | ||
// Half-pel down the column: plain six-tap filter along the column
// (hor = false), no averaging.
// NOTE(review): `&src[2..]` presumably cancels a two-column pre-offset in
// `src` — verify against the caller.
fn h264_mc20(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block2(dst, dstride, &src[2..], sstride, w, h, false);
}
102 | ||
103 | fn h264_mc21(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
104 | let mut tmp = [0u8; TMP_BUF_STRIDE * 16]; | |
105 | let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16]; | |
106 | h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
107 | h264_mc20(&mut tmp2, TMP_BUF_STRIDE, src, sstride, w, h); | |
108 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
109 | } | |
110 | ||
111 | fn h264_mc22(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
112 | let mut tmp = [0i32; TMP_BUF_STRIDE * 16]; | |
113 | let mut idx = 0; | |
114 | for dline in tmp.chunks_mut(TMP_BUF_STRIDE).take(h) { | |
115 | for (x, pix) in dline.iter_mut().take(w + 5).enumerate() { | |
116 | *pix = i32::from(src[idx + x]) | |
117 | - 5 * i32::from(src[idx + x + sstride]) | |
118 | + 20 * i32::from(src[idx + x + sstride * 2]) | |
119 | + 20 * i32::from(src[idx + x + sstride * 3]) | |
120 | - 5 * i32::from(src[idx + x + sstride * 4]) | |
121 | + i32::from(src[idx + x + sstride * 5]); | |
122 | } | |
123 | idx += sstride; | |
124 | } | |
125 | for (dline, sline) in dst.chunks_mut(dstride).zip(tmp.chunks(TMP_BUF_STRIDE)).take(h) { | |
126 | for (x, pix) in dline.iter_mut().take(w).enumerate() { | |
127 | *pix = clip_u8(((sline[x] - 5 * sline[x + 1] + 20 * sline[x + 2] + 20 * sline[x + 3] - 5 * sline[x + 4] + sline[x + 5] + 512) >> 10) as i16); | |
128 | } | |
129 | } | |
130 | } | |
131 | ||
132 | fn h264_mc23(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
133 | let mut tmp = [0u8; TMP_BUF_STRIDE * 16]; | |
134 | let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16]; | |
135 | h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
136 | h264_mc20(&mut tmp2, TMP_BUF_STRIDE, &src[1..], sstride, w, h); | |
137 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
138 | } | |
139 | ||
// Quarter-pel down the column, other side: column six-tap filter output
// averaged with the full-pel sample under tap 3 (avg0 = false).
fn h264_mc30(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    interp_block1(dst, dstride, &src[2..], sstride, w, h, false, false);
}
143 | ||
144 | fn h264_mc31(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
145 | let mut tmp = [0u8; TMP_BUF_STRIDE * 16]; | |
146 | let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16]; | |
147 | h264_mc20(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
148 | h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h); | |
149 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
150 | } | |
151 | ||
152 | fn h264_mc32(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
153 | let mut tmp = [0u8; TMP_BUF_STRIDE * 16]; | |
154 | let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16]; | |
155 | h264_mc22(&mut tmp, TMP_BUF_STRIDE, src, sstride, w, h); | |
156 | h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h); | |
157 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
158 | } | |
159 | ||
160 | fn h264_mc33(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { | |
161 | let mut tmp = [0u8; TMP_BUF_STRIDE * 16]; | |
162 | let mut tmp2 = [0u8; TMP_BUF_STRIDE * 16]; | |
163 | h264_mc20(&mut tmp, TMP_BUF_STRIDE, &src[1..], sstride, w, h); | |
164 | h264_mc02(&mut tmp2, TMP_BUF_STRIDE, &src[sstride..], sstride, w, h); | |
165 | mc_avg_tmp(dst, dstride, w, h, &tmp, &tmp2); | |
166 | } | |
167 | ||
168 | ||
// Bilinear chroma interpolation with 1/8-pel fractional offsets dx, dy (0..=7):
//   out = (A*(8-dx)*(8-dy) + B*dx*(8-dy) + C*(8-dx)*dy + D*dx*dy + 32) >> 6
// where A..D is the 2x2 full-pel neighbourhood. Degenerate weights (dx == 0
// and/or dy == 0) take cheaper one-dimensional or copy paths.
pub fn chroma_interp(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, dx: u16, dy: u16, w: usize, h: usize) {
    let wx1 = dx;
    let wx0 = 8 - dx;
    let wy1 = dy;
    let wy0 = 8 - dy;

    let src1 = &src[sstride..];
    if wx0 == 8 && wy0 == 8 {
        // Full-pel: plain copy.
        for (drow, srow) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(h) {
            drow[..w].copy_from_slice(&srow[..w]);
        }
    } else if wx0 == 8 {
        // Vertical-only blend of the current and next source rows.
        for (drow, (top, bot)) in dst.chunks_mut(dstride).zip(src.chunks(sstride).zip(src1.chunks(sstride))).take(h) {
            for (pix, (&t, &b)) in drow.iter_mut().take(w).zip(top.iter().zip(bot.iter())) {
                *pix = ((u16::from(t) * wy0 + u16::from(b) * wy1 + 4) >> 3) as u8;
            }
        }
    } else if wy0 == 8 {
        // Horizontal-only blend of neighbouring samples in each row.
        for (drow, srow) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(h) {
            for (pix, (&l, &r)) in drow.iter_mut().take(w).zip(srow.iter().zip(srow[1..].iter())) {
                *pix = ((u16::from(l) * wx0 + u16::from(r) * wx1 + 4) >> 3) as u8;
            }
        }
    } else {
        // Full bilinear blend of the 2x2 neighbourhood; max sum is
        // 255 * 64 + 32, which fits u16.
        for (drow, (top, bot)) in dst.chunks_mut(dstride).zip(src.chunks(sstride).zip(src1.chunks(sstride))).take(h) {
            let pairs = top.iter().zip(top[1..].iter()).zip(bot.iter().zip(bot[1..].iter()));
            for (pix, ((&tl, &tr), (&bl, &br))) in drow.iter_mut().take(w).zip(pairs) {
                *pix = ((u16::from(tl) * wx0 * wy0
                       + u16::from(tr) * wx1 * wy0
                       + u16::from(bl) * wx0 * wy1
                       + u16::from(br) * wx1 * wy1 + 0x20) >> 6) as u8;
            }
        }
    }
}
206 | ||
// Dispatch table for the 16 luma sub-pel positions (mode 0 = full-pel copy,
// modes using interp_block1/interp_block2/h264_mc22 cover the quarter- and
// half-pel cases).
// NOTE(review): whether the index is built as dx*4+dy or dy*4+dx is decided
// by the caller — confirm there before relying on a specific ordering.
pub const H264_LUMA_INTERP: &[BlkInterpFunc] = &[
    h264_mc00, h264_mc01, h264_mc02, h264_mc03,
    h264_mc10, h264_mc11, h264_mc12, h264_mc13,
    h264_mc20, h264_mc21, h264_mc22, h264_mc23,
    h264_mc30, h264_mc31, h264_mc32, h264_mc33
];