git.nihav.org Git - nihav.git/blame_incremental - codecs/vp8dsp.rs
add MPEG-4 ASP decoder
[nihav.git] / codecs / vp8dsp.rs
... / ...
CommitLineData
1use nihav_core::frame::NAVideoBufferRef;
2use nihav_codec_support::codecs::blockdsp::edge_emu;
3
/// Saturates a signed 16-bit value to the `0..=255` range and returns it as a byte.
fn clip_u8(val: i16) -> u8 {
    if val < 0 {
        0
    } else if val > 255 {
        255
    } else {
        val as u8
    }
}
7
/// Computes the loop filter adjustment term for the four samples around an edge.
///
/// The outer-tap difference `p1 - q1` is saturated to `-128..=127`, three times
/// the inner difference `q0 - p0` is added, and the sum is saturated to the same
/// range again.
fn delta(p1: i16, p0: i16, q0: i16, q1: i16) -> i16 {
    let sat8 = |v: i16| v.max(-128).min(127);
    let outer_taps = sat8(p1 - q1);
    let inner_taps = 3 * (q0 - p0);
    sat8(outer_taps + inner_taps)
}
11
/// Function pointer type for a loop filter routine.
///
/// The parameter list is the one used by `simple_loop_filter`,
/// `normal_loop_filter_inner` and `normal_loop_filter_edge`: the sample buffer,
/// the offset of the first filtered position, the distance between samples
/// across the edge (`step`), the distance between consecutive filtered
/// positions (`stride`), the number of positions (`len`) and the three
/// thresholds.
pub type LoopFilterFunc = fn(
    buf: &mut [u8],
    off: usize,
    step: usize,
    stride: usize,
    len: usize,
    thr: i16,
    thr_inner: i16,
    thr_hev: i16,
);
13
14pub fn simple_loop_filter(buf: &mut [u8], mut off: usize, step: usize, stride: usize, len: usize, thr: i16, _thr_inner: i16, _thr_hev: i16) {
15 for _ in 0..len {
16 let p1 = i16::from(buf[off - step * 2]);
17 let p0 = i16::from(buf[off - step * 1]);
18 let q0 = i16::from(buf[off + step * 0]);
19 let q1 = i16::from(buf[off + step * 1]);
20 let diff = (p0 - q0).abs() * 2 + ((p1 - q1).abs() >> 1);
21 if diff <= thr {
22 let diff = delta(p1, p0, q0, q1);
23 let diffq0 = (diff + 4).min(127) >> 3;
24 let diffp0 = (diff + 3).min(127) >> 3;
25 buf[off - step * 1] = clip_u8(p0 + diffp0);
26 buf[off + step * 0] = clip_u8(q0 - diffq0);
27 }
28 off += stride;
29 }
30}
31
32fn normal_loop_filter(buf: &mut [u8], mut off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16, edge: bool) {
33 for _i in 0..len {
34 let p1 = i16::from(buf[off - step * 2]);
35 let p0 = i16::from(buf[off - step * 1]);
36 let q0 = i16::from(buf[off + step * 0]);
37 let q1 = i16::from(buf[off + step * 1]);
38 let diff = (p0 - q0).abs() * 2 + ((p1 - q1).abs() >> 1);
39 if diff <= thr {
40 let p3 = i16::from(buf[off - step * 4]);
41 let p2 = i16::from(buf[off - step * 3]);
42 let p1 = i16::from(buf[off - step * 2]);
43 let q1 = i16::from(buf[off + step * 1]);
44 let q2 = i16::from(buf[off + step * 2]);
45 let q3 = i16::from(buf[off + step * 3]);
46 let dp2 = p3 - p2;
47 let dp1 = p2 - p1;
48 let dp0 = p1 - p0;
49 let dq0 = q1 - q0;
50 let dq1 = q2 - q1;
51 let dq2 = q3 - q2;
52 if (dp0.abs() <= thr_inner) && (dp1.abs() <= thr_inner) &&
53 (dp2.abs() <= thr_inner) && (dq0.abs() <= thr_inner) &&
54 (dq1.abs() <= thr_inner) && (dq2.abs() <= thr_inner) {
55 let high_edge_variation = (dp0.abs() > thr_hev) || (dq0.abs() > thr_hev);
56 if high_edge_variation {
57 let diff = delta(p1, p0, q0, q1);
58 let diffq0 = (diff + 4).min(127) >> 3;
59 let diffp0 = (diff + 3).min(127) >> 3;
60 buf[off - step * 1] = clip_u8(p0 + diffp0);
61 buf[off + step * 0] = clip_u8(q0 - diffq0);
62 } else if edge {
63 let d = delta(p1, p0, q0, q1);
64 let diff0 = (d * 27 + 63) >> 7;
65 buf[off - step * 1] = clip_u8(p0 + diff0);
66 buf[off + step * 0] = clip_u8(q0 - diff0);
67 let diff1 = (d * 18 + 63) >> 7;
68 buf[off - step * 2] = clip_u8(p1 + diff1);
69 buf[off + step * 1] = clip_u8(q1 - diff1);
70 let diff2 = (d * 9 + 63) >> 7;
71 buf[off - step * 3] = clip_u8(p2 + diff2);
72 buf[off + step * 2] = clip_u8(q2 - diff2);
73 } else {
74 let diff = (3 * (q0 - p0)).max(-128).min(127);
75 let diffq0 = (diff + 4).min(127) >> 3;
76 let diffp0 = (diff + 3).min(127) >> 3;
77 buf[off - step * 1] = clip_u8(p0 + diffp0);
78 buf[off + step * 0] = clip_u8(q0 - diffq0);
79 let diff2 = (diffq0 + 1) >> 1;
80 buf[off - step * 2] = clip_u8(p1 + diff2);
81 buf[off + step * 1] = clip_u8(q1 - diff2);
82 }
83 }
84 }
85 off += stride;
86 }
87}
88
89pub fn normal_loop_filter_inner(buf: &mut [u8], off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16) {
90 normal_loop_filter(buf, off, step, stride, len, thr, thr_inner, thr_hev, false);
91}
92
93pub fn normal_loop_filter_edge(buf: &mut [u8], off: usize, step: usize, stride: usize, len: usize, thr: i16, thr_inner: i16, thr_hev: i16) {
94 normal_loop_filter(buf, off, step, stride, len, thr, thr_inner, thr_hev, true);
95}
96
/// Performs the in-place inverse 4x4 Walsh-Hadamard transform.
///
/// `coeffs` holds the block in row-major order. The first pass runs the
/// butterfly down each column, the second pass runs it along each row and
/// applies the final `(x + 3) >> 3` rounding.
///
/// All intermediate sums are kept in `i32`. Done in `i16`, the additions can
/// overflow for large coefficients, which panics in builds with overflow
/// checks and silently wraps before the shift otherwise. Results are narrowed
/// to `i16` only once, after the final shift. Outputs are unchanged for every
/// input where no `i16` intermediate overflowed.
pub fn iwht4x4(coeffs: &mut [i16; 16]) {
    let mut tmp = [0i32; 16];
    for i in 0..4 {
        let s0 = i32::from(coeffs[i]);
        let s1 = i32::from(coeffs[i + 4]);
        let s2 = i32::from(coeffs[i + 8]);
        let s3 = i32::from(coeffs[i + 12]);
        let a1 = s0 + s3;
        let b1 = s1 + s2;
        let c1 = s1 - s2;
        let d1 = s0 - s3;
        tmp[i] = a1 + b1;
        tmp[i + 4] = c1 + d1;
        tmp[i + 8] = a1 - b1;
        tmp[i + 12] = d1 - c1;
    }
    for (row, src) in coeffs.chunks_mut(4).zip(tmp.chunks(4)) {
        let a1 = src[0] + src[3];
        let b1 = src[1] + src[2];
        let c1 = src[1] - src[2];
        let d1 = src[0] - src[3];
        row[0] = ((a1 + b1 + 3) >> 3) as i16;
        row[1] = ((c1 + d1 + 3) >> 3) as i16;
        row[2] = ((a1 - b1 + 3) >> 3) as i16;
        row[3] = ((d1 - c1 + 3) >> 3) as i16;
    }
}
123
/// DC-only shortcut for the inverse Walsh-Hadamard transform.
///
/// Fills the whole block with `(coeffs[0] + 3) >> 3`; the other fifteen input
/// values are ignored.
///
/// The rounding addition is done in `i32` so that a DC value close to
/// `i16::MAX` cannot overflow. The shifted result always fits in `i16`.
pub fn iwht4x4_dc(coeffs: &mut [i16; 16]) {
    let dc = ((i32::from(coeffs[0]) + 3) >> 3) as i16;
    *coeffs = [dc; 16];
}
128
/// `sqrt(2) * cos(pi / 8) - 1` scaled by 2^16.
const COS_PI8_SQRT2_MINUS1: i32 = 20091;
/// `sqrt(2) * sin(pi / 8)` scaled by 2^16.
const SIN_PI8_SQRT2: i32 = 35468;

/// One-dimensional 4-point inverse DCT over four assignable `i16` places.
///
/// The inputs are widened to `i32`, combined, rounded by half of `1 << $shift`,
/// shifted right by `$shift` and written back to the same four places.
macro_rules! idct4 {
    ($s0: expr, $s1: expr, $s2: expr, $s3: expr, $shift: expr) => {{
        let in0 = i32::from($s0);
        let in1 = i32::from($s1);
        let in2 = i32::from($s2);
        let in3 = i32::from($s3);

        // Even part: sum and difference of inputs 0 and 2.
        let even_sum = in0 + in2;
        let even_diff = in0 - in2;
        // Odd part: inputs 1 and 3 rotated with the fixed-point constants.
        let odd_c = ((in1 * SIN_PI8_SQRT2) >> 16)
            - (in3 + ((in3 * COS_PI8_SQRT2_MINUS1) >> 16));
        let odd_d = (in1 + ((in1 * COS_PI8_SQRT2_MINUS1) >> 16))
            + ((in3 * SIN_PI8_SQRT2) >> 16);

        let round = (1 << $shift) >> 1;
        $s0 = ((even_sum + odd_d + round) >> $shift) as i16;
        $s3 = ((even_sum - odd_d + round) >> $shift) as i16;
        $s1 = ((even_diff + odd_c + round) >> $shift) as i16;
        $s2 = ((even_diff - odd_c + round) >> $shift) as i16;
    }}
}
150
151pub fn idct4x4(coeffs: &mut [i16; 16]) {
152 for i in 0..4 {
153 idct4!(coeffs[i], coeffs[i + 4], coeffs[i + 8], coeffs[i + 12], 0);
154 }
155 for row in coeffs.chunks_mut(4) {
156 idct4!(row[0], row[1], row[2], row[3], 3);
157 }
158}
159
/// DC-only shortcut for the inverse 4x4 DCT.
///
/// Fills the whole block with `(coeffs[0] + 4) >> 3`; the other fifteen input
/// values are ignored.
///
/// The rounding addition is done in `i32`, as `idct4x4` does for its
/// intermediates, so a DC value close to `i16::MAX` cannot overflow and the
/// shortcut gives the same result as the full transform on a DC-only block.
/// The shifted result always fits in `i16`.
pub fn idct4x4_dc(coeffs: &mut [i16; 16]) {
    let dc = ((i32::from(coeffs[0]) + 4) >> 3) as i16;
    *coeffs = [dc; 16];
}
/// Two-tap weighted average of `$src[$off]` and `$src[$off + $step]`.
///
/// `$mode` is the weight of the second sample in eighths; the first sample gets
/// `8 - $mode`. The sum is rounded, shifted down by three bits and returned as
/// a `u8`.
macro_rules! interpolate {
    ($src: expr, $off: expr, $step: expr, $mode: expr) => {{
        let first = i32::from($src[$off]);
        let second = i32::from($src[$off + $step]);
        let w_first = (8 - $mode) as i32;
        let w_second = $mode as i32;
        let blended = (w_first * first + w_second * second + 4) >> 3;
        blended.max(0).min(255) as u8
    }}
}
173
174const TMP_STRIDE: usize = 16;
175
176fn mc_block_common(dst: &mut [u8], mut doff: usize, dstride: usize, src: &[u8], sstride: usize, size: usize, mx: usize, my: usize) {
177 if (mx == 0) && (my == 0) {
178 let dst = &mut dst[doff..];
179 for (out, src) in dst.chunks_mut(dstride).take(size).zip(src.chunks(sstride)) {
180 (&mut out[0..size]).copy_from_slice(&src[0..size]);
181 }
182 } else if my == 0 {
183 for src in src.chunks(sstride).take(size) {
184 for x in 0..size {
185 dst[doff + x] = interpolate!(src, x, 1, mx);
186 }
187 doff += dstride;
188 }
189 } else if mx == 0 {
190 for y in 0..size {
191 for x in 0..size {
192 dst[doff + x] = interpolate!(src, x + y * sstride, sstride, my);
193 }
194 doff += dstride;
195 }
196 } else {
197 let mut tmp = [0u8; TMP_STRIDE * (16 + 1)];
198 for (y, dst) in tmp.chunks_mut(TMP_STRIDE).take(size + 1).enumerate() {
199 for x in 0..size {
200 dst[x] = interpolate!(src, x + y * sstride, 1, mx);
201 }
202 }
203 for y in 0..size {
204 for x in 0..size {
205 dst[doff + x] = interpolate!(tmp, x + y * TMP_STRIDE, TMP_STRIDE, my);
206 }
207 doff += dstride;
208 }
209 }
210}
211fn mc_block(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
212 mvx: i16, mvy: i16, reffrm: NAVideoBufferRef<u8>, plane: usize,
213 mc_buf: &mut [u8], size: usize) {
214 if (mvx == 0) && (mvy == 0) {
215 let dst = &mut dst[doff..];
216 let sstride = reffrm.get_stride(plane);
217 let srcoff = reffrm.get_offset(plane) + xpos + ypos * sstride;
218 let src = &reffrm.get_data();
219 let src = &src[srcoff..];
220 for (out, src) in dst.chunks_mut(dstride).take(size).zip(src.chunks(sstride)) {
221 (&mut out[0..size]).copy_from_slice(&src[0..size]);
222 }
223 return;
224 }
225 let (w, h) = reffrm.get_dimensions(plane);
226 let wa = if plane == 0 { (w + 15) & !15 } else { (w + 7) & !7 } as isize;
227 let ha = if plane == 0 { (h + 15) & !15 } else { (h + 7) & !7 } as isize;
228 let bsize = (size as isize) + 1;
229 let ref_x = (xpos as isize) + ((mvx >> 3) as isize);
230 let ref_y = (ypos as isize) + ((mvy >> 3) as isize);
231
232 let (src, sstride) = if (ref_x < 0) || (ref_x + bsize > wa) || (ref_y < 0) || (ref_y + bsize > ha) {
233 edge_emu(&reffrm, ref_x, ref_y, bsize as usize, bsize as usize, mc_buf, 32, plane, 4);
234 (mc_buf as &[u8], 32)
235 } else {
236 let off = reffrm.get_offset(plane);
237 let stride = reffrm.get_stride(plane);
238 let data = reffrm.get_data();
239 (&data[off + (ref_x as usize) + (ref_y as usize) * stride..], stride)
240 };
241 let mx = (mvx & 7) as usize;
242 let my = (mvy & 7) as usize;
243 mc_block_common(dst, doff, dstride, src, sstride, size, mx, my);
244}
245pub fn mc_block16x16_bilin(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
246 mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) {
247 mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 16);
248}
249pub fn mc_block8x8_bilin(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
250 mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) {
251 mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 8);
252}
253pub fn mc_block4x4_bilin(dst: &mut [u8], doff: usize, dstride: usize, xpos: usize, ypos: usize,
254 mvx: i16, mvy: i16, src: NAVideoBufferRef<u8>, plane: usize, mc_buf: &mut [u8]) {
255 mc_block(dst, doff, dstride, xpos, ypos, mvx, mvy, src, plane, mc_buf, 4);
256}