]>
Commit | Line | Data |
---|---|---|
1 | use nihav_core::frame::{NAVideoBuffer, NAVideoBufferRef, NASimpleVideoFrame}; | |
2 | use super::{BlockDSP, CBPInfo, MV}; | |
3 | use super::super::blockdsp; | |
4 | use super::data::H263_CHROMA_ROUND; | |
5 | ||
6 | /*const W1: i32 = 22725; | |
7 | const W2: i32 = 21407; | |
8 | const W3: i32 = 19266; | |
9 | const W4: i32 = 16383; | |
10 | const W5: i32 = 12873; | |
11 | const W6: i32 = 8867; | |
12 | const W7: i32 = 4520; | |
13 | ||
14 | const ROW_SHIFT: u8 = 11; | |
15 | const COL_SHIFT: u8 = 20; | |
16 | ||
17 | fn idct_row(row: &mut [i16]) { | |
18 | let in0 = row[0] as i32; | |
19 | let in1 = row[1] as i32; | |
20 | let in2 = row[2] as i32; | |
21 | let in3 = row[3] as i32; | |
22 | let in4 = row[4] as i32; | |
23 | let in5 = row[5] as i32; | |
24 | let in6 = row[6] as i32; | |
25 | let in7 = row[7] as i32; | |
26 | ||
27 | let mut a0 = in0 * W1 + (1 << (ROW_SHIFT - 1)); | |
28 | let mut a1 = a0; | |
29 | let mut a2 = a0; | |
30 | let mut a3 = a0; | |
31 | ||
32 | a0 += W2 * in2; | |
33 | a1 += W6 * in2; | |
34 | a2 -= W6 * in2; | |
35 | a3 -= W2 * in2; | |
36 | ||
37 | let mut b0 = W1 * in1 + W3 * in3; | |
38 | let mut b1 = W3 * in1 - W7 * in3; | |
39 | let mut b2 = W5 * in1 - W1 * in3; | |
40 | let mut b3 = W7 * in1 - W5 * in3; | |
41 | ||
42 | a0 += W4 * in4 + W6 * in6; | |
43 | a1 -= W4 * in4 + W2 * in6; | |
44 | a2 -= W4 * in4 - W2 * in6; | |
45 | a3 += W4 * in4 - W6 * in6; | |
46 | ||
47 | b0 += W5 * in5 + W7 * in7; | |
48 | b1 -= W1 * in5 + W5 * in7; | |
49 | b2 += W7 * in5 + W3 * in7; | |
50 | b3 += W3 * in5 - W1 * in7; | |
51 | ||
52 | row[0] = ((a0 + b0) >> ROW_SHIFT) as i16; | |
53 | row[7] = ((a0 - b0) >> ROW_SHIFT) as i16; | |
54 | row[1] = ((a1 + b1) >> ROW_SHIFT) as i16; | |
55 | row[6] = ((a1 - b1) >> ROW_SHIFT) as i16; | |
56 | row[2] = ((a2 + b2) >> ROW_SHIFT) as i16; | |
57 | row[5] = ((a2 - b2) >> ROW_SHIFT) as i16; | |
58 | row[3] = ((a3 + b3) >> ROW_SHIFT) as i16; | |
59 | row[4] = ((a3 - b3) >> ROW_SHIFT) as i16; | |
60 | } | |
61 | ||
62 | fn idct_col(blk: &mut [i16; 64], off: usize) { | |
63 | let in0 = blk[off + 0*8] as i32; | |
64 | let in1 = blk[off + 1*8] as i32; | |
65 | let in2 = blk[off + 2*8] as i32; | |
66 | let in3 = blk[off + 3*8] as i32; | |
67 | let in4 = blk[off + 4*8] as i32; | |
68 | let in5 = blk[off + 5*8] as i32; | |
69 | let in6 = blk[off + 6*8] as i32; | |
70 | let in7 = blk[off + 7*8] as i32; | |
71 | ||
72 | let mut a0 = in0 * W1 + (1 << (COL_SHIFT - 1)); | |
73 | let mut a1 = a0; | |
74 | let mut a2 = a0; | |
75 | let mut a3 = a0; | |
76 | ||
77 | a0 += W2 * in2; | |
78 | a1 += W6 * in2; | |
79 | a2 -= W6 * in2; | |
80 | a3 -= W2 * in2; | |
81 | ||
82 | let mut b0 = W1 * in1 + W3 * in3; | |
83 | let mut b1 = W3 * in1 - W7 * in3; | |
84 | let mut b2 = W5 * in1 - W1 * in3; | |
85 | let mut b3 = W7 * in1 - W5 * in3; | |
86 | ||
87 | a0 += W4 * in4 + W6 * in6; | |
88 | a1 -= W4 * in4 + W2 * in6; | |
89 | a2 -= W4 * in4 - W2 * in6; | |
90 | a3 += W4 * in4 - W6 * in6; | |
91 | ||
92 | b0 += W5 * in5 + W7 * in7; | |
93 | b1 -= W1 * in5 + W5 * in7; | |
94 | b2 += W7 * in5 + W3 * in7; | |
95 | b3 += W3 * in5 - W1 * in7; | |
96 | ||
97 | blk[off + 0*8] = ((a0 + b0) >> COL_SHIFT) as i16; | |
98 | blk[off + 7*8] = ((a0 - b0) >> COL_SHIFT) as i16; | |
99 | blk[off + 1*8] = ((a1 + b1) >> COL_SHIFT) as i16; | |
100 | blk[off + 6*8] = ((a1 - b1) >> COL_SHIFT) as i16; | |
101 | blk[off + 2*8] = ((a2 + b2) >> COL_SHIFT) as i16; | |
102 | blk[off + 5*8] = ((a2 - b2) >> COL_SHIFT) as i16; | |
103 | blk[off + 3*8] = ((a3 + b3) >> COL_SHIFT) as i16; | |
104 | blk[off + 4*8] = ((a3 - b3) >> COL_SHIFT) as i16; | |
105 | } | |
106 | ||
107 | #[allow(dead_code)] | |
108 | pub fn h263_idct(blk: &mut [i16; 64]) { | |
109 | for i in 0..8 { idct_row(&mut blk[i*8..(i+1)*8]); } | |
110 | for i in 0..8 { idct_col(blk, i); } | |
111 | }*/ | |
112 | ||
// Fixed-point multipliers used by `idct_row` and `idct_col`.
// W1..W7 match round(2048 * sqrt(2) * cos(k * pi / 16)) for k = 1, 2, 3, 5, 6, 7
// and W8 matches round(256 / sqrt(2)).
const W1: i32 = 2841;
const W2: i32 = 2676;
const W3: i32 = 2408;
const W5: i32 = 1609;
const W6: i32 = 1108;
const W7: i32 = 565;
const W8: i32 = 181;

/// Final right shift applied to every output of the row pass.
const ROW_SHIFT: u8 = 8;
/// Final right shift applied to every output of the column pass.
const COL_SHIFT: u8 = 14;
123 | ||
124 | #[allow(clippy::erasing_op)] | |
125 | fn idct_row(row: &mut [i16]) { | |
126 | let in0 = ((i32::from(row[0])) << 11) + (1 << (ROW_SHIFT - 1)); | |
127 | let in1 = (i32::from(row[4])) << 11; | |
128 | let in2 = i32::from(row[6]); | |
129 | let in3 = i32::from(row[2]); | |
130 | let in4 = i32::from(row[1]); | |
131 | let in5 = i32::from(row[7]); | |
132 | let in6 = i32::from(row[5]); | |
133 | let in7 = i32::from(row[3]); | |
134 | ||
135 | let tmp = W7 * (in4 + in5); | |
136 | let a4 = tmp + (W1 - W7) * in4; | |
137 | let a5 = tmp - (W1 + W7) * in5; | |
138 | ||
139 | let tmp = W3 * (in6 + in7); | |
140 | let a6 = tmp - (W3 - W5) * in6; | |
141 | let a7 = tmp - (W3 + W5) * in7; | |
142 | ||
143 | let tmp = in0 + in1; | |
144 | ||
145 | let a0 = in0 - in1; | |
146 | let t1 = W6 * (in2 + in3); | |
147 | let a2 = t1 - (W2 + W6) * in2; | |
148 | let a3 = t1 + (W2 - W6) * in3; | |
149 | let b1 = a4 + a6; | |
150 | ||
151 | let b4 = a4 - a6; | |
152 | let t2 = a5 - a7; | |
153 | let b6 = a5 + a7; | |
154 | let b7 = tmp + a3; | |
155 | let b5 = tmp - a3; | |
156 | let b3 = a0 + a2; | |
157 | let b0 = a0 - a2; | |
158 | let b2 = (W8 * (b4 + t2) + 128) >> 8; | |
159 | let b4 = (W8 * (b4 - t2) + 128) >> 8; | |
160 | ||
161 | row[0] = ((b7 + b1) >> ROW_SHIFT) as i16; | |
162 | row[7] = ((b7 - b1) >> ROW_SHIFT) as i16; | |
163 | row[1] = ((b3 + b2) >> ROW_SHIFT) as i16; | |
164 | row[6] = ((b3 - b2) >> ROW_SHIFT) as i16; | |
165 | row[2] = ((b0 + b4) >> ROW_SHIFT) as i16; | |
166 | row[5] = ((b0 - b4) >> ROW_SHIFT) as i16; | |
167 | row[3] = ((b5 + b6) >> ROW_SHIFT) as i16; | |
168 | row[4] = ((b5 - b6) >> ROW_SHIFT) as i16; | |
169 | } | |
170 | ||
171 | #[allow(clippy::erasing_op)] | |
172 | fn idct_col(blk: &mut [i16; 64], off: usize) { | |
173 | let in0 = ((i32::from(blk[off + 0*8])) << 8) + (1 << (COL_SHIFT - 1)); | |
174 | let in1 = (i32::from(blk[off + 4*8])) << 8; | |
175 | let in2 = i32::from(blk[off + 6*8]); | |
176 | let in3 = i32::from(blk[off + 2*8]); | |
177 | let in4 = i32::from(blk[off + 1*8]); | |
178 | let in5 = i32::from(blk[off + 7*8]); | |
179 | let in6 = i32::from(blk[off + 5*8]); | |
180 | let in7 = i32::from(blk[off + 3*8]); | |
181 | ||
182 | let tmp = W7 * (in4 + in5); | |
183 | let a4 = (tmp + (W1 - W7) * in4) >> 3; | |
184 | let a5 = (tmp - (W1 + W7) * in5) >> 3; | |
185 | ||
186 | let tmp = W3 * (in6 + in7); | |
187 | let a6 = (tmp - (W3 - W5) * in6) >> 3; | |
188 | let a7 = (tmp - (W3 + W5) * in7) >> 3; | |
189 | ||
190 | let tmp = in0 + in1; | |
191 | ||
192 | let a0 = in0 - in1; | |
193 | let t1 = W6 * (in2 + in3); | |
194 | let a2 = (t1 - (W2 + W6) * in2) >> 3; | |
195 | let a3 = (t1 + (W2 - W6) * in3) >> 3; | |
196 | let b1 = a4 + a6; | |
197 | ||
198 | let b4 = a4 - a6; | |
199 | let t2 = a5 - a7; | |
200 | let b6 = a5 + a7; | |
201 | let b7 = tmp + a3; | |
202 | let b5 = tmp - a3; | |
203 | let b3 = a0 + a2; | |
204 | let b0 = a0 - a2; | |
205 | let b2 = (W8 * (b4 + t2) + 128) >> 8; | |
206 | let b4 = (W8 * (b4 - t2) + 128) >> 8; | |
207 | ||
208 | blk[off + 0*8] = ((b7 + b1) >> COL_SHIFT) as i16; | |
209 | blk[off + 7*8] = ((b7 - b1) >> COL_SHIFT) as i16; | |
210 | blk[off + 1*8] = ((b3 + b2) >> COL_SHIFT) as i16; | |
211 | blk[off + 6*8] = ((b3 - b2) >> COL_SHIFT) as i16; | |
212 | blk[off + 2*8] = ((b0 + b4) >> COL_SHIFT) as i16; | |
213 | blk[off + 5*8] = ((b0 - b4) >> COL_SHIFT) as i16; | |
214 | blk[off + 3*8] = ((b5 + b6) >> COL_SHIFT) as i16; | |
215 | blk[off + 4*8] = ((b5 - b6) >> COL_SHIFT) as i16; | |
216 | } | |
217 | ||
218 | #[allow(dead_code)] | |
219 | pub fn h263_idct(blk: &mut [i16; 64]) { | |
220 | for i in 0..8 { idct_row(&mut blk[i*8..(i+1)*8]); } | |
221 | for i in 0..8 { idct_col(blk, i); } | |
222 | } | |
223 | ||
/// Namespace for the integer inverse DCT used by `h263_annex_w_idct`.
struct IDCTAnnexW {}

impl IDCTAnnexW {
    // Fixed-point trigonometric constants. Numerically:
    // CPO8 ~ cos(pi/8) * 2^15 / sqrt(2), SPO8 ~ sin(pi/8) * 2^16 / sqrt(2),
    // CPO16/SPO16 ~ cos/sin(pi/16) * 2^15, C3PO16/S3PO16 ~ cos/sin(3*pi/16) * 2^15,
    // OOR2 ~ 2^15 / sqrt(2).
    const CPO8: i32 = 0x539f;
    const SPO8: i32 = 0x4546;
    const CPO16: i32 = 0x7d8a;
    const SPO16: i32 = 0x18f9;
    const C3PO16: i32 = 0x6a6e;
    const S3PO16: i32 = 0x471d;
    const OOR2: i32 = 0x5a82;

    /// Rotates the pair `(a, b)` by the fixed-point cosine `c` and sine `s`.
    ///
    /// `cs` and `ss` scale the cosine and sine products: a positive value is a
    /// right shift, zero or a negative value is a left shift by its magnitude.
    /// Returns `(a*s - b*c, a*c + b*s)`, each rounded and shifted down by 16 bits.
    fn rotate(a: i32, b: i32, c: i32, s: i32, cs: i8, ss: i8) -> (i32, i32) {
        let scale = |v: i32, amount: i8| if amount > 0 { v >> amount } else { v << -amount };
        let (a_cos, b_cos) = (scale(a * c, cs), scale(b * c, cs));
        let (a_sin, b_sin) = (scale(a * s, ss), scale(b * s, ss));
        ((a_sin - b_cos + 0x7FFF) >> 16, (a_cos + b_sin + 0x7FFF) >> 16)
    }

    /// Butterfly: returns `(a + b, a - b)`.
    fn bfly(a: i32, b: i32) -> (i32, i32) { (a + b, a - b) }

    /// Butterfly stages shared by the row and column passes.
    ///
    /// Takes the eight values left by the first stage (indexed like the
    /// inputs) and returns the eight outputs in natural order.
    fn butterfly_tail(s: [i32; 8]) -> [i32; 8] {
        let (t3, t1) = Self::bfly(s[1], s[3]);
        let (t5, t7) = Self::bfly(s[7], s[5]);
        let (e0, e6) = Self::bfly(s[0], s[6]);
        let (e4, e2) = Self::bfly(s[4], s[2]);

        let (u3, u7) = Self::bfly(t7, t3);
        // The rounding add saturates rather than overflowing.
        let u1 = (t1 * Self::OOR2 * 4).saturating_add(0x7FFF) >> 16;
        let u5 = (t5 * Self::OOR2 * 4).saturating_add(0x7FFF) >> 16;

        [e0 + u5, e4 + u3, e2 + u7, e6 + u1, e6 - u1, e2 - u7, e4 - u3, e0 - u5]
    }

    /// Row pass: transforms each row of `src` into the matching row of `dst`.
    /// Inputs are scaled up by four bits before the transform.
    fn idct_row(dst: &mut [i32; 64], src: &[i16; 64]) {
        for (drow, srow) in dst.chunks_mut(8).zip(src.chunks(8)) {
            let coef = |k: usize| i32::from(srow[k]) << 4;

            let (r2, r6) = Self::rotate(coef(2), coef(6), Self::CPO8, Self::SPO8, -2, -1);
            let (r1, r7) = Self::rotate(coef(1), coef(7), Self::CPO16, Self::SPO16, -1, -1);
            let (r3, r5) = Self::rotate(coef(3), coef(5), Self::C3PO16, Self::S3PO16, -1, -1);
            let (e0, e4) = Self::bfly(coef(0), coef(4));

            drow.copy_from_slice(&Self::butterfly_tail([e0, r1, r2, r3, e4, r5, r6, r7]));
        }
    }

    /// Column pass: transforms each column of `src` and stores the results,
    /// shifted down by six bits, into the matching column of `dst`.
    fn idct_col(dst: &mut [i16; 64], src: &[i32; 64]) {
        for col in 0..8 {
            let at = |row: usize| src[col + 8 * row];
            let (c0, c4) = (at(0), at(4));

            let (r2, r6) = Self::rotate(at(2), at(6), Self::CPO8, Self::SPO8, -1, 0);
            let (r1, r7) = Self::rotate(at(1), at(7), Self::CPO16, Self::SPO16, 0, 0);
            let (r3, r5) = Self::rotate(at(3), at(5), Self::C3PO16, Self::S3PO16, 0, 0);

            // Halve the DC butterfly; a bias of one is added before the shift
            // only when input coefficient 4 is negative.
            let (sum, diff) = Self::bfly(c0, c4);
            let (h0, h4) = if c4 >= 0 {
                (sum >> 1, diff >> 1)
            } else {
                ((sum + 1) >> 1, (diff + 1) >> 1)
            };

            let out = Self::butterfly_tail([h0, r1, r2, r3, h4, r5, r6, r7]);
            for (row, &value) in out.iter().enumerate() {
                dst[col + 8 * row] = (value >> 6) as i16;
            }
        }
    }
}
330 | #[allow(dead_code)] | |
331 | pub fn h263_annex_w_idct(blk: &mut [i16; 64]) { | |
332 | let mut tmp = [0i32; 64]; | |
333 | IDCTAnnexW::idct_row(&mut tmp, blk); | |
334 | IDCTAnnexW::idct_col(blk, &tmp); | |
335 | } | |
336 | ||
/// Copies a `bw` x `bh` block from `src` to `dst` without interpolation.
///
/// Both buffers are addressed from index 0; consecutive rows are `dstride`
/// and `sstride` elements apart. Panics if either buffer is too short.
fn h263_interp00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    if bw == 0 {
        return;
    }
    let (mut dpos, mut spos) = (0, 0);
    for _ in 0..bh {
        dst[dpos..dpos + bw].copy_from_slice(&src[spos..spos + bw]);
        dpos += dstride;
        spos += sstride;
    }
}
347 | ||
/// Writes a `bw` x `bh` block where each pixel is the rounded average of a
/// source pixel and its right-hand neighbour.
///
/// Reads `bw + 1` source pixels per row. Rows are `dstride`/`sstride`
/// elements apart; panics if either buffer is too short.
fn h263_interp01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    if bw == 0 {
        return;
    }
    let (mut dpos, mut spos) = (0, 0);
    for _ in 0..bh {
        let line = &src[spos..spos + bw + 1];
        for (out, pair) in dst[dpos..dpos + bw].iter_mut().zip(line.windows(2)) {
            *out = ((u16::from(pair[0]) + u16::from(pair[1]) + 1) >> 1) as u8;
        }
        dpos += dstride;
        spos += sstride;
    }
}
358 | ||
/// Writes a `bw` x `bh` block where each pixel is the rounded average of a
/// source pixel and the pixel one row (`sstride` elements) below it.
///
/// Reads `bh + 1` source rows. Panics if either buffer is too short.
fn h263_interp10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    if bw == 0 {
        return;
    }
    let (mut dpos, mut spos) = (0, 0);
    for _ in 0..bh {
        let upper = &src[spos..spos + bw];
        let lower = &src[spos + sstride..spos + sstride + bw];
        for ((out, &above), &below) in dst[dpos..dpos + bw].iter_mut().zip(upper).zip(lower) {
            *out = ((u16::from(above) + u16::from(below) + 1) >> 1) as u8;
        }
        dpos += dstride;
        spos += sstride;
    }
}
369 | ||
/// Writes a `bw` x `bh` block where each pixel is the rounded average of the
/// 2x2 source neighbourhood starting at that position.
///
/// Reads `bw + 1` pixels from each of `bh + 1` source rows. Panics if either
/// buffer is too short.
fn h263_interp11(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    if bw == 0 {
        return;
    }
    let (mut dpos, mut spos) = (0, 0);
    for _ in 0..bh {
        let upper = src[spos..spos + bw + 1].windows(2);
        let lower = src[spos + sstride..spos + sstride + bw + 1].windows(2);
        for ((out, top), bottom) in dst[dpos..dpos + bw].iter_mut().zip(upper).zip(lower) {
            let sum = u16::from(top[0]) + u16::from(top[1])
                    + u16::from(bottom[0]) + u16::from(bottom[1]);
            *out = ((sum + 2) >> 2) as u8;
        }
        dpos += dstride;
        spos += sstride;
    }
}
385 | ||
386 | pub const H263_INTERP_FUNCS: &[blockdsp::BlkInterpFunc] = &[ | |
387 | h263_interp00, h263_interp01, h263_interp10, h263_interp11 ]; | |
388 | ||
/// Blends a `bw` x `bh` source block into `dst`: every destination pixel
/// becomes the rounded average of its old value and the source pixel.
///
/// Rows are `dstride`/`sstride` elements apart; panics if either buffer is
/// too short.
fn h263_interp00_avg(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    if bw == 0 {
        return;
    }
    let (mut dpos, mut spos) = (0, 0);
    for _ in 0..bh {
        let line = &src[spos..spos + bw];
        for (out, &pred) in dst[dpos..dpos + bw].iter_mut().zip(line) {
            *out = ((u16::from(*out) + u16::from(pred) + 1) >> 1) as u8;
        }
        dpos += dstride;
        spos += sstride;
    }
}
403 | ||
/// Blends a horizontally interpolated `bw` x `bh` block into `dst`.
///
/// The prediction is the rounded average of a source pixel and its right-hand
/// neighbour; each destination pixel becomes the rounded average of its old
/// value and that prediction. Panics if either buffer is too short.
fn h263_interp01_avg(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    if bw == 0 {
        return;
    }
    let (mut dpos, mut spos) = (0, 0);
    for _ in 0..bh {
        let line = &src[spos..spos + bw + 1];
        for (out, pair) in dst[dpos..dpos + bw].iter_mut().zip(line.windows(2)) {
            let pred = (u16::from(pair[0]) + u16::from(pair[1]) + 1) >> 1;
            *out = ((u16::from(*out) + pred + 1) >> 1) as u8;
        }
        dpos += dstride;
        spos += sstride;
    }
}
418 | ||
/// Blends a vertically interpolated `bw` x `bh` block into `dst`.
///
/// The prediction is the rounded average of a source pixel and the pixel one
/// row (`sstride` elements) below; each destination pixel becomes the rounded
/// average of its old value and that prediction. Panics if either buffer is
/// too short.
fn h263_interp10_avg(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    if bw == 0 {
        return;
    }
    let (mut dpos, mut spos) = (0, 0);
    for _ in 0..bh {
        let upper = &src[spos..spos + bw];
        let lower = &src[spos + sstride..spos + sstride + bw];
        for ((out, &above), &below) in dst[dpos..dpos + bw].iter_mut().zip(upper).zip(lower) {
            let pred = (u16::from(above) + u16::from(below) + 1) >> 1;
            *out = ((u16::from(*out) + pred + 1) >> 1) as u8;
        }
        dpos += dstride;
        spos += sstride;
    }
}
433 | ||
/// Blends a diagonally interpolated `bw` x `bh` block into `dst`.
///
/// The prediction is the rounded average of the 2x2 source neighbourhood
/// starting at each position; each destination pixel becomes the rounded
/// average of its old value and that prediction. Panics if either buffer is
/// too short.
fn h263_interp11_avg(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    if bw == 0 {
        return;
    }
    let (mut dpos, mut spos) = (0, 0);
    for _ in 0..bh {
        let upper = src[spos..spos + bw + 1].windows(2);
        let lower = src[spos + sstride..spos + sstride + bw + 1].windows(2);
        for ((out, top), bottom) in dst[dpos..dpos + bw].iter_mut().zip(upper).zip(lower) {
            let sum = u16::from(top[0]) + u16::from(top[1])
                    + u16::from(bottom[0]) + u16::from(bottom[1]);
            let pred = (sum + 2) >> 2;
            *out = ((u16::from(*out) + pred + 1) >> 1) as u8;
        }
        dpos += dstride;
        spos += sstride;
    }
}
451 | ||
452 | pub const H263_INTERP_AVG_FUNCS: &[blockdsp::BlkInterpFunc] = &[ | |
453 | h263_interp00_avg, h263_interp01_avg, h263_interp10_avg, h263_interp11_avg ]; | |
454 | ||
/// Stateless handle carrying the H.263 block DSP routines.
#[derive(Default)]
pub struct H263BlockDSP { }

impl H263BlockDSP {
    /// Creates a new instance; the type has no fields to initialise.
    pub fn new() -> Self {
        Self { }
    }
}
463 | ||
464 | #[allow(clippy::erasing_op)] | |
465 | fn deblock_hor(buf: &mut NAVideoBuffer<u8>, comp: usize, strength: u8, off: usize) { | |
466 | let stride = buf.get_stride(comp); | |
467 | let dptr = buf.get_data_mut().unwrap(); | |
468 | let buf = dptr.as_mut_slice(); | |
469 | for x in 0..8 { | |
470 | let a = buf[off - 2 * stride + x] as i16; | |
471 | let b = buf[off - 1 * stride + x] as i16; | |
472 | let c = buf[off + 0 * stride + x] as i16; | |
473 | let d = buf[off + 1 * stride + x] as i16; | |
474 | let diff = ((a - d) + (c - b) * 4) / 8; | |
475 | if (diff != 0) && (diff > -24) && (diff < 24) { | |
476 | let d1a = (diff.abs() - 2 * (diff.abs() - (strength as i16)).max(0)).max(0); | |
477 | let d1 = if diff < 0 { -d1a } else { d1a }; | |
478 | let hd1 = d1a / 2; | |
479 | let d2 = ((a - d) / 4).max(-hd1).min(hd1); | |
480 | ||
481 | buf[off - 2 * stride + x] = (a - d2) as u8; | |
482 | buf[off - 1 * stride + x] = (b + d1).max(0).min(255) as u8; | |
483 | buf[off + 0 * stride + x] = (c - d1).max(0).min(255) as u8; | |
484 | buf[off + 1 * stride + x] = (d + d2) as u8; | |
485 | } | |
486 | } | |
487 | } | |
488 | ||
489 | fn deblock_ver(buf: &mut NAVideoBuffer<u8>, comp: usize, strength: u8, off: usize) { | |
490 | let stride = buf.get_stride(comp); | |
491 | let dptr = buf.get_data_mut().unwrap(); | |
492 | let buf = dptr.as_mut_slice(); | |
493 | for y in 0..8 { | |
494 | let a = buf[off - 2 + y * stride] as i16; | |
495 | let b = buf[off - 1 + y * stride] as i16; | |
496 | let c = buf[off + 0 + y * stride] as i16; | |
497 | let d = buf[off + 1 + y * stride] as i16; | |
498 | let diff = (a - d + (c - b) * 4) / 8; | |
499 | if (diff != 0) && (diff > -24) && (diff < 24) { | |
500 | let d1a = (diff.abs() - 2 * (diff.abs() - (strength as i16)).max(0)).max(0); | |
501 | let d1 = if diff < 0 { -d1a } else { d1a }; | |
502 | let hd1 = d1a / 2; | |
503 | let d2 = ((a - d) / 4).max(-hd1).min(hd1); | |
504 | ||
505 | buf[off - 2 + y * stride] = (a - d2) as u8; | |
506 | buf[off - 1 + y * stride] = (b + d1).max(0).min(255) as u8; | |
507 | buf[off + y * stride] = (c - d1).max(0).min(255) as u8; | |
508 | buf[off + 1 + y * stride] = (d + d2) as u8; | |
509 | } | |
510 | } | |
511 | } | |
512 | ||
/// Deblocking strength looked up by quantiser value (index 0..=31).
const FILTER_STRENGTH: [u8; 32] = [
     1,  1,  2,  2,  3,  3,  4,  4,
     4,  5,  5,  5,  6,  6,  7,  7,
     7,  8,  8,  8,  9,  9,  9, 10,
    10, 10, 11, 11, 11, 12, 12, 12,
];
517 | ||
518 | pub fn h263_filter_row(buf: &mut NAVideoBuffer<u8>, mb_y: usize, mb_w: usize, cbpi: &CBPInfo) { | |
519 | let stride = buf.get_stride(0); | |
520 | let mut off = buf.get_offset(0) + mb_y * 16 * stride; | |
521 | for mb_x in 0..mb_w { | |
522 | let coff = off; | |
523 | let coded0 = cbpi.is_coded(mb_x, 0); | |
524 | let coded1 = cbpi.is_coded(mb_x, 1); | |
525 | let q = cbpi.get_q(mb_w + mb_x); | |
526 | let strength = if q < 32 { FILTER_STRENGTH[q as usize] } else { 0 }; | |
527 | if mb_y != 0 { | |
528 | if coded0 && cbpi.is_coded_top(mb_x, 0) { deblock_hor(buf, 0, strength, coff); } | |
529 | if coded1 && cbpi.is_coded_top(mb_x, 1) { deblock_hor(buf, 0, strength, coff + 8); } | |
530 | } | |
531 | let coff = off + 8 * stride; | |
532 | if cbpi.is_coded(mb_x, 2) && coded0 { deblock_hor(buf, 0, q, coff); } | |
533 | if cbpi.is_coded(mb_x, 3) && coded1 { deblock_hor(buf, 0, q, coff + 8); } | |
534 | off += 16; | |
535 | } | |
536 | let mut leftt = false; | |
537 | let mut leftc = false; | |
538 | let mut off = buf.get_offset(0) + mb_y * 16 * stride; | |
539 | for mb_x in 0..mb_w { | |
540 | let ctop0 = cbpi.is_coded_top(mb_x, 0); | |
541 | let ctop1 = cbpi.is_coded_top(mb_x, 0); | |
542 | let ccur0 = cbpi.is_coded(mb_x, 0); | |
543 | let ccur1 = cbpi.is_coded(mb_x, 1); | |
544 | let q = cbpi.get_q(mb_w + mb_x); | |
545 | let strength = if q < 32 { FILTER_STRENGTH[q as usize] } else { 0 }; | |
546 | if mb_y != 0 { | |
547 | let coff = off - 8 * stride; | |
548 | let qtop = cbpi.get_q(mb_x); | |
549 | let strtop = if qtop < 32 { FILTER_STRENGTH[qtop as usize] } else { 0 }; | |
550 | if leftt && ctop0 { deblock_ver(buf, 0, strtop, coff); } | |
551 | if ctop0 && ctop1 { deblock_ver(buf, 0, strtop, coff + 8); } | |
552 | } | |
553 | if leftc && ccur0 { deblock_ver(buf, 0, strength, off); } | |
554 | if ccur0 && ccur1 { deblock_ver(buf, 0, strength, off + 8); } | |
555 | leftt = ctop1; | |
556 | leftc = ccur1; | |
557 | off += 16; | |
558 | } | |
559 | let strideu = buf.get_stride(1); | |
560 | let stridev = buf.get_stride(2); | |
561 | let offu = buf.get_offset(1) + mb_y * 8 * strideu; | |
562 | let offv = buf.get_offset(2) + mb_y * 8 * stridev; | |
563 | if mb_y != 0 { | |
564 | for mb_x in 0..mb_w { | |
565 | let ctu = cbpi.is_coded_top(mb_x, 4); | |
566 | let ccu = cbpi.is_coded(mb_x, 4); | |
567 | let ctv = cbpi.is_coded_top(mb_x, 5); | |
568 | let ccv = cbpi.is_coded(mb_x, 5); | |
569 | let q = cbpi.get_q(mb_w + mb_x); | |
570 | let strength = if q < 32 { FILTER_STRENGTH[q as usize] } else { 0 }; | |
571 | if ctu && ccu { deblock_hor(buf, 1, strength, offu + mb_x * 8); } | |
572 | if ctv && ccv { deblock_hor(buf, 2, strength, offv + mb_x * 8); } | |
573 | } | |
574 | let mut leftu = false; | |
575 | let mut leftv = false; | |
576 | let offu = buf.get_offset(1) + (mb_y - 1) * 8 * strideu; | |
577 | let offv = buf.get_offset(2) + (mb_y - 1) * 8 * stridev; | |
578 | for mb_x in 0..mb_w { | |
579 | let ctu = cbpi.is_coded_top(mb_x, 4); | |
580 | let ctv = cbpi.is_coded_top(mb_x, 5); | |
581 | let qt = cbpi.get_q(mb_x); | |
582 | let strt = if qt < 32 { FILTER_STRENGTH[qt as usize] } else { 0 }; | |
583 | if leftu && ctu { deblock_ver(buf, 1, strt, offu + mb_x * 8); } | |
584 | if leftv && ctv { deblock_ver(buf, 2, strt, offv + mb_x * 8); } | |
585 | leftu = ctu; | |
586 | leftv = ctv; | |
587 | } | |
588 | } | |
589 | } | |
590 | ||
591 | impl BlockDSP for H263BlockDSP { | |
592 | fn idct(&self, blk: &mut [i16; 64]) { | |
593 | h263_idct(blk) | |
594 | } | |
595 | fn copy_blocks(&self, dst: &mut NAVideoBuffer<u8>, src: NAVideoBufferRef<u8>, xpos: usize, ypos: usize, mv: MV) { | |
596 | let mode = ((mv.x & 1) + (mv.y & 1) * 2) as usize; | |
597 | let cmode = (if (mv.x & 3) != 0 { 1 } else { 0 }) + (if (mv.y & 3) != 0 { 2 } else { 0 }); | |
598 | ||
599 | let mut dst = NASimpleVideoFrame::from_video_buf(dst).unwrap(); | |
600 | ||
601 | blockdsp::copy_block(&mut dst, src.clone(), 0, xpos, ypos, mv.x >> 1, mv.y >> 1, 16, 16, 0, 1, mode, H263_INTERP_FUNCS); | |
602 | blockdsp::copy_block(&mut dst, src.clone(), 1, xpos >> 1, ypos >> 1, mv.x >> 2, mv.y >> 2, 8, 8, 0, 1, cmode, H263_INTERP_FUNCS); | |
603 | blockdsp::copy_block(&mut dst, src, 2, xpos >> 1, ypos >> 1, mv.x >> 2, mv.y >> 2, 8, 8, 0, 1, cmode, H263_INTERP_FUNCS); | |
604 | } | |
605 | fn copy_blocks8x8(&self, dst: &mut NAVideoBuffer<u8>, src: NAVideoBufferRef<u8>, xpos: usize, ypos: usize, mvs: &[MV; 4]) { | |
606 | let mut dst = NASimpleVideoFrame::from_video_buf(dst).unwrap(); | |
607 | ||
608 | for i in 0..4 { | |
609 | let xadd = (i & 1) * 8; | |
610 | let yadd = (i & 2) * 4; | |
611 | let mode = ((mvs[i].x & 1) + (mvs[i].y & 1) * 2) as usize; | |
612 | ||
613 | blockdsp::copy_block(&mut dst, src.clone(), 0, xpos + xadd, ypos + yadd, mvs[i].x >> 1, mvs[i].y >> 1, 8, 8, 0, 1, mode, H263_INTERP_FUNCS); | |
614 | } | |
615 | ||
616 | let sum_mv = mvs[0] + mvs[1] + mvs[2] + mvs[3]; | |
617 | let cmx = (sum_mv.x >> 3) + H263_CHROMA_ROUND[(sum_mv.x & 0xF) as usize]; | |
618 | let cmy = (sum_mv.y >> 3) + H263_CHROMA_ROUND[(sum_mv.y & 0xF) as usize]; | |
619 | let mode = ((cmx & 1) + (cmy & 1) * 2) as usize; | |
620 | for plane in 1..3 { | |
621 | blockdsp::copy_block(&mut dst, src.clone(), plane, xpos >> 1, ypos >> 1, cmx >> 1, cmy >> 1, 8, 8, 0, 1, mode, H263_INTERP_FUNCS); | |
622 | } | |
623 | } | |
624 | fn avg_blocks(&self, dst: &mut NAVideoBuffer<u8>, src: NAVideoBufferRef<u8>, xpos: usize, ypos: usize, mv: MV) { | |
625 | let mode = ((mv.x & 1) + (mv.y & 1) * 2) as usize; | |
626 | let cmode = (if (mv.x & 3) != 0 { 1 } else { 0 }) + (if (mv.y & 3) != 0 { 2 } else { 0 }); | |
627 | ||
628 | let mut dst = NASimpleVideoFrame::from_video_buf(dst).unwrap(); | |
629 | ||
630 | blockdsp::copy_block(&mut dst, src.clone(), 0, xpos, ypos, mv.x >> 1, mv.y >> 1, 16, 16, 0, 1, mode, H263_INTERP_AVG_FUNCS); | |
631 | blockdsp::copy_block(&mut dst, src.clone(), 1, xpos >> 1, ypos >> 1, mv.x >> 2, mv.y >> 2, 8, 8, 0, 1, cmode, H263_INTERP_AVG_FUNCS); | |
632 | blockdsp::copy_block(&mut dst, src, 2, xpos >> 1, ypos >> 1, mv.x >> 2, mv.y >> 2, 8, 8, 0, 1, cmode, H263_INTERP_AVG_FUNCS); | |
633 | } | |
634 | fn avg_blocks8x8(&self, dst: &mut NAVideoBuffer<u8>, src: NAVideoBufferRef<u8>, xpos: usize, ypos: usize, mvs: &[MV; 4]) { | |
635 | let mut dst = NASimpleVideoFrame::from_video_buf(dst).unwrap(); | |
636 | ||
637 | for i in 0..4 { | |
638 | let xadd = (i & 1) * 8; | |
639 | let yadd = (i & 2) * 4; | |
640 | let mode = ((mvs[i].x & 1) + (mvs[i].y & 1) * 2) as usize; | |
641 | ||
642 | blockdsp::copy_block(&mut dst, src.clone(), 0, xpos + xadd, ypos + yadd, mvs[i].x >> 1, mvs[i].y >> 1, 8, 8, 0, 1, mode, H263_INTERP_AVG_FUNCS); | |
643 | } | |
644 | ||
645 | let sum_mv = mvs[0] + mvs[1] + mvs[2] + mvs[3]; | |
646 | let cmx = (sum_mv.x >> 3) + H263_CHROMA_ROUND[(sum_mv.x & 0xF) as usize]; | |
647 | let cmy = (sum_mv.y >> 3) + H263_CHROMA_ROUND[(sum_mv.y & 0xF) as usize]; | |
648 | let mode = ((cmx & 1) + (cmy & 1) * 2) as usize; | |
649 | for plane in 1..3 { | |
650 | blockdsp::copy_block(&mut dst, src.clone(), plane, xpos >> 1, ypos >> 1, cmx >> 1, cmy >> 1, 8, 8, 0, 1, mode, H263_INTERP_AVG_FUNCS); | |
651 | } | |
652 | } | |
653 | fn filter_row(&self, buf: &mut NAVideoBuffer<u8>, mb_y: usize, mb_w: usize, cbpi: &CBPInfo) { | |
654 | h263_filter_row(buf, mb_y, mb_w, cbpi) | |
655 | } | |
656 | } | |
657 | ||
// Blends three samples of `$src`, taken at `$base_off + $offN`, with the
// integer weights `$wN`; the weighted sum is rounded and divided by 8.
macro_rules! obmc_filter {
    ($src: expr, $base_off: expr, $off0: expr, $w0: expr, $off1: expr, $w1: expr, $off2: expr, $w2: expr) => ({
        let first = $src[$base_off + $off0] as u16;
        let second = $src[$base_off + $off1] as u16;
        let third = $src[$base_off + $off2] as u16;
        let weighted = first * $w0 + second * $w1 + third * $w2 + 4;
        (weighted >> 3) as u8
    })
}
/// Produces one 8x8 block in `dst` by blending overlapping predictions from `src`.
///
/// `src` holds five 8x8 predictions in a cross layout with row stride
/// `sstride`: top at column 8 / row 0, left at column 0 / row 8, current at
/// column 8 / row 8, right at column 16 / row 8 and bottom at column 8 /
/// row 16. Every output pixel mixes the current prediction with one
/// horizontal neighbour (left for columns 0..4, right for 4..8) and one
/// vertical neighbour (top for rows 0..4, bottom for 4..8); the three weights
/// always sum to 8. Output rows are `dstride` elements apart.
pub fn obmc_filter(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize) {
    // Weight of the left/right prediction for each output pixel.
    const HORZ_WEIGHT: [[u16; 8]; 8] = [
        [2, 1, 1, 1, 1, 1, 1, 2],
        [2, 2, 1, 1, 1, 1, 2, 2],
        [2, 2, 1, 1, 1, 1, 2, 2],
        [2, 2, 1, 1, 1, 1, 2, 2],
        [2, 2, 1, 1, 1, 1, 2, 2],
        [2, 2, 1, 1, 1, 1, 2, 2],
        [2, 2, 1, 1, 1, 1, 2, 2],
        [2, 1, 1, 1, 1, 1, 1, 2],
    ];
    // Weight of the top/bottom prediction for each output pixel.
    const VERT_WEIGHT: [[u16; 8]; 8] = [
        [2, 2, 2, 2, 2, 2, 2, 2],
        [1, 1, 2, 2, 2, 2, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 2, 2, 2, 2, 1, 1],
        [2, 2, 2, 2, 2, 2, 2, 2],
    ];

    // Offsets of the five predictions relative to the top-left of `src`.
    let top_off = 8;
    let left_off = sstride * 8;
    let right_off = 16 + sstride * 8;
    let bottom_off = 8 + sstride * 16;
    let cur_off = 8 + sstride * 8;

    for y in 0..8 {
        let vert_off = if y < 4 { top_off } else { bottom_off };
        let doff = y * dstride;
        let soff = y * sstride;
        for x in 0..8 {
            let horz_off = if x < 4 { left_off } else { right_off };
            let wh = HORZ_WEIGHT[y][x];
            let wv = VERT_WEIGHT[y][x];
            // The current prediction takes whatever is left of the total weight 8.
            dst[doff + x] = obmc_filter!(src, soff + x, horz_off, wh, vert_off, wv, cur_off, 8 - wh - wv);
        }
    }
}