semi-working VP5/6 decoder
[nihav.git] / nihav-duck / src / codecs / vpcommon.rs
CommitLineData
5b24175d 1use nihav_core::codecs::*;
8e4b2f44 2use nihav_core::codecs::blockdsp::*;
5b24175d
KS
3
/// Macroblock coding types shared by the VP-family decoders.
///
/// `get_ref_id` maps `Intra` to `0`, every `Inter*` variant to
/// `VP_REF_INTER` and every `Golden*` variant to `VP_REF_GOLDEN`.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum VPMBType {
    /// Intra-coded macroblock; also the `Default` value.
    Intra,

    // Variants mapped to `VP_REF_INTER`.
    InterNoMV,
    InterMV,
    InterNearest,
    InterNear,
    InterFourMV,

    // Variants mapped to `VP_REF_GOLDEN`.
    GoldenNoMV,
    GoldenMV,
    GoldenNearest,
    GoldenNear,
}
18
3584b223
KS
/// Reference identifier returned by `VPMBType::get_ref_id` for the `Inter*` macroblock types.
pub const VP_REF_INTER: u8 = 1;
/// Reference identifier returned by `VPMBType::get_ref_id` for the `Golden*` macroblock types.
pub const VP_REF_GOLDEN: u8 = 2;
21
5b24175d
KS
22#[allow(dead_code)]
23impl VPMBType {
24 pub fn is_intra(self) -> bool { self == VPMBType::Intra }
25 pub fn get_ref_id(self) -> u8 {
26 match self {
27 VPMBType::Intra => 0,
28 VPMBType::InterNoMV |
29 VPMBType::InterMV |
30 VPMBType::InterNearest |
31 VPMBType::InterNear |
3584b223
KS
32 VPMBType::InterFourMV => VP_REF_INTER,
33 _ => VP_REF_GOLDEN,
5b24175d
KS
34 }
35 }
36}
37
38impl Default for VPMBType {
39 fn default() -> Self { VPMBType::Intra }
40}
41
42#[derive(Default)]
43pub struct VPShuffler {
44 lastframe: Option<NAVideoBufferRef<u8>>,
45 goldframe: Option<NAVideoBufferRef<u8>>,
46}
47
48impl VPShuffler {
49 pub fn new() -> Self { VPShuffler { lastframe: None, goldframe: None } }
50 pub fn clear(&mut self) { self.lastframe = None; self.goldframe = None; }
51 pub fn add_frame(&mut self, buf: NAVideoBufferRef<u8>) {
52 self.lastframe = Some(buf);
53 }
54 pub fn add_golden_frame(&mut self, buf: NAVideoBufferRef<u8>) {
55 self.goldframe = Some(buf);
56 }
57 pub fn get_last(&mut self) -> Option<NAVideoBufferRef<u8>> {
58 if let Some(ref frm) = self.lastframe {
59 Some(frm.clone())
60 } else {
61 None
62 }
63 }
64 pub fn get_golden(&mut self) -> Option<NAVideoBufferRef<u8>> {
65 if let Some(ref frm) = self.goldframe {
66 Some(frm.clone())
67 } else {
68 None
69 }
70 }
71}
72
3584b223
KS
/// State of the boolean (binary arithmetic) decoder reading from a byte slice.
#[allow(dead_code)]
pub struct BoolCoder<'a> {
    /// Input bytes being decoded.
    pub src: &'a [u8],
    /// Index in `src` of the next byte to be pulled in.
    pos: usize,
    /// Current code window; decisions compare it against `split << 24`.
    value: u32,
    /// Current interval width.
    range: u32,
    /// Shift budget left before the next input byte must be merged into `value`.
    bits: i32,
}
81
82#[allow(dead_code)]
83impl<'a> BoolCoder<'a> {
84 pub fn new(src: &'a [u8]) -> DecoderResult<Self> {
85 if src.len() < 3 { return Err(DecoderError::ShortData); }
86 let value = ((src[0] as u32) << 24) | ((src[1] as u32) << 16) | ((src[2] as u32) << 8) | (src[3] as u32);
87 Ok(Self { src, pos: 4, value, range: 255, bits: 8 })
88 }
89 pub fn read_bool(&mut self) -> bool {
90 self.read_prob(128)
91 }
92 pub fn read_prob(&mut self, prob: u8) -> bool {
93 self.renorm();
94 let split = 1 + (((self.range - 1) * (prob as u32)) >> 8);
95 let bit;
96 if self.value < (split << 24) {
97 self.range = split;
98 bit = false;
99 } else {
100 self.range -= split;
101 self.value -= split << 24;
102 bit = true;
103 }
104 bit
105 }
106 pub fn read_bits(&mut self, bits: u8) -> u32 {
107 let mut val = 0u32;
108 for _ in 0..bits {
109 val = (val << 1) | (self.read_prob(128) as u32);
110 }
111 val
112 }
113 pub fn read_probability(&mut self) -> u8 {
114 let val = self.read_bits(7) as u8;
115 if val == 0 {
116 1
117 } else {
118 val << 1
119 }
120 }
121 fn renorm(&mut self) {
122 let shift = self.range.leading_zeros() & 7;
123 self.range <<= shift;
124 self.value <<= shift;
125 self.bits -= shift as i32;
126 if (self.bits <= 0) && (self.pos < self.src.len()) {
127 self.value |= (self.src[self.pos] as u32) << (-self.bits as u8);
128 self.pos += 1;
129 self.bits += 8;
130 }
131/* while self.range < 0x80 {
132 self.range <<= 1;
133 self.value <<= 1;
134 self.bits -= 1;
135 if (self.bits <= 0) && (self.pos < self.src.len()) {
136 self.value |= self.src[self.pos] as u32;
137 self.pos += 1;
138 self.bits = 8;
139 }
140 }*/
141 }
142 pub fn skip_bytes(&mut self, nbytes: usize) {
143 for _ in 0..nbytes {
144 self.value <<= 8;
145 if self.pos < self.src.len() {
146 self.value |= self.src[self.pos] as u32;
147 self.pos += 1;
148 }
149 }
150 }
151}
152
/// Rescales `prob` as `((prob * weights[0] + 128) >> 8) + weights[1]`,
/// then limits the result to at most `maxval` and at least 1 before
/// narrowing it to `u8`.
#[allow(dead_code)]
pub fn rescale_prob(prob: u8, weights: &[i16; 2], maxval: i32) -> u8 {
    let scale = weights[0] as i32;
    let bias = weights[1] as i32;
    let scaled = ((prob as i32) * scale + 128) >> 8;
    let limited = (scaled + bias).min(maxval);
    limited.max(1) as u8
}
157
/// Builds a binary decision tree on top of a coder's `read_prob` method.
///
/// The four-argument form reads one decision from `$bc` with probability
/// `$prob` and evaluates `$node1` when it is `false`, `$node2` when it is
/// `true`. The one-argument form is a leaf and evaluates to its argument.
#[macro_export]
macro_rules! vp_tree {
    ($bc: expr, $prob: expr, $node1: expr, $node2: expr) => {
        match $bc.read_prob($prob) {
            false => $node1,
            true => $node2,
        }
    };
    ($leaf: expr) => { $leaf }
}
169
5b24175d
KS
// Fixed-point IDCT multipliers: `CnSm` is cos(n * PI / 16), which equals
// sin(m * PI / 16), scaled by 65536 and rounded to the nearest integer.
const C1S7: i32 = 64277;
const C2S6: i32 = 60547;
const C3S5: i32 = 54491;
const C4S4: i32 = 46341;
const C5S3: i32 = 36410;
const C6S2: i32 = 25080;
const C7S1: i32 = 12785;

/// Multiplies two values and drops the low 16 bits of the product
/// (arithmetic shift, so negative results round towards minus infinity).
///
/// The multiplication wraps on overflow. Out-of-range coefficients from a
/// damaged stream therefore give the same result in every build profile
/// instead of panicking when overflow checks are enabled.
fn mul16(a: i32, b: i32) -> i32 {
    a.wrapping_mul(b) >> 16
}

// One 8-point inverse DCT pass: reads `$s0..$s7`, writes `$d0..$d7`.
// `$bias` is added to the even (DC) terms before the final `>> $shift`,
// and every result is cast to `$otype`.
macro_rules! idct_step {
    ($s0:expr, $s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr,
     $d0:expr, $d1:expr, $d2:expr, $d3:expr, $d4:expr, $d5:expr, $d6:expr, $d7:expr,
     $bias:expr, $shift:expr, $otype:ty) => {
        let t_a = mul16(C1S7, i32::from($s1)) + mul16(C7S1, i32::from($s7));
        let t_b = mul16(C7S1, i32::from($s1)) - mul16(C1S7, i32::from($s7));
        let t_c = mul16(C3S5, i32::from($s3)) + mul16(C5S3, i32::from($s5));
        let t_d = mul16(C3S5, i32::from($s5)) - mul16(C5S3, i32::from($s3));
        let t_a1 = mul16(C4S4, t_a - t_c);
        let t_b1 = mul16(C4S4, t_b - t_d);
        let t_c = t_a + t_c;
        let t_d = t_b + t_d;
        // Widen before adding/subtracting: in the row pass the inputs are
        // `i16`, and their sum or difference may not fit in `i16`.
        let t_e = mul16(C4S4, i32::from($s0) + i32::from($s4)) + $bias;
        let t_f = mul16(C4S4, i32::from($s0) - i32::from($s4)) + $bias;
        let t_g = mul16(C2S6, i32::from($s2)) + mul16(C6S2, i32::from($s6));
        let t_h = mul16(C6S2, i32::from($s2)) - mul16(C2S6, i32::from($s6));
        let t_e1 = t_e - t_g;
        let t_g = t_e + t_g;
        let t_a = t_f + t_a1;
        let t_f = t_f - t_a1;
        let t_b = t_b1 - t_h;
        let t_h = t_b1 + t_h;

        $d0 = ((t_g + t_c) >> $shift) as $otype;
        $d7 = ((t_g - t_c) >> $shift) as $otype;
        $d1 = ((t_a + t_h) >> $shift) as $otype;
        $d2 = ((t_a - t_h) >> $shift) as $otype;
        $d3 = ((t_e1 + t_d) >> $shift) as $otype;
        $d4 = ((t_e1 - t_d) >> $shift) as $otype;
        $d5 = ((t_f + t_b) >> $shift) as $otype;
        $d6 = ((t_f - t_b) >> $shift) as $otype;
    }
}

/// In-place 8x8 inverse DCT of `coeffs` (row-major).
///
/// Rows are transformed first into a 32-bit scratch block without
/// rounding; columns are then transformed with a bias of 8 and a right
/// shift by 4, and the results are written back as `i16`.
pub fn vp_idct(coeffs: &mut [i16; 64]) {
    let mut tmp = [0i32; 64];
    for (src, dst) in coeffs.chunks(8).zip(tmp.chunks_mut(8)) {
        idct_step!(src[0], src[1], src[2], src[3], src[4], src[5], src[6], src[7],
                   dst[0], dst[1], dst[2], dst[3], dst[4], dst[5], dst[6], dst[7], 0, 0, i32);
    }
    let src = &tmp;
    let dst = coeffs;
    for i in 0..8 {
        idct_step!(src[i],         src[8 + i],     src[2 * 8 + i], src[3 * 8 + i],
                   src[4 * 8 + i], src[5 * 8 + i], src[6 * 8 + i], src[7 * 8 + i],
                   dst[i],         dst[8 + i],     dst[2 * 8 + i], dst[3 * 8 + i],
                   dst[4 * 8 + i], dst[5 * 8 + i], dst[6 * 8 + i], dst[7 * 8 + i], 8, 4, i16);
    }
}

/// Inverse DCT shortcut for a block whose only relevant coefficient is
/// `coeffs[0]`: computes the single output value and stores it in all 64
/// positions.
pub fn vp_idct_dc(coeffs: &mut [i16; 64]) {
    let dc = ((mul16(C4S4, mul16(C4S4, i32::from(coeffs[0]))) + 8) >> 4) as i16;
    for el in coeffs.iter_mut() {
        *el = dc;
    }
}
238
/// Multiplies every coefficient except the first by the matching `qmat`
/// entry, wrapping on overflow. `coeffs[0]` is left untouched.
pub fn unquant(coeffs: &mut [i16; 64], qmat: &[i16; 64]) {
    for (coef, &quant) in coeffs.iter_mut().zip(qmat.iter()).skip(1) {
        *coef = coef.wrapping_mul(quant);
    }
}
244
245pub fn vp_put_block(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
246 vp_idct(coeffs);
247 let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
248 for y in 0..8 {
249 for x in 0..8 {
250 frm.data[off + x] = (coeffs[x + y * 8] + 128).min(255).max(0) as u8;
251 }
252 off += frm.stride[plane];
253 }
254}
255
3584b223
KS
256pub fn vp_put_block_ilace(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
257 vp_idct(coeffs);
258 let mut off = frm.offset[plane] + bx * 8 + ((by & !1) * 8 + (by & 1)) * frm.stride[plane];
259 for y in 0..8 {
260 for x in 0..8 {
261 frm.data[off + x] = (coeffs[x + y * 8] + 128).min(255).max(0) as u8;
262 }
263 off += frm.stride[plane] * 2;
264 }
265}
266
5b24175d
KS
267pub fn vp_put_block_dc(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
268 vp_idct_dc(coeffs);
269 let dc = (coeffs[0] + 128).min(255).max(0) as u8;
270 let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
271 for _ in 0..8 {
272 for x in 0..8 {
273 frm.data[off + x] = dc;
274 }
275 off += frm.stride[plane];
276 }
277}
278
279pub fn vp_add_block(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
280 vp_idct(coeffs);
281 let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
282 for y in 0..8 {
283 for x in 0..8 {
284 frm.data[off + x] = (coeffs[x + y * 8] + (frm.data[off + x] as i16)).min(255).max(0) as u8;
285 }
286 off += frm.stride[plane];
287 }
288}
289
3584b223
KS
290pub fn vp_add_block_ilace(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
291 vp_idct(coeffs);
292 let mut off = frm.offset[plane] + bx * 8 + ((by & !1) * 8 + (by & 1)) * frm.stride[plane];
293 for y in 0..8 {
294 for x in 0..8 {
295 frm.data[off + x] = (coeffs[x + y * 8] + (frm.data[off + x] as i16)).min(255).max(0) as u8;
296 }
297 off += frm.stride[plane] * 2;
298 }
299}
300
5b24175d
KS
301pub fn vp_add_block_dc(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
302 vp_idct_dc(coeffs);
303 let dc = coeffs[0];
304 let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
305 for _ in 0..8 {
306 for x in 0..8 {
307 frm.data[off + x] = (dc + (frm.data[off + x] as i16)).min(255).max(0) as u8;
308 }
309 off += frm.stride[plane];
310 }
311}
8d8ddfe1
KS
312
/// Filters `len` positions across an edge in `data`.
///
/// For each position the four samples at `off - 2 * step`, `off - step`,
/// `off` and `off + step` produce a correction value. Corrections with
/// magnitude below `loop_str` are applied as is, those in
/// `loop_str..2 * loop_str` are reduced towards zero, and larger ones are
/// dropped. The correction is added to the sample before the edge and
/// subtracted from the sample after it, both limited to 0..=255.
/// `off` advances by `stride` between positions.
pub fn vp31_loop_filter(data: &mut [u8], mut off: usize, step: usize, stride: usize,
                        len: usize, loop_str: i16) {
    for _ in 0..len {
        let far_before = data[off - step * 2] as i16;
        let before = data[off - step] as i16;
        let after = data[off] as i16;
        let far_after = data[off + step] as i16;

        let raw = ((far_before - far_after) + 3 * (after - before) + 4) >> 3;
        let magnitude = raw.abs();
        let limit = 2 * loop_str;
        let delta = if magnitude >= limit {
            0
        } else if magnitude < loop_str {
            raw
        } else if raw < 0 {
            -raw - limit
        } else {
            -raw + limit
        };

        if delta != 0 {
            data[off - step] = (before + delta).max(0).min(255) as u8;
            data[off] = (after - delta).max(0).min(255) as u8;
        }

        off += stride;
    }
}
338
8e4b2f44
KS
339pub fn vp_copy_block(dst: &mut NASimpleVideoFrame<u8>, src: NAVideoBufferRef<u8>, comp: usize,
340 dx: usize, dy: usize, mv_x: i16, mv_y: i16,
341 preborder: usize, postborder: usize, loop_str: i16,
342 mode: usize, interp: &[BlkInterpFunc], mut mc_buf: NAVideoBufferRef<u8>)
343{
344 let sx = (dx as isize) + (mv_x as isize);
345 let sy = (dy as isize) + (mv_y as isize);
346 if ((sx | sy) & 7) == 0 {
347 copy_block(dst, src, comp, dx, dy, mv_x, mv_y, 8, 8, preborder, postborder, mode, interp);
348 return;
349 }
350 let pre = preborder.max(2);
351 let post = postborder.max(1);
352 let bsize = 8 + pre + post;
353 let src_x = sx - (pre as isize);
354 let src_y = sy - (pre as isize);
355 {
356 let mut tmp_buf = NASimpleVideoFrame::from_video_buf(&mut mc_buf).unwrap();
357 copy_block(&mut tmp_buf, src, comp, 0, 0, src_x as i16, src_y as i16,
358 bsize, bsize, 0, 0, 0, interp);
359 if (sy & 7) != 0 {
360 let foff = (8 - (sy & 7)) as usize;
361 let off = (pre + foff) * tmp_buf.stride[comp];
362 vp31_loop_filter(tmp_buf.data, off, tmp_buf.stride[comp], 1, bsize, loop_str);
363 }
364 if (sx & 7) != 0 {
365 let foff = (8 - (sx & 7)) as usize;
366 let off = pre + foff;
367 vp31_loop_filter(tmp_buf.data, off, 1, tmp_buf.stride[comp], bsize, loop_str);
368 }
369 }
370 let dxoff = (pre as i16) - (dx as i16);
371 let dyoff = (pre as i16) - (dy as i16);
372 copy_block(dst, mc_buf, comp, dx, dy, dxoff, dyoff, 8, 8, preborder, postborder, 0/* mode*/, interp);
373}
3584b223
KS
374
/// Copies a `bw` x `bh` block from `src` (row pitch `sstride`) to `dst`
/// (row pitch `dstride`) without any interpolation.
fn vp3_interp00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let d_row = row * dstride;
        let s_row = row * sstride;
        for col in 0..bw {
            dst[d_row + col] = src[s_row + col];
        }
    }
}
385
/// Writes a `bw` x `bh` block where each output sample is the truncated
/// average of a source sample and its right-hand neighbour.
fn vp3_interp01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let d_row = row * dstride;
        let s_row = row * sstride;
        for col in 0..bw {
            let left = src[s_row + col] as u16;
            let right = src[s_row + col + 1] as u16;
            dst[d_row + col] = ((left + right) >> 1) as u8;
        }
    }
}
396
/// Writes a `bw` x `bh` block where each output sample is the truncated
/// average of a source sample and the sample one row (`sstride`) below it.
fn vp3_interp10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let d_row = row * dstride;
        let s_row = row * sstride;
        for col in 0..bw {
            let upper = src[s_row + col] as u16;
            let lower = src[s_row + col + sstride] as u16;
            dst[d_row + col] = ((upper + lower) >> 1) as u8;
        }
    }
}
407
/// Writes a `bw` x `bh` block where each output sample is the truncated
/// average of the 2x2 source neighbourhood starting at the same position.
fn vp3_interp11(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let d_row = row * dstride;
        let s_row = row * sstride;
        for col in 0..bw {
            let top_left = src[s_row + col] as u16;
            let top_right = src[s_row + col + 1] as u16;
            let bottom_left = src[s_row + col + sstride] as u16;
            let bottom_right = src[s_row + col + sstride + 1] as u16;
            dst[d_row + col] = ((top_left + top_right + bottom_left + bottom_right) >> 2) as u8;
        }
    }
}
423
424pub const VP3_INTERP_FUNCS: &[blockdsp::BlkInterpFunc] = &[ vp3_interp00, vp3_interp01, vp3_interp10, vp3_interp11 ];
425