]> git.nihav.org Git - nihav.git/blame_incremental - nihav-duck/src/codecs/vpcommon.rs
h264: cache data before use in fill_deblock()
[nihav.git] / nihav-duck / src / codecs / vpcommon.rs
... / ...
CommitLineData
1use nihav_core::codecs::*;
2use nihav_codec_support::codecs::blockdsp;
3use nihav_codec_support::codecs::blockdsp::*;
4
5pub const VP_YUVA420_FORMAT: NAPixelFormaton = NAPixelFormaton{
6 model: ColorModel::YUV(YUVSubmodel::YUVJ),
7 components: 4,
8 comp_info: [
9 Some(NAPixelChromaton{ h_ss: 0, v_ss: 0, packed: false, depth: 8, shift: 0, comp_offs: 0, next_elem: 1}),
10 Some(NAPixelChromaton{ h_ss: 1, v_ss: 1, packed: false, depth: 8, shift: 0, comp_offs: 1, next_elem: 1}),
11 Some(NAPixelChromaton{ h_ss: 1, v_ss: 1, packed: false, depth: 8, shift: 0, comp_offs: 2, next_elem: 1}),
12 Some(NAPixelChromaton{ h_ss: 0, v_ss: 0, packed: false, depth: 8, shift: 0, comp_offs: 3, next_elem: 1}),
13 None ],
14 elem_size: 0,
15 be: false,
16 alpha: true,
17 palette: false
18 };
19
/// Macroblock coding type.
///
/// `Intra` is the default. The `Inter*` types map to [`VP_REF_INTER`] and the
/// `Golden*` types map to [`VP_REF_GOLDEN`] in [`VPMBType::get_ref_id`].
#[derive(Clone,Copy,Debug,PartialEq,Default)]
#[allow(dead_code)]
pub enum VPMBType {
    #[default]
    Intra,
    InterNoMV,
    InterMV,
    InterNearest,
    InterNear,
    InterFourMV,
    GoldenNoMV,
    GoldenMV,
    GoldenNearest,
    GoldenNear,
}

/// Reference id reported for the `Inter*` macroblock types.
pub const VP_REF_INTER: u8 = 1;
/// Reference id reported for the `Golden*` macroblock types.
pub const VP_REF_GOLDEN: u8 = 2;

#[allow(dead_code)]
impl VPMBType {
    /// Returns `true` only for [`VPMBType::Intra`].
    pub fn is_intra(self) -> bool { self == VPMBType::Intra }

    /// Returns the reference id for this type: 0 for intra, [`VP_REF_INTER`]
    /// for the `Inter*` types and [`VP_REF_GOLDEN`] for the `Golden*` types.
    pub fn get_ref_id(self) -> u8 {
        // Every variant is listed explicitly so that adding a new one is a
        // compile error here instead of silently being treated as golden.
        match self {
            VPMBType::Intra => 0,
            VPMBType::InterNoMV |
            VPMBType::InterMV |
            VPMBType::InterNearest |
            VPMBType::InterNear |
            VPMBType::InterFourMV => VP_REF_INTER,
            VPMBType::GoldenNoMV |
            VPMBType::GoldenMV |
            VPMBType::GoldenNearest |
            VPMBType::GoldenNear => VP_REF_GOLDEN,
        }
    }
}
54
55#[derive(Default)]
56pub struct VPShuffler {
57 lastframe: Option<NAVideoBufferRef<u8>>,
58 goldframe: Option<NAVideoBufferRef<u8>>,
59}
60
61impl VPShuffler {
62 pub fn new() -> Self { VPShuffler { lastframe: None, goldframe: None } }
63 pub fn clear(&mut self) { self.lastframe = None; self.goldframe = None; }
64 pub fn add_frame(&mut self, buf: NAVideoBufferRef<u8>) {
65 self.lastframe = Some(buf);
66 }
67 pub fn add_golden_frame(&mut self, buf: NAVideoBufferRef<u8>) {
68 self.goldframe = Some(buf);
69 }
70 pub fn get_last(&mut self) -> Option<NAVideoBufferRef<u8>> {
71 self.lastframe.as_ref().cloned()
72 }
73 pub fn get_golden(&mut self) -> Option<NAVideoBufferRef<u8>> {
74 self.goldframe.as_ref().cloned()
75 }
76 pub fn has_refs(&self) -> bool {
77 self.lastframe.is_some()
78 }
79}
80
/// Six base values; each entry has a matching row in [`VP56_COEF_ADD_PROBS`].
// NOTE(review): presumably the smallest coefficient magnitude of each VP5/VP6
// "long" coefficient category - confirm against the decoders using it.
pub const VP56_COEF_BASE: [i16; 6] = [5, 7, 11, 19, 35, 67];

/// Per-entry probability rows, zero-padded to twelve values each.
// NOTE(review): presumably the probabilities for reading the extra magnitude
// bits added to the matching `VP56_COEF_BASE` entry - confirm against callers.
pub const VP56_COEF_ADD_PROBS: [[u8; 12]; 6] = [
    [159, 128,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
    [165, 145, 128,   0,   0,   0,   0,   0,   0,   0,   0,   0],
    [173, 148, 140, 128,   0,   0,   0,   0,   0,   0,   0,   0],
    [176, 155, 140, 135, 128,   0,   0,   0,   0,   0,   0,   0],
    [180, 157, 141, 134, 130, 128,   0,   0,   0,   0,   0,   0],
    [254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 128],
];
90
91#[allow(dead_code)]
92pub struct BoolCoder<'a> {
93 pub src: &'a [u8],
94 pos: usize,
95 value: u32,
96 range: u32,
97 bits: i32,
98}
99
100#[allow(dead_code)]
101impl<'a> BoolCoder<'a> {
102 pub fn new(src: &'a [u8]) -> DecoderResult<Self> {
103 if src.len() < 3 { return Err(DecoderError::ShortData); }
104 let value = (u32::from(src[0]) << 24) | (u32::from(src[1]) << 16) | (u32::from(src[2]) << 8) | u32::from(src[3]);
105 Ok(Self { src, pos: 4, value, range: 255, bits: 8 })
106 }
107 pub fn read_bool(&mut self) -> bool {
108 self.read_prob(128)
109 }
110 pub fn read_prob(&mut self, prob: u8) -> bool {
111 self.renorm();
112 let split = 1 + (((self.range - 1) * u32::from(prob)) >> 8);
113 let bit;
114 if self.value < (split << 24) {
115 self.range = split;
116 bit = false;
117 } else {
118 self.range -= split;
119 self.value -= split << 24;
120 bit = true;
121 }
122 bit
123 }
124 pub fn read_bits(&mut self, bits: u8) -> u32 {
125 let mut val = 0u32;
126 for _ in 0..bits {
127 val = (val << 1) | (self.read_prob(128) as u32);
128 }
129 val
130 }
131 pub fn read_byte(&mut self) -> u8 {
132 let mut val = 0u8;
133 for _ in 0..8 {
134 val = (val << 1) | (self.read_prob(128) as u8);
135 }
136 val
137 }
138 pub fn read_sbits(&mut self, bits: u8) -> i32 {
139 let mut val = if self.read_prob(128) { -1i32 } else { 0i32 };
140 for _ in 1..bits {
141 val = (val << 1) | (self.read_prob(128) as i32);
142 }
143 val
144 }
145 pub fn read_probability(&mut self) -> u8 {
146 let val = self.read_bits(7) as u8;
147 if val == 0 {
148 1
149 } else {
150 val << 1
151 }
152 }
153 fn renorm(&mut self) {
154 let shift = self.range.leading_zeros() & 7;
155 self.range <<= shift;
156 self.value <<= shift;
157 self.bits -= shift as i32;
158 if (self.bits <= 0) && (self.pos < self.src.len()) {
159 self.value |= u32::from(self.src[self.pos]) << (-self.bits as u8);
160 self.pos += 1;
161 self.bits += 8;
162 }
163/* while self.range < 0x80 {
164 self.range <<= 1;
165 self.value <<= 1;
166 self.bits -= 1;
167 if (self.bits <= 0) && (self.pos < self.src.len()) {
168 self.value |= u32::from(self.src[self.pos]);
169 self.pos += 1;
170 self.bits = 8;
171 }
172 }*/
173 }
174 pub fn skip_bytes(&mut self, nbytes: usize) {
175 for _ in 0..nbytes {
176 self.value <<= 8;
177 if self.pos < self.src.len() {
178 self.value |= u32::from(self.src[self.pos]);
179 self.pos += 1;
180 }
181 }
182 }
183}
184
/// Scales `prob` by `weights[0]` / 256 (rounded), adds `weights[1]`, limits the
/// result to at most `maxval` and at least 1, and truncates it to `u8`.
#[allow(dead_code)]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub fn rescale_prob(prob: u8, weights: &[i16; 2], maxval: i32) -> u8 {
    let [scale, bias] = *weights;
    let scaled = (i32::from(prob) * i32::from(scale) + 128) >> 8;
    let shifted = scaled + i32::from(bias);
    // The upper limit is applied first, so a `maxval` below 1 still yields 1.
    shifted.min(maxval).max(1) as u8
}
190
// Builds a binary decision tree as nested expressions.
// The four-argument form reads one bit from `$bc` with probability `$prob` and
// evaluates `$node1` for a zero bit or `$node2` for a one bit; the
// single-argument form is a leaf that evaluates to itself.
macro_rules! vp_tree {
    ($bc: expr, $prob: expr, $node1: expr, $node2: expr) => {
        if $bc.read_prob($prob) {
            $node2
        } else {
            $node1
        }
    };
    ($leaf: expr) => { $leaf }
}
201
// IDCT multipliers in 16.16 fixed point: CxSy is approximately
// cos(x * pi / 16) * 65536 (equivalently sin(y * pi / 16) * 65536).
const C1S7: i32 = 64277;
const C2S6: i32 = 60547;
const C3S5: i32 = 54491;
const C4S4: i32 = 46341;
const C5S3: i32 = 36410;
const C6S2: i32 = 25080;
const C7S1: i32 = 12785;

/// Multiplies two values and drops the low 16 bits of the product.
fn mul16(a: i32, b: i32) -> i32 {
    (a * b) >> 16
}

// One 8-point inverse DCT pass.
// Reads the eight inputs `$s0..$s7`, adds `$bias` to the even part, shifts
// every result right by `$shift` and stores it, cast to `$otype`, in `$d0..$d7`.
macro_rules! idct_step {
    ($s0:expr, $s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr,
     $d0:expr, $d1:expr, $d2:expr, $d3:expr, $d4:expr, $d5:expr, $d6:expr, $d7:expr,
     $bias:expr, $shift:expr, $otype:ty) => {
        let t_a = mul16(C1S7, i32::from($s1)) + mul16(C7S1, i32::from($s7));
        let t_b = mul16(C7S1, i32::from($s1)) - mul16(C1S7, i32::from($s7));
        let t_c = mul16(C3S5, i32::from($s3)) + mul16(C5S3, i32::from($s5));
        let t_d = mul16(C3S5, i32::from($s5)) - mul16(C5S3, i32::from($s3));
        let t_a1 = mul16(C4S4, t_a - t_c);
        let t_b1 = mul16(C4S4, t_b - t_d);
        let t_c = t_a + t_c;
        let t_d = t_b + t_d;
        // Widen before adding/subtracting: in the row pass the inputs are `i16`,
        // and doing the arithmetic in that type overflows for large coefficients.
        let t_e = mul16(C4S4, i32::from($s0) + i32::from($s4)) + $bias;
        let t_f = mul16(C4S4, i32::from($s0) - i32::from($s4)) + $bias;
        let t_g = mul16(C2S6, i32::from($s2)) + mul16(C6S2, i32::from($s6));
        let t_h = mul16(C6S2, i32::from($s2)) - mul16(C2S6, i32::from($s6));
        let t_e1 = t_e - t_g;
        let t_g = t_e + t_g;
        let t_a = t_f + t_a1;
        let t_f = t_f - t_a1;
        let t_b = t_b1 - t_h;
        let t_h = t_b1 + t_h;

        $d0 = ((t_g  + t_c) >> $shift) as $otype;
        $d7 = ((t_g  - t_c) >> $shift) as $otype;
        $d1 = ((t_a  + t_h) >> $shift) as $otype;
        $d2 = ((t_a  - t_h) >> $shift) as $otype;
        $d3 = ((t_e1 + t_d) >> $shift) as $otype;
        $d4 = ((t_e1 - t_d) >> $shift) as $otype;
        $d5 = ((t_f  + t_b) >> $shift) as $otype;
        $d6 = ((t_f  - t_b) >> $shift) as $otype;
    }
}

/// Performs an in-place 8x8 inverse DCT on `coeffs` (row-major order).
///
/// Rows are transformed first into a 32-bit scratch buffer without bias or
/// shift; columns are then transformed with a bias of 8 and a right shift of 4,
/// and the results are written back as `i16`.
pub fn vp_idct(coeffs: &mut [i16; 64]) {
    let mut tmp = [0i32; 64];
    for (src, dst) in coeffs.chunks(8).zip(tmp.chunks_mut(8)) {
        idct_step!(src[0], src[1], src[2], src[3], src[4], src[5], src[6], src[7],
                   dst[0], dst[1], dst[2], dst[3], dst[4], dst[5], dst[6], dst[7], 0, 0, i32);
    }
    let src = &tmp;
    let dst = coeffs;
    for i in 0..8 {
        idct_step!(src[0 * 8 + i], src[1 * 8 + i], src[2 * 8 + i], src[3 * 8 + i],
                   src[4 * 8 + i], src[5 * 8 + i], src[6 * 8 + i], src[7 * 8 + i],
                   dst[0 * 8 + i], dst[1 * 8 + i], dst[2 * 8 + i], dst[3 * 8 + i],
                   dst[4 * 8 + i], dst[5 * 8 + i], dst[6 * 8 + i], dst[7 * 8 + i], 8, 4, i16);
    }
}
263
264pub fn vp_idct_dc(coeffs: &mut [i16; 64]) {
265 let dc = ((mul16(C4S4, mul16(C4S4, i32::from(coeffs[0]))) + 8) >> 4) as i16;
266 for i in 0..64 {
267 coeffs[i] = dc;
268 }
269}
270
/// Multiplies every coefficient except the first by the matching `qmat` entry,
/// wrapping on overflow. `coeffs[0]` is left untouched.
pub fn unquant(coeffs: &mut [i16; 64], qmat: &[i16; 64]) {
    for (coef, &quant) in coeffs.iter_mut().zip(qmat.iter()).skip(1) {
        *coef = coef.wrapping_mul(quant);
    }
}
276
277pub fn vp_put_block(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
278 vp_idct(coeffs);
279 let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
280 for y in 0..8 {
281 for x in 0..8 {
282 frm.data[off + x] = (coeffs[x + y * 8] + 128).min(255).max(0) as u8;
283 }
284 off += frm.stride[plane];
285 }
286}
287
288pub fn vp_put_block_ilace(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
289 vp_idct(coeffs);
290 let mut off = frm.offset[plane] + bx * 8 + ((by & !1) * 8 + (by & 1)) * frm.stride[plane];
291 for y in 0..8 {
292 for x in 0..8 {
293 frm.data[off + x] = (coeffs[x + y * 8] + 128).min(255).max(0) as u8;
294 }
295 off += frm.stride[plane] * 2;
296 }
297}
298
299pub fn vp_put_block_dc(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
300 vp_idct_dc(coeffs);
301 let dc = (coeffs[0] + 128).min(255).max(0) as u8;
302 let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
303 for _ in 0..8 {
304 for x in 0..8 {
305 frm.data[off + x] = dc;
306 }
307 off += frm.stride[plane];
308 }
309}
310
311pub fn vp_add_block(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
312 vp_idct(coeffs);
313 let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
314 for y in 0..8 {
315 for x in 0..8 {
316 frm.data[off + x] = (coeffs[x + y * 8] + i16::from(frm.data[off + x])).min(255).max(0) as u8;
317 }
318 off += frm.stride[plane];
319 }
320}
321
322pub fn vp_add_block_ilace(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
323 vp_idct(coeffs);
324 let mut off = frm.offset[plane] + bx * 8 + ((by & !1) * 8 + (by & 1)) * frm.stride[plane];
325 for y in 0..8 {
326 for x in 0..8 {
327 frm.data[off + x] = (coeffs[x + y * 8] + i16::from(frm.data[off + x])).min(255).max(0) as u8;
328 }
329 off += frm.stride[plane] * 2;
330 }
331}
332
333pub fn vp_add_block_dc(coeffs: &mut [i16; 64], bx: usize, by: usize, plane: usize, frm: &mut NASimpleVideoFrame<u8>) {
334 vp_idct_dc(coeffs);
335 let dc = coeffs[0];
336 let mut off = frm.offset[plane] + bx * 8 + by * 8 * frm.stride[plane];
337 for _ in 0..8 {
338 for x in 0..8 {
339 frm.data[off + x] = (dc + i16::from(frm.data[off + x])).min(255).max(0) as u8;
340 }
341 off += frm.stride[plane];
342 }
343}
344
/// Filters `len` positions across an edge located just before `off`.
///
/// At each position the four samples at `off - 2 * step`, `off - step`, `off`
/// and `off + step` produce a correction that is added to the sample before
/// the edge and subtracted from the sample after it (both clamped to 0..=255).
/// `stride` is the distance between consecutive filtered positions. A
/// correction whose magnitude is at least `2 * loop_str` is discarded, and one
/// between `loop_str` and `2 * loop_str` is reflected back towards zero.
///
/// The caller must ensure `off >= 2 * step` and that all accessed indices lie
/// inside `data`.
pub fn vp31_loop_filter(data: &mut [u8], mut off: usize, step: usize, stride: usize,
                        len: usize, loop_str: i16) {
    for _ in 0..len {
        let p0 = i16::from(data[off - step * 2]);
        let p1 = i16::from(data[off - step]);
        let p2 = i16::from(data[off]);
        let p3 = i16::from(data[off + step]);

        let raw = ((p0 - p3) + 3 * (p2 - p1) + 4) >> 3;
        let strength = raw.abs();
        let delta = if strength >= 2 * loop_str {
            0
        } else if strength < loop_str {
            raw
        } else if raw < 0 {
            -raw - 2 * loop_str
        } else {
            -raw + 2 * loop_str
        };

        if delta != 0 {
            data[off - step] = (p1 + delta).clamp(0, 255) as u8;
            data[off] = (p2 - delta).clamp(0, 255) as u8;
        }

        off += stride;
    }
}
370
371pub fn vp_copy_block(dst: &mut NASimpleVideoFrame<u8>, src: NAVideoBufferRef<u8>, comp: usize,
372 dx: usize, dy: usize, mv_x: i16, mv_y: i16,
373 preborder: usize, postborder: usize, loop_str: i16,
374 mode: usize, interp: &[BlkInterpFunc], mut mc_buf: NAVideoBufferRef<u8>)
375{
376 let sx = (dx as isize) + (mv_x as isize);
377 let sy = (dy as isize) + (mv_y as isize);
378 if ((sx | sy) & 7) == 0 {
379 copy_block(dst, src, comp, dx, dy, mv_x, mv_y, 8, 8, preborder, postborder, mode, interp);
380 return;
381 }
382 let pre = preborder.max(2);
383 let post = postborder.max(1);
384 let bsize = 8 + pre + post;
385 let src_x = sx - (pre as isize);
386 let src_y = sy - (pre as isize);
387 {
388 let tmp_buf = NASimpleVideoFrame::from_video_buf(&mut mc_buf).unwrap();
389 edge_emu(src.as_ref(), src_x, src_y, bsize, bsize, &mut tmp_buf.data[tmp_buf.offset[comp]..], tmp_buf.stride[comp], comp, 0);
390// copy_block(&mut tmp_buf, src, comp, 0, 0, src_x as i16, src_y as i16,
391// bsize, bsize, 0, 0, 0, interp);
392 if (sx & 7) != 0 {
393 let foff = (8 - (sx & 7)) as usize;
394 let off = pre + foff + tmp_buf.offset[comp];
395 vp31_loop_filter(tmp_buf.data, off, 1, tmp_buf.stride[comp], bsize, loop_str);
396 }
397 if (sy & 7) != 0 {
398 let foff = (8 - (sy & 7)) as usize;
399 let off = (pre + foff) * tmp_buf.stride[comp] + tmp_buf.offset[comp];
400 vp31_loop_filter(tmp_buf.data, off, tmp_buf.stride[comp], 1, bsize, loop_str);
401 }
402 }
403 let dxoff = (pre as i16) - (dx as i16);
404 let dyoff = (pre as i16) - (dy as i16);
405 copy_block(dst, mc_buf, comp, dx, dy, dxoff, dyoff, 8, 8, preborder, postborder, mode, interp);
406}
407
/// Plain copy of a `bw` x `bh` block from `src` (row pitch `sstride`) to `dst`
/// (row pitch `dstride`).
fn vp3_interp00(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let out_line = &mut dst[row * dstride..][..bw];
        out_line.copy_from_slice(&src[row * sstride..][..bw]);
    }
}
418
/// Writes the truncating average of each source pixel and its right-hand
/// neighbour for a `bw` x `bh` block.
fn vp3_interp01(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let (d0, s0) = (row * dstride, row * sstride);
        for col in 0..bw {
            let sum = u16::from(src[s0 + col]) + u16::from(src[s0 + col + 1]);
            dst[d0 + col] = (sum >> 1) as u8;
        }
    }
}
429
/// Writes the truncating average of each source pixel and the pixel one row
/// below it for a `bw` x `bh` block.
fn vp3_interp10(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let (d0, s0) = (row * dstride, row * sstride);
        for col in 0..bw {
            let sum = u16::from(src[s0 + col]) + u16::from(src[s0 + col + sstride]);
            dst[d0 + col] = (sum >> 1) as u8;
        }
    }
}
440
/// Writes the truncating average of each source pixel and the pixel diagonally
/// below and to the right of it for a `bw` x `bh` block.
fn vp3_interp1x(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let (d0, s0) = (row * dstride, row * sstride);
        for col in 0..bw {
            let here = u16::from(src[s0 + col]);
            let diag = u16::from(src[s0 + col + sstride + 1]);
            dst[d0 + col] = ((here + diag) >> 1) as u8;
        }
    }
}
454
/// Writes the truncating average of the pixel to the right of each source
/// position and the pixel directly below it for a `bw` x `bh` block.
fn vp3_interp1y(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, bw: usize, bh: usize)
{
    for row in 0..bh {
        let (d0, s0) = (row * dstride, row * sstride);
        for col in 0..bw {
            let right = u16::from(src[s0 + col + 1]);
            let below = u16::from(src[s0 + col + sstride]);
            dst[d0 + col] = ((right + below) >> 1) as u8;
        }
    }
}
468
469pub const VP3_INTERP_FUNCS: &[blockdsp::BlkInterpFunc] = &[ vp3_interp00, vp3_interp01, vp3_interp10, vp3_interp1x, vp3_interp1y ];
470