]> git.nihav.org Git - nihav.git/blame - nihav-duck/src/codecs/vp6dsp.rs
avimux: do not record palette change chunks in OpenDML index
[nihav.git] / nihav-duck / src / codecs / vp6dsp.rs
CommitLineData
3952bfd9
KS
1use nihav_core::frame::*;
2use nihav_codec_support::codecs::blockdsp::edge_emu;
3
4#[allow(clippy::too_many_arguments)]
5pub fn get_block(dst: &mut [u8], dstride: usize, src: NAVideoBufferRef<u8>, comp: usize,
6 dx: usize, dy: usize, mv_x: i16, mv_y: i16)
7{
8 let (w, h) = src.get_dimensions(comp);
9 let sx = (dx as isize) + (mv_x as isize);
10 let sy = (dy as isize) + (mv_y as isize);
11
12 if (sx - 2 < 0) || (sx + 8 + 2 > (w as isize)) ||
13 (sy - 2 < 0) || (sy + 8 + 2 > (h as isize)) {
14 edge_emu(&src, sx - 2, sy - 2, 8 + 2 + 2, 8 + 2 + 2,
15 dst, dstride, comp, 0);
16 } else {
17 let sstride = src.get_stride(comp);
18 let soff = src.get_offset(comp);
19 let sdta = src.get_data();
20 let sbuf: &[u8] = sdta.as_slice();
21 let saddr = soff + ((sx - 2) as usize) + ((sy - 2) as usize) * sstride;
22 let src = &sbuf[saddr..];
23 for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(12) {
24 dline[..12].copy_from_slice(&sline[..12]);
25 }
26 }
27}
28
/// Estimates the variance of an 8x8 block from a 4x4 subsample.
///
/// Every second pixel of every second row is sampled (rows 0, 2, 4, 6 and
/// columns 0, 2, 4, 6 when `src` holds a full block with pitch `stride`),
/// giving at most 16 samples. The result is
/// `(16 * sum(p^2) - sum(p)^2) >> 8`, truncated to `u16`.
pub fn calc_variance(src: &[u8], stride: usize) -> u16 {
    let (total, total_sq) = src
        .chunks(stride * 2)
        .take(4)
        .flat_map(|row| row.iter().take(8).step_by(2))
        .fold((0u32, 0u32), |(acc, acc_sq), &sample| {
            let val = u32::from(sample);
            (acc + val, acc_sq + val * val)
        });
    ((total_sq * 16 - total * total) >> 8) as u16
}
41
// Per-pixel interpolation kernels used by the motion compensation routines.
//
// `bilinear; p0, p1, frac` blends two `u8` pixels with a `u16` weight `frac`
// given in eighths: `(p0 * (8 - frac) + p1 * frac + 4) >> 3`.
//
// `bicubic; pix, pos, stride, taps` applies a four-tap filter to
// `pix[pos - stride]`, `pix[pos]`, `pix[pos + stride]` and
// `pix[pos + 2 * stride]`, rounds with `+ 64`, shifts right by 7 and clamps
// the result to `0..=255`.
macro_rules! mc_filter {
    (bilinear; $p0: expr, $p1: expr, $frac: expr) => {
        ((u16::from($p0) * (8 - $frac) + u16::from($p1) * $frac + 4) >> 3) as u8
    };
    (bicubic; $pix: expr, $pos: expr, $stride: expr, $taps: expr) => {{
        let acc = i32::from($pix[$pos - $stride])     * i32::from($taps[0])
                + i32::from($pix[$pos])               * i32::from($taps[1])
                + i32::from($pix[$pos + $stride])     * i32::from($taps[2])
                + i32::from($pix[$pos + $stride * 2]) * i32::from($taps[3]);
        ((acc + 64) >> 7).max(0).min(255) as u8
    }};
}
53
/// Bilinear motion compensation for one 8x8 block (portable implementation,
/// compiled on every target except x86_64).
///
/// `src[soff]` is the top-left source pixel and `sstride` the source pitch;
/// `mx` and `my` are the horizontal and vertical fractional offsets in
/// eighths. Up to eight rows of eight pixels are written to `dst`, whose
/// rows are `dstride` bytes apart.
///
/// A zero `my` selects a purely horizontal filter, a zero `mx` (with non-zero
/// `my`) a purely vertical one; otherwise the horizontal pass is followed by
/// a vertical pass over the rounded horizontal results.
#[cfg(not(target_arch = "x86_64"))]
pub fn mc_bilinear(dst: &mut [u8], dstride: usize, src: &[u8], mut soff: usize, sstride: usize, mx: u16, my: u16) {
    match (mx, my) {
        (_, 0) => {
            // Horizontal pass only: blend each pixel with its right neighbour.
            for out_row in dst.chunks_mut(dstride).take(8) {
                for x in 0..8 {
                    out_row[x] = mc_filter!(bilinear; src[soff + x], src[soff + x + 1], mx);
                }
                soff += sstride;
            }
        },
        (0, _) => {
            // Vertical pass only: blend each pixel with the one below it.
            for out_row in dst.chunks_mut(dstride).take(8) {
                for x in 0..8 {
                    out_row[x] = mc_filter!(bilinear; src[soff + x], src[soff + x + sstride], my);
                }
                soff += sstride;
            }
        },
        _ => {
            // Keep the previous horizontally filtered row so that every
            // source row is filtered only once.
            let mut above = [0u8; 8];
            for x in 0..8 {
                above[x] = mc_filter!(bilinear; src[soff + x], src[soff + x + 1], mx);
            }
            soff += sstride;
            for out_row in dst.chunks_mut(dstride).take(8) {
                for x in 0..8 {
                    let below = mc_filter!(bilinear; src[soff + x], src[soff + x + 1], mx);
                    out_row[x] = mc_filter!(bilinear; above[x], below, my);
                    above[x] = below;
                }
                soff += sstride;
            }
        },
    }
}
86
e510768d
KS
87#[cfg(target_arch = "x86_64")]
88use std::arch::x86_64::*;
89
/// Bilinear motion compensation for one 8x8 block (SSE2 implementation for
/// x86_64).
///
/// `src[soff]` is the top-left source pixel and `sstride` the source pitch;
/// `mx` and `my` are the horizontal and vertical fractional offsets in
/// eighths. Up to eight rows of eight pixels are written to `dst`, whose
/// rows are `dstride` bytes apart.
///
/// A zero `my` selects a purely horizontal filter, a zero `mx` (with non-zero
/// `my`) a purely vertical one; otherwise the horizontal pass is followed by
/// a vertical pass over the rounded horizontal results.
///
/// Every source and destination row is taken as a bounds-checked slice before
/// it is handed to the SIMD loads and stores, so undersized buffers are never
/// read or written out of bounds: like the portable variant, at most as many
/// rows as `dst` provides are written.
///
/// # Panics
///
/// Panics if a required source row or a destination row is too short, or if
/// `dstride` is zero.
#[cfg(target_arch = "x86_64")]
pub fn mc_bilinear(dst: &mut [u8], dstride: usize, src: &[u8], soff: usize, sstride: usize, mx: u16, my: u16) {
    // Broadcasts a filter weight to all eight 16-bit lanes.
    #[inline]
    fn splat(weight: u16) -> __m128i {
        // SAFETY: register-only SSE2 operation; SSE2 is part of the x86_64 baseline.
        unsafe { _mm_set1_epi16(weight as i16) }
    }

    // Loads the first eight bytes of `row` zero-extended to 16-bit lanes.
    #[inline]
    fn load8(row: &[u8]) -> __m128i {
        let row = &row[..8];
        // SAFETY: `row` is exactly eight readable bytes, which is all the
        // unaligned 64-bit load touches; SSE2 is part of the x86_64 baseline.
        unsafe {
            _mm_unpacklo_epi8(_mm_loadl_epi64(row.as_ptr() as *const __m128i), _mm_setzero_si128())
        }
    }

    // Multiplies every lane by the corresponding weight lane.
    #[inline]
    fn scale(pix: __m128i, weight: __m128i) -> __m128i {
        // SAFETY: register-only SSE2 operation; SSE2 is part of the x86_64 baseline.
        unsafe { _mm_mullo_epi16(pix, weight) }
    }

    // Computes `(p0 + p1 + 4) >> 3` per lane for two already weighted inputs.
    #[inline]
    fn round_shift(p0: __m128i, p1: __m128i) -> __m128i {
        // SAFETY: register-only SSE2 operations; SSE2 is part of the x86_64 baseline.
        unsafe { _mm_srai_epi16(_mm_add_epi16(_mm_add_epi16(p0, _mm_set1_epi16(4)), p1), 3) }
    }

    // Packs the eight 16-bit lanes to bytes and stores them at the start of `row`.
    #[inline]
    fn store8(row: &mut [u8], val: __m128i) {
        let row = &mut row[..8];
        // SAFETY: `row` is exactly eight writable bytes, which is all the
        // unaligned 64-bit store touches; SSE2 is part of the x86_64 baseline.
        unsafe {
            _mm_storel_epi64(row.as_mut_ptr() as *mut __m128i, _mm_packus_epi16(val, val));
        }
    }

    let mut soff = soff;
    if my == 0 {
        // Horizontal pass only: blend each pixel with its right neighbour.
        let (w_left, w_right) = (splat(8 - mx), splat(mx));
        for dline in dst.chunks_mut(dstride).take(8) {
            let sline = &src[soff..][..9];
            let left  = scale(load8(sline), w_left);
            let right = scale(load8(&sline[1..]), w_right);
            store8(dline, round_shift(left, right));
            soff += sstride;
        }
    } else if mx == 0 {
        // Vertical pass only: keep the weighted previous row between iterations.
        let (w_top, w_bottom) = (splat(8 - my), splat(my));
        let mut above = scale(load8(&src[soff..]), w_top);
        for dline in dst.chunks_mut(dstride).take(8) {
            soff += sstride;
            let cur = load8(&src[soff..]);
            store8(dline, round_shift(above, scale(cur, w_bottom)));
            above = scale(cur, w_top);
        }
    } else {
        let (w_left, w_right) = (splat(8 - mx), splat(mx));
        let (w_top, w_bottom) = (splat(8 - my), splat(my));
        // Horizontally filters the nine source pixels starting at `off`.
        let filter_row = |off: usize| {
            let sline = &src[off..][..9];
            round_shift(scale(load8(sline), w_left), scale(load8(&sline[1..]), w_right))
        };

        let mut above = scale(filter_row(soff), w_top);
        for dline in dst.chunks_mut(dstride).take(8) {
            soff += sstride;
            let cur = filter_row(soff);
            store8(dline, round_shift(above, scale(cur, w_bottom)));
            above = scale(cur, w_top);
        }
    }
}
168
3952bfd9
KS
169pub fn mc_bicubic(dst: &mut [u8], dstride: usize, src: &[u8], mut soff: usize, sstride: usize, coeffs_w: &[i16; 4], coeffs_h: &[i16; 4]) {
170 if coeffs_h[1] == 128 {
171 for dline in dst.chunks_mut(dstride).take(8) {
172 for i in 0..8 {
173 dline[i] = mc_filter!(bicubic; src, soff + i, 1, coeffs_w);
174 }
175 soff += sstride;
176 }
177 } else if coeffs_w[1] == 128 { // horizontal-only interpolation
178 for dline in dst.chunks_mut(dstride).take(8) {
179 for i in 0..8 {
180 dline[i] = mc_filter!(bicubic; src, soff + i, sstride, coeffs_h);
181 }
182 soff += sstride;
183 }
184 } else {
185 let mut buf = [0u8; 16 * 11];
186 soff -= sstride;
187 for dline in buf.chunks_mut(16) {
188 for i in 0..8 {
189 dline[i] = mc_filter!(bicubic; src, soff + i, 1, coeffs_w);
190 }
191 soff += sstride;
192 }
193 let mut soff = 16;
194 for dline in dst.chunks_mut(dstride).take(8) {
195 for i in 0..8 {
196 dline[i] = mc_filter!(bicubic; buf, soff + i, 16, coeffs_h);
197 }
198 soff += 16;
199 }
200 }
201}