// Length (in samples) of the sliding history window kept in each adaptive
// filter's buffer; NFilterContext::apply() rewinds its ring buffer every
// HISTORY_SIZE samples (see the wrap check further below).
1 const HISTORY_SIZE: usize = 512;
// Sign of a sample value. Body elided from this view; judging by its uses
// below (sign-LMS coefficient updates like `adapt * sign` and
// `4 * (val2sign(val) as i16)`) it presumably returns -1/0/+1 for
// negative/zero/positive input — TODO confirm against the full source.
3 fn val2sign(val: i32) -> i32 {
// NOTE(review): enclosing struct header is elided from this view; these are
// its fields (presumably the "new"-style filter state — confirm).
// Up to three cascaded adaptive prediction filters; unused slots have
// order 0 and are skipped by apply().
20 filters: [NFilterContext; 3],
// Final low-order predictor state for the left channel.
21 lfilt: LastFilterContext,
// Final low-order predictor state for the right channel.
22 rfilt: LastFilterContext,
// NOTE(review): many interior lines (enum declaration, match headers,
// write-back of new_l/new_r, closing braces) are elided from this view;
// comments below describe only what the visible lines show.
// Variants hold large filter states, hence the clippy allowance.
25 #[allow(clippy::large_enum_variant)]
// Constructor: picks the legacy or the newer filter implementation.
// The selecting condition between lines 33 and 37 is elided — presumably
// a bitstream-version threshold; TODO confirm.
33 pub fn new(version: u16, compression: u16) -> Self {
35 FilterMode::Old(OldFilt::new(version, compression))
37 FilterMode::New(NewFilt::new(version, compression))
// Filter a mono channel in place by dispatching on the active mode.
40 pub fn filter_mono(&mut self, l: &mut [i32]) {
42 FilterMode::Old(ref mut ofilt) => ofilt.filter(l),
43 FilterMode::New(ref mut nfilt) => nfilt.filter_mono(l),
// None is a placeholder state that must never be filtered.
44 FilterMode::None => unreachable!(),
// Filter a stereo pair in place, then undo the encoder's channel
// decorrelation (the two modes reconstruct L/R with mirrored formulas).
47 pub fn filter_stereo(&mut self, l: &mut [i32], r: &mut [i32]) {
49 FilterMode::Old(ref mut ofilt) => {
// Old mode: reconstruct from the decorrelated pair stored in (l, r).
52 for (l, r) in l.iter_mut().zip(r.iter_mut()) {
53 let new_l = *l - *r / 2;
54 let new_r = *r + new_l;
59 FilterMode::New(ref mut nfilt) => {
60 nfilt.filter_stereo(l, r);
// New mode: same reconstruction but with the channel roles swapped.
61 for (l, r) in l.iter_mut().zip(r.iter_mut()) {
62 let new_l = *r - *l / 2;
63 let new_r = *l + new_l;
68 FilterMode::None => unreachable!(),
// Per-compression-level parameters for the three cascaded adaptive filters:
// each entry is (order / 16, prediction rounding shift in bits).
// Row index is `compression / 1000 - 1` (see NewFilt::new below);
// a (0, 0) entry produces an order-0 filter whose apply() is a no-op.
73 const NEW_FILTER_PARAMS: [[(u8, u8); 3]; 5] = [
74 [ (0, 0), ( 0, 0), ( 0, 0) ],
75 [ (1, 11), ( 0, 0), ( 0, 0) ],
76 [ (4, 11), ( 0, 0), ( 0, 0) ],
77 [ (2, 10), (16, 13), ( 0, 0) ],
78 [ (1, 11), (16, 13), (80, 15) ],
// State of one adaptive (sign-LMS style) prediction filter stage.
// Field list is elided from this view; from the impl below it holds at
// least: buf (i16 history ring buffer), coeffs (i16 adaptive taps),
// order, bits (rounding shift), avg (running magnitude average) —
// plus presumably the `new` flag passed to new(); TODO confirm.
81 #[derive(Clone,Default)]
82 struct NFilterContext {
91 #[cfg(target_arch = "x86_64")]
92 use std::arch::x86_64::*;
// SSE2 version of the filter inner loop: computes the dot product of the
// adaptive taps `filt` with the delayed samples `coeffs` while
// simultaneously nudging each tap by +/- the matching `adapt` entry,
// 16 taps per iteration (`filt.len() / 16` iterations).
// NOTE(review): the `unsafe` block, the loop headers, and the conditions
// selecting the add/sub/no-update branches on `val` are elided from this
// view — presumably val > 0 adds, val < 0 subtracts, val == 0 leaves the
// taps untouched; TODO confirm. Also note `filt` is accessed with the
// ALIGNED _mm_load_si128/_mm_store_si128 while coeffs/adapt use unaligned
// loads, so `filt` must be 16-byte aligned.
94 #[cfg(target_arch = "x86_64")]
95 fn adapt_loop(filt: &mut [i16], coeffs: &[i16], adapt: &[i16], val: i32) -> i32 {
// Four i32 partial sums, filled from the SIMD accumulator at the end.
96 let mut sum = [0i32; 4];
97 let iters = filt.len() / 16;
99 let mut sumv = _mm_setzero_si128();
100 let mut fptr = filt.as_mut_ptr();
101 let mut cptr = coeffs.as_ptr();
102 let mut aptr = adapt.as_ptr();
// Branch 1 (presumably val > 0): accumulate products and ADD adapt to taps.
105 let r0 = _mm_loadu_si128(cptr as *const __m128i);
106 let r1 = _mm_loadu_si128(cptr.add(8) as *const __m128i);
107 let c0 = _mm_load_si128(fptr as *const __m128i);
108 let c1 = _mm_load_si128(fptr.add(8) as *const __m128i);
// madd: pairwise i16*i16 products summed into i32 lanes.
109 sumv = _mm_add_epi32(sumv, _mm_madd_epi16(r0, c0));
110 sumv = _mm_add_epi32(sumv, _mm_madd_epi16(r1, c1));
111 let a0 = _mm_loadu_si128(aptr as *const __m128i);
112 let a1 = _mm_loadu_si128(aptr.add(8) as *const __m128i);
113 let c0 = _mm_add_epi16(c0, a0);
114 let c1 = _mm_add_epi16(c1, a1);
115 _mm_store_si128(fptr as *mut __m128i, c0);
116 _mm_store_si128(fptr.add(8) as *mut __m128i, c1);
// Branch 2 (presumably val < 0): same dot product but SUBTRACT adapt.
123 let r0 = _mm_loadu_si128(cptr as *const __m128i);
124 let r1 = _mm_loadu_si128(cptr.add(8) as *const __m128i);
125 let c0 = _mm_load_si128(fptr as *const __m128i);
126 let c1 = _mm_load_si128(fptr.add(8) as *const __m128i);
127 sumv = _mm_add_epi32(sumv, _mm_madd_epi16(r0, c0));
128 sumv = _mm_add_epi32(sumv, _mm_madd_epi16(r1, c1));
129 let a0 = _mm_loadu_si128(aptr as *const __m128i);
130 let a1 = _mm_loadu_si128(aptr.add(8) as *const __m128i);
131 let c0 = _mm_sub_epi16(c0, a0);
132 let c1 = _mm_sub_epi16(c1, a1);
133 _mm_store_si128(fptr as *mut __m128i, c0);
134 _mm_store_si128(fptr.add(8) as *mut __m128i, c1);
// Branch 3 (presumably val == 0): dot product only, taps left unchanged.
141 let r0 = _mm_loadu_si128(cptr as *const __m128i);
142 let r1 = _mm_loadu_si128(cptr.add(8) as *const __m128i);
143 let c0 = _mm_load_si128(fptr as *const __m128i);
144 let c1 = _mm_load_si128(fptr.add(8) as *const __m128i);
145 sumv = _mm_add_epi32(sumv, _mm_madd_epi16(r0, c0));
146 sumv = _mm_add_epi32(sumv, _mm_madd_epi16(r1, c1));
// Horizontal reduction: spill the four i32 lanes and sum them.
152 _mm_storeu_si128(sum.as_mut_ptr() as *mut __m128i, sumv);
154 sum[0] + sum[1] + sum[2] + sum[3]
// Portable scalar fallback for adapt_loop: same contract as the SSE2
// version — dot product of taps with delayed samples plus per-tap
// adaptation. The accumulator declaration, the sign-dependent tap update
// using `adapt`/`val`, and the return are elided from this view.
157 #[cfg(not(target_arch = "x86_64"))]
158 fn adapt_loop(filt: &mut [i16], coeffs: &[i16], adapt: &[i16], val: i32) -> i32 {
160 for (coef, (res, adapt)) in filt.iter_mut().zip(coeffs.iter().zip(adapt.iter())) {
// Widen to i32 so the i16*i16 products cannot overflow.
161 sum += i32::from(*coef) * i32::from(*res);
// NOTE(review): several interior lines (struct literal header, branch
// arms of the adaptation-value selection, `if`/`else` around the two
// update styles, position increments, closing braces) are elided from
// this view; comments describe only what is visible.
171 impl NFilterContext {
// Build a filter of order `ord16 * 16` taps; `bits` is the prediction
// rounding shift, `new` presumably selects the newer adaptation style
// used at line 217 below — TODO confirm.
172 fn new(ord16: u8, bits: u8, new: bool) -> Self {
173 let order = ord16 as usize * 16;
// Ring buffer: order*2 leading entries (delay + adapt history kept across
// rewinds) followed by HISTORY_SIZE samples of sliding window.
175 buf: if order > 0 { vec![0; order * 2 + HISTORY_SIZE] } else { Vec::new() },
176 coeffs: vec![0; order],
// Clear the persistent history and the adaptive taps between frames.
183 fn reset(&mut self) {
184 for el in self.buf[..self.order * 2].iter_mut() { *el = 0; }
185 for el in self.coeffs.iter_mut() { *el = 0; }
// Run the adaptive filter over a block of residuals in place.
188 fn apply(&mut self, dst: &mut [i32]) {
// Order 0 means this cascade slot is unused.
189 if self.order == 0 { return; }
// adapt_pos / delay_pos index the two interleaved histories in buf.
190 let mut adapt_pos = self.order;
191 let mut delay_pos = self.order * 2;
192 for el in dst.iter_mut() {
// Dot product of taps with the last `order` delayed samples; also
// adapts the taps using the last `order` adaptation values.
193 let sum = adapt_loop(&mut self.coeffs,
194 &self.buf[delay_pos - self.order..],
195 &self.buf[adapt_pos - self.order..], *el);
// Rounded prediction; reconstructed sample = residual + prediction.
196 let pred = (sum + (1 << (self.bits - 1))) >> self.bits;
197 let val = *el + pred;
// Store into the delay history clamped to the i16 range.
199 self.buf[delay_pos] = val.min(32767).max(-32768) as i16;
// Adaptation magnitude is bucketed by |val| against a running average
// (the bucket values on the elided lines are not visible here).
201 let aval = val.abs();
202 let sign = val2sign(val) as i16;
203 self.buf[adapt_pos] = if aval == 0 {
205 } else if aval <= self.avg * 4 / 3 {
207 } else if aval <= self.avg * 3 {
// Track the running average of sample magnitudes (leaky, 1/16 step).
212 self.avg += (aval - self.avg) / 16;
// Decay recent adaptation entries so old activity fades out.
213 self.buf[adapt_pos - 1] >>= 1;
214 self.buf[adapt_pos - 2] >>= 1;
215 self.buf[adapt_pos - 8] >>= 1;
// Alternative (presumably `new`-mode) adaptation: fixed 4*sign step and
// a different decay pattern — selecting condition elided, TODO confirm.
217 self.buf[adapt_pos] = 4 * (val2sign(val) as i16);
218 self.buf[adapt_pos - 4] >>= 1;
219 self.buf[adapt_pos - 8] >>= 1;
// Ring-buffer rewind: when the window is full, move the trailing
// order*2 entries back to the front and reset both cursors.
223 if delay_pos == HISTORY_SIZE + self.order * 2 {
224 delay_pos = self.order * 2;
225 adapt_pos = self.order;
226 for i in 0..self.order * 2 {
227 self.buf[i] = self.buf[HISTORY_SIZE + i];
// State of the final fixed-order predictor applied after the adaptive
// cascade. Field list is elided; from the impl below it holds at least:
// coeffs_a/delay_a/adapt_a (order-4 stage), coeffs_b/delay_b/adapt_b
// (order-5 cross-channel stage), last_a, filter_a, filter_b.
234 #[derive(Clone,Copy,Default)]
235 struct LastFilterContext {
// NOTE(review): init-method header, loop headers of the coefficient
// updates, return statements and closing braces are elided from this
// view; comments describe only what is visible.
247 impl LastFilterContext {
// Initial taps for the order-4 "a" stage (the surrounding init method
// and any alternative initialisation are elided — TODO confirm when
// COEFFS_A_NEW vs. zeroed taps are used).
249 const COEFFS_A_NEW: [i32; 4] = [360, 317, -109, 98];
253 self.coeffs_a = COEFFS_A_NEW;
254 self.coeffs_b = [0; 5];
256 self.delay_a = [0; 4];
257 self.adapt_a = [0; 4];
258 self.delay_b = [0; 5];
259 self.adapt_b = [0; 5];
// Order-4 prediction from this channel's own history; the raw dot
// product is returned unscaled (update_a shifts it down by 10).
261 fn predict_a(&mut self) -> i32 {
// Shift both histories one step towards older entries.
262 for i in (0..3).rev() {
263 self.delay_a[i + 1] = self.delay_a[i];
264 self.adapt_a[i + 1] = self.adapt_a[i];
266 self.delay_a[0] = self.last_a;
// delay_a[1] becomes a first-order difference.
267 self.delay_a[1] = self.last_a - self.delay_a[1];
// Adaptation directions are just the signs of the inputs (sign-LMS).
268 self.adapt_a[0] = val2sign(self.delay_a[0]);
269 self.adapt_a[1] = val2sign(self.delay_a[1]);
271 self.delay_a[0] * self.coeffs_a[0] +
272 self.delay_a[1] * self.coeffs_a[1] +
273 self.delay_a[2] * self.coeffs_a[2] +
274 self.delay_a[3] * self.coeffs_a[3]
// Order-5 cross-channel prediction driven by the OTHER channel's
// smoothed output (`other_a`); result is halved before use.
276 fn predict_b(&mut self, other_a: i32) -> i32 {
277 for i in (0..4).rev() {
278 self.delay_b[i + 1] = self.delay_b[i];
279 self.adapt_b[i + 1] = self.adapt_b[i];
// Leaky high-pass of the other channel: subtract 31/32 of the previous
// value, then remember the new one.
281 self.delay_b[0] = other_a - ((self.filter_b * 31) >> 5);
282 self.delay_b[1] = self.delay_b[0] - self.delay_b[1];
283 self.adapt_b[0] = val2sign(self.delay_b[0]);
284 self.adapt_b[1] = val2sign(self.delay_b[1]);
286 self.filter_b = other_a;
288 (self.delay_b[0] * self.coeffs_b[0] +
289 self.delay_b[1] * self.coeffs_b[1] +
290 self.delay_b[2] * self.coeffs_b[2] +
291 self.delay_b[3] * self.coeffs_b[3] +
292 self.delay_b[4] * self.coeffs_b[4]) >> 1
// Fold the decoded residual into the "a" stage and adapt its taps;
// returns the smoothed output (return expression elided from view).
294 fn update_a(&mut self, pred: i32, diff: i32) -> i32 {
// Prediction is in Q10 here, hence the >> 10.
295 self.last_a = diff + (pred >> 10);
296 let sign = val2sign(diff);
// Sign-LMS tap update (enclosing loop header elided).
298 self.coeffs_a[i] += self.adapt_a[i] * sign;
// Leaky integrator: keep 31/32 of the previous smoothed output.
300 self.filter_a = self.last_a + ((self.filter_a * 31) >> 5);
// Sign-LMS update of the cross-channel "b" taps.
304 fn update_b(&mut self, diff: i32) {
305 let sign = val2sign(diff);
307 self.coeffs_b[i] += self.adapt_b[i] * sign;
// Combined predict+update used by the older (pre-3950, see callers)
// bitstream flavour: order-4 on successive differences, Q9 prediction.
310 fn predict_3930(&mut self, diff: i32) -> i32 {
311 for i in (0..3).rev() {
312 self.delay_a[i + 1] = self.delay_a[i];
314 self.delay_a[0] = self.last_a;
315 let d0 = self.delay_a[0];
316 let d1 = self.delay_a[0] - self.delay_a[1];
317 let d2 = self.delay_a[1] - self.delay_a[2];
318 let d3 = self.delay_a[2] - self.delay_a[3];
320 let pred = (self.coeffs_a[0] * d0 +
321 self.coeffs_a[1] * d1 +
322 self.coeffs_a[2] * d2 +
323 self.coeffs_a[3] * d3) >> 9;
324 self.last_a = diff + pred;
325 self.filter_a = self.last_a + ((self.filter_a * 31) >> 5);
// Per-tap sign update: step opposes the sign of each difference term.
327 let sign = val2sign(diff);
328 self.coeffs_a[0] += if d0 < 0 { sign } else { -sign };
329 self.coeffs_a[1] += if d1 < 0 { sign } else { -sign };
330 self.coeffs_a[2] += if d2 < 0 { sign } else { -sign };
331 self.coeffs_a[3] += if d3 < 0 { sign } else { -sign };
// NOTE(review): the enclosing `impl` header, struct-literal scaffolding,
// loop headers, write-backs of new_l/new_r, and closing braces are
// elided from this view; comments describe only what is visible.
// Build the "new"-style filter chain for the given codec version and
// compression level.
338 fn new(version: u16, compression: u16) -> Self {
// Compression levels are multiples of 1000; index into NEW_FILTER_PARAMS.
// NOTE(review): underflows/overflows for out-of-range `compression`
// values are not guarded here — presumably validated by the caller.
339 let cidx = (compression / 1000) as usize - 1;
342 filters: [NFilterContext::default(), NFilterContext::default(), NFilterContext::default()],
343 lfilt: LastFilterContext::default(),
344 rfilt: LastFilterContext::default(),
346 obj.version = version;
// Version 3.98+ streams use the newer adaptation style.
347 let new = version >= 3980;
349 let (ord16, bits) = NEW_FILTER_PARAMS[cidx][i];
350 obj.filters[i] = NFilterContext::new(ord16, bits, new);
// Mono: run the adaptive cascade, then the final predictor stage.
354 fn filter_mono(&mut self, dst: &mut [i32]) {
355 for filt in self.filters.iter_mut() {
// 3.95+ uses the split predict/update path; older streams use the
// combined 3930-style predictor.
360 if self.version >= 3950 {
361 for el in dst.iter_mut() {
362 let pred = self.lfilt.predict_a();
363 *el = self.lfilt.update_a(pred, *el);
366 for el in dst.iter_mut() {
367 *el = self.lfilt.predict_3930(*el);
// Stereo: cascade per channel, then cross-coupled final predictors
// (each channel's "b" stage is fed the other channel's filter_a).
371 fn filter_stereo(&mut self, l: &mut [i32], r: &mut [i32]) {
372 for filt in self.filters.iter_mut() {
380 if self.version >= 3950 {
381 for (l, r) in l.iter_mut().zip(r.iter_mut()) {
382 let mut pred = self.lfilt.predict_a();
383 pred += self.lfilt.predict_b(self.rfilt.filter_a);
384 let new_l = self.lfilt.update_a(pred, *l);
385 self.lfilt.update_b(*l);
388 let mut pred = self.rfilt.predict_a();
389 pred += self.rfilt.predict_b(self.lfilt.filter_a);
390 let new_r = self.rfilt.update_a(pred, *r);
391 self.rfilt.update_b(*r);
// Pre-3950 stereo: note the channels are crossed (left output from the
// right input and vice versa) — matches the swapped reconstruction in
// FilterMode::filter_stereo above.
395 for (l, r) in l.iter_mut().zip(r.iter_mut()) {
396 let new_l = self.lfilt.predict_3930(*r);
397 let new_r = self.rfilt.predict_3930(*l);
// NOTE(review): the enclosing `impl` header is elided and this block is
// truncated at the end of the visible chunk (filter_extra_high is cut
// mid-body); match-arm labels, loop headers and closing braces are also
// elided. Comments describe only what is visible.
// Legacy filter constructor — body elided from this view.
406 fn new(version: u16, compression: u16) -> Self {
// Dispatch on compression level: a cascade of progressively higher-order
// fixed filters (arm labels elided; presumably the fast/normal/high/
// extra-high levels — TODO confirm).
411 fn filter(&mut self, dst: &mut [i32]) {
412 match self.compression {
414 Self::filter_fast(dst);
417 Self::filter_normal(dst, 4, 10);
418 is elided here
420 Self::filter_high(dst, 16, 9);
421 Self::filter_normal(dst, 16, 10);
// Highest level differs by codec version: pre-3.83 uses the order-128
// pair, newer streams add the extra-high stage and order-256 filters.
424 if self.version < 3830 {
425 Self::filter_high(dst, 128, 11);
426 Self::filter_normal(dst, 128, 10);
428 Self::filter_extra_high(dst);
429 Self::filter_high(dst, 256, 12);
430 Self::filter_normal(dst, 256, 11);
// Simple order-2 predictor with a single adaptive weight (Q9).
436 fn filter_fast(dst: &mut [i32]) {
437 const COEFF_A_FAST: i32 = 375;
// Seed state from the first samples (short-input guard elided —
// presumably an early return for len < 3, TODO confirm).
442 let mut delay = [dst[1], dst[0]];
443 let mut last = dst[2];
444 let mut filter = dst[2];
445 let mut weight = COEFF_A_FAST;
446 for el in dst[3..].iter_mut() {
// Linear extrapolation from the last two samples.
449 let pred = delay[0] * 2 - delay[1];
450 last = *el + ((pred * weight) >> 9);
// Same sign of residual and prediction => weight adjustment (the
// adjustment lines themselves are elided from this view).
451 if (*el ^ pred) > 0 {
// Order-3 "a" stage plus order-2 "b" stage with a leaky integrator;
// `start`/`shift` vary per compression level (see filter() above).
460 fn filter_normal(dst: &mut [i32], start: usize, shift: u8) {
461 const COEFFS_A_NORMAL: [i32; 3] = [64, 115, 64];
462 const COEFFS_B_NORMAL: [i32; 2] = [740, 0];
465 let mut coeffs_a = COEFFS_A_NORMAL;
466 let mut coeffs_b = COEFFS_B_NORMAL;
467 let mut filter_a = 0;
468 let mut filter_b = 0;
469 let mut delay_a = [0; 3];
470 let mut delay_b = [0; 2];
472 for (i, el) in dst.iter_mut().enumerate() {
473 delay_a[2] = delay_a[1]; delay_a[1] = delay_a[0]; delay_a[0] = last;
474 delay_b[1] = delay_b[0]; delay_b[0] = filter_b;
476 let val = *el + filter_a;
// Derived prediction inputs (a2/b1 definitions elided from this view).
483 let a0 = delay_a[0] + (delay_a[2] - delay_a[1]) * 8;
484 let a1 = (delay_a[0] - delay_a[1]) * 2;
486 let b0 = delay_b[0] * 2 - delay_b[1];
489 let pred_a = a0 * coeffs_a[0] + a1 * coeffs_a[1] + a2 * coeffs_a[2];
490 let pred_b = b0 * coeffs_b[0] - b1 * coeffs_b[1];
// Sign-LMS updates; `(x >> 30) & 2 - 1` style terms extract +/-step
// from the operand's sign bit.
492 let sign = val2sign(*el);
493 coeffs_a[0] += (((a0 >> 30) & 2) - 1) * sign;
494 coeffs_a[1] += (((a1 >> 28) & 8) - 4) * sign;
495 coeffs_a[2] += (((a2 >> 28) & 8) - 4) * sign;
496 last = *el + (pred_a >> 11);
498 let sign = val2sign(last);
499 coeffs_b[0] += (((b0 >> 29) & 4) - 2) * sign;
500 coeffs_b[1] -= (((b1 >> 30) & 2) - 1) * sign;
502 filter_b = last + (pred_b >> shift);
// Leaky integrator, same 31/32 decay as the new-style filters.
503 filter_a = filter_b + ((filter_a * 31) >> 5);
// High-order sign-LMS FIR (order up to 256, fixed-size scratch arrays).
508 fn filter_high(dst: &mut [i32], order: usize, shift: u8) {
509 let mut coeffs = [0i32; 256];
510 let mut delay = [0i32; 256];
// Too few samples to fill the delay line (the early return itself is
// elided from this view).
511 if dst.len() <= order {
// Seed the delay line with the first `order` samples.
514 delay[..order].copy_from_slice(&dst[..order]);
515 for el in dst[order..].iter_mut() {
516 let sign = val2sign(*el);
// Accumulate the prediction and adapt each tap by the sample sign
// (loop header and sum declaration elided).
519 sum += delay[i] * coeffs[i];
520 coeffs[i] -= (((delay[i] >> 30) & 2) - 1) * sign;
// Slide the delay line (O(order) shift per sample).
523 for i in 0..order-1 {
524 delay[i] = delay[i + 1];
526 delay[order - 1] = *el;
// Extra order-8 sign-LMS stage applied from sample 256 onward.
// NOTE(review): body continues past the end of this view.
529 fn filter_extra_high(dst: &mut [i32]) {
530 let mut coeffs = [0i32; 8];
531 let mut delay = [0i32; 8];
532 for el in dst[256..].iter_mut() {
533 let sign = val2sign(*el);
536 sum += delay[i] * coeffs[i];
537 coeffs[i] -= (((delay[i] >> 30) & 2) - 1) * sign;
539 for i in (0..7).rev() {
540 delay[i + 1] = delay[i];