2ab955477265156fa34a419589afd2053aae05ae
[nihav.git] / nihav-core / src / dsp / fft.rs
1 use std::f32::{self, consts};
2 use std::ops::{Not, Neg, Add, AddAssign, Sub, SubAssign, Mul, MulAssign};
3 use std::fmt;
4
5 #[repr(C)]
6 #[derive(Debug,Clone,Copy,PartialEq)]
7 pub struct FFTComplex {
8 pub re: f32,
9 pub im: f32,
10 }
11
12 impl FFTComplex {
13 pub fn exp(val: f32) -> Self {
14 FFTComplex { re: val.cos(), im: val.sin() }
15 }
16 pub fn rotate(self) -> Self {
17 FFTComplex { re: -self.im, im: self.re }
18 }
19 pub fn scale(self, scale: f32) -> Self {
20 FFTComplex { re: self.re * scale, im: self.im * scale }
21 }
22 }
23
24 impl Neg for FFTComplex {
25 type Output = FFTComplex;
26 fn neg(self) -> Self::Output {
27 FFTComplex { re: -self.re, im: -self.im }
28 }
29 }
30
31 impl Not for FFTComplex {
32 type Output = FFTComplex;
33 fn not(self) -> Self::Output {
34 FFTComplex { re: self.re, im: -self.im }
35 }
36 }
37
38 impl Add for FFTComplex {
39 type Output = FFTComplex;
40 fn add(self, other: Self) -> Self::Output {
41 FFTComplex { re: self.re + other.re, im: self.im + other.im }
42 }
43 }
44
45 impl AddAssign for FFTComplex {
46 fn add_assign(&mut self, other: Self) {
47 self.re += other.re;
48 self.im += other.im;
49 }
50 }
51
52 impl Sub for FFTComplex {
53 type Output = FFTComplex;
54 fn sub(self, other: Self) -> Self::Output {
55 FFTComplex { re: self.re - other.re, im: self.im - other.im }
56 }
57 }
58
59 impl SubAssign for FFTComplex {
60 fn sub_assign(&mut self, other: Self) {
61 self.re -= other.re;
62 self.im -= other.im;
63 }
64 }
65
66 impl Mul for FFTComplex {
67 type Output = FFTComplex;
68 fn mul(self, other: Self) -> Self::Output {
69 FFTComplex { re: self.re * other.re - self.im * other.im,
70 im: self.im * other.re + self.re * other.im }
71 }
72 }
73
74 impl MulAssign for FFTComplex {
75 fn mul_assign(&mut self, other: Self) {
76 let re = self.re * other.re - self.im * other.im;
77 let im = self.im * other.re + self.re * other.im;
78 self.re = re;
79 self.im = im;
80 }
81 }
82
83 impl fmt::Display for FFTComplex {
84 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
85 write!(f, "({}, {})", self.re, self.im)
86 }
87 }
88
89 pub const FFTC_ZERO: FFTComplex = FFTComplex { re: 0.0, im: 0.0 };
90
91 #[derive(Debug,Clone,Copy,PartialEq)]
92 pub enum FFTMode {
93 Matrix,
94 CooleyTukey,
95 SplitRadix,
96 }
97
98 pub struct FFT {
99 table: Vec<FFTComplex>,
100 perms: Vec<usize>,
101 swaps: Vec<usize>,
102 bits: u32,
103 mode: FFTMode,
104 }
105
106 impl FFT {
107 fn do_fft_inplace_ct(&mut self, data: &mut [FFTComplex], bits: u32, forward: bool) {
108 if bits == 0 { return; }
109 if bits == 1 {
110 let sum01 = data[0] + data[1];
111 let dif01 = data[0] - data[1];
112 data[0] = sum01;
113 data[1] = dif01;
114 return;
115 }
116 if bits == 2 {
117 let sum01 = data[0] + data[1];
118 let dif01 = data[0] - data[1];
119 let sum23 = data[2] + data[3];
120 let dif23 = data[2] - data[3];
121 if forward {
122 data[0] = sum01 + sum23;
123 data[1] = dif01 - dif23.rotate();
124 data[2] = sum01 - sum23;
125 data[3] = dif01 + dif23.rotate();
126 } else {
127 data[0] = sum01 + sum23;
128 data[1] = dif01 + dif23.rotate();
129 data[2] = sum01 - sum23;
130 data[3] = dif01 - dif23.rotate();
131 }
132 return;
133 }
134
135 let hsize = (1 << (bits - 1)) as usize;
136 self.do_fft_inplace_ct(&mut data[0..hsize], bits - 1, forward);
137 self.do_fft_inplace_ct(&mut data[hsize..], bits - 1, forward);
138 let offs = hsize;
139 {
140 let e = data[0];
141 let o = data[hsize];
142 data[0] = e + o;
143 data[hsize] = e - o;
144 }
145 if forward {
146 for k in 1..hsize {
147 let e = data[k];
148 let o = data[k + hsize] * self.table[offs + k];
149 data[k] = e + o;
150 data[k + hsize] = e - o;
151 }
152 } else {
153 for k in 1..hsize {
154 let e = data[k];
155 let o = data[k + hsize] * !self.table[offs + k];
156 data[k] = e + o;
157 data[k + hsize] = e - o;
158 }
159 }
160 }
161
162 fn do_fft_inplace_splitradix(&mut self, data: &mut [FFTComplex], bits: u32, forward: bool) {
163 if bits == 0 { return; }
164 if bits == 1 {
165 let sum01 = data[0] + data[1];
166 let dif01 = data[0] - data[1];
167 data[0] = sum01;
168 data[1] = dif01;
169 return;
170 }
171 if bits == 2 {
172 let sum01 = data[0] + data[2];
173 let dif01 = data[0] - data[2];
174 let sum23 = data[1] + data[3];
175 let dif23 = data[1] - data[3];
176 if forward {
177 data[0] = sum01 + sum23;
178 data[1] = dif01 - dif23.rotate();
179 data[2] = sum01 - sum23;
180 data[3] = dif01 + dif23.rotate();
181 } else {
182 data[0] = sum01 + sum23;
183 data[1] = dif01 + dif23.rotate();
184 data[2] = sum01 - sum23;
185 data[3] = dif01 - dif23.rotate();
186 }
187 return;
188 }
189 let qsize = (1 << (bits - 2)) as usize;
190 let hsize = (1 << (bits - 1)) as usize;
191 let q3size = qsize + hsize;
192
193 self.do_fft_inplace_splitradix(&mut data[0 ..hsize], bits - 1, forward);
194 self.do_fft_inplace_splitradix(&mut data[hsize ..q3size], bits - 2, forward);
195 self.do_fft_inplace_splitradix(&mut data[q3size..], bits - 2, forward);
196 let off = hsize;
197 if forward {
198 {
199 let t3 = data[0 + hsize] + data[0 + q3size];
200 let t4 = (data[0 + hsize] - data[0 + q3size]).rotate();
201 let e1 = data[0];
202 let e2 = data[0 + qsize];
203 data[0] = e1 + t3;
204 data[0 + qsize] = e2 - t4;
205 data[0 + hsize] = e1 - t3;
206 data[0 + q3size] = e2 + t4;
207 }
208 for k in 1..qsize {
209 let t1 = self.table[off + k * 2 + 0] * data[k + hsize];
210 let t2 = self.table[off + k * 2 + 1] * data[k + q3size];
211 let t3 = t1 + t2;
212 let t4 = (t1 - t2).rotate();
213 let e1 = data[k];
214 let e2 = data[k + qsize];
215 data[k] = e1 + t3;
216 data[k + qsize] = e2 - t4;
217 data[k + hsize] = e1 - t3;
218 data[k + qsize * 3] = e2 + t4;
219 }
220 } else {
221 {
222 let t3 = data[0 + hsize] + data[0 + q3size];
223 let t4 = (data[0 + hsize] - data[0 + q3size]).rotate();
224 let e1 = data[0];
225 let e2 = data[0 + qsize];
226 data[0] = e1 + t3;
227 data[0 + qsize] = e2 + t4;
228 data[0 + hsize] = e1 - t3;
229 data[0 + q3size] = e2 - t4;
230 }
231 for k in 1..qsize {
232 let t1 = !self.table[off + k * 2 + 0] * data[k + hsize];
233 let t2 = !self.table[off + k * 2 + 1] * data[k + q3size];
234 let t3 = t1 + t2;
235 let t4 = (t1 - t2).rotate();
236 let e1 = data[k];
237 let e2 = data[k + qsize];
238 data[k] = e1 + t3;
239 data[k + qsize] = e2 + t4;
240 data[k + hsize] = e1 - t3;
241 data[k + qsize * 3] = e2 - t4;
242 }
243 }
244 }
245
246 pub fn do_fft(&mut self, src: &[FFTComplex], dst: &mut [FFTComplex], forward: bool) {
247 match self.mode {
248 FFTMode::Matrix => {
249 let base = if forward { -consts::PI * 2.0 / (src.len() as f32) }
250 else { consts::PI * 2.0 / (src.len() as f32) };
251 for k in 0..src.len() {
252 let mut sum = FFTC_ZERO;
253 for n in 0..src.len() {
254 let w = FFTComplex::exp(base * ((n * k) as f32));
255 sum += src[n] * w;
256 }
257 dst[k] = sum;
258 }
259 },
260 FFTMode::CooleyTukey => {
261 let bits = self.bits;
262 for k in 0..src.len() { dst[k] = src[self.perms[k]]; }
263 self.do_fft_inplace_ct(dst, bits, forward);
264 },
265 FFTMode::SplitRadix => {
266 let bits = self.bits;
267 for k in 0..src.len() { dst[k] = src[self.perms[k]]; }
268 self.do_fft_inplace_splitradix(dst, bits, forward);
269 },
270 };
271 }
272
273 pub fn do_fft_inplace(&mut self, data: &mut [FFTComplex], forward: bool) {
274 for idx in 0..self.swaps.len() {
275 let nidx = self.swaps[idx];
276 if idx != nidx {
277 let t = data[nidx];
278 data[nidx] = data[idx];
279 data[idx] = t;
280 }
281 }
282 match self.mode {
283 FFTMode::Matrix => {
284 let size = (1 << self.bits) as usize;
285 let base = if forward { -consts::PI * 2.0 / (size as f32) }
286 else { consts::PI * 2.0 / (size as f32) };
287 let mut res: Vec<FFTComplex> = Vec::with_capacity(size);
288 for k in 0..size {
289 let mut sum = FFTC_ZERO;
290 for n in 0..size {
291 let w = FFTComplex::exp(base * ((n * k) as f32));
292 sum += data[n] * w;
293 }
294 res.push(sum);
295 }
296 for k in 0..size {
297 data[k] = res[k];
298 }
299 },
300 FFTMode::CooleyTukey => {
301 let bits = self.bits;
302 self.do_fft_inplace_ct(data, bits, forward);
303 },
304 FFTMode::SplitRadix => {
305 let bits = self.bits;
306 self.do_fft_inplace_splitradix(data, bits, forward);
307 },
308 };
309 }
310 }
311
312 pub struct FFTBuilder {
313 }
314
315 fn reverse_bits(inval: u32) -> u32 {
316 const REV_TAB: [u8; 16] = [
317 0b0000, 0b1000, 0b0100, 0b1100, 0b0010, 0b1010, 0b0110, 0b1110,
318 0b0001, 0b1001, 0b0101, 0b1101, 0b0011, 0b1011, 0b0111, 0b1111,
319 ];
320
321 let mut ret = 0;
322 let mut val = inval;
323 for _ in 0..8 {
324 ret = (ret << 4) | (REV_TAB[(val & 0xF) as usize] as u32);
325 val = val >> 4;
326 }
327 ret
328 }
329
330 fn swp_idx(idx: usize, bits: u32) -> usize {
331 let s = reverse_bits(idx as u32) as usize;
332 s >> (32 - bits)
333 }
334
335 fn gen_sr_perms(swaps: &mut [usize], size: usize) {
336 if size <= 4 { return; }
337 let mut evec: Vec<usize> = Vec::with_capacity(size / 2);
338 let mut ovec1: Vec<usize> = Vec::with_capacity(size / 4);
339 let mut ovec2: Vec<usize> = Vec::with_capacity(size / 4);
340 for k in 0..size/4 {
341 evec.push (swaps[k * 4 + 0]);
342 ovec1.push(swaps[k * 4 + 1]);
343 evec.push (swaps[k * 4 + 2]);
344 ovec2.push(swaps[k * 4 + 3]);
345 }
346 for k in 0..size/2 { swaps[k] = evec[k]; }
347 for k in 0..size/4 { swaps[k + size/2] = ovec1[k]; }
348 for k in 0..size/4 { swaps[k + 3*size/4] = ovec2[k]; }
349 gen_sr_perms(&mut swaps[0..size/2], size/2);
350 gen_sr_perms(&mut swaps[size/2..3*size/4], size/4);
351 gen_sr_perms(&mut swaps[3*size/4..], size/4);
352 }
353
354 fn gen_swaps_for_perm(swaps: &mut Vec<usize>, perms: &Vec<usize>) {
355 let mut idx_arr: Vec<usize> = Vec::with_capacity(perms.len());
356 for i in 0..perms.len() { idx_arr.push(i); }
357 let mut run_size = 0;
358 let mut run_pos = 0;
359 for idx in 0..perms.len() {
360 if perms[idx] == idx_arr[idx] {
361 if run_size == 0 { run_pos = idx; }
362 run_size += 1;
363 } else {
364 for i in 0..run_size {
365 swaps.push(run_pos + i);
366 }
367 run_size = 0;
368 let mut spos = idx + 1;
369 while idx_arr[spos] != perms[idx] { spos += 1; }
370 idx_arr[spos] = idx_arr[idx];
371 idx_arr[idx] = perms[idx];
372 swaps.push(spos);
373 }
374 }
375 }
376
377 impl FFTBuilder {
378 pub fn new_fft(mode: FFTMode, size: usize) -> FFT {
379 let mut swaps: Vec<usize>;
380 let mut perms: Vec<usize>;
381 let mut table: Vec<FFTComplex>;
382 let bits = 31 - (size as u32).leading_zeros();
383 match mode {
384 FFTMode::Matrix => {
385 swaps = Vec::new();
386 perms = Vec::new();
387 table = Vec::new();
388 },
389 FFTMode::CooleyTukey => {
390 perms = Vec::with_capacity(size);
391 for i in 0..size {
392 perms.push(swp_idx(i, bits));
393 }
394 swaps = Vec::with_capacity(size);
395 table = Vec::with_capacity(size);
396 for _ in 0..4 { table.push(FFTC_ZERO); }
397 for b in 3..(bits+1) {
398 let hsize = (1 << (b - 1)) as usize;
399 let base = -consts::PI / (hsize as f32);
400 for k in 0..hsize {
401 table.push(FFTComplex::exp(base * (k as f32)));
402 }
403 }
404 },
405 FFTMode::SplitRadix => {
406 perms = Vec::with_capacity(size);
407 for i in 0..size {
408 perms.push(i);
409 }
410 gen_sr_perms(perms.as_mut_slice(), 1 << bits);
411 swaps = Vec::with_capacity(size);
412 table = Vec::with_capacity(size);
413 for _ in 0..4 { table.push(FFTC_ZERO); }
414 for b in 3..(bits+1) {
415 let qsize = (1 << (b - 2)) as usize;
416 let base = -consts::PI / ((qsize * 2) as f32);
417 for k in 0..qsize {
418 table.push(FFTComplex::exp(base * ((k * 1) as f32)));
419 table.push(FFTComplex::exp(base * ((k * 3) as f32)));
420 }
421 }
422 },
423 };
424 gen_swaps_for_perm(&mut swaps, &perms);
425 FFT { mode: mode, swaps: swaps, perms: perms, bits: bits, table: table }
426 }
427 }
428
429 pub struct RDFT {
430 table: Vec<FFTComplex>,
431 fft: FFT,
432 fwd: bool,
433 size: usize,
434 fwd_fft: bool,
435 }
436
437 fn crossadd(a: &FFTComplex, b: &FFTComplex) -> FFTComplex {
438 FFTComplex { re: a.re + b.re, im: a.im - b.im }
439 }
440
441 impl RDFT {
442 pub fn do_rdft(&mut self, src: &[FFTComplex], dst: &mut [FFTComplex]) {
443 dst.copy_from_slice(src);
444 self.do_rdft_inplace(dst);
445 }
446 pub fn do_rdft_inplace(&mut self, buf: &mut [FFTComplex]) {
447 if !self.fwd {
448 for n in 0..self.size/2 {
449 let in0 = buf[n + 1];
450 let in1 = buf[self.size - n - 1];
451
452 let t0 = crossadd(&in0, &in1);
453 let t1 = FFTComplex { re: in1.im + in0.im, im: in1.re - in0.re };
454 let tab = self.table[n];
455 let t2 = FFTComplex { re: t1.im * tab.im + t1.re * tab.re, im: t1.im * tab.re - t1.re * tab.im };
456
457 buf[n + 1] = FFTComplex { re: t0.im - t2.im, im: t0.re - t2.re }; // (t0 - t2).conj().rotate()
458 buf[self.size - n - 1] = (t0 + t2).rotate();
459 }
460 let a = buf[0].re;
461 let b = buf[0].im;
462 buf[0].re = a - b;
463 buf[0].im = a + b;
464 }
465 self.fft.do_fft_inplace(buf, self.fwd_fft);
466 if self.fwd {
467 for n in 0..self.size/2 {
468 let in0 = buf[n + 1];
469 let in1 = buf[self.size - n - 1];
470
471 let t0 = crossadd(&in0, &in1).scale(0.5);
472 let t1 = FFTComplex { re: in0.im + in1.im, im: in0.re - in1.re };
473 let t2 = t1 * self.table[n];
474
475 buf[n + 1] = crossadd(&t0, &t2);
476 buf[self.size - n - 1] = FFTComplex { re: t0.re - t2.re, im: -(t0.im + t2.im) };
477 }
478 let a = buf[0].re;
479 let b = buf[0].im;
480 buf[0].re = a + b;
481 buf[0].im = a - b;
482 } else {
483 for n in 0..self.size {
484 buf[n] = FFTComplex{ re: buf[n].im, im: buf[n].re };
485 }
486 }
487 }
488 }
489
490 pub struct RDFTBuilder {
491 }
492
493 impl RDFTBuilder {
494 pub fn new_rdft(mode: FFTMode, size: usize, forward: bool, forward_fft: bool) -> RDFT {
495 let mut table: Vec<FFTComplex> = Vec::with_capacity(size / 4);
496 let (base, scale) = if forward { (consts::PI / (size as f32), 0.5) } else { (-consts::PI / (size as f32), 1.0) };
497 for i in 0..size/2 {
498 table.push(FFTComplex::exp(base * ((i + 1) as f32)).scale(scale));
499 }
500 let fft = FFTBuilder::new_fft(mode, size);
501 RDFT { table, fft, size, fwd: forward, fwd_fft: forward_fft }
502 }
503 }
504
505
506 #[cfg(test)]
507 mod test {
508 use super::*;
509
510 #[test]
511 fn test_fft() {
512 let mut fin: [FFTComplex; 128] = [FFTC_ZERO; 128];
513 let mut fout1: [FFTComplex; 128] = [FFTC_ZERO; 128];
514 let mut fout2: [FFTComplex; 128] = [FFTC_ZERO; 128];
515 let mut fout3: [FFTComplex; 128] = [FFTC_ZERO; 128];
516 let mut fft1 = FFTBuilder::new_fft(FFTMode::Matrix, fin.len());
517 let mut fft2 = FFTBuilder::new_fft(FFTMode::CooleyTukey, fin.len());
518 let mut fft3 = FFTBuilder::new_fft(FFTMode::SplitRadix, fin.len());
519 let mut seed: u32 = 42;
520 for i in 0..fin.len() {
521 seed = seed.wrapping_mul(1664525).wrapping_add(1013904223);
522 let val = (seed >> 16) as i16;
523 fin[i].re = (val as f32) / 256.0;
524 seed = seed.wrapping_mul(1664525).wrapping_add(1013904223);
525 let val = (seed >> 16) as i16;
526 fin[i].im = (val as f32) / 256.0;
527 }
528 fft1.do_fft(&fin, &mut fout1, true);
529 fft2.do_fft(&fin, &mut fout2, true);
530 fft3.do_fft(&fin, &mut fout3, true);
531
532 for i in 0..fin.len() {
533 assert!((fout1[i].re - fout2[i].re).abs() < 1.0);
534 assert!((fout1[i].im - fout2[i].im).abs() < 1.0);
535 assert!((fout1[i].re - fout3[i].re).abs() < 1.0);
536 assert!((fout1[i].im - fout3[i].im).abs() < 1.0);
537 }
538 fft1.do_fft_inplace(&mut fout1, false);
539 fft2.do_fft_inplace(&mut fout2, false);
540 fft3.do_fft_inplace(&mut fout3, false);
541
542 let sc = 1.0 / (fin.len() as f32);
543 for i in 0..fin.len() {
544 assert!((fin[i].re - fout1[i].re * sc).abs() < 1.0);
545 assert!((fin[i].im - fout1[i].im * sc).abs() < 1.0);
546 assert!((fout1[i].re - fout2[i].re).abs() < 1.0);
547 assert!((fout1[i].im - fout2[i].im).abs() < 1.0);
548 assert!((fout1[i].re - fout3[i].re).abs() < 1.0);
549 assert!((fout1[i].im - fout3[i].im).abs() < 1.0);
550 }
551 }
552
553 #[test]
554 fn test_rdft() {
555 let mut fin: [FFTComplex; 128] = [FFTC_ZERO; 128];
556 let mut fout1: [FFTComplex; 128] = [FFTC_ZERO; 128];
557 let mut rdft = RDFTBuilder::new_rdft(FFTMode::SplitRadix, fin.len(), true, true);
558 let mut seed: u32 = 42;
559 for i in 0..fin.len() {
560 seed = seed.wrapping_mul(1664525).wrapping_add(1013904223);
561 let val = (seed >> 16) as i16;
562 fin[i].re = (val as f32) / 256.0;
563 seed = seed.wrapping_mul(1664525).wrapping_add(1013904223);
564 let val = (seed >> 16) as i16;
565 fin[i].im = (val as f32) / 256.0;
566 }
567 rdft.do_rdft(&fin, &mut fout1);
568 let mut irdft = RDFTBuilder::new_rdft(FFTMode::SplitRadix, fin.len(), false, true);
569 irdft.do_rdft_inplace(&mut fout1);
570
571 for i in 0..fin.len() {
572 let tst = fout1[i].scale(0.5/(fout1.len() as f32));
573 assert!((tst.re - fin[i].re).abs() < 1.0);
574 assert!((tst.im - fin[i].im).abs() < 1.0);
575 }
576 }
577 }