switch to better FFT interface and more flexible FFT implementation
[nihav.git] / nihav-core / src / dsp / fft.rs
CommitLineData
3f3b5b23
KS
1use std::f32::{self, consts};
2use std::ops::{Not, Neg, Add, AddAssign, Sub, SubAssign, Mul, MulAssign};
3use std::fmt;
4
b01971f2 5#[repr(C)]
3f3b5b23
KS
6#[derive(Debug,Clone,Copy,PartialEq)]
7pub struct FFTComplex {
8 pub re: f32,
9 pub im: f32,
10}
11
12impl FFTComplex {
13 pub fn exp(val: f32) -> Self {
14 FFTComplex { re: val.cos(), im: val.sin() }
15 }
16 pub fn rotate(self) -> Self {
17 FFTComplex { re: -self.im, im: self.re }
18 }
19 pub fn scale(self, scale: f32) -> Self {
20 FFTComplex { re: self.re * scale, im: self.im * scale }
21 }
22}
23
24impl Neg for FFTComplex {
25 type Output = FFTComplex;
26 fn neg(self) -> Self::Output {
27 FFTComplex { re: -self.re, im: -self.im }
28 }
29}
30
31impl Not for FFTComplex {
32 type Output = FFTComplex;
33 fn not(self) -> Self::Output {
34 FFTComplex { re: self.re, im: -self.im }
35 }
36}
37
38impl Add for FFTComplex {
39 type Output = FFTComplex;
40 fn add(self, other: Self) -> Self::Output {
41 FFTComplex { re: self.re + other.re, im: self.im + other.im }
42 }
43}
44
45impl AddAssign for FFTComplex {
46 fn add_assign(&mut self, other: Self) {
47 self.re += other.re;
48 self.im += other.im;
49 }
50}
51
52impl Sub for FFTComplex {
53 type Output = FFTComplex;
54 fn sub(self, other: Self) -> Self::Output {
55 FFTComplex { re: self.re - other.re, im: self.im - other.im }
56 }
57}
58
59impl SubAssign for FFTComplex {
60 fn sub_assign(&mut self, other: Self) {
61 self.re -= other.re;
62 self.im -= other.im;
63 }
64}
65
66impl Mul for FFTComplex {
67 type Output = FFTComplex;
68 fn mul(self, other: Self) -> Self::Output {
69 FFTComplex { re: self.re * other.re - self.im * other.im,
70 im: self.im * other.re + self.re * other.im }
71 }
72}
73
74impl MulAssign for FFTComplex {
75 fn mul_assign(&mut self, other: Self) {
76 let re = self.re * other.re - self.im * other.im;
77 let im = self.im * other.re + self.re * other.im;
78 self.re = re;
79 self.im = im;
80 }
81}
82
83impl fmt::Display for FFTComplex {
84 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
85 write!(f, "({}, {})", self.re, self.im)
86 }
87}
88
89pub const FFTC_ZERO: FFTComplex = FFTComplex { re: 0.0, im: 0.0 };
90
9e78289c
KS
91pub fn generic_fft(data: &mut [FFTComplex], forward: bool) {
92 let mut tmp = Vec::with_capacity(data.len());
93 tmp.resize(data.len(), FFTC_ZERO);
94 let base = if forward { -consts::PI * 2.0 / (data.len() as f32) }
95 else { consts::PI * 2.0 / (data.len() as f32) };
96 for k in 0..data.len() {
97 let mut sum = FFTC_ZERO;
98 for n in 0..data.len() {
99 let w = FFTComplex::exp(base * ((n * k) as f32));
100 sum += data[n] * w;
101 }
102 tmp[k] = sum;
103 }
104 for k in 0..data.len() {
105 data[k] = tmp[k];
106 }
3f3b5b23
KS
107}
108
9e78289c
KS
109struct FFTData {
110 table: Vec<FFTComplex>,
111 tmp: Vec<FFTComplex>,
112 twiddle: Vec<FFTComplex>,
113 size: usize,
114 step: usize,
115 div: usize,
3f3b5b23
KS
116}
117
9e78289c
KS
118struct FFTGeneric {}
119
120const FFT3_CONST: f32 = 0.86602540378443864677;
121const FFT5_CONST1: FFTComplex = FFTComplex { re: 0.80901699437494742410, im: 0.58778525229247312915 };
122const FFT5_CONST2: FFTComplex = FFTComplex { re: 0.30901699437494742411, im: 0.95105651629515357211 };
123
124fn twiddle5(a: FFTComplex, b: FFTComplex, c: FFTComplex) -> (FFTComplex, FFTComplex) {
125 let re = a.re * c.re;
126 let im = a.im * c.re;
127 let diffre = b.im * c.im;
128 let diffim = b.re * c.im;
129
130 (FFTComplex { re: re - diffre, im: im + diffim }, FFTComplex { re: re + diffre, im: im - diffim })
131}
132
133impl FFTGeneric {
134 fn new_data(size: usize, forward: bool) -> FFTData {
135 let mut table: Vec<FFTComplex> = Vec::with_capacity(size * size);
136 table.resize(size * size, FFTC_ZERO);
137 let base = consts::PI * 2.0 / (size as f32);
138 if forward {
139 for n in 0..size {
140 for k in 0..size {
141 table[n * size + k] = FFTComplex::exp(-base * ((n * k) as f32));
142 }
143 }
144 } else {
145 for n in 0..size {
146 for k in 0..size {
147 table[n * size + k] = FFTComplex::exp( base * ((n * k) as f32));
148 }
149 }
150 }
151 let mut tmp = Vec::with_capacity(size);
152 tmp.resize(size, FFTC_ZERO);
153 FFTData { table, tmp, twiddle: Vec::new(), size, step: 0, div: 0 }
154 }
155 fn fft(tbl: &mut FFTData, size: usize, data: &mut [FFTComplex], step: usize) {
156 if size == 3 {
157 let s0 = data[step * 0];
158 let s1 = data[step * 1];
159 let s2 = data[step * 2];
160 let t0 = s1 + s2;
161 data[step * 0] += t0;
162 let t1 = s0 - t0.scale(0.5);
163 let t2 = (s2 - s1).rotate().scale(FFT3_CONST);
164 data[step * 1] = t1 + t2;
165 data[step * 2] = t1 - t2;
166 return;
167 }
168 if size == 5 {
169 let s0 = data[step * 0];
170 let s1 = data[step * 1];
171 let s2 = data[step * 2];
172 let s3 = data[step * 3];
173 let s4 = data[step * 4];
174
175 let t0 = s1 + s4;
176 let t1 = s1 - s4;
177 let t2 = s2 + s3;
178 let t3 = s2 - s3;
179 let (t4, t5) = twiddle5(t0, t1, FFT5_CONST2);
180 let (t6, t7) = twiddle5(t2, t3, FFT5_CONST1);
181 let (t8, t9) = twiddle5(t0, t1, FFT5_CONST1);
182 let (ta, tb) = twiddle5(t2, t3, FFT5_CONST2);
183
184 data[step * 0] = s0 + t0 + t2;
185 data[step * 1] = s0 + t5 - t6;
186 data[step * 2] = s0 - t8 + ta;
187 data[step * 3] = s0 - t9 + tb;
188 data[step * 4] = s0 + t4 - t7;
189 return;
190 }
191 for k in 0..tbl.size {
192 tbl.tmp[k] = FFTC_ZERO;
193 for n in 0..tbl.size {
194 tbl.tmp[k] += data[n * step] * tbl.table[k * tbl.size + n];
195 }
196 }
197 for n in 0..tbl.size {
198 data[n * step] = tbl.tmp[n];
199 }
200 }
201 fn ifft(tbl: &mut FFTData, size: usize, data: &mut [FFTComplex], step: usize) {
202 if size == 3 {
203 let s0 = data[step * 0];
204 let s1 = data[step * 1];
205 let s2 = data[step * 2];
206 let t0 = s1 + s2;
207 data[step * 0] += t0;
208 let t1 = s0 - t0.scale(0.5);
209 let t2 = (s2 - s1).rotate().scale(FFT3_CONST);
210 data[step * 1] = t1 - t2;
211 data[step * 2] = t1 + t2;
212 return;
213 }
214 if size == 5 {
215 let s0 = data[step * 0];
216 let s1 = data[step * 1];
217 let s2 = data[step * 2];
218 let s3 = data[step * 3];
219 let s4 = data[step * 4];
220
221 let t0 = s1 + s4;
222 let t1 = s1 - s4;
223 let t2 = s2 + s3;
224 let t3 = s2 - s3;
225 let (t4, t5) = twiddle5(t0, t1, FFT5_CONST2);
226 let (t6, t7) = twiddle5(t2, t3, FFT5_CONST1);
227 let (t8, t9) = twiddle5(t0, t1, FFT5_CONST1);
228 let (ta, tb) = twiddle5(t2, t3, FFT5_CONST2);
229
230 data[step * 0] = s0 + t0 + t2;
231 data[step * 1] = s0 + t4 - t7;
232 data[step * 2] = s0 - t9 + tb;
233 data[step * 3] = s0 - t8 + ta;
234 data[step * 4] = s0 + t5 - t6;
235 return;
236 }
237 Self::fft(tbl, size, data, step);
238 }
239}
240
241struct FFTSplitRadix {}
242
243impl FFTSplitRadix {
244 fn new_data(bits: u8, _forward: bool) -> FFTData {
245 let size = 1 << bits;
246 let mut table = Vec::with_capacity(size);
247 for _ in 0..4 { table.push(FFTC_ZERO); }
248 for b in 3..(bits+1) {
249 let qsize = (1 << (b - 2)) as usize;
250 let base = -consts::PI / ((qsize * 2) as f32);
251 for k in 0..qsize {
252 table.push(FFTComplex::exp(base * ((k * 1) as f32)));
253 table.push(FFTComplex::exp(base * ((k * 3) as f32)));
254 }
255 }
256 FFTData { table, tmp: Vec::new(), twiddle: Vec::new(), size, step: 0, div: 0 }
257 }
258 fn fft(fftdata: &mut FFTData, bits: u8, data: &mut [FFTComplex]) {
3f3b5b23
KS
259 if bits == 0 { return; }
260 if bits == 1 {
261 let sum01 = data[0] + data[1];
262 let dif01 = data[0] - data[1];
263 data[0] = sum01;
264 data[1] = dif01;
265 return;
266 }
267 if bits == 2 {
9e78289c
KS
268 let sum01 = data[0] + data[2];
269 let dif01 = data[0] - data[2];
270 let sum23 = data[1] + data[3];
271 let dif23 = data[1] - data[3];
272 data[0] = sum01 + sum23;
273 data[1] = dif01 - dif23.rotate();
274 data[2] = sum01 - sum23;
275 data[3] = dif01 + dif23.rotate();
3f3b5b23
KS
276 return;
277 }
9e78289c 278 let qsize = (1 << (bits - 2)) as usize;
3f3b5b23 279 let hsize = (1 << (bits - 1)) as usize;
9e78289c
KS
280 let q3size = qsize + hsize;
281
282 Self::fft(fftdata, bits - 1, &mut data[0 ..hsize]);
283 Self::fft(fftdata, bits - 2, &mut data[hsize ..q3size]);
284 Self::fft(fftdata, bits - 2, &mut data[q3size..]);
285 let off = hsize;
3f3b5b23 286 {
9e78289c
KS
287 let t3 = data[0 + hsize] + data[0 + q3size];
288 let t4 = (data[0 + hsize] - data[0 + q3size]).rotate();
289 let e1 = data[0];
290 let e2 = data[0 + qsize];
291 data[0] = e1 + t3;
292 data[0 + qsize] = e2 - t4;
293 data[0 + hsize] = e1 - t3;
294 data[0 + q3size] = e2 + t4;
3f3b5b23 295 }
9e78289c
KS
296 for k in 1..qsize {
297 let t1 = fftdata.table[off + k * 2 + 0] * data[k + hsize];
298 let t2 = fftdata.table[off + k * 2 + 1] * data[k + q3size];
299 let t3 = t1 + t2;
300 let t4 = (t1 - t2).rotate();
301 let e1 = data[k];
302 let e2 = data[k + qsize];
303 data[k] = e1 + t3;
304 data[k + qsize] = e2 - t4;
305 data[k + hsize] = e1 - t3;
306 data[k + qsize * 3] = e2 + t4;
3f3b5b23
KS
307 }
308 }
9e78289c 309 fn ifft(fftdata: &mut FFTData, bits: u8, data: &mut [FFTComplex]) {
3f3b5b23
KS
310 if bits == 0 { return; }
311 if bits == 1 {
312 let sum01 = data[0] + data[1];
313 let dif01 = data[0] - data[1];
314 data[0] = sum01;
315 data[1] = dif01;
316 return;
317 }
318 if bits == 2 {
319 let sum01 = data[0] + data[2];
320 let dif01 = data[0] - data[2];
321 let sum23 = data[1] + data[3];
322 let dif23 = data[1] - data[3];
9e78289c
KS
323 data[0] = sum01 + sum23;
324 data[1] = dif01 + dif23.rotate();
325 data[2] = sum01 - sum23;
326 data[3] = dif01 - dif23.rotate();
3f3b5b23
KS
327 return;
328 }
329 let qsize = (1 << (bits - 2)) as usize;
330 let hsize = (1 << (bits - 1)) as usize;
331 let q3size = qsize + hsize;
332
9e78289c
KS
333 Self::ifft(fftdata, bits - 1, &mut data[0 ..hsize]);
334 Self::ifft(fftdata, bits - 2, &mut data[hsize ..q3size]);
335 Self::ifft(fftdata, bits - 2, &mut data[q3size..]);
3f3b5b23 336 let off = hsize;
9e78289c
KS
337 {
338 let t3 = data[0 + hsize] + data[0 + q3size];
339 let t4 = (data[0 + hsize] - data[0 + q3size]).rotate();
340 let e1 = data[0];
341 let e2 = data[0 + qsize];
342 data[0] = e1 + t3;
343 data[0 + qsize] = e2 + t4;
344 data[0 + hsize] = e1 - t3;
345 data[0 + q3size] = e2 - t4;
346 }
347 for k in 1..qsize {
348 let t1 = !fftdata.table[off + k * 2 + 0] * data[k + hsize];
349 let t2 = !fftdata.table[off + k * 2 + 1] * data[k + q3size];
350 let t3 = t1 + t2;
351 let t4 = (t1 - t2).rotate();
352 let e1 = data[k];
353 let e2 = data[k + qsize];
354 data[k] = e1 + t3;
355 data[k + qsize] = e2 + t4;
356 data[k + hsize] = e1 - t3;
357 data[k + qsize * 3] = e2 - t4;
3f3b5b23
KS
358 }
359 }
9e78289c
KS
360}
361
362struct FFT15 {}
3f3b5b23 363
9e78289c
KS
364const FFT15_INSWAP: [usize; 20] = [ 0, 5, 10, 42, 3, 8, 13, 42, 6, 11, 1, 42, 9, 14, 4, 42, 12, 2, 7, 42 ];
365const FFT15_OUTSWAP: [usize; 20] = [ 0, 10, 5, 42, 6, 1, 11, 42, 12, 7, 2, 42, 3, 13, 8, 42, 9, 4, 14, 42 ];
366
367impl FFT15 {
368 fn new_data(size: usize, _forward: bool) -> FFTData {
369 FFTData { table: Vec::new(), tmp: Vec::new(), twiddle: Vec::new(), size, step: 0, div: 0 }
370 }
371 fn fft3(dst: &mut [FFTComplex], src: &[FFTComplex], step: usize, n: usize) {
372 let s0 = src[0];
373 let s1 = src[1];
374 let s2 = src[2];
375
376 let t0 = s1 + s2;
377 let t1 = s0 - t0.scale(0.5);
378 let t2 = (s2 - s1).rotate().scale(FFT3_CONST);
379
380 dst[FFT15_OUTSWAP[n * 4 + 0] * step] = s0 + t0;
381 dst[FFT15_OUTSWAP[n * 4 + 1] * step] = t1 + t2;
382 dst[FFT15_OUTSWAP[n * 4 + 2] * step] = t1 - t2;
383 }
384 fn ifft3(dst: &mut [FFTComplex], src: &[FFTComplex], step: usize, n: usize) {
385 let s0 = src[0];
386 let s1 = src[1];
387 let s2 = src[2];
388
389 let t0 = s1 + s2;
390 let t1 = s0 - t0.scale(0.5);
391 let t2 = (s2 - s1).rotate().scale(FFT3_CONST);
392
393 dst[FFT15_OUTSWAP[n * 4 + 0] * step] = s0 + t0;
394 dst[FFT15_OUTSWAP[n * 4 + 1] * step] = t1 - t2;
395 dst[FFT15_OUTSWAP[n * 4 + 2] * step] = t1 + t2;
396 }
397 fn fft5(dst: &mut [FFTComplex], src: &[FFTComplex], step: usize, n: usize) {
398 let s0 = src[FFT15_INSWAP[n + 0 * 4] * step];
399 let s1 = src[FFT15_INSWAP[n + 1 * 4] * step];
400 let s2 = src[FFT15_INSWAP[n + 2 * 4] * step];
401 let s3 = src[FFT15_INSWAP[n + 3 * 4] * step];
402 let s4 = src[FFT15_INSWAP[n + 4 * 4] * step];
403
404 let t0 = s1 + s4;
405 let t1 = s1 - s4;
406 let t2 = s2 + s3;
407 let t3 = s2 - s3;
408 let (t4, t5) = twiddle5(t0, t1, FFT5_CONST2);
409 let (t6, t7) = twiddle5(t2, t3, FFT5_CONST1);
410 let (t8, t9) = twiddle5(t0, t1, FFT5_CONST1);
411 let (ta, tb) = twiddle5(t2, t3, FFT5_CONST2);
412
413 dst[0 * 3] = s0 + t0 + t2;
414 dst[1 * 3] = s0 + t5 - t6;
415 dst[2 * 3] = s0 - t8 + ta;
416 dst[3 * 3] = s0 - t9 + tb;
417 dst[4 * 3] = s0 + t4 - t7;
418 }
419 fn ifft5(dst: &mut [FFTComplex], src: &[FFTComplex], step: usize, n: usize) {
420 let s0 = src[FFT15_INSWAP[n + 0 * 4] * step];
421 let s1 = src[FFT15_INSWAP[n + 1 * 4] * step];
422 let s2 = src[FFT15_INSWAP[n + 2 * 4] * step];
423 let s3 = src[FFT15_INSWAP[n + 3 * 4] * step];
424 let s4 = src[FFT15_INSWAP[n + 4 * 4] * step];
425
426 let t0 = s1 + s4;
427 let t1 = s1 - s4;
428 let t2 = s2 + s3;
429 let t3 = s2 - s3;
430 let (t4, t5) = twiddle5(t0, t1, FFT5_CONST2);
431 let (t6, t7) = twiddle5(t2, t3, FFT5_CONST1);
432 let (t8, t9) = twiddle5(t0, t1, FFT5_CONST1);
433 let (ta, tb) = twiddle5(t2, t3, FFT5_CONST2);
434
435 dst[0 * 3] = s0 + t0 + t2;
436 dst[1 * 3] = s0 + t4 - t7;
437 dst[2 * 3] = s0 - t9 + tb;
438 dst[3 * 3] = s0 - t8 + ta;
439 dst[4 * 3] = s0 + t5 - t6;
440 }
441 fn fft(_fftdata: &mut FFTData, data: &mut [FFTComplex], step: usize) {
442 let mut tmp = [FFTC_ZERO; 15];
443 for n in 0..3 {
444 Self::fft5(&mut tmp[n..], data, step, n);
445 }
446 for n in 0..5 {
447 Self::fft3(data, &tmp[n * 3..][..3], step, n);
448 }
449 }
450 fn ifft(_fftdata: &mut FFTData, data: &mut [FFTComplex], step: usize) {
451 let mut tmp = [FFTC_ZERO; 15];
452 for n in 0..3 {
453 Self::ifft5(&mut tmp[n..], data, step, n);
454 }
455 for n in 0..5 {
456 Self::ifft3(data, &tmp[n * 3..][..3], step, n);
457 }
458 }
459}
460
461
462enum FFTMode {
463 Generic(usize),
464 SplitRadix(u8),
465 Prime15,
466}
467
468impl FFTMode {
469 fn permute(&self, perms: &mut [usize]) {
470 match *self {
471 FFTMode::Generic(_) => {},
472 FFTMode::SplitRadix(bits) => {
473 let div = perms.len() >> bits;
474 gen_sr_perms(perms, 1 << bits);
475 if div > 1 {
476 for i in 0..(1 << bits) {
477 perms[i] *= div;
478 }
479 for i in 1..div {
480 for j in 0..(1 << bits) {
481 perms[(i << bits) + j] = perms[j] + i;
3f3b5b23 482 }
3f3b5b23 483 }
9e78289c
KS
484 }
485 },
486 FFTMode::Prime15 => {},
3f3b5b23
KS
487 };
488 }
9e78289c
KS
489 fn do_fft(&self, fftdata: &mut FFTData, data: &mut [FFTComplex]) {
490 match *self {
491 FFTMode::Generic(size) => FFTGeneric::fft(fftdata, size, data, 1),
492 FFTMode::SplitRadix(bits) => FFTSplitRadix::fft(fftdata, bits, data),
493 FFTMode::Prime15 => FFT15::fft(fftdata, data, 1),
494 };
495 }
496 fn do_fft2(&self, fftdata: &mut FFTData, data: &mut [FFTComplex], step: usize) {
497 match *self {
498 FFTMode::Generic(size) => FFTGeneric::fft(fftdata, size, data, step),
499 FFTMode::SplitRadix(_) => unreachable!(),
500 FFTMode::Prime15 => FFT15::fft(fftdata, data, step),
501 };
502 }
503 fn do_ifft(&self, fftdata: &mut FFTData, data: &mut [FFTComplex]) {
504 match *self {
505 FFTMode::Generic(size) => FFTGeneric::ifft(fftdata, size, data, 1),
506 FFTMode::SplitRadix(bits) => FFTSplitRadix::ifft(fftdata, bits, data),
507 FFTMode::Prime15 => FFT15::ifft(fftdata, data, 1),
508 };
509 }
510 fn do_ifft2(&self, fftdata: &mut FFTData, data: &mut [FFTComplex], step: usize) {
511 match *self {
512 FFTMode::Generic(size) => FFTGeneric::ifft(fftdata, size, data, step),
513 FFTMode::SplitRadix(_) => unreachable!(),
514 FFTMode::Prime15 => FFT15::ifft(fftdata, data, step),
515 };
516 }
517 fn get_size(&self) -> usize {
518 match *self {
519 FFTMode::Generic(size) => size,
520 FFTMode::SplitRadix(bits) => 1 << bits,
521 FFTMode::Prime15 => 15,
522 }
523 }
524}
3f3b5b23 525
9e78289c
KS
526pub struct FFT {
527 perms: Vec<usize>,
528 swaps: Vec<usize>,
529 ffts: Vec<(FFTMode, FFTData)>,
530}
531
532impl FFT {
533 pub fn do_fft(&mut self, src: &[FFTComplex], dst: &mut [FFTComplex]) {
534 for k in 0..src.len() { dst[k] = src[self.perms[k]]; }
535 self.do_fft_core(dst);
536 }
537 pub fn do_ifft(&mut self, src: &[FFTComplex], dst: &mut [FFTComplex]) {
538 for k in 0..src.len() { dst[k] = src[self.perms[k]]; }
539 self.do_ifft_core(dst);
540 }
541 pub fn do_fft_inplace(&mut self, data: &mut [FFTComplex]) {
3f3b5b23
KS
542 for idx in 0..self.swaps.len() {
543 let nidx = self.swaps[idx];
544 if idx != nidx {
545 let t = data[nidx];
546 data[nidx] = data[idx];
547 data[idx] = t;
548 }
549 }
9e78289c
KS
550 self.do_fft_core(data);
551 }
552 pub fn do_ifft_inplace(&mut self, data: &mut [FFTComplex]) {
553 for idx in 0..self.swaps.len() {
554 let nidx = self.swaps[idx];
555 if idx != nidx {
556 let t = data[nidx];
557 data[nidx] = data[idx];
558 data[idx] = t;
559 }
560 }
561 self.do_ifft_core(data);
562 }
563 fn do_fft_core(&mut self, data: &mut [FFTComplex]) {
564 for el in self.ffts.iter_mut() {
565 let (mode, ref mut fftdata) = el;
566 let bsize = mode.get_size();
567 let div = fftdata.div;
568 let step = fftdata.step;
569 if step == 1 {
570 mode.do_fft(fftdata, data);
571 for i in 1..div {
572 mode.do_fft(fftdata, &mut data[i * bsize..]);
573 }
574 } else {
575 mode.do_fft2(fftdata, data, div);
576 let mut toff = bsize;
577 for i in 1..div {
578 for j in 1..bsize {
579 data[i + j * div] *= fftdata.twiddle[toff + j];
3f3b5b23 580 }
9e78289c
KS
581 mode.do_fft2(fftdata, &mut data[i..], div);
582 toff += bsize;
583 }
584 }
585 }
586 }
587 fn do_ifft_core(&mut self, data: &mut [FFTComplex]) {
588 for el in self.ffts.iter_mut() {
589 let (mode, ref mut fftdata) = el;
590 let bsize = mode.get_size();
591 let div = fftdata.div;
592 let step = fftdata.step;
593 if step == 1 {
594 mode.do_ifft(fftdata, data);
595 for i in 1..div {
596 mode.do_ifft(fftdata, &mut data[i * bsize..]);
597 }
598 } else {
599 mode.do_ifft2(fftdata, data, div);
600 let mut toff = bsize;
601 for i in 1..div {
602 for j in 1..bsize {
603 data[i + j * div] *= fftdata.twiddle[toff + j];
3f3b5b23 604 }
9e78289c
KS
605 mode.do_ifft2(fftdata, &mut data[i..], div);
606 toff += bsize;
607 }
608 }
609 }
3f3b5b23
KS
610 }
611}
612
613pub struct FFTBuilder {
614}
615
9e78289c 616/*fn reverse_bits(inval: u32) -> u32 {
3f3b5b23
KS
617 const REV_TAB: [u8; 16] = [
618 0b0000, 0b1000, 0b0100, 0b1100, 0b0010, 0b1010, 0b0110, 0b1110,
619 0b0001, 0b1001, 0b0101, 0b1101, 0b0011, 0b1011, 0b0111, 0b1111,
620 ];
621
622 let mut ret = 0;
623 let mut val = inval;
624 for _ in 0..8 {
625 ret = (ret << 4) | (REV_TAB[(val & 0xF) as usize] as u32);
626 val = val >> 4;
627 }
628 ret
629}
630
631fn swp_idx(idx: usize, bits: u32) -> usize {
632 let s = reverse_bits(idx as u32) as usize;
633 s >> (32 - bits)
9e78289c 634}*/
3f3b5b23
KS
635
636fn gen_sr_perms(swaps: &mut [usize], size: usize) {
637 if size <= 4 { return; }
638 let mut evec: Vec<usize> = Vec::with_capacity(size / 2);
639 let mut ovec1: Vec<usize> = Vec::with_capacity(size / 4);
640 let mut ovec2: Vec<usize> = Vec::with_capacity(size / 4);
641 for k in 0..size/4 {
642 evec.push (swaps[k * 4 + 0]);
643 ovec1.push(swaps[k * 4 + 1]);
644 evec.push (swaps[k * 4 + 2]);
645 ovec2.push(swaps[k * 4 + 3]);
646 }
647 for k in 0..size/2 { swaps[k] = evec[k]; }
648 for k in 0..size/4 { swaps[k + size/2] = ovec1[k]; }
649 for k in 0..size/4 { swaps[k + 3*size/4] = ovec2[k]; }
650 gen_sr_perms(&mut swaps[0..size/2], size/2);
651 gen_sr_perms(&mut swaps[size/2..3*size/4], size/4);
652 gen_sr_perms(&mut swaps[3*size/4..], size/4);
653}
654
655fn gen_swaps_for_perm(swaps: &mut Vec<usize>, perms: &Vec<usize>) {
656 let mut idx_arr: Vec<usize> = Vec::with_capacity(perms.len());
657 for i in 0..perms.len() { idx_arr.push(i); }
658 let mut run_size = 0;
659 let mut run_pos = 0;
660 for idx in 0..perms.len() {
661 if perms[idx] == idx_arr[idx] {
662 if run_size == 0 { run_pos = idx; }
663 run_size += 1;
664 } else {
665 for i in 0..run_size {
666 swaps.push(run_pos + i);
667 }
668 run_size = 0;
669 let mut spos = idx + 1;
670 while idx_arr[spos] != perms[idx] { spos += 1; }
671 idx_arr[spos] = idx_arr[idx];
672 idx_arr[idx] = perms[idx];
673 swaps.push(spos);
674 }
675 }
676}
677
678impl FFTBuilder {
9e78289c
KS
679 fn generate_twiddle(data: &mut FFTData, size: usize, cur_size: usize, forward: bool) {
680 if size == cur_size { return; }
681 data.twiddle = Vec::with_capacity(size);
682 let div = size / cur_size;
683 let base = if forward { -2.0 * consts::PI / (size as f32) } else { 2.0 * consts::PI / (size as f32) };
684 for n in 0..div {
685 for k in 0..cur_size {
686 data.twiddle.push(FFTComplex::exp(base * ((k * n) as f32)));
687 }
688 }
689 }
690 pub fn new_fft(size: usize, forward: bool) -> FFT {
691 let mut ffts: Vec<(FFTMode, FFTData)> = Vec::with_capacity(1);
692 let mut perms: Vec<usize> = Vec::with_capacity(size);
693 let mut swaps: Vec<usize> = Vec::with_capacity(size);
694 let mut rem_size = size;
695 if rem_size.trailing_zeros() > 0 {
696 let bits = rem_size.trailing_zeros() as u8;
697 let mut data = FFTSplitRadix::new_data(bits, forward);
698 Self::generate_twiddle(&mut data, size, 1 << bits, forward);
699 data.step = 1;
700 data.div = rem_size >> bits;
701 ffts.push((FFTMode::SplitRadix(bits), data));
702 rem_size >>= bits;
703 }
704 if (rem_size % 15) == 0 {
705 let mut data = FFT15::new_data(size, forward);
706 Self::generate_twiddle(&mut data, size, 15, forward);
707 data.step = size / rem_size;
708 data.div = size / rem_size;
709 ffts.push((FFTMode::Prime15, data));
710 rem_size /= 15;
711 }
712 if rem_size > 1 {
713 let mut data = FFTGeneric::new_data(rem_size, forward);
714 Self::generate_twiddle(&mut data, size, rem_size, forward);
715 data.step = size / rem_size;
716 data.div = size / rem_size;
717 ffts.push((FFTMode::Generic(rem_size), data));
718 }
719
720 for i in 0..size {
721 perms.push(i);
722 }
723 for (mode, _) in ffts.iter().rev() {
724 mode.permute(&mut perms);
725 }
3f3b5b23 726 gen_swaps_for_perm(&mut swaps, &perms);
9e78289c
KS
727
728 FFT { perms, swaps, ffts }
3f3b5b23
KS
729 }
730}
731
c3528afd
KS
732pub struct RDFT {
733 table: Vec<FFTComplex>,
734 fft: FFT,
735 fwd: bool,
736 size: usize,
b3799375 737 fwd_fft: bool,
c3528afd
KS
738}
739
740fn crossadd(a: &FFTComplex, b: &FFTComplex) -> FFTComplex {
741 FFTComplex { re: a.re + b.re, im: a.im - b.im }
742}
743
744impl RDFT {
745 pub fn do_rdft(&mut self, src: &[FFTComplex], dst: &mut [FFTComplex]) {
746 dst.copy_from_slice(src);
747 self.do_rdft_inplace(dst);
748 }
749 pub fn do_rdft_inplace(&mut self, buf: &mut [FFTComplex]) {
750 if !self.fwd {
751 for n in 0..self.size/2 {
752 let in0 = buf[n + 1];
753 let in1 = buf[self.size - n - 1];
754
755 let t0 = crossadd(&in0, &in1);
756 let t1 = FFTComplex { re: in1.im + in0.im, im: in1.re - in0.re };
757 let tab = self.table[n];
758 let t2 = FFTComplex { re: t1.im * tab.im + t1.re * tab.re, im: t1.im * tab.re - t1.re * tab.im };
759
760 buf[n + 1] = FFTComplex { re: t0.im - t2.im, im: t0.re - t2.re }; // (t0 - t2).conj().rotate()
761 buf[self.size - n - 1] = (t0 + t2).rotate();
762 }
763 let a = buf[0].re;
764 let b = buf[0].im;
765 buf[0].re = a - b;
766 buf[0].im = a + b;
767 }
9e78289c
KS
768 if self.fwd_fft {
769 self.fft.do_fft_inplace(buf);
770 } else {
771 self.fft.do_ifft_inplace(buf);
772 }
c3528afd
KS
773 if self.fwd {
774 for n in 0..self.size/2 {
775 let in0 = buf[n + 1];
776 let in1 = buf[self.size - n - 1];
777
778 let t0 = crossadd(&in0, &in1).scale(0.5);
779 let t1 = FFTComplex { re: in0.im + in1.im, im: in0.re - in1.re };
780 let t2 = t1 * self.table[n];
781
782 buf[n + 1] = crossadd(&t0, &t2);
783 buf[self.size - n - 1] = FFTComplex { re: t0.re - t2.re, im: -(t0.im + t2.im) };
784 }
785 let a = buf[0].re;
786 let b = buf[0].im;
787 buf[0].re = a + b;
788 buf[0].im = a - b;
789 } else {
790 for n in 0..self.size {
791 buf[n] = FFTComplex{ re: buf[n].im, im: buf[n].re };
792 }
793 }
794 }
795}
796
797pub struct RDFTBuilder {
798}
799
800impl RDFTBuilder {
9e78289c 801 pub fn new_rdft(size: usize, forward: bool, forward_fft: bool) -> RDFT {
c3528afd
KS
802 let mut table: Vec<FFTComplex> = Vec::with_capacity(size / 4);
803 let (base, scale) = if forward { (consts::PI / (size as f32), 0.5) } else { (-consts::PI / (size as f32), 1.0) };
804 for i in 0..size/2 {
805 table.push(FFTComplex::exp(base * ((i + 1) as f32)).scale(scale));
806 }
9e78289c 807 let fft = FFTBuilder::new_fft(size, forward_fft);
b3799375 808 RDFT { table, fft, size, fwd: forward, fwd_fft: forward_fft }
c3528afd
KS
809 }
810}
811
3f3b5b23
KS
812
813#[cfg(test)]
814mod test {
815 use super::*;
816
9e78289c
KS
817 fn test_fft(size: usize) {
818 println!("testing FFT {}", size);
819 let mut fin: Vec<FFTComplex> = Vec::with_capacity(size);
820 let mut fout1: Vec<FFTComplex> = Vec::with_capacity(size);
821 let mut fout2: Vec<FFTComplex> = Vec::with_capacity(size);
822 fin.resize(size, FFTC_ZERO);
823 fout1.resize(size, FFTC_ZERO);
824 fout2.resize(size, FFTC_ZERO);
825 let mut fft = FFTBuilder::new_fft(size, true);
3f3b5b23
KS
826 let mut seed: u32 = 42;
827 for i in 0..fin.len() {
828 seed = seed.wrapping_mul(1664525).wrapping_add(1013904223);
829 let val = (seed >> 16) as i16;
830 fin[i].re = (val as f32) / 256.0;
831 seed = seed.wrapping_mul(1664525).wrapping_add(1013904223);
832 let val = (seed >> 16) as i16;
833 fin[i].im = (val as f32) / 256.0;
834 }
9e78289c
KS
835 fft.do_fft(&fin, &mut fout1);
836 fout2.copy_from_slice(&fin);
837 generic_fft(&mut fout2, true);
3f3b5b23
KS
838
839 for i in 0..fin.len() {
840 assert!((fout1[i].re - fout2[i].re).abs() < 1.0);
841 assert!((fout1[i].im - fout2[i].im).abs() < 1.0);
3f3b5b23 842 }
9e78289c
KS
843 let mut ifft = FFTBuilder::new_fft(size, false);
844 ifft.do_ifft_inplace(&mut fout1);
845 generic_fft(&mut fout2, false);
3f3b5b23 846
9e78289c 847 let sc = 1.0 / (size as f32);
3f3b5b23
KS
848 for i in 0..fin.len() {
849 assert!((fin[i].re - fout1[i].re * sc).abs() < 1.0);
850 assert!((fin[i].im - fout1[i].im * sc).abs() < 1.0);
9e78289c
KS
851 assert!((fout1[i].re - fout2[i].re).abs() * sc < 1.0);
852 assert!((fout1[i].im - fout2[i].im).abs() * sc < 1.0);
3f3b5b23
KS
853 }
854 }
c3528afd 855
9e78289c
KS
856 #[test]
857 fn test_ffts() {
858 test_fft(3);
859 test_fft(5);
860 test_fft(16);
861 test_fft(15);
862 test_fft(60);
863 test_fft(256);
864 test_fft(240);
865 }
866
c3528afd
KS
867 #[test]
868 fn test_rdft() {
869 let mut fin: [FFTComplex; 128] = [FFTC_ZERO; 128];
870 let mut fout1: [FFTComplex; 128] = [FFTC_ZERO; 128];
9e78289c 871 let mut rdft = RDFTBuilder::new_rdft(fin.len(), true, true);
c3528afd
KS
872 let mut seed: u32 = 42;
873 for i in 0..fin.len() {
874 seed = seed.wrapping_mul(1664525).wrapping_add(1013904223);
875 let val = (seed >> 16) as i16;
876 fin[i].re = (val as f32) / 256.0;
877 seed = seed.wrapping_mul(1664525).wrapping_add(1013904223);
878 let val = (seed >> 16) as i16;
879 fin[i].im = (val as f32) / 256.0;
880 }
881 rdft.do_rdft(&fin, &mut fout1);
9e78289c 882 let mut irdft = RDFTBuilder::new_rdft(fin.len(), false, true);
c3528afd
KS
883 irdft.do_rdft_inplace(&mut fout1);
884
885 for i in 0..fin.len() {
886 let tst = fout1[i].scale(0.5/(fout1.len() as f32));
887 assert!((tst.re - fin[i].re).abs() < 1.0);
888 assert!((tst.im - fin[i].im).abs() < 1.0);
889 }
890 }
3f3b5b23 891}