/* ----------------------------------------------------------------------- Copyright: 2010-2016, iMinds-Vision Lab, University of Antwerp 2014-2016, CWI, Amsterdam Contact: astra@uantwerpen.be Website: http://www.astra-toolbox.com/ This file is part of the ASTRA Toolbox. The ASTRA Toolbox is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. The ASTRA Toolbox is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------- */ #include "astra/Fourier.h" #include <cmath> namespace astra { /* Copyright Takuya OOURA, 1996-2001 You may use, copy, modify and distribute this code for any purpose (include commercial use) and without fee. Source: http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html Fast Fourier/Cosine/Sine Transform dimension :one data length :power of 2 decimation :frequency radix :split-radix data :inplace table :use functions cdft: Complex Discrete Fourier Transform rdft: Real Discrete Fourier Transform ddct: Discrete Cosine Transform ddst: Discrete Sine Transform dfct: Cosine Transform of RDFT (Real Symmetric DFT) dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) function prototypes void cdft(int, int, float32 *, int *, float32 *); void rdft(int, int, float32 *, int *, float32 *); void ddct(int, int, float32 *, int *, float32 *); void ddst(int, int, float32 *, int *, float32 *); void dfct(int, float32 *, float32 *, int *, float32 *); void dfst(int, float32 *, float32 *, int *, float32 *); macro definitions USE_CDFT_PTHREADS : default=not defined CDFT_THREADS_BEGIN_N : must be >= 512, default=8192 CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536 USE_CDFT_WINTHREADS : default=not defined CDFT_THREADS_BEGIN_N : must be >= 512, default=32768 CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288 -------- Complex DFT (Discrete Fourier Transform) -------- [definition] <case1> X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n <case2> X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n (notes: sum_j=0^n-1 is a summation from j=0 to n-1) [usage] <case1> ip[0] = 0; // first time only cdft(2*n, 1, a, ip, w); <case2> ip[0] = 0; // first time only cdft(2*n, -1, a, ip, w); [parameters] 2*n :data length (int) n >= 1, n = power of 2 a[0...2*n-1] :input/output data (float32 *) input data a[2*j] = Re(x[j]), a[2*j+1] = Im(x[j]), 0<=j<n output data a[2*k] = Re(X[k]), a[2*k+1] = Im(X[k]), 0<=k<n ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n) strictly, length of ip >= 2+(1<<(int)(log(n+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n/2-1] :cos/sin table (float32 *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of cdft(2*n, -1, a, ip, w); is cdft(2*n, 1, a, ip, w); for (j = 0; j <= 2 * n - 1; j++) { a[j] *= 1.0 / n; } . -------- Real DFT / Inverse of Real DFT -------- [definition] <case1> RDFT R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2 <case2> IRDFT (excluding scale) a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n [usage] <case1> ip[0] = 0; // first time only rdft(n, 1, a, ip, w); <case2> ip[0] = 0; // first time only rdft(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (float32 *) <case1> output data a[2*k] = R[k], 0<=k<n/2 a[2*k+1] = I[k], 0<k<n/2 a[1] = R[n/2] <case2> input data a[2*j] = R[j], 0<=j<n/2 a[2*j+1] = I[j], 0<j<n/2 a[1] = R[n/2] ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n/2-1] :cos/sin table (float32 *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of rdft(n, 1, a, ip, w); is rdft(n, -1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- [definition] <case1> IDCT (excluding scale) C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n <case2> DCT C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n [usage] <case1> ip[0] = 0; // first time only ddct(n, 1, a, ip, w); <case2> ip[0] = 0; // first time only ddct(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (float32 *) output data a[k] = C[k], 0<=k<n ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/4-1] :cos/sin table (float32 *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddct(n, -1, a, ip, w); is a[0] *= 0.5; ddct(n, 1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- DST (Discrete Sine Transform) / Inverse of DST -------- [definition] <case1> IDST (excluding scale) S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n <case2> DST S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n [usage] <case1> ip[0] = 0; // first time only ddst(n, 1, a, ip, w); <case2> ip[0] = 0; // first time only ddst(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (float32 *) <case1> input data a[j] = A[j], 0<j<n a[0] = A[n] output data a[k] = S[k], 0<=k<n <case2> output data a[k] = S[k], 0<k<n a[0] = S[n] ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/4-1] :cos/sin table (float32 *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddst(n, -1, a, ip, w); is a[0] *= 0.5; ddst(n, 1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- Cosine Transform of RDFT (Real Symmetric DFT) -------- [definition] C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n [usage] ip[0] = 0; // first time only dfct(n, a, t, ip, w); [parameters] n :data length - 1 (int) n >= 2, n = power of 2 a[0...n] :input/output data (float32 *) output data a[k] = C[k], 0<=k<=n t[0...n/2] :work area (float32 *) ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n/4) strictly, length of ip >= 2+(1<<(int)(log(n/4+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/8-1] :cos/sin table (float32 *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of a[0] *= 0.5; a[n] *= 0.5; dfct(n, a, t, ip, w); is a[0] *= 0.5; a[n] *= 0.5; dfct(n, a, t, ip, w); for (j = 0; j <= n; j++) { a[j] *= 2.0 / n; } . -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- [definition] S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n [usage] ip[0] = 0; // first time only dfst(n, a, t, ip, w); [parameters] n :data length + 1 (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (float32 *) output data a[k] = S[k], 0<k<n (a[0] is used for work area) t[0...n/2-1] :work area (float32 *) ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n/4) strictly, length of ip >= 2+(1<<(int)(log(n/4+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/8-1] :cos/sin table (float32 *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of dfst(n, a, t, ip, w); is dfst(n, a, t, ip, w); for (j = 1; j <= n - 1; j++) { a[j] *= 2.0 / n; } . Appendix : The cos/sin table is recalculated when the larger table required. w[] and ip[] are compatible with all routines. */ static int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w); static void bitrv208(float32 *a); static void bitrv208neg(float32 *a); static void bitrv216(float32 *a); static void bitrv216neg(float32 *a); static void bitrv2conj(int n, int *ip, float32 *a); static void bitrv2(int n, int *ip, float32 *a); static void cftb040(float32 *a); static void cftb1st(int n, float32 *a, float32 *w); static void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w); static void cftf040(float32 *a); static void cftf081(float32 *a, float32 *w); static void cftf082(float32 *a, float32 *w); static void cftf161(float32 *a, float32 *w); static void cftf162(float32 *a, float32 *w); static void cftf1st(int n, float32 *a, float32 *w); static void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w); static void cftfx41(int n, float32 *a, int nw, float32 *w); static void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w); static void cftmdl1(int n, float32 *a, float32 *w); static void cftmdl2(int n, float32 *a, float32 *w); static void *cftrec1_th(void *p); static void *cftrec2_th(void *p); static void cftrec4(int n, float32 *a, int nw, float32 *w); static void cftx020(float32 *a); static void dctsub(int n, float32 *a, int nc, float32 *c); static void dstsub(int n, float32 *a, int nc, float32 *c); static void makect(int nc, int *ip, float32 *c); static void makeipt(int nw, int *ip); static void makewt(int nw, int *ip, float32 *w); static void rftbsub(int n, float32 *a, int nc, float32 *c); static void rftfsub(int n, float32 *a, int nc, float32 *c); #ifdef USE_CDFT_THREADS static void cftrec4_th(int n, float32 *a, int nw, float32 *w); #endif /* USE_CDFT_THREADS */ _AstraExport void cdft(int n, int isgn, float32 *a, int *ip, float32 *w) { int nw; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } if (isgn >= 0) { cftfsub(n, a, ip, nw, w); } else { cftbsub(n, a, ip, nw, w); } } _AstraExport void rdft(int n, int isgn, float32 *a, int *ip, float32 *w) { int nw, nc; float32 xi; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 2)) { nc = n >> 2; makect(nc, ip, w + nw); } if (isgn >= 0) { if (n > 4) { cftfsub(n, a, ip, nw, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, ip, nw, w); } xi = a[0] - a[1]; a[0] += a[1]; a[1] = xi; } else { a[1] = 0.5 * (a[0] - a[1]); a[0] -= a[1]; if (n > 4) { rftbsub(n, a, nc, w + nw); cftbsub(n, a, ip, nw, w); } else if (n == 4) { cftbsub(n, a, ip, nw, w); } } } _AstraExport void ddct(int n, int isgn, float32 *a, int *ip, float32 *w) { int j, nw, nc; float32 xr; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = a[j] - a[j - 1]; a[j] += a[j - 1]; } a[1] = a[0] - xr; a[0] += xr; if (n > 4) { rftbsub(n, a, nc, w + nw); cftbsub(n, a, ip, nw, w); } else if (n == 4) { cftbsub(n, a, ip, nw, w); } } dctsub(n, a, nc, w + nw); if (isgn >= 0) { if (n > 4) { cftfsub(n, a, ip, nw, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, ip, nw, w); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = a[j] - a[j + 1]; a[j] += a[j + 1]; } a[n - 1] = xr; } } _AstraExport void ddst(int n, int isgn, float32 *a, int *ip, float32 *w) { int j, nw, nc; float32 xr; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = -a[j] - a[j - 1]; a[j] -= a[j - 1]; } a[1] = a[0] + xr; a[0] -= xr; if (n > 4) { rftbsub(n, a, nc, w + nw); cftbsub(n, a, ip, nw, w); } else if (n == 4) { cftbsub(n, a, ip, nw, w); } } dstsub(n, a, nc, w + nw); if (isgn >= 0) { if (n > 4) { cftfsub(n, a, ip, nw, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, ip, nw, w); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = -a[j] - a[j + 1]; a[j] -= a[j + 1]; } a[n - 1] = -xr; } } _AstraExport void dfct(int n, float32 *a, float32 *t, int *ip, float32 *w) { int j, k, l, m, mh, nw, nc; float32 xr, xi, yr, yi; nw = ip[0]; if (n > (nw << 3)) { nw = n >> 3; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 1)) { nc = n >> 1; makect(nc, ip, w + nw); } m = n >> 1; yi = a[m]; xi = a[0] + a[n]; a[0] -= a[n]; t[0] = xi - yi; t[m] = xi + yi; if (n > 2) { mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; xr = a[j] - a[n - j]; xi = a[j] + a[n - j]; yr = a[k] - a[n - k]; yi = a[k] + a[n - k]; a[j] = xr; a[k] = yr; t[j] = xi - yi; t[k] = xi + yi; } t[mh] = a[mh] + a[n - mh]; a[mh] -= a[n - mh]; dctsub(m, a, nc, w + nw); if (m > 4) { cftfsub(m, a, ip, nw, w); rftfsub(m, a, nc, w + nw); } else if (m == 4) { cftfsub(m, a, ip, nw, w); } a[n - 1] = a[0] - a[1]; a[1] = a[0] + a[1]; for (j = m - 2; j >= 2; j -= 2) { a[2 * j + 1] = a[j] + a[j + 1]; a[2 * j - 1] = a[j] - a[j + 1]; } l = 2; m = mh; while (m >= 2) { dctsub(m, t, nc, w + nw); if (m > 4) { cftfsub(m, t, ip, nw, w); rftfsub(m, t, nc, w + nw); } else if (m == 4) { cftfsub(m, t, ip, nw, w); } a[n - l] = t[0] - t[1]; a[l] = t[0] + t[1]; k = 0; for (j = 2; j < m; j += 2) { k += l << 2; a[k - l] = t[j] - t[j + 1]; a[k + l] = t[j] + t[j + 1]; } l <<= 1; mh = m >> 1; for (j = 0; j < mh; j++) { k = m - j; t[j] = t[m + k] - t[m + j]; t[k] = t[m + k] + t[m + j]; } t[mh] = t[m + mh]; m = mh; } a[l] = t[0]; a[n] = t[2] - t[1]; a[0] = t[2] + t[1]; } else { a[1] = a[0]; a[2] = t[0]; a[0] = t[1]; } } _AstraExport void dfst(int n, float32 *a, float32 *t, int *ip, float32 *w) { int j, k, l, m, mh, nw, nc; float32 xr, xi, yr, yi; nw = ip[0]; if (n > (nw << 3)) { nw = n >> 3; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 1)) { nc = n >> 1; makect(nc, ip, w + nw); } if (n > 2) { m = n >> 1; mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; xr = a[j] + a[n - j]; xi = a[j] - a[n - j]; yr = a[k] + a[n - k]; yi = a[k] - a[n - k]; a[j] = xr; a[k] = yr; t[j] = xi + yi; t[k] = xi - yi; } t[0] = a[mh] - a[n - mh]; a[mh] += a[n - mh]; a[0] = a[m]; dstsub(m, a, nc, w + nw); if (m > 4) { cftfsub(m, a, ip, nw, w); rftfsub(m, a, nc, w + nw); } else if (m == 4) { cftfsub(m, a, ip, nw, w); } a[n - 1] = a[1] - a[0]; a[1] = a[0] + a[1]; for (j = m - 2; j >= 2; j -= 2) { a[2 * j + 1] = a[j] - a[j + 1]; a[2 * j - 1] = -a[j] - a[j + 1]; } l = 2; m = mh; while (m >= 2) { dstsub(m, t, nc, w + nw); if (m > 4) { cftfsub(m, t, ip, nw, w); rftfsub(m, t, nc, w + nw); } else if (m == 4) { cftfsub(m, t, ip, nw, w); } a[n - l] = t[1] - t[0]; a[l] = t[0] + t[1]; k = 0; for (j = 2; j < m; j += 2) { k += l << 2; a[k - l] = -t[j] - t[j + 1]; a[k + l] = t[j] - t[j + 1]; } l <<= 1; mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; t[j] = t[m + k] + t[m + j]; t[k] = t[m + k] - t[m + j]; } t[0] = t[m + mh]; m = mh; } a[l] = t[0]; } a[0] = 0; } /* -------- initializing routines -------- */ static void makewt(int nw, int *ip, float32 *w) { int j, nwh, nw0, nw1; float32 delta, wn4r, wk1r, wk1i, wk3r, wk3i; ip[0] = nw; ip[1] = 1; if (nw > 2) { nwh = nw >> 1; delta = atan(1.0) / nwh; wn4r = cos(delta * nwh); w[0] = 1; w[1] = wn4r; if (nwh == 4) { w[2] = cos(delta * 2); w[3] = sin(delta * 2); } else if (nwh > 4) { makeipt(nw, ip); w[2] = 0.5 / cos(delta * 2); w[3] = 0.5 / cos(delta * 6); for (j = 4; j < nwh; j += 4) { w[j] = cos(delta * j); w[j + 1] = sin(delta * j); w[j + 2] = cos(3 * delta * j); w[j + 3] = -sin(3 * delta * j); } } nw0 = 0; while (nwh > 2) { nw1 = nw0 + nwh; nwh >>= 1; w[nw1] = 1; w[nw1 + 1] = wn4r; if (nwh == 4) { wk1r = w[nw0 + 4]; wk1i = w[nw0 + 5]; w[nw1 + 2] = wk1r; w[nw1 + 3] = wk1i; } else if (nwh > 4) { wk1r = w[nw0 + 4]; wk3r = w[nw0 + 6]; w[nw1 + 2] = 0.5 / wk1r; w[nw1 + 3] = 0.5 / wk3r; for (j = 4; j < nwh; j += 4) { wk1r = w[nw0 + 2 * j]; wk1i = w[nw0 + 2 * j + 1]; wk3r = w[nw0 + 2 * j + 2]; wk3i = w[nw0 + 2 * j + 3]; w[nw1 + j] = wk1r; w[nw1 + j + 1] = wk1i; w[nw1 + j + 2] = wk3r; w[nw1 + j + 3] = wk3i; } } nw0 = nw1; } } } static void makeipt(int nw, int *ip) { int j, l, m, m2, p, q; ip[2] = 0; ip[3] = 16; m = 2; for (l = nw; l > 32; l >>= 2) { m2 = m << 1; q = m2 << 3; for (j = m; j < m2; j++) { p = ip[j] << 2; ip[m + j] = p; ip[m2 + j] = p + q; } m = m2; } } static void makect(int nc, int *ip, float32 *c) { int j, nch; float32 delta; ip[1] = nc; if (nc > 1) { nch = nc >> 1; delta = atan(1.0) / nch; c[0] = cos(delta * nch); c[nch] = 0.5 * c[0]; for (j = 1; j < nch; j++) { c[j] = 0.5 * cos(delta * j); c[nc - j] = 0.5 * sin(delta * j); } } } /* -------- child routines -------- */ #ifdef USE_CDFT_PTHREADS #define USE_CDFT_THREADS #ifndef CDFT_THREADS_BEGIN_N #define CDFT_THREADS_BEGIN_N 8192 #endif #ifndef CDFT_4THREADS_BEGIN_N #define CDFT_4THREADS_BEGIN_N 65536 #endif #include <pthread.h> #include <stdio.h> #include <stdlib.h> #define cdft_thread_t pthread_t #define cdft_thread_create(thp,func,argp) { \ if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \ fprintf(stderr, "cdft thread error\n"); \ exit(1); \ } \ } #define cdft_thread_wait(th) { \ if (pthread_join(th, NULL) != 0) { \ fprintf(stderr, "cdft thread error\n"); \ exit(1); \ } \ } #endif /* USE_CDFT_PTHREADS */ #ifdef USE_CDFT_WINTHREADS #define USE_CDFT_THREADS #ifndef CDFT_THREADS_BEGIN_N #define CDFT_THREADS_BEGIN_N 32768 #endif #ifndef CDFT_4THREADS_BEGIN_N #define CDFT_4THREADS_BEGIN_N 524288 #endif #include <windows.h> #include <stdio.h> #include <stdlib.h> #define cdft_thread_t HANDLE #define cdft_thread_create(thp,func,argp) { \ DWORD thid; \ *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \ if (*(thp) == 0) { \ fprintf(stderr, "cdft thread error\n"); \ exit(1); \ } \ } #define cdft_thread_wait(th) { \ WaitForSingleObject(th, INFINITE); \ CloseHandle(th); \ } #endif /* USE_CDFT_WINTHREADS */ static void cftfsub(int n, float32 *a, int *ip, int nw, float32 *w) { if (n > 8) { if (n > 32) { cftf1st(n, a, &w[nw - (n >> 2)]); #ifdef USE_CDFT_THREADS if (n > CDFT_THREADS_BEGIN_N) { cftrec4_th(n, a, nw, w); } else #endif /* USE_CDFT_THREADS */ if (n > 512) { cftrec4(n, a, nw, w); } else if (n > 128) { cftleaf(n, 1, a, nw, w); } else { cftfx41(n, a, nw, w); } bitrv2(n, ip, a); } else if (n == 32) { cftf161(a, &w[nw - 8]); bitrv216(a); } else { cftf081(a, w); bitrv208(a); } } else if (n == 8) { cftf040(a); } else if (n == 4) { cftx020(a); } } static void cftbsub(int n, float32 *a, int *ip, int nw, float32 *w) { if (n > 8) { if (n > 32) { cftb1st(n, a, &w[nw - (n >> 2)]); #ifdef USE_CDFT_THREADS if (n > CDFT_THREADS_BEGIN_N) { cftrec4_th(n, a, nw, w); } else #endif /* USE_CDFT_THREADS */ if (n > 512) { cftrec4(n, a, nw, w); } else if (n > 128) { cftleaf(n, 1, a, nw, w); } else { cftfx41(n, a, nw, w); } bitrv2conj(n, ip, a); } else if (n == 32) { cftf161(a, &w[nw - 8]); bitrv216neg(a); } else { cftf081(a, w); bitrv208neg(a); } } else if (n == 8) { cftb040(a); } else if (n == 4) { cftx020(a); } } static void bitrv2(int n, int *ip, float32 *a) { int j, j1, k, k1, l, m, nh, nm; float32 xr, xi, yr, yi; m = 1; for (l = n >> 2; l > 8; l >>= 2) { m <<= 1; } nh = n >> 1; nm = 4 * m; if (l == 8) { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 4 * j + 2 * ip[m + k]; k1 = 4 * k + 2 * ip[m + j]; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 4 * k + 2 * ip[m + k]; j1 = k1 + 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= 2; k1 -= nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh + 2; k1 += nh + 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh - nm; k1 += 2 * nm - 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } } else { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 4 * j + ip[m + k]; k1 = 4 * k + ip[m + j]; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 4 * k + ip[m + k]; j1 = k1 + 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } } } static void bitrv2conj(int n, int *ip, float32 *a) { int j, j1, k, k1, l, m, nh, nm; float32 xr, xi, yr, yi; m = 1; for (l = n >> 2; l > 8; l >>= 2) { m <<= 1; } nh = n >> 1; nm = 4 * m; if (l == 8) { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 4 * j + 2 * ip[m + k]; k1 = 4 * k + 2 * ip[m + j]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 += nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 += nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 4 * k + 2 * ip[m + k]; j1 = k1 + 2; k1 += nh; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= 2; k1 -= nh; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh + 2; k1 += nh + 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh - nm; k1 += 2 * nm - 2; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; } } else { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 4 * j + ip[m + k]; k1 = 4 * k + ip[m + j]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 4 * k + ip[m + k]; j1 = k1 + 2; k1 += nh; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; j1 += nm; k1 += nm; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; } } } static void bitrv216(float32 *a) { float32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i; x1r = a[2]; x1i = a[3]; x2r = a[4]; x2i = a[5]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x5r = a[10]; x5i = a[11]; x7r = a[14]; x7i = a[15]; x8r = a[16]; x8i = a[17]; x10r = a[20]; x10i = a[21]; x11r = a[22]; x11i = a[23]; x12r = a[24]; x12i = a[25]; x13r = a[26]; x13i = a[27]; x14r = a[28]; x14i = a[29]; a[2] = x8r; a[3] = x8i; a[4] = x4r; a[5] = x4i; a[6] = x12r; a[7] = x12i; a[8] = x2r; a[9] = x2i; a[10] = x10r; a[11] = x10i; a[14] = x14r; a[15] = x14i; a[16] = x1r; a[17] = x1i; a[20] = x5r; a[21] = x5i; a[22] = x13r; a[23] = x13i; a[24] = x3r; a[25] = x3i; a[26] = x11r; a[27] = x11i; a[28] = x7r; a[29] = x7i; } static void bitrv216neg(float32 *a) { float32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i, x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i, x15r, x15i; x1r = a[2]; x1i = a[3]; x2r = a[4]; x2i = a[5]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x5r = a[10]; x5i = a[11]; x6r = a[12]; x6i = a[13]; x7r = a[14]; x7i = a[15]; x8r = a[16]; x8i = a[17]; x9r = a[18]; x9i = a[19]; x10r = a[20]; x10i = a[21]; x11r = a[22]; x11i = a[23]; x12r = a[24]; x12i = a[25]; x13r = a[26]; x13i = a[27]; x14r = a[28]; x14i = a[29]; x15r = a[30]; x15i = a[31]; a[2] = x15r; a[3] = x15i; a[4] = x7r; a[5] = x7i; a[6] = x11r; a[7] = x11i; a[8] = x3r; a[9] = x3i; a[10] = x13r; a[11] = x13i; a[12] = x5r; a[13] = x5i; a[14] = x9r; a[15] = x9i; a[16] = x1r; a[17] = x1i; a[18] = x14r; a[19] = x14i; a[20] = x6r; a[21] = x6i; a[22] = x10r; a[23] = x10i; a[24] = x2r; a[25] = x2i; a[26] = x12r; a[27] = x12i; a[28] = x4r; a[29] = x4i; a[30] = x8r; a[31] = x8i; } static void bitrv208(float32 *a) { float32 x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i; x1r = a[2]; x1i = a[3]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x6r = a[12]; x6i = a[13]; a[2] = x4r; a[3] = x4i; a[6] = x6r; a[7] = x6i; a[8] = x1r; a[9] = x1i; a[12] = x3r; a[13] = x3i; } static void bitrv208neg(float32 *a) { float32 x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i; x1r = a[2]; x1i = a[3]; x2r = a[4]; x2i = a[5]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x5r = a[10]; x5i = a[11]; x6r = a[12]; x6i = a[13]; x7r = a[14]; x7i = a[15]; a[2] = x7r; a[3] = x7i; a[4] = x3r; a[5] = x3i; a[6] = x5r; a[7] = x5i; a[8] = x1r; a[9] = x1i; a[10] = x6r; a[11] = x6i; a[12] = x2r; a[13] = x2i; a[14] = x4r; a[15] = x4i; } static void cftf1st(int n, float32 *a, float32 *w) { int j, j0, j1, j2, j3, k, m, mh; float32 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i; float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; mh = n >> 3; m = 2 * mh; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] + a[j2]; x0i = a[1] + a[j2 + 1]; x1r = a[0] - a[j2]; x1i = a[1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; a[j2] = x1r - x3i; a[j2 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; wn4r = w[1]; csc1 = w[2]; csc3 = w[3]; wd1r = 1; wd1i = 0; wd3r = 1; wd3i = 0; k = 0; for (j = 2; j < mh - 2; j += 4) { k += 4; wk1r = csc1 * (wd1r + w[k]); wk1i = csc1 * (wd1i + w[k + 1]); wk3r = csc3 * (wd3r + w[k + 2]); wk3i = csc3 * (wd3i + w[k + 3]); wd1r = w[k]; wd1i = w[k + 1]; wd3r = w[k + 2]; wd3i = w[k + 3]; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] + a[j2]; x0i = a[j + 1] + a[j2 + 1]; x1r = a[j] - a[j2]; x1i = a[j + 1] - a[j2 + 1]; y0r = a[j + 2] + a[j2 + 2]; y0i = a[j + 3] + a[j2 + 3]; y1r = a[j + 2] - a[j2 + 2]; y1i = a[j + 3] - a[j2 + 3]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; y2r = a[j1 + 2] + a[j3 + 2]; y2i = a[j1 + 3] + a[j3 + 3]; y3r = a[j1 + 2] - a[j3 + 2]; y3i = a[j1 + 3] - a[j3 + 3]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j + 2] = y0r + y2r; a[j + 3] = y0i + y2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; a[j1 + 2] = y0r - y2r; a[j1 + 3] = y0i - y2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1r * x0r - wk1i * x0i; a[j2 + 1] = wk1r * x0i + wk1i * x0r; x0r = y1r - y3i; x0i = y1i + y3r; a[j2 + 2] = wd1r * x0r - wd1i * x0i; a[j2 + 3] = wd1r * x0i + wd1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r + wk3i * x0i; a[j3 + 1] = wk3r * x0i - wk3i * x0r; x0r = y1r + y3i; x0i = y1i - y3r; a[j3 + 2] = wd3r * x0r + wd3i * x0i; a[j3 + 3] = wd3r * x0i - wd3i * x0r; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; y0r = a[j0 - 2] + a[j2 - 2]; y0i = a[j0 - 1] + a[j2 - 1]; y1r = a[j0 - 2] - a[j2 - 2]; y1i = a[j0 - 1] - a[j2 - 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; y2r = a[j1 - 2] + a[j3 - 2]; y2i = a[j1 - 1] + a[j3 - 1]; y3r = a[j1 - 2] - a[j3 - 2]; y3i = a[j1 - 1] - a[j3 - 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j0 - 2] = y0r + y2r; a[j0 - 1] = y0i + y2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; a[j1 - 2] = y0r - y2r; a[j1 - 1] = y0i - y2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1i * x0r - wk1r * x0i; a[j2 + 1] = wk1i * x0i + wk1r * x0r; x0r = y1r - y3i; x0i = y1i + y3r; a[j2 - 2] = wd1i * x0r - wd1r * x0i; a[j2 - 1] = wd1i * x0i + wd1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3i * x0r + wk3r * x0i; a[j3 + 1] = wk3i * x0i - wk3r * x0r; x0r = y1r + y3i; x0i = y1i - y3r; a[j3 - 2] = wd3i * x0r + wd3r * x0i; a[j3 - 1] = wd3i * x0i - wd3r * x0r; } wk1r = csc1 * (wd1r + wn4r); wk1i = csc1 * (wd1i + wn4r); wk3r = csc3 * (wd3r - wn4r); wk3i = csc3 * (wd3i - wn4r); j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0 - 2] + a[j2 - 2]; x0i = a[j0 - 1] + a[j2 - 1]; x1r = a[j0 - 2] - a[j2 - 2]; x1i = a[j0 - 1] - a[j2 - 1]; x2r = a[j1 - 2] + a[j3 - 2]; x2i = a[j1 - 1] + a[j3 - 1]; x3r = a[j1 - 2] - a[j3 - 2]; x3i = a[j1 - 1] - a[j3 - 1]; a[j0 - 2] = x0r + x2r; a[j0 - 1] = x0i + x2i; a[j1 - 2] = x0r - x2r; a[j1 - 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2 - 2] = wk1r * x0r - wk1i * x0i; a[j2 - 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3 - 2] = wk3r * x0r + wk3i * x0i; a[j3 - 1] = wk3r * x0i - wk3i * x0r; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wn4r * (x0r - x0i); a[j2 + 1] = wn4r * (x0i + x0r); x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = -wn4r * (x0r + x0i); a[j3 + 1] = -wn4r * (x0i - x0r); x0r = a[j0 + 2] + a[j2 + 2]; x0i = a[j0 + 3] + a[j2 + 3]; x1r = a[j0 + 2] - a[j2 + 2]; x1i = a[j0 + 3] - a[j2 + 3]; x2r = a[j1 + 2] + a[j3 + 2]; x2i = a[j1 + 3] + a[j3 + 3]; x3r = a[j1 + 2] - a[j3 + 2]; x3i = a[j1 + 3] - a[j3 + 3]; a[j0 + 2] = x0r + x2r; a[j0 + 3] = x0i + x2i; a[j1 + 2] = x0r - x2r; a[j1 + 3] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2 + 2] = wk1i * x0r - wk1r * x0i; a[j2 + 3] = wk1i * x0i + wk1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3 + 2] = wk3i * x0r + wk3r * x0i; a[j3 + 3] = wk3i * x0i - wk3r * x0r; } static void cftb1st(int n, float32 *a, float32 *w) { int j, j0, j1, j2, j3, k, m, mh; float32 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i; float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; mh = n >> 3; m = 2 * mh; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] + a[j2]; x0i = -a[1] - a[j2 + 1]; x1r = a[0] - a[j2]; x1i = -a[1] + a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[0] = x0r + x2r; a[1] = x0i - x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; a[j2] = x1r + x3i; a[j2 + 1] = x1i + x3r; a[j3] = x1r - x3i; a[j3 + 1] = x1i - x3r; wn4r = w[1]; csc1 = w[2]; csc3 = w[3]; wd1r = 1; wd1i = 0; wd3r = 1; wd3i = 0; k = 0; for (j = 2; j < mh - 2; j += 4) { k += 4; wk1r = csc1 * (wd1r + w[k]); wk1i = csc1 * (wd1i + w[k + 1]); wk3r = csc3 * (wd3r + w[k + 2]); wk3i = csc3 * (wd3i + w[k + 3]); wd1r = w[k]; wd1i = w[k + 1]; wd3r = w[k + 2]; wd3i = w[k + 3]; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] + a[j2]; x0i = -a[j + 1] - a[j2 + 1]; x1r = a[j] - a[j2]; x1i = -a[j + 1] + a[j2 + 1]; y0r = a[j + 2] + a[j2 + 2]; y0i = -a[j + 3] - a[j2 + 3]; y1r = a[j + 2] - a[j2 + 2]; y1i = -a[j + 3] + a[j2 + 3]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; y2r = a[j1 + 2] + a[j3 + 2]; y2i = a[j1 + 3] + a[j3 + 3]; y3r = a[j1 + 2] - a[j3 + 2]; y3i = a[j1 + 3] - a[j3 + 3]; a[j] = x0r + x2r; a[j + 1] = x0i - x2i; a[j + 2] = y0r + y2r; a[j + 3] = y0i - y2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; a[j1 + 2] = y0r - y2r; a[j1 + 3] = y0i + y2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2] = wk1r * x0r - wk1i * x0i; a[j2 + 1] = wk1r * x0i + wk1i * x0r; x0r = y1r + y3i; x0i = y1i + y3r; a[j2 + 2] = wd1r * x0r - wd1i * x0i; a[j2 + 3] = wd1r * x0i + wd1i * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r + wk3i * x0i; a[j3 + 1] = wk3r * x0i - wk3i * x0r; x0r = y1r - y3i; x0i = y1i - y3r; a[j3 + 2] = wd3r * x0r + wd3i * x0i; a[j3 + 3] = wd3r * x0i - wd3i * x0r; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = -a[j0 + 1] - a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = -a[j0 + 1] + a[j2 + 1]; y0r = a[j0 - 2] + a[j2 - 2]; y0i = -a[j0 - 1] - a[j2 - 1]; y1r = a[j0 - 2] - a[j2 - 2]; y1i = -a[j0 - 1] + a[j2 - 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; y2r = a[j1 - 2] + a[j3 - 2]; y2i = a[j1 - 1] + a[j3 - 1]; y3r = a[j1 - 2] - a[j3 - 2]; y3i = a[j1 - 1] - a[j3 - 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i - x2i; a[j0 - 2] = y0r + y2r; a[j0 - 1] = y0i - y2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; a[j1 - 2] = y0r - y2r; a[j1 - 1] = y0i + y2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2] = wk1i * x0r - wk1r * x0i; a[j2 + 1] = wk1i * x0i + wk1r * x0r; x0r = y1r + y3i; x0i = y1i + y3r; a[j2 - 2] = wd1i * x0r - wd1r * x0i; a[j2 - 1] = wd1i * x0i + wd1r * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3] = wk3i * x0r + wk3r * x0i; a[j3 + 1] = wk3i * x0i - wk3r * x0r; x0r = y1r - y3i; x0i = y1i - y3r; a[j3 - 2] = wd3i * x0r + wd3r * x0i; a[j3 - 1] = wd3i * x0i - wd3r * x0r; } wk1r = csc1 * (wd1r + wn4r); wk1i = csc1 * (wd1i + wn4r); wk3r = csc3 * (wd3r - wn4r); wk3i = csc3 * (wd3i - wn4r); j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0 - 2] + a[j2 - 2]; x0i = -a[j0 - 1] - a[j2 - 1]; x1r = a[j0 - 2] - a[j2 - 2]; x1i = -a[j0 - 1] + a[j2 - 1]; x2r = a[j1 - 2] + a[j3 - 2]; x2i = a[j1 - 1] + a[j3 - 1]; x3r = a[j1 - 2] - a[j3 - 2]; x3i = a[j1 - 1] - a[j3 - 1]; a[j0 - 2] = x0r + x2r; a[j0 - 1] = x0i - x2i; a[j1 - 2] = x0r - x2r; a[j1 - 1] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2 - 2] = wk1r * x0r - wk1i * x0i; a[j2 - 1] = wk1r * x0i + wk1i * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3 - 2] = wk3r * x0r + wk3i * x0i; a[j3 - 1] = wk3r * x0i - wk3i * x0r; x0r = a[j0] + a[j2]; x0i = -a[j0 + 1] - a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = -a[j0 + 1] + a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i - x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2] = wn4r * (x0r - x0i); a[j2 + 1] = wn4r * (x0i + x0r); x0r = x1r - x3i; x0i = x1i - x3r; a[j3] = -wn4r * (x0r + x0i); a[j3 + 1] = -wn4r * (x0i - x0r); x0r = a[j0 + 2] + a[j2 + 2]; x0i = -a[j0 + 3] - a[j2 + 3]; x1r = a[j0 + 2] - a[j2 + 2]; x1i = -a[j0 + 3] + a[j2 + 3]; x2r = a[j1 + 2] + a[j3 + 2]; x2i = a[j1 + 3] + a[j3 + 3]; x3r = a[j1 + 2] - a[j3 + 2]; x3i = a[j1 + 3] - a[j3 + 3]; a[j0 + 2] = x0r + x2r; a[j0 + 3] = x0i - x2i; a[j1 + 2] = x0r - x2r; a[j1 + 3] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2 + 2] = wk1i * x0r - wk1r * x0i; a[j2 + 3] = wk1i * x0i + wk1r * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3 + 2] = wk3i * x0r + wk3r * x0i; a[j3 + 3] = wk3i * x0i - wk3r * x0r; } #ifdef USE_CDFT_THREADS struct cdft_arg_st { int n0; int n; float32 *a; int nw; float32 *w; }; typedef struct cdft_arg_st cdft_arg_t; static void cftrec4_th(int n, float32 *a, int nw, float32 *w) { int i, idiv4, m, nthread; cdft_thread_t th[4]; cdft_arg_t ag[4]; nthread = 2; idiv4 = 0; m = n >> 1; if (n > CDFT_4THREADS_BEGIN_N) { nthread = 4; idiv4 = 1; m >>= 1; } for (i = 0; i < nthread; i++) { ag[i].n0 = n; ag[i].n = m; ag[i].a = &a[i * m]; ag[i].nw = nw; ag[i].w = w; if (i != idiv4) { cdft_thread_create(&th[i], cftrec1_th, &ag[i]); } else { cdft_thread_create(&th[i], cftrec2_th, &ag[i]); } } for (i = 0; i < nthread; i++) { cdft_thread_wait(th[i]); } } static void *cftrec1_th(void *p) { int isplt, j, k, m, n, n0, nw; float32 *a, *w; n0 = ((cdft_arg_t *) p)->n0; n = ((cdft_arg_t *) p)->n; a = ((cdft_arg_t *) p)->a; nw = ((cdft_arg_t *) p)->nw; w = ((cdft_arg_t *) p)->w; m = n0; while (m > 512) { m >>= 2; cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); } cftleaf(m, 1, &a[n - m], nw, w); k = 0; for (j = n - m; j > 0; j -= m) { k++; isplt = cfttree(m, j, k, a, nw, w); cftleaf(m, isplt, &a[j - m], nw, w); } return (void *) 0; } static void *cftrec2_th(void *p) { int isplt, j, k, m, n, n0, nw; float32 *a, *w; n0 = ((cdft_arg_t *) p)->n0; n = ((cdft_arg_t *) p)->n; a = ((cdft_arg_t *) p)->a; nw = ((cdft_arg_t *) p)->nw; w = ((cdft_arg_t *) p)->w; k = 1; m = n0; while (m > 512) { m >>= 2; k <<= 2; cftmdl2(m, &a[n - m], &w[nw - m]); } cftleaf(m, 0, &a[n - m], nw, w); k >>= 1; for (j = n - m; j > 0; j -= m) { k++; isplt = cfttree(m, j, k, a, nw, w); cftleaf(m, isplt, &a[j - m], nw, w); } return (void *) 0; } #endif /* USE_CDFT_THREADS */ static void cftrec4(int n, float32 *a, int nw, float32 *w) { int isplt, j, k, m; m = n; while (m > 512) { m >>= 2; cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); } cftleaf(m, 1, &a[n - m], nw, w); k = 0; for (j = n - m; j > 0; j -= m) { k++; isplt = cfttree(m, j, k, a, nw, w); cftleaf(m, isplt, &a[j - m], nw, w); } } int cfttree(int n, int j, int k, float32 *a, int nw, float32 *w) { int i, isplt, m; if ((k & 3) != 0) { isplt = k & 1; if (isplt != 0) { cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]); } else { cftmdl2(n, &a[j - n], &w[nw - n]); } } else { m = n; for (i = k; (i & 3) == 0; i >>= 2) { m <<= 2; } isplt = i & 1; if (isplt != 0) { while (m > 128) { cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]); m >>= 2; } } else { while (m > 128) { cftmdl2(m, &a[j - m], &w[nw - m]); m >>= 2; } } } return isplt; } static void cftleaf(int n, int isplt, float32 *a, int nw, float32 *w) { if (n == 512) { cftmdl1(128, a, &w[nw - 64]); cftf161(a, &w[nw - 8]); cftf162(&a[32], &w[nw - 32]); cftf161(&a[64], &w[nw - 8]); cftf161(&a[96], &w[nw - 8]); cftmdl2(128, &a[128], &w[nw - 128]); cftf161(&a[128], &w[nw - 8]); cftf162(&a[160], &w[nw - 32]); cftf161(&a[192], &w[nw - 8]); cftf162(&a[224], &w[nw - 32]); cftmdl1(128, &a[256], &w[nw - 64]); cftf161(&a[256], &w[nw - 8]); cftf162(&a[288], &w[nw - 32]); cftf161(&a[320], &w[nw - 8]); cftf161(&a[352], &w[nw - 8]); if (isplt != 0) { cftmdl1(128, &a[384], &w[nw - 64]); cftf161(&a[480], &w[nw - 8]); } else { cftmdl2(128, &a[384], &w[nw - 128]); cftf162(&a[480], &w[nw - 32]); } cftf161(&a[384], &w[nw - 8]); cftf162(&a[416], &w[nw - 32]); cftf161(&a[448], &w[nw - 8]); } else { cftmdl1(64, a, &w[nw - 32]); cftf081(a, &w[nw - 8]); cftf082(&a[16], &w[nw - 8]); cftf081(&a[32], &w[nw - 8]); cftf081(&a[48], &w[nw - 8]); cftmdl2(64, &a[64], &w[nw - 64]); cftf081(&a[64], &w[nw - 8]); cftf082(&a[80], &w[nw - 8]); cftf081(&a[96], &w[nw - 8]); cftf082(&a[112], &w[nw - 8]); cftmdl1(64, &a[128], &w[nw - 32]); cftf081(&a[128], &w[nw - 8]); cftf082(&a[144], &w[nw - 8]); cftf081(&a[160], &w[nw - 8]); cftf081(&a[176], &w[nw - 8]); if (isplt != 0) { cftmdl1(64, &a[192], &w[nw - 32]); cftf081(&a[240], &w[nw - 8]); } else { cftmdl2(64, &a[192], &w[nw - 64]); cftf082(&a[240], &w[nw - 8]); } cftf081(&a[192], &w[nw - 8]); cftf082(&a[208], &w[nw - 8]); cftf081(&a[224], &w[nw - 8]); } } static void cftmdl1(int n, float32 *a, float32 *w) { int j, j0, j1, j2, j3, k, m, mh; float32 wn4r, wk1r, wk1i, wk3r, wk3i; float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; mh = n >> 3; m = 2 * mh; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] + a[j2]; x0i = a[1] + a[j2 + 1]; x1r = a[0] - a[j2]; x1i = a[1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; a[j2] = x1r - x3i; a[j2 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; wn4r = w[1]; k = 0; for (j = 2; j < mh; j += 2) { k += 4; wk1r = w[k]; wk1i = w[k + 1]; wk3r = w[k + 2]; wk3i = w[k + 3]; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] + a[j2]; x0i = a[j + 1] + a[j2 + 1]; x1r = a[j] - a[j2]; x1i = a[j + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1r * x0r - wk1i * x0i; a[j2 + 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r + wk3i * x0i; a[j3 + 1] = wk3r * x0i - wk3i * x0r; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1i * x0r - wk1r * x0i; a[j2 + 1] = wk1i * x0i + wk1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3i * x0r + wk3r * x0i; a[j3 + 1] = wk3i * x0i - wk3r * x0r; } j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wn4r * (x0r - x0i); a[j2 + 1] = wn4r * (x0i + x0r); x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = -wn4r * (x0r + x0i); a[j3 + 1] = -wn4r * (x0i - x0r); } static void cftmdl2(int n, float32 *a, float32 *w) { int j, j0, j1, j2, j3, k, kr, m, mh; float32 wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i; float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i; mh = n >> 3; m = 2 * mh; wn4r = w[1]; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] - a[j2 + 1]; x0i = a[1] + a[j2]; x1r = a[0] + a[j2 + 1]; x1i = a[1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wn4r * (x2r - x2i); y0i = wn4r * (x2i + x2r); a[0] = x0r + y0r; a[1] = x0i + y0i; a[j1] = x0r - y0r; a[j1 + 1] = x0i - y0i; y0r = wn4r * (x3r - x3i); y0i = wn4r * (x3i + x3r); a[j2] = x1r - y0i; a[j2 + 1] = x1i + y0r; a[j3] = x1r + y0i; a[j3 + 1] = x1i - y0r; k = 0; kr = 2 * m; for (j = 2; j < mh; j += 2) { k += 4; wk1r = w[k]; wk1i = w[k + 1]; wk3r = w[k + 2]; wk3i = w[k + 3]; kr -= 4; wd1i = w[kr]; wd1r = w[kr + 1]; wd3i = w[kr + 2]; wd3r = w[kr + 3]; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] - a[j2 + 1]; x0i = a[j + 1] + a[j2]; x1r = a[j] + a[j2 + 1]; x1i = a[j + 1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wk1r * x0r - wk1i * x0i; y0i = wk1r * x0i + wk1i * x0r; y2r = wd1r * x2r - wd1i * x2i; y2i = wd1r * x2i + wd1i * x2r; a[j] = y0r + y2r; a[j + 1] = y0i + y2i; a[j1] = y0r - y2r; a[j1 + 1] = y0i - y2i; y0r = wk3r * x1r + wk3i * x1i; y0i = wk3r * x1i - wk3i * x1r; y2r = wd3r * x3r + wd3i * x3i; y2i = wd3r * x3i - wd3i * x3r; a[j2] = y0r + y2r; a[j2 + 1] = y0i + y2i; a[j3] = y0r - y2r; a[j3 + 1] = y0i - y2i; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] - a[j2 + 1]; x0i = a[j0 + 1] + a[j2]; x1r = a[j0] + a[j2 + 1]; x1i = a[j0 + 1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wd1i * x0r - wd1r * x0i; y0i = wd1i * x0i + wd1r * x0r; y2r = wk1i * x2r - wk1r * x2i; y2i = wk1i * x2i + wk1r * x2r; a[j0] = y0r + y2r; a[j0 + 1] = y0i + y2i; a[j1] = y0r - y2r; a[j1 + 1] = y0i - y2i; y0r = wd3i * x1r + wd3r * x1i; y0i = wd3i * x1i - wd3r * x1r; y2r = wk3i * x3r + wk3r * x3i; y2i = wk3i * x3i - wk3r * x3r; a[j2] = y0r + y2r; a[j2 + 1] = y0i + y2i; a[j3] = y0r - y2r; a[j3 + 1] = y0i - y2i; } wk1r = w[m]; wk1i = w[m + 1]; j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] - a[j2 + 1]; x0i = a[j0 + 1] + a[j2]; x1r = a[j0] + a[j2 + 1]; x1i = a[j0 + 1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wk1r * x0r - wk1i * x0i; y0i = wk1r * x0i + wk1i * x0r; y2r = wk1i * x2r - wk1r * x2i; y2i = wk1i * x2i + wk1r * x2r; a[j0] = y0r + y2r; a[j0 + 1] = y0i + y2i; a[j1] = y0r - y2r; a[j1 + 1] = y0i - y2i; y0r = wk1i * x1r - wk1r * x1i; y0i = wk1i * x1i + wk1r * x1r; y2r = wk1r * x3r - wk1i * x3i; y2i = wk1r * x3i + wk1i * x3r; a[j2] = y0r - y2r; a[j2 + 1] = y0i - y2i; a[j3] = y0r + y2r; a[j3 + 1] = y0i + y2i; } static void cftfx41(int n, float32 *a, int nw, float32 *w) { if (n == 128) { cftf161(a, &w[nw - 8]); cftf162(&a[32], &w[nw - 32]); cftf161(&a[64], &w[nw - 8]); cftf161(&a[96], &w[nw - 8]); } else { cftf081(a, &w[nw - 8]); cftf082(&a[16], &w[nw - 8]); cftf081(&a[32], &w[nw - 8]); cftf081(&a[48], &w[nw - 8]); } } static void cftf161(float32 *a, float32 *w) { float32 wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; wn4r = w[1]; wk1r = w[2]; wk1i = w[3]; x0r = a[0] + a[16]; x0i = a[1] + a[17]; x1r = a[0] - a[16]; x1i = a[1] - a[17]; x2r = a[8] + a[24]; x2i = a[9] + a[25]; x3r = a[8] - a[24]; x3i = a[9] - a[25]; y0r = x0r + x2r; y0i = x0i + x2i; y4r = x0r - x2r; y4i = x0i - x2i; y8r = x1r - x3i; y8i = x1i + x3r; y12r = x1r + x3i; y12i = x1i - x3r; x0r = a[2] + a[18]; x0i = a[3] + a[19]; x1r = a[2] - a[18]; x1i = a[3] - a[19]; x2r = a[10] + a[26]; x2i = a[11] + a[27]; x3r = a[10] - a[26]; x3i = a[11] - a[27]; y1r = x0r + x2r; y1i = x0i + x2i; y5r = x0r - x2r; y5i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; y9r = wk1r * x0r - wk1i * x0i; y9i = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; y13r = wk1i * x0r - wk1r * x0i; y13i = wk1i * x0i + wk1r * x0r; x0r = a[4] + a[20]; x0i = a[5] + a[21]; x1r = a[4] - a[20]; x1i = a[5] - a[21]; x2r = a[12] + a[28]; x2i = a[13] + a[29]; x3r = a[12] - a[28]; x3i = a[13] - a[29]; y2r = x0r + x2r; y2i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; y10r = wn4r * (x0r - x0i); y10i = wn4r * (x0i + x0r); x0r = x1r + x3i; x0i = x1i - x3r; y14r = wn4r * (x0r + x0i); y14i = wn4r * (x0i - x0r); x0r = a[6] + a[22]; x0i = a[7] + a[23]; x1r = a[6] - a[22]; x1i = a[7] - a[23]; x2r = a[14] + a[30]; x2i = a[15] + a[31]; x3r = a[14] - a[30]; x3i = a[15] - a[31]; y3r = x0r + x2r; y3i = x0i + x2i; y7r = x0r - x2r; y7i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; y11r = wk1i * x0r - wk1r * x0i; y11i = wk1i * x0i + wk1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; y15r = wk1r * x0r - wk1i * x0i; y15i = wk1r * x0i + wk1i * x0r; x0r = y12r - y14r; x0i = y12i - y14i; x1r = y12r + y14r; x1i = y12i + y14i; x2r = y13r - y15r; x2i = y13i - y15i; x3r = y13r + y15r; x3i = y13i + y15i; a[24] = x0r + x2r; a[25] = x0i + x2i; a[26] = x0r - x2r; a[27] = x0i - x2i; a[28] = x1r - x3i; a[29] = x1i + x3r; a[30] = x1r + x3i; a[31] = x1i - x3r; x0r = y8r + y10r; x0i = y8i + y10i; x1r = y8r - y10r; x1i = y8i - y10i; x2r = y9r + y11r; x2i = y9i + y11i; x3r = y9r - y11r; x3i = y9i - y11i; a[16] = x0r + x2r; a[17] = x0i + x2i; a[18] = x0r - x2r; a[19] = x0i - x2i; a[20] = x1r - x3i; a[21] = x1i + x3r; a[22] = x1r + x3i; a[23] = x1i - x3r; x0r = y5r - y7i; x0i = y5i + y7r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); x0r = y5r + y7i; x0i = y5i - y7r; x3r = wn4r * (x0r - x0i); x3i = wn4r * (x0i + x0r); x0r = y4r - y6i; x0i = y4i + y6r; x1r = y4r + y6i; x1i = y4i - y6r; a[8] = x0r + x2r; a[9] = x0i + x2i; a[10] = x0r - x2r; a[11] = x0i - x2i; a[12] = x1r - x3i; a[13] = x1i + x3r; a[14] = x1r + x3i; a[15] = x1i - x3r; x0r = y0r + y2r; x0i = y0i + y2i; x1r = y0r - y2r; x1i = y0i - y2i; x2r = y1r + y3r; x2i = y1i + y3i; x3r = y1r - y3r; x3i = y1i - y3i; a[0] = x0r + x2r; a[1] = x0i + x2i; a[2] = x0r - x2r; a[3] = x0i - x2i; a[4] = x1r - x3i; a[5] = x1i + x3r; a[6] = x1r + x3i; a[7] = x1i - x3r; } static void cftf162(float32 *a, float32 *w) { float32 wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, x0r, x0i, x1r, x1i, x2r, x2i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; wn4r = w[1]; wk1r = w[4]; wk1i = w[5]; wk3r = w[6]; wk3i = -w[7]; wk2r = w[8]; wk2i = w[9]; x1r = a[0] - a[17]; x1i = a[1] + a[16]; x0r = a[8] - a[25]; x0i = a[9] + a[24]; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); y0r = x1r + x2r; y0i = x1i + x2i; y4r = x1r - x2r; y4i = x1i - x2i; x1r = a[0] + a[17]; x1i = a[1] - a[16]; x0r = a[8] + a[25]; x0i = a[9] - a[24]; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); y8r = x1r - x2i; y8i = x1i + x2r; y12r = x1r + x2i; y12i = x1i - x2r; x0r = a[2] - a[19]; x0i = a[3] + a[18]; x1r = wk1r * x0r - wk1i * x0i; x1i = wk1r * x0i + wk1i * x0r; x0r = a[10] - a[27]; x0i = a[11] + a[26]; x2r = wk3i * x0r - wk3r * x0i; x2i = wk3i * x0i + wk3r * x0r; y1r = x1r + x2r; y1i = x1i + x2i; y5r = x1r - x2r; y5i = x1i - x2i; x0r = a[2] + a[19]; x0i = a[3] - a[18]; x1r = wk3r * x0r - wk3i * x0i; x1i = wk3r * x0i + wk3i * x0r; x0r = a[10] + a[27]; x0i = a[11] - a[26]; x2r = wk1r * x0r + wk1i * x0i; x2i = wk1r * x0i - wk1i * x0r; y9r = x1r - x2r; y9i = x1i - x2i; y13r = x1r + x2r; y13i = x1i + x2i; x0r = a[4] - a[21]; x0i = a[5] + a[20]; x1r = wk2r * x0r - wk2i * x0i; x1i = wk2r * x0i + wk2i * x0r; x0r = a[12] - a[29]; x0i = a[13] + a[28]; x2r = wk2i * x0r - wk2r * x0i; x2i = wk2i * x0i + wk2r * x0r; y2r = x1r + x2r; y2i = x1i + x2i; y6r = x1r - x2r; y6i = x1i - x2i; x0r = a[4] + a[21]; x0i = a[5] - a[20]; x1r = wk2i * x0r - wk2r * x0i; x1i = wk2i * x0i + wk2r * x0r; x0r = a[12] + a[29]; x0i = a[13] - a[28]; x2r = wk2r * x0r - wk2i * x0i; x2i = wk2r * x0i + wk2i * x0r; y10r = x1r - x2r; y10i = x1i - x2i; y14r = x1r + x2r; y14i = x1i + x2i; x0r = a[6] - a[23]; x0i = a[7] + a[22]; x1r = wk3r * x0r - wk3i * x0i; x1i = wk3r * x0i + wk3i * x0r; x0r = a[14] - a[31]; x0i = a[15] + a[30]; x2r = wk1i * x0r - wk1r * x0i; x2i = wk1i * x0i + wk1r * x0r; y3r = x1r + x2r; y3i = x1i + x2i; y7r = x1r - x2r; y7i = x1i - x2i; x0r = a[6] + a[23]; x0i = a[7] - a[22]; x1r = wk1i * x0r + wk1r * x0i; x1i = wk1i * x0i - wk1r * x0r; x0r = a[14] + a[31]; x0i = a[15] - a[30]; x2r = wk3i * x0r - wk3r * x0i; x2i = wk3i * x0i + wk3r * x0r; y11r = x1r + x2r; y11i = x1i + x2i; y15r = x1r - x2r; y15i = x1i - x2i; x1r = y0r + y2r; x1i = y0i + y2i; x2r = y1r + y3r; x2i = y1i + y3i; a[0] = x1r + x2r; a[1] = x1i + x2i; a[2] = x1r - x2r; a[3] = x1i - x2i; x1r = y0r - y2r; x1i = y0i - y2i; x2r = y1r - y3r; x2i = y1i - y3i; a[4] = x1r - x2i; a[5] = x1i + x2r; a[6] = x1r + x2i; a[7] = x1i - x2r; x1r = y4r - y6i; x1i = y4i + y6r; x0r = y5r - y7i; x0i = y5i + y7r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[8] = x1r + x2r; a[9] = x1i + x2i; a[10] = x1r - x2r; a[11] = x1i - x2i; x1r = y4r + y6i; x1i = y4i - y6r; x0r = y5r + y7i; x0i = y5i - y7r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[12] = x1r - x2i; a[13] = x1i + x2r; a[14] = x1r + x2i; a[15] = x1i - x2r; x1r = y8r + y10r; x1i = y8i + y10i; x2r = y9r - y11r; x2i = y9i - y11i; a[16] = x1r + x2r; a[17] = x1i + x2i; a[18] = x1r - x2r; a[19] = x1i - x2i; x1r = y8r - y10r; x1i = y8i - y10i; x2r = y9r + y11r; x2i = y9i + y11i; a[20] = x1r - x2i; a[21] = x1i + x2r; a[22] = x1r + x2i; a[23] = x1i - x2r; x1r = y12r - y14i; x1i = y12i + y14r; x0r = y13r + y15i; x0i = y13i - y15r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[24] = x1r + x2r; a[25] = x1i + x2i; a[26] = x1r - x2r; a[27] = x1i - x2i; x1r = y12r + y14i; x1i = y12i - y14r; x0r = y13r - y15i; x0i = y13i + y15r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[28] = x1r - x2i; a[29] = x1i + x2r; a[30] = x1r + x2i; a[31] = x1i - x2r; } static void cftf081(float32 *a, float32 *w) { float32 wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; wn4r = w[1]; x0r = a[0] + a[8]; x0i = a[1] + a[9]; x1r = a[0] - a[8]; x1i = a[1] - a[9]; x2r = a[4] + a[12]; x2i = a[5] + a[13]; x3r = a[4] - a[12]; x3i = a[5] - a[13]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[2] + a[10]; x0i = a[3] + a[11]; x1r = a[2] - a[10]; x1i = a[3] - a[11]; x2r = a[6] + a[14]; x2i = a[7] + a[15]; x3r = a[6] - a[14]; x3i = a[7] - a[15]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); a[8] = y1r + y5r; a[9] = y1i + y5i; a[10] = y1r - y5r; a[11] = y1i - y5i; a[12] = y3r - y7i; a[13] = y3i + y7r; a[14] = y3r + y7i; a[15] = y3i - y7r; a[0] = y0r + y4r; a[1] = y0i + y4i; a[2] = y0r - y4r; a[3] = y0i - y4i; a[4] = y2r - y6i; a[5] = y2i + y6r; a[6] = y2r + y6i; a[7] = y2i - y6r; } static void cftf082(float32 *a, float32 *w) { float32 wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; wn4r = w[1]; wk1r = w[2]; wk1i = w[3]; y0r = a[0] - a[9]; y0i = a[1] + a[8]; y1r = a[0] + a[9]; y1i = a[1] - a[8]; x0r = a[4] - a[13]; x0i = a[5] + a[12]; y2r = wn4r * (x0r - x0i); y2i = wn4r * (x0i + x0r); x0r = a[4] + a[13]; x0i = a[5] - a[12]; y3r = wn4r * (x0r - x0i); y3i = wn4r * (x0i + x0r); x0r = a[2] - a[11]; x0i = a[3] + a[10]; y4r = wk1r * x0r - wk1i * x0i; y4i = wk1r * x0i + wk1i * x0r; x0r = a[2] + a[11]; x0i = a[3] - a[10]; y5r = wk1i * x0r - wk1r * x0i; y5i = wk1i * x0i + wk1r * x0r; x0r = a[6] - a[15]; x0i = a[7] + a[14]; y6r = wk1i * x0r - wk1r * x0i; y6i = wk1i * x0i + wk1r * x0r; x0r = a[6] + a[15]; x0i = a[7] - a[14]; y7r = wk1r * x0r - wk1i * x0i; y7i = wk1r * x0i + wk1i * x0r; x0r = y0r + y2r; x0i = y0i + y2i; x1r = y4r + y6r; x1i = y4i + y6i; a[0] = x0r + x1r; a[1] = x0i + x1i; a[2] = x0r - x1r; a[3] = x0i - x1i; x0r = y0r - y2r; x0i = y0i - y2i; x1r = y4r - y6r; x1i = y4i - y6i; a[4] = x0r - x1i; a[5] = x0i + x1r; a[6] = x0r + x1i; a[7] = x0i - x1r; x0r = y1r - y3i; x0i = y1i + y3r; x1r = y5r - y7r; x1i = y5i - y7i; a[8] = x0r + x1r; a[9] = x0i + x1i; a[10] = x0r - x1r; a[11] = x0i - x1i; x0r = y1r + y3i; x0i = y1i - y3r; x1r = y5r + y7r; x1i = y5i + y7i; a[12] = x0r - x1i; a[13] = x0i + x1r; a[14] = x0r + x1i; a[15] = x0i - x1r; } static void cftf040(float32 *a) { float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; x0r = a[0] + a[4]; x0i = a[1] + a[5]; x1r = a[0] - a[4]; x1i = a[1] - a[5]; x2r = a[2] + a[6]; x2i = a[3] + a[7]; x3r = a[2] - a[6]; x3i = a[3] - a[7]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[2] = x1r - x3i; a[3] = x1i + x3r; a[4] = x0r - x2r; a[5] = x0i - x2i; a[6] = x1r + x3i; a[7] = x1i - x3r; } static void cftb040(float32 *a) { float32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; x0r = a[0] + a[4]; x0i = a[1] + a[5]; x1r = a[0] - a[4]; x1i = a[1] - a[5]; x2r = a[2] + a[6]; x2i = a[3] + a[7]; x3r = a[2] - a[6]; x3i = a[3] - a[7]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[2] = x1r + x3i; a[3] = x1i - x3r; a[4] = x0r - x2r; a[5] = x0i - x2i; a[6] = x1r - x3i; a[7] = x1i + x3r; } static void cftx020(float32 *a) { float32 x0r, x0i; x0r = a[0] - a[2]; x0i = a[1] - a[3]; a[0] += a[2]; a[1] += a[3]; a[2] = x0r; a[3] = x0i; } static void rftfsub(int n, float32 *a, int nc, float32 *c) { int j, k, kk, ks, m; float32 wkr, wki, xr, xi, yr, yi; m = n >> 1; ks = 2 * nc / m; kk = 0; for (j = 2; j < m; j += 2) { k = n - j; kk += ks; wkr = 0.5 - c[nc - kk]; wki = c[kk]; xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr - wki * xi; yi = wkr * xi + wki * xr; a[j] -= yr; a[j + 1] -= yi; a[k] += yr; a[k + 1] -= yi; } } static void rftbsub(int n, float32 *a, int nc, float32 *c) { int j, k, kk, ks, m; float32 wkr, wki, xr, xi, yr, yi; m = n >> 1; ks = 2 * nc / m; kk = 0; for (j = 2; j < m; j += 2) { k = n - j; kk += ks; wkr = 0.5 - c[nc - kk]; wki = c[kk]; xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr + wki * xi; yi = wkr * xi - wki * xr; a[j] -= yr; a[j + 1] -= yi; a[k] += yr; a[k + 1] -= yi; } } static void dctsub(int n, float32 *a, int nc, float32 *c) { int j, k, kk, ks, m; float32 wkr, wki, xr; m = n >> 1; ks = nc / n; kk = 0; for (j = 1; j < m; j++) { k = n - j; kk += ks; wkr = c[kk] - c[nc - kk]; wki = c[kk] + c[nc - kk]; xr = wki * a[j] - wkr * a[k]; a[j] = wkr * a[j] + wki * a[k]; a[k] = xr; } a[m] *= c[0]; } static void dstsub(int n, float32 *a, int nc, float32 *c) { int j, k, kk, ks, m; float32 wkr, wki, xr; m = n >> 1; ks = nc / n; kk = 0; for (j = 1; j < m; j++) { k = n - j; kk += ks; wkr = c[kk] - c[nc - kk]; wki = c[kk] + c[nc - kk]; xr = wki * a[k] - wkr * a[j]; a[k] = wkr * a[k] + wki * a[j]; a[j] = xr; } a[m] *= c[0]; } }