Diffstat (limited to 'gcell/include/gcell/spu/fft_1d_r2.h')
-rw-r--r-- | gcell/include/gcell/spu/fft_1d_r2.h | 529
1 file changed, 0 insertions, 529 deletions
diff --git a/gcell/include/gcell/spu/fft_1d_r2.h b/gcell/include/gcell/spu/fft_1d_r2.h deleted file mode 100644 index a51cbc341d..0000000000 --- a/gcell/include/gcell/spu/fft_1d_r2.h +++ /dev/null @@ -1,529 +0,0 @@ -/* -------------------------------------------------------------- */ -/* (C)Copyright 2001,2007, */ -/* International Business Machines Corporation, */ -/* Sony Computer Entertainment, Incorporated, */ -/* Toshiba Corporation, */ -/* */ -/* All Rights Reserved. */ -/* */ -/* Redistribution and use in source and binary forms, with or */ -/* without modification, are permitted provided that the */ -/* following conditions are met: */ -/* */ -/* - Redistributions of source code must retain the above copyright*/ -/* notice, this list of conditions and the following disclaimer. */ -/* */ -/* - Redistributions in binary form must reproduce the above */ -/* copyright notice, this list of conditions and the following */ -/* disclaimer in the documentation and/or other materials */ -/* provided with the distribution. */ -/* */ -/* - Neither the name of IBM Corporation nor the names of its */ -/* contributors may be used to endorse or promote products */ -/* derived from this software without specific prior written */ -/* permission. */ -/* */ -/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ -/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ -/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ -/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ -/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */ -/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */ -/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */ -/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */ -/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ -/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ -/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ -/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */ -/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* -------------------------------------------------------------- */ -/* PROLOG END TAG zYx */ -#ifndef _FFT_1D_R2_H_ -#define _FFT_1D_R2_H_ 1 - -#include "fft_1d.h" - -/* fft_1d_r2 - * --------- - * Performs a single precision, complex Fast Fourier Transform using - * the DFT (Discrete Fourier Transform) with radix-2 decimation in time. - * The input <in> is an array of complex numbers of length (1<<log2_size) - * entries. The result is returned in the array of complex numbers specified - * by <out>. Note: This routine can support an in-place transformation - * by specifying <in> and <out> to be the same array. - * - * This implementation utilizes the Cooley-Tukey algorithm consisting - * of <log2_size> stages. The basic operation is the butterfly. - * - * p --------------------------> P = p + q*Wi - * \ / - * \ / - * \ / - * \/ - * /\ - * / \ - * / \ - * ____ / \ - * q --| Wi |-----------------> Q = p - q*Wi - * ---- - * - * This routine also requires pre-computed twiddle values, W. W is an - * array of single precision complex numbers of length 1<<(log2_size-2) - * and is computed as follows: - * - * for (i=0; i<n/4; i++) - * W[i].real = cos(i * 2*PI/n); - * W[i].imag = -sin(i * 2*PI/n); - * } - * - * This array actually only contains the first half of the twiddle - * factors. 
Due for symmetry, the second half of the twiddle factors - * are implied and equal: - * - * for (i=0; i<n/4; i++) - * W[i+n/4].real = W[i].imag = sin(i * 2*PI/n); - * W[i+n/4].imag = -W[i].real = -cos(i * 2*PI/n); - * } - * - * Further symmetry allows one to generate the twiddle factor table - * using half the number of trig computations as follows: - * - * W[0].real = 1.0; - * W[0].imag = 0.0; - * for (i=1; i<n/4; i++) - * W[i].real = cos(i * 2*PI/n); - * W[n/4 - i].imag = -W[i].real; - * } - * - * The complex numbers are packed into quadwords as follows: - * - * quadword complex - * array element array elements - * ----------------------------------------------------- - * i | real 2*i | imag 2*i | real 2*i+1 | imag 2*i+1 | - * ----------------------------------------------------- - * - */ - - -static __inline void _fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size) -{ - int i, j, k; - int stage, offset; - int i_rev; - int n, n_2, n_4, n_8, n_16, n_3_16; - int w_stride, w_2stride, w_3stride, w_4stride; - int stride, stride_2, stride_4, stride_3_4; - vector float *W0, *W1, *W2, *W3; - vector float *re0, *re1, *re2, *re3; - vector float *im0, *im1, *im2, *im3; - vector float *in0, *in1, *in2, *in3, *in4, *in5, *in6, *in7; - vector float *out0, *out1, *out2, *out3; - vector float tmp0, tmp1; - vector float w0_re, w0_im, w1_re, w1_im; - vector float w0, w1, w2, w3; - vector float src_lo0, src_lo1, src_lo2, src_lo3; - vector float src_hi0, src_hi1, src_hi2, src_hi3; - vector float dst_lo0, dst_lo1, dst_lo2, dst_lo3; - vector float dst_hi0, dst_hi1, dst_hi2, dst_hi3; - vector float out_re_lo0, out_re_lo1, out_re_lo2, out_re_lo3; - vector float out_im_lo0, out_im_lo1, out_im_lo2, out_im_lo3; - vector float out_re_hi0, out_re_hi1, out_re_hi2, out_re_hi3; - vector float out_im_hi0, out_im_hi1, out_im_hi2, out_im_hi3; - vector float re_lo0, re_lo1, re_lo2, re_lo3; - vector float im_lo0, im_lo1, im_lo2, im_lo3; - vector float re_hi0, re_hi1, re_hi2, re_hi3; - vector float im_hi0, im_hi1, im_hi2, im_hi3; - vector float pq_lo0, pq_lo1, pq_lo2, pq_lo3; - vector float pq_hi0, pq_hi1, pq_hi2, pq_hi3; - vector float re[MAX_FFT_1D_SIZE/4], im[MAX_FFT_1D_SIZE/4]; /* real & imaginary working arrays */ - vector float ppmm = (vector float) { 1.0f, 1.0f, -1.0f, -1.0f}; - vector float pmmp = (vector float) { 1.0f, -1.0f, -1.0f, 1.0f}; - vector unsigned char reverse; - vector unsigned char shuf_lo = (vector unsigned char) { - 0, 1, 2, 3, 4, 5, 6, 7, - 16,17,18,19, 20,21,22,23}; - vector unsigned char shuf_hi = (vector unsigned char) { - 8, 9,10,11, 12,13,14,15, - 24,25,26,27, 28,29,30,31}; - vector unsigned char shuf_0202 = (vector unsigned char) { - 0, 1, 2, 3, 8, 9,10,11, - 0, 1, 2, 3, 8, 9,10,11}; - vector unsigned char shuf_1313 = (vector unsigned char) { - 4, 5, 6, 7, 12,13,14,15, - 4, 5, 6, 7, 12,13,14,15}; - vector unsigned char shuf_0303 = (vector unsigned char) { - 0, 1, 2, 3, 12,13,14,15, - 0, 1, 2, 3, 12,13,14,15}; - vector unsigned char shuf_1212 = (vector unsigned char) { - 4, 5, 6, 7, 8, 9,10,11, - 4, 5, 6, 7, 8, 9,10,11}; - vector unsigned char shuf_0415 = (vector unsigned char) { - 0, 1, 2, 3, 16,17,18,19, - 4, 5, 6, 7, 20,21,22,23}; - vector unsigned char shuf_2637 = (vector unsigned char) { - 8, 9,10,11, 24,25,26,27, - 12,13,14,15,28,29,30,31}; - vector unsigned char shuf_0246 = (vector unsigned char) { - 0, 1, 2, 3, 8, 9,10,11, - 16,17,18,19,24,25,26,27}; - vector unsigned char shuf_1357 = (vector unsigned char) { - 4, 5, 6, 7, 12,13,14,15, - 
20,21,22,23,28,29,30,31}; - - n = 1 << log2_size; - n_2 = n >> 1; - n_4 = n >> 2; - n_8 = n >> 3; - n_16 = n >> 4; - - n_3_16 = n_8 + n_16; - - /* Compute a byte reverse shuffle pattern to be used to produce - * an address bit swap. - */ - reverse = spu_or(spu_slqwbyte(spu_splats((unsigned char)0x80), log2_size), - spu_rlmaskqwbyte(((vec_uchar16){15,14,13,12, 11,10,9,8, - 7, 6, 5, 4, 3, 2,1,0}), - log2_size-16)); - - /* Perform the first 3 stages of the FFT. These stages differs from - * other stages in that the inputs are unscrambled and the data is - * reformated into parallel arrays (ie, seperate real and imaginary - * arrays). The term "unscramble" means the bit address reverse the - * data array. In addition, the first three stages have simple twiddle - * weighting factors. - * stage 1: (1, 0) - * stage 2: (1, 0) and (0, -1) - * stage 3: (1, 0), (0.707, -0.707), (0, -1), (-0.707, -0.707) - * - * The arrays are processed as two halves, simultaneously. The lo (first - * half) and hi (second half). This is done because the scramble - * shares source value between each half of the output arrays. - */ - i = 0; - i_rev = 0; - - in0 = in; - in1 = in + n_8; - in2 = in + n_16; - in3 = in + n_3_16; - - in4 = in + n_4; - in5 = in1 + n_4; - in6 = in2 + n_4; - in7 = in3 + n_4; - - re0 = re; - re1 = re + n_8; - im0 = im; - im1 = im + n_8; - - w0_re = (vector float) { 1.0f, INV_SQRT_2, 0.0f, -INV_SQRT_2}; - w0_im = (vector float) { 0.0f, -INV_SQRT_2, -1.0f, -INV_SQRT_2}; - - do { - src_lo0 = in0[i_rev]; - src_lo1 = in1[i_rev]; - src_lo2 = in2[i_rev]; - src_lo3 = in3[i_rev]; - - src_hi0 = in4[i_rev]; - src_hi1 = in5[i_rev]; - src_hi2 = in6[i_rev]; - src_hi3 = in7[i_rev]; - - /* Perform scramble. - */ - dst_lo0 = spu_shuffle(src_lo0, src_hi0, shuf_lo); - dst_hi0 = spu_shuffle(src_lo0, src_hi0, shuf_hi); - dst_lo1 = spu_shuffle(src_lo1, src_hi1, shuf_lo); - dst_hi1 = spu_shuffle(src_lo1, src_hi1, shuf_hi); - dst_lo2 = spu_shuffle(src_lo2, src_hi2, shuf_lo); - dst_hi2 = spu_shuffle(src_lo2, src_hi2, shuf_hi); - dst_lo3 = spu_shuffle(src_lo3, src_hi3, shuf_lo); - dst_hi3 = spu_shuffle(src_lo3, src_hi3, shuf_hi); - - /* Perform the stage 1 butterfly. The multiplier constant, ppmm, - * is used to control the sign of the operands since a single - * quadword contains both of P and Q valule of the butterfly. - */ - pq_lo0 = spu_madd(ppmm, dst_lo0, spu_rlqwbyte(dst_lo0, 8)); - pq_hi0 = spu_madd(ppmm, dst_hi0, spu_rlqwbyte(dst_hi0, 8)); - pq_lo1 = spu_madd(ppmm, dst_lo1, spu_rlqwbyte(dst_lo1, 8)); - pq_hi1 = spu_madd(ppmm, dst_hi1, spu_rlqwbyte(dst_hi1, 8)); - pq_lo2 = spu_madd(ppmm, dst_lo2, spu_rlqwbyte(dst_lo2, 8)); - pq_hi2 = spu_madd(ppmm, dst_hi2, spu_rlqwbyte(dst_hi2, 8)); - pq_lo3 = spu_madd(ppmm, dst_lo3, spu_rlqwbyte(dst_lo3, 8)); - pq_hi3 = spu_madd(ppmm, dst_hi3, spu_rlqwbyte(dst_hi3, 8)); - - /* Perfrom the stage 2 butterfly. For this stage, the - * inputs pq are still interleaved (p.real, p.imag, q.real, - * q.imag), so we must first re-order the data into - * parallel arrays as well as perform the reorder - * associated with the twiddle W[n/4], which equals - * (0, -1). - * - * ie. 
(A, B) * (0, -1) => (B, -A) - */ - re_lo0 = spu_madd(ppmm, - spu_shuffle(pq_lo1, pq_lo1, shuf_0303), - spu_shuffle(pq_lo0, pq_lo0, shuf_0202)); - im_lo0 = spu_madd(pmmp, - spu_shuffle(pq_lo1, pq_lo1, shuf_1212), - spu_shuffle(pq_lo0, pq_lo0, shuf_1313)); - - re_lo1 = spu_madd(ppmm, - spu_shuffle(pq_lo3, pq_lo3, shuf_0303), - spu_shuffle(pq_lo2, pq_lo2, shuf_0202)); - im_lo1 = spu_madd(pmmp, - spu_shuffle(pq_lo3, pq_lo3, shuf_1212), - spu_shuffle(pq_lo2, pq_lo2, shuf_1313)); - - - re_hi0 = spu_madd(ppmm, - spu_shuffle(pq_hi1, pq_hi1, shuf_0303), - spu_shuffle(pq_hi0, pq_hi0, shuf_0202)); - im_hi0 = spu_madd(pmmp, - spu_shuffle(pq_hi1, pq_hi1, shuf_1212), - spu_shuffle(pq_hi0, pq_hi0, shuf_1313)); - - re_hi1 = spu_madd(ppmm, - spu_shuffle(pq_hi3, pq_hi3, shuf_0303), - spu_shuffle(pq_hi2, pq_hi2, shuf_0202)); - im_hi1 = spu_madd(pmmp, - spu_shuffle(pq_hi3, pq_hi3, shuf_1212), - spu_shuffle(pq_hi2, pq_hi2, shuf_1313)); - - - /* Perform stage 3 butterfly. - */ - FFT_1D_BUTTERFLY(re0[0], im0[0], re0[1], im0[1], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); - FFT_1D_BUTTERFLY(re1[0], im1[0], re1[1], im1[1], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); - - re0 += 2; - re1 += 2; - im0 += 2; - im1 += 2; - - i += 8; - i_rev = BIT_SWAP(i, reverse) / 2; - } while (i < n_2); - - /* Process stages 4 to log2_size-2 - */ - for (stage=4, stride=4; stage<log2_size-1; stage++, stride += stride) { - w_stride = n_2 >> stage; - w_2stride = n >> stage; - w_3stride = w_stride + w_2stride; - w_4stride = w_2stride + w_2stride; - - W0 = W; - W1 = W + w_stride; - W2 = W + w_2stride; - W3 = W + w_3stride; - - stride_2 = stride >> 1; - stride_4 = stride >> 2; - stride_3_4 = stride_2 + stride_4; - - re0 = re; im0 = im; - re1 = re + stride_2; im1 = im + stride_2; - re2 = re + stride_4; im2 = im + stride_4; - re3 = re + stride_3_4; im3 = im + stride_3_4; - - for (i=0, offset=0; i<stride_4; i++, offset += w_4stride) { - /* Compute the twiddle factors - */ - w0 = W0[offset]; - w1 = W1[offset]; - w2 = W2[offset]; - w3 = W3[offset]; - - tmp0 = spu_shuffle(w0, w2, shuf_0415); - tmp1 = spu_shuffle(w1, w3, shuf_0415); - - w0_re = spu_shuffle(tmp0, tmp1, shuf_0415); - w0_im = spu_shuffle(tmp0, tmp1, shuf_2637); - - j = i; - k = i + stride; - do { - re_lo0 = re0[j]; im_lo0 = im0[j]; - re_lo1 = re1[j]; im_lo1 = im1[j]; - - re_hi0 = re2[j]; im_hi0 = im2[j]; - re_hi1 = re3[j]; im_hi1 = im3[j]; - - re_lo2 = re0[k]; im_lo2 = im0[k]; - re_lo3 = re1[k]; im_lo3 = im1[k]; - - re_hi2 = re2[k]; im_hi2 = im2[k]; - re_hi3 = re3[k]; im_hi3 = im3[k]; - - FFT_1D_BUTTERFLY (re0[j], im0[j], re1[j], im1[j], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); - FFT_1D_BUTTERFLY_HI(re2[j], im2[j], re3[j], im3[j], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); - - FFT_1D_BUTTERFLY (re0[k], im0[k], re1[k], im1[k], re_lo2, im_lo2, re_lo3, im_lo3, w0_re, w0_im); - FFT_1D_BUTTERFLY_HI(re2[k], im2[k], re3[k], im3[k], re_hi2, im_hi2, re_hi3, im_hi3, w0_re, w0_im); - - j += 2 * stride; - k += 2 * stride; - } while (j < n_4); - } - } - - /* Process stage log2_size-1. This is identical to the stage processing above - * except for this stage the inner loop is only executed once so it is removed - * entirely. 
- */ - w_stride = n_2 >> stage; - w_2stride = n >> stage; - w_3stride = w_stride + w_2stride; - w_4stride = w_2stride + w_2stride; - - stride_2 = stride >> 1; - stride_4 = stride >> 2; - - stride_3_4 = stride_2 + stride_4; - - re0 = re; im0 = im; - re1 = re + stride_2; im1 = im + stride_2; - re2 = re + stride_4; im2 = im + stride_4; - re3 = re + stride_3_4; im3 = im + stride_3_4; - - for (i=0, offset=0; i<stride_4; i++, offset += w_4stride) { - /* Compute the twiddle factors - */ - w0 = W[offset]; - w1 = W[offset + w_stride]; - w2 = W[offset + w_2stride]; - w3 = W[offset + w_3stride]; - - tmp0 = spu_shuffle(w0, w2, shuf_0415); - tmp1 = spu_shuffle(w1, w3, shuf_0415); - - w0_re = spu_shuffle(tmp0, tmp1, shuf_0415); - w0_im = spu_shuffle(tmp0, tmp1, shuf_2637); - - j = i; - k = i + stride; - - re_lo0 = re0[j]; im_lo0 = im0[j]; - re_lo1 = re1[j]; im_lo1 = im1[j]; - - re_hi0 = re2[j]; im_hi0 = im2[j]; - re_hi1 = re3[j]; im_hi1 = im3[j]; - - re_lo2 = re0[k]; im_lo2 = im0[k]; - re_lo3 = re1[k]; im_lo3 = im1[k]; - - re_hi2 = re2[k]; im_hi2 = im2[k]; - re_hi3 = re3[k]; im_hi3 = im3[k]; - - FFT_1D_BUTTERFLY (re0[j], im0[j], re1[j], im1[j], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); - FFT_1D_BUTTERFLY_HI(re2[j], im2[j], re3[j], im3[j], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); - - FFT_1D_BUTTERFLY (re0[k], im0[k], re1[k], im1[k], re_lo2, im_lo2, re_lo3, im_lo3, w0_re, w0_im); - FFT_1D_BUTTERFLY_HI(re2[k], im2[k], re3[k], im3[k], re_hi2, im_hi2, re_hi3, im_hi3, w0_re, w0_im); - } - - - /* Process the final stage (stage log2_size). For this stage, - * reformat the data from parallel arrays back into - * interleaved arrays,storing the result into <in>. - * - * This loop has been manually unrolled by 2 to improve - * dual issue rates and reduce stalls. This unrolling - * forces a minimum FFT size of 32. - */ - re0 = re; - re1 = re + n_8; - re2 = re + n_16; - re3 = re + n_3_16; - - im0 = im; - im1 = im + n_8; - im2 = im + n_16; - im3 = im + n_3_16; - - out0 = out; - out1 = out + n_4; - out2 = out + n_8; - out3 = out1 + n_8; - - i = n_16; - - do { - /* Fetch the twiddle factors - */ - w0 = W[0]; - w1 = W[1]; - w2 = W[2]; - w3 = W[3]; - - W += 4; - - w0_re = spu_shuffle(w0, w1, shuf_0246); - w0_im = spu_shuffle(w0, w1, shuf_1357); - w1_re = spu_shuffle(w2, w3, shuf_0246); - w1_im = spu_shuffle(w2, w3, shuf_1357); - - /* Fetch the butterfly inputs, reals and imaginaries - */ - re_lo0 = re0[0]; im_lo0 = im0[0]; - re_lo1 = re1[0]; im_lo1 = im1[0]; - re_lo2 = re0[1]; im_lo2 = im0[1]; - re_lo3 = re1[1]; im_lo3 = im1[1]; - - re_hi0 = re2[0]; im_hi0 = im2[0]; - re_hi1 = re3[0]; im_hi1 = im3[0]; - re_hi2 = re2[1]; im_hi2 = im2[1]; - re_hi3 = re3[1]; im_hi3 = im3[1]; - - re0 += 2; im0 += 2; - re1 += 2; im1 += 2; - re2 += 2; im2 += 2; - re3 += 2; im3 += 2; - - /* Perform the butterflys - */ - FFT_1D_BUTTERFLY (out_re_lo0, out_im_lo0, out_re_lo1, out_im_lo1, re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im); - FFT_1D_BUTTERFLY (out_re_lo2, out_im_lo2, out_re_lo3, out_im_lo3, re_lo2, im_lo2, re_lo3, im_lo3, w1_re, w1_im); - - FFT_1D_BUTTERFLY_HI(out_re_hi0, out_im_hi0, out_re_hi1, out_im_hi1, re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im); - FFT_1D_BUTTERFLY_HI(out_re_hi2, out_im_hi2, out_re_hi3, out_im_hi3, re_hi2, im_hi2, re_hi3, im_hi3, w1_re, w1_im); - - /* Interleave the results and store them into the output buffers (ie, - * the original input buffers. 
- */ - out0[0] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_0415); - out0[1] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_2637); - out0[2] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_0415); - out0[3] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_2637); - - out1[0] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_0415); - out1[1] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_2637); - out1[2] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_0415); - out1[3] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_2637); - - out2[0] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_0415); - out2[1] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_2637); - out2[2] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_0415); - out2[3] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_2637); - - out3[0] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_0415); - out3[1] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_2637); - out3[2] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_0415); - out3[3] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_2637); - - out0 += 4; - out1 += 4; - out2 += 4; - out3 += 4; - - i -= 2; - } while (i); -} - -#endif /* _FFT_1D_R2_H_ */ |
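
The twiddle table W described in the header comment is expected to be filled by the caller. As an illustration only (the make_twiddles helper and the fft_complex type below are placeholders, not gcell code), it could be built in plain C exactly as the comment's loop describes:

    #include <math.h>

    typedef struct { float real, imag; } fft_complex;   /* packed real then imaginary, matching the quadword layout above */

    /* Fill the first n/4 twiddle factors, W[i] = exp(-j*2*pi*i/n),
     * as described in the header comment of fft_1d_r2.h. */
    static void make_twiddles(fft_complex *W, int log2_size)
    {
        const float pi = 3.14159265358979f;
        int n = 1 << log2_size;
        int i;

        for (i = 0; i < n / 4; i++) {
            W[i].real =  cosf(2.0f * pi * (float)i / (float)n);
            W[i].imag = -sinf(2.0f * pi * (float)i / (float)n);
        }
    }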
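A hypothetical SPU-side call sequence, sized from the constraints stated in the header comment (a minimum of 32 points because of the unrolled final stage, at most MAX_FFT_1D_SIZE points, W holding 1<<(log2_size-2) complex twiddles, and in-place operation allowed by passing the same array for <in> and <out>). The names LOG2_N, data, twid and run_fft_256 are placeholders; only _fft_1d_r2 and MAX_FFT_1D_SIZE come from the header:

    #include "fft_1d_r2.h"

    #define LOG2_N  8                    /* 256-point transform; any size from 32 to MAX_FFT_1D_SIZE */
    #define N       (1 << LOG2_N)

    static vector float data[N / 2];     /* N complex samples, two per quadword (re, im, re, im)     */
    static vector float twid[N / 8];     /* N/4 twiddle factors, two per quadword                    */

    void run_fft_256(void)
    {
        /* twid[] must already contain W[i] = exp(-j*2*pi*i/N) for i < N/4,
         * e.g. produced by make_twiddles() above and copied into local store. */
        _fft_1d_r2(data, data, twid, LOG2_N);   /* in-place: <in> and <out> may be the same array */
    }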
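For checking results, a plain scalar version of the same radix-2 decimation-in-time recurrence (bit-reverse the input, then log2_size stages of butterflies computing P = p + q*W and Q = p - q*W, as in the diagram in the header comment) can serve as a reference. This is a generic textbook sketch under the same forward-transform sign convention, not a drop-in replacement for the SPU routine:

    #include <math.h>

    typedef struct { float real, imag; } fft_complex;   /* same layout as in the sketches above */

    static void fft_ref(fft_complex *x, int log2_size)
    {
        const float pi = 3.14159265358979f;
        int n = 1 << log2_size;
        int i, j, m, len, base, k;

        /* Bit-reverse ("unscramble") the input ordering; the SPU code does
         * this with its byte-reverse shuffle pattern in the first stages. */
        for (i = 0, j = 0; i < n; i++) {
            if (i < j) { fft_complex t = x[i]; x[i] = x[j]; x[j] = t; }
            m = n >> 1;
            while (m >= 1 && (j & m)) { j ^= m; m >>= 1; }
            j |= m;
        }

        /* log2_size stages of butterflies. */
        for (len = 2; len <= n; len <<= 1) {
            for (base = 0; base < n; base += len) {
                for (k = 0; k < len / 2; k++) {
                    float a  = -2.0f * pi * (float)k / (float)len;
                    float wr = cosf(a), wi = sinf(a);          /* W = exp(-j*2*pi*k/len) */
                    fft_complex p = x[base + k];
                    fft_complex q = x[base + k + len / 2];
                    float tr = q.real * wr - q.imag * wi;      /* q * W       */
                    float ti = q.real * wi + q.imag * wr;
                    x[base + k].real           = p.real + tr;  /* P = p + q*W */
                    x[base + k].imag           = p.imag + ti;
                    x[base + k + len / 2].real = p.real - tr;  /* Q = p - q*W */
                    x[base + k + len / 2].imag = p.imag - ti;
                }
            }
        }
    }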