GNU Radio 3.4.2 C++ API
|
/* -------------------------------------------------------------- */
/* (C)Copyright 2001,2007,                                        */
/* International Business Machines Corporation,                   */
/* Sony Computer Entertainment, Incorporated,                     */
/* Toshiba Corporation,                                           */
/*                                                                */
/* All Rights Reserved.                                           */
/*                                                                */
/* Redistribution and use in source and binary forms, with or     */
/* without modification, are permitted provided that the          */
/* following conditions are met:                                  */
/*                                                                */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer.  */
/*                                                                */
/* - Redistributions in binary form must reproduce the above      */
/* copyright notice, this list of conditions and the following    */
/* disclaimer in the documentation and/or other materials         */
/* provided with the distribution.                                */
/*                                                                */
/* - Neither the name of IBM Corporation nor the names of its     */
/* contributors may be used to endorse or promote products        */
/* derived from this software without specific prior written      */
/* permission.                                                    */
/*                                                                */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND         */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,    */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF       */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE       */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR           */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,   */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT   */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;   */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)       */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN      */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR   */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.             */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx                                             */
#ifndef _FFT_1D_R2_H_
#define _FFT_1D_R2_H_ 1

#include "fft_1d.h"

/* fft_1d_r2
 * ---------
 * Performs a single precision, complex Fast Fourier Transform using
 * the DFT (Discrete Fourier Transform) with radix-2 decimation in time.
 * The input <in> is an array of complex numbers of length (1<<log2_size)
 * entries. The result is returned in the array of complex numbers specified
 * by <out>. Note: This routine can support an in-place transformation
 * by specifying <in> and <out> to be the same array.
 *
 * This implementation utilizes the Cooley-Tukey algorithm consisting
 * of <log2_size> stages. The basic operation is the butterfly.
 *
 *          p --------------------------> P = p + q*Wi
 *                        \      /
 *                         \    /
 *                          \  /
 *                           \/
 *                           /\
 *                          /  \
 *                         /    \
 *               ____     /      \
 *          q --| Wi |-----------------> Q = p - q*Wi
 *               ----
 *
 * This routine also requires pre-computed twiddle values, W. W is an
 * array of single precision complex numbers of length 1<<(log2_size-2)
 * and is computed as follows:
 *
 *     for (i=0; i<n/4; i++) {
 *        W[i].real =  cos(i * 2*PI/n);
 *        W[i].imag = -sin(i * 2*PI/n);
 *     }
 *
 * This array actually only contains the first half of the twiddle
 * factors. Due to symmetry, the second half of the twiddle factors
 * are implied and equal:
 *
 *     for (i=0; i<n/4; i++) {
 *        W[i+n/4].real =  W[i].imag =  sin(i * 2*PI/n);
 *        W[i+n/4].imag = -W[i].real = -cos(i * 2*PI/n);
 *     }
 *
 * Further symmetry allows one to generate the twiddle factor table
 * using half the number of trig computations as follows:
 *
 *     W[0].real = 1.0;
 *     W[0].imag = 0.0;
 *     for (i=1; i<n/4; i++) {
 *        W[i].real       =  cos(i * 2*PI/n);
 *        W[n/4 - i].imag = -W[i].real;
 *     }
 *
 * The complex numbers are packed into quadwords as follows:
 *
 *    quadword                 complex
 *  array element            array elements
 *             -----------------------------------------------------
 *       i    | real 2*i   | imag 2*i   | real 2*i+1 | imag 2*i+1 |
 *             -----------------------------------------------------
 *
 */


static __inline void _fft_1d_r2(vector float *out, vector float *in, vector float *W, int log2_size)
{
  int i, j, k;
  int stage, offset;
  int i_rev;
  int n, n_2, n_4, n_8, n_16, n_3_16;
  int w_stride, w_2stride, w_3stride, w_4stride;
  int stride, stride_2, stride_4, stride_3_4;
  vector float *W0, *W1, *W2, *W3;
  vector float *re0, *re1, *re2, *re3;
  vector float *im0, *im1, *im2, *im3;
  vector float *in0, *in1, *in2, *in3, *in4, *in5, *in6, *in7;
  vector float *out0, *out1, *out2, *out3;
  vector float tmp0, tmp1;
  vector float w0_re, w0_im, w1_re, w1_im;
  vector float w0, w1, w2, w3;
  vector float src_lo0, src_lo1, src_lo2, src_lo3;
  vector float src_hi0, src_hi1, src_hi2, src_hi3;
  vector float dst_lo0, dst_lo1, dst_lo2, dst_lo3;
  vector float dst_hi0, dst_hi1, dst_hi2, dst_hi3;
  vector float out_re_lo0, out_re_lo1, out_re_lo2, out_re_lo3;
  vector float out_im_lo0, out_im_lo1, out_im_lo2, out_im_lo3;
  vector float out_re_hi0, out_re_hi1, out_re_hi2, out_re_hi3;
  vector float out_im_hi0, out_im_hi1, out_im_hi2, out_im_hi3;
  vector float re_lo0, re_lo1, re_lo2, re_lo3;
  vector float im_lo0, im_lo1, im_lo2, im_lo3;
  vector float re_hi0, re_hi1, re_hi2, re_hi3;
  vector float im_hi0, im_hi1, im_hi2, im_hi3;
  vector float pq_lo0, pq_lo1, pq_lo2, pq_lo3;
  vector float pq_hi0, pq_hi1, pq_hi2, pq_hi3;
  vector float re[MAX_FFT_1D_SIZE/4], im[MAX_FFT_1D_SIZE/4];  /* real & imaginary working arrays */
  /* Sign-control multipliers: each quadword holds both P and Q of a
   * butterfly, so the adds/subtracts are done as fused multiply-adds
   * with +1/-1 patterns (plus-plus-minus-minus and plus-minus-minus-plus).
   */
  vector float ppmm = (vector float) { 1.0f,  1.0f, -1.0f, -1.0f};
  vector float pmmp = (vector float) { 1.0f, -1.0f, -1.0f,  1.0f};
  vector unsigned char reverse;
  /* Byte-shuffle patterns (32 source bytes -> 16 result bytes).  The name
   * encodes which 32-bit words of the two concatenated operands are
   * selected, e.g. shuf_0415 picks words 0,4,1,5.
   */
  vector unsigned char shuf_lo = (vector unsigned char) {
                                      0, 1, 2, 3,  4, 5, 6, 7,
                                     16,17,18,19, 20,21,22,23};
  vector unsigned char shuf_hi = (vector unsigned char) {
                                      8, 9,10,11, 12,13,14,15,
                                     24,25,26,27, 28,29,30,31};
  vector unsigned char shuf_0202 = (vector unsigned char) {
                                      0, 1, 2, 3,  8, 9,10,11,
                                      0, 1, 2, 3,  8, 9,10,11};
  vector unsigned char shuf_1313 = (vector unsigned char) {
                                      4, 5, 6, 7, 12,13,14,15,
                                      4, 5, 6, 7, 12,13,14,15};
  vector unsigned char shuf_0303 = (vector unsigned char) {
                                      0, 1, 2, 3, 12,13,14,15,
                                      0, 1, 2, 3, 12,13,14,15};
  vector unsigned char shuf_1212 = (vector unsigned char) {
                                      4, 5, 6, 7,  8, 9,10,11,
                                      4, 5, 6, 7,  8, 9,10,11};
  vector unsigned char shuf_0415 = (vector unsigned char) {
                                      0, 1, 2, 3, 16,17,18,19,
                                      4, 5, 6, 7, 20,21,22,23};
  vector unsigned char shuf_2637 = (vector unsigned char) {
                                      8, 9,10,11, 24,25,26,27,
                                     12,13,14,15, 28,29,30,31};
  vector unsigned char shuf_0246 = (vector unsigned char) {
                                      0, 1, 2, 3,  8, 9,10,11,
                                     16,17,18,19, 24,25,26,27};
  vector unsigned char shuf_1357 = (vector unsigned char) {
                                      4, 5, 6, 7, 12,13,14,15,
                                     20,21,22,23, 28,29,30,31};

  n    = 1 << log2_size;
  n_2  = n >> 1;
  n_4  = n >> 2;
  n_8  = n >> 3;
  n_16 = n >> 4;

  n_3_16 = n_8 + n_16;

  /* Compute a byte reverse shuffle pattern to be used to produce
   * an address bit swap.
   */
  reverse = spu_or(spu_slqwbyte(spu_splats((unsigned char)0x80), log2_size),
		   spu_rlmaskqwbyte(((vec_uchar16){15,14,13,12, 11,10,9,8,
						    7, 6, 5, 4,  3, 2,1,0}),
				    log2_size-16));

  /* Perform the first 3 stages of the FFT. These stages differ from
   * other stages in that the inputs are unscrambled and the data is
   * reformatted into parallel arrays (ie, separate real and imaginary
   * arrays). The term "unscramble" means the bit address reverse the
   * data array. In addition, the first three stages have simple twiddle
   * weighting factors.
   *     stage 1: (1, 0)
   *     stage 2: (1, 0) and (0, -1)
   *     stage 3: (1, 0), (0.707, -0.707), (0, -1), (-0.707, -0.707)
   *
   * The arrays are processed as two halves, simultaneously. The lo (first
   * half) and hi (second half). This is done because the scramble
   * shares source value between each half of the output arrays.
   */
  i = 0;
  i_rev = 0;

  in0 = in;
  in1 = in + n_8;
  in2 = in + n_16;
  in3 = in + n_3_16;

  in4 = in + n_4;
  in5 = in1 + n_4;
  in6 = in2 + n_4;
  in7 = in3 + n_4;

  re0 = re;
  re1 = re + n_8;
  im0 = im;
  im1 = im + n_8;

  /* Stage-3 twiddles: (1,0), (1/sqrt(2), -1/sqrt(2)), (0,-1),
   * (-1/sqrt(2), -1/sqrt(2)) packed one per word.
   */
  w0_re = (vector float) { 1.0f,  INV_SQRT_2,  0.0f, -INV_SQRT_2};
  w0_im = (vector float) { 0.0f, -INV_SQRT_2, -1.0f, -INV_SQRT_2};

  do {
    src_lo0 = in0[i_rev];
    src_lo1 = in1[i_rev];
    src_lo2 = in2[i_rev];
    src_lo3 = in3[i_rev];

    src_hi0 = in4[i_rev];
    src_hi1 = in5[i_rev];
    src_hi2 = in6[i_rev];
    src_hi3 = in7[i_rev];

    /* Perform scramble.
     */
    dst_lo0 = spu_shuffle(src_lo0, src_hi0, shuf_lo);
    dst_hi0 = spu_shuffle(src_lo0, src_hi0, shuf_hi);
    dst_lo1 = spu_shuffle(src_lo1, src_hi1, shuf_lo);
    dst_hi1 = spu_shuffle(src_lo1, src_hi1, shuf_hi);
    dst_lo2 = spu_shuffle(src_lo2, src_hi2, shuf_lo);
    dst_hi2 = spu_shuffle(src_lo2, src_hi2, shuf_hi);
    dst_lo3 = spu_shuffle(src_lo3, src_hi3, shuf_lo);
    dst_hi3 = spu_shuffle(src_lo3, src_hi3, shuf_hi);

    /* Perform the stage 1 butterfly. The multiplier constant, ppmm,
     * is used to control the sign of the operands since a single
     * quadword contains both the P and Q values of the butterfly.
     */
    pq_lo0 = spu_madd(ppmm, dst_lo0, spu_rlqwbyte(dst_lo0, 8));
    pq_hi0 = spu_madd(ppmm, dst_hi0, spu_rlqwbyte(dst_hi0, 8));
    pq_lo1 = spu_madd(ppmm, dst_lo1, spu_rlqwbyte(dst_lo1, 8));
    pq_hi1 = spu_madd(ppmm, dst_hi1, spu_rlqwbyte(dst_hi1, 8));
    pq_lo2 = spu_madd(ppmm, dst_lo2, spu_rlqwbyte(dst_lo2, 8));
    pq_hi2 = spu_madd(ppmm, dst_hi2, spu_rlqwbyte(dst_hi2, 8));
    pq_lo3 = spu_madd(ppmm, dst_lo3, spu_rlqwbyte(dst_lo3, 8));
    pq_hi3 = spu_madd(ppmm, dst_hi3, spu_rlqwbyte(dst_hi3, 8));

    /* Perform the stage 2 butterfly. For this stage, the
     * inputs pq are still interleaved (p.real, p.imag, q.real,
     * q.imag), so we must first re-order the data into
     * parallel arrays as well as perform the reorder
     * associated with the twiddle W[n/4], which equals
     * (0, -1).
     *
     *   ie. (A, B) * (0, -1) => (B, -A)
     */
    re_lo0 = spu_madd(ppmm,
		      spu_shuffle(pq_lo1, pq_lo1, shuf_0303),
		      spu_shuffle(pq_lo0, pq_lo0, shuf_0202));
    im_lo0 = spu_madd(pmmp,
		      spu_shuffle(pq_lo1, pq_lo1, shuf_1212),
		      spu_shuffle(pq_lo0, pq_lo0, shuf_1313));

    re_lo1 = spu_madd(ppmm,
		      spu_shuffle(pq_lo3, pq_lo3, shuf_0303),
		      spu_shuffle(pq_lo2, pq_lo2, shuf_0202));
    im_lo1 = spu_madd(pmmp,
		      spu_shuffle(pq_lo3, pq_lo3, shuf_1212),
		      spu_shuffle(pq_lo2, pq_lo2, shuf_1313));


    re_hi0 = spu_madd(ppmm,
		      spu_shuffle(pq_hi1, pq_hi1, shuf_0303),
		      spu_shuffle(pq_hi0, pq_hi0, shuf_0202));
    im_hi0 = spu_madd(pmmp,
		      spu_shuffle(pq_hi1, pq_hi1, shuf_1212),
		      spu_shuffle(pq_hi0, pq_hi0, shuf_1313));

    re_hi1 = spu_madd(ppmm,
		      spu_shuffle(pq_hi3, pq_hi3, shuf_0303),
		      spu_shuffle(pq_hi2, pq_hi2, shuf_0202));
    im_hi1 = spu_madd(pmmp,
		      spu_shuffle(pq_hi3, pq_hi3, shuf_1212),
		      spu_shuffle(pq_hi2, pq_hi2, shuf_1313));


    /* Perform stage 3 butterfly.
     */
    FFT_1D_BUTTERFLY(re0[0], im0[0], re0[1], im0[1], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im);
    FFT_1D_BUTTERFLY(re1[0], im1[0], re1[1], im1[1], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im);

    re0 += 2;
    re1 += 2;
    im0 += 2;
    im1 += 2;

    /* Advance by 8 complex samples and bit-reverse the index for the
     * next unscrambled fetch (BIT_SWAP is provided by fft_1d.h).
     */
    i += 8;
    i_rev = BIT_SWAP(i, reverse) / 2;
  } while (i < n_2);

  /* Process stages 4 to log2_size-2
   */
  for (stage=4, stride=4; stage<log2_size-1; stage++, stride += stride) {
    w_stride  = n_2 >> stage;
    w_2stride = n   >> stage;
    w_3stride = w_stride  + w_2stride;
    w_4stride = w_2stride + w_2stride;

    W0 = W;
    W1 = W + w_stride;
    W2 = W + w_2stride;
    W3 = W + w_3stride;

    stride_2 = stride >> 1;
    stride_4 = stride >> 2;
    stride_3_4 = stride_2 + stride_4;

    re0 = re;              im0 = im;
    re1 = re + stride_2;   im1 = im + stride_2;
    re2 = re + stride_4;   im2 = im + stride_4;
    re3 = re + stride_3_4; im3 = im + stride_3_4;

    for (i=0, offset=0; i<stride_4; i++, offset += w_4stride) {
      /* Compute the twiddle factors
       */
      w0 = W0[offset];
      w1 = W1[offset];
      w2 = W2[offset];
      w3 = W3[offset];

      tmp0 = spu_shuffle(w0, w2, shuf_0415);
      tmp1 = spu_shuffle(w1, w3, shuf_0415);

      w0_re = spu_shuffle(tmp0, tmp1, shuf_0415);
      w0_im = spu_shuffle(tmp0, tmp1, shuf_2637);

      j = i;
      k = i + stride;
      do {
	re_lo0 = re0[j]; im_lo0 = im0[j];
	re_lo1 = re1[j]; im_lo1 = im1[j];

	re_hi0 = re2[j]; im_hi0 = im2[j];
	re_hi1 = re3[j]; im_hi1 = im3[j];

	re_lo2 = re0[k]; im_lo2 = im0[k];
	re_lo3 = re1[k]; im_lo3 = im1[k];

	re_hi2 = re2[k]; im_hi2 = im2[k];
	re_hi3 = re3[k]; im_hi3 = im3[k];

	FFT_1D_BUTTERFLY   (re0[j], im0[j], re1[j], im1[j], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im);
	FFT_1D_BUTTERFLY_HI(re2[j], im2[j], re3[j], im3[j], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im);

	FFT_1D_BUTTERFLY   (re0[k], im0[k], re1[k], im1[k], re_lo2, im_lo2, re_lo3, im_lo3, w0_re, w0_im);
	FFT_1D_BUTTERFLY_HI(re2[k], im2[k], re3[k], im3[k], re_hi2, im_hi2, re_hi3, im_hi3, w0_re, w0_im);

	j += 2 * stride;
	k += 2 * stride;
      } while (j < n_4);
    }
  }

  /* Process stage log2_size-1. This is identical to the stage processing above
   * except for this stage the inner loop is only executed once so it is removed
   * entirely.
   */
  w_stride  = n_2 >> stage;
  w_2stride = n   >> stage;
  w_3stride = w_stride  + w_2stride;
  w_4stride = w_2stride + w_2stride;

  stride_2 = stride >> 1;
  stride_4 = stride >> 2;

  stride_3_4 = stride_2 + stride_4;

  re0 = re;              im0 = im;
  re1 = re + stride_2;   im1 = im + stride_2;
  re2 = re + stride_4;   im2 = im + stride_4;
  re3 = re + stride_3_4; im3 = im + stride_3_4;

  for (i=0, offset=0; i<stride_4; i++, offset += w_4stride) {
    /* Compute the twiddle factors
     */
    w0 = W[offset];
    w1 = W[offset + w_stride];
    w2 = W[offset + w_2stride];
    w3 = W[offset + w_3stride];

    tmp0 = spu_shuffle(w0, w2, shuf_0415);
    tmp1 = spu_shuffle(w1, w3, shuf_0415);

    w0_re = spu_shuffle(tmp0, tmp1, shuf_0415);
    w0_im = spu_shuffle(tmp0, tmp1, shuf_2637);

    j = i;
    k = i + stride;

    re_lo0 = re0[j]; im_lo0 = im0[j];
    re_lo1 = re1[j]; im_lo1 = im1[j];

    re_hi0 = re2[j]; im_hi0 = im2[j];
    re_hi1 = re3[j]; im_hi1 = im3[j];

    re_lo2 = re0[k]; im_lo2 = im0[k];
    re_lo3 = re1[k]; im_lo3 = im1[k];

    re_hi2 = re2[k]; im_hi2 = im2[k];
    re_hi3 = re3[k]; im_hi3 = im3[k];

    FFT_1D_BUTTERFLY   (re0[j], im0[j], re1[j], im1[j], re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im);
    FFT_1D_BUTTERFLY_HI(re2[j], im2[j], re3[j], im3[j], re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im);

    FFT_1D_BUTTERFLY   (re0[k], im0[k], re1[k], im1[k], re_lo2, im_lo2, re_lo3, im_lo3, w0_re, w0_im);
    FFT_1D_BUTTERFLY_HI(re2[k], im2[k], re3[k], im3[k], re_hi2, im_hi2, re_hi3, im_hi3, w0_re, w0_im);
  }


  /* Process the final stage (stage log2_size). For this stage,
   * reformat the data from parallel arrays back into
   * interleaved arrays, storing the result into <in>.
   *
   * This loop has been manually unrolled by 2 to improve
   * dual issue rates and reduce stalls. This unrolling
   * forces a minimum FFT size of 32.
   */
  re0 = re;
  re1 = re + n_8;
  re2 = re + n_16;
  re3 = re + n_3_16;

  im0 = im;
  im1 = im + n_8;
  im2 = im + n_16;
  im3 = im + n_3_16;

  out0 = out;
  out1 = out + n_4;
  out2 = out + n_8;
  out3 = out1 + n_8;

  i = n_16;

  do {
    /* Fetch the twiddle factors
     */
    w0 = W[0];
    w1 = W[1];
    w2 = W[2];
    w3 = W[3];

    W += 4;

    w0_re = spu_shuffle(w0, w1, shuf_0246);
    w0_im = spu_shuffle(w0, w1, shuf_1357);
    w1_re = spu_shuffle(w2, w3, shuf_0246);
    w1_im = spu_shuffle(w2, w3, shuf_1357);

    /* Fetch the butterfly inputs, reals and imaginaries
     */
    re_lo0 = re0[0]; im_lo0 = im0[0];
    re_lo1 = re1[0]; im_lo1 = im1[0];
    re_lo2 = re0[1]; im_lo2 = im0[1];
    re_lo3 = re1[1]; im_lo3 = im1[1];

    re_hi0 = re2[0]; im_hi0 = im2[0];
    re_hi1 = re3[0]; im_hi1 = im3[0];
    re_hi2 = re2[1]; im_hi2 = im2[1];
    re_hi3 = re3[1]; im_hi3 = im3[1];

    re0 += 2; im0 += 2;
    re1 += 2; im1 += 2;
    re2 += 2; im2 += 2;
    re3 += 2; im3 += 2;

    /* Perform the butterflies
     */
    FFT_1D_BUTTERFLY   (out_re_lo0, out_im_lo0, out_re_lo1, out_im_lo1, re_lo0, im_lo0, re_lo1, im_lo1, w0_re, w0_im);
    FFT_1D_BUTTERFLY   (out_re_lo2, out_im_lo2, out_re_lo3, out_im_lo3, re_lo2, im_lo2, re_lo3, im_lo3, w1_re, w1_im);

    FFT_1D_BUTTERFLY_HI(out_re_hi0, out_im_hi0, out_re_hi1, out_im_hi1, re_hi0, im_hi0, re_hi1, im_hi1, w0_re, w0_im);
    FFT_1D_BUTTERFLY_HI(out_re_hi2, out_im_hi2, out_re_hi3, out_im_hi3, re_hi2, im_hi2, re_hi3, im_hi3, w1_re, w1_im);

    /* Interleave the results and store them into the output buffers (ie,
     * the original input buffers).
     */
    out0[0] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_0415);
    out0[1] = spu_shuffle(out_re_lo0, out_im_lo0, shuf_2637);
    out0[2] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_0415);
    out0[3] = spu_shuffle(out_re_lo2, out_im_lo2, shuf_2637);

    out1[0] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_0415);
    out1[1] = spu_shuffle(out_re_lo1, out_im_lo1, shuf_2637);
    out1[2] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_0415);
    out1[3] = spu_shuffle(out_re_lo3, out_im_lo3, shuf_2637);

    out2[0] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_0415);
    out2[1] = spu_shuffle(out_re_hi0, out_im_hi0, shuf_2637);
    out2[2] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_0415);
    out2[3] = spu_shuffle(out_re_hi2, out_im_hi2, shuf_2637);

    out3[0] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_0415);
    out3[1] = spu_shuffle(out_re_hi1, out_im_hi1, shuf_2637);
    out3[2] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_0415);
    out3[3] = spu_shuffle(out_re_hi3, out_im_hi3, shuf_2637);

    out0 += 4;
    out1 += 4;
    out2 += 4;
    out3 += 4;

    i -= 2;
  } while (i);
}

#endif /* _FFT_1D_R2_H_ */