Changeset 8968
- Timestamp:
- 07/21/08 21:43:45
- Files:
-
- gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/Makefile.am (modified) (2 diffs)
- gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_altivec.c (added)
- gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_altivec.h (added)
- gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_vmx.cc (modified) (7 diffs)
- gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/general/gr_math.h (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/Makefile.am
r8961 r8968 171 171 gr_fir_sysconfig_powerpc.cc \ 172 172 gr_cpu_powerpc.cc \ 173 gr_fir_fff_vmx.cc 173 gr_fir_fff_vmx.cc \ 174 gr_altivec.c 174 175 175 176 powerpc_qa_CODE = \ … … 263 264 float_dotprod_x86.h \ 264 265 gr_adaptive_fir_ccf.h \ 266 gr_altivec.h \ 265 267 gr_cma_equalizer_cc.h \ 266 268 gr_cpu.h \ gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_vmx.cc
r8964 r8968 24 24 #endif 25 25 #include <gr_fir_fff_vmx.h> 26 #include <altivec.h>27 26 #include <stdlib.h> 28 27 #include <stdexcept> 29 28 #include <assert.h> 30 31 32 static const size_t VS = sizeof(vector float); 33 static const size_t FLOATS_PER_VEC = 4; 34 35 union v_float_u { 36 vector float v; 37 float f[FLOATS_PER_VEC]; 38 }; 39 40 void 41 print_vector_float(FILE *fp, vector float v) 42 { 43 v_float_u u; 44 u.v = v; 45 fprintf(fp, "{ %f, %f, %f, %f }", u.f[0], u.f[1], u.f[2], u.f[3]); 46 } 47 48 void 49 print_vector_float_nl(FILE *fp, vector float v) 50 { 51 print_vector_float(fp, v); 52 putc('\n', fp); 53 } 54 55 void 56 pvf(FILE *fp, const char *name, vector float v) 57 { 58 fprintf(fp, "%s = ", name); 59 print_vector_float_nl(fp, v); 60 } 61 62 static float 63 horizontal_add_f(vector float v) 64 { 65 v_float_u u; 66 vector float t0 = vec_add(v, vec_sld(v, v, 8)); 67 vector float t1 = vec_add(t0, vec_sld(t0, t0, 4)); 68 u.v = t1; 69 return u.f[0]; 70 } 71 72 static inline size_t 73 gr_round_down(size_t x, size_t pow2) 74 { 75 return x & -pow2; 76 } 77 78 static inline size_t 79 gr_round_up(size_t x, size_t pow2) 80 { 81 return gr_round_down(x + pow2 - 1, pow2); 82 } 83 84 static inline size_t 85 gr_modulo(size_t x, size_t pow2) 86 { 87 return x & (pow2 - 1); 88 } 89 90 static inline size_t 91 gr_modulo_neg(size_t x, size_t pow2) 92 { 93 return pow2 - gr_modulo(x, pow2); 94 } 29 #include <gr_math.h> 30 #include <gr_altivec.h> 31 32 extern "C" { 95 33 96 34 #if 0 … … 106 44 } 107 45 108 #elif 0109 /*110 * preconditions:111 *112 * n > 0 and a multiple of 4113 * a 4-byte aligned114 * b 4-byte aligned115 */116 float117 dotprod_fff_vmx(const float *a, const float *b, size_t n)118 {119 static const size_t FLOATS_PER_LOOP = 1 * FLOATS_PER_VEC;120 121 size_t loop_cnt = n / FLOATS_PER_LOOP;122 vector float acc0 = {0, 0, 0, 0};123 124 vector unsigned char lvsl_a;125 vector float msq_a0; // most significant quadword126 vector float lsq_a0; // least significant quadword127 128 vector unsigned char lvsl_b;129 vector float msq_b0; // most significant quadword130 vector float lsq_b0; // least significant quadword131 132 lvsl_a = vec_lvsl(0, a);133 msq_a0 = vec_ld(0, a);134 a += FLOATS_PER_VEC;135 136 lvsl_b = vec_lvsl(0, b);137 msq_b0 = vec_ld(0, b);138 b += FLOATS_PER_VEC;139 140 for (size_t i = 0; i < loop_cnt; i++){141 lsq_a0 = vec_ld(0, a);142 lsq_b0 = vec_ld(0, b);143 a += FLOATS_PER_VEC;144 b += FLOATS_PER_VEC;145 146 vector float va = vec_perm(msq_a0, lsq_a0, lvsl_a);147 msq_a0 = lsq_a0;148 149 vector float vb = vec_perm(msq_b0, lsq_b0, lvsl_b);150 msq_b0 = lsq_b0;151 152 acc0 = vec_madd(va, vb, acc0);153 }154 155 return horizontal_add_f(acc0);156 }157 158 46 #else 159 /*47 /* 160 48 * preconditions: 161 49 * … … 172 60 static const size_t UNROLL_CNT = 4; 173 61 62 n = gr_p2_round_down(n, 4); 174 63 size_t loop_cnt = n / (UNROLL_CNT * FLOATS_PER_VEC); 175 64 size_t nleft = n % (UNROLL_CNT * FLOATS_PER_VEC); … … 190 79 // wind in 191 80 192 p0 = vec_ld(0*VS, a); 193 p1 = vec_ld(1*VS, a); 194 p2 = vec_ld(2*VS, a); 195 p3 = vec_ld(3*VS, a); 81 register int r0vs = 0 * VS; 82 register int r1vs = 1 * VS; 83 register int r2vs = 2 * VS; 84 register int r3vs = 3 * VS; 85 86 p0 = vec_ld(r0vs, a); 87 p1 = vec_ld(r1vs, a); 88 p2 = vec_ld(r2vs, a); 89 p3 = vec_ld(r3vs, a); 90 a += UNROLL_CNT; 196 91 197 92 a0 = vec_perm(p0, p1, lvsl_a); 198 b0 = vec_ld( 0*VS, b);199 p0 = vec_ld( (UNROLL_CNT + 0)*VS, a);93 b0 = vec_ld(r0vs, b); 94 p0 = vec_ld(r0vs, a); 200 95 201 96 for (size_t i = 0; i < loop_cnt; i++){ 202 97 203 98 a1 = vec_perm(p1, p2, lvsl_a); 204 b1 = vec_ld( 1*VS, b);205 p1 = vec_ld( (UNROLL_CNT + 1)*VS, a);99 b1 = vec_ld(r1vs, b); 100 p1 = vec_ld(r1vs, a); 206 101 acc0 = vec_madd(a0, b0, acc0); 207 102 208 103 a2 = vec_perm(p2, p3, lvsl_a); 209 b2 = vec_ld( 2*VS, b);210 p2 = vec_ld( (UNROLL_CNT + 2)*VS, a);104 b2 = vec_ld(r2vs, b); 105 p2 = vec_ld(r2vs, a); 211 106 acc1 = vec_madd(a1, b1, acc1); 212 107 213 108 a3 = vec_perm(p3, p0, lvsl_a); 214 b3 = vec_ld( 3*VS, b);215 p3 = vec_ld( (UNROLL_CNT + 3)*VS, a);109 b3 = vec_ld(r3vs, b); 110 p3 = vec_ld(r3vs, a); 216 111 acc2 = vec_madd(a2, b2, acc2); 217 112 … … 220 115 221 116 a0 = vec_perm(p0, p1, lvsl_a); 222 b0 = vec_ld( 0*VS, b);223 p0 = vec_ld( (UNROLL_CNT + 0)*VS, a);117 b0 = vec_ld(r0vs, b); 118 p0 = vec_ld(r0vs, a); 224 119 acc3 = vec_madd(a3, b3, acc3); 225 120 } 226 121 227 assert((nleft % 4) == 0); 228 229 switch (nleft/4){ 122 /* 123 * The compiler ought to be able to figure out that 0, 4, 8 and 12 124 * are the only possible values for nleft. 125 */ 126 switch (nleft){ 230 127 case 0: 231 128 break; 232 129 233 case 1:234 acc0 = vec_madd(a0, b0, acc0); 235 break; 236 237 case 2:130 case 4: 131 acc0 = vec_madd(a0, b0, acc0); 132 break; 133 134 case 8: 238 135 a1 = vec_perm(p1, p2, lvsl_a); 239 b1 = vec_ld( 1*VS, b);136 b1 = vec_ld(r1vs, b); 240 137 acc0 = vec_madd(a0, b0, acc0); 241 138 acc1 = vec_madd(a1, b1, acc1); 242 139 break; 243 140 244 case 3:141 case 12: 245 142 a1 = vec_perm(p1, p2, lvsl_a); 246 b1 = vec_ld( 1*VS, b);143 b1 = vec_ld(r1vs, b); 247 144 acc0 = vec_madd(a0, b0, acc0); 248 145 a2 = vec_perm(p2, p3, lvsl_a); 249 b2 = vec_ld( 2*VS, b);146 b2 = vec_ld(r2vs, b); 250 147 acc1 = vec_madd(a1, b1, acc1); 251 148 acc2 = vec_madd(a2, b2, acc2); 252 break;253 254 default:255 assert(0);256 149 break; 257 150 } … … 265 158 266 159 #endif 160 } 267 161 268 162 gr_fir_fff_vmx::gr_fir_fff_vmx() … … 291 185 { 292 186 gr_fir_fff_generic::set_taps(inew_taps); // call superclass 293 d_naligned_taps = gr_ round_up(ntaps(), FLOATS_PER_VEC);187 d_naligned_taps = gr_p2_round_up(ntaps(), FLOATS_PER_VEC); 294 188 295 189 if (d_aligned_taps){ gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/general/gr_math.h
r8244 r8968 175 175 } 176 176 177 /*! 178 * \param x any value 179 * \param pow2 must be a power of 2 180 * \returns \p x rounded down to a multiple of \p pow2. 181 */ 182 static inline size_t 183 gr_p2_round_down(size_t x, size_t pow2) 184 { 185 return x & -pow2; 186 } 187 188 /*! 189 * \param x any value 190 * \param pow2 must be a power of 2 191 * \returns \p x rounded up to a multiple of \p pow2. 192 */ 193 static inline size_t 194 gr_p2_round_up(size_t x, size_t pow2) 195 { 196 return gr_p2_round_down(x + pow2 - 1, pow2); 197 } 198 199 /*! 200 * \param x any value 201 * \param pow2 must be a power of 2 202 * \returns \p x modulo \p pow2. 203 */ 204 static inline size_t 205 gr_p2_modulo(size_t x, size_t pow2) 206 { 207 return x & (pow2 - 1); 208 } 209 210 /*! 211 * \param x any value 212 * \param pow2 must be a power of 2 213 * \returns \p pow2 - (\p x modulo \p pow2). 214 */ 215 static inline size_t 216 gr_p2_modulo_neg(size_t x, size_t pow2) 217 { 218 return pow2 - gr_p2_modulo(x, pow2); 219 } 220 177 221 #endif /* _GR_MATH_H_ */
