GNU Radio Manual and C++ API Reference  3.7.2.1
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
gc_spu_macs.h
Go to the documentation of this file.
1 /* -*- asm -*- */
2 /*
3  * Copyright 2008 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #ifndef INCLUDED_GC_SPU_MACS_H
23 #define INCLUDED_GC_SPU_MACS_H
24 
25 /*
26  * This file contains a set of macros that are generally useful when
27  * coding in SPU assembler
28  *
29  * Note that the multi-instruction macros in here may overwrite
30  * registers 77, 78, and 79 without warning.
31  */
32 
33 /*
34  * Symbolic names r0..r127 for all 128 registers (the assembler spells them $0..$127)
35  */
36 #define r0 $0
37 #define r1 $1
38 #define r2 $2
39 #define r3 $3
40 #define r4 $4
41 #define r5 $5
42 #define r6 $6
43 #define r7 $7
44 #define r8 $8
45 #define r9 $9
46 #define r10 $10
47 #define r11 $11
48 #define r12 $12
49 #define r13 $13
50 #define r14 $14
51 #define r15 $15
52 #define r16 $16
53 #define r17 $17
54 #define r18 $18
55 #define r19 $19
56 #define r20 $20
57 #define r21 $21
58 #define r22 $22
59 #define r23 $23
60 #define r24 $24
61 #define r25 $25
62 #define r26 $26
63 #define r27 $27
64 #define r28 $28
65 #define r29 $29
66 #define r30 $30
67 #define r31 $31
68 #define r32 $32
69 #define r33 $33
70 #define r34 $34
71 #define r35 $35
72 #define r36 $36
73 #define r37 $37
74 #define r38 $38
75 #define r39 $39
76 #define r40 $40
77 #define r41 $41
78 #define r42 $42
79 #define r43 $43
80 #define r44 $44
81 #define r45 $45
82 #define r46 $46
83 #define r47 $47
84 #define r48 $48
85 #define r49 $49
86 #define r50 $50
87 #define r51 $51
88 #define r52 $52
89 #define r53 $53
90 #define r54 $54
91 #define r55 $55
92 #define r56 $56
93 #define r57 $57
94 #define r58 $58
95 #define r59 $59
96 #define r60 $60
97 #define r61 $61
98 #define r62 $62
99 #define r63 $63
100 #define r64 $64
101 #define r65 $65
102 #define r66 $66
103 #define r67 $67
104 #define r68 $68
105 #define r69 $69
106 #define r70 $70
107 #define r71 $71
108 #define r72 $72
109 #define r73 $73
110 #define r74 $74
111 #define r75 $75
112 #define r76 $76
113 #define r77 $77
114 #define r78 $78
115 #define r79 $79
116 #define r80 $80
117 #define r81 $81
118 #define r82 $82
119 #define r83 $83
120 #define r84 $84
121 #define r85 $85
122 #define r86 $86
123 #define r87 $87
124 #define r88 $88
125 #define r89 $89
126 #define r90 $90
127 #define r91 $91
128 #define r92 $92
129 #define r93 $93
130 #define r94 $94
131 #define r95 $95
132 #define r96 $96
133 #define r97 $97
134 #define r98 $98
135 #define r99 $99
136 #define r100 $100
137 #define r101 $101
138 #define r102 $102
139 #define r103 $103
140 #define r104 $104
141 #define r105 $105
142 #define r106 $106
143 #define r107 $107
144 #define r108 $108
145 #define r109 $109
146 #define r110 $110
147 #define r111 $111
148 #define r112 $112
149 #define r113 $113
150 #define r114 $114
151 #define r115 $115
152 #define r116 $116
153 #define r117 $117
154 #define r118 $118
155 #define r119 $119
156 #define r120 $120
157 #define r121 $121
158 #define r122 $122
159 #define r123 $123
160 #define r124 $124
161 #define r125 $125
162 #define r126 $126
163 #define r127 $127
164 
165 
166 #define lr r0 // link register (the return macros below branch through it)
167 #define sp r1 // stack pointer
168  // r2 is the environment pointer for languages that need one (e.g. ALGOL)
169 
170 #define retval r3 // return values are passed in regs starting at r3 (same register as arg1)
171 
172 #define arg1 r3 // args are passed in consecutive regs starting at r3
173 #define arg2 r4
174 #define arg3 r5
175 #define arg4 r6
176 #define arg5 r7
177 #define arg6 r8
178 #define arg7 r9
179 #define arg8 r10
180 #define arg9 r11
181 #define arg10 r12
182 
183 // r3 - r74 are volatile (caller saves)
184 // r75 - r79 are volatile (scratch regs possibly destroyed by fct prolog/epilog)
185 // r80 - r127 are non-volatile (callee saves)
186 
187 // Scratch registers (r77 - r79) reserved for use by the macros in this file.
188 // Do not keep a live value in them across a macro invocation: they may be overwritten.
189 #define _gc_t0 r79
190 #define _gc_t1 r78
191 #define _gc_t2 r77
192 
193 /*
194  * ----------------------------------------------------------------
195  * pseudo ops: PROC_ENTRY(name) opens the definition of global function `name`
196  * ----------------------------------------------------------------
197  */
198 #define PROC_ENTRY(name) \
199  .text; /* emit into the code section */ \
200  .p2align 4; /* align the entry point to 2^4 = 16 bytes */ \
201  .global name; /* make the symbol visible to the linker */ \
202  .type name, @function; /* mark the symbol as a function */ \
203 name:
204 
205 /*
206  * ----------------------------------------------------------------
207  * aliases for common operations
208  * ----------------------------------------------------------------
209  */
210 
211 // Move register: rt = ra, implemented as ra | ra (even pipe, 2 cycles)
212 #define MR(rt, ra) or rt, ra, ra;
213 
214 // Move register: rt = ra, implemented as a quadword rotate by 0 bytes (odd pipe, 4 cycles)
215 #define LMR(rt, ra) rotqbyi rt, ra, 0;
216 
217 // Return: branch indirect to the address held in the link register
218 #define RETURN() bi lr;
219 
220 // Branch hint for a return: the branch instruction at ret_label will target lr
221 #define HINT_RETURN(ret_label) hbr ret_label, lr;
222 
223 // Return if rt is zero (word test; see BRHZ_RETURN for the halfword test)
224 #define BRZ_RETURN(rt) biz rt, lr;
225 
226 // Return if rt is not zero (word test; see BRHNZ_RETURN for the halfword test)
227 #define BRNZ_RETURN(rt) binz rt, lr;
228 
229 // Return if the halfword in rt is zero
230 #define BRHZ_RETURN(rt) bihz rt, lr;
231 
232 // Return if the halfword in rt is not zero
233 #define BRHNZ_RETURN(rt) bihnz rt, lr;
234 
235 
236 /*
237  * ----------------------------------------------------------------
238  * modulo like things for constant moduli that are powers of 2
239  * ----------------------------------------------------------------
240  */
241 
242 // rt = ra & (pow2 - 1): the low bits of ra, i.e. ra modulo the power-of-2 constant pow2
243 #define MODULO(rt, ra, pow2) \
244  andi rt, ra, (pow2)-1;
245 
246 // rt = pow2 - (ra & (pow2 - 1)); note this yields pow2, not 0, when ra is a multiple of pow2
247 #define MODULO_NEG(rt, ra, pow2) \
248  andi rt, ra, (pow2)-1; \
249  sfi rt, rt, (pow2); /* subtract from immediate: rt = pow2 - rt */
250 
251 // rt = ra & -(pow2): ra rounded down to a multiple of pow2
252 #define ROUND_DOWN(rt, ra, pow2) \
253  andi rt, ra, -(pow2);
254 
255 // rt = (ra + (pow2 - 1)) & -(pow2): ra rounded up to a multiple of pow2
256 #define ROUND_UP(rt, ra, pow2) \
257  ai rt, ra, (pow2)-1; \
258  andi rt, rt, -(pow2);
259 
260 /*
261  * ----------------------------------------------------------------
262  * Splat - replicate a particular slot into all slots
263  * Altivec analogs...
264  * ----------------------------------------------------------------
265  */
266 
267 // replicate byte from slot s [0,15] of ra into every byte of rt; overwrites _gc_t0
268 #define VSPLTB(rt, ra, s) \
269  ilh _gc_t0, (s)*0x0101; /* shuffle pattern: byte index s in both bytes of each halfword */ \
270  shufb rt, ra, ra, _gc_t0;
271 
272 // replicate halfword from slot s [0,7] of ra into every halfword of rt; overwrites _gc_t0
273 #define VSPLTH(rt, ra, s) \
274  ilh _gc_t0, 2*(s)*0x0101 + 0x0001; /* shuffle pattern: byte indices 2s, 2s+1 in each halfword */ \
275  shufb rt, ra, ra, _gc_t0;
276 
277 // replicate word from slot s [0,3] of ra into every word of rt; overwrites _gc_t0
278 #define VSPLTW(rt, ra, s) \
279  ilhu _gc_t0, 4*(s)*0x0101 + 0x0001; /* upper halfword of pattern: byte indices 4s, 4s+1 (mnemonic is ilhu, not iluh) */ \
280  iohl _gc_t0, 4*(s)*0x0101 + 0x0203; /* OR in lower halfword: byte indices 4s+2, 4s+3 */ \
281  shufb rt, ra, ra, _gc_t0;
282 
283 // replicate double from slot s [0,1] of ra into both doubles of rt; overwrites _gc_t0
284 #define VSPLTD(rt, ra, s) \
285  /* sp is always 16-byte aligned, so 8(sp) selects doubleword position 1 */ \
286  cdd _gc_t0, 8(sp); /* 0x10111213 14151617 00010203 04050607 */ \
287  rotqbyi rt, ra, (s) << 3; /* rotate double s into preferred slot (rotqbyi takes rt, ra, imm) */ \
288  shufb rt, rt, rt, _gc_t0; /* both sources are rt, so bytes 0-7 land in both halves */
289 
290 /*
291  * ----------------------------------------------------------------
292  * lots of min/max variations...
293  *
294  * On a slot by slot basis, compute the min or max
295  *
296  * U - unsigned, else signed
297  * B,H,{} - byte, halfword, word
298  * F float
299  * ----------------------------------------------------------------
300  */
301 
302 #define MIN_SELB(rt, ra, rb, rc) selb rt, ra, rb, rc; // rc = (ra > rb) mask: take rb where set, else ra
303 #define MAX_SELB(rt, ra, rb, rc) selb rt, rb, ra, rc; // rc = (ra > rb) mask: take ra where set, else rb
304 
305  // words: cgt for the signed variants, clgt for the unsigned (U) variants; all overwrite _gc_t0
306 // MIN: rt = per-slot signed minimum of ra and rb
307 #define MIN(rt, ra, rb) \
308  cgt _gc_t0, ra, rb; /* mask of slots where ra > rb */ \
309  MIN_SELB(rt, ra, rb, _gc_t0)
310 // MAX: rt = per-slot signed maximum of ra and rb
311 #define MAX(rt, ra, rb) \
312  cgt _gc_t0, ra, rb; \
313  MAX_SELB(rt, ra, rb, _gc_t0)
314 // UMIN: rt = per-slot unsigned minimum of ra and rb
315 #define UMIN(rt, ra, rb) \
316  clgt _gc_t0, ra, rb; \
317  MIN_SELB(rt, ra, rb, _gc_t0)
318 // UMAX: rt = per-slot unsigned maximum of ra and rb
319 #define UMAX(rt, ra, rb) \
320  clgt _gc_t0, ra, rb; \
321  MAX_SELB(rt, ra, rb, _gc_t0)
322 
323  // bytes: cgtb for the signed variants, clgtb for the unsigned (U) variants; all overwrite _gc_t0
324 // MINB: rt = per-byte signed minimum of ra and rb
325 #define MINB(rt, ra, rb) \
326  cgtb _gc_t0, ra, rb; /* mask of bytes where ra > rb */ \
327  MIN_SELB(rt, ra, rb, _gc_t0)
328 // MAXB: rt = per-byte signed maximum of ra and rb
329 #define MAXB(rt, ra, rb) \
330  cgtb _gc_t0, ra, rb; \
331  MAX_SELB(rt, ra, rb, _gc_t0)
332 // UMINB: rt = per-byte unsigned minimum of ra and rb
333 #define UMINB(rt, ra, rb) \
334  clgtb _gc_t0, ra, rb; \
335  MIN_SELB(rt, ra, rb, _gc_t0)
336 // UMAXB: rt = per-byte unsigned maximum of ra and rb
337 #define UMAXB(rt, ra, rb) \
338  clgtb _gc_t0, ra, rb; \
339  MAX_SELB(rt, ra, rb, _gc_t0)
340 
341  // halfwords: cgth for the signed variants, clgth for the unsigned (U) variants; all overwrite _gc_t0
342 // MINH: rt = per-halfword signed minimum of ra and rb
343 #define MINH(rt, ra, rb) \
344  cgth _gc_t0, ra, rb; /* mask of halfwords where ra > rb */ \
345  MIN_SELB(rt, ra, rb, _gc_t0)
346 // MAXH: rt = per-halfword signed maximum of ra and rb
347 #define MAXH(rt, ra, rb) \
348  cgth _gc_t0, ra, rb; \
349  MAX_SELB(rt, ra, rb, _gc_t0)
350 // UMINH: rt = per-halfword unsigned minimum of ra and rb
351 #define UMINH(rt, ra, rb) \
352  clgth _gc_t0, ra, rb; \
353  MIN_SELB(rt, ra, rb, _gc_t0)
354 // UMAXH: rt = per-halfword unsigned maximum of ra and rb
355 #define UMAXH(rt, ra, rb) \
356  clgth _gc_t0, ra, rb; \
357  MAX_SELB(rt, ra, rb, _gc_t0)
358 
359  // floats: fcgt compares values, fcmgt compares magnitudes; all overwrite _gc_t0
360 // FMIN: rt = per-slot minimum of the floats in ra and rb
361 #define FMIN(rt, ra, rb) \
362  fcgt _gc_t0, ra, rb; /* mask of slots where ra > rb */ \
363  MIN_SELB(rt, ra, rb, _gc_t0)
364 // FMAX: rt = per-slot maximum of the floats in ra and rb
365 #define FMAX(rt, ra, rb) \
366  fcgt _gc_t0, ra, rb; \
367  MAX_SELB(rt, ra, rb, _gc_t0)
368 
369 // Ignoring the sign, select per slot the value with the smaller magnitude (its sign is kept)
370 #define FMINMAG(rt, ra, rb) \
371  fcmgt _gc_t0, ra, rb; /* mask of slots where |ra| > |rb| */ \
372  MIN_SELB(rt, ra, rb, _gc_t0)
373 
374 // Ignoring the sign, select per slot the value with the larger magnitude (its sign is kept)
375 #define FMAXMAG(rt, ra, rb) \
376  fcmgt _gc_t0, ra, rb; \
377  MAX_SELB(rt, ra, rb, _gc_t0)
378 
379 
380 #endif /* INCLUDED_GC_SPU_MACS_H */