GNU Radio 3.7.1 C++ API
gc_spu_macs.h
Go to the documentation of this file.
00001 /* -*- asm -*- */
00002 /*
00003  * Copyright 2008 Free Software Foundation, Inc.
00004  *
00005  * This file is part of GNU Radio
00006  *
00007  * GNU Radio is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 3, or (at your option)
00010  * any later version.
00011  *
00012  * GNU Radio is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License along
00018  * with this program; if not, write to the Free Software Foundation, Inc.,
00019  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00020  */
00021 
00022 #ifndef INCLUDED_GC_SPU_MACS_H
00023 #define INCLUDED_GC_SPU_MACS_H
00024 
00025 /*
00026  * This file contains a set of macros that are generally useful when
00027  * coding in SPU assembler
00028  *
00029  * Note that the multi-instruction macros in here may overwrite
00030  * registers 77, 78, and 79 without warning.
00031  */
00032 
00033 /*
00034  * defines for all registers
00035  */
00036 #define r0      $0
00037 #define r1      $1
00038 #define r2      $2
00039 #define r3      $3
00040 #define r4      $4
00041 #define r5      $5
00042 #define r6      $6
00043 #define r7      $7
00044 #define r8      $8
00045 #define r9      $9
00046 #define r10     $10
00047 #define r11     $11
00048 #define r12     $12
00049 #define r13     $13
00050 #define r14     $14
00051 #define r15     $15
00052 #define r16     $16
00053 #define r17     $17
00054 #define r18     $18
00055 #define r19     $19
00056 #define r20     $20
00057 #define r21     $21
00058 #define r22     $22
00059 #define r23     $23
00060 #define r24     $24
00061 #define r25     $25
00062 #define r26     $26
00063 #define r27     $27
00064 #define r28     $28
00065 #define r29     $29
00066 #define r30     $30
00067 #define r31     $31
00068 #define r32     $32
00069 #define r33     $33
00070 #define r34     $34
00071 #define r35     $35
00072 #define r36     $36
00073 #define r37     $37
00074 #define r38     $38
00075 #define r39     $39
00076 #define r40     $40
00077 #define r41     $41
00078 #define r42     $42
00079 #define r43     $43
00080 #define r44     $44
00081 #define r45     $45
00082 #define r46     $46
00083 #define r47     $47
00084 #define r48     $48
00085 #define r49     $49
00086 #define r50     $50
00087 #define r51     $51
00088 #define r52     $52
00089 #define r53     $53
00090 #define r54     $54
00091 #define r55     $55
00092 #define r56     $56
00093 #define r57     $57
00094 #define r58     $58
00095 #define r59     $59
00096 #define r60     $60
00097 #define r61     $61
00098 #define r62     $62
00099 #define r63     $63
00100 #define r64     $64
00101 #define r65     $65
00102 #define r66     $66
00103 #define r67     $67
00104 #define r68     $68
00105 #define r69     $69
00106 #define r70     $70
00107 #define r71     $71
00108 #define r72     $72
00109 #define r73     $73
00110 #define r74     $74
00111 #define r75     $75
00112 #define r76     $76
00113 #define r77     $77
00114 #define r78     $78
00115 #define r79     $79
00116 #define r80     $80
00117 #define r81     $81
00118 #define r82     $82
00119 #define r83     $83
00120 #define r84     $84
00121 #define r85     $85
00122 #define r86     $86
00123 #define r87     $87
00124 #define r88     $88
00125 #define r89     $89
00126 #define r90     $90
00127 #define r91     $91
00128 #define r92     $92
00129 #define r93     $93
00130 #define r94     $94
00131 #define r95     $95
00132 #define r96     $96
00133 #define r97     $97
00134 #define r98     $98
00135 #define r99     $99
00136 #define r100    $100
00137 #define r101    $101
00138 #define r102    $102
00139 #define r103    $103
00140 #define r104    $104
00141 #define r105    $105
00142 #define r106    $106
00143 #define r107    $107
00144 #define r108    $108
00145 #define r109    $109
00146 #define r110    $110
00147 #define r111    $111
00148 #define r112    $112
00149 #define r113    $113
00150 #define r114    $114
00151 #define r115    $115
00152 #define r116    $116
00153 #define r117    $117
00154 #define r118    $118
00155 #define r119    $119
00156 #define r120    $120
00157 #define r121    $121
00158 #define r122    $122
00159 #define r123    $123
00160 #define r124    $124
00161 #define r125    $125
00162 #define r126    $126
00163 #define r127    $127
00164 
00165 
00166 #define lr      r0      // link register
00167 #define sp      r1      // stack pointer
00168                         // r2 is environment pointer for langs that need it (ALGOL)
00169 
00170 #define retval  r3      // return values are passed in regs starting at r3
00171 
00172 #define arg1    r3      // args are passed in regs starting at r3
00173 #define arg2    r4
00174 #define arg3    r5
00175 #define arg4    r6
00176 #define arg5    r7
00177 #define arg6    r8
00178 #define arg7    r9
00179 #define arg8    r10
00180 #define arg9    r11
00181 #define arg10   r12
00182 
00183 //  r3 -  r74 are volatile (caller saves)
00184 // r75 -  r79 are volatile (scratch regs possibly destroyed by fct prolog/epilog)
00185 // r80 - r127 are non-volatile (callee saves)
00186 
00187 // scratch registers reserved for use by the macros in this file.
00188 // (of the three, only _gc_t0 is written by the macros currently defined here)
00189 #define _gc_t0  r79
00190 #define _gc_t1  r78
00191 #define _gc_t2  r77
00192 
00193 /*
00194  * ----------------------------------------------------------------
00195  *                          pseudo ops
00196  * ----------------------------------------------------------------
00197  */
00198 #define PROC_ENTRY(name)                \
00199         .text;                          \
00200         .p2align 4;                     \
00201         .global name;                   \
00202         .type   name, @function;        \
00203 name:
00204 
00205 /*
00206  * ----------------------------------------------------------------
00207  *                  aliases for common operations
00208  * ----------------------------------------------------------------
00209  */
00210 
00211 // Move register (even pipe, 2 cycles)
00212 #define MR(rt, ra)                      or      rt, ra, ra;
00213 
00214 // Move register (odd pipe, 4 cycles)
00215 #define LMR(rt, ra)                     rotqbyi rt, ra, 0;
00216 
00217 // return
00218 #define RETURN()                        bi      lr;
00219 
00220 // hint for a return
00221 #define HINT_RETURN(ret_label)          hbr     ret_label, lr;
00222 
00223 // return if zero
00224 #define BRZ_RETURN(rt)                  biz     rt, lr;
00225 
00226 // return if not zero
00227 #define BRNZ_RETURN(rt)                 binz    rt, lr;
00228 
00229 // return if halfword zero
00230 #define BRHZ_RETURN(rt)                 bihz    rt, lr;
00231 
00232 // return if halfword not zero
00233 #define BRHNZ_RETURN(rt)                bihnz   rt, lr;
00234 
00235 
00236 /*
00237  * ----------------------------------------------------------------
00238  * modulo like things for constant moduli that are powers of 2
00239  * ----------------------------------------------------------------
00240  */
00241 
00242 // rt = ra & (pow2 - 1)
00243 #define MODULO(rt, ra, pow2) \
00244         andi    rt, ra, (pow2)-1;
00245 
00246 // rt = pow2 - (ra & (pow2 - 1)); note: yields pow2 (not 0) when ra is already a multiple of pow2
00247 #define MODULO_NEG(rt, ra, pow2) \
00248         andi    rt, ra, (pow2)-1;               \
00249         sfi     rt, rt, (pow2);
00250 
00251 // rt = ra & -(pow2)
00252 #define ROUND_DOWN(rt, ra, pow2) \
00253         andi    rt, ra, -(pow2);
00254 
00255 // rt = (ra + (pow2 - 1)) & -(pow2)
00256 #define ROUND_UP(rt, ra, pow2) \
00257         ai      rt, ra, (pow2)-1;               \
00258         andi    rt, rt, -(pow2);
00259 
00260 /*
00261  * ----------------------------------------------------------------
00262  * Splat - replicate a particular slot into all slots
00263  * (Altivec analogs).  Each macro overwrites _gc_t0, so ra must not be _gc_t0.
00264  * ----------------------------------------------------------------
00265  */
00266 
00267 // replicate byte from slot s [0,15]
00268 #define VSPLTB(rt, ra, s) \
00269         ilh     _gc_t0, (s)*0x0101;             \
00270         shufb   rt, ra, ra, _gc_t0;
00271 
00272 // replicate halfword from slot s [0,7]
00273 #define VSPLTH(rt, ra, s) \
00274         ilh     _gc_t0, 2*(s)*0x0101 + 0x0001;  \
00275         shufb   rt, ra, ra, _gc_t0;
00276 
00277 // replicate word from slot s [0,3]; the shufb pattern is built in _gc_t0 (clobbered)
00278 #define VSPLTW(rt, ra, s) \
00279         ilhu    _gc_t0, 4*(s)*0x0101 + 0x0001;  /* upper halfword: byte indices 4s,   4s+1 */ \
00280         iohl    _gc_t0, 4*(s)*0x0101 + 0x0203;  /* lower halfword: byte indices 4s+2, 4s+3 */ \
00281         shufb   rt, ra, ra, _gc_t0;
00282 
00283 // replicate double from slot s [0,1]; the shufb pattern is built in _gc_t0 (clobbered)
00284 #define VSPLTD(rt, ra, s) \
00285         /* sp is always 16-byte aligned */ \
00286         cdd     _gc_t0, 8(sp);          /* 0x10111213 14151617 00010203 04050607 */ \
00287         rotqbyi rt, ra, (s) << 3;       /* rotate double into preferred slot     */ \
00288         shufb   rt, rt, rt, _gc_t0;
00289 
00290 /*
00291  * ----------------------------------------------------------------
00292  * lots of min/max variations...
00293  *
00294  * On a slot by slot basis, compute the min or max
00295  *
00296  * U - unsigned, else signed
00297  * B,H,{} - byte, halfword, word
00298  * F float
00299  * ----------------------------------------------------------------
00300  */
00301 
00302 #define MIN_SELB(rt, ra, rb, rc)        selb    rt, ra, rb, rc;
00303 #define MAX_SELB(rt, ra, rb, rc)        selb    rt, rb, ra, rc;
00304 
00305         // words
00306 
00307 #define MIN(rt, ra, rb) \
00308         cgt     _gc_t0, ra, rb; \
00309         MIN_SELB(rt, ra, rb, _gc_t0)
00310 
00311 #define MAX(rt, ra, rb) \
00312         cgt     _gc_t0, ra, rb; \
00313         MAX_SELB(rt, ra, rb, _gc_t0)
00314 
00315 #define UMIN(rt, ra, rb) \
00316         clgt    _gc_t0, ra, rb; \
00317         MIN_SELB(rt, ra, rb, _gc_t0)
00318 
00319 #define UMAX(rt, ra, rb) \
00320         clgt    _gc_t0, ra, rb; \
00321         MAX_SELB(rt, ra, rb, _gc_t0)
00322 
00323         // bytes
00324 
00325 #define MINB(rt, ra, rb) \
00326         cgtb    _gc_t0, ra, rb; \
00327         MIN_SELB(rt, ra, rb, _gc_t0)
00328 
00329 #define MAXB(rt, ra, rb) \
00330         cgtb    _gc_t0, ra, rb; \
00331         MAX_SELB(rt, ra, rb, _gc_t0)
00332 
00333 #define UMINB(rt, ra, rb) \
00334         clgtb   _gc_t0, ra, rb; \
00335         MIN_SELB(rt, ra, rb, _gc_t0)
00336 
00337 #define UMAXB(rt, ra, rb) \
00338         clgtb   _gc_t0, ra, rb; \
00339         MAX_SELB(rt, ra, rb, _gc_t0)
00340 
00341         // halfwords
00342 
00343 #define MINH(rt, ra, rb) \
00344         cgth    _gc_t0, ra, rb; \
00345         MIN_SELB(rt, ra, rb, _gc_t0)
00346 
00347 #define MAXH(rt, ra, rb) \
00348         cgth    _gc_t0, ra, rb; \
00349         MAX_SELB(rt, ra, rb, _gc_t0)
00350 
00351 #define UMINH(rt, ra, rb) \
00352         clgth   _gc_t0, ra, rb; \
00353         MIN_SELB(rt, ra, rb, _gc_t0)
00354 
00355 #define UMAXH(rt, ra, rb) \
00356         clgth   _gc_t0, ra, rb; \
00357         MAX_SELB(rt, ra, rb, _gc_t0)
00358 
00359         // floats
00360 
00361 #define FMIN(rt, ra, rb) \
00362         fcgt    _gc_t0, ra, rb; \
00363         MIN_SELB(rt, ra, rb, _gc_t0)
00364 
00365 #define FMAX(rt, ra, rb) \
00366         fcgt    _gc_t0, ra, rb; \
00367         MAX_SELB(rt, ra, rb, _gc_t0)
00368 
00369 // Ignoring the sign, select the values with the minimum magnitude
00370 #define FMINMAG(rt, ra, rb) \
00371         fcmgt   _gc_t0, ra, rb; \
00372         MIN_SELB(rt, ra, rb, _gc_t0)
00373 
00374 // Ignoring the sign, select the values with the maximum magnitude
00375 #define FMAXMAG(rt, ra, rb) \
00376         fcmgt   _gc_t0, ra, rb; \
00377         MAX_SELB(rt, ra, rb, _gc_t0)
00378 
00379 
00380 #endif /* INCLUDED_GC_SPU_MACS_H */