diff options
-rw-r--r-- | volk/spu_lib/gc_spu_macs.h | 380 | ||||
-rw-r--r-- | volk/spu_lib/spu_16s_cmpgt_unaligned.c | 160 | ||||
-rw-r--r-- | volk/spu_lib/spu_16s_vector_subtract_unaligned.c | 178 | ||||
-rw-r--r-- | volk/spu_lib/spu_16s_vector_sum_unaligned.c | 178 | ||||
-rw-r--r-- | volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c | 222 | ||||
-rw-r--r-- | volk/spu_lib/spu_memcpy_unaligned.c | 290 | ||||
-rw-r--r-- | volk/spu_lib/spu_memset_unaligned.S | 185 |
7 files changed, 0 insertions, 1593 deletions
diff --git a/volk/spu_lib/gc_spu_macs.h b/volk/spu_lib/gc_spu_macs.h deleted file mode 100644 index e86dce3f5e..0000000000 --- a/volk/spu_lib/gc_spu_macs.h +++ /dev/null @@ -1,380 +0,0 @@ -/* -*- asm -*- */ -/* - * Copyright 2008 Free Software Foundation, Inc. - * - * This file is part of GNU Radio - * - * GNU Radio is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * GNU Radio is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#ifndef INCLUDED_GC_SPU_MACS_H -#define INCLUDED_GC_SPU_MACS_H - -/* - * This file contains a set of macros that are generally useful when - * coding in SPU assembler - * - * Note that the multi-instruction macros in here may overwrite - * registers 77, 78, and 79 without warning. 
- */ - -/* - * defines for all registers - */ -#define r0 $0 -#define r1 $1 -#define r2 $2 -#define r3 $3 -#define r4 $4 -#define r5 $5 -#define r6 $6 -#define r7 $7 -#define r8 $8 -#define r9 $9 -#define r10 $10 -#define r11 $11 -#define r12 $12 -#define r13 $13 -#define r14 $14 -#define r15 $15 -#define r16 $16 -#define r17 $17 -#define r18 $18 -#define r19 $19 -#define r20 $20 -#define r21 $21 -#define r22 $22 -#define r23 $23 -#define r24 $24 -#define r25 $25 -#define r26 $26 -#define r27 $27 -#define r28 $28 -#define r29 $29 -#define r30 $30 -#define r31 $31 -#define r32 $32 -#define r33 $33 -#define r34 $34 -#define r35 $35 -#define r36 $36 -#define r37 $37 -#define r38 $38 -#define r39 $39 -#define r40 $40 -#define r41 $41 -#define r42 $42 -#define r43 $43 -#define r44 $44 -#define r45 $45 -#define r46 $46 -#define r47 $47 -#define r48 $48 -#define r49 $49 -#define r50 $50 -#define r51 $51 -#define r52 $52 -#define r53 $53 -#define r54 $54 -#define r55 $55 -#define r56 $56 -#define r57 $57 -#define r58 $58 -#define r59 $59 -#define r60 $60 -#define r61 $61 -#define r62 $62 -#define r63 $63 -#define r64 $64 -#define r65 $65 -#define r66 $66 -#define r67 $67 -#define r68 $68 -#define r69 $69 -#define r70 $70 -#define r71 $71 -#define r72 $72 -#define r73 $73 -#define r74 $74 -#define r75 $75 -#define r76 $76 -#define r77 $77 -#define r78 $78 -#define r79 $79 -#define r80 $80 -#define r81 $81 -#define r82 $82 -#define r83 $83 -#define r84 $84 -#define r85 $85 -#define r86 $86 -#define r87 $87 -#define r88 $88 -#define r89 $89 -#define r90 $90 -#define r91 $91 -#define r92 $92 -#define r93 $93 -#define r94 $94 -#define r95 $95 -#define r96 $96 -#define r97 $97 -#define r98 $98 -#define r99 $99 -#define r100 $100 -#define r101 $101 -#define r102 $102 -#define r103 $103 -#define r104 $104 -#define r105 $105 -#define r106 $106 -#define r107 $107 -#define r108 $108 -#define r109 $109 -#define r110 $110 -#define r111 $111 -#define r112 $112 -#define r113 $113 
-#define r114 $114 -#define r115 $115 -#define r116 $116 -#define r117 $117 -#define r118 $118 -#define r119 $119 -#define r120 $120 -#define r121 $121 -#define r122 $122 -#define r123 $123 -#define r124 $124 -#define r125 $125 -#define r126 $126 -#define r127 $127 - - -#define lr r0 // link register -#define sp r1 // stack pointer - // r2 is environment pointer for langs that need it (ALGOL) - -#define retval r3 // return values are passed in regs starting at r3 - -#define arg1 r3 // args are passed in regs starting at r3 -#define arg2 r4 -#define arg3 r5 -#define arg4 r6 -#define arg5 r7 -#define arg6 r8 -#define arg7 r9 -#define arg8 r10 -#define arg9 r11 -#define arg10 r12 - -// r3 - r74 are volatile (caller saves) -// r75 - r79 are volatile (scratch regs possibly destroyed by fct prolog/epilog) -// r80 - r127 are non-volatile (callee saves) - -// scratch registers reserved for use by the macros in this file. - -#define _gc_t0 r79 -#define _gc_t1 r78 -#define _gc_t2 r77 - -/* - * ---------------------------------------------------------------- - * pseudo ops - * ---------------------------------------------------------------- - */ -#define PROC_ENTRY(name) \ - .text; \ - .p2align 4; \ - .global name; \ - .type name, @function; \ -name: - -/* - * ---------------------------------------------------------------- - * aliases for common operations - * ---------------------------------------------------------------- - */ - -// Move register (even pipe, 2 cycles) -#define MR(rt, ra) or rt, ra, ra; - -// Move register (odd pipe, 4 cycles) -#define LMR(rt, ra) rotqbyi rt, ra, 0; - -// return -#define RETURN() bi lr; - -// hint for a return -#define HINT_RETURN(ret_label) hbr ret_label, lr; - -// return if zero -#define BRZ_RETURN(rt) biz rt, lr; - -// return if not zero -#define BRNZ_RETURN(rt) binz rt, lr; - -// return if halfword zero -#define BRHZ_RETURN(rt) bihz rt, lr; - -// return if halfword not zero -#define BRHNZ_RETURN(rt) bihnz rt, lr; - - -/* - * 
---------------------------------------------------------------- - * modulo like things for constant moduli that are powers of 2 - * ---------------------------------------------------------------- - */ - -// rt = ra & (pow2 - 1) -#define MODULO(rt, ra, pow2) \ - andi rt, ra, (pow2)-1; - -// rt = pow2 - (ra & (pow2 - 1)) -#define MODULO_NEG(rt, ra, pow2) \ - andi rt, ra, (pow2)-1; \ - sfi rt, rt, (pow2); - -// rt = ra & -(pow2) -#define ROUND_DOWN(rt, ra, pow2) \ - andi rt, ra, -(pow2); - -// rt = (ra + (pow2 - 1)) & -(pow2) -#define ROUND_UP(rt, ra, pow2) \ - ai rt, ra, (pow2)-1; \ - andi rt, rt, -(pow2); - -/* - * ---------------------------------------------------------------- - * Splat - replicate a particular slot into all slots - * Altivec analogs... - * ---------------------------------------------------------------- - */ - -// replicate byte from slot s [0,15] -#define VSPLTB(rt, ra, s) \ - ilh _gc_t0, (s)*0x0101; \ - shufb rt, ra, ra, _gc_t0; - -// replicate halfword from slot s [0,7] -#define VSPLTH(rt, ra, s) \ - ilh _gc_t0, 2*(s)*0x0101 + 0x0001; \ - shufb rt, ra, ra, _gc_t0; - -// replicate word from slot s [0,3] -#define VSPLTW(rt, ra, s) \ - iluh _gc_t0, 4*(s)*0x0101 + 0x0001; \ - iohl _gc_t0, 4*(s)*0x0101 + 0x0203; \ - shufb rt, ra, ra, _gc_t0; - -// replicate double from slot s [0,1] -#define VSPLTD(rt, ra, s) \ - /* sp is always 16-byte aligned */ \ - cdd _gc_t0, 8(sp); /* 0x10111213 14151617 00010203 04050607 */ \ - rotqbyi rt, ra, ra, (s) << 3; /* rotate double into preferred slot */ \ - shufb rt, rt, rt, _gc_t0; - -/* - * ---------------------------------------------------------------- - * lots of min/max variations... 
- * - * On a slot by slot basis, compute the min or max - * - * U - unsigned, else signed - * B,H,{} - byte, halfword, word - * F float - * ---------------------------------------------------------------- - */ - -#define MIN_SELB(rt, ra, rb, rc) selb rt, ra, rb, rc; -#define MAX_SELB(rt, ra, rb, rc) selb rt, rb, ra, rc; - - // words - -#define MIN(rt, ra, rb) \ - cgt _gc_t0, ra, rb; \ - MIN_SELB(rt, ra, rb, _gc_t0) - -#define MAX(rt, ra, rb) \ - cgt _gc_t0, ra, rb; \ - MAX_SELB(rt, ra, rb, _gc_t0) - -#define UMIN(rt, ra, rb) \ - clgt _gc_t0, ra, rb; \ - MIN_SELB(rt, ra, rb, _gc_t0) - -#define UMAX(rt, ra, rb) \ - clgt _gc_t0, ra, rb; \ - MAX_SELB(rt, ra, rb, _gc_t0) - - // bytes - -#define MINB(rt, ra, rb) \ - cgtb _gc_t0, ra, rb; \ - MIN_SELB(rt, ra, rb, _gc_t0) - -#define MAXB(rt, ra, rb) \ - cgtb _gc_t0, ra, rb; \ - MAX_SELB(rt, ra, rb, _gc_t0) - -#define UMINB(rt, ra, rb) \ - clgtb _gc_t0, ra, rb; \ - MIN_SELB(rt, ra, rb, _gc_t0) - -#define UMAXB(rt, ra, rb) \ - clgtb _gc_t0, ra, rb; \ - MAX_SELB(rt, ra, rb, _gc_t0) - - // halfwords - -#define MINH(rt, ra, rb) \ - cgth _gc_t0, ra, rb; \ - MIN_SELB(rt, ra, rb, _gc_t0) - -#define MAXH(rt, ra, rb) \ - cgth _gc_t0, ra, rb; \ - MAX_SELB(rt, ra, rb, _gc_t0) - -#define UMINH(rt, ra, rb) \ - clgth _gc_t0, ra, rb; \ - MIN_SELB(rt, ra, rb, _gc_t0) - -#define UMAXH(rt, ra, rb) \ - clgth _gc_t0, ra, rb; \ - MAX_SELB(rt, ra, rb, _gc_t0) - - // floats - -#define FMIN(rt, ra, rb) \ - fcgt _gc_t0, ra, rb; \ - MIN_SELB(rt, ra, rb, _gc_t0) - -#define FMAX(rt, ra, rb) \ - fcgt _gc_t0, ra, rb; \ - MAX_SELB(rt, ra, rb, _gc_t0) - -// Ignoring the sign, select the values with the minimum magnitude -#define FMINMAG(rt, ra, rb) \ - fcmgt _gc_t0, ra, rb; \ - MIN_SELB(rt, ra, rb, _gc_t0) - -// Ignoring the sign, select the values with the maximum magnitude -#define FMAXMAG(rt, ra, rb) \ - fcmgt _gc_t0, ra, rb; \ - MAX_SELB(rt, ra, rb, _gc_t0) - - -#endif /* INCLUDED_GC_SPU_MACS_H */ diff --git a/volk/spu_lib/spu_16s_cmpgt_unaligned.c 
b/volk/spu_lib/spu_16s_cmpgt_unaligned.c deleted file mode 100644 index 8811e68014..0000000000 --- a/volk/spu_lib/spu_16s_cmpgt_unaligned.c +++ /dev/null @@ -1,160 +0,0 @@ -#include<spu_intrinsics.h> - -void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, unsigned int num_bytes){ - //loop iterator i - int i = 0; - void* retval = target; - - - //put the target and source addresses into qwords - vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0}; - vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0}; - - //create shuffle masks - - //shuffle mask building blocks: - //all from the first vector - vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}; - //all from the second vector - vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; - - - - //gamma: second half of the second, first half of the first, break at (unsigned int)src%16 - vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src%16)); - vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp); - vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp); - vector unsigned char cmp_res = spu_or(gt_res, eq_res); - vector unsigned char sixteen_uchar = spu_splats((unsigned char)16); - vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src%16); - - - - - vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16)); - vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16)); - - //alpha: first half of first, second half of second, break at (unsigned int)target%16 - src_cmp = spu_splats((unsigned char)((unsigned int)target%16)); 
- gt_res = spu_cmpgt(oneup, src_cmp); - eq_res = spu_cmpeq(oneup, src_cmp); - cmp_res = spu_or(gt_res, eq_res); - phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - - //delta: first half of first, first half of second, break at (unsigned int)target%16 - vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha); - //epsilon: second half of second, second half of first, break at (unsigned int)target%16 - vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha); - //zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16 - vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); - - //beta: first half of first, second half of second, break at num_bytes%16 - src_cmp = spu_splats((unsigned char)(num_bytes%16)); - gt_res = spu_cmpgt(oneup, src_cmp); - eq_res = spu_cmpeq(oneup, src_cmp); - cmp_res = spu_or(gt_res, eq_res); - phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - - - - - - - qword src_past; - qword src_present; - qword tgt_past; - qword tgt_present; - - qword in_temp; - qword out_temp0; - qword out_temp1; - - src_past = si_lqd((qword)address_counter_src, 0); - tgt_past = si_lqd((qword)address_counter_tgt, 0); - - vector signed short vec_val = spu_splats(val); - vector unsigned short compare; - vector unsigned short ones = {1, 1, 1, 1, 1, 1, 1, 1}; - vector unsigned short after_and; - - for(i = 0; i < num_bytes/16; ++i) { - - src_present = si_lqd((qword)address_counter_src, 16); - tgt_present = si_lqd((qword)address_counter_tgt, 16); - - in_temp = spu_shuffle(src_present, src_past, (vector unsigned char)shuffle_mask_gamma); - - compare = 
spu_cmpgt((vector signed short) in_temp, vec_val); - after_and = spu_and(compare, ones); - - - out_temp0 = spu_shuffle(tgt_past, (qword)after_and, shuffle_mask_delta); - out_temp1 = spu_shuffle(tgt_present, (qword)after_and, shuffle_mask_epsilon); - - si_stqd(out_temp0, (qword)address_counter_tgt, 0); - si_stqd(out_temp1, (qword)address_counter_tgt, 16); - - tgt_past = out_temp1; - src_past = src_present; - address_counter_src = spu_add(address_counter_src, 16); - address_counter_tgt = spu_add(address_counter_tgt, 16); - - - } - - src_present = si_lqd((qword)address_counter_src, 16); - tgt_present = si_lqd((qword)address_counter_tgt, 16); - - - in_temp = spu_shuffle(src_present, src_past,(vector unsigned char) shuffle_mask_gamma); - - compare = spu_cmpgt((vector signed short) in_temp, vec_val); - after_and = spu_and(compare, ones); - - - qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta); - qword meld = spu_shuffle((qword)after_and, target_temp, (vector unsigned char)shuffle_mask_beta); - - - - out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta); - out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon); - - si_stqd(out_temp0, (qword)address_counter_tgt, 0); - si_stqd(out_temp1, (qword)address_counter_tgt, 16); - - return retval; -} - - - -/* -int main(){ - - signed short pooh[48]; - signed short bear[48]; - - int i = 0; - for(i = 0; i < 48; i += 2){ - bear[i] = i; - bear[i + 1] = -i; - } - - vector_gt_16bit(&pooh[0],&bear[0], 0, 48 * sizeof(signed short)); - - for(i = 0; i < 48; ++i) { - printf("%d, ", pooh[i]); - } - printf("\n"); -} -*/ - diff --git a/volk/spu_lib/spu_16s_vector_subtract_unaligned.c b/volk/spu_lib/spu_16s_vector_subtract_unaligned.c deleted file mode 100644 index ea110c8d21..0000000000 --- a/volk/spu_lib/spu_16s_vector_subtract_unaligned.c +++ /dev/null @@ -1,178 +0,0 @@ -#include<spu_intrinsics.h> - -void* libvector_16s_vector_subtract_unaligned(void* target, void* src0, void* src1, 
unsigned int num_bytes){ - //loop iterator i - int i = 0; - void* retval = target; - - - //put the target and source addresses into qwords - vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0}; - vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0 ,0}; - vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0, 0}; - - //create shuffle masks - - //shuffle mask building blocks: - //all from the first vector - vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}; - //all from the second vector - vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; - - - - //gamma: second half of the second, first half of the first, break at (unsigned int)src0%16 - vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src0%16)); - vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp); - vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp); - vector unsigned char cmp_res = spu_or(gt_res, eq_res); - vector unsigned char sixteen_uchar = spu_splats((unsigned char)16); - vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src0%16); - - //eta: second half of the second, first half of the first, break at (unsigned int)src1%16 - src_cmp = spu_splats((unsigned char)((unsigned int)src1%16)); - gt_res = spu_cmpgt(oneup, src_cmp); - eq_res = spu_cmpeq(oneup, src_cmp); - cmp_res = spu_or(gt_res, eq_res); - sixteen_uchar = spu_splats((unsigned char)16); - phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, 
(unsigned int)src1%16); - - - - - - vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16)); - vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16)); - - //alpha: first half of first, second half of second, break at (unsigned int)target%16 - src_cmp = spu_splats((unsigned char)((unsigned int)target%16)); - gt_res = spu_cmpgt(oneup, src_cmp); - eq_res = spu_cmpeq(oneup, src_cmp); - cmp_res = spu_or(gt_res, eq_res); - phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - - //delta: first half of first, first half of second, break at (unsigned int)target%16 - vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha); - //epsilon: second half of second, second half of first, break at (unsigned int)target%16 - vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha); - //zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16 - vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); - - //beta: first half of first, second half of second, break at num_bytes%16 - src_cmp = spu_splats((unsigned char)(num_bytes%16)); - gt_res = spu_cmpgt(oneup, src_cmp); - eq_res = spu_cmpeq(oneup, src_cmp); - cmp_res = spu_or(gt_res, eq_res); - phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - - - - - - - qword src0_past; - qword src0_present; - qword src1_past; - qword src1_present; - qword tgt_past; - qword tgt_present; - - qword in_temp0; - qword in_temp1; - qword out_temp0; - qword out_temp1; - - vector signed short sum; - - src0_past = si_lqd((qword)address_counter_src0, 0); - src1_past = 
si_lqd((qword)address_counter_src1, 0); - tgt_past = si_lqd((qword)address_counter_tgt, 0); - - for(i = 0; i < num_bytes/16; ++i) { - - src0_present = si_lqd((qword)address_counter_src0, 16); - src1_present = si_lqd((qword)address_counter_src1, 16); - tgt_present = si_lqd((qword)address_counter_tgt, 16); - - in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char)shuffle_mask_gamma); - in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char)shuffle_mask_eta); - - sum = spu_sub((vector signed short)in_temp0, (vector signed short)in_temp1); - - - out_temp0 = spu_shuffle(tgt_past, (qword)sum, shuffle_mask_delta); - out_temp1 = spu_shuffle(tgt_present, (qword)sum, shuffle_mask_epsilon); - - si_stqd(out_temp0, (qword)address_counter_tgt, 0); - si_stqd(out_temp1, (qword)address_counter_tgt, 16); - - tgt_past = out_temp1; - src0_past = src0_present; - src1_past = src1_present; - address_counter_src0 = spu_add(address_counter_src0, 16); - address_counter_src1 = spu_add(address_counter_src1, 16); - address_counter_tgt = spu_add(address_counter_tgt, 16); - - - } - - src0_present = si_lqd((qword)address_counter_src0, 16); - src1_present = si_lqd((qword)address_counter_src1, 16); - tgt_present = si_lqd((qword)address_counter_tgt, 16); - - - in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char) shuffle_mask_gamma); - in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char) shuffle_mask_eta); - sum = spu_sub((vector signed short)in_temp0, (vector signed short)in_temp1); - qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta); - qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned char)shuffle_mask_beta); - - - - out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta); - out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon); - - si_stqd(out_temp0, (qword)address_counter_tgt, 0); - si_stqd(out_temp1, (qword)address_counter_tgt, 16); - - return retval; 
-} - - - -/* -int main(){ - - signed short pooh[48]; - signed short bear[48]; - signed short res[48]; - - int i = 0; - for(i = 0; i < 48; ++i){ - pooh[i] = i; - } - for(i = 48; i < 96; ++i){ - bear[i - 48] = i; - } - - vector_subtract_16bit(res, &pooh[0], &bear[0], 48 * sizeof(signed short)); - - for(i = 0; i < 48; ++i) { - printf("%d, ", res[i]); - } - printf("\n"); -} -*/ - diff --git a/volk/spu_lib/spu_16s_vector_sum_unaligned.c b/volk/spu_lib/spu_16s_vector_sum_unaligned.c deleted file mode 100644 index 0097b4f56a..0000000000 --- a/volk/spu_lib/spu_16s_vector_sum_unaligned.c +++ /dev/null @@ -1,178 +0,0 @@ -#include<spu_intrinsics.h> - -void* libvector_16s_vector_sum_unaligned(void* target, void* src0, void* src1, unsigned int num_bytes){ - //loop iterator i - int i = 0; - void* retval = target; - - - //put the target and source addresses into qwords - vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0}; - vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0 ,0}; - vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0, 0}; - - //create shuffle masks - - //shuffle mask building blocks: - //all from the first vector - vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}; - //all from the second vector - vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; - - - - //gamma: second half of the second, first half of the first, break at (unsigned int)src0%16 - vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src0%16)); - vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp); - vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp); - vector unsigned char cmp_res = spu_or(gt_res, eq_res); - vector unsigned char sixteen_uchar = spu_splats((unsigned char)16); - vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res); - vector 
unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src0%16); - - //eta: second half of the second, first half of the first, break at (unsigned int)src1%16 - src_cmp = spu_splats((unsigned char)((unsigned int)src1%16)); - gt_res = spu_cmpgt(oneup, src_cmp); - eq_res = spu_cmpeq(oneup, src_cmp); - cmp_res = spu_or(gt_res, eq_res); - sixteen_uchar = spu_splats((unsigned char)16); - phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned int)src1%16); - - - - - - vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16)); - vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16)); - - //alpha: first half of first, second half of second, break at (unsigned int)target%16 - src_cmp = spu_splats((unsigned char)((unsigned int)target%16)); - gt_res = spu_cmpgt(oneup, src_cmp); - eq_res = spu_cmpeq(oneup, src_cmp); - cmp_res = spu_or(gt_res, eq_res); - phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - - //delta: first half of first, first half of second, break at (unsigned int)target%16 - vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha); - //epsilon: second half of second, second half of first, break at (unsigned int)target%16 - vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha); - //zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16 - vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); - - //beta: first 
half of first, second half of second, break at num_bytes%16 - src_cmp = spu_splats((unsigned char)(num_bytes%16)); - gt_res = spu_cmpgt(oneup, src_cmp); - eq_res = spu_cmpeq(oneup, src_cmp); - cmp_res = spu_or(gt_res, eq_res); - phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - - - - - - - qword src0_past; - qword src0_present; - qword src1_past; - qword src1_present; - qword tgt_past; - qword tgt_present; - - qword in_temp0; - qword in_temp1; - qword out_temp0; - qword out_temp1; - - vector signed int sum; - - src0_past = si_lqd((qword)address_counter_src0, 0); - src1_past = si_lqd((qword)address_counter_src1, 0); - tgt_past = si_lqd((qword)address_counter_tgt, 0); - - for(i = 0; i < num_bytes/16; ++i) { - - src0_present = si_lqd((qword)address_counter_src0, 16); - src1_present = si_lqd((qword)address_counter_src1, 16); - tgt_present = si_lqd((qword)address_counter_tgt, 16); - - in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char)shuffle_mask_gamma); - in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char)shuffle_mask_eta); - - sum = spu_add((vector signed int)in_temp0, (vector signed int)in_temp1); - - - out_temp0 = spu_shuffle(tgt_past, (qword)sum, shuffle_mask_delta); - out_temp1 = spu_shuffle(tgt_present, (qword)sum, shuffle_mask_epsilon); - - si_stqd(out_temp0, (qword)address_counter_tgt, 0); - si_stqd(out_temp1, (qword)address_counter_tgt, 16); - - tgt_past = out_temp1; - src0_past = src0_present; - src1_past = src1_present; - address_counter_src0 = spu_add(address_counter_src0, 16); - address_counter_src1 = spu_add(address_counter_src1, 16); - address_counter_tgt = spu_add(address_counter_tgt, 16); - - - } - - src0_present = si_lqd((qword)address_counter_src0, 16); - src1_present = si_lqd((qword)address_counter_src1, 16); - tgt_present = si_lqd((qword)address_counter_tgt, 16); - - - in_temp0 = 
spu_shuffle(src0_present, src0_past, (vector unsigned char) shuffle_mask_gamma); - in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char) shuffle_mask_eta); - sum = spu_add((vector signed int)in_temp0, (vector signed int)in_temp1); - qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta); - qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned char)shuffle_mask_beta); - - - - out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta); - out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon); - - si_stqd(out_temp0, (qword)address_counter_tgt, 0); - si_stqd(out_temp1, (qword)address_counter_tgt, 16); - - return retval; -} - - - -/* -int main(){ - - signed short pooh[48]; - signed short bear[48]; - signed short res[48]; - - int i = 0; - for(i = 0; i < 48; ++i){ - pooh[i] = i; - } - for(i = 48; i < 96; ++i){ - bear[i - 48] = i; - } - - vector_sum(&pooh[9], &pooh[9], &bear[3], 30); - - for(i = 0; i < 48; ++i) { - printf("%d, ", pooh[i]); - } - printf("\n"); -} -*/ - diff --git a/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c b/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c deleted file mode 100644 index d1c9604889..0000000000 --- a/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c +++ /dev/null @@ -1,222 +0,0 @@ -#include<spu_intrinsics.h> - - - - -void* libvector_pointwise_multiply_32fc_unaligned(void* target, void* src0, void* src1, unsigned int num_bytes){ - //loop iterator i - int i = 0; - void* retval = target; - - - //put the target and source addresses into qwords - vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0}; - vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0 ,0}; - vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0, 0}; - - //create shuffle masks - - //shuffle mask building blocks: - //all from the first vector - vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}; - //all from the second vector - vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; - - - - //gamma: second half of the second, first half of the first, break at (unsigned int)src0%16 - vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src0%16)); - vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp); - vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp); - vector unsigned char cmp_res = spu_or(gt_res, eq_res); - vector unsigned char sixteen_uchar = spu_splats((unsigned char)16); - vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src0%16); - - //eta: second half of the second, first half of the first, break at (unsigned int)src1%16 - src_cmp = spu_splats((unsigned char)((unsigned int)src1%16)); - gt_res = spu_cmpgt(oneup, src_cmp); - eq_res = spu_cmpeq(oneup, src_cmp); - cmp_res = spu_or(gt_res, eq_res); - sixteen_uchar = spu_splats((unsigned char)16); - phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_eta = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned int)src1%16); - - - - - - vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16)); - vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16)); - - //alpha: first half of first, second half of second, break at (unsigned int)target%16 - src_cmp = spu_splats((unsigned char)((unsigned int)target%16)); - gt_res = spu_cmpgt(oneup, src_cmp); - eq_res = spu_cmpeq(oneup, src_cmp); - cmp_res = spu_or(gt_res, eq_res); - phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int 
shuffle_mask_alpha = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - - //delta: first half of first, first half of second, break at (unsigned int)target%16 - vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha); - //epsilon: second half of second, second half of first, break at (unsigned int)target%16 - vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha); - //zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16 - vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16); - - //beta: first half of first, second half of second, break at num_bytes%16 - src_cmp = spu_splats((unsigned char)(num_bytes%16)); - gt_res = spu_cmpgt(oneup, src_cmp); - eq_res = spu_cmpeq(oneup, src_cmp); - cmp_res = spu_or(gt_res, eq_res); - phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - - - - - - - qword src0_past; - qword src0_present; - qword src1_past; - qword src1_present; - qword tgt_past; - qword tgt_present; - - qword in_temp0; - qword in_temp1; - qword out_temp0; - qword out_temp1; - - - src0_past = si_lqd((qword)address_counter_src0, 0); - src1_past = si_lqd((qword)address_counter_src1, 0); - tgt_past = si_lqd((qword)address_counter_tgt, 0); - - vector unsigned char shuffle_mask_complexprod0 = {0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, - 0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b}; - vector unsigned char shuffle_mask_complexprod1 = {0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, - 0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b}; - vector unsigned char shuffle_mask_complexprod2 = {0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17, - 0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f}; - vector unsigned char sign_changer = {0x00, 0x00, 
0x00, 0x00, 0x80, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00}; - - vector float prod0; - qword shuf0; - vector float prod1; - vector float sign_change; - qword summand0; - qword summand1; - vector float sum; - - - for(i = 0; i < num_bytes/16; ++i) { - - src0_present = si_lqd((qword)address_counter_src0, 16); - src1_present = si_lqd((qword)address_counter_src1, 16); - tgt_present = si_lqd((qword)address_counter_tgt, 16); - - in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char)shuffle_mask_gamma); - in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char)shuffle_mask_eta); - - prod0 = spu_mul((vector float)in_temp0, (vector float)in_temp1); - shuf0 = spu_shuffle((qword)in_temp1, (qword)in_temp1, shuffle_mask_complexprod0); - prod1 = spu_mul((vector float)in_temp0, (vector float)shuf0); - sign_change = spu_xor(prod0, (vector float)sign_changer); - - summand0 = spu_shuffle((qword)sign_change, (qword)prod1, shuffle_mask_complexprod1); - - summand1 = spu_shuffle((qword)sign_change, (qword)prod1, shuffle_mask_complexprod2); - - sum = spu_add((vector float)summand0, (vector float)summand1); - - - out_temp0 = spu_shuffle(tgt_past, (qword)sum, shuffle_mask_delta); - out_temp1 = spu_shuffle(tgt_present, (qword)sum, shuffle_mask_epsilon); - - si_stqd(out_temp0, (qword)address_counter_tgt, 0); - si_stqd(out_temp1, (qword)address_counter_tgt, 16); - - tgt_past = out_temp1; - src0_past = src0_present; - src1_past = src1_present; - address_counter_src0 = spu_add(address_counter_src0, 16); - address_counter_src1 = spu_add(address_counter_src1, 16); - address_counter_tgt = spu_add(address_counter_tgt, 16); - - - } - - src0_present = si_lqd((qword)address_counter_src0, 16); - src1_present = si_lqd((qword)address_counter_src1, 16); - tgt_present = si_lqd((qword)address_counter_tgt, 16); - - - in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char) shuffle_mask_gamma); - in_temp1 = spu_shuffle(src1_present, 
src1_past, (vector unsigned char) shuffle_mask_eta); - - - prod0 = spu_mul((vector float)in_temp0, (vector float)in_temp1); - shuf0 = spu_shuffle((qword)in_temp1, (qword)in_temp1, shuffle_mask_complexprod0); - prod1 = spu_mul(prod0, (vector float)shuf0); - sign_change = spu_xor(prod0, (vector float)sign_changer); - summand0 = spu_shuffle((qword)sign_change, (qword)prod1, shuffle_mask_complexprod1); - summand1 = spu_shuffle((qword)sign_change, (qword)prod1, shuffle_mask_complexprod2); - sum = spu_add((vector float)summand0, (vector float)summand1); - - - - qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta); - qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned char)shuffle_mask_beta); - - - - out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta); - out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon); - - si_stqd(out_temp0, (qword)address_counter_tgt, 0); - si_stqd(out_temp1, (qword)address_counter_tgt, 16); - - return retval; -} - - - -/* -int main(){ - - float pooh[48]; - float bear[48]; - float res[48]; - - int i = 0; - for(i = 0; i < 48; ++i){ - pooh[i] = (float) i; - } - for(i = 48; i < 96; ++i){ - bear[i - 48] = (float) i; - } - - vector_product_complex(res, pooh, bear, 48*sizeof(float)); - - - - for(i = 0; i < 48; ++i) { - printf("%f, ", res[i]); - } - printf("\n"); - - -} -*/ - diff --git a/volk/spu_lib/spu_memcpy_unaligned.c b/volk/spu_lib/spu_memcpy_unaligned.c deleted file mode 100644 index 0f15b5d807..0000000000 --- a/volk/spu_lib/spu_memcpy_unaligned.c +++ /dev/null @@ -1,290 +0,0 @@
#include <libvector/libvector_memcpy_unaligned.h>
#include <spu_intrinsics.h>

/*
 * libvector_memcpy_unaligned
 *
 * Copies num_bytes bytes from src to target.  Neither pointer needs to be
 * 16-byte aligned.  Returns target.
 *
 * All memory traffic is whole quadwords (si_lqd / si_stqd).  For every 16
 * bytes of payload the routine:
 *   1. gathers the 16 source bytes that start at src from the two source
 *      quadwords that contain them (mask gamma), and
 *   2. scatters them into the two target quadwords they land in, keeping
 *      the target bytes that lie outside the copied range (masks delta and
 *      epsilon).
 * The last num_bytes%16 bytes are first melded with the bytes already
 * present at the destination (masks zeta and beta), so that bytes past the
 * end of the copy are written back unchanged.
 *
 * NOTE(review): the tail step is unconditional.  It always loads the
 * quadword at offset +16 from both cursors and always stores two target
 * quadwords, even when num_bytes is 0 or a multiple of 16.  Bytes outside
 * the requested range are rewritten with the values that were read.
 * NOTE(review): the unaligned addresses are handed straight to
 * si_lqd/si_stqd; this presumably relies on the quadword load/store
 * ignoring the low four address bits -- confirm against the SPU ISA.
 */
void* libvector_memcpy_unaligned(void* target, void* src, unsigned int num_bytes){
  unsigned int i;
  void* retval = target;

  //byte offsets of the two pointers inside their quadwords
  const unsigned int src_off = (unsigned int)src%16;
  const unsigned int tgt_off = (unsigned int)target%16;
  //number of complete 16-byte groups to copy in the main loop
  const unsigned int full_quads = num_bytes/16;

  //put the target and source addresses into qwords
  vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0};
  vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0};

  //create shuffle masks

  //shuffle mask building blocks:
  //all from the first vector
  vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                                0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
  //all from the second vector
  vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
                                       0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};

  //gamma: second half of the second, first half of the first, break at src_off.
  //Build "index i, plus 16 where i >= src_off", then rotate left by src_off so
  //that shuffling (present, past) yields the 16 bytes that start at src.
  vector unsigned char src_cmp = spu_splats((unsigned char)src_off);
  vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
  vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
  vector unsigned char cmp_res = spu_or(gt_res, eq_res);
  vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
  vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
  vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change,
                                                   (vector unsigned int)oneup);
  shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, src_off);

  //second-vector indices rotated by -tgt_off, i.e. the payload bytes shifted
  //to where they fall inside the target quadwords
  vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -tgt_off);

  //alpha: first half of first, second half of second, break at tgt_off
  src_cmp = spu_splats((unsigned char)tgt_off);
  gt_res = spu_cmpgt(oneup, src_cmp);
  eq_res = spu_cmpeq(oneup, src_cmp);
  cmp_res = spu_or(gt_res, eq_res);
  phase_change = spu_and(sixteen_uchar, cmp_res);
  vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change,
                                                   (vector unsigned int)oneup);

  //delta: first half of first, first half of second, break at tgt_off
  //(old target bytes below the break, start of the payload from the break on)
  vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha);
  //epsilon: second half of second, second half of first, break at tgt_off
  //(rest of the payload below the break, old target bytes from the break on)
  vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha);
  //zeta: second half of second, first half of first, break at 16 - tgt_off
  //(gathers the 16 bytes currently stored at the target cursor)
  vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, tgt_off);

  //beta: first half of first, second half of second, break at num_bytes%16
  src_cmp = spu_splats((unsigned char)(num_bytes%16));
  gt_res = spu_cmpgt(oneup, src_cmp);
  eq_res = spu_cmpeq(oneup, src_cmp);
  cmp_res = spu_or(gt_res, eq_res);
  phase_change = spu_and(sixteen_uchar, cmp_res);
  vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change,
                                                  (vector unsigned int)oneup);

  qword src_past;
  qword src_present;
  qword tgt_past;
  qword tgt_present;

  qword in_temp;
  qword out_temp0;
  qword out_temp1;

  //prime the pipeline with the quadwords under the two cursors
  src_past = si_lqd((qword)address_counter_src, 0);
  tgt_past = si_lqd((qword)address_counter_tgt, 0);

  for(i = 0; i < full_quads; ++i) {

    src_present = si_lqd((qword)address_counter_src, 16);
    tgt_present = si_lqd((qword)address_counter_tgt, 16);

    //16 payload bytes starting at the source cursor
    in_temp = spu_shuffle(src_present, src_past, (vector unsigned char)shuffle_mask_gamma);

    //merge the payload into the two target quadwords it straddles
    out_temp0 = spu_shuffle(tgt_past, in_temp, shuffle_mask_delta);
    out_temp1 = spu_shuffle(tgt_present, in_temp, shuffle_mask_epsilon);

    si_stqd(out_temp0, (qword)address_counter_tgt, 0);
    si_stqd(out_temp1, (qword)address_counter_tgt, 16);

    //the upper quadword just written is the lower one of the next round
    tgt_past = out_temp1;
    src_past = src_present;
    address_counter_src = spu_add(address_counter_src, 16);
    address_counter_tgt = spu_add(address_counter_tgt, 16);
  }

  //tail: the remaining num_bytes%16 bytes
  src_present = si_lqd((qword)address_counter_src, 16);
  tgt_present = si_lqd((qword)address_counter_tgt, 16);

  in_temp = spu_shuffle(src_present, src_past, (vector unsigned char)shuffle_mask_gamma);
  //bytes currently at the target cursor, used to fill everything past the tail
  qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char)shuffle_mask_zeta);
  //first num_bytes%16 bytes from the source, the rest from the existing target
  qword meld = spu_shuffle(in_temp, target_temp, (vector unsigned char)shuffle_mask_beta);

  out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
  out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);

  si_stqd(out_temp0, (qword)address_counter_tgt, 0);
  si_stqd(out_temp1, (qword)address_counter_tgt, 16);

  return retval;
}

/* -void* mcpy(void* target, void* src, size_t num_bytes){ - //loop iterator i - int i = 0; - void* retval = src; - - //put the target and source addresses into qwords - vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0}; - vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0}; - - //create shuffle masks - - //shuffle mask building blocks: - //all from the first vector - vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}; - //all from the second vector - vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; - - - - //gamma: second half of the second, first half of the first, break at src%16 - vector unsigned char src_cmp = spu_splats((unsigned char)(src%16)); - vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp); - vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp); - vector unsigned char cmp_res = spu_or(gt_res, eq_res); - vector unsigned char sixteen_uchar = spu_splats((unsigned char)16); - vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, src%16); - - - - - vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -(target%16)); - vector unsigned char tgt_first = spu_rlqwbyte(oneup, -(target%16)); - - //alpha: first half of first, second half of second, break at target%16 - src_cmp = spu_splats((unsigned
char)(target%16)); - gt_res = spu_cmpgt(oneup, src_cmp); - eq_res = spu_cmpeq(oneup, src_cmp); - cmp_res = spu_or(gt_res, eq_res); - phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - - //delta: first half of first, first half of second, break at target%16 - vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha); - //epsilon: second half of second, second half of first, break at target%16 - vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha); - //zeta: second half of second, first half of first, break at 16 - target%16 - vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, target%16); - - //beta: first half of first, second half of second, break at num_bytes%16 - src_cmp = spu_splats((unsigned char)(num_bytes%16)); - gt_res = spu_cmpgt(oneup, src_cmp); - eq_res = spu_cmpeq(oneup, src_cmp); - cmp_res = spu_or(gt_res, eq_res); - phase_change = spu_and(sixteen_uchar, cmp_res); - vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change, - (vector unsigned int)oneup); - - - printf("num_bytesmod16 %d\n", num_bytes%16); - printf("beta %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d\n", - spu_extract((vector unsigned char) shuffle_mask_beta, 0), - spu_extract((vector unsigned char) shuffle_mask_beta, 1), - spu_extract((vector unsigned char) shuffle_mask_beta, 2), - spu_extract((vector unsigned char) shuffle_mask_beta, 3), - spu_extract((vector unsigned char) shuffle_mask_beta, 4), - spu_extract((vector unsigned char) shuffle_mask_beta, 5), - spu_extract((vector unsigned char) shuffle_mask_beta, 6), - spu_extract((vector unsigned char) shuffle_mask_beta, 7), - spu_extract((vector unsigned char) shuffle_mask_beta, 8), - spu_extract((vector unsigned char) shuffle_mask_beta, 9), - 
spu_extract((vector unsigned char) shuffle_mask_beta, 10), - spu_extract((vector unsigned char) shuffle_mask_beta, 11), - spu_extract((vector unsigned char) shuffle_mask_beta, 12), - spu_extract((vector unsigned char) shuffle_mask_beta, 13), - spu_extract((vector unsigned char) shuffle_mask_beta, 14), - spu_extract((vector unsigned char) shuffle_mask_beta, 15)); - - - - - - - - qword src_past; - qword src_present; - qword tgt_past; - qword tgt_present; - - qword in_temp; - qword out_temp0; - qword out_temp1; - - src_past = si_lqd((qword)address_counter_src, 0); - tgt_past = si_lqd((qword)address_counter_tgt, 0); - - for(i = 0; i < num_bytes/16; ++i) { - - src_present = si_lqd((qword)address_counter_src, 16); - tgt_present = si_lqd((qword)address_counter_tgt, 16); - - in_temp = spu_shuffle(src_present, src_past, (vector unsigned char)shuffle_mask_gamma); - - out_temp0 = spu_shuffle(tgt_past, in_temp, shuffle_mask_delta); - out_temp1 = spu_shuffle(tgt_present, in_temp, shuffle_mask_epsilon); - - si_stqd(out_temp0, (qword)address_counter_tgt, 0); - si_stqd(out_temp1, (qword)address_counter_tgt, 16); - - tgt_past = out_temp1; - src_past = src_present; - address_counter_src = spu_add(address_counter_src, 16); - address_counter_tgt = spu_add(address_counter_tgt, 16); - - - } - - src_present = si_lqd((qword)address_counter_src, 16); - tgt_present = si_lqd((qword)address_counter_tgt, 16); - - - in_temp = spu_shuffle(src_present, src_past,(vector unsigned char) shuffle_mask_gamma); - qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta); - qword meld = spu_shuffle(in_temp, target_temp, (vector unsigned char)shuffle_mask_beta); - - - - out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta); - out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon); - - si_stqd(out_temp0, (qword)address_counter_tgt, 0); - si_stqd(out_temp1, (qword)address_counter_tgt, 16); - - return retval; - -} -*/ -/* -int main(){ - - unsigned char 
pooh[48]; - unsigned char bear[48]; - - int i = 0; - for(i = 0; i < 48; ++i){ - pooh[i] = i; - bear[i] = i; - } - - spu_mcpy(&pooh[9],&bear[3], 15); - - for(i = 0; i < 48; ++i) { - printf("%d, ", pooh[i]); - } - printf("\n"); -} - -*/ diff --git a/volk/spu_lib/spu_memset_unaligned.S b/volk/spu_lib/spu_memset_unaligned.S deleted file mode 100644 index c260a125cd..0000000000 --- a/volk/spu_lib/spu_memset_unaligned.S +++ /dev/null @@ -1,185 +0,0 @@
/* -*- asm -*- */
/*
 * Copyright 2008 Free Software Foundation, Inc.
 *
 * This file is part of GNU Radio
 *
 * GNU Radio is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3, or (at your option)
 * any later version.
 *
 * GNU Radio is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "gc_spu_macs.h"

        .file "spu_memset_unaligned.S"

        /*
         * Computes this, only a lot faster...
         *
         * void *
         * libvector_memset_unaligned(void *pv, int c, size_t n)
         * {
         *   unsigned char *p = (unsigned char *) pv;
         *   size_t i;
         *   for (i = 0; i < n; i++)
         *     p[i] = c;
         *
         *   return pv;
         * }
         *
         * Target: Cell SPU, GNU as syntax, run through cpp.
         * In:     arg1 = pv, arg2 = c, arg3 = n  (arg1..arg3 and the
         *         upper-case macros come from gc_spu_macs.h)
         * Out:    arg1 = pv; it is never written here, so the incoming
         *         pointer is returned as-is
         * Writes: arg2, arg3 and r13..r23 (see the defines below).
         *         NOTE(review): gc_spu_macs.h warns that its
         *         multi-instruction macros may also overwrite r77..r79.
         *
         * Structure: optional unaligned head (do_head), 128-byte unrolled
         * loop, 16-byte loop, optional partial tail quadword (do_tail).
         */

#define p_arg   arg1    // we're going to clobber arg1 w/ the return value
#define c       arg2    // the constant we're writing
#define n       arg3    // how many bytes to write

#define p       r13     // where we're writing
#define t0      r14
#define t1      r15
#define mask    r16
#define old     r17
#define an      r18     // aligned n (n rounded down to mod 16 boundary)
#define next_p  r19
#define cond1   r20
#define cond2   r21
#define m       r22
#define r       r23

        PROC_ENTRY(libvector_memset_unaligned)

        // Hint the return from do_head, in case we go that way.
        // There's pretty much nothing we can do to hint the branch to it.
        hbrr    do_head_br, head_complete

        MR(p, p_arg)    // working copy; the return value stays in p_arg (r3)
        BRZ_RETURN(n)

        MODULO(t0, p, 16)       // is p%16 == 0?
        VSPLTB(c, c, 3)         // splat byte in preferred slot of c into all slots
        brnz    t0, do_head     // no, handle it
head_complete:

        /*
         * preconditions:
         *   p%16 == 0, n > 0
         */
        hbrr    middle_loop_br, middle_loop

        ROUND_DOWN(an, n, 16)   // an is "aligned n"
        MODULO(n, n, 16)        // what's left over in the last quad
        brz     an, do_tail     // no whole quad words; skip to tail
        clgti   t0, an, 127     // an >= 128?
        brz     t0, middle2     // nope, go handle the cases between 0 and 112

        /*
         * 128 bytes / iteration
         */
        .p2align 4
middle_loop:
        ai      an, an, -128
        stqd    c,  0*16(p)
        ai      next_p, p, 128
        stqd    c,  1*16(p)
        cgti    cond1, an, 127  // another full 128-byte round to go?
        stqd    c,  2*16(p)

        stqd    c,  3*16(p)
        stqd    c,  4*16(p)
        stqd    c,  5*16(p)
        stqd    c,  6*16(p)

        MR(p, next_p)
        stqd    c,  7*16-128(next_p)    // eighth quad, addressed via next_p
        or      cond2, n, an            // zero iff nothing at all is left
middle_loop_br:
        brnz    cond1, middle_loop

        /*
         * if an and n are both zero, return now
         */
        BRZ_RETURN(cond2)

        /*
         * otherwise handle last of full quad words
         *
         *   0 <= an < 128, p%16 == 0
         */
middle2:
        /*
         * if an == 0, go handle the final non-full quadword
         */
        brz     an, do_tail
        hbrr    middle2_loop_br, middle2_loop

        .p2align 3
middle2_loop:
        ai      next_p, p, 16
        stqd    c, 0(p)
        ai      an, an, -16
        LMR(p, next_p)
middle2_loop_br:
        brnz    an, middle2_loop

        /*
         * We're done with the full quadwords.  If there is no partial
         * quadword left, return now, as the 128-byte loop does above.
         * Falling into do_tail with n == 0 would load the quadword just
         * past the end of the buffer and store it straight back.
         */
        BRZ_RETURN(n)

        /*
         * Handle the final partial quadword.
         * We'll be modifying only the left hand portion of the quad.
         *
         * preconditions:
         *   an == 0, 0 < n < 16, p%16 == 0
         */
do_tail:
        HINT_RETURN(do_tail_ret)
        il      mask, -1
        sfi     t1, n, 16               // t1 = 16 - n
        lqd     old, 0(p)
        shlqby  mask, mask, t1          // 1's in the top n bytes, 0's below
        selb    t0, old, c, mask        // c where mask is 1, old elsewhere
        stqd    t0, 0(p)
do_tail_ret:
        RETURN()

        /*
         * ----------------------------------------------------------------
         * Handle the first partial quadword
         *
         * preconditions:
         *   p%16 != 0
         *
         * postconditions:
         *   p%16 == 0 or n == 0
         *
         *        |-- m --|
         *     +----------------+----------------+
         *     |  //////// |                |
         *     +----------------+----------------+
         *        |----- r -----|
         *        p
         * ----------------------------------------------------------------
         */
do_head:
        lqd     old, 0(p)
        MODULO_NEG(r, p, 16)            // r: bytes from p to the next boundary (see diagram)
        il      mask, -1
        UMIN(m, r, n)                   // m: bytes to write in this quad (presumably min(r, n))
        shlqby  mask, mask, m           // 1's in the top, m*8 0's in the bottom
        MR(t1, p)                       // keep the original address for the store
        sf      t0, m, r                // t0 = r - m
        a       p, p, m                 // p += m
        rotqby  mask, mask, t0          // rotate 0's to the right place
        sf      n, m, n                 // n -= m
        selb    t0, c, old, mask        // merge
        stqd    t0, 0(t1)
        BRZ_RETURN(n)
do_head_br:
        br      head_complete