Statistics
| Branch: | Tag: | Revision:

root / volk / spu_lib / spu_memcpy_unaligned.c @ cacfd391

History | View | Annotate | Download (10.3 kB)

1
#include <libvector/libvector_memcpy_unaligned.h>
#include <spu_intrinsics.h>
3
4
void* libvector_memcpy_unaligned(void* target, void* src, unsigned int num_bytes){
5
        //loop iterator i
6
        int i = 0;
7
        void* retval = target;
8
9
10
        //put the target and source addresses into qwords
11
        vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0};
12
        vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0};
13
14
        //create shuffle masks
15
16
        //shuffle mask building blocks:
17
        //all from the first vector
18
        vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
19
                                                                  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
20
        //all from the second vector
21
        vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
22
                                                                                 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
23
24
25
26
        //gamma: second half of the second, first half of the first, break at (unsigned int)src%16
27
        vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned int)src%16));
28
        vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
29
        vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
30
        vector unsigned char cmp_res = spu_or(gt_res, eq_res);
31
        vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
32
        vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
33
        vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change,
34
                                                                                                 (vector unsigned int)oneup);
35
        shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned int)src%16);
36
37
38
39
40
        vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -((unsigned int)target%16));
41
        vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned int)target%16));
42
43
        //alpha: first half of first, second half of second, break at (unsigned int)target%16
44
        src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
45
        gt_res = spu_cmpgt(oneup, src_cmp);
46
        eq_res = spu_cmpeq(oneup, src_cmp);
47
        cmp_res = spu_or(gt_res, eq_res);
48
        phase_change = spu_and(sixteen_uchar, cmp_res);
49
        vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change,
50
                                                                                                         (vector unsigned int)oneup);
51
52
        //delta: first half of first, first half of second, break at (unsigned int)target%16
53
        vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha);
54
        //epsilon: second half of second, second half of first, break at (unsigned int)target%16
55
        vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha);
56
        //zeta: second half of second, first half of first, break at 16 - (unsigned int)target%16
57
        vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
58
59
        //beta: first half of first, second half of second, break at num_bytes%16
60
        src_cmp = spu_splats((unsigned char)(num_bytes%16));
61
        gt_res = spu_cmpgt(oneup, src_cmp);
62
        eq_res = spu_cmpeq(oneup, src_cmp);
63
        cmp_res = spu_or(gt_res, eq_res);
64
        phase_change = spu_and(sixteen_uchar, cmp_res);
65
        vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change,
66
                                                                                                         (vector unsigned int)oneup);
67
68
69
70
71
72
73
        qword src_past;
74
        qword src_present;
75
        qword tgt_past;
76
        qword tgt_present;
77
78
        qword in_temp;
79
        qword out_temp0;
80
        qword out_temp1;
81
82
        src_past = si_lqd((qword)address_counter_src, 0);
83
        tgt_past = si_lqd((qword)address_counter_tgt, 0);
84
85
        for(i = 0; i < num_bytes/16; ++i) {
86
87
                src_present = si_lqd((qword)address_counter_src, 16);
88
                tgt_present = si_lqd((qword)address_counter_tgt, 16);
89
90
                in_temp = spu_shuffle(src_present, src_past, (vector unsigned char)shuffle_mask_gamma);
91
92
                out_temp0 = spu_shuffle(tgt_past, in_temp, shuffle_mask_delta);
93
                out_temp1 = spu_shuffle(tgt_present, in_temp, shuffle_mask_epsilon);
94
95
                si_stqd(out_temp0, (qword)address_counter_tgt, 0);
96
                si_stqd(out_temp1, (qword)address_counter_tgt, 16);
97
98
                tgt_past = out_temp1;
99
                src_past = src_present;
100
                address_counter_src = spu_add(address_counter_src, 16);
101
                address_counter_tgt = spu_add(address_counter_tgt, 16);
102
103
104
        }
105
106
        src_present = si_lqd((qword)address_counter_src, 16);
107
        tgt_present = si_lqd((qword)address_counter_tgt, 16);
108
109
110
        in_temp = spu_shuffle(src_present, src_past,(vector unsigned char) shuffle_mask_gamma);
111
        qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta);
112
        qword meld = spu_shuffle(in_temp, target_temp, (vector unsigned char)shuffle_mask_beta);
113
114
115
116
        out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
117
        out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
118
119
        si_stqd(out_temp0, (qword)address_counter_tgt, 0);
120
        si_stqd(out_temp1, (qword)address_counter_tgt, 16);
121
122
        return retval;
123
}
124
125
126
127
/*
128
void* mcpy(void* target, void* src, size_t num_bytes){
129
        //loop iterator i
130
        int i = 0;
131
        void* retval = src;
132
133
        //put the target and source addresses into qwords
134
        vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 0};
135
        vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0};
136
137
        //create shuffle masks
138
139
        //shuffle mask building blocks:
140
        //all from the first vector
141
        vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
142
                                                                  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
143
        //all from the second vector
144
        vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
145
                                                                                 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
146
147
148
149
        //gamma: second half of the second, first half of the first, break at src%16
150
        vector unsigned char src_cmp = spu_splats((unsigned char)(src%16));
151
        vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
152
        vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
153
        vector unsigned char cmp_res = spu_or(gt_res, eq_res);
154
        vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
155
        vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
156
        vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned int)phase_change,
157
                                                                                                 (vector unsigned int)oneup);
158
        shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, src%16);
159
160
161
162
163
        vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, -(target%16));
164
        vector unsigned char tgt_first = spu_rlqwbyte(oneup, -(target%16));
165
166
        //alpha: first half of first, second half of second, break at target%16
167
        src_cmp = spu_splats((unsigned char)(target%16));
168
        gt_res = spu_cmpgt(oneup, src_cmp);
169
        eq_res = spu_cmpeq(oneup, src_cmp);
170
        cmp_res = spu_or(gt_res, eq_res);
171
        phase_change = spu_and(sixteen_uchar, cmp_res);
172
        vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned int)phase_change,
173
                                                                                                         (vector unsigned int)oneup);
174
175
        //delta: first half of first, first half of second, break at target%16
176
        vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, tgt_second, (vector unsigned char)shuffle_mask_alpha);
177
        //epsilon: second half of second, second half of first, break at target%16
178
        vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, oneup, (vector unsigned char)shuffle_mask_alpha);
179
        //zeta: second half of second, first half of first, break at 16 - target%16
180
        vector unsigned int shuffle_mask_zeta = spu_rlqwbyte(shuffle_mask_alpha, target%16);
181
182
        //beta: first half of first, second half of second, break at num_bytes%16
183
        src_cmp = spu_splats((unsigned char)(num_bytes%16));
184
        gt_res = spu_cmpgt(oneup, src_cmp);
185
        eq_res = spu_cmpeq(oneup, src_cmp);
186
        cmp_res = spu_or(gt_res, eq_res);
187
        phase_change = spu_and(sixteen_uchar, cmp_res);
188
        vector unsigned int shuffle_mask_beta = spu_add((vector unsigned int)phase_change,
189
                                                                                                         (vector unsigned int)oneup);
190
191
192
          printf("num_bytesmod16 %d\n", num_bytes%16);
193
        printf("beta %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d\n",
194
                   spu_extract((vector unsigned char) shuffle_mask_beta, 0),
195
                   spu_extract((vector unsigned char) shuffle_mask_beta, 1),
196
                   spu_extract((vector unsigned char) shuffle_mask_beta, 2),
197
                   spu_extract((vector unsigned char) shuffle_mask_beta, 3),
198
                   spu_extract((vector unsigned char) shuffle_mask_beta, 4),
199
                   spu_extract((vector unsigned char) shuffle_mask_beta, 5),
200
                   spu_extract((vector unsigned char) shuffle_mask_beta, 6),
201
                   spu_extract((vector unsigned char) shuffle_mask_beta, 7),
202
                   spu_extract((vector unsigned char) shuffle_mask_beta, 8),
203
                   spu_extract((vector unsigned char) shuffle_mask_beta, 9),
204
                   spu_extract((vector unsigned char) shuffle_mask_beta, 10),
205
                   spu_extract((vector unsigned char) shuffle_mask_beta, 11),
206
                   spu_extract((vector unsigned char) shuffle_mask_beta, 12),
207
                   spu_extract((vector unsigned char) shuffle_mask_beta, 13),
208
                   spu_extract((vector unsigned char) shuffle_mask_beta, 14),
209
                   spu_extract((vector unsigned char) shuffle_mask_beta, 15));
210
211
212
213
214
215
216
217
        qword src_past;
218
        qword src_present;
219
        qword tgt_past;
220
        qword tgt_present;
221
222
        qword in_temp;
223
        qword out_temp0;
224
        qword out_temp1;
225
226
        src_past = si_lqd((qword)address_counter_src, 0);
227
        tgt_past = si_lqd((qword)address_counter_tgt, 0);
228
229
        for(i = 0; i < num_bytes/16; ++i) {
230
231
                src_present = si_lqd((qword)address_counter_src, 16);
232
                tgt_present = si_lqd((qword)address_counter_tgt, 16);
233
234
                in_temp = spu_shuffle(src_present, src_past, (vector unsigned char)shuffle_mask_gamma);
235
236
                out_temp0 = spu_shuffle(tgt_past, in_temp, shuffle_mask_delta);
237
                out_temp1 = spu_shuffle(tgt_present, in_temp, shuffle_mask_epsilon);
238
239
                si_stqd(out_temp0, (qword)address_counter_tgt, 0);
240
                si_stqd(out_temp1, (qword)address_counter_tgt, 16);
241
242
                tgt_past = out_temp1;
243
                src_past = src_present;
244
                address_counter_src = spu_add(address_counter_src, 16);
245
                address_counter_tgt = spu_add(address_counter_tgt, 16);
246
247
248
        }
249
250
        src_present = si_lqd((qword)address_counter_src, 16);
251
        tgt_present = si_lqd((qword)address_counter_tgt, 16);
252
253
254
        in_temp = spu_shuffle(src_present, src_past,(vector unsigned char) shuffle_mask_gamma);
255
        qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned char) shuffle_mask_zeta);
256
        qword meld = spu_shuffle(in_temp, target_temp, (vector unsigned char)shuffle_mask_beta);
257
258
259
260
        out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
261
        out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
262
263
        si_stqd(out_temp0, (qword)address_counter_tgt, 0);
264
        si_stqd(out_temp1, (qword)address_counter_tgt, 16);
265
266
        return retval;
267
268
}
269
*/
270
/*
271
int main(){
272
273
        unsigned char pooh[48];
274
        unsigned char bear[48];
275
276
        int i = 0;
277
        for(i = 0; i < 48; ++i){
278
                pooh[i] = i;
279
                bear[i] = i;
280
        }
281
282
        spu_mcpy(&pooh[9],&bear[3], 15);
283
284
        for(i = 0; i < 48; ++i) {
285
                printf("%d, ", pooh[i]);
286
        }
287
        printf("\n");
288
}
289
290
*/