Statistics
| Branch: | Tag: | Revision:

root / volk / include / volk / volk_64u_popcnt_aligned16.h @ 74f206ed

History | View | Annotate | Download (2 kB)

1
#ifndef INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H
2
#define INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H
3
4
#include <stdio.h>
5
#include <inttypes.h>
6
7
8
#if LV_HAVE_GENERIC
9
10
11
/*
 * Counts the 1 bits in a 64-bit word using plain integer arithmetic.
 *
 * ret   - output; receives the number of set bits in `value` (0..64).
 *         It is written unconditionally, so it must point to valid storage.
 * value - the word whose set bits are counted.
 */
static inline void volk_64u_popcnt_aligned16_generic(uint64_t* ret, const uint64_t value) {
  /* Split the word with a shift and truncating casts instead of reading the
     uint64_t through a uint32_t pointer: that cast breaks the strict-aliasing
     rule. The two per-half counts are simply added, so the order in which the
     halves are processed does not affect the result. */
  const uint32_t halves[2] = { (uint32_t)value, (uint32_t)(value >> 32) };
  uint64_t total = 0;
  unsigned int i;

  for (i = 0; i < 2; ++i) {
    uint32_t bits = halves[i];

    /* Parallel bit count: sum adjacent 1-bit fields into 2-bit fields, then
       2-bit into 4-bit, then 4-bit into bytes, and finally add the four bytes
       together in the low byte. A 32-bit word has at most 32 set bits, so the
       final 6-bit mask keeps the whole count and discards the partial sums
       left over in the upper bytes. */
    bits = (bits & 0x55555555u) + ((bits >> 1) & 0x55555555u);
    bits = (bits & 0x33333333u) + ((bits >> 2) & 0x33333333u);
    bits = (bits + (bits >> 4)) & 0x0F0F0F0Fu;
    bits += bits >> 8;
    bits = (bits + (bits >> 16)) & 0x0000003Fu;

    total += bits;
  }

  *ret = total;
}
36
37
#endif /*LV_HAVE_GENERIC*/
38
39
#if LV_HAVE_SSE4_2
40
41
#include <nmmintrin.h>
42
43
/*
 * Counts the 1 bits in a 64-bit word.
 *
 * When LV_64 is set the count comes from the _mm_popcnt_u64 intrinsic;
 * otherwise it is computed with plain 32-bit integer arithmetic on each half
 * of the word.
 *
 * ret   - output; receives the number of set bits in `value` (0..64).
 *         It is written unconditionally, so it must point to valid storage.
 * value - the word whose set bits are counted.
 */
static inline void volk_64u_popcnt_aligned16_sse4_2(uint64_t* ret, const uint64_t value) {
#if LV_64
  *ret = _mm_popcnt_u64(value);
#else
  /* Split the word with a shift and truncating casts instead of reading the
     uint64_t through a uint32_t pointer: that cast breaks the strict-aliasing
     rule. The two per-half counts are simply added, so the order in which the
     halves are processed does not affect the result. */
  const uint32_t halves[2] = { (uint32_t)value, (uint32_t)(value >> 32) };
  uint64_t total = 0;
  unsigned int i;

  for (i = 0; i < 2; ++i) {
    uint32_t bits = halves[i];

    /* Parallel bit count: sum adjacent 1-bit fields into 2-bit fields, then
       2-bit into 4-bit, then 4-bit into bytes, and finally add the four bytes
       together in the low byte. A 32-bit word has at most 32 set bits, so the
       final 6-bit mask keeps the whole count and discards the partial sums
       left over in the upper bytes. */
    bits = (bits & 0x55555555u) + ((bits >> 1) & 0x55555555u);
    bits = (bits & 0x33333333u) + ((bits >> 2) & 0x33333333u);
    bits = (bits + (bits >> 4)) & 0x0F0F0F0Fu;
    bits += bits >> 8;
    bits = (bits + (bits >> 16)) & 0x0000003Fu;

    total += bits;
  }

  *ret = total;
#endif
}
71
72
#endif /*LV_HAVE_SSE4_2*/
73
74
#endif /*INCLUDED_VOLK_64u_POPCNT_ALIGNED16_H*/