11
11
#include < detail/platform_util.hpp>
12
12
13
13
#if defined(SYCL_RT_OS_LINUX)
14
+ #include < errno.h>
15
+ #include < unistd.h>
16
+ #if defined(__x86_64__) || defined(__i386__)
14
17
#include < cpuid.h>
18
+ #endif
15
19
#elif defined(SYCL_RT_OS_WINDOWS)
16
20
#include < intrin.h>
17
21
#endif
@@ -20,6 +24,7 @@ __SYCL_INLINE_NAMESPACE(cl) {
20
24
namespace sycl {
21
25
namespace detail {
22
26
27
+ #if defined(__x86_64__) || defined(__i386__)
23
28
// Used by methods that duplicate OpenCL behaviour in order to get CPU info
24
29
static void cpuid (uint32_t *CPUInfo, uint32_t Type, uint32_t SubType = 0 ) {
25
30
#if defined(SYCL_RT_OS_LINUX)
@@ -28,11 +33,13 @@ static void cpuid(uint32_t *CPUInfo, uint32_t Type, uint32_t SubType = 0) {
28
33
__cpuidex (reinterpret_cast <int *>(CPUInfo), Type, SubType);
29
34
#endif
30
35
}
36
+ #endif
31
37
32
38
uint32_t PlatformUtil::getMaxClockFrequency () {
33
39
throw runtime_error (
34
40
" max_clock_frequency parameter is not supported for host device" ,
35
41
PI_INVALID_DEVICE);
42
+ #if defined(__x86_64__) || defined(__i386__)
36
43
uint32_t CPUInfo[4 ];
37
44
string_class Buff (sizeof (CPUInfo) * 3 + 1 , 0 );
38
45
size_t Offset = 0 ;
@@ -62,21 +69,43 @@ uint32_t PlatformUtil::getMaxClockFrequency() {
62
69
Buff = Buff.substr (Buff.rfind (' ' ), Buff.length ());
63
70
Freq *= std::stod (Buff);
64
71
return Freq;
72
+ #endif
73
+ return 0 ;
65
74
}
66
75
67
76
uint32_t PlatformUtil::getMemCacheLineSize () {
77
+ #if defined(__x86_64__) || defined(__i386__)
68
78
uint32_t CPUInfo[4 ];
69
79
cpuid (CPUInfo, 0x80000006 );
70
80
return CPUInfo[2 ] & 0xff ;
81
+ #elif defined(SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_LINESIZE)
82
+ long lineSize = sysconf (_SC_LEVEL2_DCACHE_LINESIZE);
83
+ if (lineSize > 0 ) {
84
+ return lineSize;
85
+ }
86
+ #endif
87
+ return 8 ;
71
88
}
72
89
73
90
uint64_t PlatformUtil::getMemCacheSize () {
91
+ #if defined(__x86_64__) || defined(__i386__)
74
92
uint32_t CPUInfo[4 ];
75
93
cpuid (CPUInfo, 0x80000006 );
76
94
return static_cast <uint64_t >(CPUInfo[2 ] >> 16 ) * 1024 ;
95
+ #elif defined(SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_SIZE)
96
+ long cacheSize = sysconf (_SC_LEVEL2_DCACHE_SIZE);
97
+ if (cacheSize > 0 ) {
98
+ return cacheSize;
99
+ }
100
+ #endif
101
+ return static_cast <uint64_t >(16 * 1024 );
77
102
}
78
103
79
104
uint32_t PlatformUtil::getNativeVectorWidth (PlatformUtil::TypeIndex TIndex) {
105
+
106
+ #if defined(__x86_64__) || defined(__i386__)
107
+ uint32_t Index = static_cast <uint32_t >(TIndex);
108
+
80
109
// SSE4.2 has 16 byte (XMM) registers
81
110
static constexpr uint32_t VECTOR_WIDTH_SSE42[] = {16 , 8 , 4 , 2 , 4 , 2 , 0 };
82
111
// AVX supports 32 byte (YMM) registers only for floats and doubles
@@ -86,8 +115,6 @@ uint32_t PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex TIndex) {
86
115
// AVX512 has 64 byte (ZMM) registers
87
116
static constexpr uint32_t VECTOR_WIDTH_AVX512[] = {64 , 32 , 16 , 8 , 16 , 8 , 0 };
88
117
89
- uint32_t Index = static_cast <uint32_t >(TIndex);
90
-
91
118
#if defined(SYCL_RT_OS_LINUX)
92
119
if (__builtin_cpu_supports (" avx512f" ))
93
120
return VECTOR_WIDTH_AVX512[Index];
@@ -119,14 +146,23 @@ uint32_t PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex TIndex) {
119
146
#endif
120
147
121
148
return VECTOR_WIDTH_SSE42[Index];
149
+
150
+ #elif defined(__ARM_NEON)
151
+ uint32_t Index = static_cast <uint32_t >(TIndex);
152
+
153
+ // NEON has 16 byte registers
154
+ static constexpr uint32_t VECTOR_WIDTH_NEON[] = {16 , 8 , 4 , 2 , 4 , 2 , 0 };
155
+ return VECTOR_WIDTH_NEON[Index];
156
+
157
+ #endif
158
+ return 0 ;
122
159
}
123
160
124
161
void PlatformUtil::prefetch (const char *Ptr, size_t NumBytes) {
125
162
if (!Ptr)
126
163
return ;
127
164
128
- // The current implementation assumes 64-byte x86 cache lines.
129
- const size_t CacheLineSize = 64 ;
165
+ const size_t CacheLineSize = PlatformUtil::getMemCacheLineSize ();
130
166
const size_t CacheLineMask = ~(CacheLineSize - 1 );
131
167
const char *PtrEnd = Ptr + NumBytes;
132
168
0 commit comments