/*
 * Feature-gated section of an x86 intrinsics header.  Each guard has the form
 * "!defined(_MSC_VER) || __has_feature(modules) || defined(<FEATURE>)".
 * The section also holds the _mm256_cvtps_ph macro, which forwards to
 * __builtin_ia32_vcvtps2ph256, and the opening of _mm256_cvtph_ps, which
 * forwards to __builtin_ia32_vcvtph2ps256 under target "f16c".
 *
 * NOTE(review): the bare integers (27, 31, 35, ...) look like listing line
 * numbers fused into the text, and the guarded bodies, the matching #endif
 * lines and the function braces are not present here.  Confirm against a
 * pristine copy of the header before building.
 */
27 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__) 31 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__) 35 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__) 39 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__) 43 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__) 47 #if !defined(_MSC_VER) || __has_feature(modules) || \ 48 (defined(__SSE4_2__) || defined(__SSE4_1__)) 52 #if !defined(_MSC_VER) || __has_feature(modules) || \ 53 (defined(__AES__) || defined(__PCLMUL__)) 57 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__) 61 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__) 65 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__) 69 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__) 99 #define _mm256_cvtps_ph(a, imm) __extension__ ({ \ 100 (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); }) 114 static __inline __m256
__attribute__((__always_inline__, __nodebug__, __target__(
"f16c")))
115 _mm256_cvtph_ps(__m128i __a)
117 return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
/*
 * Second run of per-feature guards (VPCLMULQDQ through RDRND), each of the
 * form "!defined(_MSC_VER) || __has_feature(modules) || defined(<FEATURE>)",
 * followed by _rdrand16_step.  That function passes __p to
 * __builtin_ia32_rdrand16_step and returns the builtin's int result; it is
 * declared with target "rdrnd".
 *
 * NOTE(review): the bare integers (121, 125, ..., 251, 252, 254) look like
 * listing line numbers fused into the text, and the guarded bodies, #endif
 * lines and the function's braces are not present here.  Confirm against a
 * pristine copy of the header before building.
 */
121 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__) 125 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) 129 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__) 133 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__) 137 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__) 141 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__) 145 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__) 149 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__) 153 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__) 157 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__) 161 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__) 165 #if !defined(_MSC_VER) || __has_feature(modules) || \ 166 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) 170 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__) 174 #if !defined(_MSC_VER) || __has_feature(modules) || \ 175 (defined(__AVX512VL__) && defined(__AVX512VNNI__)) 179 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) 183 #if !defined(_MSC_VER) || __has_feature(modules) || \ 184 (defined(__AVX512VL__) && defined(__AVX512BITALG__)) 188 #if !defined(_MSC_VER) || __has_feature(modules) || \ 189 (defined(__AVX512VL__) && defined(__AVX512BW__)) 193 #if !defined(_MSC_VER) || __has_feature(modules) || \ 194 (defined(__AVX512VL__) && defined(__AVX512CD__)) 198 #if !defined(_MSC_VER) || __has_feature(modules) || \ 199 (defined(__AVX512VL__) && defined(__AVX512DQ__)) 203 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__) 207 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__) 211 #if !defined(_MSC_VER) || __has_feature(modules) || \ 212 (defined(__AVX512IFMA__) && defined(__AVX512VL__)) 216 #if !defined(_MSC_VER) || 
__has_feature(modules) || defined(__AVX512VBMI__) 220 #if !defined(_MSC_VER) || __has_feature(modules) || \ 221 (defined(__AVX512VBMI__) && defined(__AVX512VL__)) 225 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__) 229 #if !defined(_MSC_VER) || __has_feature(modules) || \ 230 (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) 234 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__) 238 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__) 242 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__) 246 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__) 250 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__) 251 static __inline__
int __attribute__((__always_inline__, __nodebug__, __target__(
"rdrnd")))
252 _rdrand16_step(
unsigned short *
__p)
254 return __builtin_ia32_rdrand16_step(__p);
/*
 * Requests a 32-bit hardware random value through the RDRAND builtin.
 *
 * __p      Passed straight to __builtin_ia32_rdrand32_step, which is
 *          documented to store the generated value there.
 * Returns  The builtin's status, unchanged.  Per Intel's RDRAND
 *          documentation this is 1 when a value was produced and 0 when none
 *          was available.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return __builtin_ia32_rdrand32_step(__p);
}
/*
 * Requests a 64-bit hardware random value through the RDRAND builtin.
 *
 * __p      Passed straight to __builtin_ia32_rdrand64_step, which is
 *          documented to store the generated value there.
 * Returns  The builtin's status, unchanged.  Per Intel's RDRAND
 *          documentation this is 1 when a value was produced and 0 when none
 *          was available.
 *
 * NOTE(review): the 64-bit builtin is expected to exist only on x86-64
 * targets; confirm that an enclosing guard covers this definition.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
  return __builtin_ia32_rdrand64_step(__p);
}
/*
 * Returns the zero-based index of the least significant set bit of __A,
 * computed as the number of trailing zero bits.
 *
 * __A      Value to scan.  __builtin_ctz leaves the result undefined when
 *          its argument is 0, so callers must pass a non-zero value.
 * Returns  Bit index in the range 0..31.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
_bit_scan_forward(int __A)
{
  return __builtin_ctz(__A);
}
/*
 * Returns the zero-based index of the most significant set bit of __A,
 * computed as 31 minus the number of leading zero bits.
 *
 * __A      Value to scan.  __builtin_clz leaves the result undefined when
 *          its argument is 0, so callers must pass a non-zero value.
 * Returns  Bit index in the range 0..31.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
_bit_scan_reverse(int __A)
{
  return 31 - __builtin_clz(__A);
}
/*
 * Opening guard for the FSGSBASE intrinsics, followed by _readfsbase_u32.
 * That function takes no arguments and returns the unsigned int result of
 * __builtin_ia32_rdfsbase32(); it is declared with target "fsgsbase".
 *
 * NOTE(review): the bare integers (284, 286, 287, 289) look like listing
 * line numbers fused into the text, and the function's braces and the
 * guard's #endif are not present here.  Confirm against a pristine copy of
 * the header before building.
 */
284 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__) 286 static __inline__
unsigned int __attribute__((__always_inline__, __nodebug__, __target__(
"fsgsbase")))
287 _readfsbase_u32(
void)
289 return __builtin_ia32_rdfsbase32();
/*
 * Reads the FS segment base as a 64-bit value.
 *
 * Returns  The result of __builtin_ia32_rdfsbase64(), unchanged.
 *
 * NOTE(review): the underlying instruction requires FSGSBASE support to be
 * enabled by the OS and a 64-bit target; confirm the enclosing guards.
 */
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}
/*
 * Reads the GS segment base as a 32-bit value.
 *
 * Returns  The result of __builtin_ia32_rdgsbase32(), unchanged.
 *
 * NOTE(review): the underlying instruction requires FSGSBASE support to be
 * enabled by the OS and a 64-bit target; confirm the enclosing guards.
 */
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}
/*
 * Reads the GS segment base as a 64-bit value.
 *
 * Returns  The result of __builtin_ia32_rdgsbase64(), unchanged.
 *
 * NOTE(review): the underlying instruction requires FSGSBASE support to be
 * enabled by the OS and a 64-bit target; confirm the enclosing guards.
 */
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}
/*
 * Writes a 32-bit value to the FS segment base.
 *
 * __V  Value handed to __builtin_ia32_wrfsbase32.
 *
 * The builtin is called as a plain statement: ISO C does not allow a
 * `return` with an expression in a function returning void.
 *
 * NOTE(review): the underlying instruction requires FSGSBASE support to be
 * enabled by the OS and a 64-bit target; confirm the enclosing guards.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}
/*
 * Writes a 64-bit value to the FS segment base.
 *
 * __V  Value handed to __builtin_ia32_wrfsbase64.
 *
 * The builtin is called as a plain statement: ISO C does not allow a
 * `return` with an expression in a function returning void.
 *
 * NOTE(review): the underlying instruction requires FSGSBASE support to be
 * enabled by the OS and a 64-bit target; confirm the enclosing guards.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}
/*
 * Writes a 32-bit value to the GS segment base.
 *
 * __V  Value handed to __builtin_ia32_wrgsbase32.
 *
 * The builtin is called as a plain statement: ISO C does not allow a
 * `return` with an expression in a function returning void.
 *
 * NOTE(review): the underlying instruction requires FSGSBASE support to be
 * enabled by the OS and a 64-bit target; confirm the enclosing guards.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}
/*
 * Writes a 64-bit value to the GS segment base.
 *
 * __V  Value handed to __builtin_ia32_wrgsbase64.
 *
 * The builtin is called as a plain statement: ISO C does not allow a
 * `return` with an expression in a function returning void.
 *
 * NOTE(review): the underlying instruction requires FSGSBASE support to be
 * enabled by the OS and a 64-bit target; confirm the enclosing guards.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
337 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__) 342 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__) 346 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__) 350 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__) 354 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__) 358 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__) 362 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__) 366 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
static __inline unsigned char unsigned int unsigned int unsigned int * __p
static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) _mm256_cvtph_ps(__m128i __a)
Converts a 128-bit vector containing 16-bit half-precision float values into a 256-bit vector of [8 x float].