24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 27 #ifndef __AVX512FINTRIN_H 28 #define __AVX512FINTRIN_H 34 typedef long long __v8di
__attribute__((__vector_size__(64)));
38 typedef unsigned char __v64qu
__attribute__((__vector_size__(64)));
39 typedef unsigned short __v32hu
__attribute__((__vector_size__(64)));
40 typedef unsigned long long __v8du
__attribute__((__vector_size__(64)));
41 typedef unsigned int __v16su
__attribute__((__vector_size__(64)));
45 typedef long long __m512i
__attribute__((__vector_size__(64)));
51 #define _MM_FROUND_TO_NEAREST_INT 0x00 52 #define _MM_FROUND_TO_NEG_INF 0x01 53 #define _MM_FROUND_TO_POS_INF 0x02 54 #define _MM_FROUND_TO_ZERO 0x03 55 #define _MM_FROUND_CUR_DIRECTION 0x04 65 #define _MM_CMPINT_GE _MM_CMPINT_NLT 67 #define _MM_CMPINT_GT _MM_CMPINT_NLE 176 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) 183 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
186 #define _mm512_setzero_epi32 _mm512_setzero_si512 191 return (__m512d)__builtin_ia32_undef512();
197 return (__m512)__builtin_ia32_undef512();
203 return (__m512)__builtin_ia32_undef512();
209 return (__m512i)__builtin_ia32_undef512();
215 return (__m512i)__builtin_shufflevector((__v4si) __A,
217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
223 return (__m512i)__builtin_ia32_selectd_512(__M,
231 return (__m512i)__builtin_ia32_selectd_512(__M,
239 return (__m512i)__builtin_shufflevector((__v2di) __A,
241 0, 0, 0, 0, 0, 0, 0, 0);
247 return (__m512i)__builtin_ia32_selectq_512(__M,
256 return (__m512i)__builtin_ia32_selectq_512(__M,
265 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
266 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
269 #define _mm512_setzero _mm512_setzero_ps 274 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
280 return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
281 __w, __w, __w, __w, __w, __w, __w, __w };
287 return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
293 return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w,
296 __w, __w, __w, __w, __w, __w, __w, __w,
297 __w, __w, __w, __w, __w, __w, __w, __w,
298 __w, __w, __w, __w, __w, __w, __w, __w,
299 __w, __w, __w, __w, __w, __w, __w, __w,
300 __w, __w, __w, __w, __w, __w, __w, __w };
306 return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
307 __w, __w, __w, __w, __w, __w, __w, __w,
308 __w, __w, __w, __w, __w, __w, __w, __w,
309 __w, __w, __w, __w, __w, __w, __w, __w };
315 return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
316 __s, __s, __s, __s, __s, __s, __s, __s };
322 return (__m512i)__builtin_ia32_selectd_512(__M,
330 return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
335 _mm512_maskz_set1_epi64(__mmask8 __M,
long long __A)
337 return (__m512i)__builtin_ia32_selectq_512(__M,
346 return (__m512)__builtin_shufflevector((__v4sf) __A,
348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
354 return (__m512i)(__v16si)
355 { __D, __C, __B, __A, __D, __C, __B, __A,
356 __D, __C, __B, __A, __D, __C, __B, __A };
363 return (__m512i) (__v8di)
364 { __D, __C, __B, __A, __D, __C, __B, __A };
371 { __D, __C, __B, __A, __D, __C, __B, __A };
378 { __D, __C, __B, __A, __D, __C, __B, __A,
379 __D, __C, __B, __A, __D, __C, __B, __A };
382 #define _mm512_setr4_epi32(e0,e1,e2,e3) \ 383 _mm512_set4_epi32((e3),(e2),(e1),(e0)) 385 #define _mm512_setr4_epi64(e0,e1,e2,e3) \ 386 _mm512_set4_epi64((e3),(e2),(e1),(e0)) 388 #define _mm512_setr4_pd(e0,e1,e2,e3) \ 389 _mm512_set4_pd((e3),(e2),(e1),(e0)) 391 #define _mm512_setr4_ps(e0,e1,e2,e3) \ 392 _mm512_set4_ps((e3),(e2),(e1),(e0)) 397 return (__m512d)__builtin_shufflevector((__v2df) __A,
399 0, 0, 0, 0, 0, 0, 0, 0);
407 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
413 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
414 -1, -1, -1, -1, -1, -1, -1, -1);
420 return __builtin_shufflevector(__a, __a, 0, 1);
426 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
432 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
438 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
444 return (__m512) (__A);
450 return (__m512i) (__A);
456 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
462 return (__m512d) (__A);
468 return (__m512i) (__A);
474 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
480 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
486 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
492 return (__m512) (__A);
498 return (__m512d) (__A);
504 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
510 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
516 return (__mmask16)__a;
541 return __builtin_shufflevector((__v2df)__a, (__v2df)
_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
560 return __builtin_shufflevector((__v4df)__a, (__v4df)
_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
578 return __builtin_shufflevector((__v4sf)__a, (__v4sf)
_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
596 return __builtin_shufflevector((__v8sf)__a, (__v8sf)
_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
614 return __builtin_shufflevector((__v2di)__a, (__v2di)
_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
632 return __builtin_shufflevector((__v4di)__a, (__v4di)
_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
639 return (__m512i)((__v16su)__a & (__v16su)
__b);
645 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
660 return (__m512i)((__v8du)__a & (__v8du)
__b);
666 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
681 return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
687 return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
693 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
708 return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
714 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
729 return (__m512i)((__v16su)__a | (__v16su)
__b);
735 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
749 return (__m512i)((__v8du)__a | (__v8du)
__b);
755 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
769 return (__m512i)((__v16su)__a ^ (__v16su)
__b);
775 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
789 return (__m512i)((__v8du)__a ^ (__v8du)
__b);
795 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
809 return (__m512i)((__v8du)__a & (__v8du)
__b);
815 return (__m512i)((__v8du)__a | (__v8du)
__b);
821 return (__m512i)((__v8du)__a ^ (__v8du)
__b);
829 return (__m512d)((__v8df)__a + (__v8df)
__b);
835 return (__m512)((__v16sf)__a + (__v16sf)
__b);
841 return (__m512d)((__v8df)__a * (__v8df)
__b);
847 return (__m512)((__v16sf)__a * (__v16sf)
__b);
853 return (__m512d)((__v8df)__a - (__v8df)
__b);
859 return (__m512)((__v16sf)__a - (__v16sf)
__b);
865 return (__m512i) ((__v8du) __A + (__v8du) __B);
871 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
879 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
887 return (__m512i) ((__v8du) __A - (__v8du) __B);
893 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
901 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
909 return (__m512i) ((__v16su) __A + (__v16su) __B);
915 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
923 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
931 return (__m512i) ((__v16su) __A - (__v16su) __B);
937 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
945 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
950 #define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \ 951 (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \ 952 (__v8df)(__m512d)(B), \ 953 (__v8df)(__m512d)(W), (__mmask8)(U), \ 956 #define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \ 957 (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \ 958 (__v8df)(__m512d)(B), \ 959 (__v8df)_mm512_setzero_pd(), \ 960 (__mmask8)(U), (int)(R)); }) 962 #define _mm512_max_round_pd(A, B, R) __extension__ ({ \ 963 (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \ 964 (__v8df)(__m512d)(B), \ 965 (__v8df)_mm512_undefined_pd(), \ 966 (__mmask8)-1, (int)(R)); }) 971 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
982 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
992 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
1000 #define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \ 1001 (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \ 1002 (__v16sf)(__m512)(B), \ 1003 (__v16sf)(__m512)(W), (__mmask16)(U), \ 1006 #define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \ 1007 (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \ 1008 (__v16sf)(__m512)(B), \ 1009 (__v16sf)_mm512_setzero_ps(), \ 1010 (__mmask16)(U), (int)(R)); }) 1012 #define _mm512_max_round_ps(A, B, R) __extension__ ({ \ 1013 (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \ 1014 (__v16sf)(__m512)(B), \ 1015 (__v16sf)_mm512_undefined_ps(), \ 1016 (__mmask16)-1, (int)(R)); }) 1021 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1032 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1042 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1052 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1061 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1068 #define _mm_max_round_ss(A, B, R) __extension__ ({ \ 1069 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 1070 (__v4sf)(__m128)(B), \ 1071 (__v4sf)_mm_setzero_ps(), \ 1072 (__mmask8)-1, (int)(R)); }) 1074 #define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \ 1075 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 1076 (__v4sf)(__m128)(B), \ 1077 (__v4sf)(__m128)(W), (__mmask8)(U), \ 1080 #define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \ 1081 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 1082 (__v4sf)(__m128)(B), \ 1083 (__v4sf)_mm_setzero_ps(), \ 1084 (__mmask8)(U), (int)(R)); }) 1088 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1097 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1104 #define _mm_max_round_sd(A, B, R) __extension__ ({ \ 1105 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1106 (__v2df)(__m128d)(B), \ 1107 (__v2df)_mm_setzero_pd(), \ 1108 (__mmask8)-1, (int)(R)); }) 1110 #define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \ 1111 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1112 (__v2df)(__m128d)(B), \ 1113 (__v2df)(__m128d)(W), \ 1114 (__mmask8)(U), (int)(R)); }) 1116 #define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \ 1117 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1118 (__v2df)(__m128d)(B), \ 1119 (__v2df)_mm_setzero_pd(), \ 1120 (__mmask8)(U), (int)(R)); }) 1122 static __inline __m512i
1126 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1136 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1138 (__v16si) __W, __M);
1144 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1154 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1164 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1166 (__v16si) __W, __M);
1172 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1182 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1192 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1200 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1210 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1220 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1228 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1235 #define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \ 1236 (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \ 1237 (__v8df)(__m512d)(B), \ 1238 (__v8df)(__m512d)(W), (__mmask8)(U), \ 1241 #define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \ 1242 (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \ 1243 (__v8df)(__m512d)(B), \ 1244 (__v8df)_mm512_setzero_pd(), \ 1245 (__mmask8)(U), (int)(R)); }) 1247 #define _mm512_min_round_pd(A, B, R) __extension__ ({ \ 1248 (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \ 1249 (__v8df)(__m512d)(B), \ 1250 (__v8df)_mm512_undefined_pd(), \ 1251 (__mmask8)-1, (int)(R)); }) 1256 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1267 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1274 #define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \ 1275 (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \ 1276 (__v16sf)(__m512)(B), \ 1277 (__v16sf)(__m512)(W), (__mmask16)(U), \ 1280 #define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \ 1281 (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \ 1282 (__v16sf)(__m512)(B), \ 1283 (__v16sf)_mm512_setzero_ps(), \ 1284 (__mmask16)(U), (int)(R)); }) 1286 #define _mm512_min_round_ps(A, B, R) __extension__ ({ \ 1287 (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \ 1288 (__v16sf)(__m512)(B), \ 1289 (__v16sf)_mm512_undefined_ps(), \ 1290 (__mmask16)-1, (int)(R)); }) 1295 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1306 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1317 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1327 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1337 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1346 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1353 #define _mm_min_round_ss(A, B, R) __extension__ ({ \ 1354 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1355 (__v4sf)(__m128)(B), \ 1356 (__v4sf)_mm_setzero_ps(), \ 1357 (__mmask8)-1, (int)(R)); }) 1359 #define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \ 1360 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1361 (__v4sf)(__m128)(B), \ 1362 (__v4sf)(__m128)(W), (__mmask8)(U), \ 1365 #define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \ 1366 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1367 (__v4sf)(__m128)(B), \ 1368 (__v4sf)_mm_setzero_ps(), \ 1369 (__mmask8)(U), (int)(R)); }) 1373 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1382 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1389 #define _mm_min_round_sd(A, B, R) __extension__ ({ \ 1390 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1391 (__v2df)(__m128d)(B), \ 1392 (__v2df)_mm_setzero_pd(), \ 1393 (__mmask8)-1, (int)(R)); }) 1395 #define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \ 1396 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1397 (__v2df)(__m128d)(B), \ 1398 (__v2df)(__m128d)(W), \ 1399 (__mmask8)(U), (int)(R)); }) 1401 #define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \ 1402 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1403 (__v2df)(__m128d)(B), \ 1404 (__v2df)_mm_setzero_pd(), \ 1405 (__mmask8)(U), (int)(R)); }) 1407 static __inline __m512i
1411 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1421 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1423 (__v16si) __W, __M);
1429 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1439 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1449 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1451 (__v16si) __W, __M);
1457 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1467 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1477 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1485 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1495 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1505 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1513 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1523 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1529 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1537 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1545 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1551 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1559 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1567 return (__m512i) ((__v16su) __A * (__v16su) __B);
1573 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1581 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1586 #define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \ 1587 (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ 1588 (__v8df)(__m512d)(W), (__mmask8)(U), \ 1591 #define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \ 1592 (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ 1593 (__v8df)_mm512_setzero_pd(), \ 1594 (__mmask8)(U), (int)(R)); }) 1596 #define _mm512_sqrt_round_pd(A, R) __extension__ ({ \ 1597 (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ 1598 (__v8df)_mm512_undefined_pd(), \ 1599 (__mmask8)-1, (int)(R)); }) 1604 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
1613 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1622 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1629 #define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \ 1630 (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ 1631 (__v16sf)(__m512)(W), (__mmask16)(U), \ 1634 #define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \ 1635 (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ 1636 (__v16sf)_mm512_setzero_ps(), \ 1637 (__mmask16)(U), (int)(R)); }) 1639 #define _mm512_sqrt_round_ps(A, R) __extension__ ({ \ 1640 (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ 1641 (__v16sf)_mm512_undefined_ps(), \ 1642 (__mmask16)-1, (int)(R)); }) 1647 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
1656 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1665 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1674 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1682 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1690 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1699 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1708 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1716 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1725 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1735 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1744 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1753 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1763 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1772 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1781 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1790 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1798 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1807 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1816 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1824 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1833 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1843 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1852 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1861 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1871 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1880 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1889 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1898 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1907 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1916 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1925 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1934 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1943 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1952 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1961 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1970 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1978 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1987 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1996 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
2004 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
2012 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
2021 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
2028 #define _mm_add_round_ss(A, B, R) __extension__ ({ \ 2029 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 2030 (__v4sf)(__m128)(B), \ 2031 (__v4sf)_mm_setzero_ps(), \ 2032 (__mmask8)-1, (int)(R)); }) 2034 #define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \ 2035 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 2036 (__v4sf)(__m128)(B), \ 2037 (__v4sf)(__m128)(W), (__mmask8)(U), \ 2040 #define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \ 2041 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 2042 (__v4sf)(__m128)(B), \ 2043 (__v4sf)_mm_setzero_ps(), \ 2044 (__mmask8)(U), (int)(R)); }) 2048 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
2057 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
2063 #define _mm_add_round_sd(A, B, R) __extension__ ({ \ 2064 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 2065 (__v2df)(__m128d)(B), \ 2066 (__v2df)_mm_setzero_pd(), \ 2067 (__mmask8)-1, (int)(R)); }) 2069 #define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \ 2070 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 2071 (__v2df)(__m128d)(B), \ 2072 (__v2df)(__m128d)(W), \ 2073 (__mmask8)(U), (int)(R)); }) 2075 #define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \ 2076 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 2077 (__v2df)(__m128d)(B), \ 2078 (__v2df)_mm_setzero_pd(), \ 2079 (__mmask8)(U), (int)(R)); }) 2083 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2090 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2097 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2104 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2109 #define _mm512_add_round_pd(A, B, R) __extension__ ({ \ 2110 (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ 2111 (__v8df)(__m512d)(B), \ 2112 (__v8df)_mm512_setzero_pd(), \ 2113 (__mmask8)-1, (int)(R)); }) 2115 #define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \ 2116 (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ 2117 (__v8df)(__m512d)(B), \ 2118 (__v8df)(__m512d)(W), (__mmask8)(U), \ 2121 #define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \ 2122 (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ 2123 (__v8df)(__m512d)(B), \ 2124 (__v8df)_mm512_setzero_pd(), \ 2125 (__mmask8)(U), (int)(R)); }) 2127 #define _mm512_add_round_ps(A, B, R) __extension__ ({ \ 2128 (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ 2129 (__v16sf)(__m512)(B), \ 2130 (__v16sf)_mm512_setzero_ps(), \ 2131 (__mmask16)-1, (int)(R)); }) 2133 #define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \ 2134 (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ 2135 (__v16sf)(__m512)(B), \ 2136 (__v16sf)(__m512)(W), (__mmask16)(U), \ 2139 #define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \ 2140 (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ 2141 (__v16sf)(__m512)(B), \ 2142 (__v16sf)_mm512_setzero_ps(), \ 2143 (__mmask16)(U), (int)(R)); }) 2147 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2156 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2162 #define _mm_sub_round_ss(A, B, R) __extension__ ({ \ 2163 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2164 (__v4sf)(__m128)(B), \ 2165 (__v4sf)_mm_setzero_ps(), \ 2166 (__mmask8)-1, (int)(R)); }) 2168 #define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \ 2169 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2170 (__v4sf)(__m128)(B), \ 2171 (__v4sf)(__m128)(W), (__mmask8)(U), \ 2174 #define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \ 2175 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2176 (__v4sf)(__m128)(B), \ 2177 (__v4sf)_mm_setzero_ps(), \ 2178 (__mmask8)(U), (int)(R)); }) 2182 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2191 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2198 #define _mm_sub_round_sd(A, B, R) __extension__ ({ \ 2199 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2200 (__v2df)(__m128d)(B), \ 2201 (__v2df)_mm_setzero_pd(), \ 2202 (__mmask8)-1, (int)(R)); }) 2204 #define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \ 2205 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2206 (__v2df)(__m128d)(B), \ 2207 (__v2df)(__m128d)(W), \ 2208 (__mmask8)(U), (int)(R)); }) 2210 #define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \ 2211 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2212 (__v2df)(__m128d)(B), \ 2213 (__v2df)_mm_setzero_pd(), \ 2214 (__mmask8)(U), (int)(R)); }) 2218 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2225 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2232 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2239 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2244 #define _mm512_sub_round_pd(A, B, R) __extension__ ({ \ 2245 (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2246 (__v8df)(__m512d)(B), \ 2247 (__v8df)_mm512_setzero_pd(), \ 2248 (__mmask8)-1, (int)(R)); }) 2250 #define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \ 2251 (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2252 (__v8df)(__m512d)(B), \ 2253 (__v8df)(__m512d)(W), (__mmask8)(U), \ 2256 #define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \ 2257 (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2258 (__v8df)(__m512d)(B), \ 2259 (__v8df)_mm512_setzero_pd(), \ 2260 (__mmask8)(U), (int)(R)); }) 2262 #define _mm512_sub_round_ps(A, B, R) __extension__ ({ \ 2263 (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2264 (__v16sf)(__m512)(B), \ 2265 (__v16sf)_mm512_setzero_ps(), \ 2266 (__mmask16)-1, (int)(R)); }) 2268 #define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \ 2269 (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2270 (__v16sf)(__m512)(B), \ 2271 (__v16sf)(__m512)(W), (__mmask16)(U), \ 2274 #define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \ 2275 (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2276 (__v16sf)(__m512)(B), \ 2277 (__v16sf)_mm512_setzero_ps(), \ 2278 (__mmask16)(U), (int)(R)); }); 2282 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2291 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2297 #define _mm_mul_round_ss(A, B, R) __extension__ ({ \ 2298 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2299 (__v4sf)(__m128)(B), \ 2300 (__v4sf)_mm_setzero_ps(), \ 2301 (__mmask8)-1, (int)(R)); }) 2303 #define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \ 2304 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2305 (__v4sf)(__m128)(B), \ 2306 (__v4sf)(__m128)(W), (__mmask8)(U), \ 2309 #define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \ 2310 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2311 (__v4sf)(__m128)(B), \ 2312 (__v4sf)_mm_setzero_ps(), \ 2313 (__mmask8)(U), (int)(R)); }) 2317 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2326 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2333 #define _mm_mul_round_sd(A, B, R) __extension__ ({ \ 2334 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2335 (__v2df)(__m128d)(B), \ 2336 (__v2df)_mm_setzero_pd(), \ 2337 (__mmask8)-1, (int)(R)); }) 2339 #define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \ 2340 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2341 (__v2df)(__m128d)(B), \ 2342 (__v2df)(__m128d)(W), \ 2343 (__mmask8)(U), (int)(R)); }) 2345 #define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \ 2346 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2347 (__v2df)(__m128d)(B), \ 2348 (__v2df)_mm_setzero_pd(), \ 2349 (__mmask8)(U), (int)(R)); }) 2353 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2360 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2367 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2374 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2379 #define _mm512_mul_round_pd(A, B, R) __extension__ ({ \ 2380 (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2381 (__v8df)(__m512d)(B), \ 2382 (__v8df)_mm512_setzero_pd(), \ 2383 (__mmask8)-1, (int)(R)); }) 2385 #define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \ 2386 (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2387 (__v8df)(__m512d)(B), \ 2388 (__v8df)(__m512d)(W), (__mmask8)(U), \ 2391 #define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \ 2392 (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2393 (__v8df)(__m512d)(B), \ 2394 (__v8df)_mm512_setzero_pd(), \ 2395 (__mmask8)(U), (int)(R)); }) 2397 #define _mm512_mul_round_ps(A, B, R) __extension__ ({ \ 2398 (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2399 (__v16sf)(__m512)(B), \ 2400 (__v16sf)_mm512_setzero_ps(), \ 2401 (__mmask16)-1, (int)(R)); }) 2403 #define _mm512_mask_mul_round_ps(W, U, A, B, R) __extension__ ({ \ 2404 (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2405 (__v16sf)(__m512)(B), \ 2406 (__v16sf)(__m512)(W), (__mmask16)(U), \ 2409 #define _mm512_maskz_mul_round_ps(U, A, B, R) __extension__ ({ \ 2410 (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2411 (__v16sf)(__m512)(B), \ 2412 (__v16sf)_mm512_setzero_ps(), \ 2413 (__mmask16)(U), (int)(R)); }); 2417 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2426 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2433 #define _mm_div_round_ss(A, B, R) __extension__ ({ \ 2434 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2435 (__v4sf)(__m128)(B), \ 2436 (__v4sf)_mm_setzero_ps(), \ 2437 (__mmask8)-1, (int)(R)); }) 2439 #define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \ 2440 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2441 (__v4sf)(__m128)(B), \ 2442 (__v4sf)(__m128)(W), (__mmask8)(U), \ 2445 #define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \ 2446 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2447 (__v4sf)(__m128)(B), \ 2448 (__v4sf)_mm_setzero_ps(), \ 2449 (__mmask8)(U), (int)(R)); }) 2453 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2462 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2469 #define _mm_div_round_sd(A, B, R) __extension__ ({ \ 2470 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2471 (__v2df)(__m128d)(B), \ 2472 (__v2df)_mm_setzero_pd(), \ 2473 (__mmask8)-1, (int)(R)); }) 2475 #define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \ 2476 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2477 (__v2df)(__m128d)(B), \ 2478 (__v2df)(__m128d)(W), \ 2479 (__mmask8)(U), (int)(R)); }) 2481 #define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \ 2482 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2483 (__v2df)(__m128d)(B), \ 2484 (__v2df)_mm_setzero_pd(), \ 2485 (__mmask8)(U), (int)(R)); }) 2490 return (__m512d)((__v8df)__a/(__v8df)
__b);
2495 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2502 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2510 return (__m512)((__v16sf)__a/(__v16sf)
__b);
2515 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2522 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
/* 512-bit divide intrinsics with explicit rounding control (R); same
 * unmasked / mask (blend with W) / maskz (zero inactive lanes) pattern
 * as the mul_round family. */
#define _mm512_div_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_div_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })

#define _mm512_mask_div_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

/* NOTE(review): removed a stray semicolon after the closing `})` below; it
 * broke this macro in expression context (e.g. as a function argument). */
#define _mm512_maskz_div_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

/* Round-to-integral with scaling; rounding mode fixed to the current
 * direction here (the _round_ variants take an explicit R). */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)(__m512)(A), (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
_MM_FROUND_CUR_DIRECTION); }) 2579 #define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \ 2580 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ 2581 (__v16sf)(__m512)(A), (__mmask16)(B), \ 2584 #define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \ 2585 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ 2586 (__v16sf)_mm512_setzero_ps(), \ 2587 (__mmask16)(A), (int)(R)); }) 2589 #define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \ 2590 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \ 2591 (__v16sf)_mm512_undefined_ps(), \ 2592 (__mmask16)-1, (int)(R)); }) 2594 #define _mm512_roundscale_pd(A, B) __extension__ ({ \ 2595 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \ 2596 (__v8df)(__m512d)(A), (__mmask8)-1, \ 2597 _MM_FROUND_CUR_DIRECTION); }) 2599 #define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\ 2600 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ 2601 (__v8df)(__m512d)(A), (__mmask8)(B), \ 2602 _MM_FROUND_CUR_DIRECTION); }) 2604 #define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\ 2605 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ 2606 (__v8df)_mm512_setzero_pd(), \ 2608 _MM_FROUND_CUR_DIRECTION); }) 2610 #define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \ 2611 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ 2612 (__v8df)(__m512d)(A), (__mmask8)(B), \ 2615 #define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \ 2616 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ 2617 (__v8df)_mm512_setzero_pd(), \ 2618 (__mmask8)(A), (int)(R)); }) 2620 #define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \ 2621 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \ 2622 (__v8df)_mm512_undefined_pd(), \ 2623 
(__mmask8)-1, (int)(R)); }) 2625 #define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \ 2626 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2627 (__v8df)(__m512d)(B), \ 2628 (__v8df)(__m512d)(C), (__mmask8)-1, \ 2632 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \ 2633 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2634 (__v8df)(__m512d)(B), \ 2635 (__v8df)(__m512d)(C), \ 2636 (__mmask8)(U), (int)(R)); }) 2639 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \ 2640 (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \ 2641 (__v8df)(__m512d)(B), \ 2642 (__v8df)(__m512d)(C), \ 2643 (__mmask8)(U), (int)(R)); }) 2646 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \ 2647 (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 2648 (__v8df)(__m512d)(B), \ 2649 (__v8df)(__m512d)(C), \ 2650 (__mmask8)(U), (int)(R)); }) 2653 #define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \ 2654 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2655 (__v8df)(__m512d)(B), \ 2656 -(__v8df)(__m512d)(C), \ 2657 (__mmask8)-1, (int)(R)); }) 2660 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \ 2661 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2662 (__v8df)(__m512d)(B), \ 2663 -(__v8df)(__m512d)(C), \ 2664 (__mmask8)(U), (int)(R)); }) 2667 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \ 2668 (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 2669 (__v8df)(__m512d)(B), \ 2670 -(__v8df)(__m512d)(C), \ 2671 (__mmask8)(U), (int)(R)); }) 2674 #define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \ 2675 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 2676 (__v8df)(__m512d)(B), \ 2677 (__v8df)(__m512d)(C), (__mmask8)-1, \ 2681 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \ 2682 
(__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \ 2683 (__v8df)(__m512d)(B), \ 2684 (__v8df)(__m512d)(C), \ 2685 (__mmask8)(U), (int)(R)); }) 2688 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \ 2689 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 2690 (__v8df)(__m512d)(B), \ 2691 (__v8df)(__m512d)(C), \ 2692 (__mmask8)(U), (int)(R)); }) 2695 #define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \ 2696 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 2697 (__v8df)(__m512d)(B), \ 2698 -(__v8df)(__m512d)(C), \ 2699 (__mmask8)-1, (int)(R)); }) 2702 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \ 2703 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 2704 (__v8df)(__m512d)(B), \ 2705 -(__v8df)(__m512d)(C), \ 2706 (__mmask8)(U), (int)(R)); }) 2712 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2722 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2732 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2742 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2752 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2762 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2772 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2782 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2792 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2802 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2812 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2822 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2829 #define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \ 2830 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2831 (__v16sf)(__m512)(B), \ 2832 (__v16sf)(__m512)(C), (__mmask16)-1, \ 2836 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \ 2837 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2838 (__v16sf)(__m512)(B), \ 2839 (__v16sf)(__m512)(C), \ 2840 (__mmask16)(U), (int)(R)); }) 2843 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \ 2844 (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \ 2845 (__v16sf)(__m512)(B), \ 2846 (__v16sf)(__m512)(C), \ 2847 (__mmask16)(U), (int)(R)); }) 2850 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \ 2851 (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ 2852 (__v16sf)(__m512)(B), \ 2853 (__v16sf)(__m512)(C), \ 2854 (__mmask16)(U), (int)(R)); }) 2857 #define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \ 2858 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2859 (__v16sf)(__m512)(B), \ 2860 -(__v16sf)(__m512)(C), \ 2861 (__mmask16)-1, (int)(R)); }) 2864 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \ 2865 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2866 (__v16sf)(__m512)(B), \ 2867 -(__v16sf)(__m512)(C), \ 2868 (__mmask16)(U), (int)(R)); }) 2871 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \ 2872 (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ 2873 (__v16sf)(__m512)(B), \ 2874 -(__v16sf)(__m512)(C), \ 2875 (__mmask16)(U), (int)(R)); }) 2878 #define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \ 2879 (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \ 2880 (__v16sf)(__m512)(B), \ 2881 (__v16sf)(__m512)(C), (__mmask16)-1, \ 2885 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \ 2886 (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \ 2887 
(__v16sf)(__m512)(B), \ 2888 (__v16sf)(__m512)(C), \ 2889 (__mmask16)(U), (int)(R)); }) 2892 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \ 2893 (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 2894 (__v16sf)(__m512)(B), \ 2895 (__v16sf)(__m512)(C), \ 2896 (__mmask16)(U), (int)(R)); }) 2899 #define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \ 2900 (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \ 2901 (__v16sf)(__m512)(B), \ 2902 -(__v16sf)(__m512)(C), \ 2903 (__mmask16)-1, (int)(R)); }) 2906 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \ 2907 (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 2908 (__v16sf)(__m512)(B), \ 2909 -(__v16sf)(__m512)(C), \ 2910 (__mmask16)(U), (int)(R)); }) 2916 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2926 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2936 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2946 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2956 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2966 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2976 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2986 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2996 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3006 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3016 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3026 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3033 #define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \ 3034 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 3035 (__v8df)(__m512d)(B), \ 3036 (__v8df)(__m512d)(C), \ 3037 (__mmask8)-1, (int)(R)); }) 3040 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \ 3041 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 3042 (__v8df)(__m512d)(B), \ 3043 (__v8df)(__m512d)(C), \ 3044 (__mmask8)(U), (int)(R)); }) 3047 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \ 3048 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \ 3049 (__v8df)(__m512d)(B), \ 3050 (__v8df)(__m512d)(C), \ 3051 (__mmask8)(U), (int)(R)); }) 3054 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \ 3055 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 3056 (__v8df)(__m512d)(B), \ 3057 (__v8df)(__m512d)(C), \ 3058 (__mmask8)(U), (int)(R)); }) 3061 #define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \ 3062 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 3063 (__v8df)(__m512d)(B), \ 3064 -(__v8df)(__m512d)(C), \ 3065 (__mmask8)-1, (int)(R)); }) 3068 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \ 3069 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 3070 (__v8df)(__m512d)(B), \ 3071 -(__v8df)(__m512d)(C), \ 3072 (__mmask8)(U), (int)(R)); }) 3075 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \ 3076 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 3077 (__v8df)(__m512d)(B), \ 3078 -(__v8df)(__m512d)(C), \ 3079 (__mmask8)(U), (int)(R)); }) 3085 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3095 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3105 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3115 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3125 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3135 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3145 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3152 #define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \ 3153 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3154 (__v16sf)(__m512)(B), \ 3155 (__v16sf)(__m512)(C), \ 3156 (__mmask16)-1, (int)(R)); }) 3159 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \ 3160 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3161 (__v16sf)(__m512)(B), \ 3162 (__v16sf)(__m512)(C), \ 3163 (__mmask16)(U), (int)(R)); }) 3166 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \ 3167 (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \ 3168 (__v16sf)(__m512)(B), \ 3169 (__v16sf)(__m512)(C), \ 3170 (__mmask16)(U), (int)(R)); }) 3173 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \ 3174 (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 3175 (__v16sf)(__m512)(B), \ 3176 (__v16sf)(__m512)(C), \ 3177 (__mmask16)(U), (int)(R)); }) 3180 #define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \ 3181 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3182 (__v16sf)(__m512)(B), \ 3183 -(__v16sf)(__m512)(C), \ 3184 (__mmask16)-1, (int)(R)); }) 3187 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \ 3188 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3189 (__v16sf)(__m512)(B), \ 3190 -(__v16sf)(__m512)(C), \ 3191 (__mmask16)(U), (int)(R)); }) 3194 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \ 3195 (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 3196 (__v16sf)(__m512)(B), \ 3197 -(__v16sf)(__m512)(C), \ 3198 (__mmask16)(U), (int)(R)); }) 3204 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3214 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3224 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3234 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3244 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3254 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3264 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3271 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \ 3272 (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \ 3273 (__v8df)(__m512d)(B), \ 3274 (__v8df)(__m512d)(C), \ 3275 (__mmask8)(U), (int)(R)); }) 3281 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3288 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \ 3289 (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \ 3290 (__v16sf)(__m512)(B), \ 3291 (__v16sf)(__m512)(C), \ 3292 (__mmask16)(U), (int)(R)); }) 3298 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3305 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \ 3306 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \ 3307 (__v8df)(__m512d)(B), \ 3308 (__v8df)(__m512d)(C), \ 3309 (__mmask8)(U), (int)(R)); }) 3315 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3322 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \ 3323 (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \ 3324 (__v16sf)(__m512)(B), \ 3325 (__v16sf)(__m512)(C), \ 3326 (__mmask16)(U), (int)(R)); }) 3332 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3339 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \ 3340 (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \ 3341 (__v8df)(__m512d)(B), \ 3342 (__v8df)(__m512d)(C), \ 3343 (__mmask8)(U), (int)(R)); }) 3349 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3356 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \ 3357 (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \ 3358 (__v16sf)(__m512)(B), \ 3359 (__v16sf)(__m512)(C), \ 3360 (__mmask16)(U), (int)(R)); }) 3366 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3373 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \ 3374 (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \ 3375 (__v8df)(__m512d)(B), \ 3376 (__v8df)(__m512d)(C), \ 3377 (__mmask8)(U), (int)(R)); }) 3380 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \ 3381 (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \ 3382 (__v8df)(__m512d)(B), \ 3383 (__v8df)(__m512d)(C), \ 3384 (__mmask8)(U), (int)(R)); }) 3390 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3400 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3407 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \ 3408 (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \ 3409 (__v16sf)(__m512)(B), \ 3410 (__v16sf)(__m512)(C), \ 3411 (__mmask16)(U), (int)(R)); }) 3414 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \ 3415 (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \ 3416 (__v16sf)(__m512)(B), \ 3417 (__v16sf)(__m512)(C), \ 3418 (__mmask16)(U), (int)(R)); }) 3424 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3434 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3448 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3457 __m512i __I, __m512i __B)
3459 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3468 __m512i __I, __m512i __B)
3470 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
3480 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3491 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3501 __m512i __I, __m512i __B)
3503 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
3510 #define _mm512_alignr_epi64(A, B, I) __extension__ ({ \ 3511 (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \ 3512 (__v8di)(__m512i)(A), \ 3513 ((int)(I) & 0x7) + 0, \ 3514 ((int)(I) & 0x7) + 1, \ 3515 ((int)(I) & 0x7) + 2, \ 3516 ((int)(I) & 0x7) + 3, \ 3517 ((int)(I) & 0x7) + 4, \ 3518 ((int)(I) & 0x7) + 5, \ 3519 ((int)(I) & 0x7) + 6, \ 3520 ((int)(I) & 0x7) + 7); }) 3522 #define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\ 3523 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 3524 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ 3525 (__v8di)(__m512i)(W)); }) 3527 #define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\ 3528 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 3529 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ 3530 (__v8di)_mm512_setzero_si512()); }) 3532 #define _mm512_alignr_epi32(A, B, I) __extension__ ({ \ 3533 (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \ 3534 (__v16si)(__m512i)(A), \ 3535 ((int)(I) & 0xf) + 0, \ 3536 ((int)(I) & 0xf) + 1, \ 3537 ((int)(I) & 0xf) + 2, \ 3538 ((int)(I) & 0xf) + 3, \ 3539 ((int)(I) & 0xf) + 4, \ 3540 ((int)(I) & 0xf) + 5, \ 3541 ((int)(I) & 0xf) + 6, \ 3542 ((int)(I) & 0xf) + 7, \ 3543 ((int)(I) & 0xf) + 8, \ 3544 ((int)(I) & 0xf) + 9, \ 3545 ((int)(I) & 0xf) + 10, \ 3546 ((int)(I) & 0xf) + 11, \ 3547 ((int)(I) & 0xf) + 12, \ 3548 ((int)(I) & 0xf) + 13, \ 3549 ((int)(I) & 0xf) + 14, \ 3550 ((int)(I) & 0xf) + 15); }) 3552 #define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\ 3553 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 3554 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ 3555 (__v16si)(__m512i)(W)); }) 3557 #define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\ 3558 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 3559 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ 3560 (__v16si)_mm512_setzero_si512()); }) 3563 #define _mm512_extractf64x4_pd(A, I) __extension__ ({ \ 3564 
(__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 3565 (__v8df)_mm512_undefined_pd(), \ 3566 ((I) & 1) ? 4 : 0, \ 3567 ((I) & 1) ? 5 : 1, \ 3568 ((I) & 1) ? 6 : 2, \ 3569 ((I) & 1) ? 7 : 3); }) 3571 #define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\ 3572 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 3573 (__v4df)_mm512_extractf64x4_pd((A), (imm)), \ 3576 #define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\ 3577 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 3578 (__v4df)_mm512_extractf64x4_pd((A), (imm)), \ 3579 (__v4df)_mm256_setzero_pd()); }) 3581 #define _mm512_extractf32x4_ps(A, I) __extension__ ({ \ 3582 (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \ 3583 (__v16sf)_mm512_undefined_ps(), \ 3584 0 + ((I) & 0x3) * 4, \ 3585 1 + ((I) & 0x3) * 4, \ 3586 2 + ((I) & 0x3) * 4, \ 3587 3 + ((I) & 0x3) * 4); }) 3589 #define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\ 3590 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 3591 (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \ 3594 #define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\ 3595 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 3596 (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \ 3597 (__v4sf)_mm_setzero_ps()); }) 3604 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3612 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3620 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3628 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3635 #define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \ 3636 (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 3637 (__v16sf)(__m512)(B), (int)(P), \ 3638 (__mmask16)-1, (int)(R)); }) 3640 #define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \ 3641 (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 3642 (__v16sf)(__m512)(B), (int)(P), \ 3643 (__mmask16)(U), (int)(R)); }) 3645 #define _mm512_cmp_ps_mask(A, B, P) \ 3646 _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3647 #define _mm512_mask_cmp_ps_mask(U, A, B, P) \ 3648 _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3650 #define _mm512_cmpeq_ps_mask(A, B) \ 3651 _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ) 3652 #define _mm512_mask_cmpeq_ps_mask(k, A, B) \ 3653 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ) 3655 #define _mm512_cmplt_ps_mask(A, B) \ 3656 _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS) 3657 #define _mm512_mask_cmplt_ps_mask(k, A, B) \ 3658 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS) 3660 #define _mm512_cmple_ps_mask(A, B) \ 3661 _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS) 3662 #define _mm512_mask_cmple_ps_mask(k, A, B) \ 3663 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS) 3665 #define _mm512_cmpunord_ps_mask(A, B) \ 3666 _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q) 3667 #define _mm512_mask_cmpunord_ps_mask(k, A, B) \ 3668 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q) 3670 #define _mm512_cmpneq_ps_mask(A, B) \ 3671 _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ) 3672 #define _mm512_mask_cmpneq_ps_mask(k, A, B) \ 3673 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ) 3675 #define _mm512_cmpnlt_ps_mask(A, B) \ 3676 _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US) 3677 #define _mm512_mask_cmpnlt_ps_mask(k, A, B) \ 3678 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US) 3680 #define _mm512_cmpnle_ps_mask(A, B) \ 3681 _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US) 3682 #define _mm512_mask_cmpnle_ps_mask(k, A, B) \ 
3683 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US) 3685 #define _mm512_cmpord_ps_mask(A, B) \ 3686 _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q) 3687 #define _mm512_mask_cmpord_ps_mask(k, A, B) \ 3688 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q) 3690 #define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \ 3691 (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 3692 (__v8df)(__m512d)(B), (int)(P), \ 3693 (__mmask8)-1, (int)(R)); }) 3695 #define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \ 3696 (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 3697 (__v8df)(__m512d)(B), (int)(P), \ 3698 (__mmask8)(U), (int)(R)); }) 3700 #define _mm512_cmp_pd_mask(A, B, P) \ 3701 _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3702 #define _mm512_mask_cmp_pd_mask(U, A, B, P) \ 3703 _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3705 #define _mm512_cmpeq_pd_mask(A, B) \ 3706 _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ) 3707 #define _mm512_mask_cmpeq_pd_mask(k, A, B) \ 3708 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ) 3710 #define _mm512_cmplt_pd_mask(A, B) \ 3711 _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS) 3712 #define _mm512_mask_cmplt_pd_mask(k, A, B) \ 3713 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS) 3715 #define _mm512_cmple_pd_mask(A, B) \ 3716 _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS) 3717 #define _mm512_mask_cmple_pd_mask(k, A, B) \ 3718 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS) 3720 #define _mm512_cmpunord_pd_mask(A, B) \ 3721 _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q) 3722 #define _mm512_mask_cmpunord_pd_mask(k, A, B) \ 3723 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q) 3725 #define _mm512_cmpneq_pd_mask(A, B) \ 3726 _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ) 3727 #define _mm512_mask_cmpneq_pd_mask(k, A, B) \ 3728 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ) 3730 #define _mm512_cmpnlt_pd_mask(A, B) \ 3731 _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US) 
3732 #define _mm512_mask_cmpnlt_pd_mask(k, A, B) \ 3733 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US) 3735 #define _mm512_cmpnle_pd_mask(A, B) \ 3736 _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US) 3737 #define _mm512_mask_cmpnle_pd_mask(k, A, B) \ 3738 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US) 3740 #define _mm512_cmpord_pd_mask(A, B) \ 3741 _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q) 3742 #define _mm512_mask_cmpord_pd_mask(k, A, B) \ 3743 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q) 3747 #define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \ 3748 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3749 (__v16si)_mm512_undefined_epi32(), \ 3750 (__mmask16)-1, (int)(R)); }) 3752 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \ 3753 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3754 (__v16si)(__m512i)(W), \ 3755 (__mmask16)(U), (int)(R)); }) 3757 #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \ 3758 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3759 (__v16si)_mm512_setzero_si512(), \ 3760 (__mmask16)(U), (int)(R)); }) 3766 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3776 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3785 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3791 #define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \ 3792 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3793 (__v16sf)_mm512_setzero_ps(), \ 3794 (__mmask16)-1, (int)(R)); }) 3796 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \ 3797 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3798 (__v16sf)(__m512)(W), \ 3799 (__mmask16)(U), (int)(R)); }) 3801 #define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \ 3802 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3803 (__v16sf)_mm512_setzero_ps(), \ 3804 (__mmask16)(U), (int)(R)); }) 3806 #define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \ 3807 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3808 (__v16sf)_mm512_setzero_ps(), \ 3809 (__mmask16)-1, (int)(R)); }) 3811 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \ 3812 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3813 (__v16sf)(__m512)(W), \ 3814 (__mmask16)(U), (int)(R)); }) 3816 #define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \ 3817 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3818 (__v16sf)_mm512_setzero_ps(), \ 3819 (__mmask16)(U), (int)(R)); }) 3824 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3833 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3842 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3851 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3857 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3865 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3885 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3894 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3903 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3912 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3918 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3926 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3943 #define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \ 3944 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3945 (__v8sf)_mm256_setzero_ps(), \ 3946 (__mmask8)-1, (int)(R)); }) 3948 #define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \ 3949 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3950 (__v8sf)(__m256)(W), (__mmask8)(U), \ 3953 #define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \ 3954 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3955 (__v8sf)_mm256_setzero_ps(), \ 3956 (__mmask8)(U), (int)(R)); }) 3961 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3970 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3979 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3988 return (__m512) __builtin_shufflevector((__v8sf)
_mm512_cvtpd_ps(__A),
3990 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3996 return (__m512) __builtin_shufflevector (
4000 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
4003 #define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \ 4004 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 4005 (__v16hi)_mm256_undefined_si256(), \ 4008 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \ 4009 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 4010 (__v16hi)(__m256i)(U), \ 4013 #define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \ 4014 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 4015 (__v16hi)_mm256_setzero_si256(), \ 4018 #define _mm512_cvtps_ph(A, I) __extension__ ({ \ 4019 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 4020 (__v16hi)_mm256_setzero_si256(), \ 4023 #define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \ 4024 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 4025 (__v16hi)(__m256i)(U), \ 4028 #define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({\ 4029 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 4030 (__v16hi)_mm256_setzero_si256(), \ 4033 #define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \ 4034 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 4035 (__v16sf)_mm512_undefined_ps(), \ 4036 (__mmask16)-1, (int)(R)); }) 4038 #define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \ 4039 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 4040 (__v16sf)(__m512)(W), \ 4041 (__mmask16)(U), (int)(R)); }) 4043 #define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \ 4044 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 4045 (__v16sf)_mm512_setzero_ps(), \ 4046 (__mmask16)(U), (int)(R)); }) 4052 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4062 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4071 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4077 #define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \ 4078 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 4079 (__v8si)_mm256_setzero_si256(), \ 4080 (__mmask8)-1, (int)(R)); }) 4082 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \ 4083 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 4084 (__v8si)(__m256i)(W), \ 4085 (__mmask8)(U), (int)(R)); }) 4087 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \ 4088 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 4089 (__v8si)_mm256_setzero_si256(), \ 4090 (__mmask8)(U), (int)(R)); }) 4095 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
4104 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4113 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4119 #define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \ 4120 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 4121 (__v16si)_mm512_setzero_si512(), \ 4122 (__mmask16)-1, (int)(R)); }) 4124 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \ 4125 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 4126 (__v16si)(__m512i)(W), \ 4127 (__mmask16)(U), (int)(R)); }) 4129 #define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \ 4130 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 4131 (__v16si)_mm512_setzero_si512(), \ 4132 (__mmask16)(U), (int)(R)); }) 4138 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
4146 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4155 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4161 #define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \ 4162 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 4163 (__v16si)_mm512_setzero_si512(), \ 4164 (__mmask16)-1, (int)(R)); }) 4166 #define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \ 4167 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 4168 (__v16si)(__m512i)(W), \ 4169 (__mmask16)(U), (int)(R)); }) 4171 #define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \ 4172 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 4173 (__v16si)_mm512_setzero_si512(), \ 4174 (__mmask16)(U), (int)(R)); }) 4179 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4188 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4197 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4204 #define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \ 4205 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 4206 (__v8si)_mm256_setzero_si256(), \ 4207 (__mmask8)-1, (int)(R)); }) 4209 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \ 4210 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 4211 (__v8si)(__m256i)(W), \ 4212 (__mmask8)(U), (int)(R)); }) 4214 #define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \ 4215 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 4216 (__v8si)_mm256_setzero_si256(), \ 4217 (__mmask8)(U), (int)(R)); }) 4222 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4232 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4241 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4248 #define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \ 4249 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4250 (__v16si)_mm512_setzero_si512(), \ 4251 (__mmask16)-1, (int)(R)); }) 4253 #define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \ 4254 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4255 (__v16si)(__m512i)(W), \ 4256 (__mmask16)(U), (int)(R)); }) 4258 #define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \ 4259 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4260 (__v16si)_mm512_setzero_si512(), \ 4261 (__mmask16)(U), (int)(R)); }) 4266 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4276 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4285 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4292 #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ 4293 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4294 (__v8si)_mm256_setzero_si256(), \ 4295 (__mmask8)-1, (int)(R)); }) 4297 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \ 4298 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4300 (__mmask8)(U), (int)(R)); }) 4302 #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \ 4303 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4304 (__v8si)_mm256_setzero_si256(), \ 4305 (__mmask8)(U), (int)(R)); }) 4310 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4320 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4329 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4353 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4354 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4360 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4368 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4376 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4377 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4383 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4391 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4399 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4401 2+4, 18+4, 3+4, 19+4,
4402 2+8, 18+8, 3+8, 19+8,
4403 2+12, 18+12, 3+12, 19+12);
4409 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4417 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4425 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4427 0+4, 16+4, 1+4, 17+4,
4428 0+8, 16+8, 1+8, 17+8,
4429 0+12, 16+12, 1+12, 17+12);
4435 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4443 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4451 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4453 2+4, 18+4, 3+4, 19+4,
4454 2+8, 18+8, 3+8, 19+8,
4455 2+12, 18+12, 3+12, 19+12);
4461 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4469 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4477 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4479 0+4, 16+4, 1+4, 17+4,
4480 0+8, 16+8, 1+8, 17+8,
4481 0+12, 16+12, 1+12, 17+12);
4487 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4495 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4503 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4504 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4510 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4518 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4526 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4527 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4533 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4541 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4552 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *) __P,
4561 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *) __P,
4570 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *)__P,
4579 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *) __P,
4587 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *)__P,
4596 return (__m512) __builtin_ia32_loadups512_mask ((
const float *) __P,
4604 return (__m512) __builtin_ia32_loadups512_mask ((
const float *)__P,
4613 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *) __P,
4621 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *)__P,
4633 return ((
struct __loadu_pd*)__p)->__v;
4642 return ((
struct __loadu_ps*)__p)->__v;
4648 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *)__p,
4657 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *) __P,
4665 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *)__P,
4674 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *)__p,
4683 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *) __P,
4691 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *)__P,
4700 return *(__m512i *) __P;
4706 return *(__m512i *) __P;
4712 return *(__m512i *) __P;
4720 __builtin_ia32_storedqudi512_mask ((
long long *)__P, (__v8di) __A,
4727 __builtin_ia32_storedqusi512_mask ((
int *) __P, (__v16si) __A,
4734 __builtin_ia32_storedqusi512_mask ((
int *)__P, (__v16si) __A,
4741 __builtin_ia32_storeupd512_mask ((
double *)__P, (__v8df) __A, (__mmask8) __U);
4747 __builtin_ia32_storeupd512_mask((
double *)__P, (__v8df)__A, (__mmask8)-1);
4753 __builtin_ia32_storeups512_mask ((
float *)__P, (__v16sf) __A,
4760 __builtin_ia32_storeups512_mask((
float *)__P, (__v16sf)__A, (__mmask16)-1);
4766 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4772 *(__m512d*)__P = __A;
4778 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4785 *(__m512*)__P = __A;
4791 *(__m512i *) __P = __A;
4797 *(__m512i *) __P = __A;
4803 *(__m512i *) __P = __A;
4811 return __builtin_ia32_knothi(__M);
4816 #define _mm512_cmpeq_epi32_mask(A, B) \ 4817 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 4818 #define _mm512_mask_cmpeq_epi32_mask(k, A, B) \ 4819 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 4820 #define _mm512_cmpge_epi32_mask(A, B) \ 4821 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 4822 #define _mm512_mask_cmpge_epi32_mask(k, A, B) \ 4823 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 4824 #define _mm512_cmpgt_epi32_mask(A, B) \ 4825 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 4826 #define _mm512_mask_cmpgt_epi32_mask(k, A, B) \ 4827 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 4828 #define _mm512_cmple_epi32_mask(A, B) \ 4829 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 4830 #define _mm512_mask_cmple_epi32_mask(k, A, B) \ 4831 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 4832 #define _mm512_cmplt_epi32_mask(A, B) \ 4833 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 4834 #define _mm512_mask_cmplt_epi32_mask(k, A, B) \ 4835 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) 4836 #define _mm512_cmpneq_epi32_mask(A, B) \ 4837 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 4838 #define _mm512_mask_cmpneq_epi32_mask(k, A, B) \ 4839 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 4841 #define _mm512_cmpeq_epu32_mask(A, B) \ 4842 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) 4843 #define _mm512_mask_cmpeq_epu32_mask(k, A, B) \ 4844 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) 4845 #define _mm512_cmpge_epu32_mask(A, B) \ 4846 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) 4847 #define _mm512_mask_cmpge_epu32_mask(k, A, B) \ 4848 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) 4849 #define _mm512_cmpgt_epu32_mask(A, B) \ 4850 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) 4851 #define _mm512_mask_cmpgt_epu32_mask(k, A, B) \ 4852 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) 4853 #define _mm512_cmple_epu32_mask(A, B) \ 4854 _mm512_cmp_epu32_mask((A), (B), 
_MM_CMPINT_LE) 4855 #define _mm512_mask_cmple_epu32_mask(k, A, B) \ 4856 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) 4857 #define _mm512_cmplt_epu32_mask(A, B) \ 4858 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) 4859 #define _mm512_mask_cmplt_epu32_mask(k, A, B) \ 4860 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) 4861 #define _mm512_cmpneq_epu32_mask(A, B) \ 4862 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) 4863 #define _mm512_mask_cmpneq_epu32_mask(k, A, B) \ 4864 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) 4866 #define _mm512_cmpeq_epi64_mask(A, B) \ 4867 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) 4868 #define _mm512_mask_cmpeq_epi64_mask(k, A, B) \ 4869 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) 4870 #define _mm512_cmpge_epi64_mask(A, B) \ 4871 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) 4872 #define _mm512_mask_cmpge_epi64_mask(k, A, B) \ 4873 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) 4874 #define _mm512_cmpgt_epi64_mask(A, B) \ 4875 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) 4876 #define _mm512_mask_cmpgt_epi64_mask(k, A, B) \ 4877 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) 4878 #define _mm512_cmple_epi64_mask(A, B) \ 4879 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) 4880 #define _mm512_mask_cmple_epi64_mask(k, A, B) \ 4881 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) 4882 #define _mm512_cmplt_epi64_mask(A, B) \ 4883 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) 4884 #define _mm512_mask_cmplt_epi64_mask(k, A, B) \ 4885 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) 4886 #define _mm512_cmpneq_epi64_mask(A, B) \ 4887 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) 4888 #define _mm512_mask_cmpneq_epi64_mask(k, A, B) \ 4889 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) 4891 #define _mm512_cmpeq_epu64_mask(A, B) \ 4892 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 4893 #define _mm512_mask_cmpeq_epu64_mask(k, A, B) \ 4894 
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 4895 #define _mm512_cmpge_epu64_mask(A, B) \ 4896 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 4897 #define _mm512_mask_cmpge_epu64_mask(k, A, B) \ 4898 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 4899 #define _mm512_cmpgt_epu64_mask(A, B) \ 4900 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) 4901 #define _mm512_mask_cmpgt_epu64_mask(k, A, B) \ 4902 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) 4903 #define _mm512_cmple_epu64_mask(A, B) \ 4904 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 4905 #define _mm512_mask_cmple_epu64_mask(k, A, B) \ 4906 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 4907 #define _mm512_cmplt_epu64_mask(A, B) \ 4908 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 4909 #define _mm512_mask_cmplt_epu64_mask(k, A, B) \ 4910 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 4911 #define _mm512_cmpneq_epu64_mask(A, B) \ 4912 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 4913 #define _mm512_mask_cmpneq_epu64_mask(k, A, B) \ 4914 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 4921 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4927 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4935 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4945 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4951 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4959 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4967 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4973 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4981 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4989 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4995 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5003 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5011 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
5017 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5025 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5033 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
5039 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5047 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5055 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
5061 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5069 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5077 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
5083 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5091 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5099 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
5105 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5113 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5121 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
5127 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5135 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5143 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5153 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5162 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5172 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5182 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5191 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5200 #define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \ 5201 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 5202 (__v16si)(__m512i)(b), (int)(p), \ 5205 #define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \ 5206 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 5207 (__v16si)(__m512i)(b), (int)(p), \ 5210 #define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \ 5211 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 5212 (__v8di)(__m512i)(b), (int)(p), \ 5215 #define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \ 5216 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 5217 (__v8di)(__m512i)(b), (int)(p), \ 5220 #define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 5221 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 5222 (__v16si)(__m512i)(b), (int)(p), \ 5225 #define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 5226 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 5227 (__v16si)(__m512i)(b), (int)(p), \ 5230 #define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 5231 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 5232 (__v8di)(__m512i)(b), (int)(p), \ 5235 #define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 5236 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 5237 (__v8di)(__m512i)(b), (int)(p), \ 5240 #define _mm512_rol_epi32(a, b) __extension__ ({ \ 5241 (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ 5242 (__v16si)_mm512_setzero_si512(), \ 5245 #define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \ 5246 (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ 5247 (__v16si)(__m512i)(W), \ 5250 #define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \ 5251 (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ 5252 (__v16si)_mm512_setzero_si512(), \ 5255 #define _mm512_rol_epi64(a, b) __extension__ ({ \ 5256 
(__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ 5257 (__v8di)_mm512_setzero_si512(), \ 5260 #define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \ 5261 (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ 5262 (__v8di)(__m512i)(W), (__mmask8)(U)); }) 5264 #define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \ 5265 (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ 5266 (__v8di)_mm512_setzero_si512(), \ 5271 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5281 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5290 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5300 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5310 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5319 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5326 #define _mm512_ror_epi32(A, B) __extension__ ({ \ 5327 (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ 5328 (__v16si)_mm512_setzero_si512(), \ 5331 #define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \ 5332 (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ 5333 (__v16si)(__m512i)(W), \ 5336 #define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \ 5337 (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ 5338 (__v16si)_mm512_setzero_si512(), \ 5341 #define _mm512_ror_epi64(A, B) __extension__ ({ \ 5342 (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ 5343 (__v8di)_mm512_setzero_si512(), \ 5346 #define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \ 5347 (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ 5348 (__v8di)(__m512i)(W), (__mmask8)(U)); }) 5350 #define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \ 5351 (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ 5352 (__v8di)_mm512_setzero_si512(), \ 5358 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5364 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5371 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5379 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5385 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5393 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5401 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5407 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5414 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5422 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5428 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5436 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5444 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *) __P,
5452 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *) __P,
5461 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5468 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5476 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5484 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5492 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5500 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *) __P,
5508 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *) __P,
5517 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5524 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5525 0, 0, 2, 2, 4, 4, 6, 6);
5531 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5539 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5544 #define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \ 5545 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5546 (__v8df)(__m512d)(B), \ 5547 (__v8di)(__m512i)(C), (int)(imm), \ 5548 (__mmask8)-1, (int)(R)); }) 5550 #define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \ 5551 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5552 (__v8df)(__m512d)(B), \ 5553 (__v8di)(__m512i)(C), (int)(imm), \ 5554 (__mmask8)(U), (int)(R)); }) 5556 #define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \ 5557 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5558 (__v8df)(__m512d)(B), \ 5559 (__v8di)(__m512i)(C), (int)(imm), \ 5561 _MM_FROUND_CUR_DIRECTION); }) 5563 #define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ 5564 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5565 (__v8df)(__m512d)(B), \ 5566 (__v8di)(__m512i)(C), (int)(imm), \ 5568 _MM_FROUND_CUR_DIRECTION); }) 5570 #define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \ 5571 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5572 (__v8df)(__m512d)(B), \ 5573 (__v8di)(__m512i)(C), \ 5574 (int)(imm), (__mmask8)(U), \ 5577 #define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ 5578 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5579 (__v8df)(__m512d)(B), \ 5580 (__v8di)(__m512i)(C), \ 5581 (int)(imm), (__mmask8)(U), \ 5582 _MM_FROUND_CUR_DIRECTION); }) 5584 #define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \ 5585 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5586 (__v16sf)(__m512)(B), \ 5587 (__v16si)(__m512i)(C), (int)(imm), \ 5588 (__mmask16)-1, (int)(R)); }) 5590 #define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \ 5591 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5592 (__v16sf)(__m512)(B), \ 5593 (__v16si)(__m512i)(C), (int)(imm), \ 5594 
(__mmask16)(U), (int)(R)); }) 5596 #define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \ 5597 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5598 (__v16sf)(__m512)(B), \ 5599 (__v16si)(__m512i)(C), (int)(imm), \ 5601 _MM_FROUND_CUR_DIRECTION); }) 5603 #define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ 5604 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5605 (__v16sf)(__m512)(B), \ 5606 (__v16si)(__m512i)(C), (int)(imm), \ 5608 _MM_FROUND_CUR_DIRECTION); }) 5610 #define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \ 5611 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 5612 (__v16sf)(__m512)(B), \ 5613 (__v16si)(__m512i)(C), \ 5614 (int)(imm), (__mmask16)(U), \ 5617 #define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ 5618 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 5619 (__v16sf)(__m512)(B), \ 5620 (__v16si)(__m512i)(C), \ 5621 (int)(imm), (__mmask16)(U), \ 5622 _MM_FROUND_CUR_DIRECTION); }) 5624 #define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \ 5625 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5626 (__v2df)(__m128d)(B), \ 5627 (__v2di)(__m128i)(C), (int)(imm), \ 5628 (__mmask8)-1, (int)(R)); }) 5630 #define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \ 5631 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5632 (__v2df)(__m128d)(B), \ 5633 (__v2di)(__m128i)(C), (int)(imm), \ 5634 (__mmask8)(U), (int)(R)); }) 5636 #define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \ 5637 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5638 (__v2df)(__m128d)(B), \ 5639 (__v2di)(__m128i)(C), (int)(imm), \ 5641 _MM_FROUND_CUR_DIRECTION); }) 5643 #define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \ 5644 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5645 (__v2df)(__m128d)(B), \ 5646 (__v2di)(__m128i)(C), (int)(imm), \ 5648 
_MM_FROUND_CUR_DIRECTION); }) 5650 #define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \ 5651 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5652 (__v2df)(__m128d)(B), \ 5653 (__v2di)(__m128i)(C), (int)(imm), \ 5654 (__mmask8)(U), (int)(R)); }) 5656 #define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \ 5657 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5658 (__v2df)(__m128d)(B), \ 5659 (__v2di)(__m128i)(C), (int)(imm), \ 5661 _MM_FROUND_CUR_DIRECTION); }) 5663 #define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \ 5664 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5665 (__v4sf)(__m128)(B), \ 5666 (__v4si)(__m128i)(C), (int)(imm), \ 5667 (__mmask8)-1, (int)(R)); }) 5669 #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \ 5670 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5671 (__v4sf)(__m128)(B), \ 5672 (__v4si)(__m128i)(C), (int)(imm), \ 5673 (__mmask8)(U), (int)(R)); }) 5675 #define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \ 5676 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5677 (__v4sf)(__m128)(B), \ 5678 (__v4si)(__m128i)(C), (int)(imm), \ 5680 _MM_FROUND_CUR_DIRECTION); }) 5682 #define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \ 5683 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5684 (__v4sf)(__m128)(B), \ 5685 (__v4si)(__m128i)(C), (int)(imm), \ 5687 _MM_FROUND_CUR_DIRECTION); }) 5689 #define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \ 5690 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5691 (__v4sf)(__m128)(B), \ 5692 (__v4si)(__m128i)(C), (int)(imm), \ 5693 (__mmask8)(U), (int)(R)); }) 5695 #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \ 5696 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5697 (__v4sf)(__m128)(B), \ 5698 (__v4si)(__m128i)(C), (int)(imm), \ 5700 _MM_FROUND_CUR_DIRECTION); }) 5702 #define 
_mm_getexp_round_sd(A, B, R) __extension__ ({ \ 5703 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5704 (__v2df)(__m128d)(B), \ 5705 (__v2df)_mm_setzero_pd(), \ 5706 (__mmask8)-1, (int)(R)); }) 5712 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5719 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5726 #define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\ 5727 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5728 (__v2df)(__m128d)(B), \ 5729 (__v2df)(__m128d)(W), \ 5730 (__mmask8)(U), (int)(R)); }) 5735 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5742 #define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\ 5743 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5744 (__v2df)(__m128d)(B), \ 5745 (__v2df)_mm_setzero_pd(), \ 5746 (__mmask8)(U), (int)(R)); }) 5748 #define _mm_getexp_round_ss(A, B, R) __extension__ ({ \ 5749 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5750 (__v4sf)(__m128)(B), \ 5751 (__v4sf)_mm_setzero_ps(), \ 5752 (__mmask8)-1, (int)(R)); }) 5757 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5764 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5771 #define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\ 5772 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5773 (__v4sf)(__m128)(B), \ 5774 (__v4sf)(__m128)(W), \ 5775 (__mmask8)(U), (int)(R)); }) 5780 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5787 #define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\ 5788 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5789 (__v4sf)(__m128)(B), \ 5790 (__v4sf)_mm_setzero_ps(), \ 5791 (__mmask8)(U), (int)(R)); }) 5793 #define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \ 5794 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5795 (__v2df)(__m128d)(B), \ 5796 (int)(((D)<<2) | (C)), \ 5797 (__v2df)_mm_setzero_pd(), \ 5798 (__mmask8)-1, (int)(R)); }) 5800 #define _mm_getmant_sd(A, B, C, D) __extension__ ({ \ 5801 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5802 (__v2df)(__m128d)(B), \ 5803 (int)(((D)<<2) | (C)), \ 5804 (__v2df)_mm_setzero_pd(), \ 5806 _MM_FROUND_CUR_DIRECTION); }) 5808 #define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\ 5809 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5810 (__v2df)(__m128d)(B), \ 5811 (int)(((D)<<2) | (C)), \ 5812 (__v2df)(__m128d)(W), \ 5814 _MM_FROUND_CUR_DIRECTION); }) 5816 #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R)({\ 5817 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5818 (__v2df)(__m128d)(B), \ 5819 (int)(((D)<<2) | (C)), \ 5820 (__v2df)(__m128d)(W), \ 5821 (__mmask8)(U), (int)(R)); }) 5823 #define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\ 5824 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5825 (__v2df)(__m128d)(B), \ 5826 (int)(((D)<<2) | (C)), \ 5827 (__v2df)_mm_setzero_pd(), \ 5829 _MM_FROUND_CUR_DIRECTION); }) 5831 #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\ 5832 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5833 (__v2df)(__m128d)(B), \ 5834 (int)(((D)<<2) | (C)), \ 5835 (__v2df)_mm_setzero_pd(), \ 5836 (__mmask8)(U), (int)(R)); }) 5838 #define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \ 5839 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5840 (__v4sf)(__m128)(B), 
\ 5841 (int)(((D)<<2) | (C)), \ 5842 (__v4sf)_mm_setzero_ps(), \ 5843 (__mmask8)-1, (int)(R)); }) 5845 #define _mm_getmant_ss(A, B, C, D) __extension__ ({ \ 5846 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5847 (__v4sf)(__m128)(B), \ 5848 (int)(((D)<<2) | (C)), \ 5849 (__v4sf)_mm_setzero_ps(), \ 5851 _MM_FROUND_CUR_DIRECTION); }) 5853 #define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\ 5854 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5855 (__v4sf)(__m128)(B), \ 5856 (int)(((D)<<2) | (C)), \ 5857 (__v4sf)(__m128)(W), \ 5859 _MM_FROUND_CUR_DIRECTION); }) 5861 #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R)({\ 5862 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5863 (__v4sf)(__m128)(B), \ 5864 (int)(((D)<<2) | (C)), \ 5865 (__v4sf)(__m128)(W), \ 5866 (__mmask8)(U), (int)(R)); }) 5868 #define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\ 5869 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5870 (__v4sf)(__m128)(B), \ 5871 (int)(((D)<<2) | (C)), \ 5872 (__v4sf)_mm_setzero_pd(), \ 5874 _MM_FROUND_CUR_DIRECTION); }) 5876 #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\ 5877 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5878 (__v4sf)(__m128)(B), \ 5879 (int)(((D)<<2) | (C)), \ 5880 (__v4sf)_mm_setzero_ps(), \ 5881 (__mmask8)(U), (int)(R)); }) 5889 #define _mm_comi_round_sd(A, B, P, R) __extension__ ({\ 5890 (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ 5891 (int)(P), (int)(R)); }) 5893 #define _mm_comi_round_ss(A, B, P, R) __extension__ ({\ 5894 (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 5895 (int)(P), (int)(R)); }) 5898 #define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \ 5899 (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 5904 __mmask16 __U, __m512i __B)
5906 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5916 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5922 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5930 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5938 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5944 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5952 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5960 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5966 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5974 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5982 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5988 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5996 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6004 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
6010 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6018 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6026 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
6032 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6040 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6048 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
6054 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6062 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6070 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
6076 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6084 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6092 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
6098 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6106 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6114 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
6120 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6128 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6136 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
6142 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6150 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6158 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
6164 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6172 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6177 #define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ 6178 (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ 6179 (__v16si)(__m512i)(B), \ 6180 (__v16si)(__m512i)(C), (int)(imm), \ 6183 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ 6184 (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ 6185 (__v16si)(__m512i)(B), \ 6186 (__v16si)(__m512i)(C), (int)(imm), \ 6189 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ 6190 (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \ 6191 (__v16si)(__m512i)(B), \ 6192 (__v16si)(__m512i)(C), \ 6193 (int)(imm), (__mmask16)(U)); }) 6195 #define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ 6196 (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ 6197 (__v8di)(__m512i)(B), \ 6198 (__v8di)(__m512i)(C), (int)(imm), \ 6201 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ 6202 (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ 6203 (__v8di)(__m512i)(B), \ 6204 (__v8di)(__m512i)(C), (int)(imm), \ 6207 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ 6208 (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \ 6209 (__v8di)(__m512i)(B), \ 6210 (__v8di)(__m512i)(C), (int)(imm), \ 6214 #define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \ 6215 (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6218 #define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \ 6219 (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6221 #define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \ 6222 (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6224 #define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \ 6225 (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); }) 6230 return (
unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6235 #define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \ 6236 (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ 6240 _mm_cvtsd_u64 (__m128d __A)
6242 return (
unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6248 #define _mm_cvt_roundss_si32(A, R) __extension__ ({ \ 6249 (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6251 #define _mm_cvt_roundss_i32(A, R) __extension__ ({ \ 6252 (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6255 #define _mm_cvt_roundss_si64(A, R) __extension__ ({ \ 6256 (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6258 #define _mm_cvt_roundss_i64(A, R) __extension__ ({ \ 6259 (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6262 #define _mm_cvt_roundss_u32(A, R) __extension__ ({ \ 6263 (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); }) 6268 return (
unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6273 #define _mm_cvt_roundss_u64(A, R) __extension__ ({ \ 6274 (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ 6278 _mm_cvtss_u64 (__m128 __A)
6280 return (
unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
/*
 * NOTE(review): this region is a lossy extraction of clang's
 * <avx512fintrin.h>; the original file's line numbers are fused into the
 * text and the `static __inline ... __DEFAULT_FN_ATTRS` signature lines of
 * the inline functions are missing.  Code is preserved byte-for-byte below;
 * only comments are added.
 *
 * Section: scalar truncating float->integer conversions.  Each _round
 * macro forwards an explicit rounding/SAE argument R to the builtin; the
 * inline-function fragments pass the current direction instead (the
 * trailing _MM_FROUND_CUR_DIRECTION argument is lost to extraction —
 * TODO confirm against the upstream header).
 */
/* Low double of A -> truncated signed 32-bit int (vcvttsd2si). */
6286 #define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \ 6287 (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6289 #define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \ 6290 (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6295 return (
int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
/* Low double of A -> truncated signed 64-bit int (vcvttsd2si, 64-bit). */
6300 #define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \ 6301 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6303 #define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \ 6304 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6307 _mm_cvttsd_i64 (__m128d __A)
6309 return (
long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
/* Low double of A -> truncated unsigned 32-bit int (vcvttsd2usi). */
6314 #define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \ 6315 (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); }) 6320 return (
unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
/* Low double of A -> truncated unsigned 64-bit int (vcvttsd2usi, 64-bit). */
6325 #define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \ 6326 (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ 6330 _mm_cvttsd_u64 (__m128d __A)
6332 return (
unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
/* Low float of A -> truncated signed 32-bit int (vcvttss2si). */
6338 #define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \ 6339 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6341 #define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \ 6342 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6347 return (
int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
/* Low float of A -> truncated signed 64-bit int (vcvttss2si, 64-bit). */
6352 #define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \ 6353 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6355 #define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \ 6356 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6359 _mm_cvttss_i64 (__m128 __A)
6361 return (
long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
/* Low float of A -> truncated unsigned 32-bit int (vcvttss2usi). */
6366 #define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \ 6367 (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); }) 6372 return (
unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
/* Low float of A -> truncated unsigned 64-bit int (vcvttss2usi, 64-bit). */
6377 #define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \ 6378 (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ 6382 _mm_cvttss_u64 (__m128 __A)
6384 return (
unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
/*
 * NOTE(review): lossy extraction of clang's <avx512fintrin.h> — original
 * line numbers are fused into the text and function signature lines are
 * missing.  Code preserved byte-for-byte; comments only.
 *
 * Section: two-source permutes.  vpermi2var* selects elements from __A and
 * __B using the index operand (the "i2" form overwrites the index register);
 * the fragments below are the bodies of the mask_/masked variants.
 */
6394 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6405 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6414 __mmask8 __U, __m512i __B)
6416 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
/*
 * _mm512_permute_pd/ps: in-lane immediate permutes implemented as compile-
 * time shufflevector masks (1 selector bit per double, 2 bits per float,
 * replicated across 128-bit lanes).  mask_/maskz_ variants blend with W or
 * zero via the select builtins.  _mm512_permutevar_pd/ps (vpermilvar*) take
 * per-element variable control; only their `return` lines survive here.
 */
6423 #define _mm512_permute_pd(X, C) __extension__ ({ \ 6424 (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ 6425 (__v8df)_mm512_undefined_pd(), \ 6426 0 + (((C) >> 0) & 0x1), \ 6427 0 + (((C) >> 1) & 0x1), \ 6428 2 + (((C) >> 2) & 0x1), \ 6429 2 + (((C) >> 3) & 0x1), \ 6430 4 + (((C) >> 4) & 0x1), \ 6431 4 + (((C) >> 5) & 0x1), \ 6432 6 + (((C) >> 6) & 0x1), \ 6433 6 + (((C) >> 7) & 0x1)); }) 6435 #define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \ 6436 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6437 (__v8df)_mm512_permute_pd((X), (C)), \ 6438 (__v8df)(__m512d)(W)); }) 6440 #define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \ 6441 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6442 (__v8df)_mm512_permute_pd((X), (C)), \ 6443 (__v8df)_mm512_setzero_pd()); }) 6445 #define _mm512_permute_ps(X, C) __extension__ ({ \ 6446 (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \ 6447 (__v16sf)_mm512_undefined_ps(), \ 6448 0 + (((C) >> 0) & 0x3), \ 6449 0 + (((C) >> 2) & 0x3), \ 6450 0 + (((C) >> 4) & 0x3), \ 6451 0 + (((C) >> 6) & 0x3), \ 6452 4 + (((C) >> 0) & 0x3), \ 6453 4 + (((C) >> 2) & 0x3), \ 6454 4 + (((C) >> 4) & 0x3), \ 6455 4 + (((C) >> 6) & 0x3), \ 6456 8 + (((C) >> 0) & 0x3), \ 6457 8 + (((C) >> 2) & 0x3), \ 6458 8 + (((C) >> 4) & 0x3), \ 6459 8 + (((C) >> 6) & 0x3), \ 6460 12 + (((C) >> 0) & 0x3), \ 6461 12 + (((C) >> 2) & 0x3), \ 6462 12 + (((C) >> 4) & 0x3), \ 6463 12 + (((C) >> 6) & 0x3)); }) 6465 #define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \ 6466 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6467 (__v16sf)_mm512_permute_ps((X), (C)), \ 6468 (__v16sf)(__m512)(W)); }) 6470 #define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \ 6471 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6472 (__v16sf)_mm512_permute_ps((X), (C)), \ 6473 (__v16sf)_mm512_setzero_ps()); }) 6478 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6484 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6492 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6500 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6506 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6514 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
/* vpermt2var* ("t2" form: table operands preserved, index consumed) —
 * pd/ps bodies for the permutex2var mask/maskz variants. */
6522 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6532 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6543 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6553 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6563 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6574 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
/*
 * NOTE(review): lossy extraction — original line numbers fused into text,
 * function signatures missing.  Code preserved byte-for-byte.
 *
 * Section: truncating conversion of 8 doubles (__m512d) to 8 unsigned
 * 32-bit ints (__m256i) via vcvttpd2udq.  _round macros take explicit
 * R/SAE; passthrough is undefined (-1 mask), W (merge), or zero (maskz).
 */
6582 #define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \ 6583 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6584 (__v8si)_mm256_undefined_si256(), \ 6585 (__mmask8)-1, (int)(R)); }) 6587 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \ 6588 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6589 (__v8si)(__m256i)(W), \ 6590 (__mmask8)(U), (int)(R)); }) 6592 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \ 6593 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6594 (__v8si)_mm256_setzero_si256(), \ 6595 (__mmask8)(U), (int)(R)); }) 6600 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6610 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6619 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
/*
 * NOTE(review): lossy extraction — original line numbers fused into text;
 * several closing argument lines of macros are missing (e.g. the rounding
 * argument of _mm_roundscale_round_sd, and the (__mmask8)(U) line of
 * _mm_maskz_scalef_round_ss at original line 6902).  That is extraction
 * loss, not an upstream bug — confirm against the upstream header.
 * Code preserved byte-for-byte; comments only.
 *
 * Section 1: vrndscalesd/vrndscaless — round the low element of B to the
 * precision given by imm/I, pass the upper element(s) through from A.
 * Non-_round forms use _MM_FROUND_CUR_DIRECTION.
 */
6626 #define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \ 6627 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6628 (__v2df)(__m128d)(B), \ 6629 (__v2df)_mm_setzero_pd(), \ 6630 (__mmask8)-1, (int)(imm), \ 6633 #define _mm_roundscale_sd(A, B, imm) __extension__ ({ \ 6634 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6635 (__v2df)(__m128d)(B), \ 6636 (__v2df)_mm_setzero_pd(), \ 6637 (__mmask8)-1, (int)(imm), \ 6638 _MM_FROUND_CUR_DIRECTION); }) 6640 #define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \ 6641 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6642 (__v2df)(__m128d)(B), \ 6643 (__v2df)(__m128d)(W), \ 6644 (__mmask8)(U), (int)(imm), \ 6645 _MM_FROUND_CUR_DIRECTION); }) 6647 #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \ 6648 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6649 (__v2df)(__m128d)(B), \ 6650 (__v2df)(__m128d)(W), \ 6651 (__mmask8)(U), (int)(I), \ 6654 #define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \ 6655 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6656 (__v2df)(__m128d)(B), \ 6657 (__v2df)_mm_setzero_pd(), \ 6658 (__mmask8)(U), (int)(I), \ 6659 _MM_FROUND_CUR_DIRECTION); }) 6661 #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \ 6662 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6663 (__v2df)(__m128d)(B), \ 6664 (__v2df)_mm_setzero_pd(), \ 6665 (__mmask8)(U), (int)(I), \ 6668 #define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \ 6669 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6670 (__v4sf)(__m128)(B), \ 6671 (__v4sf)_mm_setzero_ps(), \ 6672 (__mmask8)-1, (int)(imm), \ 6675 #define _mm_roundscale_ss(A, B, imm) __extension__ ({ \ 6676 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6677 (__v4sf)(__m128)(B), \ 6678 (__v4sf)_mm_setzero_ps(), \ 6679 (__mmask8)-1, 
(int)(imm), \ 6680 _MM_FROUND_CUR_DIRECTION); }) 6682 #define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \ 6683 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6684 (__v4sf)(__m128)(B), \ 6685 (__v4sf)(__m128)(W), \ 6686 (__mmask8)(U), (int)(I), \ 6687 _MM_FROUND_CUR_DIRECTION); }) 6689 #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \ 6690 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6691 (__v4sf)(__m128)(B), \ 6692 (__v4sf)(__m128)(W), \ 6693 (__mmask8)(U), (int)(I), \ 6696 #define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \ 6697 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6698 (__v4sf)(__m128)(B), \ 6699 (__v4sf)_mm_setzero_ps(), \ 6700 (__mmask8)(U), (int)(I), \ 6701 _MM_FROUND_CUR_DIRECTION); }) 6703 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \ 6704 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6705 (__v4sf)(__m128)(B), \ 6706 (__v4sf)_mm_setzero_ps(), \ 6707 (__mmask8)(U), (int)(I), \ 6710 #define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \ 6711 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6712 (__v8df)(__m512d)(B), \ 6713 (__v8df)_mm512_undefined_pd(), \ 6714 (__mmask8)-1, (int)(R)); }) 6716 #define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \ 6717 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6718 (__v8df)(__m512d)(B), \ 6719 (__v8df)(__m512d)(W), \ 6720 (__mmask8)(U), (int)(R)); }) 6722 #define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \ 6723 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6724 (__v8df)(__m512d)(B), \ 6725 (__v8df)_mm512_setzero_pd(), \ 6726 (__mmask8)(U), (int)(R)); }) 6731 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6742 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6752 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
/* Section 2: vscalefps — packed A * 2^floor(B), float variants. */
6760 #define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \ 6761 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6762 (__v16sf)(__m512)(B), \ 6763 (__v16sf)_mm512_undefined_ps(), \ 6764 (__mmask16)-1, (int)(R)); }) 6766 #define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \ 6767 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6768 (__v16sf)(__m512)(B), \ 6769 (__v16sf)(__m512)(W), \ 6770 (__mmask16)(U), (int)(R)); }) 6772 #define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \ 6773 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6774 (__v16sf)(__m512)(B), \ 6775 (__v16sf)_mm512_setzero_ps(), \ 6776 (__mmask16)(U), (int)(R)); }) 6781 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6792 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6802 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
/* Section 3: vscalefsd/vscalefss — scalar scalef on the low element. */
6810 #define _mm_scalef_round_sd(A, B, R) __extension__ ({ \ 6811 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6812 (__v2df)(__m128d)(B), \ 6813 (__v2df)_mm_setzero_pd(), \ 6814 (__mmask8)-1, (int)(R)); }) 6819 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6828 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6835 #define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \ 6836 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6837 (__v2df)(__m128d)(B), \ 6838 (__v2df)(__m128d)(W), \ 6839 (__mmask8)(U), (int)(R)); }) 6844 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6851 #define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \ 6852 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6853 (__v2df)(__m128d)(B), \ 6854 (__v2df)_mm_setzero_pd(), \ 6855 (__mmask8)(U), (int)(R)); }) 6857 #define _mm_scalef_round_ss(A, B, R) __extension__ ({ \ 6858 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6859 (__v4sf)(__m128)(B), \ 6860 (__v4sf)_mm_setzero_ps(), \ 6861 (__mmask8)-1, (int)(R)); }) 6866 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6875 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6882 #define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \ 6883 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6884 (__v4sf)(__m128)(B), \ 6885 (__v4sf)(__m128)(W), \ 6886 (__mmask8)(U), (int)(R)); }) 6891 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
/* Section 4: immediate arithmetic right shifts (vpsrad/vpsraq, imm form);
 * mask/maskz variants blend through the select builtins. */
6898 #define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \ 6899 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6900 (__v4sf)(__m128)(B), \ 6901 (__v4sf)_mm_setzero_ps(), \ 6903 _MM_FROUND_CUR_DIRECTION); }) 6908 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6914 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
6921 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
6929 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6935 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
6943 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
/*
 * NOTE(review): lossy extraction — original line numbers fused into text.
 * Code preserved byte-for-byte; comments only.
 *
 * Section: 128-bit-lane shuffles (vshuff32x4/vshuff64x2/vshufi32x4/
 * vshufi64x2) built from compile-time shufflevector masks: each 2-bit
 * immediate field selects one 128-bit lane from A (low half of result) or
 * B (high half).  Also _mm512_shuffle_pd/ps (per-element in-lane shuffle
 * of A/B pairs) and the start of _mm_sqrt_round_sd.
 *
 * NOTE(review): _mm512_shuffle_ps below casts the shufflevector result to
 * (__m512d) although the operands are __v16sf — this looks like a genuine
 * upstream typo for (__m512); it was corrected in later clang releases.
 * Left untouched here since this pass is comment-only.
 */
6948 #define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \ 6949 (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ 6950 (__v16sf)(__m512)(B), \ 6951 0 + ((((imm) >> 0) & 0x3) * 4), \ 6952 1 + ((((imm) >> 0) & 0x3) * 4), \ 6953 2 + ((((imm) >> 0) & 0x3) * 4), \ 6954 3 + ((((imm) >> 0) & 0x3) * 4), \ 6955 0 + ((((imm) >> 2) & 0x3) * 4), \ 6956 1 + ((((imm) >> 2) & 0x3) * 4), \ 6957 2 + ((((imm) >> 2) & 0x3) * 4), \ 6958 3 + ((((imm) >> 2) & 0x3) * 4), \ 6959 16 + ((((imm) >> 4) & 0x3) * 4), \ 6960 17 + ((((imm) >> 4) & 0x3) * 4), \ 6961 18 + ((((imm) >> 4) & 0x3) * 4), \ 6962 19 + ((((imm) >> 4) & 0x3) * 4), \ 6963 16 + ((((imm) >> 6) & 0x3) * 4), \ 6964 17 + ((((imm) >> 6) & 0x3) * 4), \ 6965 18 + ((((imm) >> 6) & 0x3) * 4), \ 6966 19 + ((((imm) >> 6) & 0x3) * 4)); }) 6968 #define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \ 6969 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6970 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ 6971 (__v16sf)(__m512)(W)); }) 6973 #define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \ 6974 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6975 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ 6976 (__v16sf)_mm512_setzero_ps()); }) 6978 #define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \ 6979 (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 6980 (__v8df)(__m512d)(B), \ 6981 0 + ((((imm) >> 0) & 0x3) * 2), \ 6982 1 + ((((imm) >> 0) & 0x3) * 2), \ 6983 0 + ((((imm) >> 2) & 0x3) * 2), \ 6984 1 + ((((imm) >> 2) & 0x3) * 2), \ 6985 8 + ((((imm) >> 4) & 0x3) * 2), \ 6986 9 + ((((imm) >> 4) & 0x3) * 2), \ 6987 8 + ((((imm) >> 6) & 0x3) * 2), \ 6988 9 + ((((imm) >> 6) & 0x3) * 2)); }) 6990 #define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \ 6991 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6992 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ 6993 (__v8df)(__m512d)(W)); }) 6995 #define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \ 6996 
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6997 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ 6998 (__v8df)_mm512_setzero_pd()); }) 7000 #define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \ 7001 (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 7002 (__v8di)(__m512i)(B), \ 7003 0 + ((((imm) >> 0) & 0x3) * 2), \ 7004 1 + ((((imm) >> 0) & 0x3) * 2), \ 7005 0 + ((((imm) >> 2) & 0x3) * 2), \ 7006 1 + ((((imm) >> 2) & 0x3) * 2), \ 7007 8 + ((((imm) >> 4) & 0x3) * 2), \ 7008 9 + ((((imm) >> 4) & 0x3) * 2), \ 7009 8 + ((((imm) >> 6) & 0x3) * 2), \ 7010 9 + ((((imm) >> 6) & 0x3) * 2)); }) 7012 #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \ 7013 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 7014 (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ 7015 (__v16si)(__m512i)(W)); }) 7017 #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \ 7018 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 7019 (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ 7020 (__v16si)_mm512_setzero_si512()); }) 7022 #define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \ 7023 (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 7024 (__v8di)(__m512i)(B), \ 7025 0 + ((((imm) >> 0) & 0x3) * 2), \ 7026 1 + ((((imm) >> 0) & 0x3) * 2), \ 7027 0 + ((((imm) >> 2) & 0x3) * 2), \ 7028 1 + ((((imm) >> 2) & 0x3) * 2), \ 7029 8 + ((((imm) >> 4) & 0x3) * 2), \ 7030 9 + ((((imm) >> 4) & 0x3) * 2), \ 7031 8 + ((((imm) >> 6) & 0x3) * 2), \ 7032 9 + ((((imm) >> 6) & 0x3) * 2)); }) 7034 #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \ 7035 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7036 (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ 7037 (__v8di)(__m512i)(W)); }) 7039 #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \ 7040 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7041 (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ 7042 (__v8di)_mm512_setzero_si512()); }) 7044 #define 
_mm512_shuffle_pd(A, B, M) __extension__ ({ \ 7045 (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 7046 (__v8df)(__m512d)(B), \ 7047 0 + (((M) >> 0) & 0x1), \ 7048 8 + (((M) >> 1) & 0x1), \ 7049 2 + (((M) >> 2) & 0x1), \ 7050 10 + (((M) >> 3) & 0x1), \ 7051 4 + (((M) >> 4) & 0x1), \ 7052 12 + (((M) >> 5) & 0x1), \ 7053 6 + (((M) >> 6) & 0x1), \ 7054 14 + (((M) >> 7) & 0x1)); }) 7056 #define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ 7057 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7058 (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 7059 (__v8df)(__m512d)(W)); }) 7061 #define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ 7062 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7063 (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 7064 (__v8df)_mm512_setzero_pd()); }) 7066 #define _mm512_shuffle_ps(A, B, M) __extension__ ({ \ 7067 (__m512d)__builtin_shufflevector((__v16sf)(__m512)(A), \ 7068 (__v16sf)(__m512)(B), \ 7069 0 + (((M) >> 0) & 0x3), \ 7070 0 + (((M) >> 2) & 0x3), \ 7071 16 + (((M) >> 4) & 0x3), \ 7072 16 + (((M) >> 6) & 0x3), \ 7073 4 + (((M) >> 0) & 0x3), \ 7074 4 + (((M) >> 2) & 0x3), \ 7075 20 + (((M) >> 4) & 0x3), \ 7076 20 + (((M) >> 6) & 0x3), \ 7077 8 + (((M) >> 0) & 0x3), \ 7078 8 + (((M) >> 2) & 0x3), \ 7079 24 + (((M) >> 4) & 0x3), \ 7080 24 + (((M) >> 6) & 0x3), \ 7081 12 + (((M) >> 0) & 0x3), \ 7082 12 + (((M) >> 2) & 0x3), \ 7083 28 + (((M) >> 4) & 0x3), \ 7084 28 + (((M) >> 6) & 0x3)); }) 7086 #define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ 7087 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7088 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 7089 (__v16sf)(__m512)(W)); }) 7091 #define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ 7092 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7093 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 7094 (__v16sf)_mm512_setzero_ps()); }) 7096 #define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \ 7097 
(__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 7098 (__v2df)(__m128d)(B), \ 7099 (__v2df)_mm_setzero_pd(), \ 7100 (__mmask8)-1, (int)(R)); }) 7105 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
/*
 * NOTE(review): lossy extraction — original line numbers fused into text,
 * function signatures missing.  Code preserved byte-for-byte.
 *
 * Section 1: scalar square root with rounding control (vsqrtsd/vsqrtss);
 * mask/maskz variants merge with W or zero the low element.
 */
7112 #define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \ 7113 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 7114 (__v2df)(__m128d)(B), \ 7115 (__v2df)(__m128d)(W), \ 7116 (__mmask8)(U), (int)(R)); }) 7121 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7128 #define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \ 7129 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 7130 (__v2df)(__m128d)(B), \ 7131 (__v2df)_mm_setzero_pd(), \ 7132 (__mmask8)(U), (int)(R)); }) 7134 #define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \ 7135 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 7136 (__v4sf)(__m128)(B), \ 7137 (__v4sf)_mm_setzero_ps(), \ 7138 (__mmask8)-1, (int)(R)); }) 7143 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7150 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \ 7151 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 7152 (__v4sf)(__m128)(B), \ 7153 (__v4sf)(__m128)(W), (__mmask8)(U), \ 7159 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
/*
 * Section 2: _mm512_broadcast_f32x4/f64x4/i32x4/i64x4 — replicate a
 * 128- or 256-bit vector across the 512-bit result via shufflevector;
 * mask/maskz variants blend through the select builtins.
 */
7166 #define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \ 7167 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 7168 (__v4sf)(__m128)(B), \ 7169 (__v4sf)_mm_setzero_ps(), \ 7170 (__mmask8)(U), (int)(R)); }) 7175 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
7176 0, 1, 2, 3, 0, 1, 2, 3,
7177 0, 1, 2, 3, 0, 1, 2, 3);
7183 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7191 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7199 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
7200 0, 1, 2, 3, 0, 1, 2, 3);
7206 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7214 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7222 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
7223 0, 1, 2, 3, 0, 1, 2, 3,
7224 0, 1, 2, 3, 0, 1, 2, 3);
7230 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7238 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7246 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
7247 0, 1, 2, 3, 0, 1, 2, 3);
7253 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7261 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
/* Section 3: mask_mov / maskz_mov pd/ps — element-wise masked blends. */
7269 return (__m512d)__builtin_ia32_selectpd_512(__M,
7277 return (__m512d)__builtin_ia32_selectpd_512(__M,
7285 return (__m512)__builtin_ia32_selectps_512(__M,
7293 return (__m512)__builtin_ia32_selectps_512(__M,
/*
 * NOTE(review): lossy extraction — original line numbers fused into text,
 * function signatures missing.  Code preserved byte-for-byte.
 *
 * Section: narrowing integer down-conversions (VPMOV family).  Three
 * flavors, each with plain / mask (merge with __O) / maskz / mem-store
 * variants:
 *   pmovs*  — signed saturation   (cvtsepi32/64 -> epi8/16/32)
 *   pmovus* — unsigned saturation (cvtusepi32/64 -> epi8/16/32)
 *   pmov*   — plain truncation    (cvtepi32/64  -> epi8/16/32)
 * The *mem_mask builtins store the narrowed elements directly to *__P
 * under mask __M.
 */
/* Signed-saturating: 16 x i32 -> 16 x i8. */
7301 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7309 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7310 (__v16qi) __O, __M);
7316 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7324 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
/* Signed-saturating: 16 x i32 -> 16 x i16. */
7330 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7338 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7339 (__v16hi) __O, __M);
7345 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7353 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
/* Signed-saturating: 8 x i64 -> 8 x i8 (low 8 bytes of a __m128i). */
7359 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7367 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7368 (__v16qi) __O, __M);
7374 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7382 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
/* Signed-saturating: 8 x i64 -> 8 x i32. */
7388 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7396 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7403 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7411 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
/* Signed-saturating: 8 x i64 -> 8 x i16. */
7417 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7425 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7432 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7440 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
/* Unsigned-saturating: 16 x i32 -> 16 x i8. */
7446 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7454 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7462 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7470 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
/* Unsigned-saturating: 16 x i32 -> 16 x i16. */
7476 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7484 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7492 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7500 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
/* Unsigned-saturating: 8 x i64 -> 8 x i8. */
7506 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7514 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7522 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7530 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
/* Unsigned-saturating: 8 x i64 -> 8 x i32. */
7536 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7544 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7551 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7559 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
/* Unsigned-saturating: 8 x i64 -> 8 x i16. */
7565 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7573 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7580 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7588 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
/* Truncating: 16 x i32 -> 16 x i8. */
7594 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7602 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7603 (__v16qi) __O, __M);
7609 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7617 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
/* Truncating: 16 x i32 -> 16 x i16. */
7623 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7631 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7632 (__v16hi) __O, __M);
7638 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7646 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
/* Truncating: 8 x i64 -> 8 x i8. */
7652 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7660 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7661 (__v16qi) __O, __M);
7667 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7675 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
/* Truncating: 8 x i64 -> 8 x i32. */
7681 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7689 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7696 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7704 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
/* Truncating: 8 x i64 -> 8 x i16. */
7710 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7718 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7725 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7733 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
/*
 * NOTE(review): lossy extraction — original line numbers fused into text;
 * some macro lines are missing (e.g. the (W) blend operand of the
 * _mm512_mask_extract*/mask_insert* variants).  Extraction loss, not an
 * upstream defect — confirm against the upstream header.  Code preserved
 * byte-for-byte; comments only.
 *
 * Section: lane extract/insert.  extracti32x4/extracti64x4 pull one 128-
 * or 256-bit lane out of a 512-bit vector via compile-time shufflevector
 * masks; insertf64x4/inserti64x4/insertf32x4/inserti32x4 splice a 256- or
 * 128-bit vector into the lane selected by imm (indices >= 16 address the
 * widened B operand).
 */
7736 #define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \ 7737 (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 7738 (__v16si)_mm512_undefined_epi32(), \ 7739 0 + ((imm) & 0x3) * 4, \ 7740 1 + ((imm) & 0x3) * 4, \ 7741 2 + ((imm) & 0x3) * 4, \ 7742 3 + ((imm) & 0x3) * 4); }) 7744 #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ 7745 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 7746 (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ 7749 #define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ 7750 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 7751 (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ 7752 (__v4si)_mm_setzero_si128()); }) 7754 #define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \ 7755 (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 7756 (__v8di)_mm512_undefined_epi32(), \ 7757 ((imm) & 1) ? 4 : 0, \ 7758 ((imm) & 1) ? 5 : 1, \ 7759 ((imm) & 1) ? 6 : 2, \ 7760 ((imm) & 1) ? 7 : 3); }) 7762 #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \ 7763 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 7764 (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \ 7767 #define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \ 7768 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 7769 (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \ 7770 (__v4di)_mm256_setzero_si256()); }) 7772 #define _mm512_insertf64x4(A, B, imm) __extension__ ({ \ 7773 (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 7774 (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \ 7775 ((imm) & 0x1) ? 0 : 8, \ 7776 ((imm) & 0x1) ? 1 : 9, \ 7777 ((imm) & 0x1) ? 2 : 10, \ 7778 ((imm) & 0x1) ? 3 : 11, \ 7779 ((imm) & 0x1) ? 8 : 4, \ 7780 ((imm) & 0x1) ? 9 : 5, \ 7781 ((imm) & 0x1) ? 10 : 6, \ 7782 ((imm) & 0x1) ? 
11 : 7); }) 7784 #define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \ 7785 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7786 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7789 #define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \ 7790 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7791 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7792 (__v8df)_mm512_setzero_pd()); }) 7794 #define _mm512_inserti64x4(A, B, imm) __extension__ ({ \ 7795 (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 7796 (__v8di)_mm512_castsi256_si512((__m256i)(B)), \ 7797 ((imm) & 0x1) ? 0 : 8, \ 7798 ((imm) & 0x1) ? 1 : 9, \ 7799 ((imm) & 0x1) ? 2 : 10, \ 7800 ((imm) & 0x1) ? 3 : 11, \ 7801 ((imm) & 0x1) ? 8 : 4, \ 7802 ((imm) & 0x1) ? 9 : 5, \ 7803 ((imm) & 0x1) ? 10 : 6, \ 7804 ((imm) & 0x1) ? 11 : 7); }) 7806 #define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \ 7807 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7808 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 7811 #define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \ 7812 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7813 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 7814 (__v8di)_mm512_setzero_si512()); }) 7816 #define _mm512_insertf32x4(A, B, imm) __extension__ ({ \ 7817 (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ 7818 (__v16sf)_mm512_castps128_ps512((__m128)(B)),\ 7819 (((imm) & 0x3) == 0) ? 16 : 0, \ 7820 (((imm) & 0x3) == 0) ? 17 : 1, \ 7821 (((imm) & 0x3) == 0) ? 18 : 2, \ 7822 (((imm) & 0x3) == 0) ? 19 : 3, \ 7823 (((imm) & 0x3) == 1) ? 16 : 4, \ 7824 (((imm) & 0x3) == 1) ? 17 : 5, \ 7825 (((imm) & 0x3) == 1) ? 18 : 6, \ 7826 (((imm) & 0x3) == 1) ? 19 : 7, \ 7827 (((imm) & 0x3) == 2) ? 16 : 8, \ 7828 (((imm) & 0x3) == 2) ? 17 : 9, \ 7829 (((imm) & 0x3) == 2) ? 18 : 10, \ 7830 (((imm) & 0x3) == 2) ? 19 : 11, \ 7831 (((imm) & 0x3) == 3) ? 16 : 12, \ 7832 (((imm) & 0x3) == 3) ? 17 : 13, \ 7833 (((imm) & 0x3) == 3) ? 
18 : 14, \ 7834 (((imm) & 0x3) == 3) ? 19 : 15); }) 7836 #define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \ 7837 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7838 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ 7841 #define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \ 7842 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7843 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ 7844 (__v16sf)_mm512_setzero_ps()); }) 7846 #define _mm512_inserti32x4(A, B, imm) __extension__ ({ \ 7847 (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 7848 (__v16si)_mm512_castsi128_si512((__m128i)(B)),\ 7849 (((imm) & 0x3) == 0) ? 16 : 0, \ 7850 (((imm) & 0x3) == 0) ? 17 : 1, \ 7851 (((imm) & 0x3) == 0) ? 18 : 2, \ 7852 (((imm) & 0x3) == 0) ? 19 : 3, \ 7853 (((imm) & 0x3) == 1) ? 16 : 4, \ 7854 (((imm) & 0x3) == 1) ? 17 : 5, \ 7855 (((imm) & 0x3) == 1) ? 18 : 6, \ 7856 (((imm) & 0x3) == 1) ? 19 : 7, \ 7857 (((imm) & 0x3) == 2) ? 16 : 8, \ 7858 (((imm) & 0x3) == 2) ? 17 : 9, \ 7859 (((imm) & 0x3) == 2) ? 18 : 10, \ 7860 (((imm) & 0x3) == 2) ? 19 : 11, \ 7861 (((imm) & 0x3) == 3) ? 16 : 12, \ 7862 (((imm) & 0x3) == 3) ? 17 : 13, \ 7863 (((imm) & 0x3) == 3) ? 18 : 14, \ 7864 (((imm) & 0x3) == 3) ? 
19 : 15); }) 7866 #define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \ 7867 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 7868 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ 7871 #define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \ 7872 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 7873 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ 7874 (__v16si)_mm512_setzero_si512()); }) 7876 #define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \ 7877 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7878 (int)(((C)<<2) | (B)), \ 7879 (__v8df)_mm512_undefined_pd(), \ 7880 (__mmask8)-1, (int)(R)); }) 7882 #define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \ 7883 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7884 (int)(((C)<<2) | (B)), \ 7885 (__v8df)(__m512d)(W), \ 7886 (__mmask8)(U), (int)(R)); }) 7888 #define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \ 7889 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7890 (int)(((C)<<2) | (B)), \ 7891 (__v8df)_mm512_setzero_pd(), \ 7892 (__mmask8)(U), (int)(R)); }) 7894 #define _mm512_getmant_pd(A, B, C) __extension__ ({ \ 7895 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7896 (int)(((C)<<2) | (B)), \ 7897 (__v8df)_mm512_setzero_pd(), \ 7899 _MM_FROUND_CUR_DIRECTION); }) 7901 #define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \ 7902 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7903 (int)(((C)<<2) | (B)), \ 7904 (__v8df)(__m512d)(W), \ 7906 _MM_FROUND_CUR_DIRECTION); }) 7908 #define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \ 7909 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7910 (int)(((C)<<2) | (B)), \ 7911 (__v8df)_mm512_setzero_pd(), \ 7913 _MM_FROUND_CUR_DIRECTION); }) 7915 #define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \ 7916 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 
/*
 * NOTE(review): lossy extraction — original line numbers fused into text,
 * function signatures and some macro lines missing.  Code preserved
 * byte-for-byte; comments only.
 *
 * Section 1: vgetmantps (continuation) and vgetexppd/ps — extract the
 * mantissa (interval/sign controlled by the fused (C<<2)|B immediate) or
 * the biased exponent of each element.
 */
7917 (int)(((C)<<2) | (B)), \ 7918 (__v16sf)_mm512_undefined_ps(), \ 7919 (__mmask16)-1, (int)(R)); }) 7921 #define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \ 7922 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7923 (int)(((C)<<2) | (B)), \ 7924 (__v16sf)(__m512)(W), \ 7925 (__mmask16)(U), (int)(R)); }) 7927 #define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \ 7928 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7929 (int)(((C)<<2) | (B)), \ 7930 (__v16sf)_mm512_setzero_ps(), \ 7931 (__mmask16)(U), (int)(R)); }) 7933 #define _mm512_getmant_ps(A, B, C) __extension__ ({ \ 7934 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7935 (int)(((C)<<2)|(B)), \ 7936 (__v16sf)_mm512_undefined_ps(), \ 7938 _MM_FROUND_CUR_DIRECTION); }) 7940 #define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ 7941 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7942 (int)(((C)<<2)|(B)), \ 7943 (__v16sf)(__m512)(W), \ 7945 _MM_FROUND_CUR_DIRECTION); }) 7947 #define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ 7948 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7949 (int)(((C)<<2)|(B)), \ 7950 (__v16sf)_mm512_setzero_ps(), \ 7952 _MM_FROUND_CUR_DIRECTION); }) 7954 #define _mm512_getexp_round_pd(A, R) __extension__ ({ \ 7955 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7956 (__v8df)_mm512_undefined_pd(), \ 7957 (__mmask8)-1, (int)(R)); }) 7959 #define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \ 7960 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7961 (__v8df)(__m512d)(W), \ 7962 (__mmask8)(U), (int)(R)); }) 7964 #define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \ 7965 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7966 (__v8df)_mm512_setzero_pd(), \ 7967 (__mmask8)(U), (int)(R)); }) 7972 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7981 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7990 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7996 #define _mm512_getexp_round_ps(A, R) __extension__ ({ \ 7997 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 7998 (__v16sf)_mm512_undefined_ps(), \ 7999 (__mmask16)-1, (int)(R)); }) 8001 #define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \ 8002 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8003 (__v16sf)(__m512)(W), \ 8004 (__mmask16)(U), (int)(R)); }) 8006 #define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \ 8007 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8008 (__v16sf)_mm512_setzero_ps(), \ 8009 (__mmask16)(U), (int)(R)); }) 8014 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8023 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8032 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
/*
 * Section 2: 64-bit-index gathers (vgatherqps/vgatherqdq family).  Each
 * loads 8 elements from addr + index[i]*scale under the mask; the unmasked
 * forms seed the passthrough with an undefined vector and an all-ones mask.
 *
 * NOTE(review): _mm512_i64gather_epi32 and _mm512_i64gather_epi64 seed
 * their integer passthrough with _mm256_undefined_ps()/_mm512_undefined_pd()
 * cast to integer vectors.  Harmless (the value is undefined anyway) but
 * type-inconsistent; later upstream versions use _mm512_undefined_epi32().
 */
8038 #define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \ 8039 (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ 8040 (float const *)(addr), \ 8041 (__v8di)(__m512i)(index), (__mmask8)-1, \ 8044 #define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__({\ 8045 (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\ 8046 (float const *)(addr), \ 8047 (__v8di)(__m512i)(index), \ 8048 (__mmask8)(mask), (int)(scale)); }) 8050 #define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\ 8051 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_ps(), \ 8052 (int const *)(addr), \ 8053 (__v8di)(__m512i)(index), \ 8054 (__mmask8)-1, (int)(scale)); }) 8056 #define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8057 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ 8058 (int const *)(addr), \ 8059 (__v8di)(__m512i)(index), \ 8060 (__mmask8)(mask), (int)(scale)); }) 8062 #define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\ 8063 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ 8064 (double const *)(addr), \ 8065 (__v8di)(__m512i)(index), (__mmask8)-1, \ 8068 #define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8069 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ 8070 (double const *)(addr), \ 8071 (__v8di)(__m512i)(index), \ 8072 (__mmask8)(mask), (int)(scale)); }) 8074 #define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\ 8075 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_pd(), \ 8076 (long long const *)(addr), \ 8077 (__v8di)(__m512i)(index), (__mmask8)-1, \ 8080 #define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8081 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ 8082 (long long const *)(addr), \ 8083 (__v8di)(__m512i)(index), \ 8084 (__mmask8)(mask), (int)(scale)); }) 8086 
#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\ 8087 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ 8088 (float const *)(addr), \ 8089 (__v16sf)(__m512)(index), \ 8090 (__mmask16)-1, (int)(scale)); }) 8092 #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8093 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ 8094 (float const *)(addr), \ 8095 (__v16sf)(__m512)(index), \ 8096 (__mmask16)(mask), (int)(scale)); }) 8098 #define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\ 8099 (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ 8100 (int const *)(addr), \ 8101 (__v16si)(__m512i)(index), \ 8102 (__mmask16)-1, (int)(scale)); }) 8104 #define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8105 (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ 8106 (int const *)(addr), \ 8107 (__v16si)(__m512i)(index), \ 8108 (__mmask16)(mask), (int)(scale)); }) 8110 #define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\ 8111 (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ 8112 (double const *)(addr), \ 8113 (__v8si)(__m256i)(index), (__mmask8)-1, \ 8116 #define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8117 (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ 8118 (double const *)(addr), \ 8119 (__v8si)(__m256i)(index), \ 8120 (__mmask8)(mask), (int)(scale)); }) 8122 #define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\ 8123 (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ 8124 (long long const *)(addr), \ 8125 (__v8si)(__m256i)(index), (__mmask8)-1, \ 8128 #define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8129 (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ 8130 (long long const *)(addr), \ 8131 (__v8si)(__m256i)(index), \ 8132 
(__mmask8)(mask), (int)(scale)); }) 8134 #define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\ 8135 __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \ 8136 (__v8di)(__m512i)(index), \ 8137 (__v8sf)(__m256)(v1), (int)(scale)); }) 8139 #define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\ 8140 __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \ 8141 (__v8di)(__m512i)(index), \ 8142 (__v8sf)(__m256)(v1), (int)(scale)); }) 8144 #define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\ 8145 __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \ 8146 (__v8di)(__m512i)(index), \ 8147 (__v8si)(__m256i)(v1), (int)(scale)); }) 8149 #define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\ 8150 __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \ 8151 (__v8di)(__m512i)(index), \ 8152 (__v8si)(__m256i)(v1), (int)(scale)); }) 8154 #define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\ 8155 __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \ 8156 (__v8di)(__m512i)(index), \ 8157 (__v8df)(__m512d)(v1), (int)(scale)); }) 8159 #define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\ 8160 __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \ 8161 (__v8di)(__m512i)(index), \ 8162 (__v8df)(__m512d)(v1), (int)(scale)); }) 8164 #define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\ 8165 __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \ 8166 (__v8di)(__m512i)(index), \ 8167 (__v8di)(__m512i)(v1), (int)(scale)); }) 8169 #define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\ 8170 __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \ 8171 (__v8di)(__m512i)(index), \ 8172 (__v8di)(__m512i)(v1), (int)(scale)); }) 8174 #define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\ 8175 
__builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \ 8176 (__v16si)(__m512i)(index), \ 8177 (__v16sf)(__m512)(v1), (int)(scale)); }) 8179 #define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\ 8180 __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \ 8181 (__v16si)(__m512i)(index), \ 8182 (__v16sf)(__m512)(v1), (int)(scale)); }) 8184 #define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\ 8185 __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \ 8186 (__v16si)(__m512i)(index), \ 8187 (__v16si)(__m512i)(v1), (int)(scale)); }) 8189 #define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\ 8190 __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \ 8191 (__v16si)(__m512i)(index), \ 8192 (__v16si)(__m512i)(v1), (int)(scale)); }) 8194 #define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\ 8195 __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \ 8196 (__v8si)(__m256i)(index), \ 8197 (__v8df)(__m512d)(v1), (int)(scale)); }) 8199 #define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\ 8200 __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \ 8201 (__v8si)(__m256i)(index), \ 8202 (__v8df)(__m512d)(v1), (int)(scale)); }) 8204 #define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\ 8205 __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \ 8206 (__v8si)(__m256i)(index), \ 8207 (__v8di)(__m512i)(v1), (int)(scale)); }) 8209 #define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\ 8210 __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \ 8211 (__v8si)(__m256i)(index), \ 8212 (__v8di)(__m512i)(v1), (int)(scale)); }) 8217 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8224 #define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\ 8225 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8226 (__v4sf)(__m128)(A), \ 8227 (__v4sf)(__m128)(B), (__mmask8)(U), \ 8233 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8240 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\ 8241 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 8242 (__v4sf)(__m128)(B), \ 8243 (__v4sf)(__m128)(C), (__mmask8)(U), \ 8244 _MM_FROUND_CUR_DIRECTION); }) 8249 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
8256 #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\ 8257 (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 8258 (__v4sf)(__m128)(X), \ 8259 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8265 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8272 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\ 8273 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8274 (__v4sf)(__m128)(A), \ 8275 (__v4sf)(__m128)(B), (__mmask8)(U), \ 8281 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8288 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\ 8289 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 8290 (__v4sf)(__m128)(B), \ 8291 -(__v4sf)(__m128)(C), (__mmask8)(U), \ 8297 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
8304 #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\ 8305 (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 8306 (__v4sf)(__m128)(X), \ 8307 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8313 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8320 #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\ 8321 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8322 -(__v4sf)(__m128)(A), \ 8323 (__v4sf)(__m128)(B), (__mmask8)(U), \ 8329 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8336 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\ 8337 (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \ 8338 (__v4sf)(__m128)(B), \ 8339 (__v4sf)(__m128)(C), (__mmask8)(U), \ 8345 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
8352 #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\ 8353 (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \ 8354 (__v4sf)(__m128)(X), \ 8355 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8361 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8368 #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\ 8369 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 8370 -(__v4sf)(__m128)(A), \ 8371 -(__v4sf)(__m128)(B), (__mmask8)(U), \ 8377 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8384 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\ 8385 (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \ 8386 (__v4sf)(__m128)(B), \ 8387 -(__v4sf)(__m128)(C), (__mmask8)(U), \ 8388 _MM_FROUND_CUR_DIRECTION); }) 8393 return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
8400 #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\ 8401 (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \ 8402 (__v4sf)(__m128)(X), \ 8403 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8409 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8416 #define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\ 8417 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8418 (__v2df)(__m128d)(A), \ 8419 (__v2df)(__m128d)(B), (__mmask8)(U), \ 8425 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8432 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\ 8433 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8434 (__v2df)(__m128d)(B), \ 8435 (__v2df)(__m128d)(C), (__mmask8)(U), \ 8436 _MM_FROUND_CUR_DIRECTION); }) 8441 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
8448 #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\ 8449 (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8450 (__v2df)(__m128d)(X), \ 8451 (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8457 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8464 #define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\ 8465 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8466 (__v2df)(__m128d)(A), \ 8467 -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8473 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8480 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\ 8481 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8482 (__v2df)(__m128d)(B), \ 8483 -(__v2df)(__m128d)(C), \ 8484 (__mmask8)(U), (int)(R)); }) 8489 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
8496 #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\ 8497 (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 8498 (__v2df)(__m128d)(X), \ 8499 (__v2df)(__m128d)(Y), \ 8500 (__mmask8)(U), (int)(R)); }) 8505 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8512 #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\ 8513 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8514 -(__v2df)(__m128d)(A), \ 8515 (__v2df)(__m128d)(B), (__mmask8)(U), \ 8521 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8528 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\ 8529 (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \ 8530 (__v2df)(__m128d)(B), \ 8531 (__v2df)(__m128d)(C), (__mmask8)(U), \ 8537 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
8544 #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\ 8545 (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \ 8546 (__v2df)(__m128d)(X), \ 8547 (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8553 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8560 #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\ 8561 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8562 -(__v2df)(__m128d)(A), \ 8563 -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8569 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8576 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\ 8577 (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \ 8578 (__v2df)(__m128d)(B), \ 8579 -(__v2df)(__m128d)(C), \ 8581 _MM_FROUND_CUR_DIRECTION); }) 8586 return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
8593 #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\ 8594 (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \ 8595 (__v2df)(__m128d)(X), \ 8596 (__v2df)(__m128d)(Y), \ 8597 (__mmask8)(U), (int)(R)); }) 8599 #define _mm512_permutex_pd(X, C) __extension__ ({ \ 8600 (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ 8601 (__v8df)_mm512_undefined_pd(), \ 8602 0 + (((C) >> 0) & 0x3), \ 8603 0 + (((C) >> 2) & 0x3), \ 8604 0 + (((C) >> 4) & 0x3), \ 8605 0 + (((C) >> 6) & 0x3), \ 8606 4 + (((C) >> 0) & 0x3), \ 8607 4 + (((C) >> 2) & 0x3), \ 8608 4 + (((C) >> 4) & 0x3), \ 8609 4 + (((C) >> 6) & 0x3)); }) 8611 #define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \ 8612 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8613 (__v8df)_mm512_permutex_pd((X), (C)), \ 8614 (__v8df)(__m512d)(W)); }) 8616 #define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \ 8617 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8618 (__v8df)_mm512_permutex_pd((X), (C)), \ 8619 (__v8df)_mm512_setzero_pd()); }) 8621 #define _mm512_permutex_epi64(X, C) __extension__ ({ \ 8622 (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \ 8623 (__v8di)_mm512_undefined_epi32(), \ 8624 0 + (((C) >> 0) & 0x3), \ 8625 0 + (((C) >> 2) & 0x3), \ 8626 0 + (((C) >> 4) & 0x3), \ 8627 0 + (((C) >> 6) & 0x3), \ 8628 4 + (((C) >> 0) & 0x3), \ 8629 4 + (((C) >> 2) & 0x3), \ 8630 4 + (((C) >> 4) & 0x3), \ 8631 4 + (((C) >> 6) & 0x3)); }) 8633 #define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ 8634 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8635 (__v8di)_mm512_permutex_epi64((X), (C)), \ 8636 (__v8di)(__m512i)(W)); }) 8638 #define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \ 8639 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8640 (__v8di)_mm512_permutex_epi64((X), (C)), \ 8641 (__v8di)_mm512_setzero_si512()); }) 8646 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8655 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8664 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8673 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8682 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8692 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8701 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8710 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8719 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8728 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8737 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8743 #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 8749 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
/* NOTE(review): extraction-mangled region — the enclosing function signatures
   for the `return` statements below were dropped by the extraction, so each
   statement is annotated from its builtin call alone; confirm the owning
   intrinsic names against the upstream header. */
/* First token run: aliases the legacy `_mm512_mask_permutevar_epi32` name to
   `_mm512_mask_permutexvar_epi32`. The fused `return` appears to be the body
   of a 16-bit mask AND helper (kandhi builtin). */
8755 #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 8760 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
/* Presumably the mask AND-NOT helper (kandnhi builtin) — operand order per
   the KANDNW instruction; TODO confirm against upstream. */
8766 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
/* Presumably the mask OR helper (korhi builtin). */
8772 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
/* Returns the builtin's int result directly (no mask cast) — looks like the
   KORTEST carry-flag query; TODO confirm. */
8778 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
/* Same shape as above for the zero-flag query (kortestzhi builtin). */
8784 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
/* Pure C mask pack: keeps the low 8 bits of __A and places __B's low bits in
   the high byte — matches KUNPCKBW-style unpack; owning intrinsic not visible. */
8790 return (__mmask16) (( __A & 0xFF) | ( __B << 8));
/* Presumably the mask XNOR helper (kxnorhi builtin). */
8796 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
/* Presumably the mask XOR helper (kxorhi builtin). */
8802 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
/* NOTE(review): bodies of the streaming (non-temporal) store/load intrinsics;
   the signatures and the sibling aligned-vector typedefs were dropped by the
   extraction. `__builtin_nontemporal_store`/`__builtin_nontemporal_load` are
   the compiler's cache-bypass-hint memory builtins. */
/* 512-bit integer non-temporal store through a 64-byte-aligned pointer type. */
8809 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
/* 512-bit integer non-temporal load (expression split across two lines). */
8816 return (__m512i) __builtin_nontemporal_load((
const __v8di_aligned *)__P);
/* 512-bit double-vector non-temporal store. */
8823 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
/* Local typedef giving the 16-float vector an explicit 64-byte alignment so
   the builtin sees an aligned access. */
8829 typedef __v16sf __v16sf_aligned
__attribute__((aligned(64)));
/* 512-bit float-vector non-temporal store using the typedef above. */
8830 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8836 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8844 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8853 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8861 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8870 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8878 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8887 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8895 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8901 #define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \ 8902 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8903 (__v4sf)(__m128)(Y), (int)(P), \ 8904 (__mmask8)-1, (int)(R)); }) 8906 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \ 8907 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8908 (__v4sf)(__m128)(Y), (int)(P), \ 8909 (__mmask8)(M), (int)(R)); }) 8911 #define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \ 8912 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8913 (__v4sf)(__m128)(Y), (int)(P), \ 8915 _MM_FROUND_CUR_DIRECTION); }) 8917 #define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \ 8918 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8919 (__v4sf)(__m128)(Y), (int)(P), \ 8921 _MM_FROUND_CUR_DIRECTION); }) 8923 #define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \ 8924 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8925 (__v2df)(__m128d)(Y), (int)(P), \ 8926 (__mmask8)-1, (int)(R)); }) 8928 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \ 8929 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8930 (__v2df)(__m128d)(Y), (int)(P), \ 8931 (__mmask8)(M), (int)(R)); }) 8933 #define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \ 8934 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8935 (__v2df)(__m128d)(Y), (int)(P), \ 8937 _MM_FROUND_CUR_DIRECTION); }) 8939 #define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \ 8940 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8941 (__v2df)(__m128d)(Y), (int)(P), \ 8943 _MM_FROUND_CUR_DIRECTION); }) 9006 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9007 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
9013 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9021 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9029 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9030 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
9036 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9044 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
/* NOTE(review): four body fragments of the masked scalar move intrinsics —
   the surrounding signatures and the statements building `res` were dropped,
   so which move variant (ss vs. sd) each line belongs to is not visible. */
/* Merge form: lowest mask bit selects the new scalar __B[0], otherwise keeps
   the pass-through value __W[0]. */
9053 res[0] = (__U & 1) ? __B[0] : __W[0];
/* Zeroing form: lowest mask bit selects __B[0], otherwise zeroes element 0. */
9061 res[0] = (__U & 1) ? __B[0] : 0;
/* Same merge pattern as above (presumably the other scalar width). */
9069 res[0] = (__U & 1) ? __B[0] : __W[0];
/* Same zeroing pattern as above (presumably the other scalar width). */
9077 res[0] = (__U & 1) ? __B[0] : 0;
9084 __builtin_ia32_storess128_mask ((__v16sf *)__W,
9086 (__mmask16) __U & (__mmask16)1);
9092 __builtin_ia32_storesd128_mask ((__v8df *)__W,
9094 (__mmask8) __U & 1);
9100 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
9101 (__v4sf) {0.0, 0.0, 0.0, 0.0},
9104 return (__m128) __builtin_shufflevector(
9105 __builtin_ia32_loadss128_mask ((__v16sf *) __A,
9107 (__mmask16) __U & 1),
9114 return (__m128) __builtin_shufflevector(
9115 __builtin_ia32_loadss128_mask ((__v16sf *) __A,
9117 (__mmask16) __U & 1),
9124 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
9125 (__v2df) {0.0, 0.0}, 0, 2);
9127 return (__m128d) __builtin_shufflevector(
9128 __builtin_ia32_loadsd128_mask ((__v8df *) __A,
9130 (__mmask8) __U & 1),
9137 return (__m128d) __builtin_shufflevector(
9138 __builtin_ia32_loadsd128_mask ((__v8df *) __A,
9140 (__mmask8) __U & 1),
9144 #define _mm512_shuffle_epi32(A, I) __extension__ ({ \ 9145 (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 9146 (__v16si)_mm512_undefined_epi32(), \ 9147 0 + (((I) >> 0) & 0x3), \ 9148 0 + (((I) >> 2) & 0x3), \ 9149 0 + (((I) >> 4) & 0x3), \ 9150 0 + (((I) >> 6) & 0x3), \ 9151 4 + (((I) >> 0) & 0x3), \ 9152 4 + (((I) >> 2) & 0x3), \ 9153 4 + (((I) >> 4) & 0x3), \ 9154 4 + (((I) >> 6) & 0x3), \ 9155 8 + (((I) >> 0) & 0x3), \ 9156 8 + (((I) >> 2) & 0x3), \ 9157 8 + (((I) >> 4) & 0x3), \ 9158 8 + (((I) >> 6) & 0x3), \ 9159 12 + (((I) >> 0) & 0x3), \ 9160 12 + (((I) >> 2) & 0x3), \ 9161 12 + (((I) >> 4) & 0x3), \ 9162 12 + (((I) >> 6) & 0x3)); }) 9164 #define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \ 9165 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 9166 (__v16si)_mm512_shuffle_epi32((A), (I)), \ 9167 (__v16si)(__m512i)(W)); }) 9169 #define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \ 9170 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 9171 (__v16si)_mm512_shuffle_epi32((A), (I)), \ 9172 (__v16si)_mm512_setzero_si512()); }) 9177 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9185 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9193 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9201 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9209 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)__P,
9217 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)__P,
9225 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)__P,
9233 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)__P,
9241 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)__P,
9249 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)__P,
9257 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)__P,
9265 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)__P,
9273 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9281 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9289 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9297 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9302 #define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \ 9303 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9304 (__v8df)_mm512_undefined_pd(), \ 9305 (__mmask8)-1, (int)(R)); }) 9307 #define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \ 9308 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9309 (__v8df)(__m512d)(W), \ 9310 (__mmask8)(U), (int)(R)); }) 9312 #define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \ 9313 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9314 (__v8df)_mm512_setzero_pd(), \ 9315 (__mmask8)(U), (int)(R)); }) 9320 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9330 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9339 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9361 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9369 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9377 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9385 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9393 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9400 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9407 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9414 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9418 #define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \ 9419 (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9420 (__v2df)(__m128d)(B), \ 9421 (__v4sf)_mm_undefined_ps(), \ 9422 (__mmask8)-1, (int)(R)); }) 9424 #define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \ 9425 (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9426 (__v2df)(__m128d)(B), \ 9427 (__v4sf)(__m128)(W), \ 9428 (__mmask8)(U), (int)(R)); }) 9430 #define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \ 9431 (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9432 (__v2df)(__m128d)(B), \ 9433 (__v4sf)_mm_setzero_ps(), \ 9434 (__mmask8)(U), (int)(R)); }) 9439 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9448 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9454 #define _mm_cvtss_i32 _mm_cvtss_si32 9455 #define _mm_cvtsd_i32 _mm_cvtsd_si32 9456 #define _mm_cvti32_sd _mm_cvtsi32_sd 9457 #define _mm_cvti32_ss _mm_cvtsi32_ss 9459 #define _mm_cvtss_i64 _mm_cvtss_si64 9460 #define _mm_cvtsd_i64 _mm_cvtsd_si64 9461 #define _mm_cvti64_sd _mm_cvtsi64_sd 9462 #define _mm_cvti64_ss _mm_cvtsi64_ss 9466 #define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \ 9467 (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9470 #define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \ 9471 (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9475 #define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \ 9476 (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); }) 9478 #define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \ 9479 (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); }) 9482 #define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \ 9483 (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9486 #define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \ 9487 (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9491 #define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \ 9492 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9493 (__v4sf)(__m128)(B), \ 9494 (__v2df)_mm_undefined_pd(), \ 9495 (__mmask8)-1, (int)(R)); }) 9497 #define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \ 9498 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9499 (__v4sf)(__m128)(B), \ 9500 (__v2df)(__m128d)(W), \ 9501 (__mmask8)(U), (int)(R)); }) 9503 #define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \ 9504 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9505 (__v4sf)(__m128)(B), \ 9506 (__v2df)_mm_setzero_pd(), \ 9507 (__mmask8)(U), (int)(R)); }) 9512 return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9521 return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9530 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
9534 #define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \ 9535 (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ 9536 (unsigned long long)(B), (int)(R)); }) 9539 _mm_cvtu64_sd (__m128d __A,
unsigned long long __B)
9541 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
9546 #define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \ 9547 (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ 9553 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
9558 #define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \ 9559 (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ 9560 (unsigned long long)(B), (int)(R)); }) 9563 _mm_cvtu64_ss (__m128 __A,
unsigned long long __B)
9565 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
9573 return (__m512i) __builtin_ia32_selectd_512(__M,
9580 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M,
long long __A)
9582 return (__m512i) __builtin_ia32_selectq_512(__M,
9590 char __e58,
char __e57,
char __e56,
char __e55,
char __e54,
char __e53,
9591 char __e52,
char __e51,
char __e50,
char __e49,
char __e48,
char __e47,
9592 char __e46,
char __e45,
char __e44,
char __e43,
char __e42,
char __e41,
9593 char __e40,
char __e39,
char __e38,
char __e37,
char __e36,
char __e35,
9594 char __e34,
char __e33,
char __e32,
char __e31,
char __e30,
char __e29,
9595 char __e28,
char __e27,
char __e26,
char __e25,
char __e24,
char __e23,
9596 char __e22,
char __e21,
char __e20,
char __e19,
char __e18,
char __e17,
9597 char __e16,
char __e15,
char __e14,
char __e13,
char __e12,
char __e11,
9598 char __e10,
char __e9,
char __e8,
char __e7,
char __e6,
char __e5,
9599 char __e4,
char __e3,
char __e2,
char __e1,
char __e0) {
9601 return __extension__ (__m512i)(__v64qi)
9602 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9603 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9604 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9605 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9606 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9607 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9608 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9609 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9614 short __e27,
short __e26,
short __e25,
short __e24,
short __e23,
9615 short __e22,
short __e21,
short __e20,
short __e19,
short __e18,
9616 short __e17,
short __e16,
short __e15,
short __e14,
short __e13,
9617 short __e12,
short __e11,
short __e10,
short __e9,
short __e8,
9618 short __e7,
short __e6,
short __e5,
short __e4,
short __e3,
9619 short __e2,
short __e1,
short __e0) {
9620 return __extension__ (__m512i)(__v32hi)
9621 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9622 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9623 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9624 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9629 int __E,
int __F,
int __G,
int __H,
9630 int __I,
int __J,
int __K,
int __L,
9631 int __M,
int __N,
int __O,
int __P)
9633 return __extension__ (__m512i)(__v16si)
9634 { __P, __O, __N, __M, __L, __K, __J, __I,
9635 __H, __G, __F, __E, __D, __C, __B, __A };
9638 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ 9639 e8,e9,e10,e11,e12,e13,e14,e15) \ 9640 _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \ 9641 (e5),(e4),(e3),(e2),(e1),(e0)) 9645 long long __D,
long long __E,
long long __F,
9646 long long __G,
long long __H)
9648 return __extension__ (__m512i) (__v8di)
9649 { __H, __G, __F, __E, __D, __C, __B, __A };
9652 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ 9653 _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 9657 double __E,
double __F,
double __G,
double __H)
9659 return __extension__ (__m512d)
9660 { __H, __G, __F, __E, __D, __C, __B, __A };
9663 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ 9664 _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 9668 float __E,
float __F,
float __G,
float __H,
9669 float __I,
float __J,
float __K,
float __L,
9670 float __M,
float __N,
float __O,
float __P)
9672 return __extension__ (__m512)
9673 { __P, __O, __N, __M, __L, __K, __J, __I,
9674 __H, __G, __F, __E, __D, __C, __B, __A };
9677 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ 9678 _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \ 9679 (e4),(e3),(e2),(e1),(e0)) 9719 #define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1) \ 9721 __m256##T1 Vec256 = __builtin_shufflevector( \ 9722 (__v8d##T2)Vec512, \ 9723 (__v8d##T2)Vec512, \ 9726 __builtin_shufflevector( \ 9727 (__v8d##T2)Vec512, \ 9728 (__v8d##T2)Vec512, \ 9730 __m128##T1 Vec128 = __builtin_shufflevector( \ 9731 (__v4d##T2)Vec256, \ 9732 (__v4d##T2)Vec256, \ 9735 __builtin_shufflevector( \ 9736 (__v4d##T2)Vec256, \ 9737 (__v4d##T2)Vec256, \ 9739 Vec128 = __builtin_shufflevector((__v2d##T2)Vec128, \ 9740 (__v2d##T2)Vec128, 0, -1) \ 9742 __builtin_shufflevector((__v2d##T2)Vec128, \ 9743 (__v2d##T2)Vec128, 1, -1); \ 9780 #define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator, \ 9783 Vec512 = __builtin_ia32_select##T3##_512( \ 9785 (__v8d##T2)Vec512, \ 9786 (__v8d##T2)Vec512Neutral); \ 9787 _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1); \ 9829 #define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \ 9830 __m256##T1 Vec256 = \ 9831 (__m256##T1)(__builtin_shufflevector( \ 9832 (__v16s##T2)Vec512, \ 9833 (__v16s##T2)Vec512, \ 9834 0, 1, 2, 3, 4, 5, 6, 7) \ 9836 __builtin_shufflevector( \ 9837 (__v16s##T2)Vec512, \ 9838 (__v16s##T2)Vec512, \ 9839 8, 9, 10, 11, 12, 13, 14, 15)); \ 9840 __m128##T1 Vec128 = \ 9841 (__m128##T1)(__builtin_shufflevector( \ 9842 (__v8s##T2)Vec256, \ 9843 (__v8s##T2)Vec256, \ 9846 __builtin_shufflevector( \ 9847 (__v8s##T2)Vec256, \ 9848 (__v8s##T2)Vec256, \ 9850 Vec128 = (__m128##T1)(__builtin_shufflevector( \ 9851 (__v4s##T2)Vec128, \ 9852 (__v4s##T2)Vec128, \ 9855 __builtin_shufflevector( \ 9856 (__v4s##T2)Vec128, \ 9857 (__v4s##T2)Vec128, \ 9859 Vec128 = (__m128##T1)(__builtin_shufflevector( \ 9860 (__v4s##T2)Vec128, \ 9861 (__v4s##T2)Vec128, \ 9864 __builtin_shufflevector( \ 9865 
(__v4s##T2)Vec128, \ 9866 (__v4s##T2)Vec128, \ 9910 #define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator, \ 9913 Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \ 9915 (__v16s##T2)Vec512, \ 9916 (__v16s##T2)Vec512Neutral); \ 9917 _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1); \ 9962 #define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \ 9963 Vec512 = _mm512_##IntrinName( \ 9964 (__m512##T1)__builtin_shufflevector( \ 9965 (__v8d##T2)Vec512, \ 9966 (__v8d##T2)Vec512, \ 9967 0, 1, 2, 3, -1, -1, -1, -1), \ 9968 (__m512##T1)__builtin_shufflevector( \ 9969 (__v8d##T2)Vec512, \ 9970 (__v8d##T2)Vec512, \ 9971 4, 5, 6, 7, -1, -1, -1, -1)); \ 9972 Vec512 = _mm512_##IntrinName( \ 9973 (__m512##T1)__builtin_shufflevector( \ 9974 (__v8d##T2)Vec512, \ 9975 (__v8d##T2)Vec512, \ 9976 0, 1, -1, -1, -1, -1, -1, -1),\ 9977 (__m512##T1)__builtin_shufflevector( \ 9978 (__v8d##T2)Vec512, \ 9979 (__v8d##T2)Vec512, \ 9980 2, 3, -1, -1, -1, -1, -1, \ 9982 Vec512 = _mm512_##IntrinName( \ 9983 (__m512##T1)__builtin_shufflevector( \ 9984 (__v8d##T2)Vec512, \ 9985 (__v8d##T2)Vec512, \ 9986 0, -1, -1, -1, -1, -1, -1, -1),\ 9987 (__m512##T1)__builtin_shufflevector( \ 9988 (__v8d##T2)Vec512, \ 9989 (__v8d##T2)Vec512, \ 9990 1, -1, -1, -1, -1, -1, -1, -1))\ 10042 #define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \ 10045 Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \ 10047 (__v8d##T2)Vec512, \ 10048 (__v8d##T2)Vec512Neutral); \ 10049 _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2); \ 10055 max_epi64, i, i, q, __M);
10061 max_epu64, i, i, q, __M);
10067 max_pd, d, f, pd, __M);
10073 min_epi64, i, i, q, __M);
10079 min_epu64, i, i, q, __M);
10085 min_pd, d, f, pd, __M);
10094 #define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \ 10095 Vec512 = _mm512_##IntrinName( \ 10096 (__m512##T1)__builtin_shufflevector( \ 10097 (__v16s##T2)Vec512, \ 10098 (__v16s##T2)Vec512, \ 10099 0, 1, 2, 3, 4, 5, 6, 7, \ 10100 -1, -1, -1, -1, -1, -1, -1, -1), \ 10101 (__m512##T1)__builtin_shufflevector( \ 10102 (__v16s##T2)Vec512, \ 10103 (__v16s##T2)Vec512, \ 10104 8, 9, 10, 11, 12, 13, 14, 15, \ 10105 -1, -1, -1, -1, -1, -1, -1, -1)); \ 10106 Vec512 = _mm512_##IntrinName( \ 10107 (__m512##T1)__builtin_shufflevector( \ 10108 (__v16s##T2)Vec512, \ 10109 (__v16s##T2)Vec512, \ 10110 0, 1, 2, 3, -1, -1, -1, -1, \ 10111 -1, -1, -1, -1, -1, -1, -1, -1), \ 10112 (__m512##T1)__builtin_shufflevector( \ 10113 (__v16s##T2)Vec512, \ 10114 (__v16s##T2)Vec512, \ 10115 4, 5, 6, 7, -1, -1, -1, -1, \ 10116 -1, -1, -1, -1, -1, -1, -1, -1)); \ 10117 Vec512 = _mm512_##IntrinName( \ 10118 (__m512##T1)__builtin_shufflevector( \ 10119 (__v16s##T2)Vec512, \ 10120 (__v16s##T2)Vec512, \ 10121 0, 1, -1, -1, -1, -1, -1, -1, \ 10122 -1, -1, -1, -1, -1, -1, -1, -1), \ 10123 (__m512##T1)__builtin_shufflevector( \ 10124 (__v16s##T2)Vec512, \ 10125 (__v16s##T2)Vec512, \ 10126 2, 3, -1, -1, -1, -1, -1, -1, \ 10127 -1, -1, -1, -1, -1, -1, -1, -1)); \ 10128 Vec512 = _mm512_##IntrinName( \ 10129 (__m512##T1)__builtin_shufflevector( \ 10130 (__v16s##T2)Vec512, \ 10131 (__v16s##T2)Vec512, \ 10132 0, -1, -1, -1, -1, -1, -1, -1, \ 10133 -1, -1, -1, -1, -1, -1, -1, -1), \ 10134 (__m512##T1)__builtin_shufflevector( \ 10135 (__v16s##T2)Vec512, \ 10136 (__v16s##T2)Vec512, \ 10137 1, -1, -1, -1, -1, -1, -1, -1, \ 10138 -1, -1, -1, -1, -1, -1, -1, -1)); \ 10139 return Vec512[0]; \ 10185 #define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \ 10188 Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512( \ 10190 (__v16s##T2)Vec512, \ 10191 (__v16s##T2)Vec512Neutral); \ 10192 _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2); \ 10231 #undef 
__DEFAULT_FN_ATTRS 10233 #endif // __AVX512FINTRIN_H static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_cvtph_ps(__m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepu8_epi64(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcast_i64x4(__m256i __A)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_mul_ps(__m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_cvtepu32_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero.
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_setzero_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_pd(void *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_floor_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m128 __DEFAULT_FN_ATTRS _mm512_castps512_ps128(__m512 __a)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtusepi64_epi32(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_stream_si512(__m512i *__P, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srai_epi64(__m512i __A, int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline __m256 __DEFAULT_FN_ATTRS _mm512_castps512_ps256(__m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi8(char __w)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_stream_ps(float *__P, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_castps_si512(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm512_castsi512_si256(__m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_cvtss_f32(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_slli_epi64(__m512i __A, int __B)
static __inline void __DEFAULT_FN_ATTRS _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4 x float].
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_setzero_ps(void)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi32(int __s)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi32(__m512i __W)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_undefined_ps(void)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_add_ps(__m512 __a, __m512 __b)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_castpd_ps(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_movehdup_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_sqrt_pd(__m512d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_abs_pd(__m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtps_pd(__m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void)
Create a 128-bit vector of [4 x float] with undefined values.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS _mm512_castsi512_si128(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_stream_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_set1_pd(double __w)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_slli_epi32(__m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_max_pd(__m512d __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_load_ps(void const *__p)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_castsi512_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttss_i32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_loadu_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline unsigned char unsigned int unsigned int unsigned int * __p
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepi16_epi64(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_undefined_epi32(void)
#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, T2, T3, Mask)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ float __DEFAULT_FN_ATTRS _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm512_reduce_max_epu32(__m512i a)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS _mm_cvttss_u32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_moveldup_ps(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_ps(void *__P, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_abs_epi64(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
static __inline void __DEFAULT_FN_ATTRS _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator, Mask, T2, T1, T3)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_set1_ps(float __w)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_sqrt_ps(__m512 __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epi32(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi16(short __w)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_int2mask(int __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ vector float vector float __b
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastq_epi64(__m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt14_ss(__m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_ceil_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepi8_epi32(__m128i __A)
static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_rsqrt14_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_castsi512_ps(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtusepi32_epi8(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_castps256_ps512(__m256 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srai_epi32(__m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepu16_epi64(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_load_pd(void const *__p)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W)
static __inline __m128d __DEFAULT_FN_ATTRS _mm512_castpd512_pd128(__m512d __a)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_castpd256_pd512(__m256d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask2int(__mmask16 __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_load_epi32(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_castpd_si512(__m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_cvtepi32_pd(__m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srli_epi64(__m512i __A, int __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm512_reduce_max_epu64(__m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_add_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srli_epi32(__m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_movedup_pd(__m512d __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm512_reduce_min_epu32(__m512i a)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm512_castpd512_pd256(__m512d __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_getexp_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, T2, T3, Mask)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_load_epi64(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W)
static __inline void __DEFAULT_FN_ATTRS _mm512_store_pd(void *__P, __m512d __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi64(long long __d)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_floor_ps(__m512 __A)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_loadu_si512(void const *__P)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_add_ps(__m512 __W)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_castps_pd(__m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_setzero_si512(void)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rsqrt14_sd(__m128d __A, __m128d __B)
#define _mm512_setzero_epi32
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_undefined_pd(void)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
#define __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_castps128_ps512(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_abs_ps(__m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_stream_pd(double *__P, __m512d __A)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_ceil_ps(__m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline void __DEFAULT_FN_ATTRS _mm512_store_epi32(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2 x double], with the upper elements zeroed.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_rcp14_ps(__m512 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator, Mask, T2, T1, T3)
static __inline void __DEFAULT_FN_ATTRS _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_undefined(void)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_broadcast_f32x4(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS _mm_cvtss_u32(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_i32(__m128d __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_abs_epi32(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] with unspecified content.
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ float __DEFAULT_FN_ATTRS _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline void __DEFAULT_FN_ATTRS _mm512_store_ps(void *__P, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc(__mmask16 __A, __mmask16 __B)