25 #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead." 28 #ifndef __AVX512DQINTRIN_H 29 #define __AVX512DQINTRIN_H 32 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"))) 36 return (__m512i) ((__v8du) __A * (__v8du) __B);
41 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
48 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
55 return (__m512d)((__v8du)__A ^ (__v8du)__B);
60 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
67 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
74 return (__m512)((__v16su)__A ^ (__v16su)__B);
79 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
86 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
93 return (__m512d)((__v8du)__A | (__v8du)__B);
98 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
105 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
112 return (__m512)((__v16su)__A | (__v16su)__B);
117 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
124 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
131 return (__m512d)((__v8du)__A & (__v8du)__B);
136 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
143 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
150 return (__m512)((__v16su)__A & (__v16su)__B);
155 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
162 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
169 return (__m512d)(~(__v8du)__A & (__v8du)__B);
174 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
181 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
188 return (__m512)(~(__v16su)__A & (__v16su)__B);
193 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
200 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
207 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
215 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
223 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
229 #define _mm512_cvt_roundpd_epi64(A, R) __extension__ ({ \ 230 (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ 231 (__v8di)_mm512_setzero_si512(), \ 232 (__mmask8)-1, (int)(R)); }) 234 #define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) __extension__ ({ \ 235 (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ 236 (__v8di)(__m512i)(W), \ 237 (__mmask8)(U), (int)(R)); }) 239 #define _mm512_maskz_cvt_roundpd_epi64(U, A, R) __extension__ ({ \ 240 (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ 241 (__v8di)_mm512_setzero_si512(), \ 242 (__mmask8)(U), (int)(R)); }) 246 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
254 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
262 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
268 #define _mm512_cvt_roundpd_epu64(A, R) __extension__ ({ \ 269 (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ 270 (__v8di)_mm512_setzero_si512(), \ 271 (__mmask8)-1, (int)(R)); }) 273 #define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) __extension__ ({ \ 274 (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ 275 (__v8di)(__m512i)(W), \ 276 (__mmask8)(U), (int)(R)); }) 278 #define _mm512_maskz_cvt_roundpd_epu64(U, A, R) __extension__ ({ \ 279 (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ 280 (__v8di)_mm512_setzero_si512(), \ 281 (__mmask8)(U), (int)(R)); }) 285 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
293 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
301 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
307 #define _mm512_cvt_roundps_epi64(A, R) __extension__ ({ \ 308 (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ 309 (__v8di)_mm512_setzero_si512(), \ 310 (__mmask8)-1, (int)(R)); }) 312 #define _mm512_mask_cvt_roundps_epi64(W, U, A, R) __extension__ ({ \ 313 (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ 314 (__v8di)(__m512i)(W), \ 315 (__mmask8)(U), (int)(R)); }) 317 #define _mm512_maskz_cvt_roundps_epi64(U, A, R) __extension__ ({ \ 318 (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ 319 (__v8di)_mm512_setzero_si512(), \ 320 (__mmask8)(U), (int)(R)); }) 324 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
332 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
340 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
346 #define _mm512_cvt_roundps_epu64(A, R) __extension__ ({ \ 347 (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ 348 (__v8di)_mm512_setzero_si512(), \ 349 (__mmask8)-1, (int)(R)); }) 351 #define _mm512_mask_cvt_roundps_epu64(W, U, A, R) __extension__ ({ \ 352 (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ 353 (__v8di)(__m512i)(W), \ 354 (__mmask8)(U), (int)(R)); }) 356 #define _mm512_maskz_cvt_roundps_epu64(U, A, R) __extension__ ({ \ 357 (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ 358 (__v8di)_mm512_setzero_si512(), \ 359 (__mmask8)(U), (int)(R)); }) 364 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
372 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
380 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
386 #define _mm512_cvt_roundepi64_pd(A, R) __extension__ ({ \ 387 (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ 388 (__v8df)_mm512_setzero_pd(), \ 389 (__mmask8)-1, (int)(R)); }) 391 #define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) __extension__ ({ \ 392 (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ 393 (__v8df)(__m512d)(W), \ 394 (__mmask8)(U), (int)(R)); }) 396 #define _mm512_maskz_cvt_roundepi64_pd(U, A, R) __extension__ ({ \ 397 (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ 398 (__v8df)_mm512_setzero_pd(), \ 399 (__mmask8)(U), (int)(R)); }) 403 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
411 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
419 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
425 #define _mm512_cvt_roundepi64_ps(A, R) __extension__ ({ \ 426 (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ 427 (__v8sf)_mm256_setzero_ps(), \ 428 (__mmask8)-1, (int)(R)); }) 430 #define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) __extension__ ({ \ 431 (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ 432 (__v8sf)(__m256)(W), (__mmask8)(U), \ 435 #define _mm512_maskz_cvt_roundepi64_ps(U, A, R) __extension__ ({ \ 436 (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ 437 (__v8sf)_mm256_setzero_ps(), \ 438 (__mmask8)(U), (int)(R)); }) 443 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
451 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
459 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
465 #define _mm512_cvtt_roundpd_epi64(A, R) __extension__ ({ \ 466 (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ 467 (__v8di)_mm512_setzero_si512(), \ 468 (__mmask8)-1, (int)(R)); }) 470 #define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \ 471 (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ 472 (__v8di)(__m512i)(W), \ 473 (__mmask8)(U), (int)(R)); }) 475 #define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \ 476 (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ 477 (__v8di)_mm512_setzero_si512(), \ 478 (__mmask8)(U), (int)(R)); }) 482 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
490 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
498 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
504 #define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({ \ 505 (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ 506 (__v8di)_mm512_setzero_si512(), \ 507 (__mmask8)-1, (int)(R)); }) 509 #define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \ 510 (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ 511 (__v8di)(__m512i)(W), \ 512 (__mmask8)(U), (int)(R)); }) 514 #define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({ \ 515 (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ 516 (__v8di)_mm512_setzero_si512(), \ 517 (__mmask8)(U), (int)(R)); }) 521 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
529 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
537 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
543 #define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \ 544 (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ 545 (__v8di)_mm512_setzero_si512(), \ 546 (__mmask8)-1, (int)(R)); }) 548 #define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \ 549 (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ 550 (__v8di)(__m512i)(W), \ 551 (__mmask8)(U), (int)(R)); }) 553 #define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \ 554 (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ 555 (__v8di)_mm512_setzero_si512(), \ 556 (__mmask8)(U), (int)(R)); }) 560 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
568 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
576 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
582 #define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \ 583 (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ 584 (__v8di)_mm512_setzero_si512(), \ 585 (__mmask8)-1, (int)(R)); }) 587 #define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \ 588 (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ 589 (__v8di)(__m512i)(W), \ 590 (__mmask8)(U), (int)(R)); }) 592 #define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \ 593 (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ 594 (__v8di)_mm512_setzero_si512(), \ 595 (__mmask8)(U), (int)(R)); }) 599 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
607 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
615 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
621 #define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \ 622 (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ 623 (__v8df)_mm512_setzero_pd(), \ 624 (__mmask8)-1, (int)(R)); }) 626 #define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \ 627 (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ 628 (__v8df)(__m512d)(W), \ 629 (__mmask8)(U), (int)(R)); }) 632 #define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \ 633 (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ 634 (__v8df)_mm512_setzero_pd(), \ 635 (__mmask8)(U), (int)(R)); }) 640 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
648 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
656 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
662 #define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \ 663 (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ 664 (__v8sf)_mm256_setzero_ps(), \ 665 (__mmask8)-1, (int)(R)); }) 667 #define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \ 668 (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ 669 (__v8sf)(__m256)(W), (__mmask8)(U), \ 672 #define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \ 673 (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ 674 (__v8sf)_mm256_setzero_ps(), \ 675 (__mmask8)(U), (int)(R)); }) 677 #define _mm512_range_pd(A, B, C) __extension__ ({ \ 678 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 679 (__v8df)(__m512d)(B), (int)(C), \ 680 (__v8df)_mm512_setzero_pd(), \ 682 _MM_FROUND_CUR_DIRECTION); }) 684 #define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \ 685 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 686 (__v8df)(__m512d)(B), (int)(C), \ 687 (__v8df)(__m512d)(W), (__mmask8)(U), \ 688 _MM_FROUND_CUR_DIRECTION); }) 690 #define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \ 691 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 692 (__v8df)(__m512d)(B), (int)(C), \ 693 (__v8df)_mm512_setzero_pd(), \ 695 _MM_FROUND_CUR_DIRECTION); }) 697 #define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \ 698 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 699 (__v8df)(__m512d)(B), (int)(C), \ 700 (__v8df)_mm512_setzero_pd(), \ 701 (__mmask8)-1, (int)(R)); }) 703 #define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \ 704 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 705 (__v8df)(__m512d)(B), (int)(C), \ 706 (__v8df)(__m512d)(W), (__mmask8)(U), \ 709 #define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \ 710 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 711 (__v8df)(__m512d)(B), (int)(C), \ 712 (__v8df)_mm512_setzero_pd(), \ 713 
(__mmask8)(U), (int)(R)); }) 715 #define _mm512_range_ps(A, B, C) __extension__ ({ \ 716 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 717 (__v16sf)(__m512)(B), (int)(C), \ 718 (__v16sf)_mm512_setzero_ps(), \ 720 _MM_FROUND_CUR_DIRECTION); }) 722 #define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \ 723 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 724 (__v16sf)(__m512)(B), (int)(C), \ 725 (__v16sf)(__m512)(W), (__mmask16)(U), \ 726 _MM_FROUND_CUR_DIRECTION); }) 728 #define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \ 729 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 730 (__v16sf)(__m512)(B), (int)(C), \ 731 (__v16sf)_mm512_setzero_ps(), \ 733 _MM_FROUND_CUR_DIRECTION); }) 735 #define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \ 736 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 737 (__v16sf)(__m512)(B), (int)(C), \ 738 (__v16sf)_mm512_setzero_ps(), \ 739 (__mmask16)-1, (int)(R)); }) 741 #define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \ 742 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 743 (__v16sf)(__m512)(B), (int)(C), \ 744 (__v16sf)(__m512)(W), (__mmask16)(U), \ 747 #define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \ 748 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 749 (__v16sf)(__m512)(B), (int)(C), \ 750 (__v16sf)_mm512_setzero_ps(), \ 751 (__mmask16)(U), (int)(R)); }) 753 #define _mm_range_round_ss(A, B, C, R) __extension__ ({ \ 754 (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ 755 (__v4sf)(__m128)(B), \ 756 (__v4sf)_mm_setzero_ps(), \ 757 (__mmask8) -1, (int)(C),\ 760 #define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION) 762 #define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \ 763 (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ 764 (__v4sf)(__m128)(B), \ 765 (__v4sf)(__m128)(W),\ 766 (__mmask8)(U), 
(int)(C),\ 769 #define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION) 771 #define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \ 772 (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ 773 (__v4sf)(__m128)(B), \ 774 (__v4sf)_mm_setzero_ps(), \ 775 (__mmask8)(U), (int)(C),\ 778 #define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) 780 #define _mm_range_round_sd(A, B, C, R) __extension__ ({ \ 781 (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ 782 (__v2df)(__m128d)(B), \ 783 (__v2df)_mm_setzero_pd(), \ 784 (__mmask8) -1, (int)(C),\ 787 #define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION) 789 #define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \ 790 (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ 791 (__v2df)(__m128d)(B), \ 792 (__v2df)(__m128d)(W),\ 793 (__mmask8)(U), (int)(C),\ 796 #define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION) 798 #define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \ 799 (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ 800 (__v2df)(__m128d)(B), \ 801 (__v2df)_mm_setzero_pd(), \ 802 (__mmask8)(U), (int)(C),\ 805 #define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) 807 #define _mm512_reduce_pd(A, B) __extension__ ({ \ 808 (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 809 (__v8df)_mm512_setzero_pd(), \ 811 _MM_FROUND_CUR_DIRECTION); }) 813 #define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \ 814 (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 815 (__v8df)(__m512d)(W), \ 817 _MM_FROUND_CUR_DIRECTION); }) 819 #define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({ \ 820 (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), 
(int)(B), \ 821 (__v8df)_mm512_setzero_pd(), \ 823 _MM_FROUND_CUR_DIRECTION); }) 825 #define _mm512_reduce_ps(A, B) __extension__ ({ \ 826 (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 827 (__v16sf)_mm512_setzero_ps(), \ 829 _MM_FROUND_CUR_DIRECTION); }) 831 #define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({ \ 832 (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 833 (__v16sf)(__m512)(W), \ 835 _MM_FROUND_CUR_DIRECTION); }) 837 #define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({ \ 838 (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 839 (__v16sf)_mm512_setzero_ps(), \ 841 _MM_FROUND_CUR_DIRECTION); }) 843 #define _mm512_reduce_round_pd(A, B, R) __extension__ ({\ 844 (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 845 (__v8df)_mm512_setzero_pd(), \ 846 (__mmask8)-1, (int)(R)); }) 848 #define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({\ 849 (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 850 (__v8df)(__m512d)(W), \ 851 (__mmask8)(U), (int)(R)); }) 853 #define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({\ 854 (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 855 (__v8df)_mm512_setzero_pd(), \ 856 (__mmask8)(U), (int)(R)); }) 858 #define _mm512_reduce_round_ps(A, B, R) __extension__ ({\ 859 (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 860 (__v16sf)_mm512_setzero_ps(), \ 861 (__mmask16)-1, (int)(R)); }) 863 #define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({\ 864 (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 865 (__v16sf)(__m512)(W), \ 866 (__mmask16)(U), (int)(R)); }) 868 #define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({\ 869 (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 870 (__v16sf)_mm512_setzero_ps(), \ 871 (__mmask16)(U), (int)(R)); }) 873 #define 
_mm_reduce_ss(A, B, C) __extension__ ({ \ 874 (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 875 (__v4sf)(__m128)(B), \ 876 (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ 877 (int)(C), _MM_FROUND_CUR_DIRECTION); }) 879 #define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \ 880 (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 881 (__v4sf)(__m128)(B), \ 882 (__v4sf)(__m128)(W), (__mmask8)(U), \ 883 (int)(C), _MM_FROUND_CUR_DIRECTION); }) 885 #define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \ 886 (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 887 (__v4sf)(__m128)(B), \ 888 (__v4sf)_mm_setzero_ps(), \ 889 (__mmask8)(U), (int)(C), \ 890 _MM_FROUND_CUR_DIRECTION); }) 892 #define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \ 893 (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 894 (__v4sf)(__m128)(B), \ 895 (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ 896 (int)(C), (int)(R)); }) 898 #define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \ 899 (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 900 (__v4sf)(__m128)(B), \ 901 (__v4sf)(__m128)(W), (__mmask8)(U), \ 902 (int)(C), (int)(R)); }) 904 #define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \ 905 (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 906 (__v4sf)(__m128)(B), \ 907 (__v4sf)_mm_setzero_ps(), \ 908 (__mmask8)(U), (int)(C), (int)(R)); }) 910 #define _mm_reduce_sd(A, B, C) __extension__ ({ \ 911 (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 912 (__v2df)(__m128d)(B), \ 913 (__v2df)_mm_setzero_pd(), \ 914 (__mmask8)-1, (int)(C), \ 915 _MM_FROUND_CUR_DIRECTION); }) 917 #define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \ 918 (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 919 (__v2df)(__m128d)(B), \ 920 (__v2df)(__m128d)(W), (__mmask8)(U), \ 921 (int)(C), _MM_FROUND_CUR_DIRECTION); }) 923 #define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \ 924 
(__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 925 (__v2df)(__m128d)(B), \ 926 (__v2df)_mm_setzero_pd(), \ 927 (__mmask8)(U), (int)(C), \ 928 _MM_FROUND_CUR_DIRECTION); }) 930 #define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \ 931 (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 932 (__v2df)(__m128d)(B), \ 933 (__v2df)_mm_setzero_pd(), \ 934 (__mmask8)-1, (int)(C), (int)(R)); }) 936 #define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \ 937 (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 938 (__v2df)(__m128d)(B), \ 939 (__v2df)(__m128d)(W), (__mmask8)(U), \ 940 (int)(C), (int)(R)); }) 942 #define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \ 943 (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 944 (__v2df)(__m128d)(B), \ 945 (__v2df)_mm_setzero_pd(), \ 946 (__mmask8)(U), (int)(C), (int)(R)); }) 951 return (
__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
957 return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
963 return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
969 return (
__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
976 return (__m512)__builtin_shufflevector((__v4sf)__A,
978 0, 1, 0, 1, 0, 1, 0, 1,
979 0, 1, 0, 1, 0, 1, 0, 1);
985 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
993 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
1001 return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
1002 0, 1, 2, 3, 4, 5, 6, 7,
1003 0, 1, 2, 3, 4, 5, 6, 7);
1009 return (__m512)__builtin_ia32_selectps_512((
__mmask8)__M,
1017 return (__m512)__builtin_ia32_selectps_512((
__mmask8)__M,
1025 return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
1026 0, 1, 0, 1, 0, 1, 0, 1);
1032 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
1040 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
1048 return (__m512i)__builtin_shufflevector((__v4si)__A,
1050 0, 1, 0, 1, 0, 1, 0, 1,
1051 0, 1, 0, 1, 0, 1, 0, 1);
1057 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1065 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1073 return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
1074 0, 1, 2, 3, 4, 5, 6, 7,
1075 0, 1, 2, 3, 4, 5, 6, 7);
1081 return (__m512i)__builtin_ia32_selectd_512((
__mmask8)__M,
1089 return (__m512i)__builtin_ia32_selectd_512((
__mmask8)__M,
1097 return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1098 0, 1, 0, 1, 0, 1, 0, 1);
1104 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1112 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1117 #define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \ 1118 (__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \ 1119 (__v16sf)_mm512_undefined_ps(), \ 1120 ((imm) & 1) ? 8 : 0, \ 1121 ((imm) & 1) ? 9 : 1, \ 1122 ((imm) & 1) ? 10 : 2, \ 1123 ((imm) & 1) ? 11 : 3, \ 1124 ((imm) & 1) ? 12 : 4, \ 1125 ((imm) & 1) ? 13 : 5, \ 1126 ((imm) & 1) ? 14 : 6, \ 1127 ((imm) & 1) ? 15 : 7); }) 1129 #define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \ 1130 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 1131 (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \ 1134 #define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \ 1135 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 1136 (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \ 1137 (__v8sf)_mm256_setzero_ps()); }) 1139 #define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \ 1140 (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 1141 (__v8df)_mm512_undefined_pd(), \ 1142 0 + ((imm) & 0x3) * 2, \ 1143 1 + ((imm) & 0x3) * 2); }) 1145 #define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ 1146 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 1147 (__v2df)_mm512_extractf64x2_pd((A), (imm)), \ 1150 #define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ 1151 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 1152 (__v2df)_mm512_extractf64x2_pd((A), (imm)), \ 1153 (__v2df)_mm_setzero_pd()); }) 1155 #define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \ 1156 (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 1157 (__v16si)_mm512_undefined_epi32(), \ 1158 ((imm) & 1) ? 8 : 0, \ 1159 ((imm) & 1) ? 9 : 1, \ 1160 ((imm) & 1) ? 10 : 2, \ 1161 ((imm) & 1) ? 11 : 3, \ 1162 ((imm) & 1) ? 12 : 4, \ 1163 ((imm) & 1) ? 13 : 5, \ 1164 ((imm) & 1) ? 14 : 6, \ 1165 ((imm) & 1) ? 
15 : 7); }) 1167 #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \ 1168 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 1169 (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \ 1172 #define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \ 1173 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 1174 (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \ 1175 (__v8si)_mm256_setzero_si256()); }) 1177 #define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \ 1178 (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 1179 (__v8di)_mm512_undefined_epi32(), \ 1180 0 + ((imm) & 0x3) * 2, \ 1181 1 + ((imm) & 0x3) * 2); }) 1183 #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ 1184 (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ 1185 (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ 1188 #define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ 1189 (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ 1190 (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ 1191 (__v2di)_mm_setzero_di()); }) 1193 #define _mm512_insertf32x8(A, B, imm) __extension__ ({ \ 1194 (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ 1195 (__v16sf)_mm512_castps256_ps512((__m256)(B)),\ 1196 ((imm) & 0x1) ? 0 : 16, \ 1197 ((imm) & 0x1) ? 1 : 17, \ 1198 ((imm) & 0x1) ? 2 : 18, \ 1199 ((imm) & 0x1) ? 3 : 19, \ 1200 ((imm) & 0x1) ? 4 : 20, \ 1201 ((imm) & 0x1) ? 5 : 21, \ 1202 ((imm) & 0x1) ? 6 : 22, \ 1203 ((imm) & 0x1) ? 7 : 23, \ 1204 ((imm) & 0x1) ? 16 : 8, \ 1205 ((imm) & 0x1) ? 17 : 9, \ 1206 ((imm) & 0x1) ? 18 : 10, \ 1207 ((imm) & 0x1) ? 19 : 11, \ 1208 ((imm) & 0x1) ? 20 : 12, \ 1209 ((imm) & 0x1) ? 21 : 13, \ 1210 ((imm) & 0x1) ? 22 : 14, \ 1211 ((imm) & 0x1) ? 
23 : 15); }) 1213 #define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \ 1214 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1215 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ 1218 #define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \ 1219 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1220 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ 1221 (__v16sf)_mm512_setzero_ps()); }) 1223 #define _mm512_insertf64x2(A, B, imm) __extension__ ({ \ 1224 (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 1225 (__v8df)_mm512_castpd128_pd512((__m128d)(B)),\ 1226 (((imm) & 0x3) == 0) ? 8 : 0, \ 1227 (((imm) & 0x3) == 0) ? 9 : 1, \ 1228 (((imm) & 0x3) == 1) ? 8 : 2, \ 1229 (((imm) & 0x3) == 1) ? 9 : 3, \ 1230 (((imm) & 0x3) == 2) ? 8 : 4, \ 1231 (((imm) & 0x3) == 2) ? 9 : 5, \ 1232 (((imm) & 0x3) == 3) ? 8 : 6, \ 1233 (((imm) & 0x3) == 3) ? 9 : 7); }) 1235 #define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \ 1236 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1237 (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ 1240 #define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \ 1241 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1242 (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ 1243 (__v8df)_mm512_setzero_pd()); }) 1245 #define _mm512_inserti32x8(A, B, imm) __extension__ ({ \ 1246 (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 1247 (__v16si)_mm512_castsi256_si512((__m256i)(B)),\ 1248 ((imm) & 0x1) ? 0 : 16, \ 1249 ((imm) & 0x1) ? 1 : 17, \ 1250 ((imm) & 0x1) ? 2 : 18, \ 1251 ((imm) & 0x1) ? 3 : 19, \ 1252 ((imm) & 0x1) ? 4 : 20, \ 1253 ((imm) & 0x1) ? 5 : 21, \ 1254 ((imm) & 0x1) ? 6 : 22, \ 1255 ((imm) & 0x1) ? 7 : 23, \ 1256 ((imm) & 0x1) ? 16 : 8, \ 1257 ((imm) & 0x1) ? 17 : 9, \ 1258 ((imm) & 0x1) ? 18 : 10, \ 1259 ((imm) & 0x1) ? 19 : 11, \ 1260 ((imm) & 0x1) ? 20 : 12, \ 1261 ((imm) & 0x1) ? 21 : 13, \ 1262 ((imm) & 0x1) ? 22 : 14, \ 1263 ((imm) & 0x1) ? 
23 : 15); }) 1265 #define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \ 1266 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 1267 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ 1270 #define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \ 1271 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 1272 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ 1273 (__v16si)_mm512_setzero_si512()); }) 1275 #define _mm512_inserti64x2(A, B, imm) __extension__ ({ \ 1276 (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 1277 (__v8di)_mm512_castsi128_si512((__m128i)(B)),\ 1278 (((imm) & 0x3) == 0) ? 8 : 0, \ 1279 (((imm) & 0x3) == 0) ? 9 : 1, \ 1280 (((imm) & 0x3) == 1) ? 8 : 2, \ 1281 (((imm) & 0x3) == 1) ? 9 : 3, \ 1282 (((imm) & 0x3) == 2) ? 8 : 4, \ 1283 (((imm) & 0x3) == 2) ? 9 : 5, \ 1284 (((imm) & 0x3) == 3) ? 8 : 6, \ 1285 (((imm) & 0x3) == 3) ? 9 : 7); }) 1287 #define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \ 1288 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 1289 (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ 1292 #define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \ 1293 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 1294 (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ 1295 (__v8di)_mm512_setzero_si512()); }) 1297 #define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ 1298 (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ 1299 (int)(imm), (__mmask16)(U)); }) 1301 #define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \ 1302 (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ 1303 (int)(imm), (__mmask16)-1); }) 1305 #define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ 1306 (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ 1309 #define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \ 1310 (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ 1313 #define _mm_fpclass_sd_mask(A, imm) 
__extension__ ({ \ 1314 (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ 1317 #define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \ 1318 (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ 1321 #define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \ 1322 (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ 1325 #define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \ 1326 (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ 1329 #undef __DEFAULT_FN_ATTRS static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu64_ps(__mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_broadcast_f32x2(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtps_epu64(__mmask8 __U, __m256 __A)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero.
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_xor_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvttpd_epi64(__mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_setzero_pd(void)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_cvtepi64_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtpd_epi64(__m512i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvttpd_epi64(__m512i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epu64(__m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_andnot_pd(__m512d __A, __m512d __B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_setzero_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttpd_epu64(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_or_pd(__m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvttps_epi64(__mmask8 __U, __m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_movm_epi32(__mmask16 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void)
Create a 128-bit vector of [4 x float] with undefined values.
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_andnot_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_movepi64_mask(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtpd_epu64(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_xor_ps(__m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvttps_epu64(__m512i __W, __mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtpd_epu64(__m512i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvttps_epu64(__mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtepu64_pd(__m512i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_cvtepu64_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvttpd_epu64(__mmask8 __U, __m512d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi64_ps(__mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcast_i32x8(__m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtpd_epi64(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcast_i64x2(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtpd_epu64(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B)
#define __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcast_i32x2(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_broadcast_f64x2(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtps_epu64(__m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_ps(__m256 __W, __mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtepi64_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epi64(__m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtps_epi64(__mmask8 __U, __m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_broadcast_f32x8(__m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_mask_cvtepu64_ps(__m256 __W, __mmask8 __U, __m512i __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtpd_epi64(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mullo_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtps_epu64(__m512i __W, __mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_and_ps(__m512 __A, __m512 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_movm_epi64(__mmask8 __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_setzero_si512(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtps_epi64(__m512i __W, __mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_and_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvttpd_epu64(__m512i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_or_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttpd_epi64(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvttps_epi64(__m512i __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_movepi32_mask(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtps_epi64(__m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)