25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead." 28 #ifndef __AVX512VLINTRIN_H 29 #define __AVX512VLINTRIN_H 31 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"))) 34 static __inline __m128i
__attribute__((__always_inline__, __nodebug__, __target__(
"avx512f")))
35 _mm_setzero_di(
void) {
36 return (__m128i)(__v2di){ 0LL, 0LL};
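/* Usage sketch (illustrative, not part of the original header): these
 * intrinsics are reached through <immintrin.h> and require AVX-512F plus
 * AVX-512VL support, e.g. compiling with -mavx512vl on clang or gcc.
 *
 *   #include <immintrin.h>
 *   __m128i zero = _mm_setzero_di();   // all-zero vector of 2 x i64
 */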
#define _mm_cmpeq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm256_cmpeq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm_cmpeq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm256_cmpeq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm_cmpeq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm256_cmpeq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm_cmpeq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm256_cmpeq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
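/* Usage sketch (illustrative, not part of the original header): the
 * convenience macros above expand to the generic _mm*_cmp_ep*_mask forms
 * and yield an __mmask8 bitmask, one bit per lane.
 *
 *   __m256i a = _mm256_set1_epi32(3), b = _mm256_set1_epi32(5);
 *   __mmask8 m  = _mm256_cmplt_epi32_mask(a, b);         // 0xFF: 3 < 5 in every lane
 *   __mmask8 m2 = _mm256_mask_cmpge_epu32_mask(m, a, b); // result AND-ed with m
 */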
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
#define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                        (__v4si)(__m128i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                        (__v4si)(__m128i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                        (__v8si)(__m256i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                        (__v8si)(__m256i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                        (__v2di)(__m128i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                        (__v2di)(__m128i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                        (__v4di)(__m256i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                        (__v4di)(__m256i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                         (__v8sf)(__m256)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm256_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                         (__v8sf)(__m256)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm256_cmp_pd_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                         (__v4df)(__m256d)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm256_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                         (__v4df)(__m256d)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm_cmp_ps_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                         (__v4sf)(__m128)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                         (__v4sf)(__m128)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm_cmp_pd_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                         (__v2df)(__m128d)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                         (__v2df)(__m128d)(b), (int)(p), \
                                         (__mmask8)(m)); })
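/* Usage sketch (illustrative, not part of the original header): the float
 * and double forms take an AVX-style predicate such as _CMP_LT_OQ rather
 * than an _MM_CMPINT_* value.
 *
 *   __m256 x = _mm256_set1_ps(1.0f), y = _mm256_set1_ps(2.0f);
 *   __mmask8 lt = _mm256_cmp_ps_mask(x, y, _CMP_LT_OQ);   // 0xFF
 */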
return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
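/* Usage sketch (illustrative, not part of the original header): the masked
 * FMA intrinsics built on the __builtin_ia32_vfmadd* forms above keep the
 * lanes of the first operand where a mask bit is 0 and compute A*B+C where
 * it is 1.
 *
 *   __m128d a = _mm_set1_pd(2.0), b = _mm_set1_pd(3.0), c = _mm_set1_pd(1.0);
 *   __m128d r = _mm_mask_fmadd_pd(a, 0x1, b, c);   // lane0 = 7.0, lane1 = 2.0
 */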
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
__builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
__builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
__builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
__builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
__builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
__builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
__builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
__builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
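/* Usage sketch (illustrative, not part of the original header): compress
 * packs the active lanes to the low end of the vector; the compressstoreu
 * intrinsics write only the active lanes contiguously to memory.
 *
 *   double out[2] = {0};
 *   __m128d v = _mm_set_pd(4.0, 3.0);          // lanes: {3.0, 4.0}
 *   _mm_mask_compressstoreu_pd(out, 0x2, v);   // stores only 4.0, to out[0]
 */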
return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
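/* Usage sketch (illustrative, not part of the original header): the masked
 * conversions above follow the usual mask/maskz pattern, e.g. zeroing the
 * inactive lanes with the maskz form.
 *
 *   __m128i i = _mm_set_epi32(4, 3, 2, 1);
 *   __m128  f = _mm_maskz_cvtepi32_ps(0x5, i);   // {1.0f, 0.0f, 3.0f, 0.0f}
 */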
return (__m128d) __builtin_convertvector(
    __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
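/* Usage sketch (illustrative, not part of the original header):
 * _mm_getexp_pd extracts the unbiased exponent of each lane as a double,
 * i.e. floor(log2(|x|)).
 *
 *   __m128d x = _mm_set_pd(8.0, 0.5);   // lanes: {0.5, 8.0}
 *   __m128d e = _mm_getexp_pd(x);       // {-1.0, 3.0}
 */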
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U,
return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
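/* Usage sketch (illustrative, not part of the original header): 64-bit
 * per-lane max/min become available on 128/256-bit vectors with AVX-512VL,
 * implemented via the pmaxsq/pminuq builtins above.
 *
 *   __m256i a = _mm256_set1_epi64x(-1), b = _mm256_set1_epi64x(1);
 *   __m256i s = _mm256_max_epi64(a, b);   // signed:   every lane 1
 *   __m256i u = _mm256_max_epu64(a, b);   // unsigned: every lane -1 (all ones)
 */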
#define _mm_roundscale_pd(A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                              (int)(imm), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)-1); })

#define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                              (int)(imm), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U)); })

#define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                              (int)(imm), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U)); })

#define _mm256_roundscale_pd(A, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                              (int)(imm), \
                                              (__v4df)_mm256_setzero_pd(), \
                                              (__mmask8)-1); })

#define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                              (int)(imm), \
                                              (__v4df)(__m256d)(W), \
                                              (__mmask8)(U)); })

#define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                              (int)(imm), \
                                              (__v4df)_mm256_setzero_pd(), \
                                              (__mmask8)(U)); })

#define _mm_roundscale_ps(A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)-1); })

#define _mm_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U)); })

#define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U)); })

#define _mm256_roundscale_ps(A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__v8sf)_mm256_setzero_ps(), \
                                             (__mmask8)-1); })

#define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__v8sf)(__m256)(W), \
                                             (__mmask8)(U)); })

#define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__v8sf)_mm256_setzero_ps(), \
                                             (__mmask8)(U)); })
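/* Usage sketch (illustrative, not part of the original header): the imm8
 * encodes the rounding mode in its low bits and the scale (number of
 * fraction bits kept) in bits [7:4]; e.g. 0x01 rounds toward -inf with
 * scale 0, i.e. floor.
 *
 *   __m128d v = _mm_set1_pd(2.7);
 *   __m128d f = _mm_roundscale_pd(v, 0x01);   // {2.0, 2.0}
 */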
return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
#define _mm_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale)); })

#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale)); })

#define _mm_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale)); })

#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale)); })

#define _mm256_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale)); })

#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale)); })

#define _mm256_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale)); })

#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale)); })

#define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale)); })

#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale)); })

#define _mm_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale)); })

#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale)); })

#define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale)); })

#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale)); })

#define _mm256_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale)); })

#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale)); })

#define _mm_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale)); })

#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale)); })

#define _mm_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale)); })

#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale)); })

#define _mm256_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale)); })

#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale)); })

#define _mm256_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale)); })

#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale)); })

#define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale)); })

#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale)); })

#define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale)); })

#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale)); })

#define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
                               (int)(scale)); })

#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
                               (int)(scale)); })

#define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale)); })

#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale)); })
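/* Usage sketch (illustrative, not part of the original header): each scatter
 * stores element i of v1 to addr + index[i]*scale; the _mask_ forms skip
 * lanes whose mask bit is 0. scale must be 1, 2, 4, or 8.
 *
 *   float dst[16] = {0};
 *   __m128i idx = _mm_set_epi32(12, 8, 4, 0);
 *   __m128  val = _mm_set_ps(4.f, 3.f, 2.f, 1.f);
 *   _mm_i32scatter_ps(dst, idx, val, 4);   // dst[0,4,8,12] = 1,2,3,4
 */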
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I,
return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I,
return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I,
return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I,
return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I,
__m256i __I, __m256i __B) {
return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I,
return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I,
return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I,
return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I,
return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I,
return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I,
return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I,
return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I,
return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I,
return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I,
return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I,
return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I,
return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I,
return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I,
return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I,
return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I,
return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I,
return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I,
__m256i __I, __m256i __B) {
return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I,
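/* Usage sketch (illustrative, not part of the original header):
 * permutex2var picks each result lane from the concatenation of A and B
 * using the per-lane indices in I (for 8 x 32-bit lanes, index bits [2:0]
 * select the lane and bit 3 selects A vs. B).
 *
 *   __m256i a = _mm256_set1_epi32(10), b = _mm256_set1_epi32(20);
 *   __m256i i = _mm256_set1_epi32(8);               // bit 3 set: take from B
 *   __m256i r = _mm256_permutex2var_epi32(a, i, b); // every lane 20
 */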
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
#define _mm_rol_epi32(a, b) __extension__ ({\
  (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
                                        (__v4si)_mm_setzero_si128(), \
                                        (__mmask8)-1); })

#define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\
  (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
                                        (__v4si)(__m128i)(w), (__mmask8)(u)); })

#define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\
  (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
                                        (__v4si)_mm_setzero_si128(), \
                                        (__mmask8)(u)); })

#define _mm256_rol_epi32(a, b) __extension__ ({\
  (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
                                        (__v8si)_mm256_setzero_si256(), \
                                        (__mmask8)-1); })

#define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\
  (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
                                        (__v8si)(__m256i)(w), (__mmask8)(u)); })

#define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\
  (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
                                        (__v8si)_mm256_setzero_si256(), \
                                        (__mmask8)(u)); })

#define _mm_rol_epi64(a, b) __extension__ ({\
  (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
                                        (__v2di)_mm_setzero_di(), \
                                        (__mmask8)-1); })

#define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\
  (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
                                        (__v2di)(__m128i)(w), (__mmask8)(u)); })

#define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\
  (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
                                        (__v2di)_mm_setzero_di(), \
                                        (__mmask8)(u)); })

#define _mm256_rol_epi64(a, b) __extension__ ({\
  (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
                                        (__v4di)_mm256_setzero_si256(), \
                                        (__mmask8)-1); })

#define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\
  (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
                                        (__v4di)(__m256i)(w), (__mmask8)(u)); })

#define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\
  (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
                                        (__v4di)_mm256_setzero_si256(), \
                                        (__mmask8)(u)); })
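/* Usage sketch (illustrative, not part of the original header): rol rotates
 * each lane left by an immediate count; the prolv* builtins further below
 * take a per-lane count instead.
 *
 *   __m128i v = _mm_set1_epi32(0x80000001);
 *   __m128i r = _mm_rol_epi32(v, 1);   // every lane 0x00000003
 */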
return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
#define _mm_ror_epi32(A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
                                        (__v4si)_mm_setzero_si128(), \
                                        (__mmask8)-1); })

#define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
                                        (__v4si)(__m128i)(W), (__mmask8)(U)); })

#define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
                                        (__v4si)_mm_setzero_si128(), \
                                        (__mmask8)(U)); })

#define _mm256_ror_epi32(A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
                                        (__v8si)_mm256_setzero_si256(), \
                                        (__mmask8)-1); })

#define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
                                        (__v8si)(__m256i)(W), (__mmask8)(U)); })

#define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
                                        (__v8si)_mm256_setzero_si256(), \
                                        (__mmask8)(U)); })

#define _mm_ror_epi64(A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
                                        (__v2di)_mm_setzero_di(), \
                                        (__mmask8)-1); })

#define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
                                        (__v2di)(__m128i)(W), (__mmask8)(U)); })

#define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
                                        (__v2di)_mm_setzero_di(), \
                                        (__mmask8)(U)); })

#define _mm256_ror_epi64(A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
                                        (__v4di)_mm256_setzero_si256(), \
                                        (__mmask8)-1); })

#define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
                                        (__v4di)(__m256i)(W), (__mmask8)(U)); })

#define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
                                        (__v4di)_mm256_setzero_si256(), \
                                        (__mmask8)(U)); })

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                             (__v4si)_mm_sll_epi32(__A, __B),
                                             (__v4si)__W);
}
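/* Usage sketch (illustrative; `ror_blend_demo` is a hypothetical name).
   The mask form blends against a fallback vector W rather than zeroing:
   lanes with a set mask bit receive the rotated value, the rest keep W. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
ror_blend_demo(__m128i v, __m128i fallback, __mmask8 m)
{
  /* Lanes with m==1 get v rotated right by 4 bits; others keep fallback. */
  return _mm_mask_ror_epi32(fallback, m, v, 4);
}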
4575 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4583 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4591 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4599 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4607 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4615 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4623 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4631 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4639 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4641 (__v2di)_mm_setzero_di());
4647 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4655 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4663 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4671 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4673 (__v2di)_mm_setzero_di());
4679 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4687 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4695 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
4706 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
4715 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
4725 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
4736 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
4745 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
4755 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
4766 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
4775 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
4785 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
4796 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
4805 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
4815 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4823 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4825 (__v2di)_mm_setzero_di());
4831 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4839 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4847 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4855 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4863 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4871 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4879 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4887 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4889 (__v2di)_mm_setzero_di());
4895 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4903 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4911 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4919 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4927 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4935 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4943 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4951 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4959 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4967 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4975 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4983 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4991 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4999 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
5007 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
5015 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
5017 (__v2di)_mm_setzero_di());
5023 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
5031 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
5039 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
5047 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
5049 (__v2di)_mm_setzero_di());
5055 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
5063 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
5071 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
5079 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
5087 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
5095 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
5103 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5109 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
5117 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
5119 (__v2di)_mm_setzero_di());
5125 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
5131 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
5139 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
5147 return (__m128i) __builtin_ia32_selectd_128 ((
__mmask8) __U,
5155 return (__m128i) __builtin_ia32_selectd_128 ((
__mmask8) __U,
5164 return (__m256i) __builtin_ia32_selectd_256 ((
__mmask8) __U,
5172 return (__m256i) __builtin_ia32_selectd_256 ((
__mmask8) __U,
5180 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5189 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5199 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5208 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5218 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5226 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5234 return (__m128i) __builtin_ia32_selectq_128 ((
__mmask8) __U,
5242 return (__m128i) __builtin_ia32_selectq_128 ((
__mmask8) __U,
5244 (__v2di) _mm_setzero_di ());
5250 return (__m256i) __builtin_ia32_selectq_256 ((
__mmask8) __U,
5258 return (__m256i) __builtin_ia32_selectq_256 ((
__mmask8) __U,
5266 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5275 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5285 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5294 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5304 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5312 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5320 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5328 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5336 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5344 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5352 return (__m128i)__builtin_ia32_selectd_128(__M,
5360 return (__m128i)__builtin_ia32_selectd_128(__M,
5368 return (__m256i)__builtin_ia32_selectd_256(__M,
5376 return (__m256i)__builtin_ia32_selectd_256(__M,
5384 _mm_mask_set1_epi64 (__m128i __O,
__mmask8 __M,
long long __A)
5386 return (__m128i) __builtin_ia32_selectq_128(__M,
5392 _mm_maskz_set1_epi64 (
__mmask8 __M,
long long __A)
5394 return (__m128i) __builtin_ia32_selectq_128(__M,
5400 _mm256_mask_set1_epi64 (__m256i __O,
__mmask8 __M,
long long __A)
5402 return (__m256i) __builtin_ia32_selectq_256(__M,
5408 _mm256_maskz_set1_epi64 (
__mmask8 __M,
long long __A)
5410 return (__m256i) __builtin_ia32_selectq_256(__M,
#define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1); })

#define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), \
                                              (int)(imm), (__mmask8)(U)); })

#define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                             (__v4df)(__m256d)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1); })

#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                             (__v4df)(__m256d)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), \
                                              (int)(imm), (__mmask8)(U)); })

#define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                            (__v8sf)(__m256)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                            (__v8sf)(__m256)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
{
  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
                                                   (__v2df) __W,
                                                   (__mmask8) __U);
}
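/* Usage sketch (illustrative; `load_first_n` is a hypothetical name).
   AVX-512 masked loads suppress faults on disabled lanes, so a partial
   vector can be read from the tail of an array without over-reading. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
load_first_n(const double *p, int n)
{
  __mmask8 m = (__mmask8)((1u << n) - 1u);  /* enable lanes 0..n-1, n <= 2 */
  return _mm_maskz_loadu_pd(m, p);          /* disabled lanes read as 0.0  */
}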
5500 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
5509 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5517 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5526 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5534 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5543 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5551 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5560 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5568 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5577 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5585 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5594 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5602 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5611 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5619 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5628 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
5636 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
5645 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
5653 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
5662 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
5670 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
5679 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
5687 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
5696 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5704 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5712 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5720 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5728 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5736 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5744 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5752 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5760 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5768 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5776 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5784 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5793 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5801 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5809 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5817 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5825 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
5833 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
5841 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
5849 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
5857 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5865 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5873 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5881 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5889 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
5897 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
5905 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
5913 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
5921 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5930 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5938 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5947 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5956 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5964 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5973 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5982 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5990 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5999 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6008 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6016 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
#define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm_permute_pd((X), (C)), \
                                       (__v2df)(__m128d)(W)); })

#define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm_permute_pd((X), (C)), \
                                       (__v2df)_mm_setzero_pd()); })

#define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_permute_pd((X), (C)), \
                                       (__v4df)(__m256d)(W)); })

#define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_permute_pd((X), (C)), \
                                       (__v4df)_mm256_setzero_pd()); })

#define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm_permute_ps((X), (C)), \
                                      (__v4sf)(__m128)(W)); })

#define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm_permute_ps((X), (C)), \
                                      (__v4sf)_mm_setzero_ps()); })

#define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_permute_ps((X), (C)), \
                                      (__v8sf)(__m256)(W)); })

#define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_permute_ps((X), (C)), \
                                      (__v8sf)_mm256_setzero_ps()); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
{
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_permutevar_pd(__A, __C),
                                              (__v2df)__W);
}
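/* Usage sketch (illustrative; `swap_lanes_demo` is a hypothetical name).
   Each immediate bit selects the source lane for the corresponding result
   lane; the maskz form zeroes lanes whose mask bit is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
swap_lanes_demo(__m128d v, __mmask8 m)
{
  /* Immediate 0x1: result lane 0 <- v[1], result lane 1 <- v[0]. */
  return _mm_maskz_permute_pd(m, v, 0x1);
}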
6073 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
6081 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
6089 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
6097 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
6105 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
6113 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
6121 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
6237 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6245 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6253 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6261 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6269 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
6277 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
6279 (__v2di)_mm_setzero_di());
6285 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
6293 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
6301 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6309 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6317 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6325 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6333 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
6341 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
6343 (__v2di)_mm_setzero_di());
6349 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
6357 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
6365 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6373 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6381 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6389 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6397 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6405 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6413 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6421 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6429 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6435 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U, \
6443 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U, \
6445 (__v2di)_mm_setzero_di());
6451 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6457 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U, \
6465 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U, \
6473 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
6479 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U, \
6487 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U, \
6489 (__v2di)_mm_setzero_di());
6495 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
6501 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U, \
6509 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U, \
#define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                            (__v4si)(__m128i)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                            (__v4si)(__m128i)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                            (__v8si)(__m256i)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                            (__v8si)(__m256i)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                            (__v2di)(__m128i)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                            (__v2di)(__m128i)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                            (__v4di)(__m256i)(B), \
                                            (__v4di)(__m256i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                            (__v4di)(__m256i)(B), \
                                            (__v4di)(__m256i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
                                  (__v8sf)(__m256)(B), \
                                  0  + ((((imm) >> 0) & 0x1) * 4), \
                                  1  + ((((imm) >> 0) & 0x1) * 4), \
                                  2  + ((((imm) >> 0) & 0x1) * 4), \
                                  3  + ((((imm) >> 0) & 0x1) * 4), \
                                  8  + ((((imm) >> 1) & 0x1) * 4), \
                                  9  + ((((imm) >> 1) & 0x1) * 4), \
                                  10 + ((((imm) >> 1) & 0x1) * 4), \
                                  11 + ((((imm) >> 1) & 0x1) * 4)); })

#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                      (__v8sf)(__m256)(W)); })

#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                      (__v8sf)_mm256_setzero_ps()); })

#define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
                                   (__v4df)(__m256d)(B), \
                                   0 + ((((imm) >> 0) & 0x1) * 2), \
                                   1 + ((((imm) >> 0) & 0x1) * 2), \
                                   4 + ((((imm) >> 1) & 0x1) * 2), \
                                   5 + ((((imm) >> 1) & 0x1) * 2)); })

#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                       (__v4df)(__m256d)(W)); })

#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                       (__v4df)_mm256_setzero_pd()); })

#define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
                                   (__v4di)(__m256i)(B), \
                                   0 + ((((imm) >> 0) & 0x1) * 2), \
                                   1 + ((((imm) >> 0) & 0x1) * 2), \
                                   4 + ((((imm) >> 1) & 0x1) * 2), \
                                   5 + ((((imm) >> 1) & 0x1) * 2)); })

#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                      (__v8si)(__m256i)(W)); })

#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                      (__v8si)_mm256_setzero_si256()); })

#define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
                                   (__v4di)(__m256i)(B), \
                                   0 + ((((imm) >> 0) & 0x1) * 2), \
                                   1 + ((((imm) >> 0) & 0x1) * 2), \
                                   4 + ((((imm) >> 1) & 0x1) * 2), \
                                   5 + ((((imm) >> 1) & 0x1) * 2)); })

#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                      (__v4di)(__m256i)(W)); })

#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                      (__v4di)_mm256_setzero_si256()); })

#define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                       (__v2df)(__m128d)(W)); })

#define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                       (__v2df)_mm_setzero_pd()); })

#define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                       (__v4df)(__m256d)(W)); })

#define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                       (__v4df)_mm256_setzero_pd()); })

#define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                      (__v4sf)(__m128)(W)); })

#define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                      (__v4sf)_mm_setzero_ps()); })

#define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                      (__v8sf)(__m256)(W)); })

#define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                      (__v8sf)_mm256_setzero_ps()); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rsqrt14_pd (__m128d __A)
{
  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
                                                     (__v2df) _mm_setzero_pd (),
                                                     (__mmask8) -1);
}
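/* Usage sketch (illustrative; `xor3_demo` is a hypothetical name). The
   8-bit ternarylogic immediate is a truth table over the three inputs:
   bit (a<<2 | b<<1 | c) of the immediate supplies the result bit, so
   0x96 encodes a ^ b ^ c and 0xE8 encodes the bitwise majority. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
xor3_demo(__m128i a, __m128i b, __m128i c)
{
  /* One instruction instead of two vpxor ops. */
  return _mm_ternarylogic_epi32(a, b, c, 0x96);
}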
6717 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6725 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6734 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6743 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6751 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6760 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6769 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6777 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6786 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6795 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6803 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6812 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6813 0, 1, 2, 3, 0, 1, 2, 3);
6819 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__M,
6827 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__M,
6835 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6836 0, 1, 2, 3, 0, 1, 2, 3);
6842 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
6850 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
6858 return (__m256d)__builtin_ia32_selectpd_256(__M,
6866 return (__m256d)__builtin_ia32_selectpd_256(__M,
6874 return (__m128)__builtin_ia32_selectps_128(__M,
6882 return (__m128)__builtin_ia32_selectps_128(__M,
6890 return (__m256)__builtin_ia32_selectps_256(__M,
6898 return (__m256)__builtin_ia32_selectps_256(__M,
6906 return (__m128i)__builtin_ia32_selectd_128(__M,
6914 return (__m128i)__builtin_ia32_selectd_128(__M,
6922 return (__m256i)__builtin_ia32_selectd_256(__M,
6930 return (__m256i)__builtin_ia32_selectd_256(__M,
6938 return (__m128i)__builtin_ia32_selectq_128(__M,
6946 return (__m128i)__builtin_ia32_selectq_128(__M,
6954 return (__m256i)__builtin_ia32_selectq_256(__M,
6962 return (__m256i)__builtin_ia32_selectq_256(__M,
6970 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6978 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6979 (__v16qi) __O, __M);
6985 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6993 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6999 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7007 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7008 (__v16qi) __O, __M);
7014 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7022 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7028 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7036 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7044 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7052 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7058 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7066 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7073 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7081 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7087 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7095 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7096 (__v16qi) __O, __M);
7102 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7110 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7116 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7124 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7125 (__v16qi) __O, __M);
7131 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7139 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7145 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7153 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7160 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7168 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7174 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7182 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7190 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7198 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7204 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7212 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7219 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7227 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7233 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7241 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7248 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7256 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7262 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7270 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7278 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7286 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7292 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7300 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7308 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7316 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7322 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7330 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7337 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7345 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7351 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7359 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7366 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7374 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7380 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7388 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7396 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7404 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7410 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7418 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7426 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7434 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7440 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7448 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7455 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7463 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7469 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7477 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7484 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7492 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7498 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7506 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7513 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7521 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7527 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7535 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7542 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7550 return __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7556 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7564 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7565 (__v16qi) __O, __M);
7571 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7580 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7586 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7594 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7595 (__v16qi) __O, __M);
7601 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7609 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7615 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7623 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7630 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7638 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7644 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7652 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7659 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7667 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7673 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7681 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7682 (__v16qi) __O, __M);
7688 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7696 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7702 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7710 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7711 (__v16qi) __O, __M);
7717 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7725 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7731 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7739 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7746 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7754 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7760 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
7768 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
7775 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
7783 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7789 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7797 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7805 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7813 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7819 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7827 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7834 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7842 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
#define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
  (__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \
                                  (__v8sf)_mm256_undefined_ps(), \
                                  ((imm) & 1) ? 4 : 0, \
                                  ((imm) & 1) ? 5 : 1, \
                                  ((imm) & 1) ? 6 : 2, \
                                  ((imm) & 1) ? 7 : 3); })

#define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
                                      (__v4sf)(__m128)(W)); })

#define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
                                      (__v4sf)_mm_setzero_ps()); })

#define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8si)(__m256i)(A), \
                                   (__v8si)_mm256_undefined_si256(), \
                                   ((imm) & 1) ? 4 : 0, \
                                   ((imm) & 1) ? 5 : 1, \
                                   ((imm) & 1) ? 6 : 2, \
                                   ((imm) & 1) ? 7 : 3); })

#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
                                      (__v4si)(__m128i)(W)); })

#define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
                                      (__v4si)_mm_setzero_si128()); })

#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
  (__m256)__builtin_shufflevector((__v8sf)(A), \
                                  (__v8sf)_mm256_castps128_ps256((__m128)(B)), \
                                  ((imm) & 0x1) ?  0 : 8, \
                                  ((imm) & 0x1) ?  1 : 9, \
                                  ((imm) & 0x1) ?  2 : 10, \
                                  ((imm) & 0x1) ?  3 : 11, \
                                  ((imm) & 0x1) ?  8 : 4, \
                                  ((imm) & 0x1) ?  9 : 5, \
                                  ((imm) & 0x1) ? 10 : 6, \
                                  ((imm) & 0x1) ? 11 : 7); })

#define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                      (__v8sf)(__m256)(W)); })

#define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                      (__v8sf)_mm256_setzero_ps()); })

#define _mm256_inserti32x4(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v8si)(A), \
                                   (__v8si)_mm256_castsi128_si256((__m128i)(B)), \
                                   ((imm) & 0x1) ?  0 : 8, \
                                   ((imm) & 0x1) ?  1 : 9, \
                                   ((imm) & 0x1) ?  2 : 10, \
                                   ((imm) & 0x1) ?  3 : 11, \
                                   ((imm) & 0x1) ?  8 : 4, \
                                   ((imm) & 0x1) ?  9 : 5, \
                                   ((imm) & 0x1) ? 10 : 6, \
                                   ((imm) & 0x1) ? 11 : 7); })

#define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                      (__v8si)(__m256i)(W)); })

#define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                      (__v8si)_mm256_setzero_si256()); })

#define _mm_getmant_pd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1); })

#define _mm_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U)); })

#define _mm_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U)); })

#define _mm256_getmant_pd(A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1); })

#define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U)); })

#define _mm_getmant_ps(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1); })

#define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U)); })

#define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U)); })

#define _mm256_getmant_ps(A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1); })

#define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U)); })

#define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U)); })

#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
                                        (double const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
                                        (long long const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
                                        (double const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
                                        (long long const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
                                       (float const *)(addr), \
                                       (__v2di)(__m128i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
                                       (float const *)(addr), \
                                       (__v4di)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
                                        (double const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
                                        (long long const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
                                        (double const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
                                        (long long const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
                                       (float const *)(addr), \
                                       (__v4si)(__m128i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
                                       (float const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm256_permutex_pd(X, C) __extension__ ({ \
  (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \
                                   (__v4df)_mm256_undefined_pd(), \
                                   ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
                                   ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })

#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_permutex_pd((X), (C)), \
                                       (__v4df)(__m256d)(W)); })

#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_permutex_pd((X), (C)), \
                                       (__v4df)_mm256_setzero_pd()); })

#define _mm256_permutex_epi64(X, C) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \
                                   (__v4di)_mm256_undefined_si256(), \
                                   ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
                                   ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })

#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_permutex_epi64((X), (C)), \
                                      (__v4di)(__m256i)(W)); })

#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_permutex_epi64((X), (C)), \
                                      (__v4di)_mm256_setzero_si256()); })

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
{
  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
                                                     (__v4di) __X,
                                                     (__v4df) _mm256_undefined_pd (),
                                                     (__mmask8) -1);
}
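/* Usage sketch (illustrative; `gather_demo` is a hypothetical name).
   Masked gathers fetch only the enabled lanes; disabled lanes keep the
   old-value vector and their addresses are not dereferenced. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
gather_demo(const double *table, __m128i idx, __mmask8 m)
{
  __m128d old = _mm_setzero_pd();
  /* scale = 8: idx holds element indices into an array of doubles. */
  return _mm_mmask_i64gather_pd(old, m, idx, table, 8);
}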
8138 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8147 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8156 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8165 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8175 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8185 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8194 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8203 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8212 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8222 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8231 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
#define _mm_alignr_epi32(A, B, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v4si)(__m128i)(B), \
                                   (__v4si)(__m128i)(A), \
                                   ((int)(imm) & 0x3) + 0, \
                                   ((int)(imm) & 0x3) + 1, \
                                   ((int)(imm) & 0x3) + 2, \
                                   ((int)(imm) & 0x3) + 3); })

#define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                      (__v4si)(__m128i)(W)); })

#define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                      (__v4si)_mm_setzero_si128()); })

#define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v8si)(__m256i)(B), \
                                   (__v8si)(__m256i)(A), \
                                   ((int)(imm) & 0x7) + 0, \
                                   ((int)(imm) & 0x7) + 1, \
                                   ((int)(imm) & 0x7) + 2, \
                                   ((int)(imm) & 0x7) + 3, \
                                   ((int)(imm) & 0x7) + 4, \
                                   ((int)(imm) & 0x7) + 5, \
                                   ((int)(imm) & 0x7) + 6, \
                                   ((int)(imm) & 0x7) + 7); })

#define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                      (__v8si)(__m256i)(W)); })

#define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                      (__v8si)_mm256_setzero_si256()); })

#define _mm_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v2di)(__m128i)(B), \
                                   (__v2di)(__m128i)(A), \
                                   ((int)(imm) & 0x1) + 0, \
                                   ((int)(imm) & 0x1) + 1); })

#define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                      (__v2di)(__m128i)(W)); })

#define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                      (__v2di)_mm_setzero_di()); })

#define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(B), \
                                   (__v4di)(__m256i)(A), \
                                   ((int)(imm) & 0x3) + 0, \
                                   ((int)(imm) & 0x3) + 1, \
                                   ((int)(imm) & 0x3) + 2, \
                                   ((int)(imm) & 0x3) + 3); })

#define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                      (__v4di)(__m256i)(W)); })

#define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                      (__v4di)_mm256_setzero_si256()); })

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
{
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_movehdup_ps(__A),
                                             (__v4sf)__W);
}
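/* Usage sketch (illustrative; `shift_in_demo` is a hypothetical name).
   alignr_epi32 concatenates B:A (A in the high half) and shifts right
   by `imm` 32-bit lanes, per the shufflevector indices above. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
shift_in_demo(__m128i a, __m128i b)
{
  /* Result lanes are B[1], B[2], B[3], A[0]. */
  return _mm_alignr_epi32(a, b, 1);
}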
8322 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
8330 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
8338 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
8346 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
8354 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
8362 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
8370 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
#define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                      (__v8si)(__m256i)(W)); })

#define _mm256_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                      (__v8si)_mm256_setzero_si256()); })

#define _mm_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shuffle_epi32((A), (I)), \
                                      (__v4si)(__m128i)(W)); })

#define _mm_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shuffle_epi32((A), (I)), \
                                      (__v4si)_mm_setzero_si128()); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
{
  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
                                                (__v2df) __A,
                                                (__v2df) __W);
}
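/* Usage sketch (illustrative; `bcast_lane0_demo` is a hypothetical name).
   The immediate follows _MM_SHUFFLE ordering; masked-off lanes take the
   corresponding lane of the fallback vector W. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
bcast_lane0_demo(__m128i v, __m128i fallback, __mmask8 m)
{
  /* Immediate 0 == _MM_SHUFFLE(0,0,0,0): broadcast lane 0 where m is set. */
  return _mm_mask_shuffle_epi32(fallback, m, v, 0);
}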
8406 return (__m128d) __builtin_ia32_selectpd_128 ((
__mmask8) __U,
8414 return (__m256d) __builtin_ia32_selectpd_256 ((
__mmask8) __U,
8422 return (__m256d) __builtin_ia32_selectpd_256 ((
__mmask8) __U,
8430 return (__m128) __builtin_ia32_selectps_128 ((
__mmask8) __U,
8438 return (__m128) __builtin_ia32_selectps_128 ((
__mmask8) __U,
8446 return (__m256) __builtin_ia32_selectps_256 ((
__mmask8) __U,
8454 return (__m256) __builtin_ia32_selectps_256 ((
__mmask8) __U,
8462 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8470 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8479 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8487 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                         (__v8hi)(__m128i)(W), \
                                         (__mmask8)(U)); })

#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                         (__v8hi)_mm_setzero_si128(), \
                                         (__mmask8)(U)); })

#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                            (__v8hi)(__m128i)(W), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                            (__v8hi)_mm_setzero_si128(), \
                                            (__mmask8)(U)); })

#undef __DEFAULT_FN_ATTRS

#endif /* __AVX512VLINTRIN_H */
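/*
 * Usage sketch (illustrative only): convert four floats to half precision
 * under the current rounding mode, zeroing the masked-off output lanes:
 *
 *   __m128i h = _mm_maskz_cvt_roundps_ph((__mmask8)0xF, v,
 *                                        _MM_FROUND_CUR_DIRECTION);
 */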
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcast_i32x4(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srlv_epi64(__m128i __X, __m128i __Y)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_test_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi32(__m128i __V)
static __inline __m128 __DEFAULT_FN_ATTRS _mm_permutevar_ps(__m128 __a, __m128i __c)
Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vecto...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sll_epi64(__m256i __a, __m128i __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_loadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi32_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpackhi_ps(__m128 __a, __m128 __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x float] and interleaves the...
static __inline __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtps_ph(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_abs_epi32(__m256i __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Calculates the square root of the each of two values stored in a 128-bit vector of [2 x double]...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastq_epi64(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_cvtps_pd(__m256d __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvttps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the lesser of each pair of values...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps(__m256 __a)
Moves and duplicates low-order (even-indexed) values from a 256-bit vector of [8 x float] to float va...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rolv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srav_epi64(__m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srlv_epi32(__m128i __X, __m128i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi64_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a)
Converts the lower two integer elements of a 128-bit vector of [4 x i32] into two double-precision fl...
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvttps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of 32-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ps(__m128 __a, __m128 __b)
Adds two 128-bit vectors of [4 x float], and returns the results of the addition. ...
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_test_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutexvar_epi64(__m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V)
Sign-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_getexp_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epi32(__m256i __a, __m256i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu64(__m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_load_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_getexp_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_movedup_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_test_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
#define _mm256_cmpeq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_compress_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_testn_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [4 x i32], saving the lower 32 bits of each...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_rsqrt14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_mul_ps(__m256 __a, __m256 __b)
Multiplies two 256-bit vectors of [8 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi32_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi32_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi64(__m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_ps(__mmask16 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mullo_epi32(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_test_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd(__m256d __a, __m256d __b)
Unpacks the odd-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of 64-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rolv_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_getexp_pd(__mmask8 __U, __m128d __A)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the two 256-bit vectors of [8 x float] ...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epu64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi64_epi8(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_loadu_pd(__mmask8 __U, void const *__P)
#define _mm256_cmpneq_epi64_mask(A, B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpacklo_ps(__m128 __a, __m128 __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x float] and interleaves them...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them i...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A)
static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) _mm_setzero_di(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epu32(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp14_ps(__m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srav_epi64(__m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_expandloadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutexvar_epi32(__m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi64_epi16(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ps(__m128 __a, __m128 __b)
Subtracts each of the values of the second operand from the first operand, both of which are 128-bit ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_compress_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_mov_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epu32(__m128 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_testn_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x i32] and interleaves them ...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_loadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
#define _mm256_cmpeq_epi32_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V)
Zero-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epu32(__m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_loadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi16_epi32(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_moveldup_ps(__m128 __a)
Duplicates even-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-b...
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V)
Sign-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_getexp_pd(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi32_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvttpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
#define _mm_cmpneq_epi32_mask(A, B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi64x(long long __q)
Constructs a 256-bit integer vector of [4 x i64], with each of the 64-bit integral vector elements se...
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastq_epi64(__m128i __X)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi64_epi32(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_rcp14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a)
Calculates the square roots of the values in a 256-bit vector of [4 x double].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_ps(__mmask16 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastd_epi32(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi64_epi32(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_scalef_ps(__m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sub_pd(__m256d __a, __m256d __b)
Subtracts two 256-bit vectors of [4 x double].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_or_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the greater of each pair of values...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sll_epi32(__m256i __a, __m128i __count)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sub_ps(__m256 __a, __m256 __b)
Subtracts two 256-bit vectors of [8 x float].
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_permutexvar_pd(__m256i __X, __m256d __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_expandloadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epi32(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_andnot_si256(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_getexp_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt14_ps(__m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi64_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sllv_epi32(__m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepu32_ps(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_getexp_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1, __m128i __V2)
Multiples corresponding elements of two 128-bit vectors of [4 x i32] and returns the lower 32 bits of...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_expand_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi64(__m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_movedup_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_broadcastss_ps(__m128 __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a)
Calculates the square roots of the values stored in a 128-bit vector of [4 x float].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_add_ps(__m256 __a, __m256 __b)
Adds two 256-bit vectors of [8 x float].
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_rsqrt14_pd(__m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rsqrt14_pd(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi16_epi64(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_and_si256(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_loadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sllv_epi64(__m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi64_epi8(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_loadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epu32(__m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_slli_epi32(__m256i __a, int __count)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_scalef_pd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepu32_pd(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi64_epi32(__m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_getexp_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B)
#define _mm_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi64_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvttps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_testn_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd(__m256d __a)
Moves and duplicates double-precision floating point values from a 256-bit vector of [4 x double] to ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_test_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epu32(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b)
Subtracts two 128-bit vectors of [2 x double].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [2 x i64], saving the lower 64 bits of each...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvttpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_expandloadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_expandloadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_compress_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding even-indexed elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector of [2 x i64] containing the products.
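For clarity, a sketch of the widening multiply: only the even-indexed 32-bit lanes (0 and 2) participate, and each pair produces a full 64-bit product (example values are arbitrary):

#include <immintrin.h>

static __m128i widening_mul(void) {
    __m128i a = _mm_set_epi32(0, 100000, 0, -3);   /* lanes 0..3: [-3, 0, 100000, 0] */
    __m128i b = _mm_set_epi32(0, 100000, 0,  7);   /* lanes 0..3: [7, 0, 100000, 0] */
    /* Result: [2 x i64] = [-21, 10000000000] - no truncation to 32 bits. */
    return _mm_mul_epi32(a, b);
}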
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
#define _mm_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi16(__m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_scalef_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sra_epi64(__m256i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b)
Adds two 128-bit vectors of [2 x double].
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_rcp14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_testn_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V)
Zero-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to 64-bit values and returns them in a 128-bit vector of [2 x i64].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_broadcastsd_pd(__m128d __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rorv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epu32(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_rcp14_pd(__mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b)
Performs an element-by-element division of two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi64(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epu32(__m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
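A sketch of the lo/hi unpack pair for 64-bit lanes, which together transpose a 2x2 block of quadwords:

#include <immintrin.h>

/* a = [a0, a1], b = [b0, b1]:
   unpacklo -> [a0, b0], unpackhi -> [a1, b1] */
static __m128i interleave_lo(__m128i a, __m128i b) { return _mm_unpacklo_epi64(a, b); }
static __m128i interleave_hi(__m128i a, __m128i b) { return _mm_unpackhi_epi64(a, b); }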
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srlv_epi32(__m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_rolv_epi64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epu32(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd(__m256d __a, __m256d __b)
Unpacks the even-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them into a 256-bit vector of [4 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_div_ps(__m256 __a, __m256 __b)
Divides two 256-bit vectors of [8 x float].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_rsqrt14_ps(__m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_load_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values.
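As an illustration of the zero-masked comparison forms in this listing, a minimal sketch (the mask is an arbitrary example; assumes AVX-512VL):

#include <immintrin.h>

/* Lane-wise maximum; with the zero-masked form, lanes whose mask bit is
   clear are set to zero instead of being computed. */
static __m256 masked_max(__mmask8 k, __m256 a, __m256 b) {
    return _mm256_maskz_max_ps(k, a, b);     /* e.g. k = 0x0F keeps lanes 0..3 */
}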
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_scalef_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi32_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them into a 128-bit vector of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_slli_epi64(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mul_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_rcp14_ps(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srli_epi64(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_mov_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_abs_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi32_epi8(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_expandloadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V)
Zero-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] to 32-bit values and returns them in a 128-bit vector of [4 x i32].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits. High-order bits are filled with the sign bit of the initial value.
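To make the arithmetic/logical distinction concrete, a small sketch contrasting _mm_sra_epi32 with _mm_srl_epi32 (both take the shift count from the low 64 bits of a vector operand):

#include <immintrin.h>

/* -8 >> 2 arithmetic = -2 (sign bit replicated);
   -8 >> 2 logical    = 0x3FFFFFFE (zeros shifted in). */
static __m128i sra2(__m128i v) { return _mm_sra_epi32(v, _mm_cvtsi32_si128(2)); }
static __m128i srl2(__m128i v) { return _mm_srl_epi32(v, _mm_cvtsi32_si128(2)); }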
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A)
#define __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epi64(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_rsqrt14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastd_epi32(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi64(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V)
Sign-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to 64-bit values and returns them in a 128-bit vector of [2 x i64].
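A sketch contrasting the sign- and zero-extending conversions listed here; both read only the lower two bytes of the source vector:

#include <immintrin.h>

/* For a byte value of 0x80: the sign-extended lane is -128,
   while the zero-extended lane is 128. */
static __m128i widen_signed(__m128i v)   { return _mm_cvtepi8_epi64(v); }
static __m128i widen_unsigned(__m128i v) { return _mm_cvtepu8_epi64(v); }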
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi32(__m128i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
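A sketch of the basic bitwise family; the one subtlety worth showing is that _mm_andnot_si128 complements its first operand, not its second:

#include <immintrin.h>

static __m128i keep_masked_bits(__m128i mask, __m128i v) {
    return _mm_and_si128(mask, v);           /* mask & v */
}
static __m128i clear_masked_bits(__m128i mask, __m128i v) {
    return _mm_andnot_si128(mask, v);        /* (~mask) & v */
}
static __m128i toggle_bits(__m128i mask, __m128i v) {
    return _mm_xor_si128(mask, v);           /* mask ^ v */
}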
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi8(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_scalef_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_div_pd(__m256d __a, __m256d __b)
Divides two 256-bit vectors of [4 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_rsqrt14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline __m128d __DEFAULT_FN_ATTRS _mm_permutevar_pd(__m128d __a, __m128i __c)
Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector operand.
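For _mm_permutevar_pd, the selector is bit 1 of each 64-bit control element (not bit 0); a minimal sketch of a lane swap built on that:

#include <immintrin.h>

/* Control [2, 0]: element 0's control has bit 1 set (select a[1]),
   element 1's control has bit 1 clear (select a[0]) -> result [a1, a0]. */
static __m128d swap_lanes(__m128d a) {
    return _mm_permutevar_pd(a, _mm_set_epi64x(0, 2));
}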
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_getexp_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_movedup_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi16(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of bits. High-order bits are cleared.
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_cvtepu32_pd(__m128i __A)
static __inline __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi32_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Unpacks the low-order (index 0, 1) values from two 128-bit vectors of [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_scalef_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epu32(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vector containing the lesser of each pair of values.
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_compress_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_expandloadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_xor_si256(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srai_epi32(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi64(__m128i __A, int __imm)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_scalef_ps(__m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_mul_pd(__m256d __a, __m256d __b)
Multiplies two 256-bit vectors of [4 x double].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ps(__m128 __a, __m128 __b)
Multiplies two 128-bit vectors of [4 x float] and returns the results of the multiplication.
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ps(__m128 __a, __m128 __b)
Divides two 128-bit vectors of [4 x float].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a)
Calculates the square roots of the values in a 256-bit vector of [8 x float].
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_f32x4(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi32_epi8(__m256i __A)
#define _mm_cmpeq_epi32_mask(A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_rcp14_pd(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_load_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_testn_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srav_epi32(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements of two 128-bit vectors of [2 x i64] and returns the 64-bit products in the corresponding elements of a 128-bit vector of [2 x i64].
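A sketch distinguishing this widening multiply from the truncating _mm_mullo_epi32: only dword lanes 0 and 2 participate, and the full 64-bit products are kept:

#include <immintrin.h>

static __m128i full_products(__m128i a, __m128i b) {
    /* [2 x i64] of unsigned 32x32 -> 64 products from dword lanes 0 and 2;
       _mm_mullo_epi32 would instead keep only the low 32 bits of all four
       products. */
    return _mm_mul_epu32(a, b);
}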
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epu32(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_test_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi32(__m128i __W, __mmask8 __U, __m128i __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi32_epi64(__m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_or_si256(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_load_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_loadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi64(__m128i __A, __m128i __B)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_add_pd(__m256d __a, __m256d __b)
Adds two 256-bit vectors of [4 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_mov_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_expand_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_test_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_expand_pd(__mmask8 __U, __m128d __A)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi32(int __i)
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements set to the specified 32-bit integral value.
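A sketch of the broadcast family together with its zero-masked AVX-512VL variant (the mask 0x5 is an arbitrary example selecting lanes 0 and 2):

#include <immintrin.h>

static __m256i all_fortytwo(void) {
    return _mm256_set1_epi32(42);            /* every lane = 42 */
}
static __m256i some_fortytwo(void) {
    return _mm256_maskz_set1_epi32(0x5, 42); /* lanes 0 and 2 = 42, rest = 0 */
}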
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu16_epi64(__m128i __V)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_permutevar_ps(__m256 __a, __m256i __c)
Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vector operand.
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_cvtepu32_ps(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_rorv_epi32(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtps_pd(__m128d __W, __mmask8 __U, __m128 __A)
#define _mm_cmpneq_epi64_mask(A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_load_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_expand_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser value of the two.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_load_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_compress_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehdup_ps(__m128 __a)
Moves and duplicates odd-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-bit vector of [4 x float].
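To show the duplication pattern concretely, a sketch of the dup family referenced throughout this listing:

#include <immintrin.h>

/* [a0,a1,a2,a3] -> movehdup: [a1,a1,a3,a3]; moveldup: [a0,a0,a2,a2]. */
static __m128 dup_odd(__m128 a)  { return _mm_movehdup_ps(a); }
static __m128 dup_even(__m128 a) { return _mm_moveldup_ps(a); }
/* [a0,a1] -> [a0,a0] */
static __m128d dup_low(__m128d a) { return _mm_movedup_pd(a); }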
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi32_epi16(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_rcp14_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_test_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V)
Sign-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] to 32-bit values and returns them in a 128-bit vector of [4 x i32].
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double] to double-precision values stored in a 128-bit vector of [2 x double].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_expandloadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_permutexvar_ps(__m256i __X, __m256 __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rorv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of bits. High-order bits are cleared.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V)
Sign-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit values and returns them in a 128-bit vector of [4 x i32].
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x double].
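A sketch of this widening conversion and its merge-masked form (the mask 0x3 is an arbitrary example; assumes AVX-512VL):

#include <immintrin.h>

static __m256d widen_all(__m128i v) {
    return _mm256_cvtepi32_pd(v);            /* four i32 -> four double */
}
static __m256d widen_low_half(__m256d w, __m128i v) {
    /* Lanes 0 and 1 converted; lanes 2 and 3 copied from w. */
    return _mm256_mask_cvtepi32_pd(w, 0x3, v);
}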
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_epi32(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_mov_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rcp14_pd(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm256_cmpneq_epi32_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the two 256-bit vectors of [8 x float] and interleaves them into a 256-bit vector of [8 x float].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_movedup_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_scalef_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_rolv_epi32(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_getexp_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi32(__m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_rcp14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
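A short note in code form: _mm256_setzero_si256 yields a guaranteed all-zero vector (typically a single register XOR), whereas the _mm*_undefined_* entries above return unspecified bits and must be fully written before use:

#include <immintrin.h>

static __m256i zeros(void) {
    return _mm256_setzero_si256();           /* all 256 bits are 0 */
}
static __m128i scratch(void) {
    __m128i t = _mm_undefined_si128();       /* contents unspecified */
    t = _mm_set1_epi32(7);                   /* define every bit before reading */
    return t;
}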
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srl_epi32(__m256i __a, __m128i __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sllv_epi64(__m256i __X, __m256i __Y)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps(__m256 __a)
Moves and duplicates high-order (odd-indexed) values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sra_epi32(__m256i __a, __m128i __count)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi64_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epu64(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vector containing the greater of each pair of values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_testn_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b)
Performs a bitwise exclusive OR of two 128-bit integer vectors.
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V)
Zero-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] to 64-bit values and returns them in a 128-bit vector of [2 x i64].
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mul_epu32(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b)
Performs a bitwise OR of two 128-bit integer vectors.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvttps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srai_epi64(__m256i __A, int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_rsqrt14_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V)
Zero-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit values and returns them in a 128-bit vector of [4 x i32].
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srl_epi64(__m256i __a, __m128i __count)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi64(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b)
Multiplies two 128-bit vectors of [2 x double].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srlv_epi64(__m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_compress_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_expand_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_getexp_ps(__m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_expand_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_ph(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_rcp14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_getexp_ps(__m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
#define _mm_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epi64(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B)
#define _mm_cmpeq_epi64_mask(A, B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_scalef_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_rcp14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_getexp_pd(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu16_epi32(__m128i __V)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_rsqrt14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors, using the one's complement of the values contained in the first source operand.
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values.
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srli_epi32(__m256i __a, int __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi64_epi32(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttps_epi32(__mmask8 __U, __m128 __A)