25 #error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead." 28 #ifndef __AVX512VLBWINTRIN_H 29 #define __AVX512VLBWINTRIN_H 32 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"))) 36 return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 };
41 #define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \ 42 (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ 43 (__v16qi)(__m128i)(b), (int)(p), \ 46 #define _mm_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ 47 (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ 48 (__v16qi)(__m128i)(b), (int)(p), \ 51 #define _mm_cmp_epu8_mask(a, b, p) __extension__ ({ \ 52 (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ 53 (__v16qi)(__m128i)(b), (int)(p), \ 56 #define _mm_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ 57 (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ 58 (__v16qi)(__m128i)(b), (int)(p), \ 61 #define _mm256_cmp_epi8_mask(a, b, p) __extension__ ({ \ 62 (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ 63 (__v32qi)(__m256i)(b), (int)(p), \ 66 #define _mm256_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ 67 (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ 68 (__v32qi)(__m256i)(b), (int)(p), \ 71 #define _mm256_cmp_epu8_mask(a, b, p) __extension__ ({ \ 72 (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ 73 (__v32qi)(__m256i)(b), (int)(p), \ 76 #define _mm256_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ 77 (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ 78 (__v32qi)(__m256i)(b), (int)(p), \ 81 #define _mm_cmp_epi16_mask(a, b, p) __extension__ ({ \ 82 (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ 83 (__v8hi)(__m128i)(b), (int)(p), \ 86 #define _mm_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ 87 (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ 88 (__v8hi)(__m128i)(b), (int)(p), \ 91 #define _mm_cmp_epu16_mask(a, b, p) __extension__ ({ \ 92 (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ 93 (__v8hi)(__m128i)(b), (int)(p), \ 96 #define _mm_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ 97 (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ 98 (__v8hi)(__m128i)(b), (int)(p), 
\ 101 #define _mm256_cmp_epi16_mask(a, b, p) __extension__ ({ \ 102 (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ 103 (__v16hi)(__m256i)(b), (int)(p), \ 106 #define _mm256_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ 107 (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ 108 (__v16hi)(__m256i)(b), (int)(p), \ 111 #define _mm256_cmp_epu16_mask(a, b, p) __extension__ ({ \ 112 (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ 113 (__v16hi)(__m256i)(b), (int)(p), \ 116 #define _mm256_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ 117 (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ 118 (__v16hi)(__m256i)(b), (int)(p), \ 121 #define _mm_cmpeq_epi8_mask(A, B) \ 122 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 123 #define _mm_mask_cmpeq_epi8_mask(k, A, B) \ 124 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 125 #define _mm_cmpge_epi8_mask(A, B) \ 126 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 127 #define _mm_mask_cmpge_epi8_mask(k, A, B) \ 128 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 129 #define _mm_cmpgt_epi8_mask(A, B) \ 130 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 131 #define _mm_mask_cmpgt_epi8_mask(k, A, B) \ 132 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 133 #define _mm_cmple_epi8_mask(A, B) \ 134 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 135 #define _mm_mask_cmple_epi8_mask(k, A, B) \ 136 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 137 #define _mm_cmplt_epi8_mask(A, B) \ 138 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 139 #define _mm_mask_cmplt_epi8_mask(k, A, B) \ 140 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 141 #define _mm_cmpneq_epi8_mask(A, B) \ 142 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 143 #define _mm_mask_cmpneq_epi8_mask(k, A, B) \ 144 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 146 #define _mm256_cmpeq_epi8_mask(A, B) \ 147 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 148 #define _mm256_mask_cmpeq_epi8_mask(k, A, B) \ 149 
_mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 150 #define _mm256_cmpge_epi8_mask(A, B) \ 151 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 152 #define _mm256_mask_cmpge_epi8_mask(k, A, B) \ 153 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 154 #define _mm256_cmpgt_epi8_mask(A, B) \ 155 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 156 #define _mm256_mask_cmpgt_epi8_mask(k, A, B) \ 157 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 158 #define _mm256_cmple_epi8_mask(A, B) \ 159 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 160 #define _mm256_mask_cmple_epi8_mask(k, A, B) \ 161 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 162 #define _mm256_cmplt_epi8_mask(A, B) \ 163 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 164 #define _mm256_mask_cmplt_epi8_mask(k, A, B) \ 165 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 166 #define _mm256_cmpneq_epi8_mask(A, B) \ 167 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 168 #define _mm256_mask_cmpneq_epi8_mask(k, A, B) \ 169 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 171 #define _mm_cmpeq_epu8_mask(A, B) \ 172 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 173 #define _mm_mask_cmpeq_epu8_mask(k, A, B) \ 174 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 175 #define _mm_cmpge_epu8_mask(A, B) \ 176 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 177 #define _mm_mask_cmpge_epu8_mask(k, A, B) \ 178 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 179 #define _mm_cmpgt_epu8_mask(A, B) \ 180 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 181 #define _mm_mask_cmpgt_epu8_mask(k, A, B) \ 182 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 183 #define _mm_cmple_epu8_mask(A, B) \ 184 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 185 #define _mm_mask_cmple_epu8_mask(k, A, B) \ 186 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 187 #define _mm_cmplt_epu8_mask(A, B) \ 188 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 189 #define _mm_mask_cmplt_epu8_mask(k, A, B) \ 190 
_mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 191 #define _mm_cmpneq_epu8_mask(A, B) \ 192 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 193 #define _mm_mask_cmpneq_epu8_mask(k, A, B) \ 194 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 196 #define _mm256_cmpeq_epu8_mask(A, B) \ 197 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 198 #define _mm256_mask_cmpeq_epu8_mask(k, A, B) \ 199 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 200 #define _mm256_cmpge_epu8_mask(A, B) \ 201 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 202 #define _mm256_mask_cmpge_epu8_mask(k, A, B) \ 203 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 204 #define _mm256_cmpgt_epu8_mask(A, B) \ 205 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 206 #define _mm256_mask_cmpgt_epu8_mask(k, A, B) \ 207 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 208 #define _mm256_cmple_epu8_mask(A, B) \ 209 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 210 #define _mm256_mask_cmple_epu8_mask(k, A, B) \ 211 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 212 #define _mm256_cmplt_epu8_mask(A, B) \ 213 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 214 #define _mm256_mask_cmplt_epu8_mask(k, A, B) \ 215 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 216 #define _mm256_cmpneq_epu8_mask(A, B) \ 217 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 218 #define _mm256_mask_cmpneq_epu8_mask(k, A, B) \ 219 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 221 #define _mm_cmpeq_epi16_mask(A, B) \ 222 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 223 #define _mm_mask_cmpeq_epi16_mask(k, A, B) \ 224 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 225 #define _mm_cmpge_epi16_mask(A, B) \ 226 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 227 #define _mm_mask_cmpge_epi16_mask(k, A, B) \ 228 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 229 #define _mm_cmpgt_epi16_mask(A, B) \ 230 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 231 #define _mm_mask_cmpgt_epi16_mask(k, A, 
B) \ 232 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 233 #define _mm_cmple_epi16_mask(A, B) \ 234 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 235 #define _mm_mask_cmple_epi16_mask(k, A, B) \ 236 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 237 #define _mm_cmplt_epi16_mask(A, B) \ 238 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 239 #define _mm_mask_cmplt_epi16_mask(k, A, B) \ 240 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 241 #define _mm_cmpneq_epi16_mask(A, B) \ 242 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 243 #define _mm_mask_cmpneq_epi16_mask(k, A, B) \ 244 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 246 #define _mm256_cmpeq_epi16_mask(A, B) \ 247 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 248 #define _mm256_mask_cmpeq_epi16_mask(k, A, B) \ 249 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 250 #define _mm256_cmpge_epi16_mask(A, B) \ 251 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 252 #define _mm256_mask_cmpge_epi16_mask(k, A, B) \ 253 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 254 #define _mm256_cmpgt_epi16_mask(A, B) \ 255 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 256 #define _mm256_mask_cmpgt_epi16_mask(k, A, B) \ 257 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 258 #define _mm256_cmple_epi16_mask(A, B) \ 259 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 260 #define _mm256_mask_cmple_epi16_mask(k, A, B) \ 261 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 262 #define _mm256_cmplt_epi16_mask(A, B) \ 263 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 264 #define _mm256_mask_cmplt_epi16_mask(k, A, B) \ 265 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 266 #define _mm256_cmpneq_epi16_mask(A, B) \ 267 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 268 #define _mm256_mask_cmpneq_epi16_mask(k, A, B) \ 269 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 271 #define _mm_cmpeq_epu16_mask(A, B) \ 272 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 273 
#define _mm_mask_cmpeq_epu16_mask(k, A, B) \ 274 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 275 #define _mm_cmpge_epu16_mask(A, B) \ 276 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 277 #define _mm_mask_cmpge_epu16_mask(k, A, B) \ 278 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 279 #define _mm_cmpgt_epu16_mask(A, B) \ 280 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 281 #define _mm_mask_cmpgt_epu16_mask(k, A, B) \ 282 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 283 #define _mm_cmple_epu16_mask(A, B) \ 284 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 285 #define _mm_mask_cmple_epu16_mask(k, A, B) \ 286 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 287 #define _mm_cmplt_epu16_mask(A, B) \ 288 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 289 #define _mm_mask_cmplt_epu16_mask(k, A, B) \ 290 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 291 #define _mm_cmpneq_epu16_mask(A, B) \ 292 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 293 #define _mm_mask_cmpneq_epu16_mask(k, A, B) \ 294 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 296 #define _mm256_cmpeq_epu16_mask(A, B) \ 297 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 298 #define _mm256_mask_cmpeq_epu16_mask(k, A, B) \ 299 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 300 #define _mm256_cmpge_epu16_mask(A, B) \ 301 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 302 #define _mm256_mask_cmpge_epu16_mask(k, A, B) \ 303 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 304 #define _mm256_cmpgt_epu16_mask(A, B) \ 305 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 306 #define _mm256_mask_cmpgt_epu16_mask(k, A, B) \ 307 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 308 #define _mm256_cmple_epu16_mask(A, B) \ 309 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 310 #define _mm256_mask_cmple_epu16_mask(k, A, B) \ 311 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 312 #define _mm256_cmplt_epu16_mask(A, B) \ 313 _mm256_cmp_epu16_mask((A), (B), 
_MM_CMPINT_LT) 314 #define _mm256_mask_cmplt_epu16_mask(k, A, B) \ 315 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 316 #define _mm256_cmpneq_epu16_mask(A, B) \ 317 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 318 #define _mm256_mask_cmpneq_epu16_mask(k, A, B) \ 319 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 323 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
330 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
337 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
344 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
351 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
358 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
365 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
372 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
379 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
386 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
393 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
400 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
407 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
414 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
421 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
428 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
435 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
442 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
449 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
456 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
464 return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
472 return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
480 return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
488 return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
496 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
504 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
512 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
520 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
528 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
536 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
544 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
552 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
559 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
567 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
575 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
583 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
591 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
599 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
607 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
615 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
623 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
631 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
639 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
647 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
655 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
663 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
671 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
679 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
687 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
695 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
703 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
711 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
719 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
727 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
735 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
743 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
751 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
759 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
767 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
775 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
783 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
791 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
799 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
807 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
815 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
823 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
831 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
839 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
847 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
855 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
863 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
871 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
879 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
887 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
895 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
903 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
911 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
919 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
927 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
935 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
943 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
951 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
959 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
967 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
975 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
983 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
991 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
999 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1007 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1015 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1023 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1031 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1039 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1047 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1055 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1063 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1071 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1079 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1087 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1095 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1103 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1111 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1119 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1127 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1135 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1143 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1151 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1159 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1167 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1175 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1183 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1191 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1199 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1207 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1215 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1223 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1231 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1239 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1247 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1255 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1263 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1271 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1279 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1287 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1296 return (__m128i) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
1306 return (__m256i) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
1315 return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I,
1325 return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I,
1335 return (__m128i) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi) __I,
1344 return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I,
1352 __m256i __I, __m256i __B)
1354 return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I,
1362 __m256i __I, __m256i __B)
1364 return (__m256i) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi) __I,
1372 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1379 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1387 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1394 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1401 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1408 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1415 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
1422 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
1429 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1436 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1443 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1450 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1457 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1464 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1471 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1478 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1485 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1492 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1499 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1506 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1514 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1521 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1528 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1536 __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1543 __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1549 __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1554 return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
1561 return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
1568 return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
1576 __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1582 __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1588 __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M);
1593 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1600 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1607 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1614 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1621 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1628 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1635 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1642 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1649 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1656 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1663 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1670 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1677 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1684 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1691 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1698 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1705 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1712 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1719 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1726 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1733 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1740 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1747 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1754 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1761 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1768 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1775 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1782 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1790 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1798 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1806 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1814 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1823 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1831 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1839 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1847 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1853 #define _mm_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \ 1854 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1855 (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ 1856 (__v8hi)(__m128i)(W)); }) 1858 #define _mm_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \ 1859 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1860 (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ 1861 (__v8hi)_mm_setzero_hi()); }) 1863 #define _mm256_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \ 1864 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1865 (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ 1866 (__v16hi)(__m256i)(W)); }) 1868 #define _mm256_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \ 1869 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1870 (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ 1871 (__v16hi)_mm256_setzero_si256()); }) 1873 #define _mm_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \ 1874 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1875 (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ 1876 (__v8hi)(__m128i)(W)); }) 1878 #define _mm_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \ 1879 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1880 (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ 1881 (__v8hi)_mm_setzero_hi()); }) 1883 #define _mm256_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \ 1884 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1885 (__v16hi)_mm256_shufflelo_epi16((A), \ 1887 (__v16hi)(__m256i)(W)); }) 1889 #define _mm256_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \ 1890 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1891 (__v16hi)_mm256_shufflelo_epi16((A), \ 1893 (__v16hi)_mm256_setzero_si256()); }) 1898 return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B);
1904 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1912 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1920 return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B);
1926 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1934 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1942 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1950 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1958 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1966 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1974 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1982 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1990 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1998 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2006 return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B);
2012 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2020 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2028 return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B);
2034 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2042 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2050 return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B);
2056 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2064 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2072 return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B);
2078 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2086 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2094 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2102 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2110 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2118 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2126 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2134 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2142 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2150 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2158 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2166 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2174 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2182 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2190 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2198 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2206 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2214 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2222 return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
2230 return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
2238 return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
2246 return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
2254 return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
2262 return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
2270 return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
2278 return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
2287 return (__m128i) __builtin_ia32_selectb_128(__M,
2295 return (__m128i) __builtin_ia32_selectb_128(__M,
2303 return (__m256i) __builtin_ia32_selectb_256(__M,
2311 return (__m256i) __builtin_ia32_selectb_256(__M,
2319 return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
2327 return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
2336 return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
2344 return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
2353 return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
2361 return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
2370 return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
2378 return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
2386 __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
2394 __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
2402 __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
2410 __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
2525 return (
__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
2531 return (
__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
2537 return (
__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
2543 return (
__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
2549 return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
2555 return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
2561 return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
2567 return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
2573 return (__m128i)__builtin_ia32_selectb_128(__M,
2581 return (__m128i)__builtin_ia32_selectb_128(__M,
2589 return (__m256i)__builtin_ia32_selectb_256(__M,
2597 return (__m256i)__builtin_ia32_selectb_256(__M,
2605 return (__m128i)__builtin_ia32_selectw_128(__M,
2613 return (__m128i)__builtin_ia32_selectw_128(__M,
2621 return (__m256i)__builtin_ia32_selectw_256(__M,
2629 return (__m256i)__builtin_ia32_selectw_256(__M,
2637 return (__m256i) __builtin_ia32_selectw_256 (__M,
2645 return (__m256i) __builtin_ia32_selectw_256(__M,
2653 return (__m128i) __builtin_ia32_selectw_128(__M,
2661 return (__m128i) __builtin_ia32_selectw_128(__M,
2669 return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
2678 return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
2688 return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
2697 return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
2707 return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
2717 return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
2723 #define _mm_mask_alignr_epi8(W, U, A, B, N) __extension__ ({ \ 2724 (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ 2725 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ 2726 (__v16qi)(__m128i)(W)); }) 2728 #define _mm_maskz_alignr_epi8(U, A, B, N) __extension__ ({ \ 2729 (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ 2730 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ 2731 (__v16qi)_mm_setzero_si128()); }) 2733 #define _mm256_mask_alignr_epi8(W, U, A, B, N) __extension__ ({ \ 2734 (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ 2735 (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ 2736 (__v32qi)(__m256i)(W)); }) 2738 #define _mm256_maskz_alignr_epi8(U, A, B, N) __extension__ ({ \ 2739 (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ 2740 (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ 2741 (__v32qi)_mm256_setzero_si256()); }) 2743 #define _mm_dbsad_epu8(A, B, imm) __extension__ ({ \ 2744 (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ 2745 (__v16qi)(__m128i)(B), (int)(imm), \ 2746 (__v8hi)_mm_setzero_hi(), \ 2749 #define _mm_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({ \ 2750 (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ 2751 (__v16qi)(__m128i)(B), (int)(imm), \ 2752 (__v8hi)(__m128i)(W), \ 2755 #define _mm_maskz_dbsad_epu8(U, A, B, imm) __extension__ ({ \ 2756 (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ 2757 (__v16qi)(__m128i)(B), (int)(imm), \ 2758 (__v8hi)_mm_setzero_si128(), \ 2761 #define _mm256_dbsad_epu8(A, B, imm) __extension__ ({ \ 2762 (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ 2763 (__v32qi)(__m256i)(B), (int)(imm), \ 2764 (__v16hi)_mm256_setzero_si256(), \ 2767 #define _mm256_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({ \ 2768 (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ 2769 (__v32qi)(__m256i)(B), (int)(imm), \ 2770 (__v16hi)(__m256i)(W), \ 2773 #define _mm256_maskz_dbsad_epu8(U, A, B, imm) __extension__ 
({ \ 2774 (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \ 2775 (__v32qi)(__m256i)(B), (int)(imm), \ 2776 (__v16hi)_mm256_setzero_si256(), \ 2779 #undef __DEFAULT_FN_ATTRS static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sll_epi16(__m256i __a, __m128i __count)
#define _mm_mask_cmpneq_epi16_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_set1_epi16(__mmask16 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count)
Right-shifts each of the 16-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_test_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epu16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_abs_epi16(__m256i __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_permutexvar_epi16(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srli_epi16(__m256i __a, int __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_permutexvar_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_subs_epu8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_packus_epi32(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b)
Converts 32-bit signed integers from both 128-bit integer vector operands into 16-bit signed integers...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_slli_epi16(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastb_epi8(__mmask16 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_loadu_epi16(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi8(char __b)
Constructs a 256-bit integer vector of [32 x i8], with each of the 8-bit integral vector elements set...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_test_epi8_mask(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srli_epi16(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastb_epi8(__mmask32 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epi8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_adds_epi8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_shuffle_epi8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1, __m128i __V2)
Converts 32-bit signed integers from both 128-bit integer vector operands into 16-bit unsigned intege...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_loadu_epi8(__mmask16 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epu8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b)
Subtracts the corresponding 8-bit integer values in the operands.
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_abs_epi8(__m256i __a)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_subs_epu16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srai_epi16(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epu16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_set1_epi8(__m128i __O, __mmask16 __M, char __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_adds_epu8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastb_epi8(__m256i __O, __mmask32 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_subs_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit unsigned integer values in the input and returns the differences in th...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_adds_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_set1_epi16(__m256i __O, __mmask16 __M, short __A)
#define _mm256_mask_cmpeq_epi16_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movm_epi8(__mmask16 __A)
#define _mm_cmpeq_epi16_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [16 x i8] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_packs_epi32(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maddubs_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_movepi8_mask(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_packus_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_abs_epi16(__mmask8 __U, __m128i __A)
#define _mm256_cmpneq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi16(__m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srav_epi16(__m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_testn_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movm_epi16(__mmask8 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srl_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu8_epi16(__mmask16 __U, __m128i __A)
#define _mm_cmpneq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi16(__m256i __a, __m256i __b)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_mask_testn_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_loadu_epi16(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Creates a 256-bit integer vector with undefined values.
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi16(__mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutexvar_epi16(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_testn_epi16_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mulhi_epu16(__m256i __a, __m256i __b)
#define _mm_mask_cmpeq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi16(__m256i __W, __mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_movepi8_mask(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpacklo_epi8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_epi16(void *__P, __mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_and_si256(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastb_epi8(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_packs_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_movepi16_mask(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_test_epi16_mask(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [8 x i16] vectors...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b)
Converts 16-bit signed integers from both 128-bit integer vector operands into 8-bit signed integers...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpackhi_epi8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_permutexvar_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastw_epi16(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V)
Zero-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bi...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
#define _mm256_mask_cmpneq_epi16_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [8 x u16] and returns a 128-bit vector ...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_avg_epu8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srl_epi16(__m256i __a, __m128i __count)
#define _mm256_cmpeq_epi8_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [8 x u16] and returns a 128-bit vector ...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_movm_epi8(__mmask32 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastb_epi8(__m128i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi16_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srlv_epi16(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_set1_epi16(__mmask8 __M, short __A)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi16(short __w)
Constructs a 256-bit integer vector of [16 x i16], with each of the 16-bit integral vector elements s...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the smaller value fro...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sllv_epi16(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sra_epi16(__m256i __a, __m128i __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_testn_epi8_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpacklo_epi16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi16(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [16 x i8], saving the lower 8 bits of each ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [16 x i8] vectors...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_avg_epu16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_permutex2var_epi16(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [16 x i8] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [8 x i16], saving the lower 16 bits of each...
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_epi16(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epi16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the smaller value f...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_packs_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastw_epi16(__mmask16 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the lower 16 bits of ea...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_movepi16_mask(__m256i __A)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_mask_test_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B)
static __inline __m128i __DEFAULT_FN_ATTRS _mm_setzero_hi(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm256_test_epi8_mask(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastb_epi8(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi16_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_set1_epi8(__mmask32 __M, char __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi8(__m256i __W, __mmask32 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the greater value fro...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the greater value f...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_testn_epi8_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [8 x i16] vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-7) values from two 128-bit vectors of [16 x i8] and interleaves them i...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_loadu_epi8(__mmask32 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_abs_epi8(__mmask16 __U, __m128i __A)
#define _mm_mask_cmpeq_epi16_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
#define _mm256_cmpeq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epi8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two 128-bit signed [8 x i16] vectors, producing eight interm...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastw_epi16(__m128i __X)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_epi8(void *__P, __mmask32 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_epi8(void *__P, __mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-3) values from each of the two 128-bit vectors of [8 x i16] and interl...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_test_epi16_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit signed integer values in the input and returns the differences in the ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the upper 16 bits of ea...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_adds_epu16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_set1_epi8(__mmask16 __M, char __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi8(__m128i __W, __mmask16 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi8(__mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b)
Converts 16-bit signed integers from both 128-bit integer vector operands into 8-bit unsigned integer...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit unsigned integer values in the input and returns the differences in the...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_loadu_epi16(__mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two unsigned [8 x i16] vectors, saving the upper 16 bits of ...
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_testn_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sll_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b)
Unpacks the high-order (index 8-15) values from two 128-bit vectors of [16 x i8] and interleaves them...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutex2var_epi16(__mmask16 __U, __m256i __A, __m256i __I, __m256i __B)
#define _mm256_mask_cmpeq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_madd_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w)
Initializes all values in a 128-bit vector of [8 x i16] with the specified 16-bit value...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
#define _mm_cmpeq_epi8_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_slli_epi16(__m256i __a, int __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [8 x i16] vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi8(__mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_movm_epi16(__mmask16 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastw_epi16(__m256i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mulhi_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_loadu_epi8(__m128i __W, __mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srlv_epi16(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b)
Subtracts the corresponding 16-bit integer values in the operands.
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_mask_testn_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B)
#define _mm256_mask_cmpneq_epi8_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit signed integer values in the input and returns the differences in the c...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpackhi_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_loadu_epi8(__m256i __W, __mmask32 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mullo_epi16(__m256i __a, __m256i __b)
#define _mm256_cmpneq_epi8_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V)
Sign-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bi...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [16 x i8] vectors...
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_mask_test_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastw_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm256_testn_epi16_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [16 x i8] vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastw_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_packus_epi32(__m256i __V1, __m256i __V2)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_subs_epi8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
#define __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srav_epi16(__m128i __A, __m128i __B)
#define _mm_mask_cmpneq_epi8_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b)
Unpacks the high-order (index 4-7) values from two 128-bit vectors of [8 x i16] and interleaves them ...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sllv_epi16(__m256i __A, __m256i __B)
#define _mm_cmpneq_epi8_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_loadu_epi16(__m256i __W, __mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epu8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_set1_epi16(__m128i __O, __mmask8 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi16_epi8(__m256i __A)