25 #error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead." 28 #ifndef __AVX512VLBWINTRIN_H 29 #define __AVX512VLBWINTRIN_H 32 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128))) 33 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256))) 37 #define _mm_cmp_epi8_mask(a, b, p) \ 38 (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ 39 (__v16qi)(__m128i)(b), (int)(p), \ 42 #define _mm_mask_cmp_epi8_mask(m, a, b, p) \ 43 (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ 44 (__v16qi)(__m128i)(b), (int)(p), \ 47 #define _mm_cmp_epu8_mask(a, b, p) \ 48 (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ 49 (__v16qi)(__m128i)(b), (int)(p), \ 52 #define _mm_mask_cmp_epu8_mask(m, a, b, p) \ 53 (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ 54 (__v16qi)(__m128i)(b), (int)(p), \ 57 #define _mm256_cmp_epi8_mask(a, b, p) \ 58 (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ 59 (__v32qi)(__m256i)(b), (int)(p), \ 62 #define _mm256_mask_cmp_epi8_mask(m, a, b, p) \ 63 (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ 64 (__v32qi)(__m256i)(b), (int)(p), \ 67 #define _mm256_cmp_epu8_mask(a, b, p) \ 68 (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ 69 (__v32qi)(__m256i)(b), (int)(p), \ 72 #define _mm256_mask_cmp_epu8_mask(m, a, b, p) \ 73 (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ 74 (__v32qi)(__m256i)(b), (int)(p), \ 77 #define _mm_cmp_epi16_mask(a, b, p) \ 78 (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ 79 (__v8hi)(__m128i)(b), (int)(p), \ 82 #define _mm_mask_cmp_epi16_mask(m, a, b, p) \ 83 (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ 84 (__v8hi)(__m128i)(b), (int)(p), \ 87 #define _mm_cmp_epu16_mask(a, b, p) \ 88 
(__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ 89 (__v8hi)(__m128i)(b), (int)(p), \ 92 #define _mm_mask_cmp_epu16_mask(m, a, b, p) \ 93 (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ 94 (__v8hi)(__m128i)(b), (int)(p), \ 97 #define _mm256_cmp_epi16_mask(a, b, p) \ 98 (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ 99 (__v16hi)(__m256i)(b), (int)(p), \ 102 #define _mm256_mask_cmp_epi16_mask(m, a, b, p) \ 103 (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ 104 (__v16hi)(__m256i)(b), (int)(p), \ 107 #define _mm256_cmp_epu16_mask(a, b, p) \ 108 (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ 109 (__v16hi)(__m256i)(b), (int)(p), \ 112 #define _mm256_mask_cmp_epu16_mask(m, a, b, p) \ 113 (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ 114 (__v16hi)(__m256i)(b), (int)(p), \ 117 #define _mm_cmpeq_epi8_mask(A, B) \ 118 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 119 #define _mm_mask_cmpeq_epi8_mask(k, A, B) \ 120 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 121 #define _mm_cmpge_epi8_mask(A, B) \ 122 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 123 #define _mm_mask_cmpge_epi8_mask(k, A, B) \ 124 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 125 #define _mm_cmpgt_epi8_mask(A, B) \ 126 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 127 #define _mm_mask_cmpgt_epi8_mask(k, A, B) \ 128 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 129 #define _mm_cmple_epi8_mask(A, B) \ 130 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 131 #define _mm_mask_cmple_epi8_mask(k, A, B) \ 132 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 133 #define _mm_cmplt_epi8_mask(A, B) \ 134 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 135 #define _mm_mask_cmplt_epi8_mask(k, A, B) \ 136 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 137 #define _mm_cmpneq_epi8_mask(A, B) \ 138 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 139 #define _mm_mask_cmpneq_epi8_mask(k, A, B) \ 140 
_mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 142 #define _mm256_cmpeq_epi8_mask(A, B) \ 143 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 144 #define _mm256_mask_cmpeq_epi8_mask(k, A, B) \ 145 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 146 #define _mm256_cmpge_epi8_mask(A, B) \ 147 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 148 #define _mm256_mask_cmpge_epi8_mask(k, A, B) \ 149 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 150 #define _mm256_cmpgt_epi8_mask(A, B) \ 151 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 152 #define _mm256_mask_cmpgt_epi8_mask(k, A, B) \ 153 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 154 #define _mm256_cmple_epi8_mask(A, B) \ 155 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 156 #define _mm256_mask_cmple_epi8_mask(k, A, B) \ 157 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 158 #define _mm256_cmplt_epi8_mask(A, B) \ 159 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 160 #define _mm256_mask_cmplt_epi8_mask(k, A, B) \ 161 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 162 #define _mm256_cmpneq_epi8_mask(A, B) \ 163 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 164 #define _mm256_mask_cmpneq_epi8_mask(k, A, B) \ 165 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 167 #define _mm_cmpeq_epu8_mask(A, B) \ 168 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 169 #define _mm_mask_cmpeq_epu8_mask(k, A, B) \ 170 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 171 #define _mm_cmpge_epu8_mask(A, B) \ 172 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 173 #define _mm_mask_cmpge_epu8_mask(k, A, B) \ 174 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 175 #define _mm_cmpgt_epu8_mask(A, B) \ 176 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 177 #define _mm_mask_cmpgt_epu8_mask(k, A, B) \ 178 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 179 #define _mm_cmple_epu8_mask(A, B) \ 180 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 181 #define _mm_mask_cmple_epu8_mask(k, A, B) \ 182 
_mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 183 #define _mm_cmplt_epu8_mask(A, B) \ 184 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 185 #define _mm_mask_cmplt_epu8_mask(k, A, B) \ 186 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 187 #define _mm_cmpneq_epu8_mask(A, B) \ 188 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 189 #define _mm_mask_cmpneq_epu8_mask(k, A, B) \ 190 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 192 #define _mm256_cmpeq_epu8_mask(A, B) \ 193 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 194 #define _mm256_mask_cmpeq_epu8_mask(k, A, B) \ 195 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 196 #define _mm256_cmpge_epu8_mask(A, B) \ 197 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 198 #define _mm256_mask_cmpge_epu8_mask(k, A, B) \ 199 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 200 #define _mm256_cmpgt_epu8_mask(A, B) \ 201 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 202 #define _mm256_mask_cmpgt_epu8_mask(k, A, B) \ 203 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 204 #define _mm256_cmple_epu8_mask(A, B) \ 205 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 206 #define _mm256_mask_cmple_epu8_mask(k, A, B) \ 207 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 208 #define _mm256_cmplt_epu8_mask(A, B) \ 209 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 210 #define _mm256_mask_cmplt_epu8_mask(k, A, B) \ 211 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 212 #define _mm256_cmpneq_epu8_mask(A, B) \ 213 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 214 #define _mm256_mask_cmpneq_epu8_mask(k, A, B) \ 215 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 217 #define _mm_cmpeq_epi16_mask(A, B) \ 218 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 219 #define _mm_mask_cmpeq_epi16_mask(k, A, B) \ 220 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 221 #define _mm_cmpge_epi16_mask(A, B) \ 222 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 223 #define _mm_mask_cmpge_epi16_mask(k, A, B) \ 
224 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 225 #define _mm_cmpgt_epi16_mask(A, B) \ 226 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 227 #define _mm_mask_cmpgt_epi16_mask(k, A, B) \ 228 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 229 #define _mm_cmple_epi16_mask(A, B) \ 230 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 231 #define _mm_mask_cmple_epi16_mask(k, A, B) \ 232 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 233 #define _mm_cmplt_epi16_mask(A, B) \ 234 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 235 #define _mm_mask_cmplt_epi16_mask(k, A, B) \ 236 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 237 #define _mm_cmpneq_epi16_mask(A, B) \ 238 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 239 #define _mm_mask_cmpneq_epi16_mask(k, A, B) \ 240 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 242 #define _mm256_cmpeq_epi16_mask(A, B) \ 243 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 244 #define _mm256_mask_cmpeq_epi16_mask(k, A, B) \ 245 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 246 #define _mm256_cmpge_epi16_mask(A, B) \ 247 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 248 #define _mm256_mask_cmpge_epi16_mask(k, A, B) \ 249 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 250 #define _mm256_cmpgt_epi16_mask(A, B) \ 251 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 252 #define _mm256_mask_cmpgt_epi16_mask(k, A, B) \ 253 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 254 #define _mm256_cmple_epi16_mask(A, B) \ 255 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 256 #define _mm256_mask_cmple_epi16_mask(k, A, B) \ 257 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 258 #define _mm256_cmplt_epi16_mask(A, B) \ 259 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 260 #define _mm256_mask_cmplt_epi16_mask(k, A, B) \ 261 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 262 #define _mm256_cmpneq_epi16_mask(A, B) \ 263 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 264 #define 
_mm256_mask_cmpneq_epi16_mask(k, A, B) \ 265 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 267 #define _mm_cmpeq_epu16_mask(A, B) \ 268 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 269 #define _mm_mask_cmpeq_epu16_mask(k, A, B) \ 270 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 271 #define _mm_cmpge_epu16_mask(A, B) \ 272 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 273 #define _mm_mask_cmpge_epu16_mask(k, A, B) \ 274 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 275 #define _mm_cmpgt_epu16_mask(A, B) \ 276 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 277 #define _mm_mask_cmpgt_epu16_mask(k, A, B) \ 278 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 279 #define _mm_cmple_epu16_mask(A, B) \ 280 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 281 #define _mm_mask_cmple_epu16_mask(k, A, B) \ 282 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 283 #define _mm_cmplt_epu16_mask(A, B) \ 284 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 285 #define _mm_mask_cmplt_epu16_mask(k, A, B) \ 286 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 287 #define _mm_cmpneq_epu16_mask(A, B) \ 288 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 289 #define _mm_mask_cmpneq_epu16_mask(k, A, B) \ 290 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 292 #define _mm256_cmpeq_epu16_mask(A, B) \ 293 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 294 #define _mm256_mask_cmpeq_epu16_mask(k, A, B) \ 295 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 296 #define _mm256_cmpge_epu16_mask(A, B) \ 297 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 298 #define _mm256_mask_cmpge_epu16_mask(k, A, B) \ 299 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 300 #define _mm256_cmpgt_epu16_mask(A, B) \ 301 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 302 #define _mm256_mask_cmpgt_epu16_mask(k, A, B) \ 303 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 304 #define _mm256_cmple_epu16_mask(A, B) \ 305 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 
306 #define _mm256_mask_cmple_epu16_mask(k, A, B) \ 307 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 308 #define _mm256_cmplt_epu16_mask(A, B) \ 309 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 310 #define _mm256_mask_cmplt_epu16_mask(k, A, B) \ 311 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 312 #define _mm256_cmpneq_epu16_mask(A, B) \ 313 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 314 #define _mm256_mask_cmpneq_epu16_mask(k, A, B) \ 315 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 319 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
326 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
333 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
340 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
347 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
354 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
361 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
368 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
375 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
382 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
389 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
396 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
403 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
410 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
417 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
424 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
431 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
438 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
445 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
452 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
460 return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
468 return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
476 return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
484 return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
492 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
500 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
508 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
516 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
524 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
532 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
540 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
548 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
555 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
563 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
571 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
579 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
587 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
595 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
603 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
611 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
619 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
627 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
635 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
643 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
651 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
659 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
667 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
675 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
683 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
691 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
699 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
707 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
715 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
723 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
731 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
739 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
747 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
755 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
763 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
771 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
779 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
787 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
795 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
803 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
811 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
819 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
827 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
835 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
843 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
851 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
859 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
867 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
875 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
883 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
891 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
899 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
907 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
915 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
923 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
931 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
939 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
947 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
955 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
963 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
971 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
979 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
987 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
995 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1003 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1011 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1019 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1027 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1035 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1043 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1051 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1059 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1067 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1075 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1083 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1091 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1099 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1107 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1115 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1123 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1131 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1139 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1147 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1155 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1163 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1171 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1179 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1187 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1195 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1203 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1211 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1219 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1227 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1235 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1243 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1251 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1259 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1267 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1275 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1283 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1291 return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
1299 return (__m128i)__builtin_ia32_selectw_128(__U,
1308 return (__m128i)__builtin_ia32_selectw_128(__U,
1317 return (__m128i)__builtin_ia32_selectw_128(__U,
1325 return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
1333 return (__m256i)__builtin_ia32_selectw_256(__U,
1342 return (__m256i)__builtin_ia32_selectw_256(__U,
1351 return (__m256i)__builtin_ia32_selectw_256(__U,
1358 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1365 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1373 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1380 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1387 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1394 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1401 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
1408 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
1415 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1422 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1429 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1436 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1443 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1450 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1457 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1464 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1471 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1478 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1485 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1492 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1499 return (__m128i)__builtin_shufflevector(
1500 __builtin_convertvector((__v8hi)__A, __v8qi),
1501 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
1507 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1514 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1522 __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1529 __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1535 __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1540 return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi);
1545 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1552 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1560 __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1566 __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1572 __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M);
1577 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1584 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1591 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1598 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1605 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1612 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1619 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1626 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1633 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1640 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1647 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1654 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1661 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1668 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1675 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1682 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1689 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1696 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1703 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1710 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1717 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1724 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1731 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1738 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1745 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1752 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1759 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1766 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1774 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1782 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1790 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1798 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1807 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1815 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1823 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1831 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1837 #define _mm_mask_shufflehi_epi16(W, U, A, imm) \ 1838 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1839 (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ 1840 (__v8hi)(__m128i)(W)) 1842 #define _mm_maskz_shufflehi_epi16(U, A, imm) \ 1843 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1844 (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ 1845 (__v8hi)_mm_setzero_si128()) 1847 #define _mm256_mask_shufflehi_epi16(W, U, A, imm) \ 1848 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1849 (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ 1850 (__v16hi)(__m256i)(W)) 1852 #define _mm256_maskz_shufflehi_epi16(U, A, imm) \ 1853 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1854 (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ 1855 (__v16hi)_mm256_setzero_si256()) 1857 #define _mm_mask_shufflelo_epi16(W, U, A, imm) \ 1858 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1859 (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ 1860 (__v8hi)(__m128i)(W)) 1862 #define _mm_maskz_shufflelo_epi16(U, A, imm) \ 1863 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1864 (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ 1865 (__v8hi)_mm_setzero_si128()) 1867 #define _mm256_mask_shufflelo_epi16(W, U, A, imm) \ 1868 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1869 (__v16hi)_mm256_shufflelo_epi16((A), \ 1871 (__v16hi)(__m256i)(W)) 1873 #define _mm256_maskz_shufflelo_epi16(U, A, imm) \ 1874 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1875 (__v16hi)_mm256_shufflelo_epi16((A), \ 1877 (__v16hi)_mm256_setzero_si256()) 1882 return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B);
1888 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1896 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1904 return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B);
1910 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1918 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1926 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1934 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1942 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1950 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1958 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1966 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1974 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1982 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1990 return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B);
1996 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2004 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2012 return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B);
2018 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2026 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2034 return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B);
2040 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2048 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2056 return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B);
2062 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2070 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2078 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2086 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2094 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2102 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2110 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2118 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2126 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2134 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2142 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2150 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2158 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2166 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2174 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2182 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2190 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2198 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2206 return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
2214 return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
2222 return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
2230 return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
2238 return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
2246 return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
2254 return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
2262 return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
2271 return (__m128i) __builtin_ia32_selectb_128(__M,
2279 return (__m128i) __builtin_ia32_selectb_128(__M,
2287 return (__m256i) __builtin_ia32_selectb_256(__M,
2295 return (__m256i) __builtin_ia32_selectb_256(__M,
2303 struct __loadu_epi16 {
2306 return ((
struct __loadu_epi16*)__P)->__v;
2312 return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
2320 return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
2329 struct __loadu_epi16 {
2332 return ((
struct __loadu_epi16*)__P)->__v;
2338 return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
2346 return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
2355 struct __loadu_epi8 {
2358 return ((
struct __loadu_epi8*)__P)->__v;
2364 return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
2372 return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
2381 struct __loadu_epi8 {
2384 return ((
struct __loadu_epi8*)__P)->__v;
2390 return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
2398 return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
2407 struct __storeu_epi16 {
2410 ((
struct __storeu_epi16*)__P)->__v = __A;
2416 __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
2424 struct __storeu_epi16 {
2427 ((
struct __storeu_epi16*)__P)->__v = __A;
2433 __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
2441 struct __storeu_epi8 {
2444 ((
struct __storeu_epi8*)__P)->__v = __A;
2450 __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
2458 struct __storeu_epi8 {
2461 ((
struct __storeu_epi8*)__P)->__v = __A;
2467 __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
2582 return (
__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
2588 return (
__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
2594 return (
__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
2600 return (
__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
2606 return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
2612 return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
2618 return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
2624 return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
2630 return (__m128i)__builtin_ia32_selectb_128(__M,
2638 return (__m128i)__builtin_ia32_selectb_128(__M,
2646 return (__m256i)__builtin_ia32_selectb_256(__M,
2654 return (__m256i)__builtin_ia32_selectb_256(__M,
2662 return (__m128i)__builtin_ia32_selectw_128(__M,
2670 return (__m128i)__builtin_ia32_selectw_128(__M,
2678 return (__m256i)__builtin_ia32_selectw_256(__M,
2686 return (__m256i)__builtin_ia32_selectw_256(__M,
2694 return (__m256i) __builtin_ia32_selectw_256 (__M,
2702 return (__m256i) __builtin_ia32_selectw_256(__M,
2710 return (__m128i) __builtin_ia32_selectw_128(__M,
2718 return (__m128i) __builtin_ia32_selectw_128(__M,
2726 return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A);
2732 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
2741 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
2749 return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A);
2756 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
2765 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
2770 #define _mm_mask_alignr_epi8(W, U, A, B, N) \ 2771 (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ 2772 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ 2773 (__v16qi)(__m128i)(W)) 2775 #define _mm_maskz_alignr_epi8(U, A, B, N) \ 2776 (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ 2777 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ 2778 (__v16qi)_mm_setzero_si128()) 2780 #define _mm256_mask_alignr_epi8(W, U, A, B, N) \ 2781 (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ 2782 (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ 2783 (__v32qi)(__m256i)(W)) 2785 #define _mm256_maskz_alignr_epi8(U, A, B, N) \ 2786 (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ 2787 (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ 2788 (__v32qi)_mm256_setzero_si256()) 2790 #define _mm_dbsad_epu8(A, B, imm) \ 2791 (__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \ 2792 (__v16qi)(__m128i)(B), (int)(imm)) 2794 #define _mm_mask_dbsad_epu8(W, U, A, B, imm) \ 2795 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 2796 (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ 2797 (__v8hi)(__m128i)(W)) 2799 #define _mm_maskz_dbsad_epu8(U, A, B, imm) \ 2800 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 2801 (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ 2802 (__v8hi)_mm_setzero_si128()) 2804 #define _mm256_dbsad_epu8(A, B, imm) \ 2805 (__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \ 2806 (__v32qi)(__m256i)(B), (int)(imm)) 2808 #define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \ 2809 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 2810 (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ 2811 (__v16hi)(__m256i)(W)) 2813 #define _mm256_maskz_dbsad_epu8(U, A, B, imm) \ 2814 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 2815 (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ 2816 (__v16hi)_mm256_setzero_si256()) 2818 #undef __DEFAULT_FN_ATTRS128 2819 #undef __DEFAULT_FN_ATTRS256 static __inline__ __m128i __DEFAULT_FN_ATTRS128 
_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi16(__m256i __A, __m256i __B)
#define _mm_mask_cmpneq_epi16_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastw_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastw_epi16(__m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi16_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi16_mask(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
struct __storeu_i16 *__P __v
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi16(__m256i __a, __m128i __count)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_movepi16_mask(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastw_epi16(__m256i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi16(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhi_epi16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastw_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
vector signed char unaligned_vec_schar __attribute__((aligned(1)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi8(__mmask32 __U, __m256i __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi8(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b)
Converts 32-bit signed integers from both 128-bit integer vector operands into 16-bit signed integers...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi8(char __b)
Constructs a 256-bit integer vector of [32 x i8], with each of the 8-bit integral vector elements set...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastb_epi8(__mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastb_epi8(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi16(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi16(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1, __m128i __V2)
Converts 32-bit signed integers from both 128-bit integer vector operands into 16-bit unsigned intege...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_test_epi8_mask(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b)
Subtracts the corresponding 8-bit integer values in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi8(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi8(void *__P, __mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi16(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit unsigned integer values in the input and returns the differences in th...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi8(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi16(__m256i __a, int __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi16(__mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
#define _mm256_mask_cmpeq_epi16_mask(k, A, B)
#define _mm_cmpeq_epi16_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [16 x i8] and returns a 128-bit vector ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi16(__mmask16 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi16(__m256i __W, __mmask16 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi16(__m256i __a, __m128i __count)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi16_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi8(__m256i __W, __mmask32 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi16(void *__P, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, int __B)
#define _mm256_cmpneq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi8(void *__P, __mmask32 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
#define _mm_cmpneq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B)
#define __DEFAULT_FN_ATTRS128
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi8(__m128i __W, __mmask16 __U, __m128i __A)
#define _mm_mask_cmpeq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi8(__mmask16 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastb_epi8(__mmask32 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi8(__m128i __O, __mmask16 __M, char __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi16(__m256i __a, __m128i __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [8 x i16] vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b)
Converts 16-bit signed integers from both 128-bit integer vector operands into 8-bit signed integers...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi16(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V)
Zero-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bi...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
#define _mm256_mask_cmpneq_epi16_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi16(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi8(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [8 x u16] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
#define _mm256_cmpeq_epi8_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [8 x u16] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastw_epi16(__mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_testn_epi8_mask(__m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi16(short __w)
Constructs a 256-bit integer vector of [16 x i16], with each of the 16-bit integral vector elements s...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi8(__m128i __W, __mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi8(__mmask32 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the smaller value fro...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi16(__mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi16(__mmask8 __M, short __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi8(__mmask32 __M, char __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi16(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi16(__mmask16 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi8(__mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [16 x i8], saving the lower 8 bits of each ...
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [16 x i8] vectors...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi16(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi8(__mmask16 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi16_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi8(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [16 x i8] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [8 x i16], saving the lower 16 bits of each...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi16(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_si256(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the smaller value f...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi16(__m256i __W, __mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi16(void *__P, __mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi8(__mmask16 __M, char __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi16(__m256i __a, int __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shuffle_epi8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the lower 16 bits of ea...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi16(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi8(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd_epi16(__m256i __a, __m256i __b)
#define __DEFAULT_FN_ATTRS256
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the greater value fro...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the greater value f...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [8 x i16] vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi16(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-7) values from two 128-bit vectors of [16 x i8] and interleaves them i...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi16(__mmask16 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastb_epi8(__m128i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi16(__m128i __A, __m128i __B)
#define _mm_mask_cmpeq_epi16_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhi_epu16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi16(__m128i __O, __mmask8 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi16(__m256i __a)
#define _mm256_cmpeq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two 128-bit signed [8 x i16] vectors, producing eight interm...
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_test_epi8_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-3) values from each of the two 128-bit vectors of [8 x i16] and interl...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_testn_epi8_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit signed integer values in the input and returns the differences in the ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the upper 16 bits of ea...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastw_epi16(__m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi16(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi16(__mmask16 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastb_epi8(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b)
Converts 16-bit signed integers from both 128-bit integer vector operands into 8-bit unsigned integer...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_test_epi16_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit unsigned integer values in the input and returns the differences in the...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two unsigned [8 x i16] vectors, saving the upper 16 bits of ...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_testn_epi16_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b)
Unpacks the high-order (index 8-15) values from two 128-bit vectors of [16 x i8] and interleaves them...
#define _mm256_mask_cmpeq_epi8_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w)
Initializes all values in a 128-bit vector of [8 x i16] with the specified 16-bit value...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi16(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B)
#define _mm_cmpeq_epi8_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [8 x i16] vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi8(__mmask32 __U, void const *__P)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi16(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b)
Subtracts the corresponding 16-bit integer values in the operands.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi8(__m256i __a)
#define _mm256_mask_cmpneq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi16_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi16(__m128i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_movepi8_mask(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_movepi8_mask(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit signed integer values in the input and returns the differences in the c...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, int __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi16(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi16_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi16(__mmask8 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi16_epi8(__mmask16 __M, __m256i __A)
#define _mm256_cmpneq_epi8_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V)
Sign-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bi...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maddubs_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [16 x i8] vectors...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi32(__m256i __V1, __m256i __V2)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastb_epi8(__m256i __O, __mmask32 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [16 x i8] vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi16(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi16(__m256i __O, __mmask16 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi8(__mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi8(__m256i __W, __mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi16(void *__P, __m256i __A)
#define _mm_mask_cmpneq_epi8_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b)
Unpacks the high-order (index 4-7) values from two 128-bit vectors of [8 x i16] and interleaves them ...
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
#define _mm_cmpneq_epi8_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi16(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi16(__m256i __a, __m256i __b)